[PATCH 1/5] kernelbase/locale: Implement sortkey generation on official tables
Zebediah Figura
z.figura12 at gmail.com
Wed Apr 29 19:43:12 CDT 2020
On 4/28/20 1:17 PM, Fabian Maurer wrote:
> Signed-off-by: Fabian Maurer <dark.shadow4 at web.de>
> ---
> dlls/kernel32/tests/locale.c | 110 ++++++++
> dlls/kernelbase/locale.c | 477 ++++++++++++++++++++++++++---------
> 2 files changed, 464 insertions(+), 123 deletions(-)
>
So as far as I understand, the sort key algorithm writes the level 0
weights (script and alphabetic weight) for the whole string to the sort
key, then the level 1 weights (diacritic), and so on, right?
In that case, what seems potentially simpler to me is to calculate those
weights one level at a time, rather than one character at a time. That
is, you'd end up doing something like
static int get_sortkey( DWORD flags, const WCHAR *src, int srclen, char *dst, int dstlen )
{
    int used = 0;
    for (i = 0; i < srclen; ++i)
    {
        used += get_main_weights(src[i], dst + used, dstlen - used);
        if (!(flags & NORM_IGNORENONSPACE))
            used += get_diacritic_weights(src[i], dst + used, dstlen - used);
        ...
    }
}
This avoids the need to store temporary buffers.
As that example shows, I also think it's probably simpler to just pass
the buffer directly to whatever functions are writing sortkey bytes into it.
> diff --git a/dlls/kernel32/tests/locale.c b/dlls/kernel32/tests/locale.c
> index 4c1e1b4d73..13839bb10a 100644
> --- a/dlls/kernel32/tests/locale.c
> +++ b/dlls/kernel32/tests/locale.c
> @@ -2681,6 +2681,13 @@ static void test_lcmapstring_unicode(lcmapstring_wrapper func_ptr, const char *f
> lstrlenW(symbols_stripped) + 1, ret);
> ok(!lstrcmpW(buf, symbols_stripped), "%s string comparison mismatch\n", func_name);
>
> + /* test small buffer */
> + lstrcpyW(buf, fooW);
> + ret = func_ptr(LCMAP_SORTKEY, lower_case, -1, buf, 2);
> + ok(ret == 0, "Expected a failure\n");
> + ok(GetLastError() == ERROR_INSUFFICIENT_BUFFER,
> + "%s unexpected error code %d\n", func_name, GetLastError());;
> +
> /* test srclen = 0 */
> SetLastError(0xdeadbeef);
> ret = func_ptr(0, upper_case, 0, buf, ARRAY_SIZE(buf));
> @@ -3108,6 +3115,108 @@ static void test_sorting(void)
> }
> }
>
> +struct sorting_test_entry {
> + const WCHAR* locale;
> + DWORD flags;
> + const WCHAR* first;
> + const WCHAR* second;
> + int result_sortkey;
> + int result_compare;
> + BOOL broken_on_old_win;
> +};
> +
> +static const struct sorting_test_entry unicode_sorting_tests[] =
> +{
> + /* 0 */ { L"en-US", 0, L"\ue6e3\u0a02", L"\ue6e3\u20dc", CSTR_LESS_THAN, 0, TRUE }, /* Test default character, when there is main weight extra there must be no diacritic weight */
> + /* 1 */ { L"en-US", 0, L"\u276a", L"\u2768", CSTR_GREATER_THAN }, /* Test symbols, must add diacritic weight */
> + /* 2 */ { L"en-US", 0, L"\u204d", L"\uff02", CSTR_LESS_THAN }, /* Test symbols, must add case weight */
> + /* 3 */ { L"en-US", 0, L"a \u2060 b", L"a b", CSTR_EQUAL }, /* Test unsortable characters */
> + /* 4 */ { L"en-US", 0, L"a \xfff0 b", L"a b", CSTR_EQUAL }, /* Test invalid characters */
> + /* 5 */ { L"en-US", 0, L"\x00fc", L"\x016d", CSTR_LESS_THAN },
> + /* 6 */ { L"en-US", 0, L"\x3fcb\x7fd5", L"\x0006\x3032", CSTR_GREATER_THAN },
> + /* 7 */ { L"en-US", 0, L"\x00fc\x30fd", L"\x00fa\x1833", CSTR_LESS_THAN },
> + /* 8 */ { L"en-US", 0, L"\x0037", L"\x277c", CSTR_LESS_THAN, 0, TRUE }, /* Normal character */
> + /* 9 */ { L"en-US", 0, L"\x1eca", L"\x1ecb", CSTR_GREATER_THAN }, /* Normal character */
> + /* 10 */ { L"en-US", 0, L"\x1d05", L"\x1d48", CSTR_GREATER_THAN }, /* Normal character */
> + /* 11 */ { L"en-US", 0, L"\x19d7", L"\x096d", CSTR_GREATER_THAN }, /* Normal character diacritics */
> + /* 12 */ { L"en-US", 0, L"\x00f5", L"\x1ecf", CSTR_LESS_THAN }, /* Normal character diacritics */
> + /* 13 */ { L"en-US", 0, L"\x2793", L"\x0d70", CSTR_LESS_THAN, 0, TRUE }, /* Normal character diacritics */
> + /* 14 */ { L"en-US", 0, L"A", L"a", CSTR_GREATER_THAN }, /* Normal character case weights */
> + /* 15 */ { L"en-US", 0, L"z", L"Z", CSTR_LESS_THAN }, /* Normal character case weights */
> + /* 16 */ { L"en-US", 0, L"\xe5a6", L"\xe5a5\x0333", CSTR_GREATER_THAN, 0, TRUE }, /* CJK with extra value */
> + /* 17 */ { L"en-US", 0, L"\xe5d7", L"\xe5d6\x0330", CSTR_GREATER_THAN, 0, TRUE }, /* CJK with extra value */
> + /* 18 */ { L"en-US", 0, L"\x1B56\x0330", L"\x1096", CSTR_GREATER_THAN }, /* Diacritic is added */
> + /* 19 */ { L"en-US", 0, L"\x1817\x0333", L"\x19d7", CSTR_GREATER_THAN }, /* Diacritic is added */
> + /* 20 */ { L"en-US", 0, L"\x04de\x05ac", L"\x0499", CSTR_GREATER_THAN }, /* Diacritic is added */
> + /* 21 */ { L"en-US", 0, L"\x01ba\x0654", L"\x01b8", CSTR_LESS_THAN }, /* Diacritic can overflow */
> + /* 22 */ { L"en-US", 0, L"\x06b7\x06eb", L"\x06b6", CSTR_LESS_THAN }, /* Diacritic can overflow */
> + /* 23 */ { L"en-US", 0, L"\x1420\x0333", L"\x141f", CSTR_LESS_THAN }, /* Diacritic can overflow */
> + /* 24 */ { L"en-US", 0, L"\x11bc", L"\x110b", CSTR_GREATER_THAN }, /* Jamo case weight */
> + /* 25 */ { L"en-US", 0, L"\x11c1", L"\x1111", CSTR_GREATER_THAN }, /* Jamo case weight */
> + /* 26 */ { L"en-US", 0, L"\x11af", L"\x1105", CSTR_GREATER_THAN }, /* Jamo case weight */
> + /* 27 */ { L"en-US", 0, L"\x11c2", L"\x11f5", CSTR_LESS_THAN }, /* Jamo main weight */
> + /* 28 */ { L"en-US", 0, L"\x1108", L"\x1121", CSTR_LESS_THAN }, /* Jamo main weight */
> + /* 29 */ { L"en-US", 0, L"\x1116", L"\x11c7", CSTR_LESS_THAN }, /* Jamo main weight */
> + /* 30 */ { L"en-US", 0, L"\x11b1", L"\x11d1", CSTR_LESS_THAN }, /* Jamo main weight */
> + /* 31 */ { L"en-US", 0, L"\x4550\x73d2", L"\x3211\x23ad", CSTR_GREATER_THAN }, /* Script 5 main weight 1 */
> + /* 32 */ { L"en-US", 0, L"\x3265", L"\x4079", CSTR_LESS_THAN }, /* Script 5 main weight 1 */
> + /* 33 */ { L"en-US", 0, L"\x4c19\x68d0\x52d0", L"\x316d", CSTR_GREATER_THAN }, /* Script 5 main weight 1 */
> + /* 34 */ { L"en-US", 0, L"\x72dd", L"\x6b8a", CSTR_GREATER_THAN }, /* Script 5 main weight 2 */
> + /* 35 */ { L"en-US", 0, L"\x6785\x3bff\x6f83", L"\x7550\x34c9\x71a7", CSTR_LESS_THAN }, /* Script 5 main weight 2 */
> + /* 36 */ { L"en-US", 0, L"\x5d61", L"\x3aef", CSTR_LESS_THAN }, /* Script 5 main weight 2 */
> + /* 37 */ { L"en-US", 0, L"\x207a", L"\xfe62", CSTR_GREATER_THAN }, /* Symbols case weights */
> + /* 38 */ { L"en-US", 0, L"\xfe65", L"\xff1e", CSTR_GREATER_THAN }, /* Symbols case weights */
> + /* 39 */ { L"en-US", 0, L"\x2502", L"\xffe8", CSTR_GREATER_THAN }, /* Symbols case weights */
> + /* 40 */ { L"en-US", 0, L"\x21da", L"\x21dc", CSTR_LESS_THAN }, /* Symbols diacritic weights */
> + /* 41 */ { L"en-US", 0, L"\x29fb", L"\x2295", CSTR_LESS_THAN }, /* Symbols diacritic weights */
> + /* 42 */ { L"en-US", 0, L"\x0092", L"\x009c", CSTR_LESS_THAN }, /* Symbols diacritic weights */
> + /* 43 */ { L"en-US", NORM_IGNORESYMBOLS, L"\x21da", L"\x21dc", CSTR_EQUAL }, /* NORM_IGNORESYMBOLS */
> + /* 44 */ { L"en-US", NORM_IGNORESYMBOLS, L"\x29fb", L"\x2295", CSTR_EQUAL }, /* NORM_IGNORESYMBOLS */
> + /* 45 */ { L"en-US", NORM_IGNORESYMBOLS, L"\x0092", L"\x009c", CSTR_EQUAL }, /* NORM_IGNORESYMBOLS */
> + /* 46 */ { L"en-US", 0, L"\x3099", L"\x309a", CSTR_EQUAL }, /* MIN_WEIGHT */
> + /* 47 */ { L"en-US", 0, L"\x309b", L"\x05a2", CSTR_EQUAL }, /* MIN_WEIGHT */
> + /* 48 */ { L"en-US", 0, L"\xff9e", L"\x0e47", CSTR_EQUAL }, /* MIN_WEIGHT */
> +};
> +
> +static void test_unicode_sorting(void)
> +{
> + int i;
> + if (!pLCMapStringEx)
> + {
> +
> + win_skip("LCMapStringEx not available\n");
> + return;
> + }
> + for (i = 0; i < ARRAY_SIZE(unicode_sorting_tests); i++)
> + {
> + int pos;
> + BYTE buff1[1000];
> + BYTE buff2[1000];
> + int len1, len2;
> + int result = CSTR_EQUAL;
> + const struct sorting_test_entry* entry = &unicode_sorting_tests[i];
> +
> + len1 = pLCMapStringEx(entry->locale, LCMAP_SORTKEY | entry->flags, entry->first, -1, (WCHAR*)buff1, ARRAY_SIZE(buff1), NULL, NULL, 0);
> + len2 = pLCMapStringEx(entry->locale, LCMAP_SORTKEY | entry->flags, entry->second, -1, (WCHAR*)buff2, ARRAY_SIZE(buff2), NULL, NULL, 0);
Is there a reason to use LCMapStringEx() here rather than LCMapString()?
> +
> + for (pos = 0; pos < len1 && pos < len2; pos++)
> + {
> + if (buff1[pos] > buff2[pos])
> + {
> + result = CSTR_GREATER_THAN;
> + break;
> + }
> + else if (buff1[pos] < buff2[pos])
> + {
> + result = CSTR_LESS_THAN;
> + break;
> + }
> + }
> +
> + ok (result == entry->result_sortkey || broken(entry->broken_on_old_win), "Test %d - Expected %d, got %d\n", i, entry->result_sortkey, result);
> + }
> +}
> +
> static void test_FoldStringA(void)
> {
> int ret, i, j;
> @@ -6897,4 +7006,5 @@ START_TEST(locale)
> test_NLSVersion();
> /* this requires collation table patch to make it MS compatible */
> if (0) test_sorting();
The fact that this test is disabled with "if (0)" never struck me as great.
I'm pretty sure that, with todo_wine added where appropriate, it could be
enabled and pass. A first patch in this series could do that.
> + test_unicode_sorting();
> }
> diff --git a/dlls/kernelbase/locale.c b/dlls/kernelbase/locale.c
> index 53e4e42da3..74177371d9 100644
> --- a/dlls/kernelbase/locale.c
> +++ b/dlls/kernelbase/locale.c
> @@ -2126,127 +2126,6 @@ static int wcstombs_codepage( UINT codepage, DWORD flags, const WCHAR *src, int
> return wcstombs_sbcs( info, src, srclen, dst, dstlen );
> }
>
> -
> -static int get_sortkey( DWORD flags, const WCHAR *src, int srclen, char *dst, int dstlen )
> -{
> - WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
> - int key_len[4];
> - char *key_ptr[4];
> - const WCHAR *src_save = src;
> - int srclen_save = srclen;
> -
> - key_len[0] = key_len[1] = key_len[2] = key_len[3] = 0;
> - for (; srclen; srclen--, src++)
> - {
> - unsigned int i, decomposed_len = 1;/*wine_decompose(*src, dummy, 4);*/
> - dummy[0] = *src;
> - if (decomposed_len)
> - {
> - for (i = 0; i < decomposed_len; i++)
> - {
> - WCHAR wch = dummy[i];
> - unsigned int ce;
> -
> - if ((flags & NORM_IGNORESYMBOLS) &&
> - (get_char_type( CT_CTYPE1, wch ) & (C1_PUNCT | C1_SPACE)))
> - continue;
> -
> - if (flags & NORM_IGNORECASE) wch = casemap( nls_info.LowerCaseTable, wch );
> -
> - ce = collation_table[collation_table[collation_table[wch >> 8] + ((wch >> 4) & 0x0f)] + (wch & 0xf)];
> - if (ce != (unsigned int)-1)
> - {
> - if (ce >> 16) key_len[0] += 2;
> - if ((ce >> 8) & 0xff) key_len[1]++;
> - if ((ce >> 4) & 0x0f) key_len[2]++;
> - if (ce & 1)
> - {
> - if (wch >> 8) key_len[3]++;
> - key_len[3]++;
> - }
> - }
> - else
> - {
> - key_len[0] += 2;
> - if (wch >> 8) key_len[0]++;
> - if (wch & 0xff) key_len[0]++;
> - }
> - }
> - }
> - }
> -
> - if (!dstlen) /* compute length */
> - /* 4 * '\1' + key length */
> - return key_len[0] + key_len[1] + key_len[2] + key_len[3] + 4;
> -
> - if (dstlen < key_len[0] + key_len[1] + key_len[2] + key_len[3] + 4 + 1)
> - return 0; /* overflow */
> -
> - src = src_save;
> - srclen = srclen_save;
> -
> - key_ptr[0] = dst;
> - key_ptr[1] = key_ptr[0] + key_len[0] + 1;
> - key_ptr[2] = key_ptr[1] + key_len[1] + 1;
> - key_ptr[3] = key_ptr[2] + key_len[2] + 1;
> -
> - for (; srclen; srclen--, src++)
> - {
> - unsigned int i, decomposed_len = 1;/*wine_decompose(*src, dummy, 4);*/
> - dummy[0] = *src;
> - if (decomposed_len)
> - {
> - for (i = 0; i < decomposed_len; i++)
> - {
> - WCHAR wch = dummy[i];
> - unsigned int ce;
> -
> - if ((flags & NORM_IGNORESYMBOLS) &&
> - (get_char_type( CT_CTYPE1, wch ) & (C1_PUNCT | C1_SPACE)))
> - continue;
> -
> - if (flags & NORM_IGNORECASE) wch = casemap( nls_info.LowerCaseTable, wch );
> -
> - ce = collation_table[collation_table[collation_table[wch >> 8] + ((wch >> 4) & 0x0f)] + (wch & 0xf)];
> - if (ce != (unsigned int)-1)
> - {
> - WCHAR key;
> - if ((key = ce >> 16))
> - {
> - *key_ptr[0]++ = key >> 8;
> - *key_ptr[0]++ = key & 0xff;
> - }
> - /* make key 1 start from 2 */
> - if ((key = (ce >> 8) & 0xff)) *key_ptr[1]++ = key + 1;
> - /* make key 2 start from 2 */
> - if ((key = (ce >> 4) & 0x0f)) *key_ptr[2]++ = key + 1;
> - /* key 3 is always a character code */
> - if (ce & 1)
> - {
> - if (wch >> 8) *key_ptr[3]++ = wch >> 8;
> - if (wch & 0xff) *key_ptr[3]++ = wch & 0xff;
> - }
> - }
> - else
> - {
> - *key_ptr[0]++ = 0xff;
> - *key_ptr[0]++ = 0xfe;
> - if (wch >> 8) *key_ptr[0]++ = wch >> 8;
> - if (wch & 0xff) *key_ptr[0]++ = wch & 0xff;
> - }
> - }
> - }
> - }
> -
> - *key_ptr[0] = 1;
> - *key_ptr[1] = 1;
> - *key_ptr[2] = 1;
> - *key_ptr[3]++ = 1;
> - *key_ptr[3] = 0;
> - return key_ptr[3] - dst;
> -}
> -
> -
> /* compose a full-width katakana. return consumed source characters. */
> static int compose_katakana( const WCHAR *src, int srclen, WCHAR *dst )
> {
> @@ -2574,6 +2453,358 @@ static int compare_weights(int flags, const WCHAR *str1, int len1,
> return len1 - len2;
> }
>
> +/* Start sortkey handler code. */
> +
> +/* Defines */
> +
> +#define JAPANESE 3
> +#define MIN_WEIGHT 2
> +#define LIST_STACK_BUFFER 1000
> +
> +/* Internal structures */
Are these section-header comments ("Defines", "Internal structures", etc.) useful?
> +
> +typedef struct _character_info
> +{
> + BYTE weight_primary;
> + BYTE script_member;
> + BYTE weight_diacritic;
> + BYTE weight_case;
> +} character_info;
> +
I get the impression that typedefs have largely fallen out of favour.
> +typedef struct _weight_main_info
> +{
> + BYTE script_member;
> + BYTE weight_primary;
> + BYTE extra;
> +} weight_main_info;
> +
> +typedef struct _list
> +{
> + int extra_len;
> + int len;
> + BYTE buffer[LIST_STACK_BUFFER];
> + int buffer_count;
> + BYTE* extra;
> + int element_size;
> +} list;
> +
> +typedef struct _sortkey_data
> +{
> + int flags;
> + list key;
> + list weights_main;
> + list weights_diacritic;
> + list weights_case;
> +} sortkey_data;
> +
> +/* List functions */
> +
> +static void LIST_INIT(list* name, int type_size)
> +{
> + name->extra_len = 0;
> + name->len = 0;
> + name->extra = 0;
> + name->buffer_count = LIST_STACK_BUFFER / type_size;
> + name->element_size = type_size;
> +}
> +
> +static void LIST_DESTROY(list* name)
> +{
> + RtlFreeHeap(GetProcessHeap(), 0, name->extra);
> +}
> +
> +static void* LIST_GET(list* name, int index)
> +{
> + if ((index + 1) * name->element_size <= LIST_STACK_BUFFER)
> + return &name->buffer[index * name->element_size];
> + else
> + return &name->extra[index * name->element_size - name->buffer_count];
> +}
> +
> +/* Add entry to list, resizing as needed */
> +static void LIST_ADD(list* name, const void *value)
> +{
> + void* entry;
> + if ((name->len + 1) * name->element_size > name->extra_len + LIST_STACK_BUFFER)
> + {
> + if (!name->extra) /* First allocation */
> + {
> + name->extra_len = LIST_STACK_BUFFER;
> + name->extra = RtlAllocateHeap(GetProcessHeap(), 0, name->extra_len);
> + }
> + else
> + {
> + name->extra_len *= 2;
> + name->extra = RtlReAllocateHeap(GetProcessHeap(), 0,name->extra, name->extra_len);
> + }
> + }
> + entry = LIST_GET(name, name->len);
> + memcpy(entry, value, name->element_size);
> + name->len++;
> +}
> +
> +/* Append a weight list to the sortkey */
> +#define APPEND_LIST_TO_SORTKEY(data, weights, type, statement_get_value, statement_is_ignored) \
> + do { \
> + int z; \
> + int end = data->weights.len - 1; \
> + while (end >= 0) \
> + { \
> + const type* element = LIST_GET(&data->weights, end); \
> + (void)element; \
> + if (!(statement_is_ignored)) break; \
> + end--; \
> + } \
> + for (z = 0; z <= end; z++) \
> + { \
> + const type* element = LIST_GET(&data->weights, z); \
> + LIST_ADD(&data->key, statement_get_value); \
> + } \
> + } while (0);
> +
> +/* Helper functions */
> +
> +static BOOL get_char(sortkey_data* data, character_info* info, WCHAR ch)
> +{
> + DWORD value = sort.keys[ch];
> +
> + info->weight_case = value >> 24;
> + info->weight_diacritic = (value >> 16) & 0xff;
> + info->script_member = (value >> 8) & 0xff;
> + info->weight_primary = value & 0xff;
> + return info->script_member != 0;
> +}
> +
> +static void sortkey_data_init(sortkey_data* data, int flags, const WCHAR* locale, BOOL is_compare_string)
> +{
> + data->flags = flags;
> + LIST_INIT(&data->key, sizeof(BYTE));
> + LIST_INIT(&data->weights_main, sizeof(BYTE));
> + LIST_INIT(&data->weights_diacritic, sizeof(BYTE));
> + LIST_INIT(&data->weights_case, sizeof(BYTE));
> +}
> +
> +static void sortkey_data_destroy(sortkey_data* data)
> +{
> + LIST_DESTROY(&data->key);
> + LIST_DESTROY(&data->weights_main);
> + LIST_DESTROY(&data->weights_diacritic);
> + LIST_DESTROY(&data->weights_case);
> +}
> +
> +static weight_main_info create_weight_main(BYTE script_member, BYTE weight_primary)
> +{
> + weight_main_info ret = { 0 };
> + ret.script_member = script_member;
> + ret.weight_primary = weight_primary;
> + return ret;
> +}
> +
> +static void case_weights_add(sortkey_data* data, BYTE value)
> +{
> + int flags = data->flags;
> + if (NORM_IGNORECASE & flags)
> + value = value & ~(16 + 8);
> + if (NORM_IGNOREWIDTH & flags)
> + value = value & ~(1);
> + if (NORM_IGNOREKANATYPE & flags)
> + value = value & ~(32);
> +
> + LIST_ADD(&data->weights_case, &value);
> +}
> +
> +static void main_weights_add(sortkey_data *data, weight_main_info* value)
> +{
> + LIST_ADD(&data->weights_main, &value->script_member);
> + LIST_ADD(&data->weights_main, &value->weight_primary);
> + if (value->extra > 0)
> + LIST_ADD(&data->weights_main, &value->extra);
> +}
> +
> +static void diacritic_weights_add(sortkey_data* data, const character_info* info, BYTE value)
> +{
> + LIST_ADD(&data->weights_diacritic, &value);
> +}
> +
> +/* Main sortkey logic */
> +
> +static void sortkey_handle_default_character(sortkey_data* data, WCHAR c)
> +{
> + weight_main_info weightmain;
> + character_info info;
> +
> + if (!get_char(data, &info, c))
> + {
> + return;
> + }
> +
> + weightmain = create_weight_main(info.script_member, info.weight_primary);
> + if (info.script_member >= 0xa9 && info.script_member <= 0xaf) /* Some CJK have extra value */
> + weightmain.extra = info.weight_diacritic;
> + else
> + diacritic_weights_add(data, &info, info.weight_diacritic);
> +
> + main_weights_add(data, &weightmain);
> +
> + case_weights_add(data, info.weight_case);
> +}
> +
> +static BOOL sortkey_handle_character(sortkey_data* data, WCHAR c, const WCHAR* str, int i)
> +{
> + weight_main_info weightmain;
> + character_info info;
> + int flags = data->flags;
> +
> + if (!get_char(data, &info, c))
> + {
> + return FALSE;
> + }
> +
> + switch (info.script_member)
> + {
> + case 0: /* Not sorted */
> + break;
> +
> + case 1:
> + if (data->weights_diacritic.len > 0)
> + {
> + BYTE* entry = LIST_GET(&data->weights_diacritic, data->weights_diacritic.len - 1);
> + *entry += info.weight_diacritic; /* Overflow can happen, that's okay */
> + }
> + else
> + diacritic_weights_add(data, &info, info.weight_diacritic);
> + break;
> +
> + case JAPANESE:
> + /* TODO */
> + break;
> +
> + case 4: /* Jamo */
> + weightmain = create_weight_main(info.weight_primary, info.weight_diacritic);
> + main_weights_add(data, &weightmain);
> +
> + diacritic_weights_add(data, &info, MIN_WEIGHT);
> +
> + case_weights_add(data, info.weight_case);
> + break;
> +
> + case 5:
> + weightmain = create_weight_main(253, 255);
> + main_weights_add(data, &weightmain);
> +
> + weightmain = create_weight_main(info.weight_primary, info.weight_diacritic);
> + main_weights_add(data, &weightmain);
> +
> + diacritic_weights_add(data, &info, MIN_WEIGHT);
> +
> + case_weights_add(data, MIN_WEIGHT);
> + break;
> +
> + case 6: /* Punctuation */
> + /* TODO */
> + break;
> +
> + case 7: /* Symbols */
> + case 8: /* Symbols */
> + case 9: /* Symbols */
> + case 10: /* Symbols */
> + case 11: /* Symbols */
> + case 12: /* Symbols */
> + if (flags & NORM_IGNORESYMBOLS)
> + break;
> +
> + weightmain = create_weight_main(info.script_member, info.weight_primary);
> + main_weights_add(data, &weightmain);
> +
> + diacritic_weights_add(data, &info, info.weight_diacritic);
> +
> + case_weights_add(data, info.weight_case);
> + break;
> +
> + default:
> + sortkey_handle_default_character(data, c);
> + break;
The fact that exactly one of these integer cases (JAPANESE) has a symbolic
constant attached seems less than ideal.
> + }
> + return TRUE;
> +}
> +
> +static void sortkey_write_result(sortkey_data* data)
> +{
> + int flags = data->flags;
> +
> + const BYTE SORTKEY_SEPARATOR = 1;
> + const BYTE SORTKEY_TERMINATOR = 0;
> +
> + /* Main weights */
> +
> + APPEND_LIST_TO_SORTKEY(data, weights_main, BYTE, element, FALSE);
> +
> + LIST_ADD(&data->key, &SORTKEY_SEPARATOR);
> +
> + /* Diacritic weights */
> +
> + if ((flags & NORM_IGNORENONSPACE) == 0)
> + {
> + APPEND_LIST_TO_SORTKEY(data, weights_diacritic, BYTE, element, *element <= MIN_WEIGHT);
> + }
> +
> + LIST_ADD(&data->key, &SORTKEY_SEPARATOR);
> +
> + /* Case weights */
> + if ((NORM_IGNORECASE & flags) == 0 || (NORM_IGNOREWIDTH & flags) == 0)
> + {
> + APPEND_LIST_TO_SORTKEY(data, weights_case, BYTE, element, FALSE);
> + }
> +
> + LIST_ADD(&data->key, &SORTKEY_SEPARATOR);
> +
> + /* Extra weights */
> + /* TODO */
> +
> + LIST_ADD(&data->key, &SORTKEY_SEPARATOR);
> +
> + /* Special weights */
> + /* TODO */
> +
> + LIST_ADD(&data->key, &SORTKEY_TERMINATOR);
> +}
> +
> +static int sortkey_generate(int flags, const WCHAR* locale, const WCHAR* str, int str_len, BYTE* buffer, int buffer_len)
> +{
> + int i;
> + sortkey_data data;
> + int ret = 0;
> +
> + sortkey_data_init(&data, flags, locale, FALSE);
> +
> + if (str_len == -1)
> + str_len = wcslen(str);
> +
> + for (i = 0; i < str_len; i++)
> + {
> + sortkey_handle_character(&data, str[i], str, i);
> + }
> +
> + sortkey_write_result(&data);
> +
> + if (data.key.len <= buffer_len)
> + {
> + for (i = 0; i < data.key.len; i++)
> + {
> + BYTE* value = LIST_GET(&data.key, i);
> + buffer[i] = *value;
> + }
> + ret = data.key.len;
> + }
> + else if (!buffer)
> + {
> + ret = data.key.len;
> + }
> + sortkey_data_destroy(&data);
> + return ret;
> +}
> +
> +/* End sortkey handler code */
>
> static const struct geoinfo *get_geoinfo_ptr( GEOID geoid )
> {
> @@ -4964,8 +5195,8 @@ INT WINAPI DECLSPEC_HOTPATCH LCMapStringEx( const WCHAR *locale, DWORD flags, co
> TRACE( "(%s,0x%08x,%s,%d,%p,%d)\n",
> debugstr_w(locale), flags, debugstr_wn(src, srclen), srclen, dst, dstlen );
>
> - if ((ret = get_sortkey( flags, src, srclen, (char *)dst, dstlen ))) ret++;
> - else SetLastError( ERROR_INSUFFICIENT_BUFFER );
> + if (!(ret = sortkey_generate(flags, L"", src, srclen, (BYTE *)dst, dstlen )))
> + SetLastError( ERROR_INSUFFICIENT_BUFFER );
> return ret;
> }
>
> --
> 2.26.2
>
>
More information about the wine-devel mailing list