[RFC 3/5] kernelbase/locale: Implement sortkey japanese
Fabian Maurer
dark.shadow4 at web.de
Sat Apr 11 14:45:47 CDT 2020
---
dlls/kernel32/tests/locale.c | 26 +++++++++++++++
dlls/kernelbase/locale.c | 65 ++++++++++++++++++++++++++++++++++--
2 files changed, 89 insertions(+), 2 deletions(-)
diff --git a/dlls/kernel32/tests/locale.c b/dlls/kernel32/tests/locale.c
index 4b0c106cd2..01ba3c0cb6 100644
--- a/dlls/kernel32/tests/locale.c
+++ b/dlls/kernel32/tests/locale.c
@@ -3190,6 +3190,32 @@ static const struct sorting_test_entry unicode_sorting_tests[] =
/* 60 */ { L"en-US", SORT_STRINGSORT, L"\xfe32", L"\x2013", CSTR_GREATER_THAN }, /* Punctuation SORT_STRINGSORT case weight */
/* 61 */ { L"en-US", SORT_STRINGSORT, L"\xfe31", L"\xfe58", CSTR_GREATER_THAN }, /* Punctuation SORT_STRINGSORT case weight */
/* 62 */ { L"en-US", SORT_STRINGSORT, L"\xff07", L"\x0027", CSTR_GREATER_THAN }, /* Punctuation SORT_STRINGSORT case weight */
+ /* 63 */ { L"en-US", 0, L"\x04b0", L"\x32db", CSTR_LESS_THAN }, /* Japanese main weight */
+ /* 64 */ { L"en-US", 0, L"\x3093", L"\x1e62\x013f", CSTR_GREATER_THAN }, /* japanese main weight */
+ /* 65 */ { L"en-US", 0, L"\x30d3", L"\x30d4", CSTR_LESS_THAN }, /* japanese diacritic weight */
+ /* 66 */ { L"en-US", 0, L"\x307b", L"\x307c", CSTR_LESS_THAN }, /* japanese diacritic weight */
+ /* 67 */ { L"en-US", 0, L"\x30ea", L"\x32f7", CSTR_LESS_THAN }, /* japanese diacritic weight */
+ /* 68 */ { L"en-US", 0, L"\x31fb", L"\x30e9", CSTR_LESS_THAN }, /* japanese case weight small */
+ /* 69 */ { L"en-US", 0, L"\x30db", L"\x31f9", CSTR_GREATER_THAN }, /* japanese case weight small */
+ /* 70 */ { L"en-US", 0, L"\xff6d", L"\xff95", CSTR_LESS_THAN }, /* japanese case weight small */
+ /* 71 */ { L"en-US", NORM_IGNORENONSPACE, L"\x31fb", L"\x30e9", CSTR_EQUAL }, /* japanese case weight small */
+ /* 72 */ { L"en-US", NORM_IGNORENONSPACE, L"\x30db", L"\x31f9", CSTR_EQUAL }, /* japanese case weight small */
+ /* 73 */ { L"en-US", NORM_IGNORENONSPACE, L"\xff6d", L"\xff95", CSTR_EQUAL }, /* japanese case weight small */
+ /* 74 */ { L"en-US", 0, L"\x30d5", L"\x3075", CSTR_LESS_THAN }, /* japanese case weight kana */
+ /* 75 */ { L"en-US", 0, L"\x306a", L"\x30ca", CSTR_GREATER_THAN }, /* japanese case weight kana */
+ /* 76 */ { L"en-US", 0, L"\x305a", L"\x30ba", CSTR_GREATER_THAN }, /* japanese case weight kana */
+ /* 77 */ { L"en-US", NORM_IGNOREKANATYPE, L"\x30d5", L"\x3075", CSTR_EQUAL }, /* japanese case weight kana */
+ /* 78 */ { L"en-US", NORM_IGNOREKANATYPE, L"\x306a", L"\x30ca", CSTR_EQUAL }, /* japanese case weight kana */
+ /* 79 */ { L"en-US", NORM_IGNOREKANATYPE, L"\x305a", L"\x30ba", CSTR_EQUAL }, /* japanese case weight kana */
+ /* 80 */ { L"en-US", 0, L"\x30bf", L"\xff80", CSTR_GREATER_THAN }, /* japanese case weight width */
+ /* 81 */ { L"en-US", 0, L"\x30ab", L"\xff76", CSTR_GREATER_THAN }, /* japanese case weight width */
+ /* 82 */ { L"en-US", 0, L"\x30a2", L"\xff71", CSTR_GREATER_THAN }, /* japanese case weight width */
+ /* 83 */ { L"en-US", NORM_IGNOREWIDTH, L"\x30bf", L"\xff80", CSTR_EQUAL }, /* japanese case weight width */
+ /* 84 */ { L"en-US", NORM_IGNOREWIDTH, L"\x30ab", L"\xff76", CSTR_EQUAL }, /* japanese case weight width */
+ /* 85 */ { L"en-US", NORM_IGNOREWIDTH, L"\x30a2", L"\xff71", CSTR_EQUAL }, /* japanese case weight width */
+ /* 86 */ { L"en-US", NORM_IGNORENONSPACE, L"\x31a2", L"\x3110", CSTR_EQUAL }, /* NORM_IGNORENONSPACE */
+ /* 87 */ { L"en-US", NORM_IGNORENONSPACE, L"\x1342", L"\x133a", CSTR_EQUAL }, /* NORM_IGNORENONSPACE */
+ /* 88 */ { L"en-US", NORM_IGNORENONSPACE, L"\x16a4", L"\x16a5", CSTR_EQUAL }, /* NORM_IGNORENONSPACE */
};
static void test_unicode_sorting(void)
diff --git a/dlls/kernelbase/locale.c b/dlls/kernelbase/locale.c
index 242caf6764..789e90cbf5 100644
--- a/dlls/kernelbase/locale.c
+++ b/dlls/kernelbase/locale.c
@@ -2484,6 +2484,13 @@ typedef struct _weight_special_info
BYTE weight_primary;
} weight_special_info;
+typedef struct _weight_extra_info /* element type of sortkey_data.weights_extra; one entry is appended per character by sortkey_handle_japanese_character */
+{
+ BYTE flag_small; /* small vs. normal kana; sortkey_write_result skips this field when NORM_IGNORENONSPACE is set */
+ BYTE flag_kana; /* hiragana vs. katakana; set to the baseline 0xc4 when NORM_IGNOREKANATYPE is set */
+ BYTE flag_width; /* full vs. half width; set to the baseline 0xc4 when NORM_IGNOREWIDTH is set */
+} weight_extra_info;
+
typedef struct _list
{
int extra_len;
@@ -2502,6 +2509,7 @@ typedef struct _sortkey_data
list weights_diacritic;
list weights_case;
list weights_special;
+ list weights_extra;
} sortkey_data;
/* List functions */
@@ -2579,6 +2587,7 @@ static void sortkey_data_init(sortkey_data* data, int flags, const WCHAR* locale
LIST_INIT(data->weights_diacritic, BYTE);
LIST_INIT(data->weights_case, BYTE);
LIST_INIT(data->weights_special, BYTE);
+ LIST_INIT(data->weights_extra, weight_extra_info);
}
static void sortkey_data_destroy(sortkey_data* data)
@@ -2588,6 +2597,7 @@ static void sortkey_data_destroy(sortkey_data* data)
LIST_DESTROY(data->weights_diacritic);
LIST_DESTROY(data->weights_case);
LIST_DESTROY(data->weights_special);
+ LIST_DESTROY(data->weights_extra);
}
static weight_main_info create_weight_main(BYTE script_member, BYTE weight_primary)
@@ -2653,6 +2663,36 @@ static void sortkey_handle_default_character(sortkey_data* data, WCHAR c)
case_weights_add(data, info.weight_case);
}
+static void sortkey_handle_japanese_character(sortkey_data* data, weight_main_info* weightmain, const character_info* info, const character_info* info_other)
+{ /* Adds the weights of one Japanese character to data: a main weight, an
+   * entry in weights_extra (small / kana / width flags), a diacritic weight
+   * and a case weight of MIN_WEIGHT.
+   *
+   * data:       sort key state; data->flags is checked for NORM_IGNOREKANATYPE
+   *             and NORM_IGNOREWIDTH.
+   * weightmain: overwritten here (script member 34, primary weight taken from
+   *             info_other) and then added as the main weight.
+   * info:       supplies the diacritic weight.
+   * info_other: supplies the primary weight and the case weight from which the
+   *             extra flags are extracted. The caller visible in this patch
+   *             passes the same character_info for info and info_other.
+   */
+ const BYTE BASELINE_EXTRA = 0xc4; /* value of a flag whose bit is clear (196 decimal, the threshold sortkey_write_result compares against) */
+ const BYTE ISOLATE_KANA = 0x20 | BASELINE_EXTRA; /* if bit is set then hiragana, else katakana */
+ const BYTE ISOLATE_SMALL = 0x2 | BASELINE_EXTRA; /* if bit is set then normal kana, else small kana */
+ const BYTE ISOLATE_WIDTH = 0x1 | BASELINE_EXTRA; /* if bit is set then full width, else half width */
+ int weight_case;
+ weight_extra_info extra;
+
+ weightmain->script_member = 34; /* every character handled here is written with script member 34 */
+ weightmain->weight_primary = info_other->weight_primary;
+
+ main_weights_add(data, weightmain);
+
+ weight_case = info_other->weight_case | BASELINE_EXTRA; /* force the baseline bits on, so each masked flag below is either BASELINE_EXTRA or BASELINE_EXTRA plus its own bit */
+
+ extra.flag_small = (BYTE)(weight_case & ISOLATE_SMALL); /* 0xc4 or 0xc6 */
+ extra.flag_kana = (BYTE)(weight_case & ISOLATE_KANA); /* 0xc4 or 0xe4 */
+ extra.flag_width = (BYTE)(weight_case & ISOLATE_WIDTH); /* 0xc4 or 0xc5 */
+
+ if (data->flags & NORM_IGNOREKANATYPE)
+ extra.flag_kana = BASELINE_EXTRA; /* drop the hiragana/katakana distinction */
+ if (data->flags & NORM_IGNOREWIDTH)
+ extra.flag_width = BASELINE_EXTRA; /* drop the full/half width distinction */
+ LIST_ADD(data->weights_extra, weight_extra_info, extra);
+
+ diacritic_weights_add(data, info, info->weight_diacritic);
+ case_weights_add(data, MIN_WEIGHT); /* the case slot always gets MIN_WEIGHT; the bits taken from weight_case were stored in weights_extra above */
+}
+
static BOOL sortkey_handle_character(sortkey_data* data, WCHAR c, const WCHAR* str, int i)
{
weight_main_info weightmain;
@@ -2680,7 +2720,16 @@ static BOOL sortkey_handle_character(sortkey_data* data, WCHAR c, const WCHAR* s
break;
case JAPANESE:
- /* TODO */
+ weightmain = create_weight_main(info.script_member, info.weight_primary);
+
+ if (weightmain.weight_primary <= 1)
+ {
+ /* TODO */
+ }
+ else
+ {
+ sortkey_handle_japanese_character(data, &weightmain, &info, &info);
+ }
break;
case 4: /* Jamo */
@@ -2782,7 +2831,19 @@ static void sortkey_write_result(sortkey_data* data)
LIST_ADD(data->key, BYTE, SORTKEY_SEPARATOR);
/* Extra weights */
- /* TODO */
+ if (data->weights_extra.len > 0)
+ {
+ if ((NORM_IGNORENONSPACE & flags) == 0)
+ {
+ APPEND_LIST_TO_SORTKEY(data, weights_extra, weight_extra_info, element->flag_small, element->flag_small > 196);
+ }
+ LIST_ADD(data->key, BYTE, 0xff);
+ LIST_ADD(data->key, BYTE, 0x02);
+ APPEND_LIST_TO_SORTKEY(data, weights_extra, weight_extra_info, element->flag_kana, element->flag_kana > 196);
+ LIST_ADD(data->key, BYTE, 0xff);
+ APPEND_LIST_TO_SORTKEY(data, weights_extra, weight_extra_info, element->flag_width, element->flag_width > 196);
+ LIST_ADD(data->key, BYTE, 0xff);
+ }
LIST_ADD(data->key, BYTE, SORTKEY_SEPARATOR);
--
2.26.0
More information about the wine-devel
mailing list