[RFC 3/5] kernelbase/locale: Implement sortkey japanese

Fabian Maurer dark.shadow4 at web.de
Sat Apr 11 14:45:47 CDT 2020


---
 dlls/kernel32/tests/locale.c | 26 +++++++++++++++
 dlls/kernelbase/locale.c     | 65 ++++++++++++++++++++++++++++++++++--
 2 files changed, 89 insertions(+), 2 deletions(-)

diff --git a/dlls/kernel32/tests/locale.c b/dlls/kernel32/tests/locale.c
index 4b0c106cd2..01ba3c0cb6 100644
--- a/dlls/kernel32/tests/locale.c
+++ b/dlls/kernel32/tests/locale.c
@@ -3190,6 +3190,32 @@ static const struct sorting_test_entry unicode_sorting_tests[] =
     /*  60 */ { L"en-US", SORT_STRINGSORT, L"\xfe32", L"\x2013", CSTR_GREATER_THAN }, /* Punctuation SORT_STRINGSORT case weight */
     /*  61 */ { L"en-US", SORT_STRINGSORT, L"\xfe31", L"\xfe58", CSTR_GREATER_THAN }, /* Punctuation SORT_STRINGSORT case weight */
     /*  62 */ { L"en-US", SORT_STRINGSORT, L"\xff07", L"\x0027", CSTR_GREATER_THAN }, /* Punctuation SORT_STRINGSORT case weight */
+    /*  63 */ { L"en-US", 0, L"\x04b0", L"\x32db", CSTR_LESS_THAN }, /* Japanese main weight */
+    /*  64 */ { L"en-US", 0, L"\x3093", L"\x1e62\x013f", CSTR_GREATER_THAN }, /* japanese main weight */
+    /*  65 */ { L"en-US", 0, L"\x30d3", L"\x30d4", CSTR_LESS_THAN }, /* japanese diacritic weight */
+    /*  66 */ { L"en-US", 0, L"\x307b", L"\x307c", CSTR_LESS_THAN }, /* japanese diacritic weight */
+    /*  67 */ { L"en-US", 0, L"\x30ea", L"\x32f7", CSTR_LESS_THAN }, /* japanese diacritic weight */
+    /*  68 */ { L"en-US", 0, L"\x31fb", L"\x30e9", CSTR_LESS_THAN }, /* japanese case weight small */
+    /*  69 */ { L"en-US", 0, L"\x30db", L"\x31f9", CSTR_GREATER_THAN }, /* japanese case weight small */
+    /*  70 */ { L"en-US", 0, L"\xff6d", L"\xff95", CSTR_LESS_THAN }, /* japanese case weight small */
+    /*  71 */ { L"en-US", NORM_IGNORENONSPACE, L"\x31fb", L"\x30e9", CSTR_EQUAL }, /* japanese case weight small */
+    /*  72 */ { L"en-US", NORM_IGNORENONSPACE, L"\x30db", L"\x31f9", CSTR_EQUAL }, /* japanese case weight small */
+    /*  73 */ { L"en-US", NORM_IGNORENONSPACE, L"\xff6d", L"\xff95", CSTR_EQUAL }, /* japanese case weight small */
+    /*  74 */ { L"en-US", 0, L"\x30d5", L"\x3075", CSTR_LESS_THAN }, /* japanese case weight kana */
+    /*  75 */ { L"en-US", 0, L"\x306a", L"\x30ca", CSTR_GREATER_THAN }, /* japanese case weight kana */
+    /*  76 */ { L"en-US", 0, L"\x305a", L"\x30ba", CSTR_GREATER_THAN }, /* japanese case weight kana */
+    /*  77 */ { L"en-US", NORM_IGNOREKANATYPE, L"\x30d5", L"\x3075", CSTR_EQUAL }, /* japanese case weight kana */
+    /*  78 */ { L"en-US", NORM_IGNOREKANATYPE, L"\x306a", L"\x30ca", CSTR_EQUAL }, /* japanese case weight kana */
+    /*  79 */ { L"en-US", NORM_IGNOREKANATYPE, L"\x305a", L"\x30ba", CSTR_EQUAL }, /* japanese case weight kana */
+    /*  80 */ { L"en-US", 0, L"\x30bf", L"\xff80", CSTR_GREATER_THAN }, /* japanese case weight width */
+    /*  81 */ { L"en-US", 0, L"\x30ab", L"\xff76", CSTR_GREATER_THAN }, /* japanese case weight width */
+    /*  82 */ { L"en-US", 0, L"\x30a2", L"\xff71", CSTR_GREATER_THAN }, /* japanese case weight width */
+    /*  83 */ { L"en-US", NORM_IGNOREWIDTH, L"\x30bf", L"\xff80", CSTR_EQUAL }, /* japanese case weight width */
+    /*  84 */ { L"en-US", NORM_IGNOREWIDTH, L"\x30ab", L"\xff76", CSTR_EQUAL }, /* japanese case weight width */
+    /*  85 */ { L"en-US", NORM_IGNOREWIDTH, L"\x30a2", L"\xff71", CSTR_EQUAL }, /* japanese case weight width */
+    /*  86 */ { L"en-US", NORM_IGNORENONSPACE, L"\x31a2", L"\x3110", CSTR_EQUAL }, /* NORM_IGNORENONSPACE */
+    /*  87 */ { L"en-US", NORM_IGNORENONSPACE, L"\x1342", L"\x133a", CSTR_EQUAL }, /* NORM_IGNORENONSPACE */
+    /*  88 */ { L"en-US", NORM_IGNORENONSPACE, L"\x16a4", L"\x16a5", CSTR_EQUAL }, /* NORM_IGNORENONSPACE */
 };

 static void test_unicode_sorting(void)
diff --git a/dlls/kernelbase/locale.c b/dlls/kernelbase/locale.c
index 242caf6764..789e90cbf5 100644
--- a/dlls/kernelbase/locale.c
+++ b/dlls/kernelbase/locale.c
@@ -2484,6 +2484,13 @@ typedef struct _weight_special_info
     BYTE weight_primary;
 } weight_special_info;

+typedef struct _weight_extra_info /* per-character "extra" weights, filled in by sortkey_handle_japanese_character */
+{
+    BYTE flag_small; /* 0xc4 baseline; bit 0x2 set for normal-size kana, clear for small kana */
+    BYTE flag_kana;  /* 0xc4 baseline; bit 0x20 set for hiragana, clear for katakana */
+    BYTE flag_width; /* 0xc4 baseline; bit 0x1 set for full width, clear for half width */
+} weight_extra_info;
+
 typedef struct _list
 {
     int extra_len;
@@ -2502,6 +2509,7 @@ typedef struct _sortkey_data
     list weights_diacritic;
     list weights_case;
     list weights_special;
+    list weights_extra;
 } sortkey_data;

 /* List functions */
@@ -2579,6 +2587,7 @@ static void sortkey_data_init(sortkey_data* data, int flags, const WCHAR* locale
     LIST_INIT(data->weights_diacritic, BYTE);
     LIST_INIT(data->weights_case, BYTE);
     LIST_INIT(data->weights_special, BYTE);
+    LIST_INIT(data->weights_extra, weight_extra_info);
 }

 static void sortkey_data_destroy(sortkey_data* data)
@@ -2588,6 +2597,7 @@ static void sortkey_data_destroy(sortkey_data* data)
     LIST_DESTROY(data->weights_diacritic);
     LIST_DESTROY(data->weights_case);
     LIST_DESTROY(data->weights_special);
+    LIST_DESTROY(data->weights_extra);
 }

 static weight_main_info create_weight_main(BYTE script_member, BYTE weight_primary)
@@ -2653,6 +2663,36 @@ static void sortkey_handle_default_character(sortkey_data* data, WCHAR c)
     case_weights_add(data, info.weight_case);
 }

+static void sortkey_handle_japanese_character(sortkey_data* data, weight_main_info* weightmain, const character_info* info, const character_info* info_other)
+{ /* Records the main, extra, diacritic and case weights for one Japanese character: info_other supplies the primary and case weight, info the diacritic weight. */
+    const BYTE BASELINE_EXTRA = 0xc4; /* OR'ed into every extra flag, so each stored flag is >= 0xc4 */
+    const BYTE ISOLATE_KANA = 0x20 | BASELINE_EXTRA; /* if bit is set then hiragana, else katakana */
+    const BYTE ISOLATE_SMALL = 0x2 | BASELINE_EXTRA; /* if bit is set then normal kana, else small kana */
+    const BYTE ISOLATE_WIDTH = 0x1 | BASELINE_EXTRA; /* if bit is set then full width, else half width */
+    int weight_case;
+    weight_extra_info extra;
+
+    weightmain->script_member = 34; /* overrides the caller's script member; NOTE(review): 34 is presumably the kana script - confirm */
+    weightmain->weight_primary = info_other->weight_primary;
+
+    main_weights_add(data, weightmain);
+
+    weight_case = info_other->weight_case | BASELINE_EXTRA; /* force the baseline bits so each masked flag below is 0xc4 or 0xc4 plus its one bit */
+
+    extra.flag_small = (BYTE)(weight_case & ISOLATE_SMALL); /* not reset here for NORM_IGNORENONSPACE; sortkey_write_result skips the small flags instead */
+    extra.flag_kana = (BYTE)(weight_case & ISOLATE_KANA);
+    extra.flag_width = (BYTE)(weight_case & ISOLATE_WIDTH);
+
+    if (data->flags & NORM_IGNOREKANATYPE) /* drop the hiragana/katakana distinction */
+        extra.flag_kana = BASELINE_EXTRA;
+    if (data->flags & NORM_IGNOREWIDTH) /* drop the full/half width distinction */
+        extra.flag_width = BASELINE_EXTRA;
+    LIST_ADD(data->weights_extra, weight_extra_info, extra);
+
+    diacritic_weights_add(data, info, info->weight_diacritic);
+    case_weights_add(data, MIN_WEIGHT); /* case weight is always MIN_WEIGHT here; the kana case bits are kept in weights_extra instead */
+}
+
 static BOOL sortkey_handle_character(sortkey_data* data, WCHAR c, const WCHAR* str, int i)
 {
     weight_main_info weightmain;
@@ -2680,7 +2720,16 @@ static BOOL sortkey_handle_character(sortkey_data* data, WCHAR c, const WCHAR* s
         break;

     case JAPANESE:
-        /* TODO */
+        weightmain = create_weight_main(info.script_member, info.weight_primary);
+
+        if (weightmain.weight_primary <= 1)
+        {
+            /* TODO */
+        }
+        else
+        {
+            sortkey_handle_japanese_character(data, &weightmain, &info, &info);
+        }
         break;

     case 4: /* Jamo */
@@ -2782,7 +2831,19 @@ static void sortkey_write_result(sortkey_data* data)
     LIST_ADD(data->key, BYTE, SORTKEY_SEPARATOR);

     /* Extra weights */
-    /* TODO */
+    if (data->weights_extra.len > 0)
+    {
+        if ((NORM_IGNORENONSPACE & flags) == 0)
+        {
+            APPEND_LIST_TO_SORTKEY(data, weights_extra, weight_extra_info, element->flag_small, element->flag_small > 196);
+        }
+        LIST_ADD(data->key, BYTE, 0xff);
+        LIST_ADD(data->key, BYTE, 0x02);
+        APPEND_LIST_TO_SORTKEY(data, weights_extra, weight_extra_info, element->flag_kana, element->flag_kana > 196);
+        LIST_ADD(data->key, BYTE, 0xff);
+        APPEND_LIST_TO_SORTKEY(data, weights_extra, weight_extra_info, element->flag_width, element->flag_width > 196);
+        LIST_ADD(data->key, BYTE, 0xff);
+    }

     LIST_ADD(data->key, BYTE, SORTKEY_SEPARATOR);

--
2.26.0




More information about the wine-devel mailing list