libs/wine: Add better handling of level 4 weights in unicode collation (try3)

André Hentschel nerv at dawncrow.de
Thu Jun 17 12:15:39 CDT 2010


try3:
Changed the detection to apply my changes only to standard ASCII characters,
which is definitely correct and, for now, enough for 1.2 without being too much.
It still fixes bug 5163. After the code freeze I will write more tests and patches.

try2:
I found some existing tests :)
(I removed the old if (0), which may date from a time when there was no todo_wine.)

try1:
First, this fixes bug 5163[1] (listed for 1.2).
Windows compares two variable weights (marked with an asterisk[3]) using the blanked[2] option.
There are 1849 variable weights whose level 4 weight is the same as the Unicode character, and 54 for which it is not[3].
The 54 are left as they were by the "and $1 eq $6" check, so only the others are improved.
Since we easily have the level 4 value for those 1849 characters, we should use this information when comparing two variable-weight characters.
Therefore a previously unused bit is now set in our collation table to indicate that this is the case.

[1] http://bugs.winehq.org/show_bug.cgi?id=5163
[2] http://unicode.org/reports/tr10/#Variable_Weighting
[3] http://www.unicode.org/reports/tr10/allkeys.txt

---
 dlls/kernel32/tests/locale.c |    8 +++-----
 libs/wine/collation.c        |   16 ++++++++--------
 libs/wine/cpmap.pl           |   10 +++++++---
 libs/wine/sortkey.c          |   11 +++++++++++
 4 files changed, 29 insertions(+), 16 deletions(-)

diff --git a/dlls/kernel32/tests/locale.c b/dlls/kernel32/tests/locale.c
index f37d503..b44a657 100644
--- a/dlls/kernel32/tests/locale.c
+++ b/dlls/kernel32/tests/locale.c
@@ -1282,7 +1282,6 @@ static void test_CompareStringA(void)
     ret = CompareStringA(LOCALE_SYSTEM_DEFAULT, SORT_STRINGSORT, "/m", -1, "'o", -1 );
     ok(ret == 3, "/m vs 'o expected 3, got %d\n", ret);
 
-    if (0) { /* this requires collation table patch to make it MS compatible */
     ret = CompareStringA(LOCALE_SYSTEM_DEFAULT, 0, "'o", -1, "-o", -1 );
     ok(ret == 1, "'o vs -o expected 1, got %d\n", ret);
 
@@ -1318,7 +1317,6 @@ static void test_CompareStringA(void)
 
     ret = CompareStringA(LOCALE_SYSTEM_DEFAULT, SORT_STRINGSORT, "-m", -1, "`o", -1 );
     ok(ret == 1, "-m vs `o expected 1, got %d\n", ret);
-    }
 
     ret = CompareStringA(LOCALE_USER_DEFAULT, 0, "aLuZkUtZ", 8, "aLuZkUtZ", 9);
     ok(ret == 2, "aLuZkUtZ vs aLuZkUtZ\\0 expected 2, got %d\n", ret);
@@ -1345,13 +1343,13 @@ static void test_CompareStringA(void)
     todo_wine ok(ret != 2, "\\2 vs \\1 expected unequal\n");
 
     ret = CompareStringA(lcid, NORM_IGNORECASE | LOCALE_USE_CP_ACP, "#", -1, ".", -1);
-    todo_wine ok(ret == CSTR_LESS_THAN, "\"#\" vs \".\" expected CSTR_LESS_THAN, got %d\n", ret);
+    ok(ret == CSTR_LESS_THAN, "\"#\" vs \".\" expected CSTR_LESS_THAN, got %d\n", ret);
 
     ret = CompareStringA(lcid, NORM_IGNORECASE, "_", -1, ".", -1);
-    todo_wine ok(ret == CSTR_GREATER_THAN, "\"_\" vs \".\" expected CSTR_GREATER_THAN, got %d\n", ret);
+    ok(ret == CSTR_GREATER_THAN, "\"_\" vs \".\" expected CSTR_GREATER_THAN, got %d\n", ret);
 
     ret = lstrcmpi("#", ".");
-    todo_wine ok(ret == -1, "\"#\" vs \".\" expected -1, got %d\n", ret);
+    ok(ret == -1, "\"#\" vs \".\" expected -1, got %d\n", ret);
 }
 
 static void test_LCMapStringA(void)
diff --git a/libs/wine/collation.c b/libs/wine/collation.c
index 465d740..432b527 100644
--- a/libs/wine/collation.c
+++ b/libs/wine/collation.c
@@ -72,21 +72,21 @@ const unsigned int collation_table[12800] =
     0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
     /* 0x0000 .. 0x00ff */
     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-    0x00000000, 0x02010111, 0x02020111, 0x02030111, 0x02040111, 0x02050111, 0x00000000, 0x00000000,
+    0x00000000, 0x02010113, 0x02020113, 0x02030113, 0x02040113, 0x02050113, 0x00000000, 0x00000000,
     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-    0x02090111, 0x024b0111, 0x02700111, 0x02a90111, 0x09e00111, 0x02aa0111, 0x02a70111, 0x02690111,
-    0x027a0111, 0x027b0111, 0x02a20111, 0x039f0111, 0x022d0111, 0x02210111, 0x02550111, 0x02a40111,
+    0x02090113, 0x024b0113, 0x02700113, 0x02a90113, 0x09e00111, 0x02aa0113, 0x02a70113, 0x02690113,
+    0x027a0113, 0x027b0113, 0x02a20113, 0x039f0113, 0x022d0113, 0x02210113, 0x02550113, 0x02a40113,
     0x0a0b0111, 0x0a0c0111, 0x0a0d0111, 0x0a0e0111, 0x0a0f0111, 0x0a100111, 0x0a110111, 0x0a120111,
-    0x0a130111, 0x0a140111, 0x02370111, 0x02350111, 0x03a30111, 0x03a40111, 0x03a50111, 0x024e0111,
-    0x02a10111, 0x0a150151, 0x0a290141, 0x0a3d0151, 0x0a490151, 0x0a650151, 0x0a910151, 0x0a990151,
+    0x0a130111, 0x0a140111, 0x02370113, 0x02350113, 0x03a30113, 0x03a40113, 0x03a50113, 0x024e0113,
+    0x02a10113, 0x0a150151, 0x0a290141, 0x0a3d0151, 0x0a490151, 0x0a650151, 0x0a910151, 0x0a990151,
     0x0ab90151, 0x0ad30161, 0x0ae70141, 0x0af70141, 0x0b030161, 0x0b2b0151, 0x0b330151, 0x0b4b0161,
     0x0b670141, 0x0b730141, 0x0b7f0141, 0x0ba70151, 0x0bbf0151, 0x0bd70141, 0x0bef0151, 0x0bfb0141,
-    0x0c030151, 0x0c070141, 0x0c130141, 0x027c0111, 0x02a60111, 0x027d0111, 0x020f0111, 0x021b0111,
-    0x020c0111, 0x0a150111, 0x0a290111, 0x0a3d0111, 0x0a490111, 0x0a650111, 0x0a910111, 0x0a990111,
+    0x0c030151, 0x0c070141, 0x0c130141, 0x027c0113, 0x02a60113, 0x027d0113, 0x020f0113, 0x021b0113,
+    0x020c0113, 0x0a150111, 0x0a290111, 0x0a3d0111, 0x0a490111, 0x0a650111, 0x0a910111, 0x0a990111,
     0x0ab90111, 0x0ad30111, 0x0ae70111, 0x0af70111, 0x0b030111, 0x0b2b0111, 0x0b330111, 0x0b4b0111,
     0x0b670111, 0x0b730111, 0x0b7f0111, 0x0ba70111, 0x0bbf0111, 0x0bd70111, 0x0bef0111, 0x0bfb0111,
-    0x0c030111, 0x0c070111, 0x0c130111, 0x027e0111, 0x03a70111, 0x027f0111, 0x03aa0111, 0x00000000,
+    0x0c030111, 0x0c070111, 0x0c130111, 0x027e0113, 0x03a70113, 0x027f0113, 0x03aa0113, 0x00000000,
     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x02060111, 0x00000000, 0x00000000,
     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
diff --git a/libs/wine/cpmap.pl b/libs/wine/cpmap.pl
index 89773e8..e1a6be7 100755
--- a/libs/wine/cpmap.pl
+++ b/libs/wine/cpmap.pl
@@ -550,7 +550,7 @@ sub READ_JIS0208_FILE($)
 sub READ_SORTKEYS_FILE()
 {
     my @sortkeys = ();
-    for (my $i = 0; $i < 65536; $i++) { $sortkeys[$i] = [ -1, 0, 0, 0, 0 ] };
+    for (my $i = 0; $i < 65536; $i++) { $sortkeys[$i] = [ -1, 0, 0, 0, 0, 0 ] };
 
     my $INPUT = open_data_file $SORTKEYS;
     while (<$INPUT>)
@@ -563,7 +563,11 @@ sub READ_SORTKEYS_FILE()
         {
             my ($uni,$variable) = (hex $1, $2);
             next if $uni > 65535;
-            $sortkeys[$uni] = [ $uni, hex $3, hex $4, hex $5, hex $6 ];
+            if ($variable eq "*" and $1 eq $6 and (hex $1 < 128))
+                {$variable = 1;}
+            else
+                {$variable = 0;}
+            $sortkeys[$uni] = [ $uni, hex $3, hex $4, hex $5, hex $6, $variable ];
             next;
         }
         if (/^([0-9a-fA-F]+\s+)+;\s+\[[*.]([0-9a-fA-F]{4})\.([0-9a-fA-F]{4})\.([0-9a-fA-F]{4})\.([0-9a-fA-F]+)\]/)
@@ -629,7 +633,7 @@ sub READ_SORTKEYS_FILE()
         if ($current[3]) { $current[3] = $n3; }
         if ($current[4]) { $current[4] = 1; }
 
-        $flatkeys[$current[0]] = ($current[1] << 16) | ($current[2] << 8) | ($current[3] << 4) | $current[4];
+        $flatkeys[$current[0]] = ($current[1] << 16) | ($current[2] << 8) | ($current[3] << 4) | ($current[5] << 1) | $current[4];
     }
     return @flatkeys;
 }
diff --git a/libs/wine/sortkey.c b/libs/wine/sortkey.c
index 17b5537..6e4bb79 100644
--- a/libs/wine/sortkey.c
+++ b/libs/wine/sortkey.c
@@ -164,6 +164,9 @@ static inline int compare_unicode_weights(int flags, const WCHAR *str1, int len1
     /* 32-bit collation element table format:
      * unicode weight - high 16 bit, diacritic weight - high 8 bit of low 16 bit,
      * case weight - high 4 bit of low 8 bit.
+     * bit 2 and 3 are not used.
+     * if it is a variable weight then bit 1 is set.
+     * if it has a level 4 weight then bit 0 is set.
      */
     while (len1 > 0 && len2 > 0)
     {
@@ -211,6 +214,8 @@ static inline int compare_unicode_weights(int flags, const WCHAR *str1, int len1
         ce1 = collation_table[collation_table[*str1 >> 8] + (*str1 & 0xff)];
         ce2 = collation_table[collation_table[*str2 >> 8] + (*str2 & 0xff)];
 
+        if (ce1 & 2 && ce2 & 2) return *str1 - *str2;
+
         if (ce1 != (unsigned int)-1 && ce2 != (unsigned int)-1)
             ret = (ce1 >> 16) - (ce2 >> 16);
         else
@@ -235,6 +240,9 @@ static inline int compare_diacritic_weights(int flags, const WCHAR *str1, int le
     /* 32-bit collation element table format:
      * unicode weight - high 16 bit, diacritic weight - high 8 bit of low 16 bit,
      * case weight - high 4 bit of low 8 bit.
+     * bit 2 and 3 are not used.
+     * if it is a variable weight then bit 1 is set.
+     * if it has a level 4 weight then bit 0 is set.
      */
     while (len1 > 0 && len2 > 0)
     {
@@ -284,6 +292,9 @@ static inline int compare_case_weights(int flags, const WCHAR *str1, int len1,
     /* 32-bit collation element table format:
      * unicode weight - high 16 bit, diacritic weight - high 8 bit of low 16 bit,
      * case weight - high 4 bit of low 8 bit.
+     * bit 2 and 3 are not used.
+     * if it is a variable weight then bit 1 is set.
+     * if it has a level 4 weight then bit 0 is set.
      */
     while (len1 > 0 && len2 > 0)
     {
-- 

Best Regards, André Hentschel



More information about the wine-patches mailing list