[PATCH 2/6] kernel32: Initial support for canonical decomposition in NormalizeString().

Huw Davies huw at codeweavers.com
Fri Feb 8 04:41:23 CST 2019


Signed-off-by: Huw Davies <huw at codeweavers.com>
---
 dlls/kernel32/locale.c       | 56 ++++++++++++++++++++++++++++++++----
 dlls/kernel32/tests/locale.c | 42 +++++++++++++--------------
 include/wine/unicode.h       |  4 +++
 libs/port/Makefile.in        |  1 +
 libs/port/decompose.c        |  4 +--
 libs/port/mbtowc.c           | 10 +++----
 libs/port/normalize.c        | 38 ++++++++++++++++++++++++
 libs/wine/sortkey.c          |  2 +-
 tools/make_unicode           |  4 +--
 9 files changed, 124 insertions(+), 37 deletions(-)
 create mode 100644 libs/port/normalize.c

diff --git a/dlls/kernel32/locale.c b/dlls/kernel32/locale.c
index e62b849d09..a4e5ef5a38 100644
--- a/dlls/kernel32/locale.c
+++ b/dlls/kernel32/locale.c
@@ -48,6 +48,7 @@
 #include "winerror.h"
 #include "winver.h"
 #include "kernel_private.h"
+#include "wine/heap.h"
 #include "wine/debug.h"
 
 WINE_DEFAULT_DEBUG_CHANNEL(nls);
@@ -5361,12 +5362,57 @@ INT WINAPI GetUserDefaultLocaleName(LPWSTR localename, int buffersize)
 /******************************************************************************
  *           NormalizeString (KERNEL32.@)
  */
-INT WINAPI NormalizeString(NORM_FORM NormForm, LPCWSTR lpSrcString, INT cwSrcLength,
-                           LPWSTR lpDstString, INT cwDstLength)
+INT WINAPI NormalizeString(NORM_FORM form, const WCHAR *src, INT src_len, WCHAR *dst, INT dst_len)
 {
-    FIXME("%x %p %d %p %d\n", NormForm, lpSrcString, cwSrcLength, lpDstString, cwDstLength);
-    SetLastError(ERROR_CALL_NOT_IMPLEMENTED);
-    return 0;
+    int flags = 0, compose = 0;
+    unsigned int res, buf_len;  /* both are counts of WCHARs, not bytes */
+    WCHAR *buf = NULL;
+
+    TRACE("%x %s %d %p %d\n", form, debugstr_wn(src, src_len), src_len, dst, dst_len);
+    /* A length of -1 means src is NUL-terminated; the terminator is then processed as well. */
+    if (src_len == -1) src_len = strlenW(src) + 1;
+    /* KC/KD set the compatibility flag; C/KC additionally need a composition pass. */
+    if (form == NormalizationKC || form == NormalizationKD) flags |= WINE_DECOMPOSE_COMPAT;
+    if (form == NormalizationC || form == NormalizationKC) compose = 1;
+    /* Without composition, decompose straight into the caller-supplied buffer. */
+    if (!compose && dst_len)
+    {
+        res = wine_decompose_string( flags, src, src_len, dst, dst_len );
+        if (!res)
+        {
+            SetLastError( ERROR_INSUFFICIENT_BUFFER );
+            goto done;
+        }
+        buf = dst;
+    }
+    else /* size query (dst_len == 0) or composition: use a growing scratch buffer */
+    {
+        buf_len = src_len * 4;
+        do
+        {
+            WCHAR *old_buf = buf;
+            /* heap_realloc() takes a byte count, whereas buf_len is in WCHARs */
+            buf = heap_realloc( buf, buf_len * sizeof(WCHAR) );
+            if (!buf)
+            {
+                heap_free( old_buf );
+                SetLastError( ERROR_OUTOFMEMORY );
+                return 0;
+            }
+            res = wine_decompose_string( flags, src, src_len, buf, buf_len );
+            buf_len *= 2;
+        } while (!res && src_len); /* empty input yields 0; retrying would never end */
+    }
+
+    if (compose)
+    {
+        FIXME("Composing not yet implemented\n");
+        res = 0;
+    }
+
+done:
+    if (buf != dst) heap_free( buf );
+    return res;
 }
 
 /******************************************************************************
diff --git a/dlls/kernel32/tests/locale.c b/dlls/kernel32/tests/locale.c
index d93fdb224e..0fa6aeb7b4 100644
--- a/dlls/kernel32/tests/locale.c
+++ b/dlls/kernel32/tests/locale.c
@@ -5692,27 +5692,27 @@ static void test_NormalizeString(void)
     };
     static const struct test_data_normal test_arr[] =
     {
-        { part0_str1, { part0_str1, part0_nfd1, part0_str1, part0_nfd1 }, { 1, 1, 1, 1 } },
-        { part0_str2, { part0_nfc2, part0_str2, part0_nfc2, part0_str2 }, { 1, 1, 1, 1 } },
-        { part0_str3, { part0_nfc3, part0_str3, part0_nfc3, part0_str3 }, { 1, 1, 1, 1 } },
-        { part0_str4, { part0_nfc4, part0_str4, part0_nfc4, part0_str4 }, { 1, 1, 1, 1 } },
+        { part0_str1, { part0_str1, part0_nfd1, part0_str1, part0_nfd1 }, { 1, 0, 1, 0 } },
+        { part0_str2, { part0_nfc2, part0_str2, part0_nfc2, part0_str2 }, { 1, 0, 1, 0 } },
+        { part0_str3, { part0_nfc3, part0_str3, part0_nfc3, part0_str3 }, { 1, 0, 1, 0 } },
+        { part0_str4, { part0_nfc4, part0_str4, part0_nfc4, part0_str4 }, { 1, 0, 1, 0 } },
         { part0_str5, { part0_nfc5, part0_nfc5, part0_nfc5, part0_nfc5 }, { 1, 1, 1, 1 } },
         { part0_str6, { part0_nfc6, part0_nfc6, part0_nfc6, part0_nfc6 }, { 1, 1, 1, 1 } },
-        { part0_str8, { part0_str8, part0_nfd8, part0_str8, part0_nfd8 }, { 1, 1, 1, 1 } },
+        { part0_str8, { part0_str8, part0_nfd8, part0_str8, part0_nfd8 }, { 1, 0, 1, 0 } },
         { part0_str9, { part0_nfc9, part0_nfd9, part0_nfc9, part0_nfd9 }, { 1, 1, 1, 1 } },
-        { part0_str10, { part0_str10, part0_nfd10, part0_str10, part0_nfd10 }, { 1, 1, 1, 1 } },
-        { part0_str11, { part0_str11, part0_nfd11, part0_str11, part0_nfd11 }, { 1, 1, 1, 1 } },
-        { part0_str12, { part0_nfc12, part0_nfd12, part0_nfc12, part0_nfd12 }, { 1, 1, 1, 1 } },
-        { part1_str1, { part1_str1, part1_str1, part1_nfkc1, part1_nfkc1 }, { 1, 1, 1, 1 } },
-        { part1_str2, { part1_str2, part1_str2, part1_nfkc2, part1_nfkc2 }, { 1, 1, 1, 1 } },
-        { part1_str3, { part1_str3, part1_nfd3, part1_str3, part1_nfd3 }, { 1, 1, 1, 1 } },
-        { part1_str4, { part1_str4, part1_str4, part1_nfkc4, part1_nfkc4 }, { 1, 1, 1, 1 } },
-        { part1_str5, { part1_str5, part1_nfd5, part1_str5, part1_nfd5 }, { 1, 1, 1, 1 } },
-        { part1_str6, { part1_str6, part1_nfd6, part1_str6, part1_nfd6 }, { 1, 1, 1, 1 } },
-        { part1_str7, { part1_str7, part1_str7, part1_nfkc7, part1_nfkc7 }, { 1, 1, 1, 1 } },
-        { part1_str8, { part1_str8, part1_nfd8, part1_str8, part1_nfd8 }, { 1, 1, 1, 1 } },
-        { part1_str9, { part1_str9, part1_str9, part1_nfkc9, part1_nfkc9 }, { 1, 1, 1, 1 } },
-        { part1_str10, { part1_str10, part1_str10, part1_nfkc10, part1_nfkc10 }, { 1, 1, 1, 1 } },
+        { part0_str10, { part0_str10, part0_nfd10, part0_str10, part0_nfd10 }, { 1, 0, 1, 0 } },
+        { part0_str11, { part0_str11, part0_nfd11, part0_str11, part0_nfd11 }, { 1, 0, 1, 0 } },
+        { part0_str12, { part0_nfc12, part0_nfd12, part0_nfc12, part0_nfd12 }, { 1, 0, 1, 0 } },
+        { part1_str1, { part1_str1, part1_str1, part1_nfkc1, part1_nfkc1 }, { 1, 0, 1, 1 } },
+        { part1_str2, { part1_str2, part1_str2, part1_nfkc2, part1_nfkc2 }, { 1, 0, 1, 1 } },
+        { part1_str3, { part1_str3, part1_nfd3, part1_str3, part1_nfd3 }, { 1, 0, 1, 0 } },
+        { part1_str4, { part1_str4, part1_str4, part1_nfkc4, part1_nfkc4 }, { 1, 0, 1, 1 } },
+        { part1_str5, { part1_str5, part1_nfd5, part1_str5, part1_nfd5 }, { 1, 0, 1, 0 } },
+        { part1_str6, { part1_str6, part1_nfd6, part1_str6, part1_nfd6 }, { 1, 0, 1, 0 } },
+        { part1_str7, { part1_str7, part1_str7, part1_nfkc7, part1_nfkc7 }, { 1, 0, 1, 1 } },
+        { part1_str8, { part1_str8, part1_nfd8, part1_str8, part1_nfd8 }, { 1, 0, 1, 0 } },
+        { part1_str9, { part1_str9, part1_str9, part1_nfkc9, part1_nfkc9 }, { 1, 0, 1, 1 } },
+        { part1_str10, { part1_str10, part1_str10, part1_nfkc10, part1_nfkc10 }, { 1, 0, 1, 1 } },
         { 0 }
     };
     const struct test_data_normal *ptest = test_arr;
@@ -5726,10 +5726,8 @@ static void test_NormalizeString(void)
         return;
     }
 
-    todo_wine {
-        dstlen = pNormalizeString( NormalizationD, ptest->str, -1, dst, 1 );
-        ok(GetLastError() == ERROR_INSUFFICIENT_BUFFER, "Should have failed with ERROR_INSUFFICIENT_BUFFER\n");
-    }
+    dstlen = pNormalizeString( NormalizationD, ptest->str, -1, dst, 1 );
+    ok(GetLastError() == ERROR_INSUFFICIENT_BUFFER, "Should have failed with ERROR_INSUFFICIENT_BUFFER\n");
 
     /*
      * For each string, first test passing -1 as srclen to NormalizeString,
diff --git a/include/wine/unicode.h b/include/wine/unicode.h
index 35c61666d2..2c21cdfe48 100644
--- a/include/wine/unicode.h
+++ b/include/wine/unicode.h
@@ -101,6 +101,10 @@ extern int wine_compare_string( int flags, const WCHAR *str1, int len1, const WC
 extern int wine_get_sortkey( int flags, const WCHAR *src, int srclen, char *dst, int dstlen );
 extern int wine_fold_string( int flags, const WCHAR *src, int srclen , WCHAR *dst, int dstlen );
 
+extern unsigned int wine_decompose_string( int flags, const WCHAR *src, unsigned int srclen, WCHAR *dst, unsigned int dstlen );
+#define WINE_DECOMPOSE_COMPAT     1  /* wine_decompose_string() flag; NormalizeString() sets it for the KC and KD forms */
+#define WINE_DECOMPOSE_REORDER    2  /* wine_decompose_string() flag; NOTE(review): no user visible here - confirm meaning */
+
 extern int strcmpiW( const WCHAR *str1, const WCHAR *str2 );
 extern int strncmpiW( const WCHAR *str1, const WCHAR *str2, int n );
 extern int memicmpW( const WCHAR *str1, const WCHAR *str2, int n );
diff --git a/libs/port/Makefile.in b/libs/port/Makefile.in
index c87b99de6d..1c753fd893 100644
--- a/libs/port/Makefile.in
+++ b/libs/port/Makefile.in
@@ -92,6 +92,7 @@ C_SRCS = \
 	memcpy_unaligned.c \
 	memmove.c \
 	mkstemps.c \
+	normalize.c \
 	poll.c \
 	pread.c \
 	pwrite.c \
diff --git a/libs/port/decompose.c b/libs/port/decompose.c
index 5e6b2214ec..c7568a01ea 100644
--- a/libs/port/decompose.c
+++ b/libs/port/decompose.c
@@ -747,7 +747,7 @@ static const WCHAR table[4704] =
     0x05d1, 0x05bf, 0x05db, 0x05bf, 0x05e4, 0x05bf, 0x0000, 0x0000
 };
 
-unsigned int DECLSPEC_HIDDEN wine_decompose( WCHAR ch, WCHAR *dst, unsigned int dstlen )
+unsigned int DECLSPEC_HIDDEN wine_decompose( int flags, WCHAR ch, WCHAR *dst, unsigned int dstlen )
 {
     const WCHAR *ptr = table + table[table[ch >> 8] + ((ch >> 4) & 0x0f)] + 2 * (ch & 0xf);
     unsigned int res;
@@ -756,6 +756,6 @@ unsigned int DECLSPEC_HIDDEN wine_decompose( WCHAR ch, WCHAR *dst, unsigned int
     if (!*ptr) return 1;
     if (dstlen <= 1) return 0;
     /* apply the decomposition recursively to the first char */
-    if ((res = wine_decompose( *ptr, dst, dstlen-1 ))) dst[res++] = ptr[1];
+    if ((res = wine_decompose( flags, *ptr, dst, dstlen-1 ))) dst[res++] = ptr[1];
     return res;
 }
diff --git a/libs/port/mbtowc.c b/libs/port/mbtowc.c
index f5d0059e15..471b87be6c 100644
--- a/libs/port/mbtowc.c
+++ b/libs/port/mbtowc.c
@@ -22,7 +22,7 @@
 
 #include "wine/unicode.h"
 
-extern unsigned int wine_decompose( WCHAR ch, WCHAR *dst, unsigned int dstlen ) DECLSPEC_HIDDEN;
+extern unsigned int wine_decompose( int flags, WCHAR ch, WCHAR *dst, unsigned int dstlen ) DECLSPEC_HIDDEN;
 
 /* check the code whether it is in Unicode Private Use Area (PUA). */
 /* MB_ERR_INVALID_CHARS raises an error converting from 1-byte character to PUA. */
@@ -125,13 +125,13 @@ static int mbstowcs_sbcs_decompose( const struct sbcs_table *table, int flags,
     {
         WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
         for (len = 0; srclen; srclen--, src++)
-            len += wine_decompose( cp2uni[*src], dummy, 4 );
+            len += wine_decompose( 0, cp2uni[*src], dummy, 4 );
         return len;
     }
 
     for (len = dstlen; srclen && len; srclen--, src++)
     {
-        unsigned int res = wine_decompose( cp2uni[*src], dst, len );
+        unsigned int res = wine_decompose( 0, cp2uni[*src], dst, len );
         if (!res) break;
         len -= res;
         dst += res;
@@ -237,7 +237,7 @@ static int mbstowcs_dbcs_decompose( const struct dbcs_table *table,
                 ch = cp2uni[(off << 8) + *src];
             }
             else ch = cp2uni[*src];
-            len += wine_decompose( ch, dummy, 4 );
+            len += wine_decompose( 0, ch, dummy, 4 );
         }
         return len;
     }
@@ -252,7 +252,7 @@ static int mbstowcs_dbcs_decompose( const struct dbcs_table *table,
             ch = cp2uni[(off << 8) + *src];
         }
         else ch = cp2uni[*src];
-        if (!(res = wine_decompose( ch, dst, len ))) break;
+        if (!(res = wine_decompose( 0, ch, dst, len ))) break;
         dst += res;
         len -= res;
     }
diff --git a/libs/port/normalize.c b/libs/port/normalize.c
new file mode 100644
index 0000000000..ba6e39fe0e
--- /dev/null
+++ b/libs/port/normalize.c
@@ -0,0 +1,38 @@
+/*
+ * Unicode normalization functions
+ *
+ * Copyright 2019 Huw Davies
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#include "wine/unicode.h"
+
+extern unsigned int wine_decompose( int flags, WCHAR ch, WCHAR *dst, unsigned int dstlen ) DECLSPEC_HIDDEN;
+
+unsigned int wine_decompose_string( int flags, const WCHAR *src, unsigned int src_len,
+                                    WCHAR *dst, unsigned int dst_len )
+{
+    /* Returns the number of WCHARs stored in dst; 0 if dst runs out of room (or src is empty). */
+    unsigned int i = 0, written = 0, step;
+    while (i < src_len)
+    {
+        if (written == dst_len) return 0;
+        step = wine_decompose( flags, src[i++], dst + written, dst_len - written );
+        if (!step) return 0;
+        written += step;
+    }
+    return written;
+}
diff --git a/libs/wine/sortkey.c b/libs/wine/sortkey.c
index 634e910d4c..ef4488908b 100644
--- a/libs/wine/sortkey.c
+++ b/libs/wine/sortkey.c
@@ -19,7 +19,7 @@
  */
 #include "wine/unicode.h"
 
-extern unsigned int wine_decompose( WCHAR ch, WCHAR *dst, unsigned int dstlen );
+extern unsigned int wine_decompose( int flags, WCHAR ch, WCHAR *dst, unsigned int dstlen );
 extern const unsigned int collation_table[];
 
 /*
diff --git a/tools/make_unicode b/tools/make_unicode
index 56d1905656..1533c46cea 100755
--- a/tools/make_unicode
+++ b/tools/make_unicode
@@ -2353,7 +2353,7 @@ sub dump_decompose_table($)
 
     printf OUTPUT "\n};\n\n";
     print OUTPUT <<"EOF";
-unsigned int DECLSPEC_HIDDEN wine_decompose( WCHAR ch, WCHAR *dst, unsigned int dstlen )
+unsigned int DECLSPEC_HIDDEN wine_decompose( int flags, WCHAR ch, WCHAR *dst, unsigned int dstlen )
 {
     const WCHAR *ptr = table + table[table[ch >> 8] + ((ch >> 4) & 0x0f)] + 2 * (ch & 0xf);
     unsigned int res;
@@ -2362,7 +2362,7 @@ unsigned int DECLSPEC_HIDDEN wine_decompose( WCHAR ch, WCHAR *dst, unsigned int
     if (!*ptr) return 1;
     if (dstlen <= 1) return 0;
     /* apply the decomposition recursively to the first char */
-    if ((res = wine_decompose( *ptr, dst, dstlen-1 ))) dst[res++] = ptr[1];
+    if ((res = wine_decompose( flags, *ptr, dst, dstlen-1 ))) dst[res++] = ptr[1];
     return res;
 }
 EOF
-- 
2.18.0




More information about the wine-devel mailing list