[PATCH 3/3] kernel32: Implement NormalizeString API function.

Sergio Gómez Del Real sdelreal at codeweavers.com
Sat Mar 24 18:25:13 CDT 2018


Signed-off-by: Sergio Gómez Del Real <sdelreal at codeweavers.com>
---
 dlls/kernel32/locale.c                    |  56 ++++++++-
 dlls/kernel32/tests/locale.c              |  84 +++++++++++++
 dlls/kernel32/tests/normalization_tests.h | 190 ++++++++++++++++++++++++++++++
 3 files changed, 325 insertions(+), 5 deletions(-)
 create mode 100644 dlls/kernel32/tests/normalization_tests.h

diff --git a/dlls/kernel32/locale.c b/dlls/kernel32/locale.c
index c5eeabfbbe..234a707adb 100644
--- a/dlls/kernel32/locale.c
+++ b/dlls/kernel32/locale.c
@@ -5353,15 +5353,61 @@ INT WINAPI GetUserDefaultLocaleName(LPWSTR localename, int buffersize)
     return LCIDToLocaleName(userlcid, localename, buffersize, 0);
 }
 
+extern void unicode_canon_order( WCHAR *str, int len ); /* NOTE(review): consider declaring this in a shared header */
 /******************************************************************************
  *           NormalizeString (KERNEL32.@)
  */
-INT WINAPI NormalizeString(NORM_FORM NormForm, LPCWSTR lpSrcString, INT cwSrcLength,
-                           LPWSTR lpDstString, INT cwDstLength)
+INT WINAPI NormalizeString(NORM_FORM norm, LPCWSTR src, INT srclen,
+                           LPWSTR dst, INT dstlen)
 {
-    FIXME("%x %p %d %p %d\n", NormForm, lpSrcString, cwSrcLength, lpDstString, cwDstLength);
-    SetLastError(ERROR_CALL_NOT_IMPLEMENTED);
-    return 0;
+    /* Normalize srclen characters of src to the form selected by norm; a
+     * srclen of -1 means src is null-terminated and the terminator is
+     * processed too. With dstlen <= 0 nothing is written and the required
+     * length is returned, otherwise the length stored in dst. Failures set
+     * the last error and return 0, except for a too small dst, which returns
+     * minus the required length so that the caller can retry with that size. */
+    const INT compat = (norm == NormalizationKC || norm == NormalizationKD);
+    const BOOL compose = (norm == NormalizationC || norm == NormalizationKC);
+    WCHAR *decomp = NULL;
+    UINT needed_len, i;
+
+    if (!src || (dstlen > 0 && !dst))
+    {
+        SetLastError(ERROR_INVALID_PARAMETER);
+        return 0;
+    }
+    if (srclen == -1) srclen = strlenW( src ) + 1;
+
+    /* with a NULL, 0 destination the helper only reports the decomposed length */
+    needed_len = wine_unicode_decompose_string( compat, src, srclen, NULL, 0 );
+    if (compose)
+    {
+        if (!(decomp = HeapAlloc( GetProcessHeap(), 0, needed_len * sizeof(WCHAR) )))
+        {
+            SetLastError(ERROR_NOT_ENOUGH_MEMORY);
+            return 0;
+        }
+        wine_unicode_decompose_string( compat, src, srclen, decomp, needed_len );
+        unicode_canon_order( decomp, needed_len );
+        /* from here on needed_len is the composed length */
+        needed_len = unicode_canonical_composition( decomp, needed_len );
+    }
+
+    if (dstlen > 0 && (UINT)dstlen < needed_len)
+    {
+        HeapFree( GetProcessHeap(), 0, decomp );
+        SetLastError(ERROR_INSUFFICIENT_BUFFER);
+        return -(INT)needed_len;
+    }
+    if (dstlen > 0 && !compose)
+    {
+        needed_len = wine_unicode_decompose_string( compat, src, srclen, dst, needed_len );
+        unicode_canon_order( dst, needed_len );
+    }
+    /* counted copy: lstrcpynW would replace the last character with a null */
+    else if (dstlen > 0) for (i = 0; i < needed_len; i++) dst[i] = decomp[i];
+    HeapFree( GetProcessHeap(), 0, decomp );
+    return needed_len;
 }
 
 /******************************************************************************
diff --git a/dlls/kernel32/tests/locale.c b/dlls/kernel32/tests/locale.c
index efb42319fe..425bbbcb95 100644
--- a/dlls/kernel32/tests/locale.c
+++ b/dlls/kernel32/tests/locale.c
@@ -36,6 +36,8 @@
 #include "winerror.h"
 #include "winnls.h"
 
+#include "normalization_tests.h"
+
 static const WCHAR upper_case[] = {'\t','J','U','S','T','!',' ','A',',',' ','T','E','S','T',';',' ','S','T','R','I','N','G',' ','1','/','*','+','-','.','\r','\n',0};
 static const WCHAR lower_case[] = {'\t','j','u','s','t','!',' ','a',',',' ','t','e','s','t',';',' ','s','t','r','i','n','g',' ','1','/','*','+','-','.','\r','\n',0};
 static const WCHAR title_case[] = {'\t','J','u','s','t','!',' ','A',',',' ','T','e','s','t',';',' ','S','t','r','i','n','g',' ','1','/','*','+','-','.','\r','\n',0};
@@ -104,6 +106,7 @@ static BOOL (WINAPI *pGetUserPreferredUILanguages)(DWORD, ULONG*, WCHAR*, ULONG*
 static WCHAR (WINAPI *pRtlUpcaseUnicodeChar)(WCHAR);
 static INT (WINAPI *pGetNumberFormatEx)(LPCWSTR, DWORD, LPCWSTR, const NUMBERFMTW *, LPWSTR, int);
 static INT (WINAPI *pFindNLSStringEx)(LPCWSTR, DWORD, LPCWSTR, INT, LPCWSTR, INT, LPINT, LPNLSVERSIONINFO, LPVOID, LPARAM);
+static INT (WINAPI *pNormalizeString)(NORM_FORM, LPCWSTR, INT, LPWSTR, INT);
 
 static void InitFunctionPointers(void)
 {
@@ -137,6 +140,7 @@ static void InitFunctionPointers(void)
   X(GetUserPreferredUILanguages);
   X(GetNumberFormatEx);
   X(FindNLSStringEx);
+  X(NormalizeString);
 
   mod = GetModuleHandleA("ntdll");
   X(RtlUpcaseUnicodeChar);
@@ -5443,6 +5447,85 @@ static void test_FindNLSStringEx(void)
     }
 }
 
+/*
+ * Run one normalization form over a null-terminated input and compare the
+ * result with the expected string.
+ *
+ * idx and name only identify the table entry and the form in failure messages.
+ */
+static void check_normalization(int idx, NORM_FORM form, const char *name,
+                                const WCHAR *src, const WCHAR *expected)
+{
+    /* the returned length is expected to count the terminating null */
+    int explen = strlenW(expected) + 1;
+    int dstlen, str_cmp;
+    WCHAR *dst;
+
+    dstlen = pNormalizeString( form, src, -1, NULL, 0 );
+    ok(dstlen > 0, "%d: %s: size query failed: returned %d\n", idx, name, dstlen);
+    if (dstlen <= 0) return;
+
+    dst = HeapAlloc(GetProcessHeap(), 0, dstlen * sizeof(WCHAR));
+    dstlen = pNormalizeString( form, src, -1, dst, dstlen );
+    ok(dstlen == explen, "%d: %s: Copied length differed: was %d, should be %d\n",
+       idx, name, dstlen, explen);
+    /* compare only what was written so the check never reads past dst */
+    str_cmp = strncmpW(expected, dst, dstlen);
+    ok(str_cmp == 0, "%d: %s test failed: returned value was %d\n", idx, name, str_cmp);
+    HeapFree(GetProcessHeap(), 0, dst);
+}
+
+/* Check all four normalization forms for every entry of the test table. */
+static void test_NormalizeString(void)
+{
+    /* each row: input, then the expected NFC, NFD, NFKC and NFKD strings */
+    struct test_data_normal test_arr[] =
+    {
+        { part0_str1, part0_nfc1, part0_nfd1, part0_nfkc1, part0_nfkd1 },
+        { part0_str2, part0_nfc2, part0_nfd2, part0_nfkc2, part0_nfkd2 },
+        { part0_str3, part0_nfc3, part0_nfd3, part0_nfkc3, part0_nfkd3 },
+        { part0_str4, part0_nfc4, part0_nfd4, part0_nfkc4, part0_nfkd4 },
+        { part0_str5, part0_nfc5, part0_nfd5, part0_nfkc5, part0_nfkd5 },
+        { part0_str6, part0_nfc6, part0_nfd6, part0_nfkc6, part0_nfkd6 },
+        { part0_str8, part0_nfc8, part0_nfd8, part0_nfkc8, part0_nfkd8 },
+        { part0_str9, part0_nfc9, part0_nfd9, part0_nfkc9, part0_nfkd9 },
+        { part0_str10, part0_nfc10, part0_nfd10, part0_nfkc10, part0_nfkd10 },
+        { part0_str11, part0_nfc11, part0_nfd11, part0_nfkc11, part0_nfkd11 },
+        { part0_str12, part0_nfc12, part0_nfd12, part0_nfkc12, part0_nfkd12 },
+        { part1_str1, part1_nfc1, part1_nfd1, part1_nfkc1, part1_nfkd1 },
+        { part1_str2, part1_nfc2, part1_nfd2, part1_nfkc2, part1_nfkd2 },
+        { part1_str3, part1_nfc3, part1_nfd3, part1_nfkc3, part1_nfkd3 },
+        { part1_str4, part1_nfc4, part1_nfd4, part1_nfkc4, part1_nfkd4 },
+        { part1_str5, part1_nfc5, part1_nfd5, part1_nfkc5, part1_nfkd5 },
+        { part1_str6, part1_nfc6, part1_nfd6, part1_nfkc6, part1_nfkd6 },
+        { part1_str7, part1_nfc7, part1_nfd7, part1_nfkc7, part1_nfkd7 },
+        { part1_str8, part1_nfc8, part1_nfd8, part1_nfkc8, part1_nfkd8 },
+        { part1_str9, part1_nfc9, part1_nfd9, part1_nfkc9, part1_nfkd9 },
+        { part1_str10, part1_nfc10, part1_nfd10, part1_nfkc10, part1_nfkd10 },
+        { part1_str11, part1_nfc11, part1_nfd11, part1_nfkc11, part1_nfkd11 },
+        { 0 }
+    };
+
+    struct test_data_normal *ptest;
+
+    /* guard the pointer that is actually called below */
+    if (!pNormalizeString)
+    {
+        win_skip("NormalizeString is not available.\n");
+        return;
+    }
+
+    for (ptest = test_arr; ptest->str; ptest++)
+    {
+        int idx = ptest - test_arr;
+
+        check_normalization( idx, NormalizationD, "NFD", ptest->str, ptest->nfd );
+        check_normalization( idx, NormalizationC, "NFC", ptest->str, ptest->nfc );
+        check_normalization( idx, NormalizationKD, "NFKD", ptest->str, ptest->nfkd );
+        check_normalization( idx, NormalizationKC, "NFKC", ptest->str, ptest->nfkc );
+    }
+}
+
 START_TEST(locale)
 {
   InitFunctionPointers();
@@ -5490,6 +5573,7 @@ START_TEST(locale)
   test_GetThreadPreferredUILanguages();
   test_GetUserPreferredUILanguages();
   test_FindNLSStringEx();
+  test_NormalizeString();
   /* this requires collation table patch to make it MS compatible */
   if (0) test_sorting();
 }
diff --git a/dlls/kernel32/tests/normalization_tests.h b/dlls/kernel32/tests/normalization_tests.h
new file mode 100644
index 0000000000..2f435bab30
--- /dev/null
+++ b/dlls/kernel32/tests/normalization_tests.h
@@ -0,0 +1,190 @@
+/*
+ * Test data for use in normalization tests.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+/* part 0: specific cases */
+/* LATIN CAPITAL LETTER D WITH DOT ABOVE */
+static WCHAR part0_str1[] = {0x1e0a,0};
+static WCHAR *part0_nfc1 = part0_str1;
+static WCHAR part0_nfd1[] = {0x0044,0x0307,0};
+static WCHAR *part0_nfkc1 = part0_str1;
+static WCHAR *part0_nfkd1 = part0_nfd1;
+
+/* LATIN CAPITAL LETTER D, COMBINING DOT BELOW, COMBINING DOT ABOVE */
+static WCHAR part0_str2[] = {0x0044,0x0323,0x0307,0};
+static WCHAR part0_nfc2[] = {0x1e0c,0x0307,0};
+static WCHAR *part0_nfd2 = part0_str2;
+static WCHAR *part0_nfkc2 = part0_nfc2;
+static WCHAR *part0_nfkd2 = part0_str2;
+
+/* LATIN CAPITAL LETTER D, COMBINING HORN, COMBINING DOT BELOW, COMBINING DOT ABOVE */
+static WCHAR part0_str3[] = {0x0044,0x031b,0x0323,0x0307,0};
+static WCHAR part0_nfc3[] = {0x1e0c,0x031b,0x0307,0};
+static WCHAR *part0_nfd3 = part0_str3;
+static WCHAR *part0_nfkc3 = part0_nfc3;
+static WCHAR *part0_nfkd3 = part0_str3;
+
+/* LATIN CAPITAL LETTER D, COMBINING HORN, COMBINING DOT BELOW, COMBINING DOT ABOVE -- NOTE(review): same data as part0 case 3 */
+static WCHAR part0_str4[] = {0x0044,0x031b,0x0323,0x0307,0};
+static WCHAR part0_nfc4[] = {0x1e0c,0x031b,0x0307,0};
+static WCHAR *part0_nfd4 = part0_str4;
+static WCHAR *part0_nfkc4 = part0_nfc4;
+static WCHAR *part0_nfkd4 = part0_str4;
+
+/*
+ * HEBREW ACCENT SEGOL, HEBREW POINT PATAH, HEBREW POINT DAGESH OR MAPIQ,
+ * HEBREW ACCENT MERKHA, HEBREW POINT SHEVA, HEBREW PUNCTUATION PASEQ,
+ * HEBREW MARK UPPER DOT, HEBREW ACCENT DEHI
+ */
+static WCHAR part0_str5[] = {0x0592,0x05B7,0x05BC,0x05A5,0x05B0,0x05C0,0x05C4,0x05AD,0};
+static WCHAR part0_nfc5[] = {0x05B0,0x05B7,0x05BC,0x05A5,0x0592,0x05C0,0x05AD,0x05C4,0};
+static WCHAR *part0_nfd5 = part0_nfc5;
+static WCHAR *part0_nfkc5 = part0_nfc5;
+static WCHAR *part0_nfkd5 = part0_nfc5;
+
+/*
+ * HEBREW POINT QAMATS, HEBREW POINT HOLAM, HEBREW POINT HATAF SEGOL,
+ * HEBREW ACCENT ETNAHTA, HEBREW PUNCTUATION SOF PASUQ, HEBREW POINT SHEVA,
+ * HEBREW ACCENT ILUY, HEBREW ACCENT QARNEY PARA
+ */
+static WCHAR part0_str6[] = {0x05B8,0x05B9,0x05B1,0x0591,0x05C3,0x05B0,0x05AC,0x059F,0};
+static WCHAR part0_nfc6[] = {0x05B1,0x05B8,0x05B9,0x0591,0x05C3,0x05B0,0x05AC,0x059F,0};
+static WCHAR *part0_nfd6 = part0_nfc6;
+static WCHAR *part0_nfkc6 = part0_nfc6;
+static WCHAR *part0_nfkd6 = part0_nfc6;
+
+/* LATIN CAPITAL LETTER D WITH DOT BELOW */
+static WCHAR part0_str8[] = {0x1E0C,0};
+static WCHAR *part0_nfc8 = part0_str8;
+static WCHAR part0_nfd8[] = {0x0044,0x0323,0};
+static WCHAR *part0_nfkc8 = part0_str8;
+static WCHAR *part0_nfkd8 = part0_nfd8;
+
+/* LATIN CAPITAL LETTER D WITH DOT ABOVE, COMBINING DOT BELOW */
+static WCHAR part0_str9[] = {0x1E0A,0x0323,0};
+static WCHAR part0_nfc9[] = {0x1E0C,0x0307,0};
+static WCHAR part0_nfd9[] = {0x0044,0x0323,0x0307,0};
+static WCHAR *part0_nfkc9 = part0_nfc9;
+static WCHAR *part0_nfkd9 = part0_nfd9;
+
+/* LATIN CAPITAL LETTER D WITH DOT BELOW, COMBINING DOT ABOVE */
+static WCHAR part0_str10[] = {0x1E0C,0x0307,0};
+static WCHAR *part0_nfc10 = part0_str10;
+static WCHAR part0_nfd10[] = {0x0044,0x0323,0x0307,0};
+static WCHAR *part0_nfkc10 = part0_str10;
+static WCHAR *part0_nfkd10 = part0_nfd10;
+
+/* LATIN CAPITAL LETTER E WITH MACRON AND GRAVE, COMBINING MACRON */
+static WCHAR part0_str11[] = {0x1E14,0x0304,0};
+static WCHAR *part0_nfc11 = part0_str11;
+static WCHAR part0_nfd11[] = {0x0045,0x0304,0x0300,0x0304,0};
+static WCHAR *part0_nfkc11 = part0_str11;
+static WCHAR *part0_nfkd11 = part0_nfd11;
+
+/* LATIN CAPITAL LETTER E WITH MACRON, COMBINING GRAVE ACCENT */
+static WCHAR part0_str12[] = {0x0112,0x0300,0};
+static WCHAR part0_nfc12[] = {0x1E14,0};
+static WCHAR part0_nfd12[] = {0x0045,0x0304,0x0300,0};
+static WCHAR *part0_nfkc12 = part0_nfc12;
+static WCHAR *part0_nfkd12 = part0_nfd12;
+
+/* part 1: character by character */
+/* DIAERESIS */
+static WCHAR part1_str1[] = {0x00a8,0};
+static WCHAR *part1_nfc1 = part1_str1;
+static WCHAR *part1_nfd1 = part1_str1;
+static WCHAR part1_nfkc1[] = {0x0020,0x0308,0};
+static WCHAR *part1_nfkd1 = part1_nfkc1;
+
+/* VULGAR FRACTION ONE QUARTER */
+static WCHAR part1_str2[] = {0x00bc,0};
+static WCHAR *part1_nfc2 = part1_str2;
+static WCHAR *part1_nfd2 = part1_str2;
+static WCHAR part1_nfkc2[] = {0x0031,0x2044,0x0034,0};
+static WCHAR *part1_nfkd2 = part1_nfkc2;
+
+/* LATIN CAPITAL LETTER E WITH CIRCUMFLEX */
+static WCHAR part1_str3[] = {0x00ca,0};
+static WCHAR *part1_nfc3 = part1_str3;
+static WCHAR part1_nfd3[] = {0x0045,0x0302,0};
+static WCHAR *part1_nfkc3 = part1_str3;
+static WCHAR *part1_nfkd3 = part1_nfd3;
+
+/* MODIFIER LETTER SMALL GAMMA */
+static WCHAR part1_str4[] = {0x02e0,0};
+static WCHAR *part1_nfc4 = part1_str4;
+static WCHAR *part1_nfd4 = part1_str4;
+static WCHAR part1_nfkc4[] = {0x0263,0};
+static WCHAR *part1_nfkd4 = part1_nfkc4;
+
+/* CYRILLIC CAPITAL LETTER IE WITH GRAVE */
+static WCHAR part1_str5[] = {0x0400,0};
+static WCHAR *part1_nfc5 = part1_str5;
+static WCHAR part1_nfd5[] = {0x0415,0x0300,0};
+static WCHAR *part1_nfkc5 = part1_str5;
+static WCHAR *part1_nfkd5 = part1_nfd5;
+
+/* CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT */
+static WCHAR part1_str6[] = {0x0476,0};
+static WCHAR *part1_nfc6 = part1_str6;
+static WCHAR part1_nfd6[] = {0x0474,0x030F,0};
+static WCHAR *part1_nfkc6 = part1_str6;
+static WCHAR *part1_nfkd6 = part1_nfd6;
+
+/* ARABIC LIGATURE HAH WITH JEEM INITIAL FORM */
+static WCHAR part1_str7[] = {0xFCA9,0};
+static WCHAR *part1_nfc7 = part1_str7;
+static WCHAR *part1_nfd7 = part1_str7;
+static WCHAR part1_nfkc7[] = {0x062D,0x062C,0};
+static WCHAR *part1_nfkd7 = part1_nfkc7;
+
+/* GREEK SMALL LETTER OMICRON WITH PSILI AND VARIA */
+static WCHAR part1_str8[] = {0x1F42,0};
+static WCHAR *part1_nfc8 = part1_str8;
+static WCHAR part1_nfd8[] = {0x03BF,0x0313,0x0300,0};
+static WCHAR *part1_nfkc8 = part1_str8;
+static WCHAR *part1_nfkd8 = part1_nfd8;
+
+/* GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI */
+static WCHAR part1_str9[] = {0x1F82,0};
+static WCHAR *part1_nfc9 = part1_str9;
+static WCHAR part1_nfd9[] = {0x03B1,0x0313,0x0300,0x0345,0};
+static WCHAR *part1_nfkc9 = part1_str9;
+static WCHAR *part1_nfkd9 = part1_nfd9;
+
+/* QUADRUPLE PRIME */
+static WCHAR part1_str10[] = {0x2057,0};
+static WCHAR *part1_nfc10 = part1_str10;
+static WCHAR *part1_nfd10 = part1_str10;
+static WCHAR part1_nfkc10[] = {0x2032,0x2032,0x2032,0x2032,0};
+static WCHAR *part1_nfkd10 = part1_nfkc10;
+
+/* KATAKANA-HIRAGANA VOICED SOUND MARK */
+static WCHAR part1_str11[] = {0x309B,0};
+static WCHAR *part1_nfc11 = part1_str11;
+static WCHAR *part1_nfd11 = part1_str11;
+static WCHAR part1_nfkc11[] = {0x20,0x3099,0};
+static WCHAR *part1_nfkd11 = part1_nfkc11;
+
+struct test_data_normal {
+    WCHAR *str;  /* input string; a NULL str ends the table in test_NormalizeString */
+    WCHAR *nfc;  /* expected result for NormalizationC */
+    WCHAR *nfd;  /* expected result for NormalizationD */
+    WCHAR *nfkc; /* expected result for NormalizationKC */
+    WCHAR *nfkd; /* expected result for NormalizationKD */
+    UINT exp_dstlen; /* NOTE(review): not referenced by test_NormalizeString */
+};
-- 
2.14.1




More information about the wine-devel mailing list