Alexandre Julliard : ntdll: Reimplement RtlUnicodeToUTF8N.

Alexandre Julliard julliard at winehq.org
Wed Dec 4 16:13:04 CST 2019


Module: wine
Branch: master
Commit: 0b39344ef0b02a969e9755a891f7d7f1a2986d48
URL:    https://source.winehq.org/git/wine.git/?a=commit;h=0b39344ef0b02a969e9755a891f7d7f1a2986d48

Author: Alexandre Julliard <julliard at winehq.org>
Date:   Tue Dec  3 10:10:39 2019 +0100

ntdll: Reimplement RtlUnicodeToUTF8N.

This avoids relying on wine_utf8_wcstombs().

Signed-off-by: Alexandre Julliard <julliard at winehq.org>

---

 dlls/ntdll/locale.c       | 111 ++++++++++++++++++++++++++++++++++++++++++++++
 dlls/ntdll/rtlstr.c       |  33 --------------
 dlls/ntdll/tests/rtlstr.c |  16 -------
 3 files changed, 111 insertions(+), 49 deletions(-)

diff --git a/dlls/ntdll/locale.c b/dlls/ntdll/locale.c
index 5ad32e4f1b..09f64b8dad 100644
--- a/dlls/ntdll/locale.c
+++ b/dlls/ntdll/locale.c
@@ -1307,6 +1307,117 @@ found:
 }
 
 
+/* decode the code point starting at src[0] (srclen = units available); returns 0 for a bad surrogate sequence */
+static inline unsigned int get_surrogate_value( const WCHAR *src, unsigned int srclen )
+{
+    if (src[0] >= 0xd800 && src[0] <= 0xdfff)  /* surrogate code unit (high or low) */
+    {
+        if (src[0] > 0xdbff || /* invalid high surrogate (a low surrogate came first) */
+            srclen <= 1 ||     /* missing low surrogate (input ends after src[0]) */
+            src[1] < 0xdc00 || src[1] > 0xdfff) /* invalid low surrogate */
+            return 0;  /* error marker; a NUL unit would also yield 0, but callers in this file only pass units >= 0x800 */
+        return 0x10000 + ((src[0] & 0x3ff) << 10) + (src[1] & 0x3ff);  /* join the two 10-bit halves */
+    }
+    return src[0];  /* not a surrogate: the unit is the code point */
+}
+
+
+/**************************************************************************
+ *	RtlUnicodeToUTF8N   (NTDLL.@)
+ * Converts UTF-16 to UTF-8. srclen and dstlen are byte counts; *reslen receives the UTF-8 byte count. */
+NTSTATUS WINAPI RtlUnicodeToUTF8N( char *dst, DWORD dstlen, DWORD *reslen, const WCHAR *src, DWORD srclen )
+{
+    char *end;  /* one past the last writable byte of dst */
+    unsigned int val, len;  /* val: decoded code point; len: byte total in the size-only pass */
+    NTSTATUS status = STATUS_SUCCESS;
+
+    if (!src) return STATUS_INVALID_PARAMETER_4;
+    if (!reslen) return STATUS_INVALID_PARAMETER;
+    if (dst && (srclen & 1)) return STATUS_INVALID_PARAMETER_5;  /* odd byte count is rejected only when a buffer is given */
+
+    srclen /= sizeof(WCHAR);  /* bytes -> number of WCHARs (an odd trailing byte is dropped) */
+
+    if (!dst)  /* no output buffer: only compute the required size */
+    {
+        for (len = 0; srclen; srclen--, src++)
+        {
+            if (*src < 0x80) len++;  /* 0x00-0x7f: 1 byte */
+            else if (*src < 0x800) len += 2;  /* 0x80-0x7ff: 2 bytes */
+            else
+            {
+                if (!(val = get_surrogate_value( src, srclen )))
+                {
+                    val = 0xfffd;  /* bad surrogate is counted as U+FFFD (3 bytes) */
+                    status = STATUS_SOME_NOT_MAPPED;
+                }
+                if (val < 0x10000) len += 3; /* 0x800-0xffff: 3 bytes */
+                else   /* 0x10000-0x10ffff: 4 bytes */
+                {
+                    len += 4;
+                    src++;  /* skip the low surrogate consumed as part of the pair */
+                    srclen--;
+                }
+            }
+        }
+        *reslen = len;
+        return status;  /* STATUS_SUCCESS, or STATUS_SOME_NOT_MAPPED if any unit was replaced */
+    }
+
+    for (end = dst + dstlen; srclen; srclen--, src++)  /* conversion pass: stops early when dst is full */
+    {
+        WCHAR ch = *src;
+
+        if (ch < 0x80)  /* 0x00-0x7f: 1 byte */
+        {
+            if (dst > end - 1) break;  /* NOTE(review): end - N lies before dst when dstlen < N; "end - dst < N" avoids that */
+            *dst++ = ch;
+            continue;
+        }
+        if (ch < 0x800)  /* 0x80-0x7ff: 2 bytes */
+        {
+            if (dst > end - 2) break;
+            dst[1] = 0x80 | (ch & 0x3f);  /* continuation byte: low 6 bits */
+            ch >>= 6;
+            dst[0] = 0xc0 | ch;  /* lead byte: remaining 5 bits */
+            dst += 2;
+            continue;
+        }
+        if (!(val = get_surrogate_value( src, srclen )))
+        {
+            val = 0xfffd;  /* bad surrogate is written as U+FFFD */
+            status = STATUS_SOME_NOT_MAPPED;
+        }
+        if (val < 0x10000)  /* 0x800-0xffff: 3 bytes */
+        {
+            if (dst > end - 3) break;
+            dst[2] = 0x80 | (val & 0x3f);  /* bytes are filled last to first, 6 bits at a time */
+            val >>= 6;
+            dst[1] = 0x80 | (val & 0x3f);
+            val >>= 6;
+            dst[0] = 0xe0 | val;
+            dst += 3;
+        }
+        else   /* 0x10000-0x10ffff: 4 bytes */
+        {
+            if (dst > end - 4) break;
+            dst[3] = 0x80 | (val & 0x3f);
+            val >>= 6;
+            dst[2] = 0x80 | (val & 0x3f);
+            val >>= 6;
+            dst[1] = 0x80 | (val & 0x3f);
+            val >>= 6;
+            dst[0] = 0xf0 | val;
+            dst += 4;
+            src++;  /* skip the low surrogate consumed as part of the pair */
+            srclen--;
+        }
+    }
+    if (srclen) status = STATUS_BUFFER_TOO_SMALL;  /* input left over: overrides any STATUS_SOME_NOT_MAPPED */
+    *reslen = dstlen - (end - dst);  /* number of bytes actually written */
+    return status;
+}
+
+
 /******************************************************************************
  *      RtlIsNormalizedString   (NTDLL.@)
  */
diff --git a/dlls/ntdll/rtlstr.c b/dlls/ntdll/rtlstr.c
index 8196dd7178..c0d8ec6477 100644
--- a/dlls/ntdll/rtlstr.c
+++ b/dlls/ntdll/rtlstr.c
@@ -709,39 +709,6 @@ NTSTATUS WINAPI RtlUnicodeStringToOemString( STRING *oem,
 }
 
 
-/**************************************************************************
- *	RtlUnicodeToUTF8N   (NTDLL.@)
- *
- * Converts a Unicode string to a UTF-8 string.
- *
- * RETURNS
- *  NTSTATUS code
- */
-NTSTATUS WINAPI RtlUnicodeToUTF8N( LPSTR dst, DWORD dstlen, LPDWORD reslen,
-                                   LPCWSTR src, DWORD srclen)
-{
-    int ret;
-
-    if (!src) return STATUS_INVALID_PARAMETER_4;
-    if (!reslen) return STATUS_INVALID_PARAMETER;
-    if (dst && (srclen & 1)) return STATUS_INVALID_PARAMETER_5;
-
-    if (!dstlen && dst)
-    {
-        char c;
-        dst = &c;
-        ret = wine_utf8_wcstombs( 0, src, srclen / sizeof(WCHAR), dst, 1 );
-        if (ret > 0) ret--;
-    }
-    else
-        ret = wine_utf8_wcstombs( 0, src, srclen / sizeof(WCHAR), dst, dstlen );
-    if (reslen)
-        *reslen = (ret >= 0) ? ret : dstlen; /* overflow -> we filled up to dstlen */
-    if (ret < 0) return STATUS_BUFFER_TOO_SMALL;
-    return STATUS_SUCCESS;
-}
-
-
 /*
      CASE CONVERSIONS
 */
diff --git a/dlls/ntdll/tests/rtlstr.c b/dlls/ntdll/tests/rtlstr.c
index 4583ddeb4b..6be036f406 100644
--- a/dlls/ntdll/tests/rtlstr.c
+++ b/dlls/ntdll/tests/rtlstr.c
@@ -2152,11 +2152,8 @@ static void test_RtlUnicodeToUTF8N(void)
     length_expect(0, 0, STATUS_SUCCESS);
     length_expect(1, 1, STATUS_SUCCESS);
     length_expect(2, 3, STATUS_SUCCESS);
-todo_wine
-{
     length_expect(3, 6, STATUS_SOME_NOT_MAPPED);
     length_expect(4, 7, STATUS_SOME_NOT_MAPPED);
-}
 #undef length_expect
 
     for (i = 0; i <= 6; i++)
@@ -2164,20 +2161,14 @@ todo_wine
         memset(buffer, 0x55, sizeof(buffer));
         bytes_out = 0xdeadbeef;
         status = pRtlUnicodeToUTF8N(buffer, i, &bytes_out, special_string, sizeof(special_string));
-todo_wine_if (i == 4 || i == 5 || i == 6)
         ok(status == STATUS_BUFFER_TOO_SMALL, "%d: status = 0x%x\n", i, status);
-todo_wine_if (bytes_out != special_string_len[i])
         ok(bytes_out == special_string_len[i], "%d: expected %u, got %u\n", i, special_string_len[i], bytes_out);
-todo_wine_if (i == 6)
         ok(memcmp(buffer, special_expected, special_string_len[i]) == 0, "%d: bad conversion\n", i);
     }
 
     status = pRtlUnicodeToUTF8N(buffer, 7, &bytes_out, special_string, sizeof(special_string));
-todo_wine
     ok(status == STATUS_SOME_NOT_MAPPED, "status = 0x%x\n", status);
-todo_wine
     ok(bytes_out == special_string_len[7], "expected %u, got %u\n", special_string_len[7], bytes_out);
-todo_wine
     ok(memcmp(buffer, special_expected, 7) == 0, "bad conversion\n");
 
     /* conversion behavior with varying input length */
@@ -2225,19 +2216,15 @@ todo_wine
         status = pRtlUnicodeToUTF8N(
             buffer, sizeof(buffer), &bytes_out,
             unicode_to_utf8[i].unicode, lstrlenW(unicode_to_utf8[i].unicode) * sizeof(WCHAR));
-todo_wine_if(unicode_to_utf8[i].status == STATUS_SOME_NOT_MAPPED)
         ok(status == unicode_to_utf8[i].status,
            "(test %d): status is 0x%x, expected 0x%x\n",
            i, status, unicode_to_utf8[i].status);
-todo_wine_if(i == 9 || i == 10 || i == 11)
-{
         ok(bytes_out == strlen(unicode_to_utf8[i].expected),
            "(test %d): bytes_out is %u, expected %u\n",
            i, bytes_out, lstrlenA(unicode_to_utf8[i].expected));
         ok(!memcmp(buffer, unicode_to_utf8[i].expected, bytes_out),
            "(test %d): got \"%.*s\", expected \"%s\"\n",
            i, bytes_out, buffer, unicode_to_utf8[i].expected);
-}
         ok(buffer[bytes_out] == 0x55,
            "(test %d): behind string: 0x%x\n", i, buffer[bytes_out]);
 
@@ -2247,8 +2234,6 @@ todo_wine_if(i == 9 || i == 10 || i == 11)
         status = pRtlUnicodeToUTF8N(
             buffer, sizeof(buffer), &bytes_out,
             unicode_to_utf8[i].unicode, (lstrlenW(unicode_to_utf8[i].unicode) + 1) * sizeof(WCHAR));
-todo_wine_if(i == 9 || i == 10 || i == 11)
-{
         ok(status == unicode_to_utf8[i].status,
            "(test %d): status is 0x%x, expected 0x%x\n",
            i, status, unicode_to_utf8[i].status);
@@ -2258,7 +2243,6 @@ todo_wine_if(i == 9 || i == 10 || i == 11)
         ok(!memcmp(buffer, unicode_to_utf8[i].expected, bytes_out),
            "(test %d): got \"%.*s\", expected \"%s\"\n",
            i, bytes_out, buffer, unicode_to_utf8[i].expected);
-}
         ok(buffer[bytes_out] == 0x55,
            "(test %d): behind string: 0x%x\n", i, buffer[bytes_out]);
     }




More information about the wine-cvs mailing list