Alexandre Julliard : ntdll: Reimplement RtlUnicodeToUTF8N.
Alexandre Julliard
julliard at winehq.org
Wed Dec 4 16:13:04 CST 2019
Module: wine
Branch: master
Commit: 0b39344ef0b02a969e9755a891f7d7f1a2986d48
URL: https://source.winehq.org/git/wine.git/?a=commit;h=0b39344ef0b02a969e9755a891f7d7f1a2986d48
Author: Alexandre Julliard <julliard at winehq.org>
Date: Tue Dec 3 10:10:39 2019 +0100
ntdll: Reimplement RtlUnicodeToUTF8N.
This avoids relying on wine_utf8_wcstombs().
Signed-off-by: Alexandre Julliard <julliard at winehq.org>
---
dlls/ntdll/locale.c | 111 ++++++++++++++++++++++++++++++++++++++++++++++
dlls/ntdll/rtlstr.c | 33 --------------
dlls/ntdll/tests/rtlstr.c | 16 -------
3 files changed, 111 insertions(+), 49 deletions(-)
diff --git a/dlls/ntdll/locale.c b/dlls/ntdll/locale.c
index 5ad32e4f1b..09f64b8dad 100644
--- a/dlls/ntdll/locale.c
+++ b/dlls/ntdll/locale.c
@@ -1307,6 +1307,117 @@ found:
}
+/* get the next char value taking surrogates into account
+ *
+ * src points at the current UTF-16 code unit and srclen is the number of
+ * code units available starting at src; src[1] is only read when srclen > 1.
+ *
+ * Returns src[0] unchanged when it lies outside 0xd800-0xdfff, the combined
+ * value (0x10000 or above) when src[0] and src[1] form a valid high/low
+ * surrogate pair, and 0 for a lone or out-of-order surrogate. The callers in
+ * RtlUnicodeToUTF8N only get here for code units >= 0x800, so for them a
+ * result of 0 always means "invalid".
+ */
+static inline unsigned int get_surrogate_value( const WCHAR *src, unsigned int srclen )
+{
+ if (src[0] >= 0xd800 && src[0] <= 0xdfff) /* surrogate pair */
+ {
+ if (src[0] > 0xdbff || /* invalid high surrogate */
+ srclen <= 1 || /* missing low surrogate */
+ src[1] < 0xdc00 || src[1] > 0xdfff) /* invalid low surrogate */
+ return 0;
+ return 0x10000 + ((src[0] & 0x3ff) << 10) + (src[1] & 0x3ff); /* 10 payload bits from each half */
+ }
+ return src[0]; /* not a surrogate: the code unit is the value */
+}
+
+
+/**************************************************************************
+ * RtlUnicodeToUTF8N (NTDLL.@)
+ *
+ * Converts UTF-16 text to UTF-8.
+ *
+ * PARAMS
+ * dst [O] Output buffer, or NULL to only compute the required size
+ * dstlen [I] Size of dst in bytes (not used when dst is NULL)
+ * reslen [O] Receives the number of bytes written, or the number of bytes
+ * required when dst is NULL
+ * src [I] UTF-16 input
+ * srclen [I] Size of src in bytes
+ *
+ * RETURNS
+ * STATUS_SUCCESS if all of src was converted.
+ * STATUS_SOME_NOT_MAPPED if an invalid or unpaired surrogate was replaced
+ * by 0xfffd (U+FFFD).
+ * STATUS_BUFFER_TOO_SMALL if the next character did not fit into dst; this
+ * replaces STATUS_SOME_NOT_MAPPED, and *reslen still holds the bytes written.
+ * STATUS_INVALID_PARAMETER_4 if src is NULL.
+ * STATUS_INVALID_PARAMETER if reslen is NULL.
+ * STATUS_INVALID_PARAMETER_5 if dst is non-NULL and srclen is odd.
+ *
+ * NOTES
+ * No terminating NUL is appended; a NUL inside src is converted like any
+ * other one-byte character. A character is either written completely or not
+ * at all. When dst is NULL an odd srclen is accepted and the trailing byte
+ * is dropped by the division below.
+ * NOTE(review): the room checks compute end - N, which lies before dst when
+ * dstlen < N; confirm this pointer arithmetic is acceptable for the
+ * supported compilers.
+ */
+NTSTATUS WINAPI RtlUnicodeToUTF8N( char *dst, DWORD dstlen, DWORD *reslen, const WCHAR *src, DWORD srclen )
+{
+ char *end;
+ unsigned int val, len;
+ NTSTATUS status = STATUS_SUCCESS;
+
+ if (!src) return STATUS_INVALID_PARAMETER_4;
+ if (!reslen) return STATUS_INVALID_PARAMETER;
+ if (dst && (srclen & 1)) return STATUS_INVALID_PARAMETER_5; /* odd byte count is only rejected when converting */
+
+ srclen /= sizeof(WCHAR); /* from here on srclen counts WCHARs, not bytes */
+
+ if (!dst) /* no output buffer: only compute the required size */
+ {
+ for (len = 0; srclen; srclen--, src++)
+ {
+ if (*src < 0x80) len++; /* 0x00-0x7f: 1 byte */
+ else if (*src < 0x800) len += 2; /* 0x80-0x7ff: 2 bytes */
+ else
+ {
+ if (!(val = get_surrogate_value( src, srclen )))
+ {
+ val = 0xfffd; /* count the replacement character instead */
+ status = STATUS_SOME_NOT_MAPPED;
+ }
+ if (val < 0x10000) len += 3; /* 0x800-0xffff: 3 bytes */
+ else /* 0x10000-0x10ffff: 4 bytes */
+ {
+ len += 4;
+ src++; /* skip the low surrogate in addition to the loop's own step */
+ srclen--;
+ }
+ }
+ }
+ *reslen = len;
+ return status;
+ }
+
+ for (end = dst + dstlen; srclen; srclen--, src++) /* end is one past the last writable byte */
+ {
+ WCHAR ch = *src;
+
+ if (ch < 0x80) /* 0x00-0x7f: 1 byte */
+ {
+ if (dst > end - 1) break; /* no room: stop with srclen still non-zero */
+ *dst++ = ch;
+ continue;
+ }
+ if (ch < 0x800) /* 0x80-0x7ff: 2 bytes */
+ {
+ if (dst > end - 2) break;
+ dst[1] = 0x80 | (ch & 0x3f); /* bytes are filled last to first, 6 bits at a time */
+ ch >>= 6;
+ dst[0] = 0xc0 | ch; /* remaining 5 bits go into the lead byte */
+ dst += 2;
+ continue;
+ }
+ if (!(val = get_surrogate_value( src, srclen )))
+ {
+ val = 0xfffd; /* emit the replacement character instead */
+ status = STATUS_SOME_NOT_MAPPED;
+ }
+ if (val < 0x10000) /* 0x800-0xffff: 3 bytes */
+ {
+ if (dst > end - 3) break;
+ dst[2] = 0x80 | (val & 0x3f);
+ val >>= 6;
+ dst[1] = 0x80 | (val & 0x3f);
+ val >>= 6;
+ dst[0] = 0xe0 | val; /* remaining 4 bits go into the lead byte */
+ dst += 3;
+ }
+ else /* 0x10000-0x10ffff: 4 bytes */
+ {
+ if (dst > end - 4) break;
+ dst[3] = 0x80 | (val & 0x3f);
+ val >>= 6;
+ dst[2] = 0x80 | (val & 0x3f);
+ val >>= 6;
+ dst[1] = 0x80 | (val & 0x3f);
+ val >>= 6;
+ dst[0] = 0xf0 | val; /* remaining 3 bits go into the lead byte */
+ dst += 4;
+ src++; /* the pair used two WCHARs: skip the low surrogate too */
+ srclen--;
+ }
+ }
+ if (srclen) status = STATUS_BUFFER_TOO_SMALL; /* left the loop through a break: input remains unconverted */
+ *reslen = dstlen - (end - dst); /* number of bytes actually written */
+ return status;
+}
+
+
/******************************************************************************
* RtlIsNormalizedString (NTDLL.@)
*/
diff --git a/dlls/ntdll/rtlstr.c b/dlls/ntdll/rtlstr.c
index 8196dd7178..c0d8ec6477 100644
--- a/dlls/ntdll/rtlstr.c
+++ b/dlls/ntdll/rtlstr.c
@@ -709,39 +709,6 @@ NTSTATUS WINAPI RtlUnicodeStringToOemString( STRING *oem,
}
-/**************************************************************************
- * RtlUnicodeToUTF8N (NTDLL.@)
- *
- * Converts a Unicode string to a UTF-8 string.
- *
- * This is the previous implementation, removed by this commit. It hands
- * the conversion to wine_utf8_wcstombs(), passing srclen (given in bytes)
- * converted to a WCHAR count.
- *
- * RETURNS
- * STATUS_INVALID_PARAMETER_4 if src is NULL, STATUS_INVALID_PARAMETER if
- * reslen is NULL, STATUS_INVALID_PARAMETER_5 if dst is non-NULL and srclen
- * is odd, STATUS_BUFFER_TOO_SMALL if wine_utf8_wcstombs() returns a
- * negative value, STATUS_SUCCESS otherwise.
- */
-NTSTATUS WINAPI RtlUnicodeToUTF8N( LPSTR dst, DWORD dstlen, LPDWORD reslen,
- LPCWSTR src, DWORD srclen)
-{
- int ret;
-
- if (!src) return STATUS_INVALID_PARAMETER_4;
- if (!reslen) return STATUS_INVALID_PARAMETER;
- if (dst && (srclen & 1)) return STATUS_INVALID_PARAMETER_5;
-
- if (!dstlen && dst) /* zero-sized buffer: convert into a one-byte scratch buffer instead */
- {
- char c;
- dst = &c;
- ret = wine_utf8_wcstombs( 0, src, srclen / sizeof(WCHAR), dst, 1 );
- if (ret > 0) ret--; /* do not report the byte that went into the scratch buffer */
- }
- else
- ret = wine_utf8_wcstombs( 0, src, srclen / sizeof(WCHAR), dst, dstlen ); /* presumably returns the byte count, negative on overflow -- verify against the helper */
- if (reslen) /* always true here: a NULL reslen was rejected above */
- *reslen = (ret >= 0) ? ret : dstlen; /* overflow -> we filled up to dstlen */
- if (ret < 0) return STATUS_BUFFER_TOO_SMALL;
- return STATUS_SUCCESS;
-}
-
-
/*
CASE CONVERSIONS
*/
diff --git a/dlls/ntdll/tests/rtlstr.c b/dlls/ntdll/tests/rtlstr.c
index 4583ddeb4b..6be036f406 100644
--- a/dlls/ntdll/tests/rtlstr.c
+++ b/dlls/ntdll/tests/rtlstr.c
@@ -2152,11 +2152,8 @@ static void test_RtlUnicodeToUTF8N(void)
length_expect(0, 0, STATUS_SUCCESS);
length_expect(1, 1, STATUS_SUCCESS);
length_expect(2, 3, STATUS_SUCCESS);
-todo_wine
-{
length_expect(3, 6, STATUS_SOME_NOT_MAPPED);
length_expect(4, 7, STATUS_SOME_NOT_MAPPED);
-}
#undef length_expect
for (i = 0; i <= 6; i++)
@@ -2164,20 +2161,14 @@ todo_wine
memset(buffer, 0x55, sizeof(buffer));
bytes_out = 0xdeadbeef;
status = pRtlUnicodeToUTF8N(buffer, i, &bytes_out, special_string, sizeof(special_string));
-todo_wine_if (i == 4 || i == 5 || i == 6)
ok(status == STATUS_BUFFER_TOO_SMALL, "%d: status = 0x%x\n", i, status);
-todo_wine_if (bytes_out != special_string_len[i])
ok(bytes_out == special_string_len[i], "%d: expected %u, got %u\n", i, special_string_len[i], bytes_out);
-todo_wine_if (i == 6)
ok(memcmp(buffer, special_expected, special_string_len[i]) == 0, "%d: bad conversion\n", i);
}
status = pRtlUnicodeToUTF8N(buffer, 7, &bytes_out, special_string, sizeof(special_string));
-todo_wine
ok(status == STATUS_SOME_NOT_MAPPED, "status = 0x%x\n", status);
-todo_wine
ok(bytes_out == special_string_len[7], "expected %u, got %u\n", special_string_len[7], bytes_out);
-todo_wine
ok(memcmp(buffer, special_expected, 7) == 0, "bad conversion\n");
/* conversion behavior with varying input length */
@@ -2225,19 +2216,15 @@ todo_wine
status = pRtlUnicodeToUTF8N(
buffer, sizeof(buffer), &bytes_out,
unicode_to_utf8[i].unicode, lstrlenW(unicode_to_utf8[i].unicode) * sizeof(WCHAR));
-todo_wine_if(unicode_to_utf8[i].status == STATUS_SOME_NOT_MAPPED)
ok(status == unicode_to_utf8[i].status,
"(test %d): status is 0x%x, expected 0x%x\n",
i, status, unicode_to_utf8[i].status);
-todo_wine_if(i == 9 || i == 10 || i == 11)
-{
ok(bytes_out == strlen(unicode_to_utf8[i].expected),
"(test %d): bytes_out is %u, expected %u\n",
i, bytes_out, lstrlenA(unicode_to_utf8[i].expected));
ok(!memcmp(buffer, unicode_to_utf8[i].expected, bytes_out),
"(test %d): got \"%.*s\", expected \"%s\"\n",
i, bytes_out, buffer, unicode_to_utf8[i].expected);
-}
ok(buffer[bytes_out] == 0x55,
"(test %d): behind string: 0x%x\n", i, buffer[bytes_out]);
@@ -2247,8 +2234,6 @@ todo_wine_if(i == 9 || i == 10 || i == 11)
status = pRtlUnicodeToUTF8N(
buffer, sizeof(buffer), &bytes_out,
unicode_to_utf8[i].unicode, (lstrlenW(unicode_to_utf8[i].unicode) + 1) * sizeof(WCHAR));
-todo_wine_if(i == 9 || i == 10 || i == 11)
-{
ok(status == unicode_to_utf8[i].status,
"(test %d): status is 0x%x, expected 0x%x\n",
i, status, unicode_to_utf8[i].status);
@@ -2258,7 +2243,6 @@ todo_wine_if(i == 9 || i == 10 || i == 11)
ok(!memcmp(buffer, unicode_to_utf8[i].expected, bytes_out),
"(test %d): got \"%.*s\", expected \"%s\"\n",
i, bytes_out, buffer, unicode_to_utf8[i].expected);
-}
ok(buffer[bytes_out] == 0x55,
"(test %d): behind string: 0x%x\n", i, buffer[bytes_out]);
}
More information about the wine-cvs
mailing list