[2/2] ntdll/tests: Add tests for RtlUTF8ToUnicodeN. (try 2)
Thomas Faber
thomas.faber at reactos.org
Mon Mar 31 03:16:11 CDT 2014
Try 2:
- Introduce unicode_expect_ function to avoid overlong macros and
simplify some test cases
-------------- next part --------------
From 448e8a58df0c4b1e6733d18519ae38d3d82a9efa Mon Sep 17 00:00:00 2001
From: Thomas Faber <thomas.faber at reactos.org>
Date: Sat, 29 Mar 2014 16:25:25 +0100
Subject: ntdll/tests: Add tests for RtlUTF8ToUnicodeN.
---
dlls/ntdll/tests/rtlstr.c | 275 ++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 275 insertions(+)
diff --git a/dlls/ntdll/tests/rtlstr.c b/dlls/ntdll/tests/rtlstr.c
index 7cd6be0..91373cd 100644
--- a/dlls/ntdll/tests/rtlstr.c
+++ b/dlls/ntdll/tests/rtlstr.c
@@ -69,6 +69,7 @@ static NTSTATUS (WINAPI *pRtlStringFromGUID)(const GUID*, UNICODE_STRING*);
static BOOLEAN (WINAPI *pRtlIsTextUnicode)(LPVOID, INT, INT *);
static NTSTATUS (WINAPI *pRtlHashUnicodeString)(PCUNICODE_STRING,BOOLEAN,ULONG,ULONG*);
static NTSTATUS (WINAPI *pRtlUnicodeToUTF8N)(CHAR *, ULONG, ULONG *, const WCHAR *, ULONG);
+static NTSTATUS (WINAPI *pRtlUTF8ToUnicodeN)(WCHAR *, ULONG, ULONG *, const CHAR *, ULONG);
/*static VOID (WINAPI *pRtlFreeOemString)(PSTRING);*/
/*static VOID (WINAPI *pRtlCopyUnicodeString)(UNICODE_STRING *, const UNICODE_STRING *);*/
@@ -139,6 +140,7 @@ static void InitFunctionPtrs(void)
pRtlIsTextUnicode = (void *)GetProcAddress(hntdll, "RtlIsTextUnicode");
pRtlHashUnicodeString = (void*)GetProcAddress(hntdll, "RtlHashUnicodeString");
pRtlUnicodeToUTF8N = (void*)GetProcAddress(hntdll, "RtlUnicodeToUTF8N");
+ pRtlUTF8ToUnicodeN = (void*)GetProcAddress(hntdll, "RtlUTF8ToUnicodeN");
}
}
@@ -2222,6 +2224,278 @@ static void test_RtlUnicodeToUTF8N(void)
}
}
+struct utf8_to_unicode_test { /* one RtlUTF8ToUnicodeN conversion test case */
+ const char *utf8; /* NUL-terminated UTF-8 input (its length is taken with strlen) */
+ WCHAR expected[128]; /* expected NUL-terminated UTF-16 output (length taken with lstrlenW) */
+ NTSTATUS status; /* status the conversion is expected to return */
+};
+
+static const struct utf8_to_unicode_test utf8_to_unicode[] = { /* { UTF-8 input, expected UTF-16 output, expected status } */
+ { "", { 0 }, STATUS_SUCCESS },
+ { "-", { '-',0 }, STATUS_SUCCESS },
+ { "hello", { 'h','e','l','l','o',0 }, STATUS_SUCCESS },
+ /* first and last of each range */
+ { "-\x7F-\xC2\x80-\xC3\xBF-\xC4\x80-", { '-',0x7f,'-',0x80,'-',0xff,'-',0x100,'-',0 }, STATUS_SUCCESS },
+ { "-\xDF\xBF-\xE0\xA0\x80-", { '-',0x7ff,'-',0x800,'-',0 }, STATUS_SUCCESS },
+ { "-\xED\x9F\xBF-\xEE\x80\x80-", { '-',0xd7ff,'-',0xe000,'-',0 }, STATUS_SUCCESS },
+ /* 0x10000 */
+ { "-\xEF\xBF\xBF-\xF0\x90\x80\x80-", { '-',0xffff,'-',0xd800,0xdc00,'-',0 }, STATUS_SUCCESS },
+ /* 0x103ff */ /* 0x10400 */
+ { "-\xF0\x90\x8F\xBF-\xF0\x90\x90\x80-", { '-',0xd800,0xdfff,'-',0xd801,0xdc00,'-',0 }, STATUS_SUCCESS },
+ /* 0x10ffff */
+ { "-\xF4\x8F\xBF\xBF-", { '-',0xdbff,0xdfff,'-',0 }, STATUS_SUCCESS },
+ /* standalone surrogate code points */
+ /* 0xd800 */ /* 0xdbff */
+ { "-\xED\xA0\x80-\xED\xAF\xBF-", { '-',0xfffd,0xfffd,'-',0xfffd,0xfffd,'-',0 }, STATUS_SOME_NOT_MAPPED },
+ /* 0xdc00 */ /* 0xdfff */
+ { "-\xED\xB0\x80-\xED\xBF\xBF-", { '-',0xfffd,0xfffd,'-',0xfffd,0xfffd,'-',0 }, STATUS_SOME_NOT_MAPPED },
+ /* UTF-8 encoded surrogate pair */
+ /* 0xdbff *//* 0xdfff */
+ { "-\xED\xAF\xBF\xED\xBF\xBF-", { '-',0xfffd,0xfffd,0xfffd,0xfffd,'-',0 }, STATUS_SOME_NOT_MAPPED },
+ /* reverse surrogate pair */
+ /* 0xdfff *//* 0xdbff */
+ { "-\xED\xBF\xBF\xED\xAF\xBF-", { '-',0xfffd,0xfffd,0xfffd,0xfffd,'-',0 }, STATUS_SOME_NOT_MAPPED },
+ /* code points outside the UTF-16 range */
+ /* 0x110000 */
+ { "-\xF4\x90\x80\x80-", { '-',0xfffd,0xfffd,0xfffd,'-',0 }, STATUS_SOME_NOT_MAPPED },
+ /* 0x1fffff */
+ { "-\xF7\xBF\xBF\xBF-", { '-',0xfffd,0xfffd,0xfffd,0xfffd,'-',0 }, STATUS_SOME_NOT_MAPPED },
+ /* 0x200000 */
+ { "-\xFA\x80\x80\x80\x80-", { '-',0xfffd,0xfffd,0xfffd,0xfffd,0xfffd,'-',0 }, STATUS_SOME_NOT_MAPPED },
+ /* 0x3ffffff */
+ { "-\xFB\xBF\xBF\xBF\xBF-", { '-',0xfffd,0xfffd,0xfffd,0xfffd,0xfffd,'-',0 }, STATUS_SOME_NOT_MAPPED },
+ /* 0x4000000 */
+ { "-\xFC\x84\x80\x80\x80\x80-", { '-',0xfffd,0xfffd,0xfffd,0xfffd,0xfffd,0xfffd,'-',0 }, STATUS_SOME_NOT_MAPPED },
+ /* 0x7fffffff */
+ { "-\xFD\xBF\xBF\xBF\xBF\xBF-", { '-',0xfffd,0xfffd,0xfffd,0xfffd,0xfffd,0xfffd,'-',0 }, STATUS_SOME_NOT_MAPPED },
+ /* overlong encodings of each length for -, NUL, and the highest possible value */
+ { "-\xC0\xAD-\xC0\x80-\xC1\xBF-", { '-',0xfffd,0xfffd,'-',0xfffd,0xfffd,'-',0xfffd,0xfffd,'-',0 }, STATUS_SOME_NOT_MAPPED },
+ { "-\xE0\x80\xAD-\xE0\x80\x80-\xE0\x9F\xBF-", { '-',0xfffd,0xfffd,'-',0xfffd,0xfffd,'-',0xfffd,0xfffd,'-',0 }, STATUS_SOME_NOT_MAPPED },
+ { "-\xF0\x80\x80\xAD-", { '-',0xfffd,0xfffd,0xfffd,'-',0 }, STATUS_SOME_NOT_MAPPED },
+ { "-\xF0\x80\x80\x80-", { '-',0xfffd,0xfffd,0xfffd,'-',0 }, STATUS_SOME_NOT_MAPPED },
+ { "-\xF0\x8F\xBF\xBF-", { '-',0xfffd,0xfffd,0xfffd,'-',0 }, STATUS_SOME_NOT_MAPPED },
+ { "-\xF8\x80\x80\x80\xAD-", { '-',0xfffd,0xfffd,0xfffd,0xfffd,0xfffd,'-',0 }, STATUS_SOME_NOT_MAPPED },
+ { "-\xF8\x80\x80\x80\x80-", { '-',0xfffd,0xfffd,0xfffd,0xfffd,0xfffd,'-',0 }, STATUS_SOME_NOT_MAPPED },
+ { "-\xF8\x87\xBF\xBF\xBF-", { '-',0xfffd,0xfffd,0xfffd,0xfffd,0xfffd,'-',0 }, STATUS_SOME_NOT_MAPPED },
+ { "-\xFC\x80\x80\x80\x80\xAD-", { '-',0xfffd,0xfffd,0xfffd,0xfffd,0xfffd,0xfffd,'-',0 }, STATUS_SOME_NOT_MAPPED },
+ { "-\xFC\x80\x80\x80\x80\x80-", { '-',0xfffd,0xfffd,0xfffd,0xfffd,0xfffd,0xfffd,'-',0 }, STATUS_SOME_NOT_MAPPED },
+ { "-\xFC\x83\xBF\xBF\xBF\xBF-", { '-',0xfffd,0xfffd,0xfffd,0xfffd,0xfffd,0xfffd,'-',0 }, STATUS_SOME_NOT_MAPPED },
+ /* invalid bytes */
+ { "\xFE", { 0xfffd,0 }, STATUS_SOME_NOT_MAPPED },
+ { "\xFF", { 0xfffd,0 }, STATUS_SOME_NOT_MAPPED },
+ { "\xFE\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF", { 0xfffd,0xfffd,0xfffd,0xfffd,0xfffd,0xfffd,0xfffd,0xfffd,0xfffd,0 }, STATUS_SOME_NOT_MAPPED },
+ { "\xFF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF", { 0xfffd,0xfffd,0xfffd,0xfffd,0xfffd,0xfffd,0xfffd,0xfffd,0xfffd,0 }, STATUS_SOME_NOT_MAPPED },
+ { "\xFF\x80\x80\x80\x80\x80\x80\x80\x80", { 0xfffd,0xfffd,0xfffd,0xfffd,0xfffd,0xfffd,0xfffd,0xfffd,0xfffd,0 }, STATUS_SOME_NOT_MAPPED },
+ { "\xFF\x40\x80\x80\x80\x80\x80\x80\x80", { 0xfffd,0x40,0xfffd,0xfffd,0xfffd,0xfffd,0xfffd,0xfffd,0xfffd,0 }, STATUS_SOME_NOT_MAPPED },
+ /* lone continuation bytes */
+ { "\x80", { 0xfffd,0 }, STATUS_SOME_NOT_MAPPED },
+ { "\x80\x80", { 0xfffd,0xfffd,0 }, STATUS_SOME_NOT_MAPPED },
+ { "\xBF", { 0xfffd,0 }, STATUS_SOME_NOT_MAPPED },
+ { "\xBF\xBF", { 0xfffd,0xfffd,0 }, STATUS_SOME_NOT_MAPPED },
+ /* incomplete sequences */
+ { "\xC2-", { 0xfffd,'-',0 }, STATUS_SOME_NOT_MAPPED },
+ { "\xE0\xA0-", { 0xfffd,'-',0 }, STATUS_SOME_NOT_MAPPED },
+ { "\xF0\x90\x80-", { 0xfffd,'-',0 }, STATUS_SOME_NOT_MAPPED },
+ { "\xF4\x8F\xBF-", { 0xfffd,'-',0 }, STATUS_SOME_NOT_MAPPED },
+ { "\xFA\x80\x80\x80-", { 0xfffd,0xfffd,0xfffd,0xfffd,'-',0 }, STATUS_SOME_NOT_MAPPED },
+ { "\xFC\x84\x80\x80\x80-", { 0xfffd,0xfffd,0xfffd,0xfffd,0xfffd,'-',0 }, STATUS_SOME_NOT_MAPPED },
+ /* multibyte sequence followed by lone continuation byte */
+ { "\xE0\xA0\x80\x80-", { 0x800,0xfffd,'-',0 }, STATUS_SOME_NOT_MAPPED },
+ /* byte order marks */
+ { "-\xEF\xBB\xBF-\xEF\xBF\xBE-", { '-',0xfeff,'-',0xfffe,'-',0 }, STATUS_SUCCESS },
+ { "\xEF\xBB\xBF-", { 0xfeff,'-',0 }, STATUS_SUCCESS },
+ { "\xEF\xBF\xBE-", { 0xfffe,'-',0 }, STATUS_SUCCESS },
+ /* 0xffff is expected to be converted as-is, with STATUS_SUCCESS */
+ /* 0xffff */
+ { "\xEF\xBF\xBF-", { 0xffff,'-',0 }, STATUS_SUCCESS },
+ /* canonically equivalent representations -- no normalization should happen */
+ { "-\xE1\xB8\x89-", { '-',0x1e09,'-',0 }, STATUS_SUCCESS },
+ { "-\xC4\x87\xCC\xA7-", { '-',0x0107,0x0327,'-',0 }, STATUS_SUCCESS },
+ { "-\xC3\xA7\xCC\x81-", { '-',0x00e7,0x0301,'-',0 }, STATUS_SUCCESS },
+ { "-\x63\xCC\xA7\xCC\x81-", { '-',0x0063,0x0327,0x0301,'-',0 }, STATUS_SUCCESS },
+ { "-\x63\xCC\x81\xCC\xA7-", { '-',0x0063,0x0301,0x0327,'-',0 }, STATUS_SUCCESS },
+};
+
+/* Call RtlUTF8ToUnicodeN and check status, byte count and buffer contents.
+ * out_string == NULL makes a length-only call; buflen == -1 means "the whole buffer". */
+static void unicode_expect_(const WCHAR *out_string, ULONG buflen, ULONG out_chars,
+                            const char *in_string, ULONG in_chars,
+                            NTSTATUS expect_status, int line)
+{
+    NTSTATUS status;
+    ULONG bytes_out;
+    WCHAR buffer[128];
+    unsigned int i;
+
+    if (buflen == (ULONG)-1)
+        buflen = sizeof(buffer);
+    bytes_out = 0x55555555;
+    memset(buffer, 0x55, sizeof(buffer)); /* 0x5555 fill pattern marks untouched elements */
+    status = pRtlUTF8ToUnicodeN(out_string ? buffer : NULL, buflen, &bytes_out,
+                                in_string, in_chars);
+    ok_(__FILE__, line)(status == expect_status, "status = 0x%x\n", status);
+    ok_(__FILE__, line)(bytes_out == out_chars * sizeof(WCHAR),
+                        "bytes_out = %u, expected %u\n", bytes_out, (ULONG)(out_chars * sizeof(WCHAR)));
+    if (out_string)
+    {
+        for (i = 0; i < bytes_out / sizeof(WCHAR); i++)
+            ok_(__FILE__, line)(buffer[i] == out_string[i],
+                                "buffer[%u] = 0x%x, expected 0x%x\n", i, buffer[i], out_string[i]);
+        /* everything behind the reported length must still hold the fill pattern */
+        for (; i < sizeof(buffer) / sizeof(WCHAR); i++)
+            ok_(__FILE__, line)(buffer[i] == 0x5555,
+                                "buffer[%u] = 0x%x, expected 0x5555\n", i, buffer[i]);
+    }
+}
+#define unicode_expect(out_string, buflen, out_chars, in_string, in_chars, expect_status) \
+        unicode_expect_(out_string, buflen, out_chars, in_string, in_chars, expect_status, __LINE__)
+
+/* Exercise RtlUTF8ToUnicodeN: parameter validation, length-only queries, output
+ * truncation, varying input lengths and the utf8_to_unicode conversion table. */
+static void test_RtlUTF8ToUnicodeN(void)
+{
+    NTSTATUS status;
+    ULONG bytes_out;
+    ULONG bytes_out_array[2];
+    void * const invalid_pointer = (void *)0x8;
+    WCHAR buffer[128];
+    const char empty_string[] = "";
+    const char test_string[] = "A\0abcdefg";
+    const WCHAR test_stringW[] = {'A',0,'a','b','c','d','e','f','g',0 };
+    const char special_string[] = { 'X',0xc2,0x80,0xF0,0x90,0x80,0x80,0 };
+    const WCHAR special_expected[] = { 'X',0x80,0xd800,0xdc00,0 };
+    unsigned int input_len;
+    const unsigned int test_count = sizeof(utf8_to_unicode) / sizeof(utf8_to_unicode[0]);
+    unsigned int i;
+
+    if (!pRtlUTF8ToUnicodeN)
+    {
+        skip("RtlUTF8ToUnicodeN unavailable\n");
+        return;
+    }
+
+    /* show that bytes_out is really ULONG */
+    memset(bytes_out_array, 0x55, sizeof(bytes_out_array));
+    status = pRtlUTF8ToUnicodeN(NULL, 0, bytes_out_array, empty_string, 0);
+    ok(status == STATUS_SUCCESS, "status = 0x%x\n", status);
+    ok(bytes_out_array[0] == 0x00000000, "Got 0x%x\n", bytes_out_array[0]);
+    ok(bytes_out_array[1] == 0x55555555, "Got 0x%x\n", bytes_out_array[1]);
+
+    /* parameter checks */
+    status = pRtlUTF8ToUnicodeN(NULL, 0, NULL, NULL, 0);
+    ok(status == STATUS_INVALID_PARAMETER_4, "status = 0x%x\n", status);
+
+    status = pRtlUTF8ToUnicodeN(NULL, 0, NULL, empty_string, 0);
+    ok(status == STATUS_INVALID_PARAMETER, "status = 0x%x\n", status);
+
+    bytes_out = 0x55555555;
+    status = pRtlUTF8ToUnicodeN(NULL, 0, &bytes_out, NULL, 0);
+    ok(status == STATUS_INVALID_PARAMETER_4, "status = 0x%x\n", status);
+    ok(bytes_out == 0x55555555, "bytes_out = 0x%x\n", bytes_out);
+
+    /* a bogus (non-NULL) source pointer is accepted as long as the length is 0 */
+    bytes_out = 0x55555555;
+    status = pRtlUTF8ToUnicodeN(NULL, 0, &bytes_out, invalid_pointer, 0);
+    ok(status == STATUS_SUCCESS, "status = 0x%x\n", status);
+    ok(bytes_out == 0, "bytes_out = 0x%x\n", bytes_out);
+
+    bytes_out = 0x55555555;
+    status = pRtlUTF8ToUnicodeN(NULL, 0, &bytes_out, empty_string, 0);
+    ok(status == STATUS_SUCCESS, "status = 0x%x\n", status);
+    ok(bytes_out == 0, "bytes_out = 0x%x\n", bytes_out);
+
+    bytes_out = 0x55555555;
+    status = pRtlUTF8ToUnicodeN(NULL, 0, &bytes_out, test_string, 0);
+    ok(status == STATUS_SUCCESS, "status = 0x%x\n", status);
+    ok(bytes_out == 0, "bytes_out = 0x%x\n", bytes_out);
+
+    bytes_out = 0x55555555;
+    status = pRtlUTF8ToUnicodeN(NULL, 0, &bytes_out, empty_string, 1);
+    ok(status == STATUS_SUCCESS, "status = 0x%x\n", status);
+    ok(bytes_out == sizeof(WCHAR), "bytes_out = 0x%x\n", bytes_out);
+
+    /* length output with special chars */
+#define length_expect(in_chars, out_chars, expect_status) \
+        unicode_expect_(NULL, 0, out_chars, special_string, in_chars, \
+                        expect_status, __LINE__)
+
+    length_expect(0, 0, STATUS_SUCCESS);
+    length_expect(1, 1, STATUS_SUCCESS);
+    length_expect(2, 2, STATUS_SOME_NOT_MAPPED);
+    length_expect(3, 2, STATUS_SUCCESS);
+    length_expect(4, 3, STATUS_SOME_NOT_MAPPED);
+    length_expect(5, 3, STATUS_SOME_NOT_MAPPED);
+    length_expect(6, 3, STATUS_SOME_NOT_MAPPED);
+    length_expect(7, 4, STATUS_SUCCESS);
+    length_expect(8, 5, STATUS_SUCCESS);
+#undef length_expect
+
+    /* output truncation */
+#define truncate_expect(buflen, out_chars, expect_status) \
+        unicode_expect_(special_expected, buflen, out_chars, \
+                        special_string, sizeof(special_string), \
+                        expect_status, __LINE__)
+
+    truncate_expect( 0, 0, STATUS_BUFFER_TOO_SMALL);
+    truncate_expect( 1, 0, STATUS_BUFFER_TOO_SMALL);
+    truncate_expect( 2, 1, STATUS_BUFFER_TOO_SMALL);
+    truncate_expect( 3, 1, STATUS_BUFFER_TOO_SMALL);
+    truncate_expect( 4, 2, STATUS_BUFFER_TOO_SMALL);
+    truncate_expect( 5, 2, STATUS_BUFFER_TOO_SMALL);
+    truncate_expect( 6, 3, STATUS_BUFFER_TOO_SMALL);
+    truncate_expect( 7, 3, STATUS_BUFFER_TOO_SMALL);
+    truncate_expect( 8, 4, STATUS_BUFFER_TOO_SMALL);
+    truncate_expect( 9, 4, STATUS_BUFFER_TOO_SMALL);
+    truncate_expect(10, 5, STATUS_SUCCESS);
+#undef truncate_expect
+
+    /* conversion behavior with varying input length */
+    for (input_len = 0; input_len <= sizeof(test_string); input_len++) {
+        /* no output buffer, just length */
+        unicode_expect(NULL, 0, input_len, test_string, input_len, STATUS_SUCCESS);
+
+        /* write output */
+        unicode_expect(test_stringW, -1, input_len, test_string, input_len, STATUS_SUCCESS);
+    }
+
+    /* test cases for special characters */
+    for (i = 0; i < test_count; i++) {
+        const struct utf8_to_unicode_test *test = &utf8_to_unicode[i];
+        const ULONG utf8_len = strlen(test->utf8);
+        const ULONG expected_bytes = lstrlenW(test->expected) * sizeof(WCHAR);
+        ULONG out_len; /* bytes_out converted to WCHARs: buffer[] is indexed in elements, not bytes */
+
+        bytes_out = 0x55555555;
+        memset(buffer, 0x55, sizeof(buffer));
+        status = pRtlUTF8ToUnicodeN(buffer, sizeof(buffer), &bytes_out, test->utf8, utf8_len);
+        out_len = bytes_out / sizeof(WCHAR);
+        ok(status == test->status,
+           "(test %u): status is 0x%x, expected 0x%x\n", i, status, test->status);
+        ok(bytes_out == expected_bytes,
+           "(test %u): bytes_out is %u, expected %u\n", i, bytes_out, expected_bytes);
+        ok(!memcmp(buffer, test->expected, bytes_out),
+           "(test %u): got %s, expected %s\n",
+           i, wine_dbgstr_wn(buffer, out_len), wine_dbgstr_w(test->expected));
+        ok(buffer[out_len] == 0x5555,
+           "(test %u): behind string: 0x%x\n", i, buffer[out_len]);
+
+        /* same test but include the null terminator */
+        bytes_out = 0x55555555;
+        memset(buffer, 0x55, sizeof(buffer));
+        status = pRtlUTF8ToUnicodeN(buffer, sizeof(buffer), &bytes_out, test->utf8, utf8_len + 1);
+        out_len = bytes_out / sizeof(WCHAR);
+        ok(status == test->status,
+           "(test %u): status is 0x%x, expected 0x%x\n", i, status, test->status);
+        ok(bytes_out == expected_bytes + sizeof(WCHAR),
+           "(test %u): bytes_out is %u, expected %u\n", i, bytes_out, (ULONG)(expected_bytes + sizeof(WCHAR)));
+        ok(!memcmp(buffer, test->expected, bytes_out),
+           "(test %u): got %s, expected %s\n",
+           i, wine_dbgstr_wn(buffer, out_len), wine_dbgstr_w(test->expected));
+        ok(buffer[out_len] == 0x5555,
+           "(test %u): behind string: 0x%x\n", i, buffer[out_len]);
+    }
+}
+
START_TEST(rtlstr)
{
InitFunctionPtrs();
@@ -2256,4 +2530,5 @@ START_TEST(rtlstr)
}
test_RtlHashUnicodeString();
test_RtlUnicodeToUTF8N();
+ test_RtlUTF8ToUnicodeN();
}
--
1.9.0.msysgit.0
More information about the wine-patches
mailing list