[PATCH 3/3] kernel32/tests: Add tests for UTF-7 conversion. (try 2)

Sun Oct 5 22:52:15 CDT 2014

---
 dlls/kernel32/tests/codepage.c | 219 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 219 insertions(+)

diff --git a/dlls/kernel32/tests/codepage.c b/dlls/kernel32/tests/codepage.c
index 8423c75..67927a1 100644
--- a/dlls/kernel32/tests/codepage.c
+++ b/dlls/kernel32/tests/codepage.c
@@ -412,6 +412,223 @@ static void test_string_conversion(LPBOOL bUsedDefaultChar)
     ok(GetLastError() == 0xdeadbeef, "GetLastError() is %u\n", GetLastError());
 }
 
+/* Converts NUL-terminated utf16_actual to UTF-7; checks the returned length (terminator included) and the text. */
+static void test_utf16_to_utf7(WCHAR* utf16_actual, char* utf7_expected, int utf7_expected_len)
+{
+    char c_buffer[1024] = {0}; /* final byte is never overwritten, so strcmp() and "%s" always find a NUL */
+    int len;
+    memset(c_buffer, '#', sizeof(c_buffer) - 1);
+    len = WideCharToMultiByte(CP_UTF7, 0, utf16_actual, -1, c_buffer, sizeof(c_buffer) - 1, NULL, NULL);
+    ok(len == utf7_expected_len && strcmp(c_buffer, utf7_expected) == 0,
+       "src=%s dst=\"%s\" len=%i\n", wine_dbgstr_w(utf16_actual), c_buffer, len);
+}
+
+/* Converts NUL-terminated utf7_actual to UTF-16; checks the returned length (terminator included) and the text. */
+static void test_utf7_to_utf16(char* utf7_actual, WCHAR* utf16_expected, int utf16_expected_len)
+{
+    WCHAR w_buffer[1024] = {0}; /* final element is never overwritten, so string walks always find a NUL */
+    int len;
+    memset(w_buffer, '#', sizeof(w_buffer) - sizeof(WCHAR));
+    len = MultiByteToWideChar(CP_UTF7, 0, utf7_actual, -1, w_buffer, sizeof(w_buffer) / sizeof(WCHAR) - 1);
+    ok(len == utf16_expected_len && winetest_strcmpW(w_buffer, utf16_expected) == 0,
+       "src=\"%s\" dst=%s len=%i\n", utf7_actual, wine_dbgstr_w(w_buffer), len);
+}
+
+/* Exercises CP_UTF7 in both directions: whole strings, length-only queries, srclen < -1, embedded NULs and truncated buffers. */
+static void test_utf7_string_conversion(void)
+{
+    /* tests which one-byte characters are base64-encoded and which are not */
+    WCHAR example_0_utf16[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,
+                               20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,
+                               36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,
+                               52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,
+                               68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,
+                               84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,
+                               100,101,102,103,104,105,106,107,108,109,110,111,
+                               112,113,114,115,116,117,118,119,120,121,122,123,
+                               124,125,126,127,128,129,130,131,132,133,134,135,
+                               136,137,138,139,140,141,142,143,144,145,146,147,
+                               148,149,150,151,152,153,154,155,156,157,158,159,
+                               160,161,162,163,164,165,166,167,168,169,170,171,
+                               172,173,174,175,176,177,178,179,180,181,182,183,
+                               184,185,186,187,188,189,190,191,192,193,194,195,
+                               196,197,198,199,200,201,202,203,204,205,206,207,
+                               208,209,210,211,212,213,214,215,216,217,218,219,
+                               220,221,222,223,224,225,226,227,228,229,230,231,
+                               232,233,234,235,236,237,238,239,240,241,242,243,
+                               244,245,246,247,248,249,250,251,252,253,254,255,
+                               256,0}; /* throw in 256 for good measure */
+    char example_0_utf7[] = "+AAEAAgADAAQABQAGAAcACA-\t\n+AAsADA-\r+AA4ADwAQABE"
+                            "AEgATABQAFQAWABcAGAAZABoAGwAcAB0AHgAf- +ACEAIgAjAC"
+                            "QAJQAm-'()+ACo-+-,-./0123456789:+ADsAPAA9AD4-?+AEA"
+                            "-ABCDEFGHIJKLMNOPQRSTUVWXYZ+AFsAXABdAF4AXwBg-abcde"
+                            "fghijklmnopqrstuvwxyz+AHsAfAB9AH4AfwCAAIEAggCDAIQA"
+                            "hQCGAIcAiACJAIoAiwCMAI0AjgCPAJAAkQCSAJMAlACVAJYAlw"
+                            "CYAJkAmgCbAJwAnQCeAJ8AoAChAKIAowCkAKUApgCnAKgAqQCq"
+                            "AKsArACtAK4ArwCwALEAsgCzALQAtQC2ALcAuAC5ALoAuwC8AL"
+                            "0AvgC/AMAAwQDCAMMAxADFAMYAxwDIAMkAygDLAMwAzQDOAM8A"
+                            "0ADRANIA0wDUANUA1gDXANgA2QDaANsA3ADdAN4A3wDgAOEA4g"
+                            "DjAOQA5QDmAOcA6ADpAOoA6wDsAO0A7gDvAPAA8QDyAPMA9AD1"
+                            "APYA9wD4APkA+gD7APwA/QD+AP8BAA-";
+
+    /* this string is the Unicode for "5 + (<ALPHA>+<BETA>+x) <NOT EQUAL TO> <ALPHA><BETA>"
+       it tests:
+       - a + before a non-base64 character
+       - a + between two non-directly-encodable characters
+       - a + before a base64 character
+       - a base64 character before a non-base64 character */
+    WCHAR example_1_utf16[] = {'5',' ','+',' ','(',0x0391,'+',0x0392,'+','x',')',' ',0x2260,' ',0x0391,0x0392,0};
+    char example_1_utf7[] = "5 +- (+A5E-+-+A5I-+-x) +ImA- +A5EDkg-";
+
+    /* tests some invalid UTF-16 */
+    /* (stray lead surrogate) */
+    WCHAR example_2_utf16[] = {0xD801,0};
+    char example_2_utf7[] = "+2AE-";
+
+    /* tests some more invalid UTF-16 */
+    /* (codepoint does not exist) */
+    WCHAR example_3_utf16[] = {0xFF00,0};
+    char example_3_utf7[] = "+/wA-";
+
+    /* tests a UTF-7 sequence implicitly terminated by a non-base64 ASCII character */
+    char example_4_utf7[] = "+T2A hello";
+    WCHAR example_4_utf16[] = {0x4F60,' ','h','e','l','l','o',0};
+
+    /* tests a UTF-7 sequence implicitly terminated by a non-ASCII character */
+    char example_5_utf7[] = "+T2A\xFFhello";
+    WCHAR example_5_utf16[] = {0x4F60,0x00FF,'h','e','l','l','o',0};
+
+    /* tests a + sign immediately followed by a non-base64 character */
+    /* (decoding should simply remove the + sign) */
+    char example_6_utf7[] = "+ hello";
+    WCHAR example_6_utf16[] = {' ','h','e','l','l','o',0};
+
+    /* tests some invalid UTF-7 */
+    /* (number of bits in base64 sequence is not a multiple of 16 and the last bit is a 1) */
+    char example_7_utf7[] = "+T2B-hello";
+    WCHAR example_7_utf16[] = {0x4F60,'h','e','l','l','o',0};
+
+    /* tests some more invalid UTF-7 */
+    /* (number of bits in base64 sequence is a multiple of 8 but not a multiple of 16) */
+    char example_8_utf7[] = "+T2BZ-hello";
+    WCHAR example_8_utf16[] = {0x4F60,'h','e','l','l','o',0};
+
+    /* tests UTF-7 followed by characters that should be encoded but aren't */
+    char example_9_utf7[] = "+T2BZ-\x82\xFE";
+    WCHAR example_9_utf16[] = {0x4F60,0x0082,0x00FE,0};
+
+    /* tests a null char before the end of the buffer */
+    WCHAR example_10_utf16[] = {'a',0,'b',0};
+    char example_10_utf7[] = "a\0b";
+
+    /* tests a buffer that runs out while not encoding a UTF-7 sequence */
+    /* additionally tests srclen < strlen(src) */
+    WCHAR example_11_utf16[] = {'h','e','l','l','o',0};
+
+    /* tests a buffer that runs out while not decoding a UTF-7 sequence */
+    /* additionally tests srclen < strlen(src) */
+    char example_12_utf7[] = "hello";
+
+    /* tests a buffer that runs out in the middle of encoding a UTF-7 sequence */
+    WCHAR example_13_utf16[] = {0x4F60,0x597D,0};
+
+    /* tests a buffer that runs out in the middle of decoding a UTF-7 sequence */
+    char example_14_utf7[] = "+T2BZfQ-";
+
+    char c_buffer[1024] = {0};  /* last byte stays NUL: every memset/dstlen below stops one short of it, */
+    WCHAR w_buffer[1024] = {0}; /* so "%s" and wine_dbgstr_w() stay in bounds on unterminated output */
+    int len;
+
+    /* WideCharToMultiByte tests */
+    test_utf16_to_utf7(example_0_utf16, example_0_utf7, sizeof(example_0_utf7));
+    test_utf16_to_utf7(example_1_utf16, example_1_utf7, sizeof(example_1_utf7));
+    test_utf16_to_utf7(example_2_utf16, example_2_utf7, sizeof(example_2_utf7));
+    test_utf16_to_utf7(example_3_utf16, example_3_utf7, sizeof(example_3_utf7));
+
+    /* MultiByteToWideChar tests */
+    test_utf7_to_utf16(example_0_utf7, example_0_utf16, sizeof(example_0_utf16) / sizeof(WCHAR));
+    test_utf7_to_utf16(example_1_utf7, example_1_utf16, sizeof(example_1_utf16) / sizeof(WCHAR));
+    test_utf7_to_utf16(example_2_utf7, example_2_utf16, sizeof(example_2_utf16) / sizeof(WCHAR));
+    test_utf7_to_utf16(example_3_utf7, example_3_utf16, sizeof(example_3_utf16) / sizeof(WCHAR));
+    test_utf7_to_utf16(example_4_utf7, example_4_utf16, sizeof(example_4_utf16) / sizeof(WCHAR));
+    test_utf7_to_utf16(example_5_utf7, example_5_utf16, sizeof(example_5_utf16) / sizeof(WCHAR));
+    test_utf7_to_utf16(example_6_utf7, example_6_utf16, sizeof(example_6_utf16) / sizeof(WCHAR));
+    test_utf7_to_utf16(example_7_utf7, example_7_utf16, sizeof(example_7_utf16) / sizeof(WCHAR));
+    test_utf7_to_utf16(example_8_utf7, example_8_utf16, sizeof(example_8_utf16) / sizeof(WCHAR));
+    test_utf7_to_utf16(example_9_utf7, example_9_utf16, sizeof(example_9_utf16) / sizeof(WCHAR));
+
+
+    /* 4 tests to just compute the required length if dstlen is 0 */
+    len = WideCharToMultiByte(CP_UTF7, 0, example_0_utf16, -1, NULL, 0, NULL, NULL);
+    ok(len == sizeof(example_0_utf7), "len=%i\n", len);
+
+    len = WideCharToMultiByte(CP_UTF7, 0, example_0_utf16, -1, c_buffer, 0, NULL, NULL);
+    ok(len == sizeof(example_0_utf7), "len=%i\n", len);
+
+    len = MultiByteToWideChar(CP_UTF7, 0, example_0_utf7, -1, NULL, 0);
+    ok(len == sizeof(example_0_utf16) / sizeof(WCHAR), "len=%i\n", len);
+
+    len = MultiByteToWideChar(CP_UTF7, 0, example_0_utf7, -1, w_buffer, 0);
+    ok(len == sizeof(example_0_utf16) / sizeof(WCHAR), "len=%i\n", len);
+
+    /* 2 more tests to check what happens if srclen < -1 */
+    memset(c_buffer, '#', sizeof(c_buffer) - 1);
+    len = WideCharToMultiByte(CP_UTF7, 0, example_0_utf16, -2, c_buffer, sizeof(c_buffer) - 1, NULL, NULL);
+    ok(len == sizeof(example_0_utf7) && strcmp(c_buffer, example_0_utf7) == 0,
+       "len=%i dst=\"%s\"\n", len, c_buffer);
+
+    memset(w_buffer, '#', sizeof(w_buffer) - sizeof(WCHAR));
+    len = MultiByteToWideChar(CP_UTF7, 0, example_0_utf7, -2, w_buffer, sizeof(w_buffer) / sizeof(WCHAR) - 1);
+    ok(len == sizeof(example_0_utf16) / sizeof(WCHAR) && winetest_strcmpW(w_buffer, example_0_utf16) == 0,
+       "len=%i dst=%s\n", len, wine_dbgstr_w(w_buffer));
+
+
+    /* example_10_utf16 and example_10_utf7 should convert to each other */
+    memset(c_buffer, '#', sizeof(c_buffer) - 1);
+    len = WideCharToMultiByte(CP_UTF7, 0, example_10_utf16, sizeof(example_10_utf16) / sizeof(WCHAR), c_buffer, sizeof(c_buffer) - 1, NULL, NULL);
+    ok(len == sizeof(example_10_utf7) && c_buffer[0] == 'a' && c_buffer[1] == 0 && c_buffer[2] == 'b' && c_buffer[3] == 0,
+       "len=%i dst=\"%s\"\n", len, c_buffer);
+
+    memset(w_buffer, '#', sizeof(w_buffer) - sizeof(WCHAR));
+    len = MultiByteToWideChar(CP_UTF7, 0, example_10_utf7, sizeof(example_10_utf7), w_buffer, sizeof(w_buffer) / sizeof(WCHAR) - 1);
+    ok(len == sizeof(example_10_utf16) / sizeof(WCHAR) && w_buffer[0] == 'a' && w_buffer[1] == 0 && w_buffer[2] == 'b' && w_buffer[3] == 0,
+       "len=%i dst=%s\n", len, wine_dbgstr_w(w_buffer));
+
+
+    /* example_11_utf16 with dstlen=2 should write two UTF-7 characters and stop without null-terminating */
+    memset(c_buffer, '#', sizeof(c_buffer) - 1);
+    len = WideCharToMultiByte(CP_UTF7, 0, example_11_utf16, -1, c_buffer, 2, NULL, NULL);
+    ok(len == 0 && c_buffer[0] == 'h' && c_buffer[1] == 'e' && c_buffer[2] == '#', "len=%i dst=\"%s\"\n", len, c_buffer);
+
+    /* example_11_utf16 with srclen=2 should write two UTF-7 characters and stop without null-terminating */
+    memset(c_buffer, '#', sizeof(c_buffer) - 1);
+    len = WideCharToMultiByte(CP_UTF7, 0, example_11_utf16, 2, c_buffer, sizeof(c_buffer) - 1, NULL, NULL);
+    ok(len == 2 && c_buffer[0] == 'h' && c_buffer[1] == 'e' && c_buffer[2] == '#', "len=%i dst=\"%s\"\n", len, c_buffer);
+
+
+    /* example_12_utf7 with dstlen=2 should write two UTF-16 characters and stop without null-terminating */
+    memset(w_buffer, '#', sizeof(w_buffer) - sizeof(WCHAR));
+    len = MultiByteToWideChar(CP_UTF7, 0, example_12_utf7, -1, w_buffer, 2);
+    ok(len == 0 && w_buffer[0] == 'h' && w_buffer[1] == 'e' && w_buffer[2] == 0x2323, "len=%i dst=%s\n", len, wine_dbgstr_w(w_buffer));
+
+    /* example_12_utf7 with srclen=2 should write two UTF-16 characters and stop without null-terminating */
+    memset(w_buffer, '#', sizeof(w_buffer) - sizeof(WCHAR));
+    len = MultiByteToWideChar(CP_UTF7, 0, example_12_utf7, 2, w_buffer, sizeof(w_buffer) / sizeof(WCHAR) - 1);
+    ok(len == 2 && w_buffer[0] == 'h' && w_buffer[1] == 'e' && w_buffer[2] == 0x2323, "len=%i dst=%s\n", len, wine_dbgstr_w(w_buffer));
+
+
+    /* example_13_utf16 with dstlen=2 should write two UTF-7 characters and stop without null-terminating */
+    memset(c_buffer, '#', sizeof(c_buffer) - 1);
+    len = WideCharToMultiByte(CP_UTF7, 0, example_13_utf16, -1, c_buffer, 2, NULL, NULL);
+    ok(len == 0 && c_buffer[0] == '+' && c_buffer[1] == 'T' && c_buffer[2] == '#', "len=%i dst=\"%s\"\n", len, c_buffer);
+
+
+    /* example_14_utf7 with dstlen=1 should write one UTF-16 character and stop without null-terminating */
+    memset(w_buffer, '#', sizeof(w_buffer) - sizeof(WCHAR));
+    len = MultiByteToWideChar(CP_UTF7, 0, example_14_utf7, -1, w_buffer, 1);
+    ok(len == 0 && w_buffer[0] == 0x4F60 && w_buffer[1] == 0x2323, "len=%i dst=%s\n", len, wine_dbgstr_w(w_buffer));
+}
+
 static void test_undefined_byte_char(void)
 {
     static const struct tag_testset {
@@ -618,6 +835,8 @@ START_TEST(codepage)
     test_string_conversion(NULL);
     test_string_conversion(&bUsedDefaultChar);
 
+    test_utf7_string_conversion();
+
     test_undefined_byte_char();
     test_threadcp();
 }
-- 
2.1.2