Remove some default unicode to code page mappings not existing in Windows

Dmitry Timoshkov dmitry at codeweavers.com
Thu Sep 21 06:38:46 CDT 2006


Hello,

One reason for the differences in behaviour of WideCharToMultiByte between
Windows and Wine lies in the Unicode-to-code-page mappings.

The attached patch contains a test case covering all characters currently
included in libs/wine/defaults, and requires regenerating all the codepage
files in libs/wine.

Changelog:
    Remove some default Unicode-to-code-page mappings that do not exist
    in Windows.

diff -up cvs/hq/wine/libs/wine/cpmap.pl wine/libs/wine/cpmap.pl
--- cvs/hq/wine/libs/wine/cpmap.pl	2006-07-12 20:45:45.000000000 +0900
+++ wine/libs/wine/cpmap.pl	2006-09-21 20:18:13.000000000 +0900
@@ -311,14 +311,13 @@ sub READ_DEFAULTS
                          $1 eq "sub" ||
                          $1 eq "wide" ||
                          $1 eq "narrow" ||
-                         $1 eq "compat" ||
                          $1 eq "small");
             $dst = hex $2;
         }
         elsif ($decomp =~ /^<compat>\s+0020\s+([0-9a-fA-F]+)/)
         {
             # decomposition "<compat> 0020 1234" -> combining accent
-            $dst = hex $1;
+            next;
         }
         elsif ($decomp =~ /^([0-9a-fA-F]+)/)
         {
@@ -332,7 +331,7 @@ sub READ_DEFAULTS
                 push @compose_table, [ hex $1, hex $2, $src ];
             }
             elsif ($decomp =~ /^(<[a-z]+>\s)*([0-9a-fA-F]+)$/ &&
-                   (($src >= 0xf900 && $src < 0xfb00) || ($src >= 0xfe30 && $src < 0xfffd)))
+                   ($src >= 0xf900 && $src < 0xfb00))
             {
                 # Single char decomposition in the compatibility range
                 $compatmap_table[$src] = hex $2;
diff -up cvs/hq/wine/libs/wine/defaults wine/libs/wine/defaults
--- cvs/hq/wine/libs/wine/defaults	2006-07-12 20:45:45.000000000 +0900
+++ wine/libs/wine/defaults	2006-09-21 19:55:31.000000000 +0900
@@ -63,7 +63,6 @@
 0166           'T'  # LATIN CAPITAL LETTER T WITH STROKE
 0167           't'  # LATIN SMALL LETTER T WITH STROKE
 0180           'b'  # LATIN SMALL LETTER B WITH STROKE
-0190           'E'  # LATIN CAPITAL LETTER OPEN E
 0191           'F'  # LATIN CAPITAL LETTER F WITH HOOK
 0192           'f'  # LATIN SMALL LETTER F WITH HOOK
 0197           'I'  # LATIN CAPITAL LETTER I WITH STROKE
@@ -72,20 +71,6 @@
 01ab           't'  # LATIN SMALL LETTER T WITH PALATAL HOOK
 01ae           'T'  # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
 01b6           'z'  # LATIN SMALL LETTER Z WITH STROKE
-0391 	       'A'  # GREEK CAPITAL LETTER ALPHA
-0392 	       'B'  # GREEK CAPITAL LETTER BETA
-0395 	       'E'  # GREEK CAPITAL LETTER EPSILON
-0396 	       'Z'  # GREEK CAPITAL LETTER ZETA
-0397 	       'H'  # GREEK CAPITAL LETTER ETA
-0399 	       'I'  # GREEK CAPITAL LETTER IOTA
-039a 	       'K'  # GREEK CAPITAL LETTER KAPPA
-039c 	       'M'  # GREEK CAPITAL LETTER MU
-039d 	       'N'  # GREEK CAPITAL LETTER NU
-039f 	       'O'  # GREEK CAPITAL LETTER OMICRON
-03a1 	       'P'  # GREEK CAPITAL LETTER RHO
-03a4 	       'T'  # GREEK CAPITAL LETTER TAU
-03a5 	       'Y'  # GREEK CAPITAL LETTER UPSILON
-03a7 	       'X'  # GREEK CAPITAL LETTER CHI
 01e4 	       'G'  # LATIN CAPITAL LETTER G WITH STROKE
 01e5 	       'g'  # LATIN SMALL LETTER G WITH STROKE
 0261 	       'g'  # LATIN SMALL LETTER SCRIPT G
@@ -98,10 +83,9 @@
 00af,02c9,0304           2014 # MACRON -> EM DASH
 00b4,02b9,02ca,0301,2032 '''  # ACUTE ACCENT, PRIME
 00b8,0327                ','  # CEDILLA
-02ba,02dd,030b,2033      '"'  # DOUBLE ACUTE ACCENT, DOUBLE PRIME
+02ba                     '"'  # MODIFIER LETTER DOUBLE PRIME
 02c4,2303                '^'  # UP ARROWHEAD
 02c6,0302                '^'  # CIRCUMFLEX ACCENT
-02c7,030c                'v'  # COMBINING CARON -> CARON
 02c8                     '''  # MODIFIER LETTER VERTICAL LINE
 02cb,0300                '`'  # GRAVE ACCENT
 02cd,0331,0332           '_'  # MODIFIER LETTER LOW MACRON, COMBINING MACRON BELOW, COMBINING LOW LINE
@@ -110,9 +94,8 @@
 02da,030a,2070,2218      00b0 # RING ABOVE, SUPERSCRIPT ZERO, RING OPERATOR -> DEGREE SIGN
 02db,0328                none # OGONEK
 02dc,0303                '~'  # SMALL TILDE
-0305,203e                00af # OVERLINE -> MACRON
+0305                     00af # COMBINING OVERLINE -> MACRON
 030e                     '"'  # COMBINING DOUBLE VERTICAL LINE ABOVE
-0333                     '_'  # COMBINING DOUBLE LOW LINE
 
 # mathematical symbols
 
@@ -121,12 +104,11 @@
 2216      '\'  # SET MINUS
 2217      '*'  # ASTERISK OPERATOR
 221a      'V'  # SQUARE ROOT
-221f      'L'  # RIGHT ANGLE
 2223      '|'  # DIVIDES
 2229      'n'  # INTERSECTION
 2236      ':'  # RATIO
 2248      02DC # ALMOST EQUAL TO -> SMALL TILDE
-2261,2263 '='  # IDENTICAL TO, STRICTLY EQUIVALENT TO
+2261      '='  # IDENTICAL TO
 226a      00ab # MUCH LESS-THAN -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
 226b      00bb # MUCH GREATER-THAN -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
 22c5      00b7 # DOT OPERATOR -> MIDDLE DOT
@@ -139,50 +121,34 @@
 00bb,300b           '>'  # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, RIGHT DOUBLE ANGLE BRACKET
 01c0                2502 # LATIN LETTER DENTAL CLICK -> BOX DRAWINGS LIGHT VERTICAL
 01c3                '!'  # LATIN LETTER RETROFLEX CLICK
-02bb                2018 # MODIFIER LETTER TURNED COMMA -> LEFT SINGLE QUOTATION MARK
 02bc,2019           '''  # RIGHT SINGLE QUOTATION MARK
 2010,2011,2212 	    '-'  # HYPHEN, MINUS SIGN
-2013,2014,2015 	    '-'  # EN DASH, EM DASH, HORIZONTAL BAR, MINUS SIGN
-2018,201b,2035      '`'  # LEFT SINGLE QUOTATION MARK, SINGLE HIGH-REVERSED-9 QUOTATION MARK, REVERSED PRIME
+2013,2014           '-'  # EN DASH, EM DASH
+2018,2035           '`'  # LEFT SINGLE QUOTATION MARK, REVERSED PRIME
 201a                ','  # SINGLE LOW-9 QUOTATION MARK
-201c,301d           '"'  # LEFT DOUBLE QUOTATION MARK, REVERSED DOUBLE PRIME QUOTATION MARK
-201d,301e           '"'  # RIGHT DOUBLE QUOTATION MARK, DOUBLE PRIME QUOTATION MARK
-201e,301f           ','  # LOW DOUBLE PRIME QUOTATION MARK, DOUBLE LOW-9 QUOTATION MARK
+201c                '"'  # LEFT DOUBLE QUOTATION MARK
+201d                '"'  # RIGHT DOUBLE QUOTATION MARK
+201e                ','  # DOUBLE LOW-9 QUOTATION MARK
 2022,2219      	    none # BULLET, BULLET OPERATOR
 2039,3008      	    '<'  # SINGLE LEFT-POINTING ANGLE QUOTATION MARK, LEFT ANGLE BRACKET
 203a,3009      	    '>'  # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK, RIGHT ANGLE BRACKET
-203c                '!'  # DOUBLE EXCLAMATION MARK
-2190 		    2039 # LEFTWARDS ARROW -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
-2191 		    02c6 # UPWARDS ARROW -> MODIFIER LETTER CIRCUMFLEX ACCENT
-2192 		    203a # RIGHTWARDS ARROW -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
-2193 		    02c7 # DOWNWARDS ARROW -> CARON
-2194                '-'  # LEFT RIGHT ARROW
-2195                '|'  # UP DOWN ARROW
 223c           	    '~'  # TILDE
 301a                '['  # LEFT WHITE SQUARE BRACKET
 301b                ']'  # RIGHT WHITE SQUARE BRACKET
-fe49,fe4a,fe4b,fe4c 203e # DOUBLE WAVY OVERLINE -> OVERLINE
-fe4d,fe4e,fe4f      '_'  # DASHED/CENTRELINE/WAVY LOW LINE
 
 # box drawing chars
 
-2500,2501 	    '-'  # BOX DRAWINGS LIGHT/HEAVY HORIZONTAL
-2502,2503 	    '|'  # BOX DRAWINGS LIGHT/HEAVY VERTICAL
-2504,2505 	    '-'  # BOX DRAWINGS LIGHT/HEAVY TRIPLE DASH HORIZONTAL
-2506,2507 	    00a6 # BOX DRAWINGS LIGHT/HEAVY TRIPLE DASH VERTICAL -> BROKEN BAR
-2508,2509 	    '-'  # BOX DRAWINGS LIGHT/HEAVY QUADRUPLE DASH HORIZONTAL
-250a,250b 	    00a6 # BOX DRAWINGS LIGHT/HEAVY QUADRUPLE DASH VERTICAL -> BROKEN BAR
-250c,250d,250e,250f '+'  # BOX DRAWINGS LIGHT/HEAVY DOWN AND RIGHT
-2510,2511,2512,2513 00ac # BOX DRAWINGS LIGHT/HEAVY DOWN AND LEFT -> NOT SIGN
-2514,2515,2516,2517 'L'  # BOX DRAWINGS LIGHT/HEAVY UP AND RIGHT
-2518,2519,251a,251b '+'  # BOX DRAWINGS LIGHT/HEAVY UP AND LEFT
-251c,251d,251e,251f,2520,2521,2522,2523 '+'  # BOX DRAWINGS LIGHT/HEAVY VERTICAL AND RIGHT
-2524,2525,2526,2527,2528,2529,252a,252b '+'  # BOX DRAWINGS LIGHT/HEAVY VERTICAL AND LEFT
-252c,252d,252e,252f,2530,2531,2532,2533 'T'  # BOX DRAWINGS LIGHT/HEAVY DOWN AND HORIZONTAL
-2534,2535,2536,2537,2538,2539,253a,253b '+'  # BOX DRAWINGS LIGHT/HEAVY UP AND HORIZONTAL
-253c,253d,253e,253f,2540,2541,2542,2543,2544,2545,2546,2547,2548,2549,254a,254b '+'   # BOX DRAWINGS LIGHT/HEAVY VERTICAL AND HORIZONTAL
-254c,254d      	    '-'  # BOX DRAWINGS LIGHT/HEAVY DOUBLE DASH HORIZONTAL
-254e,254f      	    00a6 # BOX DRAWINGS LIGHT/HEAVY DOUBLE DASH VERTICAL -> BROKEN BAR
+2500                '-'  # BOX DRAWINGS LIGHT HORIZONTAL
+2502                '|'  # BOX DRAWINGS LIGHT VERTICAL
+250c                '+'  # BOX DRAWINGS LIGHT DOWN AND RIGHT
+2510                00ac # BOX DRAWINGS LIGHT DOWN AND LEFT -> NOT SIGN
+2514                'L'  # BOX DRAWINGS LIGHT UP AND RIGHT
+2518                '+'  # BOX DRAWINGS LIGHT UP AND LEFT
+251c                '+'  # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+2524                '+'  # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+252c                'T'  # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+2534                '+'  # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+253c                '+'   # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
 2550           	    '='  # BOX DRAWINGS DOUBLE HORIZONTAL
 2551           	    '|'  # BOX DRAWINGS DOUBLE VERTICAL
 2552,2553,2554 	    '+'  # BOX DRAWINGS DOWN AND RIGHT
@@ -194,9 +160,3 @@ fe4d,fe4e,fe4f      '_'  # DASHED/CENTRE
 2564,2565,2566 	    'T'  # BOX DRAWINGS DOWN AND HORIZONTAL
 2567,2568,2569 	    '+'  # BOX DRAWINGS UP AND HORIZONTAL
 256a,256b,256c 	    '+'  # BOX DRAWINGS VERTICAL AND HORIZONTAL
-2571           	    '/'  # BOX DRAWINGS LIGHT DIAGONAL UPPER RIGHT TO LOWER LEFT
-2572           	    '\'  # BOX DRAWINGS LIGHT DIAGONAL UPPER LEFT TO LOWER RIGHT
-2573           	    'X'  # BOX DRAWINGS LIGHT DIAGONAL CROSS
-2574,2576,2578,257a,257c,257e '-'  # BOX DRAWINGS LIGHT/HEAVY LEFT/RIGHT
-2575,2577,2579,257b,257d,257f '|'  # BOX DRAWINGS LIGHT/HEAVY UP/DOWN
-25cb,25ef           'O'  # WHITE/LARGE CIRCLE
diff -up cvs/hq/wine/dlls/kernel32/tests/codepage.c wine/dlls/kernel32/tests/codepage.c
--- cvs/hq/wine/dlls/kernel32/tests/codepage.c	2006-09-12 21:31:37.000000000 +0900
+++ wine/dlls/kernel32/tests/codepage.c	2006-09-21 20:26:12.000000000 +0900
@@ -146,10 +146,71 @@ static void test_overlapped_buffers(void
     ok(!memcmp(buf, strA, sizeof(strA)), "conversion failed: %s\n", buf);
 }
 
+static void test_cp1252_WtoA_conversions(void)
+{
+    /* Wine used to map every character in to_default to an approximate
+     * visual equivalent; the test expects the default char (0x3f) instead.
+     */
+    static const WCHAR to_default[] = {
+        0x0190,0x0391,0x0392,0x0395,0x0396,0x0397,0x0399,0x039a,0x039c,0x039d,
+        0x039f,0x03a1,0x03a4,0x03a5,0x03a7,0x02dd,0x030b,0x2033,0x02c7,0x030c,
+        0x02d8,0x0306,0x02d9,0x0307,0x02db,0x0328,0x203e,0x0333,0x221f,0x2263,
+        0x02bb,0x2015,0x201b,0x301d,0x301e,0x301f,0x203c,0x2190,0x2191,0x2192,
+        0x2193,0x2194,0x2195,0xfe49,0xfe4a,0xfe4b,0xfe4c,0xfe4d,0xfe4e,0xfe4f,
+        0x2501,0x2503,0x2504,0x2505,0x2506,0x2507,0x2508,0x2509,0x250a,0x250b,
+        0x250d,0x250e,0x250f,0x2511,0x2512,0x2513,0x2515,0x2516,0x2517,0x2519,
+        0x251a,0x251b,0x251d,0x251e,0x251f,0x2520,0x2521,0x2522,0x2523,0x2525,
+        0x2526,0x2527,0x2528,0x2529,0x252a,0x252b,0x252d,0x252e,0x252f,0x2530,
+        0x2531,0x2532,0x2533,0x2535,0x2536,0x2537,0x2538,0x2539,0x253a,0x253b,
+        0x253d,0x253e,0x253f,0x2540,0x2541,0x2542,0x2543,0x2544,0x2545,0x2546,
+        0x2547,0x2548,0x2549,0x254a,0x254b,0x254c,0x254d,0x254e,0x254f,0x2571,
+        0x2572,0x2573,0x2574,0x2576,0x2578,0x257a,0x257c,0x257e,0x2575,0x2577,
+        0x2579,0x257b,0x257d,0x257f,0x25cb,0x25ef };
+    static const WCHAR not_to_default[] = { /* must convert to something other than 0x3f */
+        0x00a2,0x20a1,0x00a3,0x20a4,0x00a5,0x00a6,0x00a9,0x00ae,0x00b5,0x03bc,
+        0x00c6,0x00d0,0x0110,0x0189,0x00d7,0x00d8,0x2205,0x00df,0x03b2,0x00e6,
+        0x00f0,0x00f8,0x0111,0x0126,0x0127,0x0131,0x0141,0x0142,0x0152,0x0153,
+        0x0166,0x0167,0x0180,0x0191,0x0192,0x0197,0x019a,0x019f,0x01ab,0x01ae,
+        0x01b6,0x01e4,0x01e5,0x0261,0x2118,0x212e,0x00a8,0x0308,0x00af,0x02c9,
+        0x0304,0x00b4,0x02b9,0x02ca,0x0301,0x2032,0x00b8,0x0327,0x02ba,0x02c4,
+        0x2303,0x02c6,0x0302,0x02c8,0x02cb,0x0300,0x02cd,0x0331,0x0332,0x02da,
+        0x030a,0x2070,0x2218,0x02dc,0x0303,0x0305,0x030e,0x00b1,0x2213,0x2044,
+        0x2215,0x2216,0x2217,0x221a,0x2223,0x2229,0x2236,0x2248,0x2261,0x226a,
+        0x226b,0x22c5,0x00a1,0x00ab,0x300a,0x00ad,0x00bb,0x300b,0x01c0,0x01c3,
+        0x02bc,0x2019,0x2010,0x2011,0x2212,0x2013,0x2014,0x2018,0x2035,0x201a,
+        0x201c,0x201d,0x201e,0x2022,0x2219,0x2039,0x3008,0x203a,0x3009,0x223c,
+        0x301a,0x301b,0x2500,0x2502,0x250c,0x2510,0x2514,0x2518,0x251c,0x2524,
+        0x252c,0x2534,0x253c,0x2550,0x2551,0x2552,0x2553,0x2554,0x2555,0x2556,
+        0x2557,0x2558,0x2559,0x255a,0x255b,0x255c,0x255d,0x255e,0x255f,0x2560,
+        0x2561,0x2562,0x2563,0x2564,0x2565,0x2566,0x2567,0x2568,0x2569,0x256a,
+        0x256b,0x256c };
+    INT i, ret;
+    char result; /* one-byte output buffer for each conversion */
+    CPINFO cpi;
+
+    ok(GetCPInfo(1252, &cpi), "Can't find codepage 1252 info\n");
+    ok(cpi.DefaultChar[0] == 0x3f, "cp1252 expected default char 0x3f, got %02x\n", cpi.DefaultChar[0]);
+
+    for (i = 0; i < sizeof(to_default)/sizeof(to_default[0]); i++)
+    {
+        ret = WideCharToMultiByte(1252, 0, &to_default[i], 1, &result, 1, NULL, NULL);
+        ok(ret == 1, "char %04x: expected ret 1, got %d\n", to_default[i], ret);
+        ok(result == 0x3f, "char %04x: expected 0x3f, got %02x\n", to_default[i], (BYTE)result);
+    }
+
+    for (i = 0; i < sizeof(not_to_default)/sizeof(not_to_default[0]); i++)
+    {
+        ret = WideCharToMultiByte(1252, 0, &not_to_default[i], 1, &result, 1, NULL, NULL);
+        ok(ret == 1, "char %04x: expected ret 1, got %d\n", not_to_default[i], ret);
+        ok(result != 0x3f, "char %04x: not expected 0x3f\n", not_to_default[i]);
+    }
+}
+
 START_TEST(codepage)
 {
     test_destination_buffer();
     test_null_source();
     test_negative_source_length();
     test_overlapped_buffers();
+    test_cp1252_WtoA_conversions();
 }





More information about the wine-patches mailing list