Remove some default unicode to code page mappings not existing in
Windows
Dmitry Timoshkov
dmitry at codeweavers.com
Thu Sep 21 06:38:46 CDT 2006
Hello,
One reason WideCharToMultiByte behaves differently on Windows and on Wine
lies in the Unicode to code page mappings.
The attached patch contains a test case for all characters currently included
in libs/wine/defaults, and requires regeneration of all the codepage files
in libs/wine.
Changelog:
Remove some default unicode to code page mappings not existing
in Windows.
diff -up cvs/hq/wine/libs/wine/cpmap.pl wine/libs/wine/cpmap.pl
--- cvs/hq/wine/libs/wine/cpmap.pl 2006-07-12 20:45:45.000000000 +0900
+++ wine/libs/wine/cpmap.pl 2006-09-21 20:18:13.000000000 +0900
@@ -311,14 +311,13 @@ sub READ_DEFAULTS
$1 eq "sub" ||
$1 eq "wide" ||
$1 eq "narrow" ||
- $1 eq "compat" ||
$1 eq "small");
$dst = hex $2;
}
elsif ($decomp =~ /^<compat>\s+0020\s+([0-9a-fA-F]+)/)
{
# decomposition "<compat> 0020 1234" -> combining accent
- $dst = hex $1;
+ next;
}
elsif ($decomp =~ /^([0-9a-fA-F]+)/)
{
@@ -332,7 +331,7 @@ sub READ_DEFAULTS
push @compose_table, [ hex $1, hex $2, $src ];
}
elsif ($decomp =~ /^(<[a-z]+>\s)*([0-9a-fA-F]+)$/ &&
- (($src >= 0xf900 && $src < 0xfb00) || ($src >= 0xfe30 && $src < 0xfffd)))
+ ($src >= 0xf900 && $src < 0xfb00))
{
# Single char decomposition in the compatibility range
$compatmap_table[$src] = hex $2;
diff -up cvs/hq/wine/libs/wine/defaults wine/libs/wine/defaults
--- cvs/hq/wine/libs/wine/defaults 2006-07-12 20:45:45.000000000 +0900
+++ wine/libs/wine/defaults 2006-09-21 19:55:31.000000000 +0900
@@ -63,7 +63,6 @@
0166 'T' # LATIN CAPITAL LETTER T WITH STROKE
0167 't' # LATIN SMALL LETTER T WITH STROKE
0180 'b' # LATIN SMALL LETTER B WITH STROKE
-0190 'E' # LATIN CAPITAL LETTER OPEN E
0191 'F' # LATIN CAPITAL LETTER F WITH HOOK
0192 'f' # LATIN SMALL LETTER F WITH HOOK
0197 'I' # LATIN CAPITAL LETTER I WITH STROKE
@@ -72,20 +71,6 @@
01ab 't' # LATIN SMALL LETTER T WITH PALATAL HOOK
01ae 'T' # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
01b6 'z' # LATIN SMALL LETTER Z WITH STROKE
-0391 'A' # GREEK CAPITAL LETTER ALPHA
-0392 'B' # GREEK CAPITAL LETTER BETA
-0395 'E' # GREEK CAPITAL LETTER EPSILON
-0396 'Z' # GREEK CAPITAL LETTER ZETA
-0397 'H' # GREEK CAPITAL LETTER ETA
-0399 'I' # GREEK CAPITAL LETTER IOTA
-039a 'K' # GREEK CAPITAL LETTER KAPPA
-039c 'M' # GREEK CAPITAL LETTER MU
-039d 'N' # GREEK CAPITAL LETTER NU
-039f 'O' # GREEK CAPITAL LETTER OMICRON
-03a1 'P' # GREEK CAPITAL LETTER RHO
-03a4 'T' # GREEK CAPITAL LETTER TAU
-03a5 'Y' # GREEK CAPITAL LETTER UPSILON
-03a7 'X' # GREEK CAPITAL LETTER CHI
01e4 'G' # LATIN CAPITAL LETTER G WITH STROKE
01e5 'g' # LATIN SMALL LETTER G WITH STROKE
0261 'g' # LATIN SMALL LETTER SCRIPT G
@@ -98,10 +83,9 @@
00af,02c9,0304 2014 # MACRON -> EM DASH
00b4,02b9,02ca,0301,2032 ''' # ACUTE ACCENT, PRIME
00b8,0327 ',' # CEDILLA
-02ba,02dd,030b,2033 '"' # DOUBLE ACUTE ACCENT, DOUBLE PRIME
+02ba '"' # DOUBLE ACUTE ACCENT, DOUBLE PRIME
02c4,2303 '^' # UP ARROWHEAD
02c6,0302 '^' # CIRCUMFLEX ACCENT
-02c7,030c 'v' # COMBINING CARON -> CARON
02c8 ''' # MODIFIER LETTER VERTICAL LINE
02cb,0300 '`' # GRAVE ACCENT
02cd,0331,0332 '_' # MODIFIER LETTER LOW MACRON, COMBINING MACRON BELOW, COMBINING LOW LINE
@@ -110,9 +94,8 @@
02da,030a,2070,2218 00b0 # RING ABOVE, SUPERSCRIPT ZERO, RING OPERATOR -> DEGREE SIGN
02db,0328 none # OGONEK
02dc,0303 '~' # SMALL TILDE
-0305,203e 00af # OVERLINE -> MACRON
+0305 00af # OVERLINE -> MACRON
030e '"' # COMBINING DOUBLE VERTICAL LINE ABOVE
-0333 '_' # COMBINING DOUBLE LOW LINE
# mathematical symbols
@@ -121,12 +104,11 @@
2216 '\' # SET MINUS
2217 '*' # ASTERISK OPERATOR
221a 'V' # SQUARE ROOT
-221f 'L' # RIGHT ANGLE
2223 '|' # DIVIDES
2229 'n' # INTERSECTION
2236 ':' # RATIO
2248 02DC # ALMOST EQUAL TO -> SMALL TILDE
-2261,2263 '=' # IDENTICAL TO, STRICTLY EQUIVALENT TO
+2261 '=' # IDENTICAL TO, STRICTLY EQUIVALENT TO
226a 00ab # MUCH LESS-THAN -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
226b 00bb # MUCH GREATER-THAN -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
22c5 00b7 # DOT OPERATOR -> MIDDLE DOT
@@ -139,50 +121,34 @@
00bb,300b '>' # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, RIGHT DOUBLE ANGLE BRACKET
01c0 2502 # LATIN LETTER DENTAL CLICK -> BOX DRAWINGS LIGHT VERTICAL
01c3 '!' # LATIN LETTER RETROFLEX CLICK
-02bb 2018 # MODIFIER LETTER TURNED COMMA -> LEFT SINGLE QUOTATION MARK
02bc,2019 ''' # RIGHT SINGLE QUOTATION MARK
2010,2011,2212 '-' # HYPHEN, MINUS SIGN
-2013,2014,2015 '-' # EN DASH, EM DASH, HORIZONTAL BAR, MINUS SIGN
-2018,201b,2035 '`' # LEFT SINGLE QUOTATION MARK, SINGLE HIGH-REVERSED-9 QUOTATION MARK, REVERSED PRIME
+2013,2014 '-' # EN DASH, EM DASH, HORIZONTAL BAR, MINUS SIGN
+2018,2035 '`' # LEFT SINGLE QUOTATION MARK, SINGLE HIGH-REVERSED-9 QUOTATION MARK, REVERSED PRIME
201a ',' # SINGLE LOW-9 QUOTATION MARK
-201c,301d '"' # LEFT DOUBLE QUOTATION MARK, REVERSED DOUBLE PRIME QUOTATION MARK
-201d,301e '"' # RIGHT DOUBLE QUOTATION MARK, DOUBLE PRIME QUOTATION MARK
-201e,301f ',' # LOW DOUBLE PRIME QUOTATION MARK, DOUBLE LOW-9 QUOTATION MARK
+201c '"' # LEFT DOUBLE QUOTATION MARK, REVERSED DOUBLE PRIME QUOTATION MARK
+201d '"' # RIGHT DOUBLE QUOTATION MARK, DOUBLE PRIME QUOTATION MARK
+201e ',' # LOW DOUBLE PRIME QUOTATION MARK, DOUBLE LOW-9 QUOTATION MARK
2022,2219 none # BULLET, BULLET OPERATOR
2039,3008 '<' # SINGLE LEFT-POINTING ANGLE QUOTATION MARK, LEFT ANGLE BRACKET
203a,3009 '>' # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK, RIGHT ANGLE BRACKET
-203c '!' # DOUBLE EXCLAMATION MARK
-2190 2039 # LEFTWARDS ARROW -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
-2191 02c6 # UPWARDS ARROW -> MODIFIER LETTER CIRCUMFLEX ACCENT
-2192 203a # RIGHTWARDS ARROW -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
-2193 02c7 # DOWNWARDS ARROW -> CARON
-2194 '-' # LEFT RIGHT ARROW
-2195 '|' # UP DOWN ARROW
223c '~' # TILDE
301a '[' # LEFT WHITE SQUARE BRACKET
301b ']' # RIGHT WHITE SQUARE BRACKET
-fe49,fe4a,fe4b,fe4c 203e # DOUBLE WAVY OVERLINE -> OVERLINE
-fe4d,fe4e,fe4f '_' # DASHED/CENTRELINE/WAVY LOW LINE
# box drawing chars
-2500,2501 '-' # BOX DRAWINGS LIGHT/HEAVY HORIZONTAL
-2502,2503 '|' # BOX DRAWINGS LIGHT/HEAVY VERTICAL
-2504,2505 '-' # BOX DRAWINGS LIGHT/HEAVY TRIPLE DASH HORIZONTAL
-2506,2507 00a6 # BOX DRAWINGS LIGHT/HEAVY TRIPLE DASH VERTICAL -> BROKEN BAR
-2508,2509 '-' # BOX DRAWINGS LIGHT/HEAVY QUADRUPLE DASH HORIZONTAL
-250a,250b 00a6 # BOX DRAWINGS LIGHT/HEAVY QUADRUPLE DASH VERTICAL -> BROKEN BAR
-250c,250d,250e,250f '+' # BOX DRAWINGS LIGHT/HEAVY DOWN AND RIGHT
-2510,2511,2512,2513 00ac # BOX DRAWINGS LIGHT/HEAVY DOWN AND LEFT -> NOT SIGN
-2514,2515,2516,2517 'L' # BOX DRAWINGS LIGHT/HEAVY UP AND RIGHT
-2518,2519,251a,251b '+' # BOX DRAWINGS LIGHT/HEAVY UP AND LEFT
-251c,251d,251e,251f,2520,2521,2522,2523 '+' # BOX DRAWINGS LIGHT/HEAVY VERTICAL AND RIGHT
-2524,2525,2526,2527,2528,2529,252a,252b '+' # BOX DRAWINGS LIGHT/HEAVY VERTICAL AND LEFT
-252c,252d,252e,252f,2530,2531,2532,2533 'T' # BOX DRAWINGS LIGHT/HEAVY DOWN AND HORIZONTAL
-2534,2535,2536,2537,2538,2539,253a,253b '+' # BOX DRAWINGS LIGHT/HEAVY UP AND HORIZONTAL
-253c,253d,253e,253f,2540,2541,2542,2543,2544,2545,2546,2547,2548,2549,254a,254b '+' # BOX DRAWINGS LIGHT/HEAVY VERTICAL AND HORIZONTAL
-254c,254d '-' # BOX DRAWINGS LIGHT/HEAVY DOUBLE DASH HORIZONTAL
-254e,254f 00a6 # BOX DRAWINGS LIGHT/HEAVY DOUBLE DASH VERTICAL -> BROKEN BAR
+2500 '-' # BOX DRAWINGS LIGHT/HEAVY HORIZONTAL
+2502 '|' # BOX DRAWINGS LIGHT/HEAVY VERTICAL
+250c '+' # BOX DRAWINGS LIGHT/HEAVY DOWN AND RIGHT
+2510 00ac # BOX DRAWINGS LIGHT/HEAVY DOWN AND LEFT -> NOT SIGN
+2514 'L' # BOX DRAWINGS LIGHT/HEAVY UP AND RIGHT
+2518 '+' # BOX DRAWINGS LIGHT/HEAVY UP AND LEFT
+251c '+' # BOX DRAWINGS LIGHT/HEAVY VERTICAL AND RIGHT
+2524 '+' # BOX DRAWINGS LIGHT/HEAVY VERTICAL AND LEFT
+252c 'T' # BOX DRAWINGS LIGHT/HEAVY DOWN AND HORIZONTAL
+2534 '+' # BOX DRAWINGS LIGHT/HEAVY UP AND HORIZONTAL
+253c '+' # BOX DRAWINGS LIGHT/HEAVY VERTICAL AND HORIZONTAL
2550 '=' # BOX DRAWINGS DOUBLE HORIZONTAL
2551 '|' # BOX DRAWINGS DOUBLE VERTICAL
2552,2553,2554 '+' # BOX DRAWINGS DOWN AND RIGHT
@@ -194,9 +160,3 @@ fe4d,fe4e,fe4f '_' # DASHED/CENTRE
2564,2565,2566 'T' # BOX DRAWINGS DOWN AND HORIZONTAL
2567,2568,2569 '+' # BOX DRAWINGS UP AND HORIZONTAL
256a,256b,256c '+' # BOX DRAWINGS VERTICAL AND HORIZONTAL
-2571 '/' # BOX DRAWINGS LIGHT DIAGONAL UPPER RIGHT TO LOWER LEFT
-2572 '\' # BOX DRAWINGS LIGHT DIAGONAL UPPER LEFT TO LOWER RIGHT
-2573 'X' # BOX DRAWINGS LIGHT DIAGONAL CROSS
-2574,2576,2578,257a,257c,257e '-' # BOX DRAWINGS LIGHT/HEAVY LEFT/RIGHT
-2575,2577,2579,257b,257d,257f '|' # BOX DRAWINGS LIGHT/HEAVY UP/DOWN
-25cb,25ef 'O' # WHITE/LARGE CIRCLE
diff -up cvs/hq/wine/dlls/kernel32/tests/codepage.c wine/dlls/kernel32/tests/codepage.c
--- cvs/hq/wine/dlls/kernel32/tests/codepage.c 2006-09-12 21:31:37.000000000 +0900
+++ wine/dlls/kernel32/tests/codepage.c 2006-09-21 20:26:12.000000000 +0900
@@ -146,10 +146,71 @@ static void test_overlapped_buffers(void
ok(!memcmp(buf, strA, sizeof(strA)), "conversion failed: %s\n", buf);
}
+static void test_cp1252_WtoA_conversions(void)
+{
+ /* At one point we had all these characters mapped to an approximate
+ * visual equivalent, therefore the test.
+ */
+ static const WCHAR to_default[] = {
+ 0x0190,0x0391,0x0392,0x0395,0x0396,0x0397,0x0399,0x039a,0x039c,0x039d,
+ 0x039f,0x03a1,0x03a4,0x03a5,0x03a7,0x02dd,0x030b,0x2033,0x02c7,0x030c,
+ 0x02d8,0x0306,0x02d9,0x0307,0x02db,0x0328,0x203e,0x0333,0x221f,0x2263,
+ 0x02bb,0x2015,0x201b,0x301d,0x301e,0x301f,0x203c,0x2190,0x2191,0x2192,
+ 0x2193,0x2194,0x2195,0xfe49,0xfe4a,0xfe4b,0xfe4c,0xfe4d,0xfe4e,0xfe4f,
+ 0x2501,0x2503,0x2504,0x2505,0x2506,0x2507,0x2508,0x2509,0x250a,0x250b,
+ 0x250d,0x250e,0x250f,0x2511,0x2512,0x2513,0x2515,0x2516,0x2517,0x2519,
+ 0x251a,0x251b,0x251d,0x251e,0x251f,0x2520,0x2521,0x2522,0x2523,0x2525,
+ 0x2526,0x2527,0x2528,0x2529,0x252a,0x252b,0x252d,0x252e,0x252f,0x2530,
+ 0x2531,0x2532,0x2533,0x2535,0x2536,0x2537,0x2538,0x2539,0x253a,0x253b,
+ 0x253d,0x253e,0x253f,0x2540,0x2541,0x2542,0x2543,0x2544,0x2545,0x2546,
+ 0x2547,0x2548,0x2549,0x254a,0x254b,0x254c,0x254d,0x254e,0x254f,0x2571,
+ 0x2572,0x2573,0x2574,0x2576,0x2578,0x257a,0x257c,0x257e,0x2575,0x2577,
+ 0x2579,0x257b,0x257d,0x257f,0x25cb,0x25ef };
+ static const WCHAR not_to_default[] = {
+ 0x00a2,0x20a1,0x00a3,0x20a4,0x00a5,0x00a6,0x00a9,0x00ae,0x00b5,0x03bc,
+ 0x00c6,0x00d0,0x0110,0x0189,0x00d7,0x00d8,0x2205,0x00df,0x03b2,0x00e6,
+ 0x00f0,0x00f8,0x0111,0x0126,0x0127,0x0131,0x0141,0x0142,0x0152,0x0153,
+ 0x0166,0x0167,0x0180,0x0191,0x0192,0x0197,0x019a,0x019f,0x01ab,0x01ae,
+ 0x01b6,0x01e4,0x01e5,0x0261,0x2118,0x212e,0x00a8,0x0308,0x00af,0x02c9,
+ 0x0304,0x00b4,0x02b9,0x02ca,0x0301,0x2032,0x00b8,0x0327,0x02ba,0x02c4,
+ 0x2303,0x02c6,0x0302,0x02c8,0x02cb,0x0300,0x02cd,0x0331,0x0332,0x02da,
+ 0x030a,0x2070,0x2218,0x02dc,0x0303,0x0305,0x030e,0x00b1,0x2213,0x2044,
+ 0x2215,0x2216,0x2217,0x221a,0x2223,0x2229,0x2236,0x2248,0x2261,0x226a,
+ 0x226b,0x22c5,0x00a1,0x00ab,0x300a,0x00ad,0x00bb,0x300b,0x01c0,0x01c3,
+ 0x02bc,0x2019,0x2010,0x2011,0x2212,0x2013,0x2014,0x2018,0x2035,0x201a,
+ 0x201c,0x201d,0x201e,0x2022,0x2219,0x2039,0x3008,0x203a,0x3009,0x223c,
+ 0x301a,0x301b,0x2500,0x2502,0x250c,0x2510,0x2514,0x2518,0x251c,0x2524,
+ 0x252c,0x2534,0x253c,0x2550,0x2551,0x2552,0x2553,0x2554,0x2555,0x2556,
+ 0x2557,0x2558,0x2559,0x255a,0x255b,0x255c,0x255d,0x255e,0x255f,0x2560,
+ 0x2561,0x2562,0x2563,0x2564,0x2565,0x2566,0x2567,0x2568,0x2569,0x256a,
+ 0x256b,0x256c };
+ INT i, ret;
+ char result;
+ CPINFO cpi;
+
+ ok(GetCPInfo(1252, &cpi), "Can't find codepage 1252 info\n");
+ ok(cpi.DefaultChar[0] == 0x3f, "cp1252 expected default char 0x3f, got %02x\n", cpi.DefaultChar[0]);
+
+ for (i = 0; i < sizeof(to_default)/sizeof(to_default[0]); i++)
+ {
+ ret = WideCharToMultiByte(1252, 0, &to_default[i], 1, &result, 1, NULL, NULL);
+ ok(ret == 1, "char %04x: expected ret 1, got %d\n", to_default[i], ret);
+ ok(result == 0x3f, "char %04x: expected 0x3f, got %02x\n", to_default[i], (BYTE)result);
+ }
+
+ for (i = 0; i < sizeof(not_to_default)/sizeof(not_to_default[0]); i++)
+ {
+ ret = WideCharToMultiByte(1252, 0, &not_to_default[i], 1, &result, 1, NULL, NULL);
+ ok(ret == 1, "char %04x: expected ret 1, got %d\n", not_to_default[i], ret);
+ ok(result != 0x3f, "char %04x: not expected 0x3f\n", not_to_default[i]);
+ }
+}
+
START_TEST(codepage)
{
test_destination_buffer();
test_null_source();
test_negative_source_length();
test_overlapped_buffers();
+ test_cp1252_WtoA_conversions();
}
More information about the wine-patches
mailing list