msvcrt[1/5]: fill _mbctype table in _setmbcp (patch)
Mikolaj Zalewski
mikolajz at google.com
Mon Aug 20 12:14:29 CDT 2007
In the previous mail I forgot to attach the patch.
-------------- next part --------------
From 6424d6c2dcf0ced8730196019ff236a2f94efe0e Mon Sep 17 00:00:00 2001
From: =?utf-8?q?Miko=C5=82aj_Zalewski?= <mikolaj at zalewski.pl>
Date: Sun, 19 Aug 2007 20:19:37 -0700
Subject: [PATCH] msvcrt: fill _mbctype table in _setmbcp
---
dlls/msvcrt/locale.c | 51 ---------------
dlls/msvcrt/main.c | 2 +
dlls/msvcrt/mbcs.c | 146 ++++++++++++++++++++++++++++++++++++++++++++
dlls/msvcrt/tests/string.c | 101 +++++++++++++++++++++++++++++-
4 files changed, 246 insertions(+), 54 deletions(-)
diff --git a/dlls/msvcrt/locale.c b/dlls/msvcrt/locale.c
index 5e08873..15eef0a 100644
--- a/dlls/msvcrt/locale.c
+++ b/dlls/msvcrt/locale.c
@@ -60,7 +60,6 @@ extern WORD* MSVCRT__pctype;
/* mbctype data modified when the locale changes */
extern int MSVCRT___mb_cur_max;
-extern unsigned char MSVCRT_mbctype[257];
#define MSVCRT_LEADBYTE 0x8000
@@ -357,7 +356,6 @@ char* CDECL MSVCRT_setlocale(int category, const char* locale)
/* Restore C locale ctype info */
MSVCRT___mb_cur_max = 1;
memcpy(MSVCRT_current_ctype, MSVCRT__ctype, sizeof(MSVCRT__ctype));
- memset(MSVCRT_mbctype, 0, sizeof(MSVCRT_mbctype));
if (!lc_all) break;
case MSVCRT_LC_MONETARY:
if (!lc_all) break;
@@ -529,55 +527,6 @@ const char* CDECL _Strftime(char *out, unsigned int len, const char *fmt,
return "";
}
-/* FIXME: MBCP probably belongs in mbcs.c */
-
-/*********************************************************************
- * _setmbcp (MSVCRT.@)
- */
-int CDECL _setmbcp(int cp)
-{
- LOCK_LOCALE;
- if ( cp > _MB_CP_SBCS)
- {
- if( MSVCRT___lc_codepage != cp)
- /* FIXME: set ctype behaviour for this cp */
- MSVCRT___lc_codepage = cp;
- }
- else if(cp == _MB_CP_ANSI)
- {
- MSVCRT___lc_codepage = GetACP();
- }
- else if(cp == _MB_CP_OEM)
- {
- MSVCRT___lc_codepage = GetOEMCP();
- }
- else if(cp == _MB_CP_LOCALE)
- {
- GetLocaleInfoW( LOCALE_USER_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE|LOCALE_RETURN_NUMBER,
- (WCHAR *)&MSVCRT___lc_codepage, sizeof(INT)/sizeof(WCHAR) );
- }
- else if(cp == _MB_CP_SBCS)
- {
- FIXME ("SBCS codepages not implemented\n");
- }
- else
- {
- FIXME ("Unreal codepages (e.g. %d) not implemented\n", cp);
- }
- MSVCRT___lc_collate_cp = MSVCRT___lc_codepage;
- UNLOCK_LOCALE;
- TRACE("(%d) -> %d\n", cp, MSVCRT___lc_codepage);
- return 0;
-}
-
-/*********************************************************************
- * _getmbcp (MSVCRT.@)
- */
-int CDECL _getmbcp(void)
-{
- return MSVCRT___lc_codepage;
-}
-
/*********************************************************************
* __crtLCMapStringA (MSVCRT.@)
*/
diff --git a/dlls/msvcrt/main.c b/dlls/msvcrt/main.c
index 1edaa73..781f068 100644
--- a/dlls/msvcrt/main.c
+++ b/dlls/msvcrt/main.c
@@ -18,6 +18,7 @@
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/
#include "msvcrt.h"
+#include "msvcrt/mbctype.h"
#include "wine/debug.h"
@@ -83,6 +84,7 @@ BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved)
msvcrt_init_args();
msvcrt_init_signals();
MSVCRT_setlocale(0, "C");
+ _setmbcp(_MB_CP_LOCALE);
TRACE("finished process init\n");
break;
case DLL_THREAD_ATTACH:
diff --git a/dlls/msvcrt/mbcs.c b/dlls/msvcrt/mbcs.c
index 0dc0120..af184b2 100644
--- a/dlls/msvcrt/mbcs.c
+++ b/dlls/msvcrt/mbcs.c
@@ -32,6 +32,25 @@ WINE_DEFAULT_DEBUG_CHANNEL(msvcrt);
unsigned char MSVCRT_mbctype[257];
int MSVCRT___mb_cur_max = 1;
+extern int MSVCRT___lc_collate_cp;
+
+/* It seems that the data about valid trail bytes is not available from kernel32
+ * so we have to store it here. The format is the same as for lead bytes in CPINFO */
+struct cp_extra_info_t
+{
+ int cp;
+ BYTE TrailBytes[MAX_LEADBYTES];
+};
+
+static struct cp_extra_info_t g_cpextrainfo[] =
+{
+ {932, {0x40, 0x7e, 0x80, 0xfc, 0, 0}},
+ {936, {0x40, 0xfe, 0, 0}},
+ {949, {0x41, 0xfe, 0, 0}},
+ {950, {0x40, 0x7e, 0xa1, 0xfe, 0, 0}},
+ {20932, {1, 255, 0, 0}},
+ {0, {1, 255, 0, 0}} /* match all with FIXME */
+};
static MSVCRT_wchar_t msvcrt_mbc_to_wc(unsigned int ch)
{
@@ -132,6 +151,133 @@ int* CDECL __p___mb_cur_max(void)
}
/*********************************************************************
+ * _setmbcp (MSVCRT.@)
+ */
+int CDECL _setmbcp(int cp)
+{
+ int newcp;
+ CPINFO cpi;
+ BYTE *bytes;
+ WORD chartypes[256];
+ WORD *curr_type;
+ char bufA[256];
+ WCHAR bufW[256];
+ int charcount;
+ int ret;
+ int i;
+
+ switch (cp)
+ {
+ case _MB_CP_ANSI:
+ newcp = GetACP();
+ break;
+ case _MB_CP_OEM:
+ newcp = GetOEMCP();
+ break;
+ case _MB_CP_LOCALE:
+ newcp = MSVCRT___lc_codepage;
+ break;
+ case _MB_CP_SBCS:
+ newcp = 20127; /* ASCII */
+ break;
+ default:
+ newcp = cp;
+ break;
+ }
+
+ if (!GetCPInfo(newcp, &cpi))
+ {
+ WARN("Codepage %d not found\n", newcp);
+ msvcrt_set_errno(MSVCRT_EINVAL);
+ return -1;
+ }
+
+ /* setup the _mbctype */
+ memset(MSVCRT_mbctype, 0, sizeof(MSVCRT_mbctype));
+
+ bytes = cpi.LeadByte;
+ while (bytes[0] || bytes[1])
+ {
+ for (i = bytes[0]; i <= bytes[1]; i++)
+ MSVCRT_mbctype[i + 1] |= _M1;
+ bytes += 2;
+ }
+
+ if (cpi.MaxCharSize > 1)
+ {
+ /* trail bytes not available through kernel32 but stored in a structure in msvcrt */
+ struct cp_extra_info_t *cpextra = g_cpextrainfo;
+ while (TRUE)
+ {
+ if (cpextra->cp == 0 || cpextra->cp == newcp)
+ {
+ if (cpextra->cp == 0)
+ FIXME("trail bytes data not available for DBCS codepage %d - assuming all bytes\n", newcp);
+
+ bytes = cpextra->TrailBytes;
+ while (bytes[0] || bytes[1])
+ {
+ for (i = bytes[0]; i <= bytes[1]; i++)
+ MSVCRT_mbctype[i + 1] |= _M2;
+ bytes += 2;
+ }
+ break;
+ }
+ cpextra++;
+ }
+ }
+
+ /* we can't use GetStringTypeA directly because we don't have a locale - only a code page
+ */
+ charcount = 0;
+ for (i = 0; i < 256; i++)
+ if (!(MSVCRT_mbctype[i + 1] & _M1))
+ bufA[charcount++] = i;
+
+ ret = MultiByteToWideChar(newcp, 0, bufA, charcount, bufW, charcount);
+ if (ret != charcount)
+ ERR("MultiByteToWideChar of chars failed for cp %d, ret=%d (exp %d), error=%d\n", newcp, ret, charcount, GetLastError());
+
+ GetStringTypeW(CT_CTYPE1, bufW, charcount, chartypes);
+
+ curr_type = chartypes;
+ for (i = 0; i < 256; i++)
+ if (!(MSVCRT_mbctype[i + 1] & _M1))
+ {
+ if ((*curr_type) & C1_UPPER)
+ MSVCRT_mbctype[i + 1] |= _SBUP;
+ if ((*curr_type) & C1_LOWER)
+ MSVCRT_mbctype[i + 1] |= _SBLOW;
+ curr_type++;
+ }
+
+ if (newcp == 932) /* CP932 only - set _MP and _MS */
+ {
+ /* On Windows it's possible to calculate the _MP and _MS from CT_CTYPE1
+ * and CT_CTYPE3. But as of Wine 0.9.43 we return wrong values, which makes
+ * it hard. As this is set only for codepage 932 we hardcode it, which also
+ * gives faster execution.
+ */
+ for (i = 161; i <= 165; i++)
+ MSVCRT_mbctype[i + 1] |= _MP;
+ for (i = 166; i <= 223; i++)
+ MSVCRT_mbctype[i + 1] |= _MS;
+ }
+
+ MSVCRT___lc_collate_cp = MSVCRT___lc_codepage = newcp;
+ TRACE("(%d) -> %d\n", cp, MSVCRT___lc_codepage);
+ return 0;
+}
+
+/*********************************************************************
+ * _getmbcp (MSVCRT.@)
+ */
+int CDECL _getmbcp(void)
+{
+ return MSVCRT___lc_codepage;
+}
+
+/*********************************************************************
* _mbsnextc(MSVCRT.@)
*/
unsigned int CDECL _mbsnextc(const unsigned char* str)
diff --git a/dlls/msvcrt/tests/string.c b/dlls/msvcrt/tests/string.c
index d7148da..aa29703 100644
--- a/dlls/msvcrt/tests/string.c
+++ b/dlls/msvcrt/tests/string.c
@@ -75,15 +75,110 @@ static void test_swab( void ) {
ok(memcmp(to,expected3,testsize) == 0, "Testing small size %d returned '%*.*s'\n", testsize, testsize, testsize, to);
}
-static void test_ismbblead(void)
+#if 0 /* use this to generate more tests */
+
+static void test_codepage(int cp)
+{
+ int i;
+ int prev;
+ int count = 1;
+
+ ok(_setmbcp(cp) == 0, "Couldn't set mbcp\n");
+
+ prev = _mbctype[0];
+ printf("static int result_cp_%d_mbctype[] = { ", cp);
+ for (i = 1; i < 257; i++)
+ {
+ if (_mbctype[i] != prev)
+ {
+ printf("0x%x,%d, ", prev, count);
+ prev = _mbctype[i];
+ count = 1;
+ }
+ else
+ count++;
+ }
+ printf("0x%x,%d };\n", prev, count);
+}
+
+#define test_codepage_todo(cp, todo) test_codepage(cp)
+
+#else
+
+/* RLE-encoded mbctype tables for given codepages */
+static int result_cp_1252_mbctype[] = { 0x0,66, 0x10,26, 0x0,6, 0x20,26, 0x0,8, 0x20,1,
+ 0x0,6, 0x10,1, 0x0,1, 0x10,1, 0x0,1, 0x10,1, 0x0,11, 0x20,1, 0x0,1, 0x20,1, 0x0,1,
+ 0x20,1, 0x10,1, 0x0,10, 0x20,1, 0x0,10, 0x20,1, 0x0,4, 0x20,1, 0x0,5, 0x10,23, 0x0,1,
+ 0x10,7, 0x20,24, 0x0,1, 32,8 };
+static int result_cp_1250_mbctype[] = { 0x0,66, 0x10,26, 0x0,6, 0x20,26, 0x0,15, 0x10,1,
+ 0x0,1, 0x10,4, 0x0,10, 0x20,1, 0x0,1, 0x20,4, 0x0,3, 0x10,1, 0x0,1, 0x10,1, 0x0,4,
+ 0x10,1, 0x0,4, 0x10,1, 0x0,3, 0x20,1, 0x0,1, 0x20,1, 0x0,3, 0x20,2, 0x0,1, 0x10,1,
+ 0x0,1, 0x20,2, 0x10,23, 0x0,1, 0x10,7, 0x20,24, 0x0,1, 0x20,7, 0,1 };
+static int result_cp_932_mbctype[] = { 0x0,65, 0x8,1, 0x18,26, 0x8,6, 0x28,26, 0x8,4,
+ 0x0,1, 0x8,1, 0xc,31, 0x8,1, 0xa,5, 0x9,58, 0xc,29, 0,3 };
+static int result_cp_936_mbctype[] = { 0x0,65, 0x8,1, 0x18,26, 0x8,6, 0x28,26, 0x8,6,
+ 0xc,126, 0,1 };
+static int result_cp_949_mbctype[] = { 0x0,66, 0x18,26, 0x8,6, 0x28,26, 0x8,6, 0xc,126,
+ 0,1 };
+static int result_cp_950_mbctype[] = { 0x0,65, 0x8,1, 0x18,26, 0x8,6, 0x28,26, 0x8,4,
+ 0x0,2, 0x4,32, 0xc,94, 0,1 };
+static int result_cp_20932_mbctype[] = { 0x0,2, 0x8,64, 0x18,26, 0x8,6, 0x28,26, 0x8,19,
+ 0xc,1, 0x8,18, 0xc,94, 0,1 };
+
+static int todo_none[] = { -2 };
+static int todo_cp_932[] = { 254, -2 };
+static int todo_cp_20932[] = { 143, -2 };
+
+void test_cp_table(int cp, int *result, int *todo)
+{
+ int i;
+ int count = 0;
+ int curr = 0;
+ _setmbcp(cp);
+ for (i = 0; i < 256; i++)
+ {
+ if (count == 0)
+ {
+ curr = result[0];
+ count = result[1];
+ result += 2;
+ }
+ if (i == *todo + 1)
+ {
+ todo_wine ok(_mbctype[i] == curr, "CP%d: Mismatch in ctype for character %d - %d instead of %d\n", cp, i-1, _mbctype[i], curr);
+ todo++;
+ }
+ else
+ ok(_mbctype[i] == curr, "CP%d: Mismatch in ctype for character %d - %d instead of %d\n", cp, i-1, _mbctype[i], curr);
+ count--;
+ }
+}
+
+#define test_codepage(num) test_cp_table(num, result_cp_##num##_mbctype, todo_none);
+#define test_codepage_todo(num, todo) test_cp_table(num, result_cp_##num##_mbctype, todo);
+
+#endif
+
+static void test_mbcp(void)
{
unsigned int s = '\354';
int mb_orig_max = __mb_cur_max;
+ int curr_mbcp = _getmbcp();
+
+ /* two single-byte code pages */
+ test_codepage(1252);
+ test_codepage(1250);
+ /* double byte code pages */
+ test_codepage_todo(932, todo_cp_932);
+ test_codepage(936);
+ test_codepage(949);
+ test_codepage(950);
+ test_codepage_todo(20932, todo_cp_20932);
_setmbcp(936);
ok(__mb_cur_max == mb_orig_max, "__mb_cur_max shouldn't be updated (is %d != %d)\n", __mb_cur_max, mb_orig_max);
todo_wine ok(_ismbblead(s), "got result %d\n", _ismbblead(s));
- _setmbcp(1252);
+ _setmbcp(curr_mbcp);
}
static void test_mbsspn( void)
@@ -155,7 +250,7 @@ START_TEST(string)
test_swab();
/* Test ismbblead*/
- test_ismbblead();
+ test_mbcp();
/* test _mbsspn */
test_mbsspn();
test_mbsspnp();
--
1.4.4.2
More information about the wine-patches
mailing list