msvcrt[1/5]: fill _mbctype table in _setmbcp (patch)

Mikolaj Zalewski mikolajz at google.com
Mon Aug 20 12:14:29 CDT 2007


I forgot to attach the patch to the previous mail; here it is.
-------------- next part --------------
From 6424d6c2dcf0ced8730196019ff236a2f94efe0e Mon Sep 17 00:00:00 2001
From: =?utf-8?q?Miko=C5=82aj_Zalewski?= <mikolaj at zalewski.pl>
Date: Sun, 19 Aug 2007 20:19:37 -0700
Subject: [PATCH] msvcrt: fill _mbctype table in _setmbcp

---
 dlls/msvcrt/locale.c       |   51 ---------------
 dlls/msvcrt/main.c         |    2 +
 dlls/msvcrt/mbcs.c         |  146 ++++++++++++++++++++++++++++++++++++++++++++
 dlls/msvcrt/tests/string.c |  101 +++++++++++++++++++++++++++++-
 4 files changed, 246 insertions(+), 54 deletions(-)

diff --git a/dlls/msvcrt/locale.c b/dlls/msvcrt/locale.c
index 5e08873..15eef0a 100644
--- a/dlls/msvcrt/locale.c
+++ b/dlls/msvcrt/locale.c
@@ -60,7 +60,6 @@ extern WORD* MSVCRT__pctype;
 
 /* mbctype data modified when the locale changes */
 extern int MSVCRT___mb_cur_max;
-extern unsigned char MSVCRT_mbctype[257];
 
 #define MSVCRT_LEADBYTE  0x8000
 
@@ -357,7 +356,6 @@ char* CDECL MSVCRT_setlocale(int category, const char* locale)
       /* Restore C locale ctype info */
       MSVCRT___mb_cur_max = 1;
       memcpy(MSVCRT_current_ctype, MSVCRT__ctype, sizeof(MSVCRT__ctype));
-      memset(MSVCRT_mbctype, 0, sizeof(MSVCRT_mbctype));
       if (!lc_all) break;
     case MSVCRT_LC_MONETARY:
       if (!lc_all) break;
@@ -529,55 +527,6 @@ const char* CDECL _Strftime(char *out, unsigned int len, const char *fmt,
   return "";
 }
 
-/* FIXME: MBCP probably belongs in mbcs.c */
-
-/*********************************************************************
- *		_setmbcp (MSVCRT.@)
- */
-int CDECL _setmbcp(int cp)
-{
-  LOCK_LOCALE;
-  if ( cp > _MB_CP_SBCS)
-  {
-    if( MSVCRT___lc_codepage != cp)
-      /* FIXME: set ctype behaviour for this cp */
-      MSVCRT___lc_codepage = cp;
-  }
-  else if(cp == _MB_CP_ANSI)
-  {
-    MSVCRT___lc_codepage = GetACP();
-  }
-  else if(cp == _MB_CP_OEM)
-  {
-    MSVCRT___lc_codepage = GetOEMCP();
-  }
-  else if(cp == _MB_CP_LOCALE)
-  {
-    GetLocaleInfoW( LOCALE_USER_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE|LOCALE_RETURN_NUMBER,
-                    (WCHAR *)&MSVCRT___lc_codepage, sizeof(INT)/sizeof(WCHAR) );
-  }
-  else if(cp == _MB_CP_SBCS)
-  {
-    FIXME ("SBCS codepages not implemented\n");
-  }
-  else
-  {
-    FIXME ("Unreal codepages (e.g. %d) not implemented\n", cp);
-  }
-  MSVCRT___lc_collate_cp = MSVCRT___lc_codepage;
-  UNLOCK_LOCALE;
-  TRACE("(%d) -> %d\n", cp, MSVCRT___lc_codepage);
-  return 0;
-}
-
-/*********************************************************************
- *		_getmbcp (MSVCRT.@)
- */
-int CDECL _getmbcp(void)
-{
-  return MSVCRT___lc_codepage;
-}
-
 /*********************************************************************
  *		__crtLCMapStringA (MSVCRT.@)
  */
diff --git a/dlls/msvcrt/main.c b/dlls/msvcrt/main.c
index 1edaa73..781f068 100644
--- a/dlls/msvcrt/main.c
+++ b/dlls/msvcrt/main.c
@@ -18,6 +18,7 @@
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
  */
 #include "msvcrt.h"
+#include "msvcrt/mbctype.h"
 
 #include "wine/debug.h"
 
@@ -83,6 +84,7 @@ BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved)
     msvcrt_init_args();
     msvcrt_init_signals();
     MSVCRT_setlocale(0, "C");
+    _setmbcp(_MB_CP_LOCALE);
     TRACE("finished process init\n");
     break;
   case DLL_THREAD_ATTACH:
diff --git a/dlls/msvcrt/mbcs.c b/dlls/msvcrt/mbcs.c
index 0dc0120..af184b2 100644
--- a/dlls/msvcrt/mbcs.c
+++ b/dlls/msvcrt/mbcs.c
@@ -32,6 +32,25 @@ WINE_DEFAULT_DEBUG_CHANNEL(msvcrt);
 
 unsigned char MSVCRT_mbctype[257];
 int MSVCRT___mb_cur_max = 1;
+extern int MSVCRT___lc_collate_cp;
+
+/* It seems that the data about valid trail bytes is not available from kernel32,
+ * so we have to store it here. The format is the same as for lead bytes in CPINFO */
+struct cp_extra_info_t
+{
+    int cp;
+    BYTE TrailBytes[MAX_LEADBYTES];
+};
+
+static struct cp_extra_info_t g_cpextrainfo[] =
+{
+    {932, {0x40, 0x7e, 0x80, 0xfc, 0, 0}},
+    {936, {0x40, 0xfe, 0, 0}},
+    {949, {0x41, 0xfe, 0, 0}},
+    {950, {0x40, 0x7e, 0xa1, 0xfe, 0, 0}},
+    {20932, {1, 255, 0, 0}},
+    {0, {1, 255, 0, 0}}       /* match all with FIXME */
+};
 
 static MSVCRT_wchar_t msvcrt_mbc_to_wc(unsigned int ch)
 {
@@ -132,6 +151,133 @@ int* CDECL __p___mb_cur_max(void)
 }
 
 /*********************************************************************
+ *		_setmbcp (MSVCRT.@)
+ */
+int CDECL _setmbcp(int cp)
+{
+  int newcp;
+  CPINFO cpi;
+  BYTE *bytes;
+  WORD chartypes[256];
+  WORD *curr_type;
+  char bufA[256];
+  WCHAR bufW[256];
+  int charcount;
+  int ret;
+  int i;
+
+  switch (cp)
+  {
+    case _MB_CP_ANSI:
+      newcp = GetACP();
+      break;
+    case _MB_CP_OEM:
+      newcp = GetOEMCP();
+      break;
+    case _MB_CP_LOCALE:
+      newcp = MSVCRT___lc_codepage;
+      break;
+    case _MB_CP_SBCS:
+      newcp = 20127;   /* ASCII */
+      break;
+    default:
+      newcp = cp;
+      break;
+  }
+  
+  if (!GetCPInfo(newcp, &cpi))
+  {
+    WARN("Codepage %d not found\n", newcp);
+    msvcrt_set_errno(MSVCRT_EINVAL);
+    return -1;
+  }
+  
+  /* setup the _mbctype */
+  memset(MSVCRT_mbctype, 0, sizeof(MSVCRT_mbctype));
+
+  bytes = cpi.LeadByte;
+  while (bytes[0] || bytes[1])
+  {
+    for (i = bytes[0]; i <= bytes[1]; i++)
+      MSVCRT_mbctype[i + 1] |= _M1;
+    bytes += 2;
+  }
+  
+  if (cpi.MaxCharSize > 1)
+  {
+    /* trail bytes not available through kernel32 but stored in a structure in msvcrt */
+    struct cp_extra_info_t *cpextra = g_cpextrainfo;
+    while (TRUE)
+    {
+      if (cpextra->cp == 0 || cpextra->cp == newcp)
+      {
+        if (cpextra->cp == 0)
+          FIXME("trail bytes data not available for DBCS codepage %d - assuming all bytes\n", newcp);
+      
+        bytes = cpextra->TrailBytes;
+        while (bytes[0] || bytes[1])
+        {
+          for (i = bytes[0]; i <= bytes[1]; i++)
+            MSVCRT_mbctype[i + 1] |= _M2;
+          bytes += 2;
+        }
+        break;
+      }
+      cpextra++;
+    }
+  }
+  
+  /* we can't use GetStringTypeA directly because we don't have a locale - only a code page
+   */
+  charcount = 0;
+  for (i = 0; i < 256; i++)
+    if (!(MSVCRT_mbctype[i + 1] & _M1))
+      bufA[charcount++] = i;
+      
+  ret = MultiByteToWideChar(newcp, 0, bufA, charcount, bufW, charcount);
+  if (ret != charcount)
+    ERR("MultiByteToWideChar of chars failed for cp %d, ret=%d (exp %d), error=%d\n", newcp, ret, charcount, GetLastError());
+
+  GetStringTypeW(CT_CTYPE1, bufW, charcount, chartypes);
+  
+  curr_type = chartypes;
+  for (i = 0; i < 256; i++)
+    if (!(MSVCRT_mbctype[i + 1] & _M1))
+    {
+	if ((*curr_type) & C1_UPPER)
+	    MSVCRT_mbctype[i + 1] |= _SBUP;
+	if ((*curr_type) & C1_LOWER)
+	    MSVCRT_mbctype[i + 1] |= _SBLOW;
+	curr_type++;
+    }
+    
+  if (newcp == 932)   /* CP932 only - set _MP and _MS */
+  {
+    /* On Windows it's possible to calculate _MP and _MS from CT_CTYPE1
+     * and CT_CTYPE3, but as of Wine 0.9.43 we return wrong values, which makes
+     * that hard. As these flags are set only for codepage 932, we hardcode
+     * them, which also gives faster execution.
+     */
+    for (i = 161; i <= 165; i++)
+      MSVCRT_mbctype[i + 1] |= _MP;
+    for (i = 166; i <= 223; i++)
+      MSVCRT_mbctype[i + 1] |= _MS;
+  }
+  
+  MSVCRT___lc_collate_cp = MSVCRT___lc_codepage = newcp;
+  TRACE("(%d) -> %d\n", cp, MSVCRT___lc_codepage);
+  return 0;
+}
+
+/*********************************************************************
+ *		_getmbcp (MSVCRT.@)
+ */
+int CDECL _getmbcp(void)
+{
+  return MSVCRT___lc_codepage;
+}
+
+/*********************************************************************
  *		_mbsnextc(MSVCRT.@)
  */
 unsigned int CDECL _mbsnextc(const unsigned char* str)
diff --git a/dlls/msvcrt/tests/string.c b/dlls/msvcrt/tests/string.c
index d7148da..aa29703 100644
--- a/dlls/msvcrt/tests/string.c
+++ b/dlls/msvcrt/tests/string.c
@@ -75,15 +75,110 @@ static void test_swab( void ) {
     ok(memcmp(to,expected3,testsize) == 0, "Testing small size %d returned '%*.*s'\n", testsize, testsize, testsize, to);
 }
 
-static void test_ismbblead(void)
+#if 0      /* use this to generate more tests */
+
+static void test_codepage(int cp)
+{
+    int i;
+    int prev;
+    int count = 1;
+
+    ok(_setmbcp(cp) == 0, "Couldn't set mbcp\n");
+
+    prev = _mbctype[0];
+    printf("static int result_cp_%d_mbctype[] = { ", cp);
+    for (i = 1; i < 257; i++)
+    {
+        if (_mbctype[i] != prev)
+        {
+            printf("0x%x,%d, ", prev, count);
+            prev = _mbctype[i];
+            count = 1;
+        }
+        else
+            count++;
+    }
+    printf("0x%x,%d };\n", prev, count);
+}
+
+#define test_codepage_todo(cp, todo) test_codepage(cp)
+
+#else
+
+/* RLE-encoded mbctype tables for given codepages */
+static int result_cp_1252_mbctype[] = { 0x0,66, 0x10,26, 0x0,6, 0x20,26, 0x0,8, 0x20,1,
+  0x0,6, 0x10,1, 0x0,1, 0x10,1, 0x0,1, 0x10,1, 0x0,11, 0x20,1, 0x0,1, 0x20,1, 0x0,1,
+  0x20,1, 0x10,1, 0x0,10, 0x20,1, 0x0,10, 0x20,1, 0x0,4, 0x20,1, 0x0,5, 0x10,23, 0x0,1,
+  0x10,7, 0x20,24, 0x0,1, 32,8 };
+static int result_cp_1250_mbctype[] = { 0x0,66, 0x10,26, 0x0,6, 0x20,26, 0x0,15, 0x10,1,
+  0x0,1, 0x10,4, 0x0,10, 0x20,1, 0x0,1, 0x20,4, 0x0,3, 0x10,1, 0x0,1, 0x10,1, 0x0,4,
+  0x10,1, 0x0,4, 0x10,1, 0x0,3, 0x20,1, 0x0,1, 0x20,1, 0x0,3, 0x20,2, 0x0,1, 0x10,1,
+  0x0,1, 0x20,2, 0x10,23, 0x0,1, 0x10,7, 0x20,24, 0x0,1, 0x20,7, 0,1 };
+static int result_cp_932_mbctype[] = { 0x0,65, 0x8,1, 0x18,26, 0x8,6, 0x28,26, 0x8,4,
+  0x0,1, 0x8,1, 0xc,31, 0x8,1, 0xa,5, 0x9,58, 0xc,29, 0,3 };
+static int result_cp_936_mbctype[] = { 0x0,65, 0x8,1, 0x18,26, 0x8,6, 0x28,26, 0x8,6,
+  0xc,126, 0,1 };
+static int result_cp_949_mbctype[] = { 0x0,66, 0x18,26, 0x8,6, 0x28,26, 0x8,6, 0xc,126,
+  0,1 };
+static int result_cp_950_mbctype[] = { 0x0,65, 0x8,1, 0x18,26, 0x8,6, 0x28,26, 0x8,4,
+  0x0,2, 0x4,32, 0xc,94, 0,1 };
+static int result_cp_20932_mbctype[] = { 0x0,2, 0x8,64, 0x18,26, 0x8,6, 0x28,26, 0x8,19,
+  0xc,1, 0x8,18, 0xc,94, 0,1 };
+
+static int todo_none[] = { -2 };
+static int todo_cp_932[] = { 254, -2 };
+static int todo_cp_20932[] = { 143, -2 };
+
+void test_cp_table(int cp, int *result, int *todo)
+{
+    int i;
+    int count = 0;
+    int curr = 0;
+    _setmbcp(cp);
+    for (i = 0; i < 256; i++)
+    {
+        if (count == 0)
+        {
+            curr = result[0];
+            count = result[1];
+            result += 2;
+        }
+	if (i == *todo + 1)
+	{
+            todo_wine ok(_mbctype[i] == curr, "CP%d: Mismatch in ctype for character %d - %d instead of %d\n", cp, i-1, _mbctype[i], curr);
+            todo++;
+	}
+	else
+            ok(_mbctype[i] == curr, "CP%d: Mismatch in ctype for character %d - %d instead of %d\n", cp, i-1, _mbctype[i], curr);
+        count--;
+    }
+}
+
+#define test_codepage(num) test_cp_table(num, result_cp_##num##_mbctype, todo_none);
+#define test_codepage_todo(num, todo) test_cp_table(num, result_cp_##num##_mbctype, todo);
+
+#endif
+
+static void test_mbcp(void)
 {
     unsigned int s = '\354';
     int mb_orig_max = __mb_cur_max;
+    int curr_mbcp = _getmbcp();
+
+    /* two single-byte code pages */
+    test_codepage(1252);
+    test_codepage(1250);
+    /* double byte code pages */
+    test_codepage_todo(932, todo_cp_932);
+    test_codepage(936);
+    test_codepage(949);
+    test_codepage(950);
+    test_codepage_todo(20932, todo_cp_20932);
 
     _setmbcp(936);
     ok(__mb_cur_max == mb_orig_max, "__mb_cur_max shouldn't be updated (is %d != %d)\n", __mb_cur_max, mb_orig_max);
     todo_wine ok(_ismbblead(s), "got result %d\n", _ismbblead(s));
-    _setmbcp(1252);
+    _setmbcp(curr_mbcp);
 }
 
 static void test_mbsspn( void)
@@ -155,7 +250,7 @@ START_TEST(string)
     test_swab();
 
     /* Test ismbblead*/
-    test_ismbblead();
+    test_mbcp();
    /* test _mbsspn */
     test_mbsspn();
     test_mbsspnp();
-- 
1.4.4.2


More information about the wine-patches mailing list