Alexandre Julliard : wmc: Use external NLS files for codepage conversions.

Alexandre Julliard julliard at winehq.org
Wed Feb 12 16:21:04 CST 2020


Module: wine
Branch: master
Commit: c47910ec4730d9961ab9a2bbc7d40f336d394513
URL:    https://source.winehq.org/git/wine.git/?a=commit;h=c47910ec4730d9961ab9a2bbc7d40f336d394513

Author: Alexandre Julliard <julliard at winehq.org>
Date:   Wed Feb 12 09:05:04 2020 +0100

wmc: Use external NLS files for codepage conversions.

Signed-off-by: Alexandre Julliard <julliard at winehq.org>

---

 tools/wmc/lang.c  |  28 ------------
 tools/wmc/lang.h  |   2 -
 tools/wmc/mcl.c   |   7 ++-
 tools/wmc/utils.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 tools/wmc/utils.h |   2 +
 5 files changed, 129 insertions(+), 34 deletions(-)

diff --git a/tools/wmc/lang.c b/tools/wmc/lang.c
index d7bceef8fd..b5ab3be722 100644
--- a/tools/wmc/lang.c
+++ b/tools/wmc/lang.c
@@ -185,31 +185,3 @@ const language_t *find_language(unsigned id)
 	return (const language_t *)bsearch(&id, languages, ARRAY_SIZE(languages),
 		sizeof(languages[0]), langcmp);
 }
-
-#ifdef _WIN32
-
-int is_valid_codepage(int id)
-{
-    return IsValidCodePage( id );
-}
-
-int wmc_mbstowcs( int codepage, int flags, const char *src, int srclen, WCHAR *dst, int dstlen )
-{
-    return MultiByteToWideChar( codepage, flags, src, srclen, dst, dstlen );
-}
-
-#else  /* _WIN32 */
-
-#include "wine/unicode.h"
-
-int is_valid_codepage(int id)
-{
-    return id == CP_UTF8 || wine_cp_get_table(id);
-}
-
-int wmc_mbstowcs( int codepage, int flags, const char *src, int srclen, WCHAR *dst, int dstlen )
-{
-    return wine_cp_mbstowcs( wine_cp_get_table( codepage ), flags, src, srclen, dst, dstlen );
-}
-
-#endif  /* _WIN32 */
diff --git a/tools/wmc/lang.h b/tools/wmc/lang.h
index d655556e8d..365aacdd76 100644
--- a/tools/wmc/lang.h
+++ b/tools/wmc/lang.h
@@ -32,7 +32,5 @@ typedef struct language {
 
 void show_languages(void);
 const language_t *find_language(unsigned id);
-int is_valid_codepage(int id);
-int wmc_mbstowcs( int codepage, int flags, const char *src, int srclen, WCHAR *dst, int dstlen );
 
 #endif
diff --git a/tools/wmc/mcl.c b/tools/wmc/mcl.c
index 1319113fff..829f955606 100644
--- a/tools/wmc/mcl.c
+++ b/tools/wmc/mcl.c
@@ -153,8 +153,6 @@ static int codepage;
 void set_codepage(int cp)
 {
 	codepage = cp;
-	if (!is_valid_codepage( cp ))
-		xyyerror("Codepage %d not found; cannot process\n", codepage);
 }
 
 /*
@@ -226,8 +224,9 @@ static int fill_inputbuffer(void)
     {
     case INPUT_ASCII:
         if (!fgets( buffer + len, sizeof(buffer) - len, yyin )) break;
-        ninputbuffer = wmc_mbstowcs( codepage, 0, buffer, strlen(buffer), inputbuffer, INPUTBUFFER_SIZE );
-        if (ninputbuffer < 0) internal_error(__FILE__, __LINE__, "Could not translate to unicode\n");
+        wbuf = codepage_to_unicode( codepage, buffer, strlen(buffer), &ninputbuffer );
+        memcpy( inputbuffer, wbuf, ninputbuffer * sizeof(WCHAR) );
+        free( wbuf );
         return 1;
     case INPUT_UTF8:
         if (!fgets( buffer + len, sizeof(buffer) - len, yyin )) break;
diff --git a/tools/wmc/utils.c b/tools/wmc/utils.c
index 7261fe715a..bd01f6cfe7 100644
--- a/tools/wmc/utils.c
+++ b/tools/wmc/utils.c
@@ -29,6 +29,7 @@
 #include <ctype.h>
 
 #include "wmctypes.h"
+#include "winnls.h"
 #include "utils.h"
 #include "wmc.h"
 
@@ -400,6 +401,129 @@ char *unicode_to_utf8( const WCHAR *src, int srclen, int *dstlen )
     return ret;
 }
 
+#ifdef _WIN32
+
+int is_valid_codepage(int id)
+{
+    return IsValidCodePage( id );
+}
+
+WCHAR *codepage_to_unicode( int codepage, const char *src, int srclen, int *dstlen )
+{
+    WCHAR *dst = xmalloc( (srclen + 1) * sizeof(WCHAR) );
+    DWORD ret = MultiByteToWideChar( codepage, MB_ERR_INVALID_CHARS, src, srclen, dst, srclen );
+    if (!ret) return NULL;
+    dst[ret] = 0;
+    *dstlen = ret;
+    return dst;
+}
+
+#else  /* _WIN32 */
+
+struct nls_info
+{
+    unsigned short  codepage;
+    unsigned short  unidef;
+    unsigned short  trans_unidef;
+    unsigned short *cp2uni;
+    unsigned short *dbcs_offsets;
+};
+
+static struct nls_info nlsinfo[128];
+
+static void init_nls_info( struct nls_info *info, unsigned short *ptr )
+{
+    unsigned short hdr_size = ptr[0];
+
+    info->codepage      = ptr[1];
+    info->unidef        = ptr[4];
+    info->trans_unidef  = ptr[6];
+    ptr += hdr_size;
+    info->cp2uni = ++ptr;
+    ptr += 256;
+    if (*ptr++) ptr += 256;  /* glyph table */
+    info->dbcs_offsets  = *ptr ? ptr + 1 : NULL;
+}
+
+static const struct nls_info *get_nls_info( unsigned int codepage )
+{
+    struct stat st;
+    unsigned short *data;
+    char *path;
+    unsigned int i;
+    int fd;
+
+    for (i = 0; i < ARRAY_SIZE(nlsinfo) && nlsinfo[i].codepage; i++)
+        if (nlsinfo[i].codepage == codepage) return &nlsinfo[i];
+
+    assert( i < ARRAY_SIZE(nlsinfo) );
+
+    for (i = 0; nlsdirs[i]; i++)
+    {
+        path = strmake( "%s/c_%03u.nls", nlsdirs[i], codepage );
+        if ((fd = open( path, O_RDONLY )) != -1) break;
+        free( path );
+    }
+    if (!nlsdirs[i]) return NULL;
+
+    fstat( fd, &st );
+    data = xmalloc( st.st_size );
+    if (read( fd, data, st.st_size ) != st.st_size) error( "failed to load %s\n", path );
+    close( fd );
+    free( path );
+    init_nls_info( &nlsinfo[i], data );
+    return &nlsinfo[i];
+}
+
+int is_valid_codepage(int cp)
+{
+    return cp == CP_UTF8 || get_nls_info( cp );
+}
+
+WCHAR *codepage_to_unicode( int codepage, const char *src, int srclen, int *dstlen )
+{
+    const struct nls_info *info = get_nls_info( codepage );
+    unsigned int i;
+    WCHAR dbch, *dst = xmalloc( (srclen + 1) * sizeof(WCHAR) );
+
+    if (!info) error( "codepage %u not supported\n", codepage );
+
+    if (info->dbcs_offsets)
+    {
+        for (i = 0; srclen; i++, srclen--, src++)
+        {
+            unsigned short off = info->dbcs_offsets[(unsigned char)*src];
+            if (off)
+            {
+                if (srclen == 1) return NULL;
+                dbch = (src[0] << 8) | (unsigned char)src[1];
+                src++;
+                srclen--;
+                dst[i] = info->dbcs_offsets[off + (unsigned char)*src];
+                if (dst[i] == info->unidef && dbch != info->trans_unidef) return NULL;
+            }
+            else
+            {
+                dst[i] = info->cp2uni[(unsigned char)*src];
+                if (dst[i] == info->unidef && *src != info->trans_unidef) return NULL;
+            }
+        }
+    }
+    else
+    {
+        for (i = 0; i < srclen; i++)
+        {
+            dst[i] = info->cp2uni[(unsigned char)src[i]];
+            if (dst[i] == info->unidef && src[i] != info->trans_unidef) return NULL;
+        }
+    }
+    dst[i] = 0;
+    *dstlen = i;
+    return dst;
+}
+
+#endif  /* _WIN32 */
+
 /*******************************************************************
  *         buffer management
  *
diff --git a/tools/wmc/utils.h b/tools/wmc/utils.h
index e4c546765d..726a36731a 100644
--- a/tools/wmc/utils.h
+++ b/tools/wmc/utils.h
@@ -52,6 +52,8 @@ int unistricmp(const WCHAR *s1, const WCHAR *s2);
 int unistrcmp(const WCHAR *s1, const WCHAR *s2);
 WCHAR *utf8_to_unicode( const char *src, int srclen, int *dstlen );
 char *unicode_to_utf8( const WCHAR *src, int srclen, int *dstlen );
+int is_valid_codepage(int id);
+WCHAR *codepage_to_unicode( int codepage, const char *src, int srclen, int *dstlen );
 
 /* buffer management */
 




More information about the wine-cvs mailing list