Alexandre Julliard : wrc: Print a warning when encountering utf8 strings in non-utf8 codepage.

Alexandre Julliard julliard at winehq.org
Thu Aug 20 12:59:05 CDT 2009


Module: wine
Branch: master
Commit: be7558fc30f301f891ee8dfc6fba3bf569c44913
URL:    http://source.winehq.org/git/wine.git/?a=commit;h=be7558fc30f301f891ee8dfc6fba3bf569c44913

Author: Alexandre Julliard <julliard at winehq.org>
Date:   Thu Aug 20 15:33:03 2009 +0200

wrc: Print a warning when encountering utf8 strings in non-utf8 codepage.

---

 tools/wrc/genres.c |    3 +++
 tools/wrc/parser.l |    3 +++
 tools/wrc/utils.c  |   23 +++++++++++++++++++++++
 tools/wrc/utils.h  |    1 +
 tools/wrc/wrc.c    |    3 +++
 tools/wrc/wrc.h    |    1 +
 6 files changed, 34 insertions(+), 0 deletions(-)

diff --git a/tools/wrc/genres.c b/tools/wrc/genres.c
index e4317d5..131f6f7 100644
--- a/tools/wrc/genres.c
+++ b/tools/wrc/genres.c
@@ -314,6 +314,9 @@ static void put_string(res_t *res, const string_t *str, enum str_e type, int ist
             if (!check_unicode_conversion( str, newstr, codepage ))
                 error( "String %s does not convert identically to Unicode and back in codepage %d. "
                        "Try using a Unicode string instead\n", str->str.cstr, codepage );
+            if (check_valid_utf8( str, codepage ))
+                warning( "string \"%s\" seems to be UTF-8 but codepage %u is in use.\n",
+                         str->str.cstr, codepage );
         }
         if (!isterm) put_word(res, newstr->size);
         for(cnt = 0; cnt < newstr->size; cnt++)
diff --git a/tools/wrc/parser.l b/tools/wrc/parser.l
index cafa35a..8ed0445 100644
--- a/tools/wrc/parser.l
+++ b/tools/wrc/parser.l
@@ -637,6 +637,9 @@ static string_t *get_buffered_cstring(void)
         if (!check_unicode_conversion( str, str_w, current_codepage ))
             parser_error("String %s does not convert identically to Unicode and back in codepage %d. "
                     "Try using a Unicode string instead", str->str.cstr, current_codepage );
+        if (check_valid_utf8( str, current_codepage ))
+            parser_warning( "string \"%s\" seems to be UTF-8 but codepage %u is in use.",
+                            str->str.cstr, current_codepage );
         free_string( str );
         return str_w;
     }
diff --git a/tools/wrc/utils.c b/tools/wrc/utils.c
index b6688af..d3b8cf7 100644
--- a/tools/wrc/utils.c
+++ b/tools/wrc/utils.c
@@ -311,6 +311,29 @@ void free_string(string_t *str)
     free( str );
 }
 
+/* check if the string is valid utf8 despite a different codepage being in use */
+int check_valid_utf8( const string_t *str, int codepage )
+{
+    unsigned int i;
+
+    if (!check_utf8) return 0;
+    if (!codepage) return 0;
+    if (!wine_cp_get_table( codepage )) return 0;
+
+    for (i = 0; i < str->size; i++)
+    {
+        if ((unsigned char)str->str.cstr[i] >= 0xf5) goto done;
+        if ((unsigned char)str->str.cstr[i] >= 0xc2) break;
+        if ((unsigned char)str->str.cstr[i] >= 0x80) goto done;
+    }
+    if (i == str->size) return 0;  /* no 8-bit chars at all */
+
+    if (wine_utf8_mbstowcs( MB_ERR_INVALID_CHARS, str->str.cstr, str->size, NULL, 0 ) >= 0) return 1;
+
+done:
+    check_utf8 = 0;  /* at least one 8-bit non-utf8 string found, stop checking */
+    return 0;
+}
 
 int check_unicode_conversion( const string_t *str_a, const string_t *str_w, int codepage )
 {
diff --git a/tools/wrc/utils.h b/tools/wrc/utils.h
index 09144e7..ced55ab 100644
--- a/tools/wrc/utils.h
+++ b/tools/wrc/utils.h
@@ -45,6 +45,7 @@ char *dup_basename(const char *name, const char *ext);
 int compare_name_id(const name_id_t *n1, const name_id_t *n2);
 string_t *convert_string(const string_t *str, enum str_e type, int codepage);
 void free_string( string_t *str );
+int check_valid_utf8( const string_t *str, int codepage );
 int check_unicode_conversion( const string_t *str_a, const string_t *str_w, int codepage );
 int get_language_codepage( unsigned short lang, unsigned short sublang );
 
diff --git a/tools/wrc/wrc.c b/tools/wrc/wrc.c
index 5879d92..7dd91fb 100644
--- a/tools/wrc/wrc.c
+++ b/tools/wrc/wrc.c
@@ -155,6 +155,8 @@ int preprocess_only = 0;
  */
 int no_preprocess = 0;
 
+int check_utf8 = 1;  /* whether to check for valid utf8 */
+
 static int verify_translations_mode;
 
 char *output_name = NULL;	/* The name given by the -o option */
@@ -292,6 +294,7 @@ static int load_file( const char *input_name, const char *output_name )
 
     /* Reset the language */
     currentlanguage = dup_language( defaultlanguage );
+    check_utf8 = 1;
 
     /* Go from .rc to .res */
     chat("Starting parse\n");
diff --git a/tools/wrc/wrc.h b/tools/wrc/wrc.h
index de8929a..0283546 100644
--- a/tools/wrc/wrc.h
+++ b/tools/wrc/wrc.h
@@ -43,6 +43,7 @@ extern int pedantic;
 extern int byteorder;
 extern int preprocess_only;
 extern int no_preprocess;
+extern int check_utf8;
 
 extern char *output_name;
 extern char *input_name;




More information about the wine-cvs mailing list