[PATCH 1/4] kernel32: Support UTF-7 in MultiByteToWideChar.

Alex Henrie alexhenrie24 at gmail.com
Sun Dec 2 22:13:37 CST 2012


Hello again,

I came back to the problem of UTF-7 support and made some improvements
to my previous submission. The tests are now more stringent, especially
in regard to null terminator checking, and they test the srclen
parameter more thoroughly now as well.

I also noticed that a related test for error checking was marked
todo_wine. The fix was trivial and is included as a fourth patch.

Please let me know what you think. I'm willing to put some more effort
into this.

---
 dlls/kernel32/locale.c |  248 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 244 insertions(+), 4 deletions(-)

diff --git a/dlls/kernel32/locale.c b/dlls/kernel32/locale.c
index 57daab1..c669a3c 100644
--- a/dlls/kernel32/locale.c
+++ b/dlls/kernel32/locale.c
@@ -1892,6 +1892,247 @@ BOOL WINAPI EnumSystemCodePagesW( CODEPAGE_ENUMPROCW lpfnCodePageEnum, DWORD fla
 
 
 /***********************************************************************
+ *              write_to_w_string
+ *
+ * Helper for utf7_mbstowcs
+ *
+ * RETURNS
+ *   0 on success, -1 on error
+ */
+static int write_to_w_string(WCHAR* dst, int dstlen, int* index, WCHAR character)
+{
+    if (*index >= dstlen)
+    {
+        return -1;
+    }
+
+    dst[*index] = character;
+    (*index)++;
+    return 0;
+}
+
+/***********************************************************************
+ *              utf7_mbstowcs
+ *
+ * UTF-7 to UTF-16 string conversion, helper for MultiByteToWideChar
+ *
+ * RETURNS
+ *   On success, the number of characters written
+ *   On dst buffer overflow, -1
+ *   On invalid input char, -2
+ */
+static int utf7_mbstowcs(const char* src, int srclen, WCHAR* dst, int dstlen)
+{
+    static const WCHAR base64_decoding_table[] = {
+        /* \0   */  -1,
+        /* \x01 */  -1,
+        /* \x02 */  -1,
+        /* \x03 */  -1,
+        /* \x04 */  -1,
+        /* \x05 */  -1,
+        /* \x06 */  -1,
+        /* \a   */  -1,
+        /* \b   */  -1,
+        /* \t   */  -1,
+        /* \n   */  -1,
+        /* \v   */  -1,
+        /* \f   */  -1,
+        /* \r   */  -1,
+        /* \x0E */  -1,
+        /* \x0F */  -1,
+        /* \x10 */  -1,
+        /* \x11 */  -1,
+        /* \x12 */  -1,
+        /* \x13 */  -1,
+        /* \x14 */  -1,
+        /* \x15 */  -1,
+        /* \x16 */  -1,
+        /* \x17 */  -1,
+        /* \x18 */  -1,
+        /* \x19 */  -1,
+        /* \x1A */  -1,
+        /* \e   */  -1,
+        /* \x1C */  -1,
+        /* \x1D */  -1,
+        /* \x1E */  -1,
+        /* \x1F */  -1,
+        /*      */  -1,
+        /* !    */  -1,
+        /* "    */  -1,
+        /* #    */  -1,
+        /* $    */  -1,
+        /* %    */  -1,
+        /* &    */  -1,
+        /* '    */  -1,
+        /* (    */  -1,
+        /* )    */  -1,
+        /* *    */  -1,
+        /* +    */  62,
+        /* ,    */  -1,
+        /* -    */  -1,
+        /* .    */  -1,
+        /* /    */  63,
+        /* 0    */  52,
+        /* 1    */  53,
+        /* 2    */  54,
+        /* 3    */  55,
+        /* 4    */  56,
+        /* 5    */  57,
+        /* 6    */  58,
+        /* 7    */  59,
+        /* 8    */  60,
+        /* 9    */  61,
+        /* :    */  -1,
+        /* ;    */  -1,
+        /* <    */  -1,
+        /* =    */  -1,
+        /* >    */  -1,
+        /* ?    */  -1,
+        /* @    */  -1,
+        /* A    */   0,
+        /* B    */   1,
+        /* C    */   2,
+        /* D    */   3,
+        /* E    */   4,
+        /* F    */   5,
+        /* G    */   6,
+        /* H    */   7,
+        /* I    */   8,
+        /* J    */   9,
+        /* K    */  10,
+        /* L    */  11,
+        /* M    */  12,
+        /* N    */  13,
+        /* O    */  14,
+        /* P    */  15,
+        /* Q    */  16,
+        /* R    */  17,
+        /* S    */  18,
+        /* T    */  19,
+        /* U    */  20,
+        /* V    */  21,
+        /* W    */  22,
+        /* X    */  23,
+        /* Y    */  24,
+        /* Z    */  25,
+        /* [    */  -1,
+        /* \    */  -1,
+        /* ]    */  -1,
+        /* ^    */  -1,
+        /* _    */  -1,
+        /* `    */  -1,
+        /* a    */  26,
+        /* b    */  27,
+        /* c    */  28,
+        /* d    */  29,
+        /* e    */  30,
+        /* f    */  31,
+        /* g    */  32,
+        /* h    */  33,
+        /* i    */  34,
+        /* j    */  35,
+        /* k    */  36,
+        /* l    */  37,
+        /* m    */  38,
+        /* n    */  39,
+        /* o    */  40,
+        /* p    */  41,
+        /* q    */  42,
+        /* r    */  43,
+        /* s    */  44,
+        /* t    */  45,
+        /* u    */  46,
+        /* v    */  47,
+        /* w    */  48,
+        /* x    */  49,
+        /* y    */  50,
+        /* z    */  51
+    };
+
+    BOOL dry_run = !dst || !dstlen;
+    const char* source_end = &src[srclen];
+    int dest_index = 0;
+
+    do
+    {
+        if (*src == '+')
+        {
+            WCHAR byte_pair = 0;
+            short offset = 0;
+
+            src++; /* skip the + sign */
+
+            if (*src == '-')
+            {
+                /* just a plus sign escaped as +- */
+                if (dry_run) dest_index++; else if (write_to_w_string(dst, dstlen, &dest_index, '+')) return -1;
+                src++;
+                continue;
+            }
+
+            for (;;)
+            {
+                WCHAR sextet = *src;
+                if (sextet == '-')
+                {
+                    /* skip over the dash and end base64 decoding */
+                    /* the current, unfinished byte pair is discarded */
+                    src++;
+                    break;
+                }
+                else if (sextet <= 'z')
+                {
+                    sextet = base64_decoding_table[sextet];
+                }
+                else
+                {
+                    sextet = -1;
+                }
+
+                if (sextet == (WCHAR)-1)
+                {
+                    /* -1 means that the next character of src is not part of a base64 sequence */
+                    /* in other words, all sextets in this base64 sequence have been processed */
+                    /* the current, unfinished byte pair is discarded */
+                    break;
+                }
+
+                if (offset > 0)
+                {
+                    byte_pair |= (sextet << 10) >> offset;
+                }
+                else
+                {
+                    byte_pair |= sextet << (10 - offset);
+                }
+                offset += 6;
+                if (offset > 15)
+                {
+                    /* this byte pair is done */
+                    if (dry_run) dest_index++; else if (write_to_w_string(dst, dstlen, &dest_index, byte_pair)) return -1;
+                    byte_pair = 0;
+                    /* back up the offset to begin writing to the next byte pair,
+                       including writing any part of the current sextet that didn't fit in the last byte pair */
+                    offset -= 22;
+                }
+                else
+                {
+                    /* this sextet is done */
+                    src++;
+                }
+            }
+        }
+        else
+        {
+            if (dry_run) dest_index++; else if (write_to_w_string(dst, dstlen, &dest_index, *src)) return -1;
+            src++;
+        }
+    } while (src < source_end);
+
+    return dest_index;
+}
+
+/***********************************************************************
  *              MultiByteToWideChar   (KERNEL32.@)
  *
  * Convert a multibyte character string into a Unicode string.
@@ -1901,7 +2142,7 @@ BOOL WINAPI EnumSystemCodePagesW( CODEPAGE_ENUMPROCW lpfnCodePageEnum, DWORD fla
  *   flags  [I] Character mapping flags
  *   src    [I] Source string buffer
  *   srclen [I] Length of src (in bytes), or -1 if src is NUL terminated
- *   dst    [O] Destination buffer
+ *   dst    [O] Destination buffer, or NULL to compute the required length
  *   dstlen [I] Length of dst (in WCHARs), or 0 to compute the required length
  *
  * RETURNS
@@ -1944,9 +2185,8 @@ INT WINAPI MultiByteToWideChar( UINT page, DWORD flags, LPCSTR src, INT srclen,
             SetLastError( ERROR_INVALID_FLAGS );
             return 0;
         }
-        FIXME("UTF-7 not supported\n");
-        SetLastError( ERROR_CALL_NOT_IMPLEMENTED );
-        return 0;
+        ret = utf7_mbstowcs( src, srclen, dst, dstlen );
+        break;
     case CP_UNIXCP:
         if (unix_cptable)
         {
-- 
1.7.10.4



More information about the wine-patches mailing list