[PATCH 1/3] kernel32: Support UTF-7 in MultiByteToWideChar. (try 3)

Alex Henrie alexhenrie24 at gmail.com
Sun Oct 12 22:54:46 CDT 2014


Portions of utf7_mbstowcs were written by Sebastian Lackner
<sebastian at fds-team.de>
---
 dlls/kernel32/locale.c | 163 +++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 159 insertions(+), 4 deletions(-)

diff --git a/dlls/kernel32/locale.c b/dlls/kernel32/locale.c
index 730574b..499797d 100644
--- a/dlls/kernel32/locale.c
+++ b/dlls/kernel32/locale.c
@@ -1954,6 +1954,162 @@ BOOL WINAPI EnumSystemCodePagesW( CODEPAGE_ENUMPROCW lpfnCodePageEnum, DWORD fla
 
 
 /***********************************************************************
+ *              write_to_w_string
+ *
+ * Helper for utf7_mbstowcs: stores character at dst[*index] (unless dst is NULL) and advances *index
+ *
+ * RETURNS
+ *   TRUE on success, FALSE if dst is non-NULL and *index >= dstlen (nothing is stored, *index is unchanged)
+ */
+static inline BOOL write_to_w_string(WCHAR *dst, int dstlen, int *index, WCHAR character)
+{
+    if (dst)
+    {
+        if (*index >= dstlen)
+            return FALSE; /* no room left in dst */
+
+        dst[*index] = character;
+    }
+
+    (*index)++; /* advanced even when dst is NULL, so *index can serve as a length count */
+
+    return TRUE;
+}
+
+/***********************************************************************
+ *              utf7_mbstowcs
+ *
+ * UTF-7 to UTF-16 string conversion, helper for MultiByteToWideChar
+ *
+ * RETURNS
+ *   On success, the number of WCHARs written, or the number required if dst is NULL or dstlen is 0
+ *   On dst buffer overflow, -1
+ */
+static int utf7_mbstowcs(const char *src, int srclen, WCHAR *dst, int dstlen)
+{
+    static const WCHAR base64_decoding_table[] = { /* indexed by character code 0..127; -1 marks a non-base64 character */
+        /* \0   */ -1,    /* \x01 */ -1,    /* \x02 */ -1,    /* \x03 */ -1,
+        /* \x04 */ -1,    /* \x05 */ -1,    /* \x06 */ -1,    /* \a   */ -1,
+        /* \b   */ -1,    /* \t   */ -1,    /* \n   */ -1,    /* \v   */ -1,
+        /* \f   */ -1,    /* \r   */ -1,    /* \x0E */ -1,    /* \x0F */ -1,
+        /* \x10 */ -1,    /* \x11 */ -1,    /* \x12 */ -1,    /* \x13 */ -1,
+        /* \x14 */ -1,    /* \x15 */ -1,    /* \x16 */ -1,    /* \x17 */ -1,
+        /* \x18 */ -1,    /* \x19 */ -1,    /* \x1A */ -1,    /* \e   */ -1,
+        /* \x1C */ -1,    /* \x1D */ -1,    /* \x1E */ -1,    /* \x1F */ -1,
+        /*      */ -1,    /* !    */ -1,    /* "    */ -1,    /* #    */ -1,
+        /* $    */ -1,    /* %    */ -1,    /* &    */ -1,    /* '    */ -1,
+        /* (    */ -1,    /* )    */ -1,    /* *    */ -1,    /* +    */ 62,
+        /* ,    */ -1,    /* -    */ -1,    /* .    */ -1,    /* /    */ 63,
+        /* 0    */ 52,    /* 1    */ 53,    /* 2    */ 54,    /* 3    */ 55,
+        /* 4    */ 56,    /* 5    */ 57,    /* 6    */ 58,    /* 7    */ 59,
+        /* 8    */ 60,    /* 9    */ 61,    /* :    */ -1,    /* ;    */ -1,
+        /* <    */ -1,    /* =    */ -1,    /* >    */ -1,    /* ?    */ -1,
+        /* @    */ -1,    /* A    */  0,    /* B    */  1,    /* C    */  2,
+        /* D    */  3,    /* E    */  4,    /* F    */  5,    /* G    */  6,
+        /* H    */  7,    /* I    */  8,    /* J    */  9,    /* K    */ 10,
+        /* L    */ 11,    /* M    */ 12,    /* N    */ 13,    /* O    */ 14,
+        /* P    */ 15,    /* Q    */ 16,    /* R    */ 17,    /* S    */ 18,
+        /* T    */ 19,    /* U    */ 20,    /* V    */ 21,    /* W    */ 22,
+        /* X    */ 23,    /* Y    */ 24,    /* Z    */ 25,    /* [    */ -1,
+        /* \    */ -1,    /* ]    */ -1,    /* ^    */ -1,    /* _    */ -1,
+        /* `    */ -1,    /* a    */ 26,    /* b    */ 27,    /* c    */ 28,
+        /* d    */ 29,    /* e    */ 30,    /* f    */ 31,    /* g    */ 32,
+        /* h    */ 33,    /* i    */ 34,    /* j    */ 35,    /* k    */ 36,
+        /* l    */ 37,    /* m    */ 38,    /* n    */ 39,    /* o    */ 40,
+        /* p    */ 41,    /* q    */ 42,    /* r    */ 43,    /* s    */ 44,
+        /* t    */ 45,    /* u    */ 46,    /* v    */ 47,    /* w    */ 48,
+        /* x    */ 49,    /* y    */ 50,    /* z    */ 51,    /* {    */ -1,
+        /* |    */ -1,    /* }    */ -1,    /* ~    */ -1,    /* \x7F */ -1
+    };
+
+    const char *source_end = src + srclen; /* one past the last input byte */
+    int dest_index = 0; /* number of WCHARs produced so far; this is the return value */
+
+    DWORD byte_pair = 0; /* accumulates decoded sextets, newest bits at the low end; never explicitly cleared */
+    short offset = 0; /* count of accumulated bits in byte_pair that have not been emitted yet */
+
+    /* MultiByteToWideChar guarantees that srclen > 0 */
+    assert(srclen > 0);
+
+    if (!dstlen)
+        dst = NULL; /* dstlen == 0: nothing is stored, output is only counted */
+
+    do
+    {
+        if (*src == '+')
+        {
+            src++; /* skip the + sign */
+            if (src >= source_end)
+                break; /* a lone + at the very end of the input produces no output */
+
+            if (*src == '-')
+            {
+                /* just a plus sign escaped as +- */
+                if (!write_to_w_string(dst, dstlen, &dest_index, '+'))
+                    return -1;
+                src++;
+                continue;
+            }
+
+            do
+            {
+                WCHAR sextet = *src;
+                if (sextet == '-')
+                {
+                    /* skip over the dash and end base64 decoding */
+                    /* the current, unfinished byte pair is discarded */
+                    src++;
+                    offset = 0;
+                    break;
+                }
+                else if (sextet <= 127)
+                {
+                    sextet = base64_decoding_table[sextet];
+                    if (sextet == (WCHAR)-1)
+                    {
+                        /* -1 means that the next character of src is not part of a base64 sequence */
+                        /* in other words, all sextets in this base64 sequence have been processed */
+                        /* the current, unfinished byte pair is discarded; src is not advanced here */
+                        offset = 0;
+                        break;
+                    }
+                }
+                else
+                {
+                    /* the next character of src is > 127 and therefore not part of a base64 sequence */
+                    /* the current, unfinished byte pair is NOT discarded in this case (offset keeps its value) */
+                    /* this is probably a bug in Windows */
+                    break;
+                }
+
+                byte_pair = (byte_pair << 6) | sextet; /* append the 6 decoded bits */
+                offset += 6;
+
+                if (offset >= 16)
+                {
+                    /* this byte pair is done: emit the oldest 16 pending bits, keep the remainder */
+                    if (!write_to_w_string(dst, dstlen, &dest_index, (byte_pair >> (offset - 16)) & 0xFFFF))
+                        return -1;
+                    offset -= 16;
+                }
+
+                /* this sextet is done */
+                src++;
+            } while (src < source_end);
+        }
+        else
+        {
+            /* we have to convert to unsigned char in case *src > 127 */
+            if (!write_to_w_string(dst, dstlen, &dest_index, (unsigned char)*src))
+                return -1;
+            src++;
+        }
+    } while (src < source_end);
+
+    return dest_index;
+}
+
+/***********************************************************************
  *              MultiByteToWideChar   (KERNEL32.@)
  *
  * Convert a multibyte character string into a Unicode string.
@@ -1963,7 +2119,7 @@ BOOL WINAPI EnumSystemCodePagesW( CODEPAGE_ENUMPROCW lpfnCodePageEnum, DWORD fla
  *   flags  [I] Character mapping flags
  *   src    [I] Source string buffer
  *   srclen [I] Length of src (in bytes), or -1 if src is NUL terminated
- *   dst    [O] Destination buffer
+ *   dst    [O] Destination buffer, or NULL to compute the required length
  *   dstlen [I] Length of dst (in WCHARs), or 0 to compute the required length
  *
  * RETURNS
@@ -2006,9 +2162,8 @@ INT WINAPI MultiByteToWideChar( UINT page, DWORD flags, LPCSTR src, INT srclen,
             SetLastError( ERROR_INVALID_FLAGS );
             return 0;
         }
-        FIXME("UTF-7 not supported\n");
-        SetLastError( ERROR_CALL_NOT_IMPLEMENTED );
-        return 0;
+        ret = utf7_mbstowcs( src, srclen, dst, dstlen );
+        break;
     case CP_UNIXCP:
         if (unix_cptable)
         {
-- 
2.1.2



More information about the wine-patches mailing list