[PATCH 2/3] kernel32: Support UTF-7 in WideCharToMultiByte.

Alex Henrie alexhenrie24 at gmail.com
Fri Aug 31 20:25:25 CDT 2012


---
 dlls/kernel32/locale.c |  265 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 261 insertions(+), 4 deletions(-)

diff --git a/dlls/kernel32/locale.c b/dlls/kernel32/locale.c
index f9217c5..aa66602 100644
--- a/dlls/kernel32/locale.c
+++ b/dlls/kernel32/locale.c
@@ -2226,6 +2226,264 @@ INT WINAPI MultiByteToWideChar( UINT page, DWORD flags, LPCSTR src, INT srclen,
 
 
 /***********************************************************************
+ *              utf7_can_directly_encode
+ *
+ * Helper for utf7_wcstombs: may codepoint be copied to the output as-is?
+ */
+static BOOL utf7_can_directly_encode(WCHAR codepoint)
+{
+    static const BOOL direct_ascii[] = {
+        TRUE,   /* 0x00 \0  (special case) */
+        FALSE,  /* 0x01 */
+        FALSE,  /* 0x02 */
+        FALSE,  /* 0x03 */
+        FALSE,  /* 0x04 */
+        FALSE,  /* 0x05 */
+        FALSE,  /* 0x06 */
+        FALSE,  /* 0x07 \a */
+        FALSE,  /* 0x08 \b */
+        TRUE,   /* 0x09 \t */
+        TRUE,   /* 0x0A \n */
+        FALSE,  /* 0x0B \v */
+        FALSE,  /* 0x0C \f */
+        TRUE,   /* 0x0D \r */
+        FALSE,  /* 0x0E */
+        FALSE,  /* 0x0F */
+        FALSE,  /* 0x10 */
+        FALSE,  /* 0x11 */
+        FALSE,  /* 0x12 */
+        FALSE,  /* 0x13 */
+        FALSE,  /* 0x14 */
+        FALSE,  /* 0x15 */
+        FALSE,  /* 0x16 */
+        FALSE,  /* 0x17 */
+        FALSE,  /* 0x18 */
+        FALSE,  /* 0x19 */
+        FALSE,  /* 0x1A */
+        FALSE,  /* 0x1B escape */
+        FALSE,  /* 0x1C */
+        FALSE,  /* 0x1D */
+        FALSE,  /* 0x1E */
+        FALSE,  /* 0x1F */
+        TRUE,   /* 0x20 space */
+        FALSE,  /* 0x21 ! */
+        FALSE,  /* 0x22 " */
+        FALSE,  /* 0x23 # */
+        FALSE,  /* 0x24 $ */
+        FALSE,  /* 0x25 % */
+        FALSE,  /* 0x26 & */
+        TRUE,   /* 0x27 ' */
+        TRUE,   /* 0x28 ( */
+        TRUE,   /* 0x29 ) */
+        FALSE,  /* 0x2A * */
+        TRUE,   /* 0x2B +  (special case) */
+        TRUE,   /* 0x2C , */
+        TRUE,   /* 0x2D - */
+        TRUE,   /* 0x2E . */
+        TRUE,   /* 0x2F / */
+        TRUE,   /* 0x30 0 */
+        TRUE,   /* 0x31 1 */
+        TRUE,   /* 0x32 2 */
+        TRUE,   /* 0x33 3 */
+        TRUE,   /* 0x34 4 */
+        TRUE,   /* 0x35 5 */
+        TRUE,   /* 0x36 6 */
+        TRUE,   /* 0x37 7 */
+        TRUE,   /* 0x38 8 */
+        TRUE,   /* 0x39 9 */
+        TRUE,   /* 0x3A : */
+        FALSE,  /* 0x3B ; */
+        FALSE,  /* 0x3C < */
+        FALSE,  /* 0x3D = */
+        FALSE,  /* 0x3E > */
+        TRUE,   /* 0x3F ? */
+        FALSE,  /* 0x40 @ */
+        TRUE,   /* 0x41 A */
+        TRUE,   /* 0x42 B */
+        TRUE,   /* 0x43 C */
+        TRUE,   /* 0x44 D */
+        TRUE,   /* 0x45 E */
+        TRUE,   /* 0x46 F */
+        TRUE,   /* 0x47 G */
+        TRUE,   /* 0x48 H */
+        TRUE,   /* 0x49 I */
+        TRUE,   /* 0x4A J */
+        TRUE,   /* 0x4B K */
+        TRUE,   /* 0x4C L */
+        TRUE,   /* 0x4D M */
+        TRUE,   /* 0x4E N */
+        TRUE,   /* 0x4F O */
+        TRUE,   /* 0x50 P */
+        TRUE,   /* 0x51 Q */
+        TRUE,   /* 0x52 R */
+        TRUE,   /* 0x53 S */
+        TRUE,   /* 0x54 T */
+        TRUE,   /* 0x55 U */
+        TRUE,   /* 0x56 V */
+        TRUE,   /* 0x57 W */
+        TRUE,   /* 0x58 X */
+        TRUE,   /* 0x59 Y */
+        TRUE,   /* 0x5A Z */
+        FALSE,  /* 0x5B [ */
+        FALSE,  /* 0x5C \ */
+        FALSE,  /* 0x5D ] */
+        FALSE,  /* 0x5E ^ */
+        FALSE,  /* 0x5F _ */
+        FALSE,  /* 0x60 ` */
+        TRUE,   /* 0x61 a */
+        TRUE,   /* 0x62 b */
+        TRUE,   /* 0x63 c */
+        TRUE,   /* 0x64 d */
+        TRUE,   /* 0x65 e */
+        TRUE,   /* 0x66 f */
+        TRUE,   /* 0x67 g */
+        TRUE,   /* 0x68 h */
+        TRUE,   /* 0x69 i */
+        TRUE,   /* 0x6A j */
+        TRUE,   /* 0x6B k */
+        TRUE,   /* 0x6C l */
+        TRUE,   /* 0x6D m */
+        TRUE,   /* 0x6E n */
+        TRUE,   /* 0x6F o */
+        TRUE,   /* 0x70 p */
+        TRUE,   /* 0x71 q */
+        TRUE,   /* 0x72 r */
+        TRUE,   /* 0x73 s */
+        TRUE,   /* 0x74 t */
+        TRUE,   /* 0x75 u */
+        TRUE,   /* 0x76 v */
+        TRUE,   /* 0x77 w */
+        TRUE,   /* 0x78 x */
+        TRUE,   /* 0x79 y */
+        TRUE,   /* 0x7A z */
+        FALSE,  /* 0x7B { */
+        FALSE,  /* 0x7C | */
+        FALSE,  /* 0x7D } */
+        FALSE   /* 0x7E ~ */
+    };
+
+    /* the table stops at '~' (0x7E); anything above that is never
+     * directly encodable and must not be used as an index */
+    if (codepoint > '~')
+    {
+        return FALSE;
+    }
+
+    return direct_ascii[codepoint];
+}
+
+/***********************************************************************
+ *              write_to_c_string
+ *
+ * Helper for utf7_wcstombs: stores one byte in dst at position *index
+ *
+ * RETURNS
+ *   0 after storing character and advancing *index, -1 if *index >= dstlen
+ */
+static int write_to_c_string(char* dst, int dstlen, int* index, char character)
+{
+    const int position = *index;
+    if (position < dstlen)
+    {
+        dst[position] = character;
+        *index = position + 1;
+        return 0;
+    }
+    return -1;
+}
+
+/***********************************************************************
+ *              utf7_wcstombs
+ *
+ * UTF-16 to UTF-7 string conversion, helper for WideCharToMultiByte
+ *
+ * PARAMS
+ *   src     [I] UTF-16 input; only src[0] .. src[srclen - 1] are read
+ *   srclen  [I] Length of src (in WCHARs); nothing is converted if <= 0
+ *   dst     [O] Destination buffer, or NULL to compute the required length
+ *   dstlen  [I] Length of dst (in bytes), or 0 to compute the required length
+ *
+ * RETURNS
+ *   On success, the number of bytes written (or required, when only
+ *   computing the length)
+ *   On dst buffer overflow, -1
+ *
+ * NOTES
+ *   '+' becomes "+-", characters accepted by utf7_can_directly_encode are
+ *   copied unchanged, and every other run of characters becomes a single
+ *   "+...-" sequence holding the UTF-16 code units, most significant bit
+ *   first, in base64 without '=' padding.
+ */
+static int utf7_wcstombs(const WCHAR* src, int srclen, char* dst, int dstlen)
+{
+    static const char base64_encoding_table[] =
+        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+    BOOL dry_run = !dst || !dstlen;
+    const WCHAR* source_end = src + srclen;
+    int dest_index = 0;
+
+    /* test the bound before the first read so an empty src is never dereferenced */
+    while (src < source_end)
+    {
+        if (*src == '+')
+        {
+            if (dry_run) dest_index += 2;
+            else if (write_to_c_string(dst, dstlen, &dest_index, '+') ||
+                     write_to_c_string(dst, dstlen, &dest_index, '-')) return -1;
+            src++;
+        }
+        else if (utf7_can_directly_encode(*src))
+        {
+            if (dry_run) dest_index++;
+            else if (write_to_c_string(dst, dstlen, &dest_index, (char)*src)) return -1;
+            src++;
+        }
+        else
+        {
+            unsigned int bits = 0;      /* code units shifted in from the right */
+            unsigned int pending = 0;   /* number of low bits of 'bits' not yet written */
+            char digit;
+
+            if (dry_run) dest_index++;
+            else if (write_to_c_string(dst, dstlen, &dest_index, '+')) return -1;
+
+            /* Claim every following character that needs encoding, but never look
+             * at or past source_end: src is not necessarily NUL-terminated. */
+            while (src < source_end && !utf7_can_directly_encode(*src))
+            {
+                bits = (bits << 16) | *src;
+                pending += 16;
+                src++;
+
+                while (pending >= 6)
+                {
+                    pending -= 6;
+                    digit = base64_encoding_table[(bits >> pending) & 0x3f];
+                    if (dry_run) dest_index++;
+                    else if (write_to_c_string(dst, dstlen, &dest_index, digit)) return -1;
+                }
+            }
+
+            if (pending)
+            {
+                /* pad the bits of the last character with zeroes to fill a whole digit */
+                digit = base64_encoding_table[(bits << (6 - pending)) & 0x3f];
+                if (dry_run) dest_index++;
+                else if (write_to_c_string(dst, dstlen, &dest_index, digit)) return -1;
+            }
+
+            /* Windows always explicitly terminates the base64 sequence even though
+             * RFC 2152 (page 3, rule 2) does not require this */
+            if (dry_run) dest_index++;
+            else if (write_to_c_string(dst, dstlen, &dest_index, '-')) return -1;
+        }
+    }
+
+    return dest_index;
+}
+
+/***********************************************************************
  *              WideCharToMultiByte   (KERNEL32.@)
  *
  * Convert a Unicode character string into a multibyte string.
@@ -2235,7 +2493,7 @@ INT WINAPI MultiByteToWideChar( UINT page, DWORD flags, LPCSTR src, INT srclen,
  *   flags   [I] Mapping Flags (MB_ constants from "winnls.h").
  *   src     [I] Source string buffer
  *   srclen  [I] Length of src (in WCHARs), or -1 if src is NUL terminated
- *   dst     [O] Destination buffer
+ *   dst     [O] Destination buffer, or NULL to compute the required length
  *   dstlen  [I] Length of dst (in bytes), or 0 to compute the required length
  *   defchar [I] Default character to use for conversion if no exact
  *		    conversion can be made
@@ -2292,9 +2550,8 @@ INT WINAPI WideCharToMultiByte( UINT page, DWORD flags, LPCWSTR src, INT srclen,
             SetLastError( ERROR_INVALID_FLAGS );
             return 0;
         }
-        FIXME("UTF-7 not supported\n");
-        SetLastError( ERROR_CALL_NOT_IMPLEMENTED );
-        return 0;
+        ret = utf7_wcstombs( src, srclen, dst, dstlen );
+        break;
     case CP_UNIXCP:
         if (unix_cptable)
         {
-- 
1.7.10.4



More information about the wine-patches mailing list