kernel32: Add UTF-7 support

Alex Henrie alexhenrie24 at gmail.com
Tue May 1 00:20:41 CDT 2012


Fixes bug 27388.

The MSDN documentation of these behaviors can be found at
http://msdn.microsoft.com/en-us/library/windows/desktop/dd319072(v=vs.85).aspx

dlls/kernel32/tests/locale.c already has some UTF-7 tests. If you think more
tests are needed, please be very specific when you tell me what kind of tests
you'd like to see.

---
 dlls/kernel32/locale.c |   40 ++++--
 include/wine/unicode.h |    2 +
 libs/wine/Makefile.in  |    1 +
 libs/wine/utf7.c       |  340 ++++++++++++++++++++++++++++++++++++++++++++++++
 libs/wine/wine.map     |    2 +
 5 files changed, 374 insertions(+), 11 deletions(-)
 create mode 100644 libs/wine/utf7.c

diff --git a/dlls/kernel32/locale.c b/dlls/kernel32/locale.c
index b506f15..f9c70fb 100644
--- a/dlls/kernel32/locale.c
+++ b/dlls/kernel32/locale.c
@@ -1878,7 +1878,7 @@ INT WINAPI MultiByteToWideChar( UINT page, DWORD flags, LPCSTR src, INT srclen,
     const union cptable *table;
     int ret;
 
-    if (!src || (!dst && dstlen))
+    if (!src || !srclen || (!dst && dstlen))
     {
         SetLastError( ERROR_INVALID_PARAMETER );
         return 0;
@@ -1889,17 +1889,21 @@ INT WINAPI MultiByteToWideChar( UINT page, DWORD flags, LPCSTR src, INT srclen,
     switch(page)
     {
     case CP_SYMBOL:
-        if( flags)
+        if (flags)
         {
-            SetLastError( ERROR_INVALID_PARAMETER );
+            SetLastError( ERROR_INVALID_FLAGS );
             return 0;
         }
         ret = wine_cpsymbol_mbstowcs( src, srclen, dst, dstlen );
         break;
     case CP_UTF7:
-        FIXME("UTF-7 not supported\n");
-        SetLastError( ERROR_CALL_NOT_IMPLEMENTED );
-        return 0;
+        if (flags)
+        {
+            SetLastError( ERROR_INVALID_FLAGS );
+            return 0;
+        }
+        ret = wine_utf7_mbstowcs( src, srclen, dst, dstlen );
+        break;
     case CP_UNIXCP:
         if (unix_cptable)
         {
@@ -1969,7 +1973,7 @@ INT WINAPI WideCharToMultiByte( UINT page, DWORD flags, LPCWSTR src, INT srclen,
     const union cptable *table;
     int ret, used_tmp;
 
-    if (!src || (!dst && dstlen))
+    if (!src || !srclen || (!dst && dstlen))
     {
         SetLastError( ERROR_INVALID_PARAMETER );
         return 0;
@@ -1980,17 +1984,31 @@ INT WINAPI WideCharToMultiByte( UINT page, DWORD flags, LPCWSTR src, INT srclen,
     switch(page)
     {
     case CP_SYMBOL:
-        if( flags || defchar || used)
+        if (defchar || used)
         {
             SetLastError( ERROR_INVALID_PARAMETER );
             return 0;
         }
+        if (flags)
+        {
+            SetLastError( ERROR_INVALID_FLAGS );
+            return 0;
+        }
         ret = wine_cpsymbol_wcstombs( src, srclen, dst, dstlen );
         break;
     case CP_UTF7:
-        FIXME("UTF-7 not supported\n");
-        SetLastError( ERROR_CALL_NOT_IMPLEMENTED );
-        return 0;
+        if (defchar || used)
+        {
+            SetLastError( ERROR_INVALID_PARAMETER );
+            return 0;
+        }
+        if (flags)
+        {
+            SetLastError( ERROR_INVALID_FLAGS );
+            return 0;
+        }
+        ret = wine_utf7_wcstombs( src, srclen, dst, dstlen );
+        break;
     case CP_UNIXCP:
         if (unix_cptable)
         {
diff --git a/include/wine/unicode.h b/include/wine/unicode.h
index 35c6166..1827a8e 100644
--- a/include/wine/unicode.h
+++ b/include/wine/unicode.h
@@ -94,6 +94,8 @@ extern int wine_cp_wcstombs( const union cptable *table, int flags,
                              char *dst, int dstlen, const char *defchar, int *used );
 extern int wine_cpsymbol_mbstowcs( const char *src, int srclen, WCHAR *dst, int dstlen );
 extern int wine_cpsymbol_wcstombs( const WCHAR *src, int srclen, char *dst, int dstlen );
+extern int wine_utf7_mbstowcs( const char *src, int srclen, WCHAR *dst, int dstlen );
+extern int wine_utf7_wcstombs( const WCHAR *src, int srclen, char *dst, int dstlen );
 extern int wine_utf8_mbstowcs( int flags, const char *src, int srclen, WCHAR *dst, int dstlen );
 extern int wine_utf8_wcstombs( int flags, const WCHAR *src, int srclen, char *dst, int dstlen );
 
diff --git a/libs/wine/Makefile.in b/libs/wine/Makefile.in
index ca93d26..431fafc 100644
--- a/libs/wine/Makefile.in
+++ b/libs/wine/Makefile.in
@@ -88,6 +88,7 @@ C_SRCS = \
 	port.c \
 	sortkey.c \
 	string.c \
+	utf7.c \
 	utf8.c \
 	wctomb.c \
 	wctype.c
diff --git a/libs/wine/utf7.c b/libs/wine/utf7.c
new file mode 100644
index 0000000..7b67b2a
--- /dev/null
+++ b/libs/wine/utf7.c
@@ -0,0 +1,340 @@
+/*
+ * UTF-7 support routines
+ *
+ * Copyright 2010 Katayama Hirofumi
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#include "wine/unicode.h"
+#include <stdlib.h>
+
+static const signed char   /* ASCII code -> 6-bit base64 value, -1 = not a base64 character */
+base64inv[] =              /* 128 entries: callers must check the byte is < 0x80 before indexing */
+{
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,   /* 0x00-0x0F */
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,   /* 0x10-0x1F */
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,   /* 0x20-0x2F: '+' = 62, '/' = 63 */
+    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,   /* 0x30-0x3F: '0'-'9' = 52-61 */
+    -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,   /* 0x40-0x4F: 'A'-'O' = 0-14 */
+    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,   /* 0x50-0x5F: 'P'-'Z' = 15-25 */
+    -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,   /* 0x60-0x6F: 'a'-'o' = 26-40 */
+    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1    /* 0x70-0x7F: 'p'-'z' = 41-51 */
+};
+
+/* Decode cchSrc base64 characters from pszSrc into bytes at pbDest.
+ *
+ * Characters are consumed in groups of up to four; each contributes six
+ * bits, most significant first.  A full group yields three bytes, a
+ * trailing group of k < 4 characters yields k * 6 / 8 bytes and its
+ * leftover bits are dropped.  The characters are not validated here. */
+static VOID Utf7Base64Decode(BYTE *pbDest, LPCSTR pszSrc, INT cchSrc)
+{
+    INT left = cchSrc;
+
+    while(left > 0)
+    {
+        INT take = (left < 4) ? left : 4;
+        INT bits = 0;
+        INT k;
+
+        /* Pack the group into the top of a 24-bit accumulator... */
+        for(k = 0; k < take; k++)
+            bits |= ((INT) (BYTE) base64inv[(BYTE) *pszSrc++]) << ((3 - k) * 6);
+        /* ...and peel the completed bytes off from the top down. */
+        for(k = 0; k < take * 6 / 8; k++)
+            *pbDest++ = (BYTE) ((bits >> (8 * (2 - k))) & 0xFF);
+        left -= take;
+    }
+}
+
+/* Swap the two bytes of each of the cw 16-bit units starting at pv, in
+ * place.  A cw of zero or less leaves the memory untouched. */
+static VOID myswab(LPVOID pv, INT cw)
+{
+    LPBYTE cur = (LPBYTE) pv;
+    INT i;
+    for(i = 0; i < cw; i++, cur += 2)
+    {
+        BYTE first = cur[0];
+        cur[0] = cur[1];
+        cur[1] = first;
+    }
+}
+
+/* Count the WCHARs that wine_utf7_mbstowcs() produces for cchUtf7 bytes at
+ * pszUtf7.  Must mirror the conversion loop exactly: the destination
+ * buffer is checked against this value before anything is written. */
+static INT Utf7ToWideCharSize(LPCSTR pszUtf7, INT cchUtf7)
+{
+    INT c = 0, cch;
+    LPCSTR pch;
+
+    while(cchUtf7 > 0)
+    {
+        if (*pszUtf7++ != '+')
+        {
+            c++;                        /* directly encoded byte */
+            cchUtf7--;
+            continue;
+        }
+        /* Only look past the '+' when the counted input really holds
+         * another byte; a trailing '+' must not be read beyond. */
+        if (cchUtf7 > 1 && *pszUtf7 == '-')
+        {
+            c++;                        /* "+-" is a literal '+' */
+            pszUtf7++;
+            cchUtf7 -= 2;
+            continue;
+        }
+        cchUtf7--;
+        pch = pszUtf7;
+        while(cchUtf7 > 0 && (BYTE) *pszUtf7 < 0x80 &&
+              base64inv[(size_t)*pszUtf7] >= 0)
+        {
+            cchUtf7--;
+            pszUtf7++;
+        }
+        cch = pszUtf7 - pch;
+        c += (cch * 3) / 8;             /* 6 bits per char, 16 per WCHAR */
+        if (cchUtf7 > 0 && *pszUtf7 == '-')
+        {
+            pszUtf7++;                  /* the terminating '-' is absorbed */
+            cchUtf7--;
+        }
+    }
+
+    return c;
+}
+
+/* Convert cchUtf7 bytes of UTF-7 at pszUtf7 into WCHARs at pszWide.
+ * Returns the number of WCHARs the input decodes to; with cchWide == 0
+ * nothing is written and only that count is returned.  Returns -1 when
+ * cchWide is non-zero but too small, and 0 when scratch memory for a
+ * base64 run cannot be allocated.  No terminating NUL is appended. */
+INT wine_utf7_mbstowcs(LPCSTR pszUtf7, INT cchUtf7, LPWSTR pszWide, INT cchWide)
+{
+    INT n, c, cch;
+    CHAR ch;
+    LPCSTR pch;
+    WORD *pwsz;
+
+    c = Utf7ToWideCharSize(pszUtf7, cchUtf7);
+    if (cchWide == 0)
+        return c;
+    if (cchWide < c)
+        return -1;
+
+    while(cchUtf7 > 0)
+    {
+        ch = *pszUtf7++;
+        if (ch != '+')
+        {
+            /* via BYTE: bytes >= 0x80 must not sign-extend to 0xFFxx */
+            *pszWide++ = (WCHAR)(BYTE) ch;
+            cchUtf7--;
+            continue;
+        }
+        if (cchUtf7 > 1 && *pszUtf7 == '-')     /* bounds-checked look-ahead */
+        {
+            *pszWide++ = L'+';
+            pszUtf7++;
+            cchUtf7 -= 2;
+            continue;
+        }
+        cchUtf7--;
+        pch = pszUtf7;
+        while(cchUtf7 > 0 && (BYTE) *pszUtf7 < 0x80 &&
+              base64inv[(size_t)*pszUtf7] >= 0)
+        {
+            cchUtf7--;
+            pszUtf7++;
+        }
+        cch = pszUtf7 - pch;
+        n = (cch * 3) / 8;
+        /* scratch buffer: decoding may emit one byte beyond the n kept WCHARs */
+        pwsz = (WORD *) malloc((n + 1) * sizeof(WORD));
+        if (pwsz == NULL)
+            return 0;
+        ZeroMemory(pwsz, n * sizeof(WORD));
+        Utf7Base64Decode((BYTE *) pwsz, pch, cch);
+        myswab(pwsz, n);                /* decoded pairs are high byte first */
+        CopyMemory(pszWide, pwsz, n * sizeof(WORD));
+        free(pwsz);
+        pszWide += n;
+        if (cchUtf7 > 0 && *pszUtf7 == '-')
+        {
+            pszUtf7++;
+            cchUtf7--;
+        }
+    }
+
+    return c;
+}
+
+static const char mustshift[] =   /* indexed by a character below 0x80 */
+{                                 /* non-zero: not written directly by the encoder below */
+    0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1,   /* 0x00-0x0F: NUL, TAB, LF, CR are direct */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   /* 0x10-0x1F */
+    0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0,   /* 0x20-0x2F: '+' (0x2B) is flagged */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,   /* 0x30-0x3F: digits are direct */
+    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* 0x40-0x4F */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,   /* 0x50-0x5F */
+    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* 0x60-0x6F */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1    /* 0x70-0x7F */
+};
+
+static const char base64[] =   /* 6-bit value (0-63) -> base64 character; inverse of base64inv[] */
+"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+/* Count the bytes that wine_utf7_wcstombs() emits for cchWide WCHARs at
+ * pszWide.  Must mirror the conversion loop exactly: the destination
+ * buffer is checked against this value before anything is written. */
+static INT WideCharToUtf7Size(LPCWSTR pszWide, INT cchWide)
+{
+    INT c = 0, n;
+    BOOL fShift = FALSE;
+
+    while(cchWide > 0)
+    {
+        if (*pszWide < 0x80 && !mustshift[*pszWide])
+        {
+            c++;                        /* directly encoded character */
+            cchWide--;
+            pszWide++;
+            continue;
+        }
+        /* '+' is written as "+-" only outside a base64 run; inside a run
+         * those two bytes would be taken for base64 data plus terminator,
+         * so there '+' is base64-encoded like any other shifted character. */
+        if (*pszWide == L'+' && !fShift)
+        {
+            c += 2;
+            cchWide--;
+            pszWide++;
+            continue;
+        }
+        if (!fShift)
+        {
+            c++;                        /* opening '+' */
+            fShift = TRUE;
+        }
+        /* Take up to three consecutive shifted characters as one group. */
+        pszWide++;
+        cchWide--;
+        n = 1;
+        while(n < 3 && cchWide > 0 && (*pszWide >= 0x80 || mustshift[*pszWide]))
+        {
+            pszWide++;
+            cchWide--;
+            n++;
+        }
+        c += (n * 16 + 5) / 6;          /* 1, 2, 3 WCHARs -> 3, 6, 8 bytes */
+        if (cchWide > 0 && *pszWide < 0x80 && !mustshift[*pszWide])
+        {
+            c++;                        /* closing '-' before direct text */
+            fShift = FALSE;
+        }
+    }
+    if (fShift)
+        c++;                            /* closing '-' at end of input */
+
+    return c;
+}
+
+/* Convert cchWide WCHARs at pszWide into UTF-7 bytes at pszUtf7.
+ *
+ * Characters below 0x80 that mustshift[] does not flag are copied as-is and
+ * a '+' outside a base64 run becomes "+-".  Everything else is written as
+ * an unpadded base64 run that is opened by '+' and closed by '-'.
+ *
+ * Returns the number of bytes the input encodes to; with cchUtf7 == 0
+ * nothing is written and only that count is returned.  Returns -1 when
+ * cchUtf7 is non-zero but too small.  No terminating NUL is appended. */
+INT wine_utf7_wcstombs(LPCWSTR pszWide, INT cchWide, LPSTR pszUtf7, INT cchUtf7)
+{
+    INT c, n;
+    WCHAR wsz[3];
+    BOOL fShift = FALSE;
+
+    /* Size first: cchUtf7 == 0 is a pure length query. */
+    c = WideCharToUtf7Size(pszWide, cchWide);
+    if (cchUtf7 == 0)
+        return c;
+    if (cchUtf7 < c)
+        return -1;
+
+    while(cchWide > 0)
+    {
+        if (*pszWide < 0x80 && !mustshift[*pszWide])
+        {
+            *pszUtf7++ = (CHAR) *pszWide++;
+            cchWide--;
+            continue;
+        }
+        /* "+-" is only valid outside a base64 run; see WideCharToUtf7Size(). */
+        if (*pszWide == L'+' && !fShift)
+        {
+            *pszUtf7++ = '+';
+            *pszUtf7++ = '-';
+            cchWide--;
+            pszWide++;
+            continue;
+        }
+        if (!fShift)
+        {
+            *pszUtf7++ = '+';
+            fShift = TRUE;
+        }
+        /* Gather up to three shifted characters.  The unused slots are
+         * cleared for every group so that the padding bits of a short group
+         * are zero rather than left over from the previous group. */
+        wsz[0] = *pszWide++;
+        wsz[1] = wsz[2] = 0;
+        cchWide--;
+        n = 1;
+        while(n < 3 && cchWide > 0 && (*pszWide >= 0x80 || mustshift[*pszWide]))
+        {
+            wsz[n++] = *pszWide++;
+            cchWide--;
+        }
+        /* 16 bits per WCHAR, most significant bit first, 6 bits per byte. */
+        *pszUtf7++ = base64[wsz[0] >> 10];
+        *pszUtf7++ = base64[(wsz[0] >> 4) & 0x3F];
+        *pszUtf7++ = base64[(wsz[0] << 2 | wsz[1] >> 14) & 0x3F];
+        if (n >= 2)
+        {
+            *pszUtf7++ = base64[(wsz[1] >> 8) & 0x3F];
+            *pszUtf7++ = base64[(wsz[1] >> 2) & 0x3F];
+            *pszUtf7++ = base64[(wsz[1] << 4 | wsz[2] >> 12) & 0x3F];
+        }
+        if (n >= 3)
+        {
+            *pszUtf7++ = base64[(wsz[2] >> 6) & 0x3F];
+            *pszUtf7++ = base64[wsz[2] & 0x3F];
+        }
+        /* Close the run as soon as directly encoded text follows; a run
+         * that reaches the end of the input is closed after the loop. */
+        if (cchWide > 0 && *pszWide < 0x80 && !mustshift[*pszWide])
+        {
+            *pszUtf7++ = '-';
+            fShift = FALSE;
+        }
+    }
+    if (fShift)
+        *pszUtf7 = '-';
+
+    return c;
+}
diff --git a/libs/wine/wine.map b/libs/wine/wine.map
index 2159fac..3e0e6db 100644
--- a/libs/wine/wine.map
+++ b/libs/wine/wine.map
@@ -114,6 +114,8 @@ WINE_1.0
     wine_set_fs;
     wine_set_gs;
     wine_switch_to_stack;
+    wine_utf7_mbstowcs;
+    wine_utf7_wcstombs;
     wine_utf8_mbstowcs;
     wine_utf8_wcstombs;
     wine_wctype_table;
-- 
1.7.5.4




More information about the wine-patches mailing list