Hans Leidekker : webservices: Add support for encoded UTF-8 characters in WsDecodeUrl.

Alexandre Julliard julliard at winehq.org
Wed Jun 1 10:45:45 CDT 2016


Module: wine
Branch: master
Commit: 20686f47af224e9b5d56909213aa3670cdcf75c0
URL:    http://source.winehq.org/git/wine.git/?a=commit;h=20686f47af224e9b5d56909213aa3670cdcf75c0

Author: Hans Leidekker <hans at codeweavers.com>
Date:   Wed Jun  1 12:37:19 2016 +0200

webservices: Add support for encoded UTF-8 characters in WsDecodeUrl.

Signed-off-by: Hans Leidekker <hans at codeweavers.com>
Signed-off-by: Alexandre Julliard <julliard at winehq.org>

---

 dlls/webservices/tests/url.c |   7 ++-
 dlls/webservices/url.c       | 106 ++++++++++++++++++++++++++++++-------------
 2 files changed, 79 insertions(+), 34 deletions(-)

diff --git a/dlls/webservices/tests/url.c b/dlls/webservices/tests/url.c
index 3e11b5c..82059a0 100644
--- a/dlls/webservices/tests/url.c
+++ b/dlls/webservices/tests/url.c
@@ -54,8 +54,10 @@ static void test_WsDecodeUrl(void)
                             '%','2','0','2',0};
     static WCHAR url26[] = {'h','t','t','p',':','/','/','h','o','s','t','#','f','r','a','g',
                             '%','2','0','2',0};
+    static WCHAR url27[] = {'h','t','t','p',':','/','/','h','o','s','t','/','%','c','3','%','a','b','/',0};
     static WCHAR host2[] = {'h','o','s','t',' ','2'};
     static WCHAR path2[] = {'/','p','a','t','h',' ','2'};
+    static WCHAR path3[] = {'/',0xeb,'/'};
     static WCHAR query2[] = {'q','u','e','r','y',' ','2'};
     static WCHAR frag2[] = {'f','r','a','g',' ','2'};
     static const struct
@@ -103,8 +105,9 @@ static void test_WsDecodeUrl(void)
           url22 + 12, 4  },
         { url23, S_OK, WS_URL_HTTP_SCHEME_TYPE, host2, 6, 80 },
         { url24, S_OK, WS_URL_HTTP_SCHEME_TYPE, url24 + 7, 4, 80, NULL, 0, path2, 7 },
-        { url25, S_OK, WS_URL_HTTP_SCHEME_TYPE, url24 + 7, 4, 80, NULL, 0, NULL, 0, query2, 7 },
-        { url26, S_OK, WS_URL_HTTP_SCHEME_TYPE, url24 + 7, 4, 80, NULL, 0, NULL, 0, NULL, 0, frag2, 6 },
+        { url25, S_OK, WS_URL_HTTP_SCHEME_TYPE, url25 + 7, 4, 80, NULL, 0, NULL, 0, query2, 7 },
+        { url26, S_OK, WS_URL_HTTP_SCHEME_TYPE, url26 + 7, 4, 80, NULL, 0, NULL, 0, NULL, 0, frag2, 6 },
+        { url27, S_OK, WS_URL_HTTP_SCHEME_TYPE, url27 + 7, 4, 80, NULL, 0, path3, 3 },
     };
     WS_HEAP *heap;
     WS_STRING str;
diff --git a/dlls/webservices/url.c b/dlls/webservices/url.c
index 6951cd6..d28c669 100644
--- a/dlls/webservices/url.c
+++ b/dlls/webservices/url.c
@@ -71,11 +71,36 @@ static USHORT default_port( WS_URL_SCHEME_TYPE scheme )
     }
 }
 
+static unsigned char *strdup_utf8( const WCHAR *str, ULONG len, ULONG *ret_len )
+{
+    unsigned char *ret;
+    *ret_len = WideCharToMultiByte( CP_UTF8, 0, str, len, NULL, 0, NULL, NULL );
+    if ((ret = heap_alloc( *ret_len )))
+        WideCharToMultiByte( CP_UTF8, 0, str, len, (char *)ret, *ret_len, NULL, NULL );
+    return ret;
+}
+
+static inline int url_decode_byte( char c1, char c2 )
+{
+    int ret;
+
+    if (c1 >= '0' && c1 <= '9') ret = (c1 - '0') * 16;
+    else if (c1 >= 'a' && c1 <= 'f') ret = (c1 - 'a' + 10) * 16;
+    else ret = (c1 - 'A' + 10) * 16;
+
+    if (c2 >= '0' && c2 <= '9') ret += c2 - '0';
+    else if (c2 >= 'a' && c2 <= 'f') ret += c2 - 'a' + 10;
+    else ret += c2 - 'A' + 10;
+
+    return ret;
+}
+
 static WCHAR *url_decode( WCHAR *str, ULONG len, WS_HEAP *heap, ULONG *ret_len )
 {
     WCHAR *p = str, *q, *ret;
-    BOOL decode = FALSE;
-    ULONG i, val;
+    BOOL decode = FALSE, convert = FALSE;
+    ULONG i, len_utf8, len_left;
+    unsigned char *utf8, *r;
 
     *ret_len = len;
     for (i = 0; i < len; i++, p++)
@@ -84,36 +109,62 @@ static WCHAR *url_decode( WCHAR *str, ULONG len, WS_HEAP *heap, ULONG *ret_len )
         if (p[0] == '%' && isxdigitW( p[1] ) && isxdigitW( p[2] ))
         {
             decode = TRUE;
+            if (url_decode_byte( p[1], p[2] ) > 159)
+            {
+                convert = TRUE;
+                break;
+            }
             *ret_len -= 2;
         }
     }
     if (!decode) return str;
-
-    if (!(q = ret = ws_alloc( heap, *ret_len * sizeof(WCHAR) ))) return NULL;
-    p = str;
-    while (len)
+    if (!convert)
     {
-        if (len >= 3 && p[0] == '%' && isxdigitW( p[1] ) && isxdigitW( p[2] ))
+        if (!(q = ret = ws_alloc( heap, *ret_len * sizeof(WCHAR) ))) return NULL;
+        p = str;
+        while (len)
         {
-            if (p[1] >= '0' && p[1] <= '9') val = (p[1] - '0') * 16;
-            else if (p[1] >= 'a' && p[1] <= 'f') val = (p[1] - 'a') * 16;
-            else val = (p[1] - 'A') * 16;
-
-            if (p[2] >= '0' && p[2] <= '9') val += p[2] - '0';
-            else if (p[1] >= 'a' && p[1] <= 'f') val += p[2] - 'a';
-            else val += p[1] - 'A';
-
-            *q++ = val;
-            p += 3;
-            len -= 3;
+            if (len >= 3 && p[0] == '%' && isxdigitW( p[1] ) && isxdigitW( p[2] ))
+            {
+                *q++ = url_decode_byte( p[1], p[2] );
+                p += 3;
+                len -= 3;
+            }
+            else
+            {
+                *q++ = *p++;
+                len -= 1;
+            }
         }
-        else
+        return ret;
+    }
+
+    if (!(r = utf8 = strdup_utf8( str, len, &len_utf8 ))) return NULL;
+    len_left = len_utf8;
+    while (len_left)
+    {
+        if (len_left >= 3 && r[0] == '%' && isxdigit( r[1] ) && isxdigit( r[2] ))
         {
-            *q++ = *p++;
-            len -= 1;
+            r[0] = url_decode_byte( r[1], r[2] );
+            len_left -= 3;
+            memmove( r + 1, r + 3, len_left );
+            len_utf8 -= 2;
         }
+        else len_left -= 1;
+        r++;
+    }
+
+    if (!(*ret_len = MultiByteToWideChar( CP_UTF8, MB_ERR_INVALID_CHARS, (char *)utf8,
+                                          len_utf8, NULL, 0 )))
+    {
+        WARN( "invalid UTF-8 sequence\n" );
+        heap_free( utf8 );
+        return NULL;
     }
+    if ((ret = ws_alloc( heap, *ret_len * sizeof(WCHAR) )))
+        MultiByteToWideChar( CP_UTF8, 0, (char *)utf8, len_utf8, ret, *ret_len );
 
+    heap_free( utf8 );
     return ret;
 }
 
@@ -276,20 +327,11 @@ static inline ULONG escape_size( unsigned char ch, const char *except )
     }
 }
 
-static char *strdup_utf8( const WCHAR *str, ULONG len, ULONG *ret_len )
-{
-    char *ret;
-    *ret_len = WideCharToMultiByte( CP_UTF8, 0, str, len, NULL, 0, NULL, NULL );
-    if ((ret = heap_alloc( *ret_len )))
-        WideCharToMultiByte( CP_UTF8, 0, str, len, ret, *ret_len, NULL, NULL );
-    return ret;
-}
-
 static HRESULT url_encode_size( const WCHAR *str, ULONG len, const char *except, ULONG *ret_len )
 {
     ULONG i, len_utf8;
     BOOL convert = FALSE;
-    char *utf8;
+    unsigned char *utf8;
 
     *ret_len = 0;
     for (i = 0; i < len; i++)
@@ -339,7 +381,7 @@ static HRESULT url_encode( const WCHAR *str, ULONG len, WCHAR *buf, const char *
     ULONG i, len_utf8, len_enc;
     BOOL convert = FALSE;
     WCHAR *p = buf;
-    char *utf8;
+    unsigned char *utf8;
 
     *ret_len = 0;
     for (i = 0; i < len; i++)




More information about the wine-cvs mailing list