Thomas Mullaly : urlmon: Implemented a path parser for hierarchical URIs.

Alexandre Julliard julliard at winehq.org
Tue Aug 3 13:10:32 CDT 2010


Module: wine
Branch: master
Commit: 0ecec6d7c01050ac5b97a8bab7b9d4177f934b98
URL:    http://source.winehq.org/git/wine.git/?a=commit;h=0ecec6d7c01050ac5b97a8bab7b9d4177f934b98

Author: Thomas Mullaly <thomas.mullaly at gmail.com>
Date:   Sun Jul 18 10:27:21 2010 -0400

urlmon: Implemented a path parser for hierarchical URIs.

---

 dlls/urlmon/tests/uri.c |   32 ++++++++++++++++++-
 dlls/urlmon/uri.c       |   79 ++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 109 insertions(+), 2 deletions(-)

diff --git a/dlls/urlmon/tests/uri.c b/dlls/urlmon/tests/uri.c
index b87b9a3..7cd11ff 100644
--- a/dlls/urlmon/tests/uri.c
+++ b/dlls/urlmon/tests/uri.c
@@ -2217,6 +2217,34 @@ static const uri_properties uri_tests[] = {
             {URL_SCHEME_HTTP,S_OK,FALSE},
             {URLZONE_INVALID,E_NOTIMPL,FALSE}
         }
+    },
+    {   "zip://www.google.com\\test", Uri_CREATE_NO_CANONICALIZE, S_OK, FALSE,
+        Uri_HAS_ABSOLUTE_URI|Uri_HAS_AUTHORITY|Uri_HAS_DISPLAY_URI|Uri_HAS_DOMAIN|
+        Uri_HAS_HOST|Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME,
+        TRUE,
+        {
+            {"zip://www.google.com\\test",S_OK,TRUE},
+            {"www.google.com\\test",S_OK,FALSE},
+            {"zip://www.google.com\\test",S_OK,TRUE},
+            {"google.com\\test",S_OK,FALSE},
+            {"",S_FALSE,TRUE},
+            {"",S_FALSE,TRUE},
+            {"www.google.com\\test",S_OK,FALSE},
+            {"",S_FALSE,FALSE},
+            {"",S_FALSE,TRUE},
+            {"",S_FALSE,TRUE},
+            {"",S_FALSE,TRUE},
+            {"zip://www.google.com\\test",S_OK,FALSE},
+            {"zip",S_OK,FALSE},
+            {"",S_FALSE,FALSE},
+            {"",S_FALSE,FALSE}
+        },
+        {
+            {Uri_HOST_DNS,S_OK,FALSE},
+            {0,S_FALSE,FALSE},
+            {URL_SCHEME_UNKNOWN,S_OK,FALSE},
+            {URLZONE_INVALID,E_NOTIMPL,FALSE}
+        }
     }
 };
 
@@ -2264,7 +2292,9 @@ static const invalid_uri invalid_uri_tests[] = {
     /* Invalid port with IPv4 address. */
     {"http://www.winehq.org:1abcd",0,FALSE},
     /* Invalid port with IPv6 address. */
-    {"http://[::ffff]:32xy",0,FALSE}
+    {"http://[::ffff]:32xy",0,FALSE},
+    /* Not allowed to have backslashes with NO_CANONICALIZE. */
+    {"gopher://www.google.com\\test",Uri_CREATE_NO_CANONICALIZE,FALSE}
 };
 
 typedef struct _uri_equality {
diff --git a/dlls/urlmon/uri.c b/dlls/urlmon/uri.c
index 20d8bb1..9ab1dda 100644
--- a/dlls/urlmon/uri.c
+++ b/dlls/urlmon/uri.c
@@ -113,6 +113,9 @@ typedef struct {
     const WCHAR     *port;
     DWORD           port_len;
     USHORT          port_value;
+
+    const WCHAR     *path;
+    DWORD           path_len;
 } parse_data;
 
 static const CHAR hexDigits[] = "0123456789ABCDEF";
@@ -257,6 +260,10 @@ static inline BOOL is_hexdigit(WCHAR val) {
             (val >= '0' && val <= '9'));
 }
 
+static inline BOOL is_path_delim(WCHAR val) {
+    return (!val || val == '#' || val == '?'); /* TRUE for the characters that stop path parsing: NUL, '#' or '?'. */
+}
+
 /* Computes the size of the given IPv6 address.
  * Each h16 component is 16bits, if there is an IPv4 address, it's
  * 32bits. If there's an elision it can be 16bits to 128bits, depending
@@ -1482,6 +1489,70 @@ static BOOL parse_authority(const WCHAR **ptr, parse_data *data, DWORD flags) {
     return TRUE;
 }
 
+/* Parses the path of a hierarchical URI starting at *ptr; FALSE (with *ptr restored) means it was invalid. */
+static BOOL parse_path_hierarchical(const WCHAR **ptr, parse_data *data, DWORD flags) {
+    const WCHAR *start = *ptr; /* Saved so *ptr can be restored on failure. */
+    static const WCHAR slash[] = {'/',0};
+    /* On success *ptr rests on the delimiter (NUL, '#' or '?') that ends the path. */
+    if(is_path_delim(**ptr)) {
+        if(data->scheme_type == URL_SCHEME_WILDCARD) {
+            /* An empty path stays empty for wildcard schemes; no '/' is
+             * attached to them.
+             */
+            data->path = NULL;
+            data->path_len = 0;
+        } else if(!(flags & Uri_CREATE_NO_CANONICALIZE)) {
+            /* Canonicalization turns an empty path into "/". */
+            data->path = slash;
+            data->path_len = 1;
+        } /* Otherwise (NO_CANONICALIZE) data->path/path_len are left untouched here. */
+    } else {
+        while(!is_path_delim(**ptr)) {
+            if(**ptr == '%' && data->scheme_type != URL_SCHEME_UNKNOWN &&
+               data->scheme_type != URL_SCHEME_FILE) {
+                if(!check_pct_encoded(ptr)) {
+                    *ptr = start;
+                    return FALSE;
+                } else
+                    continue; /* presumably check_pct_encoded() already advanced *ptr - confirm */
+            } else if(**ptr == '\\') {
+                /* Not allowed to have a backslash if NO_CANONICALIZE is set
+                 * and the scheme is a known type (but not a file scheme).
+                 */
+                if(flags & Uri_CREATE_NO_CANONICALIZE) {
+                    if(data->scheme_type != URL_SCHEME_FILE &&
+                       data->scheme_type != URL_SCHEME_UNKNOWN) {
+                        *ptr = start;
+                        return FALSE;
+                    }
+                }
+            }
+
+            ++(*ptr);
+        }
+
+        /* The only time a URI doesn't have a path is when NO_CANONICALIZE is
+         * set and the raw URI didn't contain one. NOTE(review): *ptr == start
+         * looks unreachable here (the loop consumed a character) - confirm.
+         */
+        if(*ptr == start) {
+            data->path = NULL;
+            data->path_len = 0;
+        } else {
+            data->path = start;
+            data->path_len = *ptr - start;
+        }
+    }
+
+    if(data->path)
+        TRACE("(%p %p %x): Parsed path %s len=%d\n", ptr, data, flags,
+            debugstr_wn(data->path, data->path_len), data->path_len);
+    else
+        TRACE("(%p %p %x): The URI contained no path\n", ptr, data, flags);
+
+    return TRUE;
+}
+
 /* Determines how the URI should be parsed after the scheme information.
  *
  * If the scheme is followed, by "//" then, it is treated as an hierarchical URI
@@ -1525,11 +1596,15 @@ static BOOL parse_hierpart(const WCHAR **ptr, parse_data *data, DWORD flags) {
             TRACE("(%p %p %x): Treating URI as an hierarchical URI.\n", ptr, data, flags);
             data->is_opaque = FALSE;
 
+            if(data->scheme_type == URL_SCHEME_FILE)
+                /* Skip past the "//" after the scheme (if any). */
+                check_hierarchical(ptr);
+
             /* TODO: Handle hierarchical URI's, parse authority then parse the path. */
             if(!parse_authority(ptr, data, flags))
                 return FALSE;
 
-            return TRUE;
+            return parse_path_hierarchical(ptr, data, flags);
         }
     }
 
@@ -1564,6 +1639,8 @@ static BOOL parse_uri(parse_data *data, DWORD flags) {
     if(!parse_hierpart(pptr, data, flags))
         return FALSE;
 
+    /* TODO: Parse query and fragment (if the URI has one). */
+
     TRACE("(%p %x): FINISHED PARSING URI.\n", data, flags);
     return TRUE;
 }




More information about the wine-cvs mailing list