Thomas Mullaly : urlmon: Implemented a path parser for hierarchical URIs.
Alexandre Julliard
julliard at winehq.org
Tue Aug 3 13:10:32 CDT 2010
Module: wine
Branch: master
Commit: 0ecec6d7c01050ac5b97a8bab7b9d4177f934b98
URL: http://source.winehq.org/git/wine.git/?a=commit;h=0ecec6d7c01050ac5b97a8bab7b9d4177f934b98
Author: Thomas Mullaly <thomas.mullaly at gmail.com>
Date: Sun Jul 18 10:27:21 2010 -0400
urlmon: Implemented a path parser for hierarchical URIs.
---
dlls/urlmon/tests/uri.c | 32 ++++++++++++++++++-
dlls/urlmon/uri.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 109 insertions(+), 2 deletions(-)
diff --git a/dlls/urlmon/tests/uri.c b/dlls/urlmon/tests/uri.c
index b87b9a3..7cd11ff 100644
--- a/dlls/urlmon/tests/uri.c
+++ b/dlls/urlmon/tests/uri.c
@@ -2217,6 +2217,34 @@ static const uri_properties uri_tests[] = {
{URL_SCHEME_HTTP,S_OK,FALSE},
{URLZONE_INVALID,E_NOTIMPL,FALSE}
}
+ },
+ { "zip://www.google.com\\test", Uri_CREATE_NO_CANONICALIZE, S_OK, FALSE,
+ Uri_HAS_ABSOLUTE_URI|Uri_HAS_AUTHORITY|Uri_HAS_DISPLAY_URI|Uri_HAS_DOMAIN|
+ Uri_HAS_HOST|Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME,
+ TRUE,
+ {
+ {"zip://www.google.com\\test",S_OK,TRUE},
+ {"www.google.com\\test",S_OK,FALSE},
+ {"zip://www.google.com\\test",S_OK,TRUE},
+ {"google.com\\test",S_OK,FALSE},
+ {"",S_FALSE,TRUE},
+ {"",S_FALSE,TRUE},
+ {"www.google.com\\test",S_OK,FALSE},
+ {"",S_FALSE,FALSE},
+ {"",S_FALSE,TRUE},
+ {"",S_FALSE,TRUE},
+ {"",S_FALSE,TRUE},
+ {"zip://www.google.com\\test",S_OK,FALSE},
+ {"zip",S_OK,FALSE},
+ {"",S_FALSE,FALSE},
+ {"",S_FALSE,FALSE}
+ },
+ {
+ {Uri_HOST_DNS,S_OK,FALSE},
+ {0,S_FALSE,FALSE},
+ {URL_SCHEME_UNKNOWN,S_OK,FALSE},
+ {URLZONE_INVALID,E_NOTIMPL,FALSE}
+ }
}
};
@@ -2264,7 +2292,9 @@ static const invalid_uri invalid_uri_tests[] = {
/* Invalid port with IPv4 address. */
{"http://www.winehq.org:1abcd",0,FALSE},
/* Invalid port with IPv6 address. */
- {"http://[::ffff]:32xy",0,FALSE}
+ {"http://[::ffff]:32xy",0,FALSE},
+ /* Not allowed to have backslashes with NO_CANONICALIZE. */
+ {"gopher://www.google.com\\test",Uri_CREATE_NO_CANONICALIZE,FALSE}
};
typedef struct _uri_equality {
diff --git a/dlls/urlmon/uri.c b/dlls/urlmon/uri.c
index 20d8bb1..9ab1dda 100644
--- a/dlls/urlmon/uri.c
+++ b/dlls/urlmon/uri.c
@@ -113,6 +113,9 @@ typedef struct {
const WCHAR *port;
DWORD port_len;
USHORT port_value;
+
+ const WCHAR *path;
+ DWORD path_len;
} parse_data;
static const CHAR hexDigits[] = "0123456789ABCDEF";
@@ -257,6 +260,10 @@ static inline BOOL is_hexdigit(WCHAR val) {
(val >= '0' && val <= '9'));
}
+static inline BOOL is_path_delim(WCHAR val) {
+ return (!val || val == '#' || val == '?');
+}
+
/* Computes the size of the given IPv6 address.
* Each h16 component is 16bits, if there is an IPv4 address, it's
* 32bits. If there's an elision it can be 16bits to 128bits, depending
@@ -1482,6 +1489,70 @@ static BOOL parse_authority(const WCHAR **ptr, parse_data *data, DWORD flags) {
return TRUE;
}
+/* Attempts to parse the path information of a hierarchical URI. */
+static BOOL parse_path_hierarchical(const WCHAR **ptr, parse_data *data, DWORD flags) {
+ const WCHAR *start = *ptr;
+ static const WCHAR slash[] = {'/',0};
+
+ if(is_path_delim(**ptr)) {
+ if(data->scheme_type == URL_SCHEME_WILDCARD) {
+ /* Wildcard schemes don't get a '/' attached if their path is
+ * empty.
+ */
+ data->path = NULL;
+ data->path_len = 0;
+ } else if(!(flags & Uri_CREATE_NO_CANONICALIZE)) {
+ /* If the path component is empty, then a '/' is added. */
+ data->path = slash;
+ data->path_len = 1;
+ }
+ } else {
+ while(!is_path_delim(**ptr)) {
+ if(**ptr == '%' && data->scheme_type != URL_SCHEME_UNKNOWN &&
+ data->scheme_type != URL_SCHEME_FILE) {
+ if(!check_pct_encoded(ptr)) {
+ *ptr = start;
+ return FALSE;
+ } else
+ continue;
+ } else if(**ptr == '\\') {
+ /* Not allowed to have a backslash if NO_CANONICALIZE is set
+ * and the scheme is known type (but not a file scheme).
+ */
+ if(flags & Uri_CREATE_NO_CANONICALIZE) {
+ if(data->scheme_type != URL_SCHEME_FILE &&
+ data->scheme_type != URL_SCHEME_UNKNOWN) {
+ *ptr = start;
+ return FALSE;
+ }
+ }
+ }
+
+ ++(*ptr);
+ }
+
+ /* The only time a URI doesn't have a path is when
+ * the NO_CANONICALIZE flag is set and the raw URI
+ * didn't contain one.
+ */
+ if(*ptr == start) {
+ data->path = NULL;
+ data->path_len = 0;
+ } else {
+ data->path = start;
+ data->path_len = *ptr - start;
+ }
+ }
+
+ if(data->path)
+ TRACE("(%p %p %x): Parsed path %s len=%d\n", ptr, data, flags,
+ debugstr_wn(data->path, data->path_len), data->path_len);
+ else
+ TRACE("(%p %p %x): The URI contained no path\n", ptr, data, flags);
+
+ return TRUE;
+}
+
/* Determines how the URI should be parsed after the scheme information.
*
* If the scheme is followed, by "//" then, it is treated as an hierarchical URI
@@ -1525,11 +1596,15 @@ static BOOL parse_hierpart(const WCHAR **ptr, parse_data *data, DWORD flags) {
TRACE("(%p %p %x): Treating URI as an hierarchical URI.\n", ptr, data, flags);
data->is_opaque = FALSE;
+ if(data->scheme_type == URL_SCHEME_FILE)
+ /* Skip past the "//" after the scheme (if any). */
+ check_hierarchical(ptr);
+
/* TODO: Handle hierarchical URI's, parse authority then parse the path. */
if(!parse_authority(ptr, data, flags))
return FALSE;
- return TRUE;
+ return parse_path_hierarchical(ptr, data, flags);
}
}
@@ -1564,6 +1639,8 @@ static BOOL parse_uri(parse_data *data, DWORD flags) {
if(!parse_hierpart(pptr, data, flags))
return FALSE;
+ /* TODO: Parse query and fragment (if the URI has one). */
+
TRACE("(%p %x): FINISHED PARSING URI.\n", data, flags);
return TRUE;
}
More information about the wine-cvs mailing list