Thomas Mullaly : urlmon: Implemented parser for the userinfo of a URI.
Alexandre Julliard
julliard at winehq.org
Thu Jul 22 12:09:31 CDT 2010
Module: wine
Branch: master
Commit: 9590fef417b7f477dfa415b6bd6f2ef375c38169
URL: http://source.winehq.org/git/wine.git/?a=commit;h=9590fef417b7f477dfa415b6bd6f2ef375c38169
Author: Thomas Mullaly <thomas.mullaly at gmail.com>
Date: Wed Jun 16 19:48:23 2010 -0400
urlmon: Implemented parser for the userinfo of a URI.
---
dlls/urlmon/tests/uri.c | 31 ++++++++++
dlls/urlmon/uri.c | 148 ++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 178 insertions(+), 1 deletions(-)
diff --git a/dlls/urlmon/tests/uri.c b/dlls/urlmon/tests/uri.c
index d674925..c309d19 100644
--- a/dlls/urlmon/tests/uri.c
+++ b/dlls/urlmon/tests/uri.c
@@ -853,6 +853,37 @@ static const uri_properties uri_tests[] = {
{URL_SCHEME_UNKNOWN,S_OK,FALSE},
{URLZONE_INVALID,E_NOTIMPL,FALSE}
}
+ },
+ /* Windows uses the first occurrence of ':' to delimit the userinfo. */
+ { "ftp://user:pass:word@winehq.org/", 0, S_OK, FALSE,
+ Uri_HAS_ABSOLUTE_URI|Uri_HAS_AUTHORITY|Uri_HAS_DISPLAY_URI|Uri_HAS_DOMAIN|
+ Uri_HAS_HOST|Uri_HAS_PASSWORD|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|Uri_HAS_RAW_URI|
+ Uri_HAS_SCHEME_NAME|Uri_HAS_USER_INFO|Uri_HAS_USER_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_PORT|
+ Uri_HAS_SCHEME,
+ TRUE,
+ {
+ {"ftp://user:pass:word@winehq.org/",S_OK,TRUE},
+ {"user:pass:word@winehq.org",S_OK,TRUE},
+ {"ftp://winehq.org/",S_OK,TRUE},
+ {"winehq.org",S_OK,TRUE},
+ {"",S_FALSE,TRUE},
+ {"",S_FALSE,TRUE},
+ {"winehq.org",S_OK,TRUE},
+ {"pass:word",S_OK,TRUE},
+ {"/",S_OK,TRUE},
+ {"/",S_OK,TRUE},
+ {"",S_FALSE,TRUE},
+ {"ftp://user:pass:word@winehq.org/",S_OK,FALSE},
+ {"ftp",S_OK,FALSE},
+ {"user:pass:word",S_OK,TRUE},
+ {"user",S_OK,TRUE}
+ },
+ {
+ {Uri_HOST_DNS,S_OK,TRUE},
+ {21,S_OK,TRUE},
+ {URL_SCHEME_FTP,S_OK,FALSE},
+ {URLZONE_INVALID,E_NOTIMPL,FALSE}
+ }
}
};
diff --git a/dlls/urlmon/uri.c b/dlls/urlmon/uri.c
index d8ff2cb..0942d8a 100644
--- a/dlls/urlmon/uri.c
+++ b/dlls/urlmon/uri.c
@@ -55,6 +55,10 @@ typedef struct {
const WCHAR *scheme;
DWORD scheme_len;
URL_SCHEME scheme_type;
+
+ const WCHAR *userinfo;
+ DWORD userinfo_len;
+ INT userinfo_split;
} parse_data;
/* List of scheme types/scheme names that are recognized by the IUri interface as of IE 7. */
@@ -125,6 +129,71 @@ static BOOL check_hierarchical(const WCHAR **ptr) {
return TRUE;
}
+/* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+ *
+ * Returns TRUE if 'val' is accepted by is_alpha() or is_num() (both defined
+ * elsewhere in this file; presumably the ALPHA and DIGIT classes above) or is
+ * one of the four punctuation characters listed in the rule.
+ */
+static inline BOOL is_unreserved(WCHAR val) {
+ return (is_alpha(val) || is_num(val) || val == '-' || val == '.' ||
+ val == '_' || val == '~');
+}
+
+/* sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
+ * / "*" / "+" / "," / ";" / "="
+ *
+ * Returns TRUE if 'val' is exactly one of the eleven characters listed
+ * above, FALSE otherwise.
+ */
+static inline BOOL is_subdelim(WCHAR val) {
+ return (val == '!' || val == '$' || val == '&' ||
+ val == '\'' || val == '(' || val == ')' ||
+ val == '*' || val == '+' || val == ',' ||
+ val == ';' || val == '=');
+}
+
+/* gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+ *
+ * Returns TRUE if 'val' is exactly one of the seven characters listed
+ * above, FALSE otherwise.
+ */
+static inline BOOL is_gendelim(WCHAR val) {
+ return (val == ':' || val == '/' || val == '?' ||
+ val == '#' || val == '[' || val == ']' ||
+ val == '@');
+}
+
+/* Characters that delimit the end of the authority
+ * section of a URI. Sometimes a '\\' is considered
+ * an authority delimiter.
+ *
+ * Returns TRUE if 'val' is '#', '/', '?' or the terminating '\0'.
+ * A backslash is additionally treated as a delimiter only when
+ * 'acceptSlash' is TRUE.
+ */
+static inline BOOL is_auth_delim(WCHAR val, BOOL acceptSlash) {
+ return (val == '#' || val == '/' || val == '?' ||
+ val == '\0' || (acceptSlash && val == '\\'));
+}
+
+static inline BOOL is_hexdigit(WCHAR val) { /* TRUE if 'val' is an ASCII hex digit: a-f, A-F or 0-9. */
+ return ((val >= 'a' && val <= 'f') ||
+ (val >= 'A' && val <= 'F') ||
+ (val >= '0' && val <= '9'));
+}
+
+/* Checks if the characters pointed to by 'ptr' are
+ * a percent encoded data octet.
+ *
+ * pct-encoded = "%" HEXDIG HEXDIG
+ *
+ * Returns TRUE and advances '*ptr' past all three characters when the
+ * sequence matches. Returns FALSE and leaves '*ptr' at its original
+ * position otherwise.
+ *
+ * A '\0' is not a hex digit, so on a NUL-terminated string the check stops
+ * at the terminator instead of reading beyond it.
+ */
+static BOOL check_pct_encoded(const WCHAR **ptr) {
+ const WCHAR *start = *ptr; /* Saved so '*ptr' can be restored on failure. */
+
+ if(**ptr != '%')
+ return FALSE; /* Nothing consumed yet, no need to restore. */
+
+ ++(*ptr); /* Step over the '%'. */
+ if(!is_hexdigit(**ptr)) {
+ *ptr = start;
+ return FALSE;
+ }
+
+ ++(*ptr); /* Step over the first hex digit. */
+ if(!is_hexdigit(**ptr)) {
+ *ptr = start;
+ return FALSE;
+ }
+
+ ++(*ptr); /* Step over the second hex digit. */
+ return TRUE;
+}
+
/* Tries to parse the scheme name of the URI.
*
* scheme = ALPHA *(ALPHA | NUM | '+' | '-' | '.') as defined by RFC 3896.
@@ -259,6 +328,80 @@ static BOOL parse_scheme(const WCHAR **ptr, parse_data *data, DWORD flags) {
return TRUE;
}
+/* Parses the userinfo part of the URI (if it exists). The userinfo field of
+ * a URI can consist of "username:password@", or just "username@".
+ *
+ * RFC def:
+ * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
+ *
+ * NOTES:
+ * 1) If there is more than one ':' in the userinfo part of the URI Windows
+ * uses the first occurrence of ':' to delimit the username and password
+ * components.
+ *
+ * ex:
+ * ftp://user:pass:word@winehq.org
+ *
+ * Would yield, "user" as the username and "pass:word" as the password.
+ *
+ * 2) Windows allows any character to appear in the "userinfo" part of
+ * a URI, as long as it's not an authority delimiter character.
+ *
+ * On success: data->userinfo points at the first userinfo character,
+ * data->userinfo_len is the number of characters before the '@',
+ * data->userinfo_split is the offset of the first ':' (or -1 if there is
+ * none) and '*ptr' is advanced past the '@'.
+ *
+ * When no userinfo is found: '*ptr' is restored to where it started,
+ * data->userinfo is set to NULL and data->userinfo_split to -1.
+ * data->userinfo_len is not modified on this path.
+ *
+ * 'flags' is only used for the TRACE output in this function.
+ */
+static void parse_userinfo(const WCHAR **ptr, parse_data *data, DWORD flags) {
+ data->userinfo = *ptr;
+ data->userinfo_split = -1;
+
+ while(**ptr != '@') {
+ if(**ptr == ':' && data->userinfo_split == -1)
+ data->userinfo_split = *ptr - data->userinfo; /* Only the first ':' is recorded. */
+ else if(**ptr == '%') {
+ /* If it's a known scheme type, it has to be a valid percent
+ * encoded value.
+ */
+ if(!check_pct_encoded(ptr)) {
+ if(data->scheme_type != URL_SCHEME_UNKNOWN) {
+ *ptr = data->userinfo;
+ data->userinfo = NULL;
+ data->userinfo_split = -1;
+
+ TRACE("(%p %p %x): URI contained no userinfo.\n", ptr, data, flags);
+ return;
+ }
+ } else
+ continue; /* check_pct_encoded() already advanced '*ptr' past "%XX". */
+ } else if(is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN)) /* '\\' only delimits for known schemes. */
+ break;
+
+ ++(*ptr);
+ }
+
+ if(**ptr != '@') { /* Loop stopped on an authority delimiter, so there is no userinfo. */
+ *ptr = data->userinfo;
+ data->userinfo = NULL;
+ data->userinfo_split = -1;
+
+ TRACE("(%p %p %x): URI contained no userinfo.\n", ptr, data, flags);
+ return;
+ }
+
+ data->userinfo_len = *ptr - data->userinfo;
+ TRACE("(%p %p %x): Found userinfo=%s userinfo_len=%d split=%d.\n", ptr, data, flags,
+ debugstr_wn(data->userinfo, data->userinfo_len), data->userinfo_len, data->userinfo_split);
+ ++(*ptr); /* Skip the '@' so the caller continues with the host. */
+}
+
+/* Parses the authority information from the URI.
+ *
+ * authority = [ userinfo "@" ] host [ ":" port ]
+ *
+ * Only the optional userinfo is parsed at this point (see the TODO below).
+ * parse_userinfo() returns no status, so a missing userinfo is not an error
+ * and this function currently always returns TRUE.
+ */
+static BOOL parse_authority(const WCHAR **ptr, parse_data *data, DWORD flags) {
+ parse_userinfo(ptr, data, flags);
+
+ /* TODO: Parse host and port information. */
+
+ return TRUE;
+}
+
/* Determines how the URI should be parsed after the scheme information.
*
* If the scheme is followed, by "//" then, it is treated as an hierarchical URI
@@ -266,7 +409,7 @@ static BOOL parse_scheme(const WCHAR **ptr, parse_data *data, DWORD flags) {
* URI will be treated as an opaque URI which the authority information is not parsed
* out.
*
- * RFC 3896 defenition of hier-part:
+ * RFC 3986 definition of hier-part:
*
* hier-part = "//" authority path-abempty
* / path-absolute
@@ -303,6 +446,9 @@ static BOOL parse_hierpart(const WCHAR **ptr, parse_data *data, DWORD flags) {
data->is_opaque = FALSE;
/* TODO: Handle hierarchical URI's, parse authority then parse the path. */
+ if(!parse_authority(ptr, data, flags))
+ return FALSE;
+
return TRUE;
}
}
More information about the wine-cvs
mailing list