Thomas Mullaly : urlmon: Implemented parser for the userinfo of a URI.

Alexandre Julliard julliard at winehq.org
Thu Jul 22 12:09:31 CDT 2010


Module: wine
Branch: master
Commit: 9590fef417b7f477dfa415b6bd6f2ef375c38169
URL:    http://source.winehq.org/git/wine.git/?a=commit;h=9590fef417b7f477dfa415b6bd6f2ef375c38169

Author: Thomas Mullaly <thomas.mullaly at gmail.com>
Date:   Wed Jun 16 19:48:23 2010 -0400

urlmon: Implemented parser for the userinfo of a URI.

---

 dlls/urlmon/tests/uri.c |   31 ++++++++++
 dlls/urlmon/uri.c       |  148 ++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 178 insertions(+), 1 deletions(-)

diff --git a/dlls/urlmon/tests/uri.c b/dlls/urlmon/tests/uri.c
index d674925..c309d19 100644
--- a/dlls/urlmon/tests/uri.c
+++ b/dlls/urlmon/tests/uri.c
@@ -853,6 +853,37 @@ static const uri_properties uri_tests[] = {
             {URL_SCHEME_UNKNOWN,S_OK,FALSE},
             {URLZONE_INVALID,E_NOTIMPL,FALSE}
         }
+    },
+    /* Windows uses the first occurence of ':' to delimit the userinfo. */
+    {   "ftp://user:pass:[email protected]/", 0, S_OK, FALSE,
+        Uri_HAS_ABSOLUTE_URI|Uri_HAS_AUTHORITY|Uri_HAS_DISPLAY_URI|Uri_HAS_DOMAIN|
+        Uri_HAS_HOST|Uri_HAS_PASSWORD|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|Uri_HAS_RAW_URI|
+        Uri_HAS_SCHEME_NAME|Uri_HAS_USER_INFO|Uri_HAS_USER_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_PORT|
+        Uri_HAS_SCHEME,
+        TRUE,
+        {
+            {"ftp://user:pass:[email protected]/",S_OK,TRUE},
+            {"user:pass:word at winehq.org",S_OK,TRUE},
+            {"ftp://winehq.org/",S_OK,TRUE},
+            {"winehq.org",S_OK,TRUE},
+            {"",S_FALSE,TRUE},
+            {"",S_FALSE,TRUE},
+            {"winehq.org",S_OK,TRUE},
+            {"pass:word",S_OK,TRUE},
+            {"/",S_OK,TRUE},
+            {"/",S_OK,TRUE},
+            {"",S_FALSE,TRUE},
+            {"ftp://user:pass:[email protected]/",S_OK,FALSE},
+            {"ftp",S_OK,FALSE},
+            {"user:pass:word",S_OK,TRUE},
+            {"user",S_OK,TRUE}
+        },
+        {
+            {Uri_HOST_DNS,S_OK,TRUE},
+            {21,S_OK,TRUE},
+            {URL_SCHEME_FTP,S_OK,FALSE},
+            {URLZONE_INVALID,E_NOTIMPL,FALSE}
+        }
     }
 };
 
diff --git a/dlls/urlmon/uri.c b/dlls/urlmon/uri.c
index d8ff2cb..0942d8a 100644
--- a/dlls/urlmon/uri.c
+++ b/dlls/urlmon/uri.c
@@ -55,6 +55,10 @@ typedef struct {
     const WCHAR     *scheme;
     DWORD           scheme_len;
     URL_SCHEME      scheme_type;
+
+    const WCHAR     *userinfo;
+    DWORD           userinfo_len;
+    INT             userinfo_split;
 } parse_data;
 
 /* List of scheme types/scheme names that are recognized by the IUri interface as of IE 7. */
@@ -125,6 +129,71 @@ static BOOL check_hierarchical(const WCHAR **ptr) {
     return TRUE;
 }
 
+/* unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~" */
+static inline BOOL is_unreserved(WCHAR val) {
+    return (is_alpha(val) || is_num(val) || val == '-' || val == '.' ||
+            val == '_' || val == '~');
+}
+
+/* sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
+ *               / "*" / "+" / "," / ";" / "="
+ */
+static inline BOOL is_subdelim(WCHAR val) {
+    return (val == '!' || val == '$' || val == '&' ||
+            val == '\'' || val == '(' || val == ')' ||
+            val == '*' || val == '+' || val == ',' ||
+            val == ';' || val == '=');
+}
+
+/* gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@" */
+static inline BOOL is_gendelim(WCHAR val) {
+    return (val == ':' || val == '/' || val == '?' ||
+            val == '#' || val == '[' || val == ']' ||
+            val == '@');
+}
+
+/* Characters that delimit the end of the authority
+ * section of a URI. Sometimes a '\\' is considered
+ * an authority delimeter.
+ */
+static inline BOOL is_auth_delim(WCHAR val, BOOL acceptSlash) {
+    return (val == '#' || val == '/' || val == '?' ||
+            val == '\0' || (acceptSlash && val == '\\'));
+}
+
+static inline BOOL is_hexdigit(WCHAR val) {
+    return ((val >= 'a' && val <= 'f') ||
+            (val >= 'A' && val <= 'F') ||
+            (val >= '0' && val <= '9'));
+}
+
+/* Checks if the characters pointed to by 'ptr' are
+ * a percent encoded data octet.
+ *
+ * pct-encoded = "%" HEXDIG HEXDIG
+ */
+static BOOL check_pct_encoded(const WCHAR **ptr) {
+    const WCHAR *start = *ptr;
+
+    if(**ptr != '%')
+        return FALSE;
+
+    ++(*ptr);
+    if(!is_hexdigit(**ptr)) {
+        *ptr = start;
+        return FALSE;
+    }
+
+    ++(*ptr);
+    if(!is_hexdigit(**ptr)) {
+        *ptr = start;
+        return FALSE;
+    }
+
+    ++(*ptr);
+    return TRUE;
+}
+
 /* Tries to parse the scheme name of the URI.
  *
  * scheme = ALPHA *(ALPHA | NUM | '+' | '-' | '.') as defined by RFC 3896.
@@ -259,6 +328,80 @@ static BOOL parse_scheme(const WCHAR **ptr, parse_data *data, DWORD flags) {
     return TRUE;
 }
 
+/* Parses the userinfo part of the URI (if it exists). The userinfo field of
+ * a URI can consist of "username:password@", or just "username@".
+ *
+ * RFC def:
+ * userinfo    = *( unreserved / pct-encoded / sub-delims / ":" )
+ *
+ * NOTES:
+ *  1)  If there is more than one ':' in the userinfo part of the URI Windows
+ *      uses the first occurence of ':' to delimit the username and password
+ *      components.
+ *
+ *      ex:
+ *          ftp://user:pass:[email protected]
+ *
+ *      Would yield, "user" as the username and "pass:word" as the password.
+ *
+ *  2)  Windows allows any character to appear in the "userinfo" part of
+ *      a URI, as long as it's not an authority delimeter character set.
+ */
+static void parse_userinfo(const WCHAR **ptr, parse_data *data, DWORD flags) {
+    data->userinfo = *ptr;
+    data->userinfo_split = -1;
+
+    while(**ptr != '@') {
+        if(**ptr == ':' && data->userinfo_split == -1)
+            data->userinfo_split = *ptr - data->userinfo;
+        else if(**ptr == '%') {
+            /* If it's a known scheme type, it has to be a valid percent
+             * encoded value.
+             */
+            if(!check_pct_encoded(ptr)) {
+                if(data->scheme_type != URL_SCHEME_UNKNOWN) {
+                    *ptr = data->userinfo;
+                    data->userinfo = NULL;
+                    data->userinfo_split = -1;
+
+                    TRACE("(%p %p %x): URI contained no userinfo.\n", ptr, data, flags);
+                    return;
+                }
+            } else
+                continue;
+        } else if(is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN))
+            break;
+
+        ++(*ptr);
+    }
+
+    if(**ptr != '@') {
+        *ptr = data->userinfo;
+        data->userinfo = NULL;
+        data->userinfo_split = -1;
+
+        TRACE("(%p %p %x): URI contained no userinfo.\n", ptr, data, flags);
+        return;
+    }
+
+    data->userinfo_len = *ptr - data->userinfo;
+    TRACE("(%p %p %x): Found userinfo=%s userinfo_len=%d split=%d.\n", ptr, data, flags,
+            debugstr_wn(data->userinfo, data->userinfo_len), data->userinfo_len, data->userinfo_split);
+    ++(*ptr);
+}
+
+/* Parses the authority information from the URI.
+ *
+ * authority   = [ userinfo "@" ] host [ ":" port ]
+ */
+static BOOL parse_authority(const WCHAR **ptr, parse_data *data, DWORD flags) {
+    parse_userinfo(ptr, data, flags);
+
+    /* TODO: Parse host and port information. */
+
+    return TRUE;
+}
+
 /* Determines how the URI should be parsed after the scheme information.
  *
  * If the scheme is followed, by "//" then, it is treated as an hierarchical URI
@@ -266,7 +409,7 @@ static BOOL parse_scheme(const WCHAR **ptr, parse_data *data, DWORD flags) {
  * URI will be treated as an opaque URI which the authority information is not parsed
  * out.
  *
- * RFC 3896 defenition of hier-part:
+ * RFC 3896 definition of hier-part:
  *
  * hier-part   = "//" authority path-abempty
  *                 / path-absolute
@@ -303,6 +446,9 @@ static BOOL parse_hierpart(const WCHAR **ptr, parse_data *data, DWORD flags) {
             data->is_opaque = FALSE;
 
             /* TODO: Handle hierarchical URI's, parse authority then parse the path. */
+            if(!parse_authority(ptr, data, flags))
+                return FALSE;
+
             return TRUE;
         }
     }




More information about the wine-cvs mailing list