[PATCH] urlmon: Reimplement canonicalize_ipv6address on top of ntdll functions

Alex Henrie alexhenrie24 at gmail.com
Mon Nov 16 23:35:48 CST 2020


Also add a test for an address, [::5efe:1.2.3.4], that the previous
implementation did not canonicalize correctly.

Signed-off-by: Alex Henrie <alexhenrie24 at gmail.com>
---
 dlls/urlmon/tests/uri.c |  25 +++
 dlls/urlmon/uri.c       | 439 +++-------------------------------------
 2 files changed, 48 insertions(+), 416 deletions(-)

diff --git a/dlls/urlmon/tests/uri.c b/dlls/urlmon/tests/uri.c
index b6d2182a270..b161d1f3eec 100644
--- a/dlls/urlmon/tests/uri.c
+++ b/dlls/urlmon/tests/uri.c
@@ -1725,6 +1725,31 @@ static const uri_properties uri_tests[] = {
             {URLZONE_INVALID,E_NOTIMPL,FALSE}
         }
     },
+    {   "http://[::5efe:1.2.3.4]", 0, S_OK, FALSE,
+        {
+            {"http://[::5efe:1.2.3.4]/",S_OK,FALSE},
+            {"[::5efe:1.2.3.4]",S_OK,FALSE},
+            {"http://[::5efe:1.2.3.4]/",S_OK,FALSE},
+            {"",S_FALSE,FALSE},
+            {"",S_FALSE,FALSE},
+            {"",S_FALSE,FALSE},
+            {"::5efe:1.2.3.4",S_OK,FALSE},
+            {"",S_FALSE,FALSE},
+            {"/",S_OK,FALSE},
+            {"/",S_OK,FALSE},
+            {"",S_FALSE,FALSE},
+            {"http://[::5efe:1.2.3.4]",S_OK,FALSE},
+            {"http",S_OK,FALSE},
+            {"",S_FALSE,FALSE},
+            {"",S_FALSE,FALSE},
+        },
+        {
+            {Uri_HOST_IPV6,S_OK,FALSE},
+            {80,S_OK,FALSE},
+            {URL_SCHEME_HTTP,S_OK,FALSE},
+            {URLZONE_INVALID,E_NOTIMPL,FALSE}
+        }
+    },
     /* Windows doesn't do anything to IPv6's in unknown schemes. */
     {   "zip://[0001:0:000:0004:0005:0006:001.002.003.000]", 0, S_OK, FALSE,
         {
diff --git a/dlls/urlmon/uri.c b/dlls/urlmon/uri.c
index b16ad07d6a8..953e7ff070c 100644
--- a/dlls/urlmon/uri.c
+++ b/dlls/urlmon/uri.c
@@ -27,6 +27,10 @@
 #include "shlwapi.h"
 
 #include "strsafe.h"
+#include "winternl.h"
+#include "inaddr.h"
+#include "in6addr.h"
+#include "ip2string.h"
 
 #define URI_DISPLAY_NO_ABSOLUTE_URI         0x1
 #define URI_DISPLAY_NO_DEFAULT_PORT_AUTH    0x2
@@ -128,27 +132,6 @@ typedef struct {
     DWORD   username_len;
 } UriBuilder;
 
-typedef struct {
-    const WCHAR *str;
-    DWORD       len;
-} h16;
-
-typedef struct {
-    /* IPv6 addresses can hold up to 8 h16 components. */
-    h16         components[8];
-    DWORD       h16_count;
-
-    /* An IPv6 can have 1 elision ("::"). */
-    const WCHAR *elision;
-
-    /* An IPv6 can contain 1 IPv4 address as the last 32bits of the address. */
-    const WCHAR *ipv4;
-    DWORD       ipv4_len;
-
-    INT         components_size;
-    INT         elision_size;
-} ipv6_address;
-
 typedef struct {
     BSTR            uri;
 
@@ -173,7 +156,7 @@ typedef struct {
     DWORD           host_len;
     Uri_HOST_TYPE   host_type;
 
-    ipv6_address    ipv6_address;
+    IN6_ADDR        ipv6_address;
 
     BOOL            has_port;
     const WCHAR     *port;
@@ -434,31 +417,6 @@ static inline BOOL is_hierarchical_uri(const WCHAR **ptr, const parse_data *data
     return FALSE;
 }
 
-/* Computes the size of the given IPv6 address.
- * Each h16 component is 16 bits. If there is an IPv4 address, it's
- * 32 bits. If there's an elision it can be 16 to 128 bits, depending
- * on the number of other components.
- *
- * Modeled after google-url's CheckIPv6ComponentsSize function
- */
-static void compute_ipv6_comps_size(ipv6_address *address) {
-    address->components_size = address->h16_count * 2;
-
-    if(address->ipv4)
-        /* IPv4 address is 4 bytes. */
-        address->components_size += 4;
-
-    if(address->elision) {
-        /* An elision can be anywhere from 2 bytes up to 16 bytes.
-         * Its size depends on the size of the h16 and IPv4 components.
-         */
-        address->elision_size = 16 - address->components_size;
-        if(address->elision_size < 2)
-            address->elision_size = 2;
-    } else
-        address->elision_size = 0;
-}
-
 /* Taken from dlls/jscript/lex.c */
 static int hex_to_int(WCHAR val) {
     if(val >= '0' && val <= '9')
@@ -693,72 +651,6 @@ static INT find_file_extension(const WCHAR *path, DWORD path_len) {
     return -1;
 }
 
-/* Computes the location where the elision should occur in the IPv6
- * address using the numerical values of each component stored in
- * 'values'. If the address shouldn't contain an elision then 'index'
- * is assigned -1 as its value. Otherwise 'index' will contain the
- * starting index (into values) where the elision should be, and 'count'
- * will contain the number of cells the elision covers.
- *
- * NOTES:
- *  Windows will expand an elision if the elision only represents one h16
- *  component of the address.
- *
- *  Ex: [1::2:3:4:5:6:7] -> [1:0:2:3:4:5:6:7]
- *
- *  If the IPv6 address contains an IPv4 address, the IPv4 address is also
- *  considered for being included as part of an elision if all its components
- *  are zeros.
- *
- *  Ex: [1:2:3:4:5:6:0.0.0.0] -> [1:2:3:4:5:6::]
- */
-static void compute_elision_location(const ipv6_address *address, const USHORT values[8],
-                                     INT *index, DWORD *count) {
-    DWORD i, max_len, cur_len;
-    INT max_index, cur_index;
-
-    max_len = cur_len = 0;
-    max_index = cur_index = -1;
-    for(i = 0; i < 8; ++i) {
-        BOOL check_ipv4 = (address->ipv4 && i == 6);
-        BOOL is_end = (check_ipv4 || i == 7);
-
-        if(check_ipv4) {
-            /* Check if the IPv4 address contains only zeros. */
-            if(values[i] == 0 && values[i+1] == 0) {
-                if(cur_index == -1)
-                    cur_index = i;
-
-                cur_len += 2;
-                ++i;
-            }
-        } else if(values[i] == 0) {
-            if(cur_index == -1)
-                cur_index = i;
-
-            ++cur_len;
-        }
-
-        if(is_end || values[i] != 0) {
-            /* We only consider it for an elision if it's
-             * more than 1 component long.
-             */
-            if(cur_len > 1 && cur_len > max_len) {
-                /* Found the new elision location. */
-                max_len = cur_len;
-                max_index = cur_index;
-            }
-
-            /* Reset the current range for the next range of zeros. */
-            cur_index = -1;
-            cur_len = 0;
-        }
-    }
-
-    *index = max_index;
-    *count = max_len;
-}
-
 /* Removes all the leading and trailing white spaces or
  * control characters from the URI and removes all control
  * characters inside of the URI string.
@@ -798,30 +690,6 @@ static BSTR pre_process_uri(LPCWSTR uri) {
     return ret;
 }
 
-/* Converts the specified IPv4 address into an uint value.
- *
- * This function assumes that the IPv4 address has already been validated.
- */
-static UINT ipv4toui(const WCHAR *ip, DWORD len) {
-    UINT ret = 0;
-    DWORD comp_value = 0;
-    const WCHAR *ptr;
-
-    for(ptr = ip; ptr < ip+len; ++ptr) {
-        if(*ptr == '.') {
-            ret <<= 8;
-            ret += comp_value;
-            comp_value = 0;
-        } else
-            comp_value = comp_value*10 + (*ptr-'0');
-    }
-
-    ret <<= 8;
-    ret += comp_value;
-
-    return ret;
-}
-
 /* Converts an IPv4 address in numerical form into its fully qualified
  * string form. This function returns the number of characters written
  * to 'dest'. If 'dest' is NULL this function will return the number of
@@ -863,70 +731,6 @@ static DWORD ui2str(WCHAR *dest, UINT value) {
     return ret;
 }
 
-/* Converts a h16 component (from an IPv6 address) into its
- * numerical value.
- *
- * This function assumes that the h16 component has already been validated.
- */
-static USHORT h16tous(h16 component) {
-    DWORD i;
-    USHORT ret = 0;
-
-    for(i = 0; i < component.len; ++i) {
-        ret <<= 4;
-        ret += hex_to_int(component.str[i]);
-    }
-
-    return ret;
-}
-
-/* Converts an IPv6 address into its 128 bits (16 bytes) numerical value.
- *
- * This function assumes that the ipv6_address has already been validated.
- */
-static BOOL ipv6_to_number(const ipv6_address *address, USHORT number[8]) {
-    DWORD i, cur_component = 0;
-    BOOL already_passed_elision = FALSE;
-
-    for(i = 0; i < address->h16_count; ++i) {
-        if(address->elision) {
-            if(address->components[i].str > address->elision && !already_passed_elision) {
-                /* Means we just passed the elision and need to add its values to
-                 * 'number' before we do anything else.
-                 */
-                INT j;
-                for(j = 0; j < address->elision_size; j+=2)
-                    number[cur_component++] = 0;
-
-                already_passed_elision = TRUE;
-            }
-        }
-
-        number[cur_component++] = h16tous(address->components[i]);
-    }
-
-    /* Case when the elision appears after the h16 components. */
-    if(!already_passed_elision && address->elision) {
-        INT j;
-        for(j = 0; j < address->elision_size; j+=2)
-            number[cur_component++] = 0;
-    }
-
-    if(address->ipv4) {
-        UINT value = ipv4toui(address->ipv4, address->ipv4_len);
-
-        if(cur_component != 6) {
-            ERR("(%p %p): Failed sanity check with %d\n", address, number, cur_component);
-            return FALSE;
-        }
-
-        number[cur_component++] = (value >> 16) & 0xffff;
-        number[cur_component] = value & 0xffff;
-    }
-
-    return TRUE;
-}
-
 /* Checks if the characters pointed to by 'ptr' are
  * a percent encoded data octet.
  *
@@ -1566,141 +1370,17 @@ static BOOL parse_reg_name(const WCHAR **ptr, parse_data *data, DWORD extras) {
  *
  * h16         = 1*4HEXDIG
  *             ; 16 bits of address represented in hexadecimal.
- *
- * Modeled after google-url's 'DoParseIPv6' function.
  */
 static BOOL parse_ipv6address(const WCHAR **ptr, parse_data *data) {
-    const WCHAR *start, *cur_start;
-    ipv6_address ip;
-
-    start = cur_start = *ptr;
-    memset(&ip, 0, sizeof(ipv6_address));
-
-    for(;; ++(*ptr)) {
-        /* Check if we're on the last character of the host. */
-        BOOL is_end = (is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN)
-                        || **ptr == ']');
-
-        BOOL is_split = (**ptr == ':');
-        BOOL is_elision = (is_split && !is_end && *(*ptr+1) == ':');
-
-        /* Check if we're at the end of a component, or
-         * if we're at the end of the IPv6 address.
-         */
-        if(is_split || is_end) {
-            DWORD cur_len = 0;
-
-            cur_len = *ptr - cur_start;
-
-            /* h16 can't have a length > 4. */
-            if(cur_len > 4) {
-                *ptr = start;
-
-                TRACE("(%p %p): h16 component to long.\n", ptr, data);
-                return FALSE;
-            }
-
-            if(cur_len == 0) {
-                /* An h16 component can't have the length of 0 unless
-                 * the elision is at the beginning of the address, or
-                 * at the end of the address.
-                 */
-                if(!((*ptr == start && is_elision) ||
-                    (is_end && (*ptr-2) == ip.elision))) {
-                    *ptr = start;
-                    TRACE("(%p %p): IPv6 component cannot have a length of 0.\n", ptr, data);
-                    return FALSE;
-                }
-            }
-
-            if(cur_len > 0) {
-                /* An IPv6 address can have no more than 8 h16 components. */
-                if(ip.h16_count >= 8) {
-                    *ptr = start;
-                    TRACE("(%p %p): Not a IPv6 address, too many h16 components.\n", ptr, data);
-                    return FALSE;
-                }
-
-                ip.components[ip.h16_count].str = cur_start;
-                ip.components[ip.h16_count].len = cur_len;
-
-                TRACE("(%p %p): Found h16 component %s, len=%d, h16_count=%d\n",
-                    ptr, data, debugstr_wn(cur_start, cur_len), cur_len,
-                    ip.h16_count);
-                ++ip.h16_count;
-            }
-        }
-
-        if(is_end)
-            break;
+    const WCHAR *terminator;
 
-        if(is_elision) {
-            /* A IPv6 address can only have 1 elision ('::'). */
-            if(ip.elision) {
-                *ptr = start;
-
-                TRACE("(%p %p): IPv6 address cannot have 2 elisions.\n", ptr, data);
-                return FALSE;
-            }
-
-            ip.elision = *ptr;
-            ++(*ptr);
-        }
-
-        if(is_split)
-            cur_start = *ptr+1;
-        else {
-            if(!check_ipv4address(ptr, TRUE)) {
-                if(!is_hexdigit(**ptr)) {
-                    /* Not a valid character for an IPv6 address. */
-                    *ptr = start;
-                    return FALSE;
-                }
-            } else {
-                /* Found an IPv4 address. */
-                ip.ipv4 = cur_start;
-                ip.ipv4_len = *ptr - cur_start;
-
-                TRACE("(%p %p): Found an attached IPv4 address %s len=%d.\n",
-                    ptr, data, debugstr_wn(ip.ipv4, ip.ipv4_len), ip.ipv4_len);
-
-                /* IPv4 addresses can only appear at the end of a IPv6. */
-                break;
-            }
-        }
-    }
-
-    compute_ipv6_comps_size(&ip);
-
-    /* Make sure the IPv6 address adds up to 16 bytes. */
-    if(ip.components_size + ip.elision_size != 16) {
-        *ptr = start;
-        TRACE("(%p %p): Invalid IPv6 address, did not add up to 16 bytes.\n", ptr, data);
+    if(RtlIpv6StringToAddressW(*ptr, &terminator, &data->ipv6_address) != 0)
+        return FALSE;
+    if(*terminator != ']' && !is_auth_delim(*terminator, data->scheme_type != URL_SCHEME_UNKNOWN))
         return FALSE;
-    }
-
-    if(ip.elision_size == 2) {
-        /* For some reason on Windows if an elision that represents
-         * only one h16 component is encountered at the very begin or
-         * end of an IPv6 address, Windows does not consider it a
-         * valid IPv6 address.
-         *
-         *  Ex: [::2:3:4:5:6:7] is not valid, even though the sum
-         *      of all the components == 128bits.
-         */
-         if(ip.elision < ip.components[0].str ||
-            ip.elision > ip.components[ip.h16_count-1].str) {
-            *ptr = start;
-            TRACE("(%p %p): Invalid IPv6 address. Detected elision of 2 bytes at the beginning or end of the address.\n",
-                ptr, data);
-            return FALSE;
-        }
-    }
 
+    *ptr = terminator;
     data->host_type = Uri_HOST_IPV6;
-    data->ipv6_address = ip;
-
-    TRACE("(%p %p): Found valid IPv6 literal %s len=%d\n", ptr, data, debugstr_wn(start, *ptr-start), (int)(*ptr-start));
     return TRUE;
 }
 
@@ -2555,96 +2235,23 @@ static BOOL canonicalize_ipv6address(const parse_data *data, Uri *uri,
             memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR));
         uri->canon_len += data->host_len;
     } else {
-        USHORT values[8];
-        INT elision_start;
-        DWORD i, elision_len;
+        WCHAR buffer[46];
+        ULONG size = ARRAY_SIZE(buffer);
 
-        if(!ipv6_to_number(&(data->ipv6_address), values)) {
-            TRACE("(%p %p %x %d): Failed to compute numerical value for IPv6 address.\n",
-                data, uri, flags, computeOnly);
-            return FALSE;
+        if(computeOnly)
+        {
+            RtlIpv6AddressToStringExW(&data->ipv6_address, 0, 0, buffer, &size);
+            uri->canon_len += size + 1;
         }
-
-        if(!computeOnly)
+        else
+        {
             uri->canon_uri[uri->canon_len] = '[';
-        ++uri->canon_len;
-
-        /* Find where the elision should occur (if any). */
-        compute_elision_location(&(data->ipv6_address), values, &elision_start, &elision_len);
-
-        TRACE("%p %p %x %d): Elision starts at %d, len=%u\n", data, uri, flags,
-            computeOnly, elision_start, elision_len);
-
-        for(i = 0; i < 8; ++i) {
-            BOOL in_elision = (elision_start > -1 && i >= elision_start &&
-                               i < elision_start+elision_len);
-            BOOL do_ipv4 = (i == 6 && data->ipv6_address.ipv4 && !in_elision &&
-                            data->ipv6_address.h16_count == 0);
-
-            if(i == elision_start) {
-                if(!computeOnly) {
-                    uri->canon_uri[uri->canon_len] = ':';
-                    uri->canon_uri[uri->canon_len+1] = ':';
-                }
-                uri->canon_len += 2;
-            }
-
-            /* We can ignore the current component if we're in the elision. */
-            if(in_elision)
-                continue;
-
-            /* We only add a ':' if we're not at i == 0, or when we're at
-             * the very end of elision range since the ':' colon was handled
-             * earlier. Otherwise we would end up with ":::" after elision.
-             */
-            if(i != 0 && !(elision_start > -1 && i == elision_start+elision_len)) {
-                if(!computeOnly)
-                    uri->canon_uri[uri->canon_len] = ':';
-                ++uri->canon_len;
-            }
-
-            if(do_ipv4) {
-                UINT val;
-                DWORD len;
-
-                /* Combine the two parts of the IPv4 address values. */
-                val = values[i];
-                val <<= 16;
-                val += values[i+1];
-
-                if(!computeOnly)
-                    len = ui2ipv4(uri->canon_uri+uri->canon_len, val);
-                else
-                    len = ui2ipv4(NULL, val);
-
-                uri->canon_len += len;
-                ++i;
-            } else {
-                /* Write a regular h16 component to the URI. */
-
-                /* Short circuit for the trivial case. */
-                if(values[i] == 0) {
-                    if(!computeOnly)
-                        uri->canon_uri[uri->canon_len] = '0';
-                    ++uri->canon_len;
-                } else {
-                    static const WCHAR formatW[] = {'%','x',0};
-
-                    if(!computeOnly)
-                        uri->canon_len += swprintf(uri->canon_uri+uri->canon_len, 5,
-                                            formatW, values[i]);
-                    else {
-                        WCHAR tmp[5];
-                        uri->canon_len += swprintf(tmp, ARRAY_SIZE(tmp), formatW, values[i]);
-                    }
-                }
-            }
-        }
-
-        /* Add the closing ']'. */
-        if(!computeOnly)
+            ++uri->canon_len;
+            RtlIpv6AddressToStringExW(&data->ipv6_address, 0, 0, uri->canon_uri + uri->canon_len, &size);
+            uri->canon_len += size - 1;
             uri->canon_uri[uri->canon_len] = ']';
-        ++uri->canon_len;
+            ++uri->canon_len;
+        }
     }
 
     uri->host_len = uri->canon_len - uri->host_start;
-- 
2.29.2




More information about the wine-devel mailing list