Nikolay Sivov : xmllite: Improve attribute value parsing.

Alexandre Julliard julliard at winehq.org
Mon Aug 19 14:32:59 CDT 2013


Module: wine
Branch: master
Commit: a04064b8868762ebd38aecaafd5d9f1e1a14844a
URL:    http://source.winehq.org/git/wine.git/?a=commit;h=a04064b8868762ebd38aecaafd5d9f1e1a14844a

Author: Nikolay Sivov <nsivov at codeweavers.com>
Date:   Mon Aug 19 10:46:49 2013 +0400

xmllite: Improve attribute value parsing.

---

 dlls/xmllite/reader.c       |   94 ++++++++++++++++++++++++++++++++++++++++++-
 dlls/xmllite/tests/reader.c |   10 +++++
 2 files changed, 102 insertions(+), 2 deletions(-)

diff --git a/dlls/xmllite/reader.c b/dlls/xmllite/reader.c
index 4dc8185..efdc6ff 100644
--- a/dlls/xmllite/reader.c
+++ b/dlls/xmllite/reader.c
@@ -1766,12 +1766,99 @@ static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *loc
     return S_OK;
 }
 
+/* Applies normalization rules to the single char at 'ptr', used for
+   attribute values. Does nothing when is_wchar_space() rejects the char.
+
+   Rules include 2 steps:
+
+   1) a \r\n pair is collapsed in place by moving the tail over the \n;
+   2) the whitespace char at 'ptr' is replaced with ' '.
+ */
+static void reader_normalize_space(xmlreader *reader, WCHAR *ptr)
+{
+    encoded_buffer *buffer = &reader->input->buffer->utf16;
+
+    if (!is_wchar_space(*ptr)) return;
+
+    if (*ptr == '\r' && *(ptr+1) == '\n')
+    {
+        int len = buffer->written - ((char*)ptr - buffer->data) - 2*sizeof(WCHAR); /* bytes between ptr+2 and buffer->data + buffer->written */
+        memmove(ptr+1, ptr+2, len); /* shift the tail left by one WCHAR, over the \n; buffer->written is not adjusted here */
+    }
+    *ptr = ' '; /* for a collapsed pair this overwrites the remaining \r */
+}
+
 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
    [67] Reference ::= EntityRef | CharRef
    [68] EntityRef ::= '&' Name ';' */
 static HRESULT reader_parse_reference(xmlreader *reader)
 {
-    FIXME("References not supported\n");
+    WCHAR *start = reader_get_cur(reader), *ptr; /* start: position on entry; per the skip below this is the '&' */
+    WCHAR ch = 0; /* value accumulated from the digits; NOTE(review): stored as WCHAR, so a long digit run presumably wraps on assignment - confirm */
+    int len;
+
+    /* skip '&' */
+    reader_skipn(reader, 1);
+    ptr = reader_get_cur(reader);
+
+    if (*ptr == '#') /* '&#' starts a CharRef, see production [66] */
+    {
+        encoded_buffer *buffer = &reader->input->buffer->utf16;
+
+        reader_skipn(reader, 1);
+        ptr = reader_get_cur(reader);
+
+        /* hex char or decimal */
+        if (*ptr == 'x')
+        {
+            reader_skipn(reader, 1);
+            ptr = reader_get_cur(reader);
+
+            while (*ptr != ';') /* consume hex digits up to the terminating ';' */
+            {
+                if ((*ptr >= '0' && *ptr <= '9'))
+                    ch = ch*16 + *ptr - '0';
+                else if ((*ptr >= 'a' && *ptr <= 'f'))
+                    ch = ch*16 + *ptr - 'a' + 10;
+                else if ((*ptr >= 'A' && *ptr <= 'F'))
+                    ch = ch*16 + *ptr - 'A' + 10;
+                else
+                    return ch ? WC_E_SEMICOLON : WC_E_HEXDIGIT; /* ch still 0: report a missing hex digit, otherwise a missing ';' */
+                reader_skipn(reader, 1);
+                ptr = reader_get_cur(reader);
+            }
+        }
+        else
+        {
+            while (*ptr != ';') /* consume decimal digits up to the terminating ';' */
+            {
+                if ((*ptr >= '0' && *ptr <= '9'))
+                {
+                    ch = ch*10 + *ptr - '0';
+                    reader_skipn(reader, 1);
+                    ptr = reader_get_cur(reader);
+                }
+                else
+                    return ch ? WC_E_SEMICOLON : WC_E_DIGIT; /* ch still 0: report a missing digit, otherwise a missing ';' */
+            }
+        }
+
+        if (!is_char(ch)) return WC_E_XMLCHARACTER; /* also reached with ch == 0 when ';' follows immediately */
+
+        /* normalize */
+        if (is_wchar_space(ch)) ch = ' ';
+
+        len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR); /* ptr is at ';' here, so this is the byte count from ptr+1 up to buffer->written */
+        memmove(start+1, ptr+1, len); /* pull the tail up so it directly follows start; buffer->written is not adjusted here */
+        buffer->cur = (char*)start; /* point the buffer's cur back at the position the reference started at */
+
+        *start = ch; /* the whole reference text is replaced by this single char */
+
+        return S_OK;
+    }
+    else
+        FIXME("Entity references not supported\n");
+
     return E_NOTIMPL;
 }
 
@@ -1806,7 +1893,10 @@ static HRESULT reader_parse_attvalue(xmlreader *reader, strval *value)
             if (FAILED(hr)) return hr;
         }
         else
+        {
+            reader_normalize_space(reader, ptr);
             reader_skipn(reader, 1);
+        }
         ptr = reader_get_cur(reader);
     }
 
@@ -1848,7 +1938,7 @@ static HRESULT reader_parse_attribute(xmlreader *reader)
     hr = reader_parse_attvalue(reader, &value);
     if (FAILED(hr)) return hr;
 
-    TRACE("%s=\"%s\"\n", debugstr_wn(local.str, local.len), debugstr_wn(value.str, value.len));
+    TRACE("%s=%s\n", debugstr_wn(local.str, local.len), debugstr_wn(value.str, value.len));
     return reader_add_attr(reader, &local, &value);
 }
 
diff --git a/dlls/xmllite/tests/reader.c b/dlls/xmllite/tests/reader.c
index 6d81482..b53c044 100644
--- a/dlls/xmllite/tests/reader.c
+++ b/dlls/xmllite/tests/reader.c
@@ -1571,6 +1571,16 @@ static struct test_entry attributes_tests[] = {
     { "<a attr1=\'a\"ttrvalue\'/>", "attr1", "a\"ttrvalue", S_OK },
     { "<a attr1=\' \'/>", "attr1", " ", S_OK },
     { "<a attr1=\" \"/>", "attr1", " ", S_OK },
+    { "<a attr1=\"\r\n \r \n \t\n\r\"/>", "attr1", "         ", S_OK },
+    { "<a attr1=\" val \"/>", "attr1", " val ", S_OK },
+    { "<a attr1=\"\r\n\tval\n\"/>", "attr1", "  val ", S_OK },
+    { "<a attr1=\"val \"/>", "attr1", "val ", S_OK },
+    { "<a attr1=\"val&#x20;\"/>", "attr1", "val ", S_OK },
+    { "<a attr1=\"val&#xfffe;\"/>", NULL, NULL, WC_E_XMLCHARACTER },
+    { "<a attr1=\"val &#a;\"/>", NULL, NULL, WC_E_DIGIT, WC_E_SEMICOLON },
+    { "<a attr1=\"val &#12a;\"/>", NULL, NULL, WC_E_SEMICOLON },
+    { "<a attr1=\"val &#x12g;\"/>", NULL, NULL, WC_E_SEMICOLON },
+    { "<a attr1=\"val &#xg;\"/>", NULL, NULL, WC_E_HEXDIGIT, WC_E_SEMICOLON },
     { "<a attr1=attrvalue/>", NULL, NULL, WC_E_QUOTE },
     { "<a attr1=\"attr<value\"/>", NULL, NULL, WC_E_LESSTHAN },
     { NULL }




More information about the wine-cvs mailing list