[1/3] xmllite: Implement PI parsing

Nikolay Sivov nsivov at codeweavers.com
Mon Jan 7 03:13:15 CST 2013


Implement PI parsing
-------------- next part --------------
>From a705a1e5655f035051cb5780e5985eb997dd4d83 Mon Sep 17 00:00:00 2001
From: Nikolay Sivov <nsivov at codeweavers.com>
Date: Sun, 6 Jan 2013 02:08:27 +0400
Subject: [PATCH 1/3] Implement PI parsing

---
 dlls/xmllite/reader.c       |  160 ++++++++++++++++++++++++++++++++++++++++---
 dlls/xmllite/tests/reader.c |   55 ++++++++++++++-
 2 files changed, 205 insertions(+), 10 deletions(-)

diff --git a/dlls/xmllite/reader.c b/dlls/xmllite/reader.c
index a6caf66..b8c3ba4 100644
--- a/dlls/xmllite/reader.c
+++ b/dlls/xmllite/reader.c
@@ -1,7 +1,7 @@
 /*
  * IXmlReader implementation
  *
- * Copyright 2010, 2012 Nikolay Sivov
+ * Copyright 2010, 2012-2013 Nikolay Sivov
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -80,7 +80,7 @@ typedef struct
 
 typedef struct input_buffer input_buffer;
 
-typedef struct _xmlreaderinput
+typedef struct
 {
     IXmlReaderInput IXmlReaderInput_iface;
     LONG ref;
@@ -110,7 +110,7 @@ struct attribute
     strval value;
 };
 
-typedef struct _xmlreader
+typedef struct
 {
     IXmlReader IXmlReader_iface;
     LONG ref;
@@ -402,7 +402,7 @@ static void readerinput_grow(xmlreaderinput *readerinput, int length)
 static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc)
 {
     encoded_buffer *buffer = &readerinput->buffer->encoded;
-    static char startA[] = {'<','?','x','m'};
+    static char startA[] = {'<','?'};
     static WCHAR startW[] = {'<','?'};
     static char utf8bom[] = {0xef,0xbb,0xbf};
     static char utf16lebom[] = {0xff,0xfe};
@@ -807,11 +807,11 @@ static HRESULT reader_parse_sddecl(xmlreader *reader)
 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
 static HRESULT reader_parse_xmldecl(xmlreader *reader)
 {
-    static const WCHAR xmldeclW[] = {'<','?','x','m','l',0};
+    static const WCHAR xmldeclW[] = {'<','?','x','m','l',' ',0};
     static const WCHAR declcloseW[] = {'?','>',0};
     HRESULT hr;
 
-    /* check if we have "<?xml" */
+    /* check if we have "<?xml " */
     if (reader_cmp(reader, xmldeclW)) return S_FALSE;
 
     reader_skipn(reader, 5);
@@ -879,11 +879,155 @@ static HRESULT reader_parse_comment(xmlreader *reader)
     return MX_E_INPUTEND;
 }
 
+static inline int is_namestartchar(WCHAR ch)
+{
+    return (ch == ':') || (ch >= 'A' && ch <= 'Z') ||
+           (ch == '_') || (ch >= 'a' && ch <= 'z') ||
+           (ch >= 0xc0   && ch <= 0xd6)   ||
+           (ch >= 0xd8   && ch <= 0xf6)   ||
+           (ch >= 0xf8   && ch <= 0x2ff)  ||
+           (ch >= 0x370  && ch <= 0x37d)  ||
+           (ch >= 0x37f  && ch <= 0x1fff) ||
+           (ch >= 0x200c && ch <= 0x200d) ||
+           (ch >= 0x2070 && ch <= 0x218f) ||
+           (ch >= 0x2c00 && ch <= 0x2fef) ||
+           (ch >= 0x3001 && ch <= 0xd7ff) ||
+           (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
+           (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
+           (ch >= 0xf900 && ch <= 0xfdcf) ||
+           (ch >= 0xfdf0 && ch <= 0xfffd);
+}
+
+static inline int is_namechar(WCHAR ch)
+{
+    return (ch == ':') || (ch >= 'A' && ch <= 'Z') ||
+           (ch == '_') || (ch >= 'a' && ch <= 'z') ||
+           (ch == '-') || (ch == '.') ||
+           (ch >= '0'    && ch <= '9')    ||
+           (ch == 0xb7)                   ||
+           (ch >= 0xc0   && ch <= 0xd6)   ||
+           (ch >= 0xd8   && ch <= 0xf6)   ||
+           (ch >= 0xf8   && ch <= 0x2ff)  ||
+           (ch >= 0x300  && ch <= 0x36f)  ||
+           (ch >= 0x370  && ch <= 0x37d)  ||
+           (ch >= 0x37f  && ch <= 0x1fff) ||
+           (ch >= 0x200c && ch <= 0x200d) ||
+           (ch >= 0x203f && ch <= 0x2040) ||
+           (ch >= 0x2070 && ch <= 0x218f) ||
+           (ch >= 0x2c00 && ch <= 0x2fef) ||
+           (ch >= 0x3001 && ch <= 0xd7ff) ||
+           (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
+           (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
+           (ch >= 0xf900 && ch <= 0xfdcf) ||
+           (ch >= 0xfdf0 && ch <= 0xfffd);
+}
+
+/* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
+                            [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
+                            [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
+   [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
+   [5]  Name     ::= NameStartChar (NameChar)* */
+static HRESULT reader_parse_name(xmlreader *reader, strval *name)
+{
+    const WCHAR *ptr, *start = reader_get_cur(reader);
+
+    ptr = start;
+    if (!is_namestartchar(*ptr)) return WC_E_NAMECHARACTER;
+
+    while (is_namechar(*ptr))
+    {
+        reader_skipn(reader, 1);
+        ptr = reader_get_cur(reader);
+    }
+
+    TRACE("name %s:%d\n", debugstr_wn(start, ptr-start), ptr-start);
+    name->str = start;
+    name->len = ptr-start;
+
+    return S_OK;
+}
+
+/* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
+static HRESULT reader_parse_pitarget(xmlreader *reader, strval *target)
+{
+    static const WCHAR xmlW[] = {'x','m','l'};
+    strval name;
+    HRESULT hr;
+    int i;
+
+    hr = reader_parse_name(reader, &name);
+    if (FAILED(hr)) return WC_E_PI;
+
+    /* now that we got name check for illegal content */
+    if (name.len == 3 && !strncmpiW(name.str, xmlW, 3))
+        return WC_E_LEADINGXML;
+
+    /* PITarget can't be a qualified name */
+    for (i = 0; i < name.len; i++)
+        if (name.str[i] == ':')
+            return i ? NC_E_NAMECOLON : WC_E_PI;
+
+    TRACE("pitarget %s:%d\n", debugstr_wn(name.str, name.len), name.len);
+    *target = name;
+    return S_OK;
+}
+
 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
 static HRESULT reader_parse_pi(xmlreader *reader)
 {
-    FIXME("PI not supported\n");
-    return E_NOTIMPL;
+    const WCHAR *ptr, *start;
+    strval target;
+    HRESULT hr;
+
+    /* skip '<?' */
+    reader_skipn(reader, 2);
+    reader_shrink(reader);
+
+    hr = reader_parse_pitarget(reader, &target);
+    if (FAILED(hr)) return hr;
+
+    ptr = reader_get_cur(reader);
+    /* exit earlier if there's no content */
+    if (ptr[0] == '?' && ptr[1] == '>')
+    {
+        /* skip '?>' */
+        reader_skipn(reader, 2);
+        reader->nodetype = XmlNodeType_ProcessingInstruction;
+        return S_OK;
+    }
+
+    /* now at least a single space char should be there */
+    if (!is_wchar_space(*ptr)) return WC_E_WHITESPACE;
+    reader_skipspaces(reader);
+
+    ptr = start = reader_get_cur(reader);
+
+    while (*ptr)
+    {
+        if (ptr[0] == '?')
+        {
+            if (ptr[1] == '>')
+            {
+                TRACE("%s\n", debugstr_wn(start, ptr-start));
+                /* skip '?>' */
+                reader_skipn(reader, 2);
+                reader->nodetype = XmlNodeType_ProcessingInstruction;
+                return S_OK;
+            }
+            else
+            {
+                ptr++;
+                reader_more(reader);
+            }
+        }
+        else
+        {
+            reader_skipn(reader, 1);
+            ptr = reader_get_cur(reader);
+        }
+    }
+
+    return S_OK;
 }
 
 /* [27] Misc ::= Comment | PI | S */
diff --git a/dlls/xmllite/tests/reader.c b/dlls/xmllite/tests/reader.c
index 41fc94e..7709e25 100644
--- a/dlls/xmllite/tests/reader.c
+++ b/dlls/xmllite/tests/reader.c
@@ -1,7 +1,7 @@
 /*
- * XMLLite IXmlReader tests
+ * IXmlReader tests
  *
- * Copyright 2010 (C) Nikolay Sivov
+ * Copyright 2010, 2012-2013 Nikolay Sivov
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -750,6 +750,56 @@ static void test_read_comment(void)
     IXmlReader_Release(reader);
 }
 
+struct test_entry {
+    const char *xml;
+    HRESULT hr;
+    HRESULT hr_broken; /* this is set to older version results */
+};
+
+static struct test_entry pi_tests[] = {
+    { "<?pi?>", S_OK },
+    { "<?pi ?>", S_OK },
+    { "<?pi:pi?>", NC_E_NAMECOLON, WC_E_NAMECHARACTER },
+    { "<?:pi ?>", WC_E_PI, WC_E_NAMECHARACTER },
+    { "<?-pi ?>", WC_E_PI, WC_E_NAMECHARACTER },
+    { "<?xml-stylesheet ?>", S_OK },
+    { NULL }
+};
+
+static void test_read_pi(void)
+{
+    struct test_entry *test = pi_tests;
+    IXmlReader *reader;
+    HRESULT hr;
+
+    hr = pCreateXmlReader(&IID_IXmlReader, (void**)&reader, NULL);
+    ok(hr == S_OK, "S_OK, got %08x\n", hr);
+
+    while (test->xml)
+    {
+        XmlNodeType type;
+        IStream *stream;
+
+        stream = create_stream_on_data(test->xml, strlen(test->xml)+1);
+        hr = IXmlReader_SetInput(reader, (IUnknown*)stream);
+        ok(hr == S_OK, "got %08x\n", hr);
+
+        type = XmlNodeType_None;
+        hr = IXmlReader_Read(reader, &type);
+        if (test->hr_broken)
+            ok(hr == test->hr || broken(hr == test->hr_broken), "got %08x for %s\n", hr, test->xml);
+        else
+            ok(hr == test->hr, "got %08x for %s\n", hr, test->xml);
+        if (hr == S_OK)
+            ok(type == XmlNodeType_ProcessingInstruction, "got %d for %s\n", type, test->xml);
+
+        IStream_Release(stream);
+        test++;
+    }
+
+    IXmlReader_Release(reader);
+}
+
 START_TEST(reader)
 {
     HRESULT r;
@@ -767,6 +817,7 @@ START_TEST(reader)
     test_readerinput();
     test_reader_state();
     test_read_comment();
+    test_read_pi();
     test_read_xmldeclaration();
 
     CoUninitialize();
-- 
1.7.10.4




More information about the wine-patches mailing list