[1/3] xmllite: Track internal parser state to select expected node type

Nikolay Sivov nsivov at codeweavers.com
Wed Dec 19 01:25:40 CST 2012


Track internal parser state to select expected node type
-------------- next part --------------
>From 61896071317d3e71506e7e76658a37a15363d1e8 Mon Sep 17 00:00:00 2001
From: Nikolay Sivov <nsivov at codeweavers.com>
Date: Wed, 19 Dec 2012 09:30:44 +0400
Subject: [PATCH 2/7] Track internal parser state to select expected node type

---
 dlls/xmllite/reader.c       |  162 ++++++++++++++++++++++++++++++++++---------
 dlls/xmllite/tests/reader.c |   62 +++++++++++++++++
 2 files changed, 190 insertions(+), 34 deletions(-)

diff --git a/dlls/xmllite/reader.c b/dlls/xmllite/reader.c
index 8ec2eae..8fe5ee3 100644
--- a/dlls/xmllite/reader.c
+++ b/dlls/xmllite/reader.c
@@ -44,6 +44,14 @@ typedef enum
     XmlEncoding_Unknown
 } xml_encoding;
 
+typedef enum /* internal parser state; reader_parse_nextnode() switches on it to pick what to parse next */
+{
+    XmlReadInState_Initial,  /* new input: detect encoding, then try to parse the XML declaration */
+    XmlReadInState_XmlDecl,  /* NOTE(review): not referenced by the code visible in this patch */
+    XmlReadInState_Misc_DTD, /* Misc items (whitespace, comments, PIs); entered after the Initial step */
+    XmlReadInState_DTD       /* DTD expected next; parsing it is not implemented yet */
+} XmlReaderInternalState;
+
 static const WCHAR utf16W[] = {'U','T','F','-','1','6',0};
 static const WCHAR utf8W[] = {'U','T','F','-','8',0};
 
@@ -109,6 +117,7 @@ typedef struct _xmlreader
     xmlreaderinput *input;
     IMalloc *imalloc;
     XmlReadState state;
+    XmlReaderInternalState instate;
     XmlNodeType nodetype;
     DtdProcessing dtdmode;
     UINT line, pos;           /* reader position in XML stream */
@@ -457,6 +466,7 @@ static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding
     encoded_buffer *dest = &readerinput->buffer->utf16;
     int len, dest_len;
     HRESULT hr;
+    WCHAR *ptr;
     UINT cp;
 
     hr = get_code_page(enc, &cp);
@@ -477,8 +487,9 @@ static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding
 
     dest_len = MultiByteToWideChar(cp, 0, src->cur, len, NULL, 0);
     readerinput_grow(readerinput, dest_len);
-    MultiByteToWideChar(cp, 0, src->cur, len, (WCHAR*)dest->data, dest_len);
-    dest->data[dest_len] = 0;
+    ptr = (WCHAR*)dest->data;
+    MultiByteToWideChar(cp, 0, src->cur, len, ptr, dest_len);
+    ptr[dest_len] = 0;
     readerinput->buffer->code_page = cp;
 }
 
@@ -515,13 +526,18 @@ static void reader_skipn(xmlreader *reader, int n)
     }
 }
 
+static inline int is_wchar_space(WCHAR ch) /* non-zero if ch is one of the four XML whitespace characters */
+{
+    return ch == 0x20 || ch == 0x9 || ch == 0xd || ch == 0xa; /* space, tab, CR, LF */
+}
+
 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
 static int reader_skipspaces(xmlreader *reader)
 {
     encoded_buffer *buffer = &reader->input->buffer->utf16;
     const WCHAR *ptr = reader_get_cur(reader), *start = ptr;
 
-    while (*ptr == ' ' || *ptr == '\t' || *ptr == '\r' || *ptr == '\n')
+    while (is_wchar_space(*ptr))
     {
         buffer->cur += sizeof(WCHAR);
         if (*ptr == '\r')
@@ -753,9 +769,105 @@ static HRESULT reader_parse_xmldecl(xmlreader *reader)
     if (reader_cmp(reader, declcloseW)) return WC_E_XMLDECL;
     reader_skipn(reader, 2);
 
+    reader->nodetype = XmlNodeType_XmlDeclaration;
+
     return S_OK;
 }
 
+/* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
+static HRESULT reader_parse_comment(xmlreader *reader) /* placeholder: reader is not touched, nothing is consumed */
+{
+    FIXME("comments not supported\n");
+    return E_NOTIMPL; /* always a failure code, so callers testing FAILED() stop at a comment */
+}
+
+/* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
+static HRESULT reader_parse_pi(xmlreader *reader) /* placeholder: reader is not touched, nothing is consumed */
+{
+    FIXME("PI not supported\n");
+    return E_NOTIMPL; /* always a failure code, so callers testing FAILED() stop at a PI */
+}
+
+/* [27] Misc ::= Comment | PI | S -- consumes consecutive Misc items; result stays S_FALSE when only whitespace (or nothing) was consumed */
+static HRESULT reader_parse_misc(xmlreader *reader)
+{
+    HRESULT hr = S_FALSE; /* only a Comment or PI parse overwrites this */
+
+    while (1)
+    {
+        static const WCHAR commentW[] = {'<','!','-','-',0};
+        static const WCHAR piW[] = {'<','?',0};
+        const WCHAR *cur = reader_get_cur(reader); /* re-read on every iteration */
+
+        if (is_wchar_space(*cur))
+            reader_skipspaces(reader);
+        else if (!reader_cmp(reader, commentW)) /* reader_cmp() yields 0 on a match */
+            hr = reader_parse_comment(reader);
+        else if (!reader_cmp(reader, piW))
+            hr = reader_parse_pi(reader);
+        else
+            break; /* anything else is not Misc: leave it for the caller */
+
+        if (FAILED(hr)) return hr;
+        /* no need to refresh cur here: it is fetched again at the top of the loop */
+    }
+
+    return hr;
+}
+
+static HRESULT reader_parse_nextnode(xmlreader *reader) /* runs the reader->instate state machine until one of the states returns */
+{
+    HRESULT hr;
+
+    while (1)
+    {
+        switch (reader->instate)
+        {
+        /* on the first call for a new input we need to detect the stream encoding */
+        case XmlReadInState_Initial:
+            {
+                xml_encoding enc;
+
+                hr = readerinput_growraw(reader->input);
+                if (FAILED(hr)) return hr;
+
+                /* try to detect encoding by BOM or data and set input code page */
+                hr = readerinput_detectencoding(reader->input, &enc);
+                TRACE("detected encoding %s, 0x%08x\n", debugstr_w(xml_encoding_map[enc].name), hr);
+                if (FAILED(hr)) return hr;
+
+                /* always switch the first time because the decoded buffer has to be filled with something */
+                readerinput_switchencoding(reader->input, enc);
+
+                /* parse the XML declaration */
+                hr = reader_parse_xmldecl(reader);
+                if (FAILED(hr)) return hr;
+
+                reader->instate = XmlReadInState_Misc_DTD; /* next state regardless of whether a declaration was found */
+                if (hr == S_OK) return hr; /* declaration parsed: report it; otherwise loop on in the Misc_DTD state */
+            }
+            break;
+        case XmlReadInState_Misc_DTD:
+            hr = reader_parse_misc(reader);
+            if (FAILED(hr)) return hr;
+            if (hr == S_FALSE) /* reader_parse_misc() starts from S_FALSE; only a Comment/PI parse changes it */
+            {
+                reader->instate = XmlReadInState_DTD;
+                return S_OK; /* NOTE(review): succeeds without reader->nodetype being set on this path */
+            }
+            break; /* any other success value: run the Misc_DTD state again */
+        case XmlReadInState_DTD:
+            FIXME("DTD parsing not supported\n");
+            return E_NOTIMPL;
+        default:
+            FIXME("internal state %d not handled\n", reader->instate);
+            return E_NOTIMPL;
+        }
+    }
+
+    return E_NOTIMPL; /* not reached: the loop above is only left through a return */
+}
+
 static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
 {
     xmlreader *This = impl_from_IXmlReader(iface);
@@ -844,7 +956,10 @@ static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
     /* set stream for supplied IXmlReaderInput */
     hr = readerinput_query_for_stream(This->input);
     if (hr == S_OK)
+    {
         This->state = XmlReadState_Initial;
+        This->instate = XmlReadInState_Initial;
+    }
 
     return hr;
 }
@@ -893,44 +1008,22 @@ static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LO
     return S_OK;
 }
 
-static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *node_type)
+static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *nodetype) /* parses the next node; nodetype may be NULL */
 {
     xmlreader *This = impl_from_IXmlReader(iface);
+    XmlNodeType oldtype = This->nodetype; /* remembered to notice the first node type change below */
+    HRESULT hr;
 
-    FIXME("(%p)->(%p): stub\n", This, node_type);
+    FIXME("(%p)->(%p): stub\n", This, nodetype);
 
     if (This->state == XmlReadState_Closed) return S_FALSE;
 
-    /* if it's a first call for a new input we need to detect stream encoding */
-    if (This->state == XmlReadState_Initial)
-    {
-        xml_encoding enc;
-        HRESULT hr;
-
-        hr = readerinput_growraw(This->input);
-        if (FAILED(hr)) return hr;
-
-        /* try to detect encoding by BOM or data and set input code page */
-        hr = readerinput_detectencoding(This->input, &enc);
-        TRACE("detected encoding %s, 0x%08x\n", debugstr_w(xml_encoding_map[enc].name), hr);
-        if (FAILED(hr)) return hr;
-
-        /* always switch first time cause we have to put something in */
-        readerinput_switchencoding(This->input, enc);
+    hr = reader_parse_nextnode(This);
+    if (oldtype == XmlNodeType_None && This->nodetype != oldtype)
+        This->state = XmlReadState_Interactive; /* node type moved away from None: reading has started */
+    if (hr == S_OK && nodetype) *nodetype = This->nodetype; /* out parameter is optional, skip the store when NULL */
 
-        /* parse xml declaration */
-        hr = reader_parse_xmldecl(This);
-        if (FAILED(hr)) return hr;
-
-        if (hr == S_OK)
-        {
-            This->state = XmlReadState_Interactive;
-            This->nodetype = *node_type = XmlNodeType_XmlDeclaration;
-            return S_OK;
-        }
-    }
-
-    return E_NOTIMPL;
+    return hr;
 }
 
 static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type)
@@ -1223,6 +1316,7 @@ HRESULT WINAPI CreateXmlReader(REFIID riid, void **obj, IMalloc *imalloc)
     reader->ref = 1;
     reader->input = NULL;
     reader->state = XmlReadState_Closed;
+    reader->instate = XmlReadInState_Initial;
     reader->dtdmode = DtdProcessing_Prohibit;
     reader->line  = reader->pos = 0;
     reader->imalloc = imalloc;
diff --git a/dlls/xmllite/tests/reader.c b/dlls/xmllite/tests/reader.c
index 852f3d4..ed66dea 100644
--- a/dlls/xmllite/tests/reader.c
+++ b/dlls/xmllite/tests/reader.c
@@ -497,6 +497,7 @@ static void test_readerinput(void)
     IStream_Release(stream);
 
     /* test input interface selection sequence */
+    input = NULL;
     hr = testinput_createinstance((void**)&input);
     ok(hr == S_OK, "Expected S_OK, got %08x\n", hr);
 
@@ -688,10 +689,70 @@ todo_wine {
     ok(hr == S_OK, "got %08x\n", hr);
     ok(type == XmlNodeType_XmlDeclaration, "got %d\n", type);
 
+    type = XmlNodeType_XmlDeclaration;
+    hr = IXmlReader_Read(reader, &type);
+    /* newer versions return syntax error here cause document is incomplete,
+       it makes more sense than invalid char error */
+todo_wine {
+    ok(hr == WC_E_SYNTAX || broken(hr == WC_E_XMLCHARACTER), "got 0x%08x\n", hr);
+    ok(type == XmlNodeType_None, "got %d\n", type);
+}
     IStream_Release(stream);
     IXmlReader_Release(reader);
 }
 
+static const char xml_comment[] = "\xef\xbb\xbf<!-- comment -->"; /* UTF-8 BOM followed by a plain comment */
+static const char xml_comment1[] = "\xef\xbb\xbf<!-- - comment-->"; /* single '-' inside the comment text; test expects a Comment node */
+static const char xml_comment2[] = "\xef\xbb\xbf<!-- -- comment-->"; /* "--" inside the comment text; test expects WC_E_COMMENT */
+
+static void test_read_comment(void) /* checks the first Read() result on three comment-only inputs, reusing one reader */
+{
+    HRESULT hr;
+    IStream *stream;
+    IXmlReader *reader;
+    XmlNodeType type;
+
+    hr = pCreateXmlReader(&IID_IXmlReader, (void**)&reader, NULL);
+    ok(hr == S_OK, "S_OK, got %08x\n", hr);
+
+    stream = create_stream_on_data(xml_comment, sizeof(xml_comment)); /* sizeof counts the terminating NUL too */
+    hr = IXmlReader_SetInput(reader, (IUnknown*)stream);
+    ok(hr == S_OK, "got %08x\n", hr);
+
+    type = XmlNodeType_None;
+    hr = IXmlReader_Read(reader, &type);
+todo_wine { /* both checks are marked as not passing on Wine yet */
+    ok(hr == S_OK, "got %08x\n", hr);
+    ok(type == XmlNodeType_Comment, "got %d\n", type);
+}
+    IStream_Release(stream);
+
+    stream = create_stream_on_data(xml_comment1, sizeof(xml_comment1)); /* lone '-' inside the comment */
+    hr = IXmlReader_SetInput(reader, (IUnknown*)stream);
+    ok(hr == S_OK, "got %08x\n", hr);
+
+    type = XmlNodeType_None;
+    hr = IXmlReader_Read(reader, &type);
+todo_wine {
+    ok(hr == S_OK, "got %08x\n", hr);
+    ok(type == XmlNodeType_Comment, "got %d\n", type);
+}
+    IStream_Release(stream);
+
+    stream = create_stream_on_data(xml_comment2, sizeof(xml_comment2)); /* "--" inside the comment */
+    hr = IXmlReader_SetInput(reader, (IUnknown*)stream);
+    ok(hr == S_OK, "got %08x\n", hr);
+
+    type = XmlNodeType_None;
+    hr = IXmlReader_Read(reader, &type);
+todo_wine /* no braces: presumably covers only the ok() that follows */
+    ok(hr == WC_E_COMMENT || broken(hr == WC_E_GREATERTHAN), "got %08x\n", hr);
+    ok(type == XmlNodeType_None, "got %d\n", type); /* type was preset to None before the Read() call */
+    IStream_Release(stream);
+
+    IXmlReader_Release(reader);
+}
+
 START_TEST(reader)
 {
     HRESULT r;
@@ -708,6 +769,7 @@ START_TEST(reader)
     test_reader_create();
     test_readerinput();
     test_reader_state();
+    test_read_comment();
     test_read_xmldeclaration();
 
     CoUninitialize();
-- 
1.7.10.4




More information about the wine-patches mailing list