[1/3] xmllite: Track internal parser state to select expected node type
Nikolay Sivov
nsivov at codeweavers.com
Sun Dec 9 00:29:24 CST 2012
Track internal parser state to select expected node type
-------------- next part --------------
From 5092de66745c872c5267c2c9bd9844f88e70f0eb Mon Sep 17 00:00:00 2001
From: Nikolay Sivov <nsivov at codeweavers.com>
Date: Sat, 8 Dec 2012 02:47:41 +0400
Subject: [PATCH 1/8] Track internal parser state to select expected node type
---
dlls/xmllite/reader.c | 162 ++++++++++++++++++++++++++++++++++---------
dlls/xmllite/tests/reader.c | 62 +++++++++++++++++
2 files changed, 190 insertions(+), 34 deletions(-)
diff --git a/dlls/xmllite/reader.c b/dlls/xmllite/reader.c
index 8ec2eae..8fe5ee3 100644
--- a/dlls/xmllite/reader.c
+++ b/dlls/xmllite/reader.c
@@ -44,6 +44,14 @@ typedef enum
XmlEncoding_Unknown
} xml_encoding;
+typedef enum
+{
+ XmlReadInState_Initial,
+ XmlReadInState_XmlDecl,
+ XmlReadInState_Misc_DTD,
+ XmlReadInState_DTD
+} XmlReaderInternalState;
+
static const WCHAR utf16W[] = {'U','T','F','-','1','6',0};
static const WCHAR utf8W[] = {'U','T','F','-','8',0};
@@ -109,6 +117,7 @@ typedef struct _xmlreader
xmlreaderinput *input;
IMalloc *imalloc;
XmlReadState state;
+ XmlReaderInternalState instate;
XmlNodeType nodetype;
DtdProcessing dtdmode;
UINT line, pos; /* reader position in XML stream */
@@ -457,6 +466,7 @@ static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding
encoded_buffer *dest = &readerinput->buffer->utf16;
int len, dest_len;
HRESULT hr;
+ WCHAR *ptr;
UINT cp;
hr = get_code_page(enc, &cp);
@@ -477,8 +487,9 @@ static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding
dest_len = MultiByteToWideChar(cp, 0, src->cur, len, NULL, 0);
readerinput_grow(readerinput, dest_len);
- MultiByteToWideChar(cp, 0, src->cur, len, (WCHAR*)dest->data, dest_len);
- dest->data[dest_len] = 0;
+ ptr = (WCHAR*)dest->data;
+ MultiByteToWideChar(cp, 0, src->cur, len, ptr, dest_len);
+ ptr[dest_len] = 0;
readerinput->buffer->code_page = cp;
}
@@ -515,13 +526,18 @@ static void reader_skipn(xmlreader *reader, int n)
}
}
+static inline int is_wchar_space(WCHAR ch)
+{
+ return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n';
+}
+
/* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
static int reader_skipspaces(xmlreader *reader)
{
encoded_buffer *buffer = &reader->input->buffer->utf16;
const WCHAR *ptr = reader_get_cur(reader), *start = ptr;
- while (*ptr == ' ' || *ptr == '\t' || *ptr == '\r' || *ptr == '\n')
+ while (is_wchar_space(*ptr))
{
buffer->cur += sizeof(WCHAR);
if (*ptr == '\r')
@@ -753,9 +769,105 @@ static HRESULT reader_parse_xmldecl(xmlreader *reader)
if (reader_cmp(reader, declcloseW)) return WC_E_XMLDECL;
reader_skipn(reader, 2);
+ reader->nodetype = XmlNodeType_XmlDeclaration;
+
return S_OK;
}
+/* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
+static HRESULT reader_parse_comment(xmlreader *reader)
+{
+ FIXME("comments not supported\n");
+ return E_NOTIMPL;
+}
+
+/* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
+static HRESULT reader_parse_pi(xmlreader *reader)
+{
+ FIXME("PI not supported\n");
+ return E_NOTIMPL;
+}
+
+/* [27] Misc ::= Comment | PI | S */
+static HRESULT reader_parse_misc(xmlreader *reader)
+{
+ HRESULT hr = S_FALSE;
+
+ while (1)
+ {
+ static const WCHAR commentW[] = {'<','!','-','-',0};
+ static const WCHAR piW[] = {'<','?',0};
+ const WCHAR *cur = reader_get_cur(reader);
+
+ if (is_wchar_space(*cur))
+ reader_skipspaces(reader);
+ else if (!reader_cmp(reader, commentW))
+ hr = reader_parse_comment(reader);
+ else if (!reader_cmp(reader, piW))
+ hr = reader_parse_pi(reader);
+ else
+ break;
+
+ if (FAILED(hr)) return hr;
+ cur = reader_get_cur(reader);
+ }
+
+ return hr;
+}
+
+static HRESULT reader_parse_nextnode(xmlreader *reader)
+{
+ HRESULT hr;
+
+ while (1)
+ {
+ switch (reader->instate)
+ {
+ /* if this is the first call for a new input we need to detect the stream encoding */
+ case XmlReadInState_Initial:
+ {
+ xml_encoding enc;
+
+ hr = readerinput_growraw(reader->input);
+ if (FAILED(hr)) return hr;
+
+ /* try to detect encoding by BOM or data and set input code page */
+ hr = readerinput_detectencoding(reader->input, &enc);
+ TRACE("detected encoding %s, 0x%08x\n", debugstr_w(xml_encoding_map[enc].name), hr);
+ if (FAILED(hr)) return hr;
+
+ /* always switch the first time because we have to put something in */
+ readerinput_switchencoding(reader->input, enc);
+
+ /* parse xml declaration */
+ hr = reader_parse_xmldecl(reader);
+ if (FAILED(hr)) return hr;
+
+ reader->instate = XmlReadInState_Misc_DTD;
+ if (hr == S_OK) return hr;
+ }
+ break;
+ case XmlReadInState_Misc_DTD:
+ hr = reader_parse_misc(reader);
+ if (FAILED(hr)) return hr;
+ if (hr == S_FALSE)
+ {
+ reader->instate = XmlReadInState_DTD;
+ return S_OK;
+ }
+ break;
+ case XmlReadInState_DTD:
+ FIXME("DTD parsing not supported\n");
+ return E_NOTIMPL;
+ default:
+ FIXME("internal state %d not handled\n", reader->instate);
+ return E_NOTIMPL;
+ }
+ }
+
+ return E_NOTIMPL;
+}
+
static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
{
xmlreader *This = impl_from_IXmlReader(iface);
@@ -844,7 +956,10 @@ static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
/* set stream for supplied IXmlReaderInput */
hr = readerinput_query_for_stream(This->input);
if (hr == S_OK)
+ {
This->state = XmlReadState_Initial;
+ This->instate = XmlReadInState_Initial;
+ }
return hr;
}
@@ -893,44 +1008,22 @@ static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LO
return S_OK;
}
-static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *node_type)
+static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *nodetype)
{
xmlreader *This = impl_from_IXmlReader(iface);
+ XmlNodeType oldtype = This->nodetype;
+ HRESULT hr;
- FIXME("(%p)->(%p): stub\n", This, node_type);
+ FIXME("(%p)->(%p): stub\n", This, nodetype);
if (This->state == XmlReadState_Closed) return S_FALSE;
- /* if it's a first call for a new input we need to detect stream encoding */
- if (This->state == XmlReadState_Initial)
- {
- xml_encoding enc;
- HRESULT hr;
-
- hr = readerinput_growraw(This->input);
- if (FAILED(hr)) return hr;
-
- /* try to detect encoding by BOM or data and set input code page */
- hr = readerinput_detectencoding(This->input, &enc);
- TRACE("detected encoding %s, 0x%08x\n", debugstr_w(xml_encoding_map[enc].name), hr);
- if (FAILED(hr)) return hr;
-
- /* always switch first time cause we have to put something in */
- readerinput_switchencoding(This->input, enc);
+ hr = reader_parse_nextnode(This);
+ if (oldtype == XmlNodeType_None && This->nodetype != oldtype)
+ This->state = XmlReadState_Interactive;
+ if (hr == S_OK) *nodetype = This->nodetype;
- /* parse xml declaration */
- hr = reader_parse_xmldecl(This);
- if (FAILED(hr)) return hr;
-
- if (hr == S_OK)
- {
- This->state = XmlReadState_Interactive;
- This->nodetype = *node_type = XmlNodeType_XmlDeclaration;
- return S_OK;
- }
- }
-
- return E_NOTIMPL;
+ return hr;
}
static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type)
@@ -1223,6 +1316,7 @@ HRESULT WINAPI CreateXmlReader(REFIID riid, void **obj, IMalloc *imalloc)
reader->ref = 1;
reader->input = NULL;
reader->state = XmlReadState_Closed;
+ reader->instate = XmlReadInState_Initial;
reader->dtdmode = DtdProcessing_Prohibit;
reader->line = reader->pos = 0;
reader->imalloc = imalloc;
diff --git a/dlls/xmllite/tests/reader.c b/dlls/xmllite/tests/reader.c
index 852f3d4..27afee4 100644
--- a/dlls/xmllite/tests/reader.c
+++ b/dlls/xmllite/tests/reader.c
@@ -497,6 +497,7 @@ static void test_readerinput(void)
IStream_Release(stream);
/* test input interface selection sequence */
+ input = NULL;
hr = testinput_createinstance((void**)&input);
ok(hr == S_OK, "Expected S_OK, got %08x\n", hr);
@@ -688,10 +689,70 @@ todo_wine {
ok(hr == S_OK, "got %08x\n", hr);
ok(type == XmlNodeType_XmlDeclaration, "got %d\n", type);
+ type = XmlNodeType_XmlDeclaration;
+ hr = IXmlReader_Read(reader, &type);
+ /* newer versions return a syntax error here because the document is incomplete,
+ which makes more sense than an invalid character error */
+todo_wine {
+ ok(hr == WC_E_SYNTAX || broken(hr == WC_E_XMLCHARACTER), "got 0x%08x\n", hr);
+ ok(type == XmlNodeType_None, "got %d\n", type);
+}
IStream_Release(stream);
IXmlReader_Release(reader);
}
+static const char xml_comment[] = "\xef\xbb\xbf<!-- comment -->";
+static const char xml_comment1[] = "\xef\xbb\xbf<!-- - comment-->";
+static const char xml_comment2[] = "\xef\xbb\xbf<!-- -- comment-->";
+
+static void test_read_comment(void)
+{
+ HRESULT hr;
+ IStream *stream;
+ IXmlReader *reader;
+ XmlNodeType type;
+
+ hr = pCreateXmlReader(&IID_IXmlReader, (void**)&reader, NULL);
+ ok(hr == S_OK, "S_OK, got %08x\n", hr);
+
+ stream = create_stream_on_data(xml_comment, sizeof(xml_comment));
+ hr = IXmlReader_SetInput(reader, (IUnknown*)stream);
+ ok(hr == S_OK, "got %08x\n", hr);
+
+ type = XmlNodeType_None;
+ hr = IXmlReader_Read(reader, &type);
+todo_wine {
+ ok(hr == S_OK, "got %08x\n", hr);
+ ok(type == XmlNodeType_Comment, "got %d\n", type);
+}
+ IStream_Release(stream);
+
+ stream = create_stream_on_data(xml_comment1, sizeof(xml_comment1));
+ hr = IXmlReader_SetInput(reader, (IUnknown*)stream);
+ ok(hr == S_OK, "got %08x\n", hr);
+
+ type = XmlNodeType_None;
+ hr = IXmlReader_Read(reader, &type);
+todo_wine {
+ ok(hr == S_OK, "got %08x\n", hr);
+ ok(type == XmlNodeType_Comment, "got %d\n", type);
+}
+ IStream_Release(stream);
+
+ stream = create_stream_on_data(xml_comment2, sizeof(xml_comment2));
+ hr = IXmlReader_SetInput(reader, (IUnknown*)stream);
+ ok(hr == S_OK, "got %08x\n", hr);
+
+ type = XmlNodeType_None;
+ hr = IXmlReader_Read(reader, &type);
+todo_wine
+ ok(hr == WC_E_COMMENT || broken(hr == WC_E_GREATERTHAN), "got %08x\n", hr);
+ ok(type == XmlNodeType_None, "got %d\n", type);
+ IStream_Release(stream);
+
+ IXmlReader_Release(reader);
+}
+
START_TEST(reader)
{
HRESULT r;
@@ -709,6 +770,7 @@ START_TEST(reader)
test_readerinput();
test_reader_state();
test_read_xmldeclaration();
+ test_read_comment();
CoUninitialize();
}
--
1.7.10.4
More information about the wine-patches
mailing list