xmllite: Initial support for start tag parsing
Nikolay Sivov
nsivov at codeweavers.com
Fri Jan 18 00:10:14 CST 2013
Initial support for start tag parsing
-------------- next part --------------
From 38ef5faa48ab09537be9d952291d9350990417d5 Mon Sep 17 00:00:00 2001
From: Nikolay Sivov <nsivov at codeweavers.com>
Date: Fri, 18 Jan 2013 10:04:27 +0400
Subject: [PATCH 4/4] Initial support for start tag parsing
---
dlls/xmllite/reader.c | 140 +++++++++++++++++++++++++++++++++++++++----
dlls/xmllite/tests/reader.c | 67 +++++++++++++++++++++
2 files changed, 197 insertions(+), 10 deletions(-)
diff --git a/dlls/xmllite/reader.c b/dlls/xmllite/reader.c
index c3f53b4..058875a 100644
--- a/dlls/xmllite/reader.c
+++ b/dlls/xmllite/reader.c
@@ -51,7 +51,8 @@ typedef enum
XmlReadInState_Misc_DTD,
XmlReadInState_DTD,
XmlReadInState_DTD_Misc,
- XmlReadInState_Element
+ XmlReadInState_Element,
+ XmlReadInState_Content
} XmlReaderInternalState;
typedef enum
@@ -448,11 +449,28 @@ static void readerinput_grow(xmlreaderinput *readerinput, int length)
}
}
-static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc)
+/* Returns nonzero when the raw input looks like UTF-8 (or plain ASCII) XML:
+ it starts with "<?" or "<!", or with '<' followed by either a nonzero 7-bit
+ byte or the lead byte of a 2-, 3- or 4-byte UTF-8 sequence.
+ Reads the first two bytes of the encoded buffer without a length check;
+ NOTE(review): assumes the caller already loaded at least two bytes - confirm. */
+static inline int readerinput_is_utf8(xmlreaderinput *readerinput)
{
- encoded_buffer *buffer = &readerinput->buffer->encoded;
static char startA[] = {'<','?'};
static char commentA[] = {'<','!'};
+ encoded_buffer *buffer = &readerinput->buffer->encoded;
+ unsigned char *ptr = (unsigned char*)buffer->data;
+
+ return !memcmp(buffer->data, startA, sizeof(startA)) ||
+ !memcmp(buffer->data, commentA, sizeof(commentA)) ||
+ /* test start byte; shift the unsigned view so that bytes >= 0x80 are not
+ sign-extended when plain char is signed */
+ (ptr[0] == '<' &&
+ (
+ (ptr[1] && (ptr[1] <= 0x7f)) ||
+ (ptr[1] >> 5) == 0x6 || /* 2 bytes */
+ (ptr[1] >> 4) == 0xe || /* 3 bytes */
+ (ptr[1] >> 3) == 0x1e) /* 4 bytes */
+ );
+}
+
+static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc)
+{
+ encoded_buffer *buffer = &readerinput->buffer->encoded;
static WCHAR startW[] = {'<','?'};
static WCHAR commentW[] = {'<','!'};
static char utf8bom[] = {0xef,0xbb,0xbf};
@@ -464,8 +482,7 @@ static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encod
/* try start symbols if we have enough data to do that, input buffer should contain
first chunk already */
- if (!memcmp(buffer->data, startA, sizeof(startA)) ||
- !memcmp(buffer->data, commentA, sizeof(commentA)))
+ if (readerinput_is_utf8(readerinput))
*enc = XmlEncoding_UTF8;
else if (!memcmp(buffer->data, startW, sizeof(startW)) ||
!memcmp(buffer->data, commentW, sizeof(commentW)))
@@ -987,9 +1004,10 @@ static inline int is_namestartchar(WCHAR ch)
(ch >= 0xfdf0 && ch <= 0xfffd);
}
-static inline int is_namechar(WCHAR ch)
+/* [4 NS] NCName ::= Name - (Char* ':' Char*) */
+static inline int is_ncnamechar(WCHAR ch)
{
- return (ch == ':') || (ch >= 'A' && ch <= 'Z') ||
+ return (ch >= 'A' && ch <= 'Z') ||
(ch == '_') || (ch >= 'a' && ch <= 'z') ||
(ch == '-') || (ch == '.') ||
(ch >= '0' && ch <= '9') ||
@@ -1011,6 +1029,11 @@ static inline int is_namechar(WCHAR ch)
(ch >= 0xfdf0 && ch <= 0xfffd);
}
+/* Name character test: accepts ':' in addition to every character accepted
+ by is_ncnamechar(). Returns nonzero on match. */
+static inline int is_namechar(WCHAR ch)
+{
+ return (ch == ':') || is_ncnamechar(ch);
+}
+
/* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
[#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
[#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
@@ -1316,11 +1339,106 @@ static HRESULT reader_parse_dtd(xmlreader *reader)
return S_OK;
}
+/* [7 NS] QName ::= PrefixedName | UnprefixedName
+ [8 NS] PrefixedName ::= Prefix ':' LocalPart
+ [9 NS] UnprefixedName ::= LocalPart
+ [10 NS] Prefix ::= NCName
+ [11 NS] LocalPart ::= NCName
+
+ Parses a qualified name at the current reader position and advances past it.
+ On success prefix, local and qname reference the text obtained from
+ reader_get_cur() (nothing is copied). prefix is {NULL, 0} when the name has
+ no ':'. qname starts at the prefix (or at the local part when there is no
+ prefix) and its length covers prefix, ':' and local part together.
+ Returns NC_E_QNAMECHARACTER when the first character is not accepted by
+ is_ncnamechar(), S_OK otherwise.
+ NOTE(review): an empty local part after ':' (e.g. "a:") is accepted, and the
+ first character is tested with is_ncnamechar(), so digits, '-' and '.' pass
+ as a name start - confirm whether that is intended for this initial version.
+ NOTE(review): start is kept across reader_skipn() calls; this presumably
+ relies on the buffer not moving while skipping - verify. */
+static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *local, strval *qname)
+{
+ WCHAR *ptr, *start = reader_get_cur(reader);
+
+ ptr = start;
+ if (!is_ncnamechar(*ptr)) return NC_E_QNAMECHARACTER;
+
+ while (is_ncnamechar(*ptr)) /* first NCName: the prefix, or the whole unprefixed name */
+ {
+ reader_skipn(reader, 1);
+ ptr = reader_get_cur(reader);
+ }
+
+ /* got a qualified name */
+ if (*ptr == ':')
+ {
+ prefix->str = start;
+ prefix->len = ptr-start;
+
+ reader_skipn(reader, 1); /* step over ':' */
+ start = ptr = reader_get_cur(reader); /* local part begins here */
+
+ while (is_ncnamechar(*ptr))
+ {
+ reader_skipn(reader, 1);
+ ptr = reader_get_cur(reader);
+ }
+ }
+ else
+ {
+ prefix->str = NULL;
+ prefix->len = 0;
+ }
+
+ local->str = start;
+ local->len = ptr-start;
+
+ if (prefix->len)
+ TRACE("qname %s:%s\n", debugstr_wn(prefix->str, prefix->len), debugstr_wn(local->str, local->len));
+ else
+ TRACE("ncname %s\n", debugstr_wn(local->str, local->len));
+
+ qname->str = prefix->str ? prefix->str : local->str;
+ /* count ':' too */
+ qname->len = (prefix->len ? prefix->len + 1 : 0) + local->len;
+
+ return S_OK;
+}
+
+/* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
+ [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>'
+
+ Skips one character (the caller has already checked it is '<'), parses the
+ element name with reader_parse_qname() and skips following whitespace.
+ Returns S_OK only when reader_cmp() against "/>" yields 0 at that point
+ (presumably meaning the input matches - confirm reader_cmp() semantics);
+ the "/>" itself is not skipped here. Failures from reader_parse_qname()
+ are returned unchanged. Anything else (attributes, non-empty elements)
+ logs a FIXME and returns E_NOTIMPL. */
+static HRESULT reader_parse_stag(xmlreader *reader, strval *prefix, strval *local, strval *qname)
+{
+ static const WCHAR endW[] = {'/','>',0};
+ HRESULT hr;
+
+ /* skip '<' */
+ reader_skipn(reader, 1);
+
+ hr = reader_parse_qname(reader, prefix, local, qname);
+ if (FAILED(hr)) return hr;
+
+ reader_skipspaces(reader);
+
+ if (!reader_cmp(reader, endW)) return S_OK; /* empty element without attributes */
+
+ FIXME("only empty elements without attributes supported\n");
+ return E_NOTIMPL;
+}
+
/* [39] element ::= EmptyElemTag | STag content ETag */
+/* Parses the start of an element at the current position. Returns S_FALSE
+ when reader_cmp() against "<" is nonzero (not positioned on an element).
+ Failures from reader_parse_stag() are returned unchanged, and any name with
+ a prefix is rejected with NC_E_UNDECLAREDPREFIX. On success the node type
+ becomes XmlNodeType_Element and the local and qualified names are stored
+ with reader_set_strvalue(). Element content is not parsed yet. */
static HRESULT reader_parse_element(xmlreader *reader)
{
- FIXME("element parsing not implemented\n");
- return E_NOTIMPL;
+ static const WCHAR ltW[] = {'<',0};
+ strval qname, prefix, local;
+ HRESULT hr;
+
+ /* check if we are really on element */
+ if (reader_cmp(reader, ltW)) return S_FALSE;
+ reader_shrink(reader);
+
+ /* this handles empty elements too */
+ hr = reader_parse_stag(reader, &prefix, &local, &qname);
+ if (FAILED(hr)) return hr;
+
+ /* FIXME: need to check for defined namespace to reject invalid prefix,
+ currently reject all prefixes */
+ if (prefix.len) return NC_E_UNDECLAREDPREFIX;
+
+ reader->nodetype = XmlNodeType_Element;
+ reader_set_strvalue(reader, StringValue_LocalName, &local);
+ reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
+
+ FIXME("element content parsing not implemented\n");
+ return hr; /* success code from reader_parse_stag() */
}
static HRESULT reader_parse_nextnode(xmlreader *reader)
@@ -1389,7 +1507,9 @@ static HRESULT reader_parse_nextnode(xmlreader *reader)
case XmlReadInState_Element:
hr = reader_parse_element(reader);
if (FAILED(hr)) return hr;
- break;
+
+ reader->instate = XmlReadInState_Content;
+ return hr;
default:
FIXME("internal state %d not handled\n", reader->instate);
return E_NOTIMPL;
diff --git a/dlls/xmllite/tests/reader.c b/dlls/xmllite/tests/reader.c
index d084e7b..7394564 100644
--- a/dlls/xmllite/tests/reader.c
+++ b/dlls/xmllite/tests/reader.c
@@ -1022,6 +1022,72 @@ todo_wine {
IXmlReader_Release(reader);
}
+/* Inputs for test_read_element(), terminated by an entry whose xml is NULL.
+ Initializers are positional; presumably the fields are xml text, expected
+ qualified name, expected value and expected IXmlReader_Read() result -
+ confirm against the struct test_entry declaration. */
+static struct test_entry element_tests[] = {
+ { "<a/>", "a", "", S_OK },
+ { "<a />", "a", "", S_OK },
+ { "<a:b/>", "a:b", "", NC_E_UNDECLAREDPREFIX },
+ { "<:a/>", NULL, NULL, NC_E_QNAMECHARACTER },
+ { "< a/>", NULL, NULL, NC_E_QNAMECHARACTER },
+ { NULL }
+};
+
+/* Runs every element_tests entry through one IXmlReader: sets the entry's
+ XML as input, calls IXmlReader_Read() once and checks the returned HRESULT
+ (an alternative hr_broken value is tolerated when the entry provides one).
+ When Read() succeeds it also checks that the node type is
+ XmlNodeType_Element, that the qualified name matches test->name, and that
+ the value is an empty string. */
+static void test_read_element(void)
+{
+ struct test_entry *test = element_tests;
+ IXmlReader *reader;
+ HRESULT hr;
+
+ hr = pCreateXmlReader(&IID_IXmlReader, (void**)&reader, NULL);
+ ok(hr == S_OK, "S_OK, got %08x\n", hr);
+
+ while (test->xml)
+ {
+ XmlNodeType type;
+ IStream *stream;
+
+ stream = create_stream_on_data(test->xml, strlen(test->xml)+1); /* +1: length passed includes the terminating NUL */
+ hr = IXmlReader_SetInput(reader, (IUnknown*)stream);
+ ok(hr == S_OK, "got %08x\n", hr);
+
+ type = XmlNodeType_None;
+ hr = IXmlReader_Read(reader, &type);
+ if (test->hr_broken)
+ ok(hr == test->hr || broken(hr == test->hr_broken), "got %08x for %s\n", hr, test->xml);
+ else
+ ok(hr == test->hr, "got %08x for %s\n", hr, test->xml);
+ if (hr == S_OK)
+ {
+ const WCHAR *str;
+ WCHAR *str_exp;
+ UINT len;
+
+ ok(type == XmlNodeType_Element, "got %d for %s\n", type, test->xml);
+
+ len = 0;
+ str = NULL;
+ hr = IXmlReader_GetQualifiedName(reader, &str, &len);
+ ok(hr == S_OK, "got 0x%08x\n", hr);
+ ok(len == strlen(test->name), "got %u\n", len);
+ str_exp = a2w(test->name);
+ ok(!lstrcmpW(str, str_exp), "got %s\n", wine_dbgstr_w(str));
+ free_str(str_exp);
+
+ /* value */
+ len = 1; /* nonzero so the len == 0 check below proves GetValue() wrote it */
+ str = NULL;
+ hr = IXmlReader_GetValue(reader, &str, &len);
+ ok(hr == S_OK, "got 0x%08x\n", hr);
+ ok(len == 0, "got %u\n", len);
+ ok(*str == 0, "got %s\n", wine_dbgstr_w(str));
+ }
+
+ IStream_Release(stream);
+ test++;
+ }
+
+ IXmlReader_Release(reader);
+}
+
START_TEST(reader)
{
HRESULT r;
@@ -1041,6 +1107,7 @@ START_TEST(reader)
test_read_comment();
test_read_pi();
test_read_dtd();
+ test_read_element();
test_read_full();
test_read_xmldeclaration();
--
1.7.10.4
More information about the wine-patches mailing list