xmllite: Initial implementation of DTD external id parsing
Nikolay Sivov
nsivov at codeweavers.com
Wed Jan 16 01:32:21 CST 2013
Initial implementation of DTD external id parsing
-------------- next part --------------
From aff0cd681fc4d0b22c464d9eda441a1233278e2a Mon Sep 17 00:00:00 2001
From: Nikolay Sivov <nsivov at codeweavers.com>
Date: Tue, 15 Jan 2013 23:54:14 +0400
Subject: [PATCH 1/2] Initial implementation of DTD external id parsing
---
dlls/xmllite/reader.c | 198 ++++++++++++++++++++++++++++++++++++++++---
dlls/xmllite/tests/reader.c | 87 +++++++++++++++++++
2 files changed, 272 insertions(+), 13 deletions(-)
diff --git a/dlls/xmllite/reader.c b/dlls/xmllite/reader.c
index 12b1d4a..c3f53b4 100644
--- a/dlls/xmllite/reader.c
+++ b/dlls/xmllite/reader.c
@@ -944,6 +944,30 @@ static HRESULT reader_parse_comment(xmlreader *reader)
return MX_E_INPUTEND;
}
+/* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
+ *
+ * Returns 1 when ch is acceptable as an XML character, 0 otherwise.
+ * Surrogate halves (0xd800-0xdfff) are accepted individually, so the accepted
+ * set is tab, LF, CR and the contiguous range 0x20-0xfffd. */
+static inline int is_char(WCHAR ch)
+{
+    /* the three control characters the production lists explicitly */
+    if (ch == '\t' || ch == '\n' || ch == '\r') return 1;
+
+    /* every other value below space is rejected */
+    if (ch < 0x20) return 0;
+
+    /* 0x20-0xd7ff, both surrogate ranges and 0xe000-0xfffd are contiguous;
+       only 0xfffe and 0xffff remain excluded */
+    return ch <= 0xfffd;
+}
+
+/* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
+ *
+ * Returns non-zero when ch may appear inside a public identifier literal.
+ * The apostrophe is a valid PubidChar; callers that use it as the literal
+ * delimiter have to check for the delimiter themselves. */
+static inline int is_pubchar(WCHAR ch)
+{
+    return (ch == ' ') ||
+           (ch >= 'a' && ch <= 'z') ||
+           (ch >= 'A' && ch <= 'Z') ||
+           (ch >= '0' && ch <= '9') ||
+           /* 0x27-0x3b: '()*+,-./ 0-9 :; -- the range has to start at the
+              apostrophe, starting at '-' would drop '()*+, */
+           (ch >= '\'' && ch <= ';') ||
+           (ch == '=') || (ch == '?') ||
+           (ch == '@') || (ch == '!') ||
+           (ch >= '#' && ch <= '%') || /* #$% */
+           (ch == '_') || (ch == '\r') || (ch == '\n');
+}
+
static inline int is_namestartchar(WCHAR ch)
{
return (ch == ':') || (ch >= 'A' && ch <= 'Z') ||
@@ -1148,13 +1172,154 @@ static HRESULT reader_parse_misc(xmlreader *reader)
return hr;
}
+/* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
+ *
+ * Parses a quoted system literal at the current reader position and stores
+ * the text between the quotes in 'literal'.
+ * Returns WC_E_QUOTE when the input does not start with a quote character,
+ * S_OK otherwise. A missing closing quote is not reported as an error. */
+static HRESULT reader_parse_sys_literal(xmlreader *reader, strval *literal)
+{
+    WCHAR *begin, *end, delim;
+
+    delim = *reader_get_cur(reader);
+    if (delim != '"' && delim != '\'') return WC_E_QUOTE;
+
+    /* step over the opening quote, the literal text starts right after it */
+    reader_skipn(reader, 1);
+    begin = reader_get_cur(reader);
+
+    /* advance one character at a time until the matching quote or an
+       invalid character is reached */
+    for (end = begin; *end != delim && is_char(*end); end = reader_get_cur(reader))
+        reader_skipn(reader, 1);
+
+    /* consume the closing quote when there is one */
+    if (*end == delim) reader_skipn(reader, 1);
+
+    literal->str = begin;
+    literal->len = end - begin;
+    TRACE("%s\n", debugstr_wn(begin, end - begin));
+    return S_OK;
+}
+
+/* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
+   [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
+
+   Parses a quoted public identifier at the current reader position and stores
+   the text between the quotes in 'literal'.
+   Returns WC_E_QUOTE when the input does not start with a quote character,
+   S_OK otherwise. */
+static HRESULT reader_parse_pub_literal(xmlreader *reader, strval *literal)
+{
+    WCHAR *start = reader_get_cur(reader), *cur, quote;
+
+    if (*start != '"' && *start != '\'') return WC_E_QUOTE;
+
+    quote = *start;
+    reader_skipn(reader, 1);
+
+    /* Re-read the position after skipping the opening quote. Reusing the old
+       pointer would leave 'cur' on the quote itself, so the loop below would
+       never run and the literal would always come out empty. */
+    cur = start = reader_get_cur(reader);
+    while (is_pubchar(*cur) && *cur != quote)
+    {
+        reader_skipn(reader, 1);
+        cur = reader_get_cur(reader);
+    }
+    /* consume the closing quote, same as reader_parse_sys_literal() does */
+    if (*cur == quote) reader_skipn(reader, 1);
+
+    literal->str = start;
+    literal->len = cur-start;
+    TRACE("%s\n", debugstr_wn(start, cur-start));
+    return S_OK;
+}
+
+/* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
+ *
+ * Parses an optional external id at the current reader position and records
+ * each literal as an attribute named after its keyword (SYSTEM or PUBLIC).
+ *
+ * Returns S_FALSE when neither keyword is present, WC_E_WHITESPACE when the
+ * keyword is not followed by whitespace, a failure code from the literal
+ * parsers, or the result of reader_add_attr().
+ * The system literal following a public id is parsed when present; its absence
+ * is tolerated, so input that was accepted without it is still accepted. */
+static HRESULT reader_parse_externalid(xmlreader *reader)
+{
+    static WCHAR systemW[] = {'S','Y','S','T','E','M',0};
+    static WCHAR publicW[] = {'P','U','B','L','I','C',0};
+    strval name, pub, sys;
+    WCHAR *cur;
+    HRESULT hr;
+
+    /* reader_cmp() returns zero on a match */
+    if (!reader_cmp(reader, systemW))
+    {
+        /* system id */
+        reader_skipn(reader, 6);
+        if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
+
+        hr = reader_parse_sys_literal(reader, &sys);
+        if (FAILED(hr)) return hr;
+
+        name.str = systemW;
+        name.len = strlenW(systemW);
+        return reader_add_attr(reader, &name, &sys);
+    }
+
+    if (reader_cmp(reader, publicW)) return S_FALSE;
+
+    /* public id */
+    reader_skipn(reader, 6);
+    if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
+
+    hr = reader_parse_pub_literal(reader, &pub);
+    if (FAILED(hr)) return hr;
+
+    name.str = publicW;
+    name.len = strlenW(publicW);
+    hr = reader_add_attr(reader, &name, &pub);
+    if (FAILED(hr)) return hr;
+
+    /* system literal that follows the public id; only attempted when
+       whitespace and an opening quote are actually there */
+    if (!reader_skipspaces(reader)) return hr;
+    cur = reader_get_cur(reader);
+    if (*cur != '"' && *cur != '\'') return hr;
+
+    hr = reader_parse_sys_literal(reader, &sys);
+    if (FAILED(hr)) return hr;
+
+    name.str = systemW;
+    name.len = strlenW(systemW);
+    return reader_add_attr(reader, &name, &sys);
+}
+
/* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
static HRESULT reader_parse_dtd(xmlreader *reader)
{
- static const WCHAR doctypeW[] = {'D','O','C','T','Y','P','E',0};
+ static const WCHAR doctypeW[] = {'<','!','D','O','C','T','Y','P','E',0};
+ strval name;
+ WCHAR *cur;
+ HRESULT hr;
+
/* check if we have "<!DOCTYPE"; S_FALSE tells the caller no DTD starts here */
if (reader_cmp(reader, doctypeW)) return S_FALSE;
- FIXME("DTD parsing not implemented\n");
+ reader_shrink(reader);
+
+ /* DTD processing is not allowed by default; Prohibit mode rejects the declaration outright */
+ if (reader->dtdmode == DtdProcessing_Prohibit) return WC_E_DTDPROHIBITED;
+
+ reader_skipn(reader, 9); /* 9 is the length of "<!DOCTYPE" */
+ if (!reader_skipspaces(reader)) return WC_E_WHITESPACE; /* whitespace is mandatory before the name */
+
+ /* document type name; any parse failure is reported as WC_E_DECLDOCTYPE */
+ hr = reader_parse_name(reader, &name);
+ if (FAILED(hr)) return WC_E_DECLDOCTYPE;
+
+ reader_skipspaces(reader);
+
+ hr = reader_parse_externalid(reader); /* S_FALSE (no SYSTEM/PUBLIC keyword) is not a failure */
+ if (FAILED(hr)) return hr;
+
+ reader_skipspaces(reader);
+
+ cur = reader_get_cur(reader);
+ if (*cur != '>') /* anything other than '>' is treated as an internal subset */
+ {
+ FIXME("internal subset parsing not implemented\n");
+ return E_NOTIMPL;
+ }
+
+ /* skip the '>' that closes the declaration */
+ reader_skipn(reader, 1);
+
+ reader->nodetype = XmlNodeType_DocumentType; /* the DTD name serves as both local and qualified name */
+ reader_set_strvalue(reader, StringValue_LocalName, &name);
+ reader_set_strvalue(reader, StringValue_QualifiedName, &name);
+
+ return S_OK;
+}
+
+/* [39] element ::= EmptyElemTag | STag content ETag -- placeholder, nothing is parsed yet */
+static HRESULT reader_parse_element(xmlreader *reader)
+{
+ FIXME("element parsing not implemented\n"); /* stub: the reader position is left untouched */
return E_NOTIMPL;
}
@@ -1194,29 +1359,36 @@ static HRESULT reader_parse_nextnode(xmlreader *reader)
case XmlReadInState_Misc_DTD:
hr = reader_parse_misc(reader);
if (FAILED(hr)) return hr;
- if (hr == S_FALSE) {
+
+ if (hr == S_FALSE)
reader->instate = XmlReadInState_DTD;
- continue;
- }
- else if (hr == S_OK) return hr;
+ else
+ return hr;
break;
case XmlReadInState_DTD:
hr = reader_parse_dtd(reader);
if (FAILED(hr)) return hr;
- if (hr == S_FALSE) {
+
+ if (hr == S_OK)
+ {
reader->instate = XmlReadInState_DTD_Misc;
- continue;
+ return hr;
}
- else if (hr == S_OK) return hr;
+ else
+ reader->instate = XmlReadInState_Element;
break;
case XmlReadInState_DTD_Misc:
hr = reader_parse_misc(reader);
if (FAILED(hr)) return hr;
- if (hr == S_FALSE) {
+
+ if (hr == S_FALSE)
reader->instate = XmlReadInState_Element;
- continue;
- }
- else if (hr == S_OK) return hr;
+ else
+ return hr;
+ break;
+ case XmlReadInState_Element:
+ hr = reader_parse_element(reader);
+ if (FAILED(hr)) return hr;
break;
default:
FIXME("internal state %d not handled\n", reader->instate);
diff --git a/dlls/xmllite/tests/reader.c b/dlls/xmllite/tests/reader.c
index a8614a1..d084e7b 100644
--- a/dlls/xmllite/tests/reader.c
+++ b/dlls/xmllite/tests/reader.c
@@ -936,6 +936,92 @@ static void test_read_full(void)
IXmlReader_Release(reader);
}
+static const char test_dtd[] = /* DOCTYPE with a SYSTEM external id, followed by a comment */
+ "<!DOCTYPE testdtd SYSTEM \"externalid uri\" >"
+ "<!-- comment -->";
+
+static void test_read_dtd(void)
+{
+ static const WCHAR sysvalW[] = {'e','x','t','e','r','n','a','l','i','d',' ','u','r','i',0};
+ static const WCHAR dtdnameW[] = {'t','e','s','t','d','t','d',0};
+ static const WCHAR sysW[] = {'S','Y','S','T','E','M',0};
+ IXmlReader *reader;
+ const WCHAR *str;
+ XmlNodeType type;
+ IStream *stream;
+ UINT len, count;
+ HRESULT hr;
+
+ hr = pCreateXmlReader(&IID_IXmlReader, (void**)&reader, NULL);
+ ok(hr == S_OK, "S_OK, got %08x\n", hr);
+
+ hr = IXmlReader_SetProperty(reader, XmlReaderProperty_DtdProcessing, DtdProcessing_Parse); /* switch DTD processing to Parse before reading */
+ ok(hr == S_OK, "got 0x%8x\n", hr);
+
+ stream = create_stream_on_data(test_dtd, sizeof(test_dtd)); /* sizeof counts the terminating NUL too */
+ hr = IXmlReader_SetInput(reader, (IUnknown*)stream);
+ ok(hr == S_OK, "got %08x\n", hr);
+
+ type = XmlNodeType_None;
+ hr = IXmlReader_Read(reader, &type); /* first node is expected to be the document type declaration */
+ ok(hr == S_OK, "got 0x%8x\n", hr);
+ ok(type == XmlNodeType_DocumentType, "got type %d\n", type);
+
+ count = 0;
+ hr = IXmlReader_GetAttributeCount(reader, &count);
+ ok(hr == S_OK, "got %08x\n", hr);
+ ok(count == 1, "got %d\n", count); /* the SYSTEM id is expected to be the only attribute */
+
+ hr = IXmlReader_MoveToFirstAttribute(reader);
+ ok(hr == S_OK, "got %08x\n", hr);
+
+ type = XmlNodeType_None;
+ hr = IXmlReader_GetNodeType(reader, &type);
+ ok(hr == S_OK, "got %08x\n", hr);
+ ok(type == XmlNodeType_Attribute, "got %d\n", type);
+
+ len = 0;
+ str = NULL;
+ hr = IXmlReader_GetLocalName(reader, &str, &len);
+ ok(hr == S_OK, "got 0x%08x\n", hr);
+todo_wine { /* attribute name check is marked as a known Wine failure for now */
+ ok(len == lstrlenW(sysW), "got %u\n", len);
+ ok(!lstrcmpW(str, sysW), "got %s\n", wine_dbgstr_w(str));
+}
+ len = 0;
+ str = NULL;
+ hr = IXmlReader_GetValue(reader, &str, &len);
+ ok(hr == S_OK, "got 0x%08x\n", hr);
+todo_wine { /* attribute value check is marked as a known Wine failure for now */
+ ok(len == lstrlenW(sysvalW), "got %u\n", len);
+ ok(!lstrcmpW(str, sysvalW), "got %s\n", wine_dbgstr_w(str));
+}
+ hr = IXmlReader_MoveToElement(reader); /* move off the attribute before querying the DTD name below */
+ ok(hr == S_OK, "got 0x%08x\n", hr);
+
+ len = 0;
+ str = NULL;
+ hr = IXmlReader_GetLocalName(reader, &str, &len);
+ ok(hr == S_OK, "got 0x%08x\n", hr);
+ ok(len == lstrlenW(dtdnameW), "got %u\n", len);
+ ok(!lstrcmpW(str, dtdnameW), "got %s\n", wine_dbgstr_w(str));
+
+ len = 0;
+ str = NULL;
+ hr = IXmlReader_GetQualifiedName(reader, &str, &len); /* qualified name is expected to equal the local name */
+ ok(hr == S_OK, "got 0x%08x\n", hr);
+ ok(len == lstrlenW(dtdnameW), "got %u\n", len);
+ ok(!lstrcmpW(str, dtdnameW), "got %s\n", wine_dbgstr_w(str));
+
+ type = XmlNodeType_None;
+ hr = IXmlReader_Read(reader, &type); /* the node after the DTD is the trailing comment */
+ ok(hr == S_OK, "got 0x%8x\n", hr);
+ ok(type == XmlNodeType_Comment, "got type %d\n", type);
+
+ IStream_Release(stream);
+ IXmlReader_Release(reader);
+}
+
START_TEST(reader)
{
HRESULT r;
@@ -954,6 +1040,7 @@ START_TEST(reader)
test_reader_state();
test_read_comment();
test_read_pi();
+ test_read_dtd();
test_read_full();
test_read_xmldeclaration();
--
1.7.10.4
More information about the wine-patches
mailing list