xmllite: Implement encoding declaration parsing

Nikolay Sivov nsivov at codeweavers.com
Tue Nov 27 02:47:52 CST 2012


Implement encoding declaration parsing
-------------- next part --------------
>From 550fe6544eb542f35ba4a9c3397b19e8ada1902b Mon Sep 17 00:00:00 2001
From: Nikolay Sivov <nsivov at codeweavers.com>
Date: Tue, 27 Nov 2012 08:46:42 -0500
Subject: [PATCH 4/4] Implement encoding declaration parsing

---
 dlls/xmllite/reader.c |   99 +++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 88 insertions(+), 11 deletions(-)

diff --git a/dlls/xmllite/reader.c b/dlls/xmllite/reader.c
index 0d21e79..87d7936 100644
--- a/dlls/xmllite/reader.c
+++ b/dlls/xmllite/reader.c
@@ -46,6 +46,10 @@ typedef enum
 static const WCHAR utf16W[] = {'U','T','F','-','1','6',0};
 static const WCHAR utf8W[] = {'U','T','F','-','8',0};
 
+static const WCHAR dblquoteW[] = {'\"',0};
+static const WCHAR quoteW[] = {'\'',0};
+static const WCHAR eqW[] = {'=',0};
+
 struct xml_encoding_data
 {
     const WCHAR *name;
@@ -213,7 +217,7 @@ static HRESULT get_code_page(xml_encoding encoding, UINT *cp)
     return S_OK;
 }
 
-static xml_encoding parse_encoding_name(const WCHAR *name)
+static xml_encoding parse_encoding_name(const WCHAR *name, int len)
 {
     int min, max, n, c;
 
@@ -226,7 +230,10 @@ static xml_encoding parse_encoding_name(const WCHAR *name)
     {
         n = (min+max)/2;
 
-        c = strcmpiW(xml_encoding_map[n].name, name);
+        if (len != -1)
+            c = strncmpiW(xml_encoding_map[n].name, name, len);
+        else
+            c = strcmpiW(xml_encoding_map[n].name, name);
         if (!c)
             return xml_encoding_map[n].enc;
 
@@ -491,20 +498,20 @@ static int reader_skipspaces(xmlreader *reader)
 /* [26] VersionNum ::= '1.' [0-9]+ */
 static HRESULT reader_parse_versionnum(xmlreader *reader)
 {
-    const WCHAR *ptr, *start = reader_get_cur(reader);
+    const WCHAR *ptr, *ptr2, *start = reader_get_cur(reader);
     static const WCHAR onedotW[] = {'1','.',0};
 
     if (reader_cmp(reader, onedotW)) return WC_E_XMLDECL;
     /* skip "1." */
     reader_skipn(reader, 2);
 
-    ptr = reader_get_cur(reader);
+    ptr2 = ptr = reader_get_cur(reader);
     while (*ptr >= '0' && *ptr <= '9')
         ptr++;
 
-    if (ptr == start) return WC_E_DIGIT;
-    TRACE("version=%s\n", debugstr_wn(start, ptr-start));
-    reader_skipn(reader, ptr-start);
+    if (ptr2 == ptr) return WC_E_DIGIT;
+    TRACE("version=%s", debugstr_wn(start, ptr-start));
+    reader_skipn(reader, ptr-ptr2);
     return S_OK;
 }
 
@@ -512,9 +519,6 @@ static HRESULT reader_parse_versionnum(xmlreader *reader)
 static HRESULT reader_parse_versioninfo(xmlreader *reader)
 {
     static const WCHAR versionW[] = {'v','e','r','s','i','o','n',0};
-    static const WCHAR dblquoteW[] = {'\"',0};
-    static const WCHAR quoteW[] = {'\'',0};
-    static const WCHAR eqW[] = {'=',0};
     HRESULT hr;
 
     if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
@@ -544,6 +548,75 @@ static HRESULT reader_parse_versioninfo(xmlreader *reader)
     return S_OK;
 }
 
+/* ([A-Za-z0-9._] | '-') */
+static inline int is_wchar_encname(WCHAR ch)
+{
+    return ((ch >= 'A' && ch <= 'Z') ||
+            (ch >= 'a' && ch <= 'z') ||
+            (ch >= '0' && ch <= '9') ||
+            (ch == '.') || (ch == '_') ||
+            (ch == '-'));
+}
+
+/* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
+static HRESULT reader_parse_encname(xmlreader *reader)
+{
+    const WCHAR *start = reader_get_cur(reader), *ptr;
+    xml_encoding enc;
+    int len;
+
+    if ((*start < 'A' || *start > 'Z') && (*start < 'a' || *start > 'z'))
+        return WC_E_ENCNAME;
+
+    ptr = start;
+    while (is_wchar_encname(*++ptr))
+        ;
+
+    len = ptr - start;
+    enc = parse_encoding_name(start, len);
+    TRACE("encoding name %s\n", debugstr_wn(start, len));
+
+    if (enc == XmlEncoding_Unknown)
+        return WC_E_ENCNAME;
+
+    /* skip encoding name */
+    reader_skipn(reader, len);
+    return S_OK;
+}
+
+/* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
+static HRESULT reader_parse_encdecl(xmlreader *reader)
+{
+    static const WCHAR encodingW[] = {'e','n','c','o','d','i','n','g',0};
+    HRESULT hr;
+
+    if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
+
+    if (reader_cmp(reader, encodingW)) return S_OK;
+    /* skip 'encoding' */
+    reader_skipn(reader, 8);
+
+    if (reader_cmp(reader, eqW)) return WC_E_EQUAL;
+    /* skip '=' */
+    reader_skipn(reader, 1);
+
+    if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
+        return WC_E_QUOTE;
+    /* skip "'"|'"' */
+    reader_skipn(reader, 1);
+
+    hr = reader_parse_encname(reader);
+    if (FAILED(hr)) return hr;
+
+    if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
+        return WC_E_QUOTE;
+
+    /* skip "'"|'"' */
+    reader_skipn(reader, 1);
+
+    return S_OK;
+}
+
 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
 static HRESULT reader_parse_xmldecl(xmlreader *reader)
 {
@@ -558,6 +631,10 @@ static HRESULT reader_parse_xmldecl(xmlreader *reader)
     if (FAILED(hr))
         return hr;
 
+    hr = reader_parse_encdecl(reader);
+    if (FAILED(hr))
+        return hr;
+
     return E_NOTIMPL;
 }
 
@@ -1028,7 +1105,7 @@ HRESULT WINAPI CreateXmlReaderInputWithEncodingName(IUnknown *stream,
     readerinput->imalloc = imalloc;
     readerinput->stream = NULL;
     if (imalloc) IMalloc_AddRef(imalloc);
-    readerinput->encoding = parse_encoding_name(encoding);
+    readerinput->encoding = parse_encoding_name(encoding, -1);
     readerinput->hint = hint;
     readerinput->baseuri = readerinput_strdupW(readerinput, base_uri);
 
-- 
1.7.10.4




More information about the wine-patches mailing list