Nikolay Sivov : msxml3: Fix ignorable whitespace detection.

Alexandre Julliard julliard at winehq.org
Mon May 6 14:08:45 CDT 2013


Module: wine
Branch: master
Commit: 0403f34b78ef1f663f0703784e089e8396fb338c
URL:    http://source.winehq.org/git/wine.git/?a=commit;h=0403f34b78ef1f663f0703784e089e8396fb338c

Author: Nikolay Sivov <nsivov at codeweavers.com>
Date:   Sun May  5 15:44:06 2013 +0400

msxml3: Fix ignorable whitespace detection.

---

 dlls/msxml3/domdoc.c       |   14 ++++++++-
 dlls/msxml3/tests/domdoc.c |   64 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 74 insertions(+), 4 deletions(-)

diff --git a/dlls/msxml3/domdoc.c b/dlls/msxml3/domdoc.c
index 67902bd..b607997 100644
--- a/dlls/msxml3/domdoc.c
+++ b/dlls/msxml3/domdoc.c
@@ -416,10 +416,20 @@ static void sax_characters(void *ctx, const xmlChar *ch, int len)
 
     if (ctxt->node)
     {
-        /* during domdoc_loadXML() the xmlDocPtr->_private data is not available */
+        /* Characters are reported with multiple calls, for example each charref is reported with a separate
+           call and then parser appends it to a single text node or creates a new node if not created.
+           It's not possible to tell if it's ignorable data or not just looking at data itself cause it could be
+           a space chars that separate charrefs or similar case. We only need to skip leading and trailing spaces,
+           or whole node if it has nothing but space chars, so to detect leading space node->last is checked that
+           contains text node pointer if already created, trailing spaces are detected directly looking at parser input
+           for next '<' opening bracket - similar logic is used by libxml2 itself.
+
+           Note that during domdoc_loadXML() the xmlDocPtr->_private data is not available. */
         if (!This->properties->preserving &&
             !is_preserving_whitespace(ctxt->node) &&
-            strn_isspace(ch, len))
+            strn_isspace(ch, len) &&
+            (!ctxt->node->last ||
+            ((ctxt->node->last && (*ctxt->input->cur) == '<'))))
             return;
     }
 
diff --git a/dlls/msxml3/tests/domdoc.c b/dlls/msxml3/tests/domdoc.c
index d5f2413..cc79d4b 100644
--- a/dlls/msxml3/tests/domdoc.c
+++ b/dlls/msxml3/tests/domdoc.c
@@ -435,6 +435,13 @@ static const char szExampleXML[] =
 "    </elem>\n"
 "</root>\n";
 
+static const char charrefsxml[] =
+"<?xml version='1.0'?>"
+"<a>"
+"<b1> Text &#65; end </b1>"
+"<b2>&#65;&#66; &#67; </b2>"
+"</a>";
+
 static const CHAR szNodeTypesXML[] =
 "<?xml version='1.0'?>"
 "<!-- comment node 0 -->"
@@ -4154,10 +4161,58 @@ static inline void _check_ws_preserved(int line, IXMLDOMDocument2* doc, char con
     IXMLDOMNode_Release(node2);
 }
 
-static void test_whitespace(void)
+static void test_preserve_charref(IXMLDOMDocument2 *doc, VARIANT_BOOL preserve)
 {
+    static const WCHAR b1_p[] = {' ','T','e','x','t',' ','A',' ','e','n','d',' ',0};
+    static const WCHAR b1_i[] = {'T','e','x','t',' ','A',' ','e','n','d',0};
+    static const WCHAR b2_p[] = {'A','B',' ','C',' ',0};
+    static const WCHAR b2_i[] = {'A','B',' ','C',0};
+    IXMLDOMNodeList *list;
+    IXMLDOMElement *root;
+    IXMLDOMNode *node;
+    const WCHAR *text;
     VARIANT_BOOL b;
+    HRESULT hr;
+    BSTR s;
+
+    hr = IXMLDOMDocument2_put_preserveWhiteSpace(doc, preserve);
+    ok(hr == S_OK, "got 0x%08x\n", hr);
+
+    hr = IXMLDOMDocument2_loadXML(doc, _bstr_(charrefsxml), &b);
+    ok(hr == S_OK, "got 0x%08x\n", hr);
+
+    hr = IXMLDOMDocument2_get_documentElement(doc, &root);
+    ok(hr == S_OK, "got 0x%08x\n", hr);
+
+    hr = IXMLDOMElement_get_childNodes(root, &list);
+    ok(hr == S_OK, "got 0x%08x\n", hr);
+    IXMLDOMElement_Release(root);
+
+    text = preserve == VARIANT_TRUE ? b1_p : b1_i;
+    hr = IXMLDOMNodeList_get_item(list, 0, &node);
+    ok(hr == S_OK, "got 0x%08x\n", hr);
+    hr = IXMLDOMNode_get_text(node, &s);
+    ok(hr == S_OK, "got 0x%08x\n", hr);
+    ok(!lstrcmpW(s, text), "0x%x, got %s\n", preserve, wine_dbgstr_w(s));
+    SysFreeString(s);
+    IXMLDOMNode_Release(node);
+
+    text = preserve == VARIANT_TRUE ? b2_p : b2_i;
+    hr = IXMLDOMNodeList_get_item(list, 1, &node);
+    ok(hr == S_OK, "got 0x%08x\n", hr);
+    hr = IXMLDOMNode_get_text(node, &s);
+    ok(hr == S_OK, "got 0x%08x\n", hr);
+    ok(!lstrcmpW(s, text), "0x%x, got %s\n", preserve, wine_dbgstr_w(s));
+    SysFreeString(s);
+    IXMLDOMNode_Release(node);
+
+    IXMLDOMNodeList_Release(list);
+}
+
+static void test_whitespace(void)
+{
     IXMLDOMDocument2 *doc1, *doc2, *doc3, *doc4;
+    VARIANT_BOOL b;
 
     doc1 = create_document(&IID_IXMLDOMDocument2);
     doc2 = create_document(&IID_IXMLDOMDocument2);
@@ -4224,10 +4279,15 @@ static void test_whitespace(void)
     check_ws_preserved(doc3, NULL);
     check_ws_ignored(doc4, NULL);
 
-    IXMLDOMDocument2_Release(doc1);
     IXMLDOMDocument2_Release(doc2);
     IXMLDOMDocument2_Release(doc3);
     IXMLDOMDocument2_Release(doc4);
+
+    /* text with char references */
+    test_preserve_charref(doc1, VARIANT_TRUE);
+    test_preserve_charref(doc1, VARIANT_FALSE);
+    IXMLDOMDocument2_Release(doc1);
+
     free_bstrs();
 }
 




More information about the wine-cvs mailing list