[PATCH 1/6] Hint parser to use UTF-8 if it's specified by a BOM.

Nikolay Sivov nsivov at codeweavers.com
Sat Oct 30 09:49:43 CDT 2010


Libxml2 changed the way it handles encoding mismatches between 2.6.32 and 2.7.0.
Now if you have a document encoded in UTF-8 that declares UTF-16 in its prolog,
you'll encounter a fatal encoding mismatch error. Telling the parser to use a
specific encoding makes it bypass this mismatch check. This fix doesn't depend
on the libxml2 version.
---
 dlls/msxml3/saxreader.c |   21 +++++++++++++++++++++
 1 files changed, 21 insertions(+), 0 deletions(-)

diff --git a/dlls/msxml3/saxreader.c b/dlls/msxml3/saxreader.c
index a1df5c7..7d8caf4 100644
--- a/dlls/msxml3/saxreader.c
+++ b/dlls/msxml3/saxreader.c
@@ -1770,6 +1770,8 @@ static HRESULT SAXLocator_create(saxreader *reader, saxlocator **ppsaxlocator, B
 /*** SAXXMLReader internal functions ***/
 static HRESULT internal_parseBuffer(saxreader *This, const char *buffer, int size, BOOL vbInterface)
 {
+    xmlCharEncoding encoding = XML_CHAR_ENCODING_NONE;
+    xmlChar *enc_name = NULL;
     saxlocator *locator;
     HRESULT hr;
 
@@ -1777,6 +1779,22 @@ static HRESULT internal_parseBuffer(saxreader *This, const char *buffer, int siz
     if(FAILED(hr))
         return hr;
 
+    if (size >= 4)
+    {
+        const unsigned char *buff = (unsigned char*)buffer;
+
+        encoding = xmlDetectCharEncoding((xmlChar*)buffer, 4);
+        enc_name = (xmlChar*)xmlGetCharEncodingName(encoding);
+        TRACE("detected encoding: %s\n", enc_name);
+        /* skip BOM, parser won't switch encodings and so won't skip it on its own */
+        if ((encoding == XML_CHAR_ENCODING_UTF8) &&
+            buff[0] == 0xEF && buff[1] == 0xBB && buff[2] == 0xBF)
+        {
+            buffer += 3;
+            size -= 3;
+        }
+    }
+
     locator->pParserCtxt = xmlCreateMemoryParserCtxt(buffer, size);
     if(!locator->pParserCtxt)
     {
@@ -1784,6 +1802,9 @@ static HRESULT internal_parseBuffer(saxreader *This, const char *buffer, int siz
         return E_FAIL;
     }
 
+    if (encoding == XML_CHAR_ENCODING_UTF8)
+        locator->pParserCtxt->encoding = xmlStrdup(enc_name);
+
     xmlFree(locator->pParserCtxt->sax);
     locator->pParserCtxt->sax = &locator->saxreader->sax;
     locator->pParserCtxt->userData = locator;
-- 
1.5.6.5


--------------040503090105040000080507--



More information about the wine-patches mailing list