Nikolay Sivov : xmllite: Implement initial encoding switching.
Alexandre Julliard
julliard at winehq.org
Mon Nov 26 15:19:20 CST 2012
Module: wine
Branch: master
Commit: eddd7fcf29994645e4047fe9738109defa78100d
URL: http://source.winehq.org/git/wine.git/?a=commit;h=eddd7fcf29994645e4047fe9738109defa78100d
Author: Nikolay Sivov <nsivov at codeweavers.com>
Date: Sun Nov 25 00:10:46 2012 -0500
xmllite: Implement initial encoding switching.
---
dlls/xmllite/reader.c | 127 +++++++++++++++++++++++++++++++++++--------
dlls/xmllite/tests/reader.c | 2 +-
2 files changed, 106 insertions(+), 23 deletions(-)
diff --git a/dlls/xmllite/reader.c b/dlls/xmllite/reader.c
index 8ad78d8..b0ceac4 100644
--- a/dlls/xmllite/reader.c
+++ b/dlls/xmllite/reader.c
@@ -61,6 +61,7 @@ static const struct xml_encoding_data xml_encoding_map[] = {
typedef struct
{
char *data;
+ char *cur;
unsigned int allocated;
unsigned int written;
} encoded_buffer;
@@ -187,6 +188,7 @@ static HRESULT init_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer
if (!buffer->data) return E_OUTOFMEMORY;
memset(buffer->data, 0, 4);
+ buffer->cur = buffer->data;
buffer->allocated = initial_len;
buffer->written = 0;
@@ -198,18 +200,15 @@ static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
readerinput_free(input, buffer->data);
}
-static HRESULT get_code_page(xml_encoding encoding, xmlreaderinput *input)
+static HRESULT get_code_page(xml_encoding encoding, UINT *cp)
{
- const struct xml_encoding_data *data;
-
if (encoding == XmlEncoding_Unknown)
{
FIXME("unsupported encoding %d\n", encoding);
return E_NOTIMPL;
}
- data = &xml_encoding_map[encoding];
- input->buffer->code_page = data->cp;
+ *cp = xml_encoding_map[encoding].cp;
return S_OK;
}
@@ -324,31 +323,112 @@ static HRESULT readerinput_growraw(xmlreaderinput *readerinput)
return hr;
}
-static xml_encoding readerinput_detectencoding(xmlreaderinput *readerinput)
+/* grows UTF-16 buffer so it has at least 'length' bytes free on return */
+static void readerinput_grow(xmlreaderinput *readerinput, int length)
+{
+ encoded_buffer *buffer = &readerinput->buffer->utf16;
+
+ /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
+ if (buffer->allocated < buffer->written + length + 4)
+ {
+ int grown_size = max(2*buffer->allocated, buffer->allocated + length);
+ buffer->data = readerinput_realloc(readerinput, buffer->data, grown_size);
+ buffer->allocated = grown_size;
+ }
+}
+
+static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc)
{
encoded_buffer *buffer = &readerinput->buffer->encoded;
+ static char startA[] = {'<','?','x','m'};
+ static WCHAR startW[] = {'<','?'};
+ static char utf8bom[] = {0xef,0xbb,0xbf};
+ static char utf16lebom[] = {0xff,0xfe};
+
+ *enc = XmlEncoding_Unknown;
+
+ if (buffer->written <= 3) return MX_E_INPUTEND;
/* try start symbols if we have enough data to do that, input buffer should contain
first chunk already */
- if (buffer->written >= 4)
+ if (!memcmp(buffer->data, startA, sizeof(startA)))
+ *enc = XmlEncoding_UTF8;
+ else if (!memcmp(buffer->data, startW, sizeof(startW)))
+ *enc = XmlEncoding_UTF16;
+ /* try with BOM now */
+ else if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom)))
{
- static char startA[] = {'<','?','x','m'};
- static WCHAR startW[] = {'<','?'};
-
- if (!memcmp(buffer->data, startA, sizeof(startA))) return XmlEncoding_UTF8;
- if (!memcmp(buffer->data, startW, sizeof(startW))) return XmlEncoding_UTF16;
+ buffer->cur += sizeof(utf8bom);
+ *enc = XmlEncoding_UTF8;
+ }
+ else if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom)))
+ {
+ buffer->cur += sizeof(utf16lebom);
+ *enc = XmlEncoding_UTF16;
}
- /* try with BOM now */
- if (buffer->written >= 3)
+ return S_OK;
+}
+
+static int readerinput_get_utf8_convlen(xmlreaderinput *readerinput)
+{
+ encoded_buffer *buffer = &readerinput->buffer->encoded;
+ int len = buffer->written;
+
+ /* complete single byte char */
+ if (!(buffer->data[len-1] & 0x80)) return len;
+
+ /* find start byte of multibyte char */
+ while (--len && !(buffer->data[len] & 0xc0))
+ ;
+
+ return len;
+}
+
+/* returns byte length of the complete char sequence for the specified code page */
+static int readerinput_get_convlen(xmlreaderinput *readerinput, UINT cp)
+{
+ encoded_buffer *buffer = &readerinput->buffer->encoded;
+ int len = buffer->written;
+
+ if (cp == CP_UTF8)
+ len = readerinput_get_utf8_convlen(readerinput);
+ else
+ len = buffer->written;
+
+ return len - (buffer->cur - buffer->data);
+}
+
+/* note that raw buffer content is kept */
+static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding enc)
+{
+ encoded_buffer *src = &readerinput->buffer->encoded;
+ encoded_buffer *dest = &readerinput->buffer->utf16;
+ int len, dest_len;
+ HRESULT hr;
+ UINT cp;
+
+ hr = get_code_page(enc, &cp);
+ if (FAILED(hr)) return;
+
+ len = readerinput_get_convlen(readerinput, cp);
+
+ TRACE("switching to cp %d\n", cp);
+
+ /* just copy in this case */
+ if (enc == XmlEncoding_UTF16)
{
- static char utf8bom[] = {0xef,0xbb,0xbf};
- static char utf16lebom[] = {0xff,0xfe};
- if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom))) return XmlEncoding_UTF8;
- if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom))) return XmlEncoding_UTF16;
+ readerinput_grow(readerinput, len);
+ memcpy(dest->data, src->cur, len);
+ readerinput->buffer->code_page = cp;
+ return;
}
- return XmlEncoding_Unknown;
+ dest_len = MultiByteToWideChar(cp, 0, src->cur, len, NULL, 0);
+ readerinput_grow(readerinput, dest_len);
+ MultiByteToWideChar(cp, 0, src->cur, len, (WCHAR*)dest->data, dest_len);
+ dest->data[dest_len] = 0;
+ readerinput->buffer->code_page = cp;
}
static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
@@ -505,9 +585,12 @@ static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *node_type)
if (FAILED(hr)) return hr;
/* try to detect encoding by BOM or data and set input code page */
- enc = readerinput_detectencoding(This->input);
- TRACE("detected encoding %d\n", enc);
- get_code_page(enc, This->input);
+ hr = readerinput_detectencoding(This->input, &enc);
+ TRACE("detected encoding %d, 0x%08x\n", enc, hr);
+ if (FAILED(hr)) return hr;
+
+ /* always switch the first time because we have to put something in */
+ readerinput_switchencoding(This->input, enc);
}
return E_NOTIMPL;
diff --git a/dlls/xmllite/tests/reader.c b/dlls/xmllite/tests/reader.c
index 7d77fe9..729d134 100644
--- a/dlls/xmllite/tests/reader.c
+++ b/dlls/xmllite/tests/reader.c
@@ -52,7 +52,7 @@ static const char *debugstr_guid(REFIID riid)
return buf;
}
-static const char xmldecl_full[] = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n";
+static const char xmldecl_full[] = "\xef\xbb\xbf<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n";
static IStream *create_stream_on_data(const char *data, int size)
{
More information about the wine-cvs mailing list