Nikolay Sivov : xmllite: Initial support for reader input encoding detection.
Alexandre Julliard
julliard at winehq.org
Tue Nov 20 13:52:22 CST 2012
Module: wine
Branch: master
Commit: d693790175ac8c8d1b2ac88ac2026e9a10f3102d
URL: http://source.winehq.org/git/wine.git/?a=commit;h=d693790175ac8c8d1b2ac88ac2026e9a10f3102d
Author: Nikolay Sivov <nsivov at codeweavers.com>
Date: Sun Nov 18 18:01:46 2012 -0500
xmllite: Initial support for reader input encoding detection.
---
dlls/xmllite/reader.c | 123 ++++++++++++++++++++++++++++++++--------
dlls/xmllite/tests/reader.c | 11 +++-
dlls/xmllite/xmllite_private.h | 5 ++
3 files changed, 115 insertions(+), 24 deletions(-)
diff --git a/dlls/xmllite/reader.c b/dlls/xmllite/reader.c
index ae05057..188b419 100644
--- a/dlls/xmllite/reader.c
+++ b/dlls/xmllite/reader.c
@@ -117,6 +117,14 @@ static inline void *m_alloc(IMalloc *imalloc, size_t len)
return heap_alloc(len);
}
+static inline void *m_realloc(IMalloc *imalloc, void *mem, size_t len)
+{
+ if (imalloc)
+ return IMalloc_Realloc(imalloc, mem, len);
+ else
+ return heap_realloc(mem, len);
+}
+
static inline void m_free(IMalloc *imalloc, void *mem)
{
if (imalloc)
@@ -142,6 +150,11 @@ static inline void *readerinput_alloc(xmlreaderinput *input, size_t len)
return m_alloc(input->imalloc, len);
}
+static inline void *readerinput_realloc(xmlreaderinput *input, void *mem, size_t len)
+{
+ return m_realloc(input->imalloc, mem, len);
+}
+
static inline void readerinput_free(xmlreaderinput *input, void *mem)
{
return m_free(input->imalloc, mem);
@@ -165,7 +178,7 @@ static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
readerinput_free(input, buffer->data);
}
-static HRESULT get_code_page(xml_encoding encoding, UINT *cp)
+static HRESULT get_code_page(xml_encoding encoding, xmlreaderinput *input)
{
const struct xml_encoding_data *data;
@@ -176,12 +189,12 @@ static HRESULT get_code_page(xml_encoding encoding, UINT *cp)
}
data = &xml_encoding_map[encoding];
- *cp = data->cp;
+ input->buffer->code_page = data->cp;
return S_OK;
}
-static HRESULT alloc_input_buffer(xmlreaderinput *input, xml_encoding encoding)
+static HRESULT alloc_input_buffer(xmlreaderinput *input)
{
input_buffer *buffer;
HRESULT hr;
@@ -192,29 +205,20 @@ static HRESULT alloc_input_buffer(xmlreaderinput *input, xml_encoding encoding)
if (!buffer) return E_OUTOFMEMORY;
buffer->input = input;
- hr = get_code_page(encoding, &buffer->code_page);
+ buffer->code_page = ~0; /* code page is unknown at this point */
+ hr = init_encoded_buffer(input, &buffer->utf16);
if (hr != S_OK) {
readerinput_free(input, buffer);
return hr;
}
- hr = init_encoded_buffer(input, &buffer->utf16);
+ hr = init_encoded_buffer(input, &buffer->encoded);
if (hr != S_OK) {
+ free_encoded_buffer(input, &buffer->utf16);
readerinput_free(input, buffer);
return hr;
}
- if (encoding != XmlEncoding_UTF16) {
- hr = init_encoded_buffer(input, &buffer->encoded);
- if (hr != S_OK) {
- free_encoded_buffer(input, &buffer->utf16);
- readerinput_free(input, buffer);
- return hr;
- }
- }
- else
- memset(&buffer->encoded, 0, sizeof(buffer->encoded));
-
input->buffer = buffer;
return S_OK;
}
@@ -226,7 +230,7 @@ static void free_input_buffer(input_buffer *buffer)
readerinput_free(buffer->input, buffer);
}
-static void xmlreaderinput_release_stream(xmlreaderinput *readerinput)
+static void readerinput_release_stream(xmlreaderinput *readerinput)
{
if (readerinput->stream) {
ISequentialStream_Release(readerinput->stream);
@@ -236,11 +240,11 @@ static void xmlreaderinput_release_stream(xmlreaderinput *readerinput)
/* Queries already stored interface for IStream/ISequentialStream.
Interface supplied on creation will be overwritten */
-static HRESULT xmlreaderinput_query_for_stream(xmlreaderinput *readerinput)
+static HRESULT readerinput_query_for_stream(xmlreaderinput *readerinput)
{
HRESULT hr;
- xmlreaderinput_release_stream(readerinput);
+ readerinput_release_stream(readerinput);
hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream);
if (hr != S_OK)
hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream);
@@ -248,6 +252,59 @@ static HRESULT xmlreaderinput_query_for_stream(xmlreaderinput *readerinput)
return hr;
}
+/* reads a chunk to raw buffer */
+static HRESULT readerinput_growraw(xmlreaderinput *readerinput)
+{
+ encoded_buffer *buffer = &readerinput->buffer->encoded;
+ ULONG len = buffer->allocated - buffer->written, read;
+ HRESULT hr;
+
+ /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
+ variable width encodings like UTF-8 */
+ len = (len + 3) & ~3;
+ /* try to use allocated space or grow */
+ if (buffer->allocated - buffer->written < len)
+ {
+ buffer->allocated *= 2;
+ buffer->data = readerinput_realloc(readerinput, buffer->data, buffer->allocated);
+ len = buffer->allocated - buffer->written;
+ }
+
+ hr = ISequentialStream_Read(readerinput->stream, buffer->data + buffer->written, len, &read);
+ if (FAILED(hr)) return hr;
+ TRACE("requested %d, read %d, ret 0x%08x\n", len, read, hr);
+ buffer->written += read;
+
+ return hr;
+}
+
+static xml_encoding readerinput_detectencoding(xmlreaderinput *readerinput)
+{
+ encoded_buffer *buffer = &readerinput->buffer->encoded;
+
+ /* try start symbols if we have enough data to do that, input buffer should contain
+ first chunk already */
+ if (buffer->written >= 4)
+ {
+ static char startA[] = {'<','?','x','m'};
+ static WCHAR startW[] = {'<','?'};
+
+ if (!memcmp(buffer->data, startA, sizeof(startA))) return XmlEncoding_UTF8;
+ if (!memcmp(buffer->data, startW, sizeof(startW))) return XmlEncoding_UTF16;
+ }
+
+ /* try with BOM now */
+ if (buffer->written >= 3)
+ {
+ static char utf8bom[] = {0xef,0xbb,0xbf};
+ static char utf16lebom[] = {0xff,0xfe};
+ if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom))) return XmlEncoding_UTF8;
+ if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom))) return XmlEncoding_UTF16;
+ }
+
+ return XmlEncoding_Unknown;
+}
+
static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
{
xmlreader *This = impl_from_IXmlReader(iface);
@@ -305,7 +362,7 @@ static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
if (This->input)
{
- xmlreaderinput_release_stream(This->input);
+ readerinput_release_stream(This->input);
IUnknown_Release(&This->input->IXmlReaderInput_iface);
This->input = NULL;
}
@@ -333,7 +390,7 @@ static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
}
/* set stream for supplied IXmlReaderInput */
- hr = xmlreaderinput_query_for_stream(This->input);
+ hr = readerinput_query_for_stream(This->input);
if (hr == S_OK)
This->state = XmlReadState_Initial;
@@ -386,7 +443,27 @@ static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LO
static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *node_type)
{
- FIXME("(%p %p): stub\n", iface, node_type);
+ xmlreader *This = impl_from_IXmlReader(iface);
+
+ FIXME("(%p)->(%p): stub\n", This, node_type);
+
+ if (This->state == XmlReadState_Closed) return S_FALSE;
+
+ /* if it's a first call for a new input we need to detect stream encoding */
+ if (This->state == XmlReadState_Initial)
+ {
+ xml_encoding enc;
+ HRESULT hr;
+
+ hr = readerinput_growraw(This->input);
+ if (FAILED(hr)) return hr;
+
+ /* try to detect encoding by BOM or data and set input code page */
+ enc = readerinput_detectencoding(This->input);
+ TRACE("detected encoding %d\n", enc);
+ get_code_page(enc, This->input);
+ }
+
return E_NOTIMPL;
}
@@ -683,7 +760,7 @@ HRESULT WINAPI CreateXmlReaderInputWithEncodingName(IUnknown *stream,
readerinput->stream = NULL;
if (imalloc) IMalloc_AddRef(imalloc);
- hr = alloc_input_buffer(readerinput, XmlEncoding_UTF16);
+ hr = alloc_input_buffer(readerinput);
if (hr != S_OK)
{
readerinput_free(readerinput, readerinput);
diff --git a/dlls/xmllite/tests/reader.c b/dlls/xmllite/tests/reader.c
index 582ad48..f5b7680 100644
--- a/dlls/xmllite/tests/reader.c
+++ b/dlls/xmllite/tests/reader.c
@@ -551,15 +551,24 @@ static void test_readerinput(void)
static void test_reader_state(void)
{
IXmlReader *reader;
+ XmlNodeType nodetype;
HRESULT hr;
- hr = pCreateXmlReader(&IID_IXmlReader, (LPVOID*)&reader, NULL);
+ hr = pCreateXmlReader(&IID_IXmlReader, (void**)&reader, NULL);
ok(hr == S_OK, "Expected S_OK, got %08x\n", hr);
/* invalid arguments */
hr = IXmlReader_GetProperty(reader, XmlReaderProperty_ReadState, NULL);
ok(hr == E_INVALIDARG, "Expected E_INVALIDARG, got %08x\n", hr);
+ /* attempt to read on closed reader */
+ test_read_state(reader, XmlReadState_Closed, -1, 0);
+if (0)
+{
+ /* newer versions crash here, probably because no input was set */
+ hr = IXmlReader_Read(reader, &nodetype);
+ ok(hr == S_FALSE, "got %08x\n", hr);
+}
IXmlReader_Release(reader);
}
diff --git a/dlls/xmllite/xmllite_private.h b/dlls/xmllite/xmllite_private.h
index 1677e5c..97993e1 100644
--- a/dlls/xmllite/xmllite_private.h
+++ b/dlls/xmllite/xmllite_private.h
@@ -27,6 +27,11 @@ static inline void *heap_alloc(size_t len)
return HeapAlloc(GetProcessHeap(), 0, len);
}
+static inline void *heap_realloc(void *mem, size_t len)
+{
+ return HeapReAlloc(GetProcessHeap(), 0, mem, len);
+}
+
static inline BOOL heap_free(void *mem)
{
return HeapFree(GetProcessHeap(), 0, mem);
More information about the wine-cvs
mailing list