Alexandre Julliard : libwine: Add option for Unicode composition in utf-8 -> wchar conversion.
Alexandre Julliard
julliard at winehq.org
Fri Feb 15 05:36:11 CST 2008
Module: wine
Branch: master
Commit: 86ff825f41572f9d74b899810aa1b613287daeaa
URL: http://source.winehq.org/git/wine.git/?a=commit;h=86ff825f41572f9d74b899810aa1b613287daeaa
Author: Alexandre Julliard <julliard at winehq.org>
Date: Fri Feb 15 12:06:58 2008 +0100
libwine: Add option for Unicode composition in utf-8 -> wchar conversion.
---
libs/wine/utf8.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++
libs/wine/wctomb.c | 2 +-
2 files changed, 93 insertions(+), 1 deletions(-)
diff --git a/libs/wine/utf8.c b/libs/wine/utf8.c
index fcb4e23..c95bc51 100644
--- a/libs/wine/utf8.c
+++ b/libs/wine/utf8.c
@@ -22,6 +22,8 @@
#include "wine/unicode.h"
+extern WCHAR compose( const WCHAR *str );
+
/* number of following bytes in sequence based on first byte value (for bytes above 0x7f) */
static const char utf8_length[128] =
{
@@ -183,6 +185,94 @@ static inline unsigned int decode_utf8_char( unsigned char ch, const char **str,
return ~0;
}
+/* query necessary dst length for src string with composition */
+static inline int get_length_mbs_utf8_compose( int flags, const char *src, int srclen )
+{
+ int ret = 0;
+ unsigned int res;
+ WCHAR composed[2];
+ const char *srcend = src + srclen;
+
+ composed[0] = 0;
+ while (src < srcend)
+ {
+ unsigned char ch = *src++;
+ if (ch < 0x80) /* special fast case for 7-bit ASCII */
+ {
+ composed[0] = ch;
+ ret++;
+ continue;
+ }
+ if ((res = decode_utf8_char( ch, &src, srcend )) <= 0xffff)
+ {
+ if (composed[0])
+ {
+ composed[1] = res;
+ if ((composed[0] = compose( composed ))) continue;
+ }
+ composed[0] = res;
+ ret++;
+ }
+ else if (res <= 0x10ffff)
+ {
+ ret += 2;
+ composed[0] = 0; /* no composition for surrogates */
+ }
+ else if (flags & MB_ERR_INVALID_CHARS) return -2; /* bad char */
+ /* otherwise ignore it */
+ }
+ return ret;
+}
+
+/* UTF-8 to wide char string conversion with composition */
+/* return -1 on dst buffer overflow, -2 on invalid input char */
+static int utf8_mbstowcs_compose( int flags, const char *src, int srclen, WCHAR *dst, int dstlen )
+{
+ unsigned int res;
+ const char *srcend = src + srclen;
+ WCHAR composed[2];
+ WCHAR *dstend = dst + dstlen;
+
+ if (!dstlen) return get_length_mbs_utf8_compose( flags, src, srclen );
+
+ composed[0] = 0;
+ while (src < srcend)
+ {
+ unsigned char ch = *src++;
+ if (ch < 0x80) /* special fast case for 7-bit ASCII */
+ {
+ if (dst >= dstend) return -1; /* overflow */
+ *dst++ = composed[0] = ch;
+ continue;
+ }
+ if ((res = decode_utf8_char( ch, &src, srcend )) <= 0xffff)
+ {
+ if (composed[0])
+ {
+ composed[1] = res;
+ if ((composed[0] = compose( composed )))
+ {
+ dst[-1] = composed[0];
+ continue;
+ }
+ }
+ if (dst >= dstend) return -1; /* overflow */
+ *dst++ = composed[0] = res;
+ }
+ else if (res <= 0x10ffff) /* we need surrogates */
+ {
+ if (dst >= dstend - 1) return -1; /* overflow */
+ res -= 0x10000;
+ *dst++ = 0xd800 | (res >> 10);
+ *dst++ = 0xdc00 | (res & 0x3ff);
+ composed[0] = 0; /* no composition for surrogates */
+ }
+ else if (flags & MB_ERR_INVALID_CHARS) return -2; /* bad char */
+ /* otherwise ignore it */
+ }
+ return dstlen - (dstend - dst);
+}
+
/* query necessary dst length for src string */
static inline int get_length_mbs_utf8( int flags, const char *src, int srclen )
{
@@ -217,6 +307,8 @@ int wine_utf8_mbstowcs( int flags, const char *src, int srclen, WCHAR *dst, int
const char *srcend = src + srclen;
WCHAR *dstend = dst + dstlen;
+ if (flags & MB_COMPOSITE) return utf8_mbstowcs_compose( flags, src, srclen, dst, dstlen );
+
if (!dstlen) return get_length_mbs_utf8( flags, src, srclen );
while ((dst < dstend) && (src < srcend))
diff --git a/libs/wine/wctomb.c b/libs/wine/wctomb.c
index 5ad027a..966e6af 100644
--- a/libs/wine/wctomb.c
+++ b/libs/wine/wctomb.c
@@ -45,7 +45,7 @@ static inline int binary_search( WCHAR ch, int low, int high )
}
/* return the result of the composition of two Unicode chars, or 0 if none */
-static WCHAR compose( const WCHAR *str )
+WCHAR compose( const WCHAR *str )
{
extern const WCHAR unicode_compose_table[];
extern const unsigned int unicode_compose_table_size;
More information about the wine-cvs
mailing list