Aric Stewart : usp10: Decode surrogate pairs in get_char_script and handle ranges beyond the BMP.
Alexandre Julliard
julliard at winehq.org
Mon Dec 19 13:39:22 CST 2011
Module: wine
Branch: master
Commit: 0404d68999c6daa1a28e10300664d3314fa03c44
URL: http://source.winehq.org/git/wine.git/?a=commit;h=0404d68999c6daa1a28e10300664d3314fa03c44
Author: Aric Stewart <aric at codeweavers.com>
Date: Fri Dec 16 13:15:43 2011 -0600
usp10: Decode surrogate pairs in get_char_script and handle ranges beyond the BMP.
---
dlls/usp10/usp10.c | 53 ++++++++++++++++++++++++++++++++++++++++++---------
1 files changed, 43 insertions(+), 10 deletions(-)
diff --git a/dlls/usp10/usp10.c b/dlls/usp10/usp10.c
index a0e143e..5f1ac5c 100644
--- a/dlls/usp10/usp10.c
+++ b/dlls/usp10/usp10.c
@@ -44,8 +44,8 @@ WINE_DEFAULT_DEBUG_CHANNEL(uniscribe);
typedef struct _scriptRange
{
WORD script;
- WORD rangeFirst;
- WORD rangeLast;
+ DWORD rangeFirst;
+ DWORD rangeLast;
WORD numericScript;
WORD punctScript;
} scriptRange;
@@ -774,24 +774,38 @@ static WCHAR mirror_char( WCHAR ch )
return ch + wine_mirror_map[wine_mirror_map[ch >> 8] + (ch & 0xff)];
}
-static WORD get_char_script( WCHAR ch)
+static inline DWORD decode_surrogate_pair(LPCWSTR str, INT index, INT end)
+{
+ if (index < end-1 && IS_SURROGATE_PAIR(str[index],str[index+1]))
+ {
+ DWORD ch = 0x10000 + ((str[index] - 0xd800) << 10) + (str[index+1] - 0xdc00);
+ TRACE("Surrogate Pair %x %x => %x\n",str[index], str[index+1], ch);
+ return ch;
+ }
+ return 0;
+}
+
+static WORD get_char_script( LPCWSTR str, INT index, INT end, INT *consumed)
{
static const WCHAR latin_punc[] = {'#','$','&','\'',',',';','<','>','?','@','\\','^','_','`','{','|','}','~', 0x00a0, 0};
WORD type = 0;
+ DWORD ch;
int i;
- if (ch == 0xc || ch == 0x20 || ch == 0x202f)
+ *consumed = 1;
+
+ if (str[index] == 0xc || str[index] == 0x20 || str[index] == 0x202f)
return Script_CR;
/* These punctuation are separated out as Latin punctuation */
- if (strchrW(latin_punc,ch))
+ if (strchrW(latin_punc,str[index]))
return Script_Punctuation2;
/* These chars are itemized as Punctuation by Windows */
- if (ch == 0x2212 || ch == 0x2044)
+ if (str[index] == 0x2212 || str[index] == 0x2044)
return Script_Punctuation;
- GetStringTypeW(CT_CTYPE1, &ch, 1, &type);
+ GetStringTypeW(CT_CTYPE1, &str[index], 1, &type);
if (type == 0)
return SCRIPT_UNDEFINED;
@@ -799,6 +813,12 @@ static WORD get_char_script( WCHAR ch)
if (type & C1_CNTRL)
return Script_Control;
+ ch = decode_surrogate_pair(str, index, end);
+ if (ch)
+ *consumed = 2;
+ else
+ ch = str[index];
+
i = 0;
do
{
@@ -1114,6 +1134,7 @@ HRESULT WINAPI ScriptItemizeOpenType(const WCHAR *pwcInChars, int cInChars, int
WORD last_indic = -1;
WORD layoutRTL = 0;
BOOL forceLevels = FALSE;
+ INT consumed = 0;
TRACE("%s,%d,%d,%p,%p,%p,%p\n", debugstr_wn(pwcInChars, cInChars), cInChars, cMaxItems,
psControl, psState, pItems, pcItems);
@@ -1127,7 +1148,16 @@ HRESULT WINAPI ScriptItemizeOpenType(const WCHAR *pwcInChars, int cInChars, int
for (i = 0; i < cInChars; i++)
{
- scripts[i] = get_char_script(pwcInChars[i]);
+ if (consumed <= 0)
+ {
+ scripts[i] = get_char_script(pwcInChars,i,cInChars,&consumed);
+ consumed --;
+ }
+ else
+ {
+ scripts[i] = scripts[i-1];
+ consumed --;
+ }
/* Devanagari danda (U+0964) and double danda (U+0965) are used for
all Indic scripts */
if ((pwcInChars[i] == 0x964 || pwcInChars[i] ==0x965) && last_indic > 0)
@@ -2543,17 +2573,20 @@ HRESULT WINAPI ScriptBreak(const WCHAR *chars, int count, const SCRIPT_ANALYSIS
HRESULT WINAPI ScriptIsComplex(const WCHAR *chars, int len, DWORD flag)
{
int i;
+ INT consumed = 0;
TRACE("(%s,%d,0x%x)\n", debugstr_wn(chars, len), len, flag);
- for (i = 0; i < len; i++)
+ for (i = 0; i < len; i+=consumed)
{
int script;
+ if (i >= len)
+ break;
if ((flag & SIC_ASCIIDIGIT) && chars[i] >= 0x30 && chars[i] <= 0x39)
return S_OK;
- script = get_char_script(chars[i]);
+ script = get_char_script(chars,i,len, &consumed);
if ((scriptInformation[script].props.fComplex && (flag & SIC_COMPLEX))||
(!scriptInformation[script].props.fComplex && (flag & SIC_NEUTRAL)))
return S_OK;
More information about the wine-cvs mailing list