Aric Stewart: usp10: Decode surrogate pairs in get_char_script and handle ranges beyond the BMP.

Alexandre Julliard julliard at winehq.org
Mon Dec 19 13:39:22 CST 2011


Module: wine
Branch: master
Commit: 0404d68999c6daa1a28e10300664d3314fa03c44
URL:    http://source.winehq.org/git/wine.git/?a=commit;h=0404d68999c6daa1a28e10300664d3314fa03c44

Author: Aric Stewart <aric at codeweavers.com>
Date:   Fri Dec 16 13:15:43 2011 -0600

usp10: Decode surrogate pairs in get_char_script and handle ranges beyond the BMP.

---

 dlls/usp10/usp10.c |   53 ++++++++++++++++++++++++++++++++++++++++++---------
 1 files changed, 43 insertions(+), 10 deletions(-)

diff --git a/dlls/usp10/usp10.c b/dlls/usp10/usp10.c
index a0e143e..5f1ac5c 100644
--- a/dlls/usp10/usp10.c
+++ b/dlls/usp10/usp10.c
@@ -44,8 +44,8 @@ WINE_DEFAULT_DEBUG_CHANNEL(uniscribe);
 typedef struct _scriptRange
 {
     WORD script;
-    WORD rangeFirst;
-    WORD rangeLast;
+    DWORD rangeFirst;
+    DWORD rangeLast;
     WORD numericScript;
     WORD punctScript;
 } scriptRange;
@@ -774,24 +774,38 @@ static WCHAR mirror_char( WCHAR ch )
     return ch + wine_mirror_map[wine_mirror_map[ch >> 8] + (ch & 0xff)];
 }
 
-static WORD get_char_script( WCHAR ch)
+static inline DWORD decode_surrogate_pair(LPCWSTR str, INT index, INT end)
+{
+    if (index < end-1 && IS_SURROGATE_PAIR(str[index],str[index+1]))
+    {
+        DWORD ch = 0x10000 + ((str[index] - 0xd800) << 10) + (str[index+1] - 0xdc00);
+        TRACE("Surrogate Pair %x %x => %x\n",str[index], str[index+1], ch);
+        return ch;
+    }
+    return 0;
+}
+
+static WORD get_char_script( LPCWSTR str, INT index, INT end, INT *consumed)
 {
     static const WCHAR latin_punc[] = {'#','$','&','\'',',',';','<','>','?','@','\\','^','_','`','{','|','}','~', 0x00a0, 0};
     WORD type = 0;
+    DWORD ch;
     int i;
 
-    if (ch == 0xc || ch == 0x20 || ch == 0x202f)
+    *consumed = 1;
+
+    if (str[index] == 0xc || str[index] == 0x20 || str[index] == 0x202f)
         return Script_CR;
 
     /* These punctuation are separated out as Latin punctuation */
-    if (strchrW(latin_punc,ch))
+    if (strchrW(latin_punc,str[index]))
         return Script_Punctuation2;
 
     /* These chars are itemized as Punctuation by Windows */
-    if (ch == 0x2212 || ch == 0x2044)
+    if (str[index] == 0x2212 || str[index] == 0x2044)
         return Script_Punctuation;
 
-    GetStringTypeW(CT_CTYPE1, &ch, 1, &type);
+    GetStringTypeW(CT_CTYPE1, &str[index], 1, &type);
 
     if (type == 0)
         return SCRIPT_UNDEFINED;
@@ -799,6 +813,12 @@ static WORD get_char_script( WCHAR ch)
     if (type & C1_CNTRL)
         return Script_Control;
 
+    ch = decode_surrogate_pair(str, index, end);
+    if (ch)
+        *consumed = 2;
+    else
+        ch = str[index];
+
     i = 0;
     do
     {
@@ -1114,6 +1134,7 @@ HRESULT WINAPI ScriptItemizeOpenType(const WCHAR *pwcInChars, int cInChars, int
     WORD  last_indic = -1;
     WORD layoutRTL = 0;
     BOOL forceLevels = FALSE;
+    INT consumed = 0;
 
     TRACE("%s,%d,%d,%p,%p,%p,%p\n", debugstr_wn(pwcInChars, cInChars), cInChars, cMaxItems, 
           psControl, psState, pItems, pcItems);
@@ -1127,7 +1148,16 @@ HRESULT WINAPI ScriptItemizeOpenType(const WCHAR *pwcInChars, int cInChars, int
 
     for (i = 0; i < cInChars; i++)
     {
-        scripts[i] = get_char_script(pwcInChars[i]);
+        if (consumed <= 0)
+        {
+            scripts[i] = get_char_script(pwcInChars,i,cInChars,&consumed);
+            consumed --;
+        }
+        else
+        {
+            scripts[i] = scripts[i-1];
+            consumed --;
+        }
         /* Devanagari danda (U+0964) and double danda (U+0965) are used for
            all Indic scripts */
         if ((pwcInChars[i] == 0x964 || pwcInChars[i] ==0x965) && last_indic > 0)
@@ -2543,17 +2573,20 @@ HRESULT WINAPI ScriptBreak(const WCHAR *chars, int count, const SCRIPT_ANALYSIS
 HRESULT WINAPI ScriptIsComplex(const WCHAR *chars, int len, DWORD flag)
 {
     int i;
+    INT consumed = 0;
 
     TRACE("(%s,%d,0x%x)\n", debugstr_wn(chars, len), len, flag);
 
-    for (i = 0; i < len; i++)
+    for (i = 0; i < len; i+=consumed)
     {
         int script;
+        if (i >= len)
+            break;
 
         if ((flag & SIC_ASCIIDIGIT) && chars[i] >= 0x30 && chars[i] <= 0x39)
             return S_OK;
 
-        script = get_char_script(chars[i]);
+        script = get_char_script(chars,i,len, &consumed);
         if ((scriptInformation[script].props.fComplex && (flag & SIC_COMPLEX))||
             (!scriptInformation[script].props.fComplex && (flag & SIC_NEUTRAL)))
             return S_OK;




More information about the wine-cvs mailing list