Andrew Eikum : riched20: Continue interpreting data as UTF-8 after the first chunk boundary.

Alexandre Julliard julliard at winehq.org
Wed Oct 2 15:49:12 CDT 2013


Module: wine
Branch: master
Commit: 360afb93b57460ed113607a970504de1f86651e9
URL:    http://source.winehq.org/git/wine.git/?a=commit;h=360afb93b57460ed113607a970504de1f86651e9

Author: Andrew Eikum <aeikum at codeweavers.com>
Date:   Tue Oct  1 12:58:53 2013 -0500

riched20: Continue interpreting data as UTF-8 after the first chunk boundary.

---

 dlls/riched20/editor.c       |   56 +++++++++++++++++++++++++++++++++++++++--
 dlls/riched20/tests/editor.c |   39 +++++++++++++++++++++++++++++
 2 files changed, 92 insertions(+), 3 deletions(-)

diff --git a/dlls/riched20/editor.c b/dlls/riched20/editor.c
index 9af8a23..42f5770 100644
--- a/dlls/riched20/editor.c
+++ b/dlls/riched20/editor.c
@@ -286,6 +286,9 @@ static LRESULT ME_StreamInText(ME_TextEditor *editor, DWORD dwFormat, ME_InStrea
   WCHAR *pText;
   LRESULT total_bytes_read = 0;
   BOOL is_read = FALSE;
+  DWORD cp = CP_ACP, copy = 0;
+  char conv_buf[4 + STREAMIN_BUFFER_SIZE]; /* up to 4 additional UTF-8 bytes */
+
   static const char bom_utf8[] = {0xEF, 0xBB, 0xBF};
 
   TRACE("%08x %p\n", dwFormat, stream);
@@ -307,8 +310,7 @@ static LRESULT ME_StreamInText(ME_TextEditor *editor, DWORD dwFormat, ME_InStrea
     if (!(dwFormat & SF_UNICODE))
     {
       char * buf = stream->buffer;
-      DWORD size = stream->dwSize;
-      DWORD cp = CP_ACP;
+      DWORD size = stream->dwSize, end;
 
       if (!is_read)
       {
@@ -321,8 +323,56 @@ static LRESULT ME_StreamInText(ME_TextEditor *editor, DWORD dwFormat, ME_InStrea
         }
       }
 
-      nWideChars = MultiByteToWideChar(cp, 0, buf, size, wszText, STREAMIN_BUFFER_SIZE);
+      if (cp == CP_UTF8)
+      {
+        if (copy)
+        {
+          memcpy(conv_buf + copy, buf, size);
+          buf = conv_buf;
+          size += copy;
+        }
+        end = size;
+        while ((buf[end-1] & 0xC0) == 0x80)
+        {
+          --end;
+          --total_bytes_read; /* strange, but seems to match windows */
+        }
+        if (buf[end-1] & 0x80)
+        {
+          DWORD need = 0;
+          if ((buf[end-1] & 0xE0) == 0xC0)
+            need = 1;
+          if ((buf[end-1] & 0xF0) == 0xE0)
+            need = 2;
+          if ((buf[end-1] & 0xF8) == 0xF0)
+            need = 3;
+
+          if (size - end >= need)
+          {
+            /* we have enough bytes for this sequence */
+            end = size;
+          }
+          else
+          {
+            /* need more bytes, so don't transcode this sequence */
+            --end;
+          }
+        }
+      }
+      else
+        end = size;
+
+      nWideChars = MultiByteToWideChar(cp, 0, buf, end, wszText, STREAMIN_BUFFER_SIZE);
       pText = wszText;
+
+      if (cp == CP_UTF8)
+      {
+        if (end != size)
+        {
+          memcpy(conv_buf, buf + end, size - end);
+          copy = size - end;
+        }
+      }
     }
     else
     {
diff --git a/dlls/riched20/tests/editor.c b/dlls/riched20/tests/editor.c
index 15e1132..3c2e471 100644
--- a/dlls/riched20/tests/editor.c
+++ b/dlls/riched20/tests/editor.c
@@ -5035,6 +5035,29 @@ static DWORD CALLBACK test_EM_STREAMIN_esCallback(DWORD_PTR dwCookie,
   return 0;
 }
 
+static DWORD CALLBACK test_EM_STREAMIN_esCallback_UTF8Split(DWORD_PTR dwCookie, /* address of a DWORD call counter */
+                                         LPBYTE pbBuff, /* destination the chunk is copied into */
+                                         LONG cb, /* never consulted by this callback */
+                                         LONG *pcb) /* out: number of bytes copied into pbBuff */
+{ /* Test data source: supplies UTF-8 text in two chunks, cutting one two-byte sequence in half. */
+    DWORD *phase = (DWORD *)dwCookie; /* counter lives in the caller; it selects which chunk to emit */
+
+    if(*phase == 0){ /* first call */
+        static const char first[] = "\xef\xbb\xbf\xc3\x96\xc3"; /* BOM (EF BB BF), U+00D6 (C3 96), then only the lead byte C3 */
+        *pcb = sizeof(first) - 1; /* - 1 leaves out the terminating NUL */
+        memcpy(pbBuff, first, *pcb);
+    }else if(*phase == 1){ /* second call */
+        static const char second[] = "\x8f\xc3\x8b"; /* 8F completes C3 8F (U+00CF); C3 8B is U+00CB */
+        *pcb = sizeof(second) - 1; /* again without the terminating NUL */
+        memcpy(pbBuff, second, *pcb);
+    }else
+        *pcb = 0; /* every later call produces no data */
+
+    ++*phase; /* advance so the next call takes the next branch */
+
+    return 0; /* NOTE(review): presumably 0 means "no error" to the stream-in caller - confirm against the EDITSTREAMCALLBACK contract */
+}
+
 struct StringWithLength {
     int length;
     char *buffer;
@@ -5063,6 +5086,7 @@ static DWORD CALLBACK test_EM_STREAMIN_esCallback2(DWORD_PTR dwCookie,
 static void test_EM_STREAMIN(void)
 {
   HWND hwndRichEdit = new_richedit(NULL);
+  DWORD phase;
   LRESULT result;
   EDITSTREAM es;
   char buffer[1024] = {0};
@@ -5204,6 +5228,21 @@ static void test_EM_STREAMIN(void)
       "EM_STREAMIN: Test UTF8WithBOM set wrong text: Result: %s\n",buffer);
   ok(es.dwError == 0, "EM_STREAMIN: Test UTF8WithBOM set error %d, expected %d\n", es.dwError, 0);
 
+  phase = 0;
+  es.dwCookie = (DWORD_PTR)&phase;
+  es.dwError = 0;
+  es.pfnCallback = test_EM_STREAMIN_esCallback_UTF8Split;
+  result = SendMessage(hwndRichEdit, EM_STREAMIN, SF_TEXT, (LPARAM)&es);
+  ok(result == 8, "got %ld\n", result);
+
+  result = SendMessage(hwndRichEdit, WM_GETTEXT, 1024, (LPARAM) buffer);
+  ok(result  == 3,
+      "EM_STREAMIN: Test UTF8Split returned %ld\n", result);
+  result = memcmp (buffer,"\xd6\xcf\xcb", 3);
+  ok(result  == 0,
+      "EM_STREAMIN: Test UTF8Split set wrong text: Result: %s\n",buffer);
+  ok(es.dwError == 0, "EM_STREAMIN: Test UTF8Split set error %d, expected %d\n", es.dwError, 0);
+
   es.dwCookie = (DWORD_PTR)&cookieForStream4;
   es.dwError = 0;
   es.pfnCallback = test_EM_STREAMIN_esCallback2;




More information about the wine-cvs mailing list