Handling overlapping buffers in wcstombs_sbcs

François Gouget fgouget at codeweavers.com
Fri Mar 23 02:41:46 CST 2001


   Found while debugging the version info problems (VS_VERSION_INFO got
corrupted by ConvertVersionInfo32To16):

 - look at the code of wcstombcs_sbcs
 - assume dst==src
 - with the loop unrolling that has been done we start converting
src[16]
 - this modifies dst[16] which happens to be part of src[8]
 - when we get to src[8] it has been corrupted and we get garbage

   Actually the documentation specifies that WideCharToMultiByte (called
by ConvertVersionInfo32To16) fails if the source and destination
pointers are the same. But it does not specify what happens if the
buffers merelyt overlap. And of course in the ConvertVersionInfo32To16
case there's a 2 byte difference between them!

   So we should really process the charaters from left to right, this
would allow the source and destination buffers to overlap 'in reasonable
ways'.
   I also had other beefs with the loop unrolling:
 - using dst[15], dst[14], etc means we keep adding constants (loaded
from memory) to that pointer, and that we must still update that pointer
at the end of the loop. This seems less efficient than a *dst++
 - same for src[x] vs. *src and src++
 - the goal of loop unrolling is to reduce the number of jumps. But a
switch statement is nothing more than a calculated jump (or a series
thereof) so by putting the switch in the loop we perform two jumps per
loop. One of them being calculated which is not very good. So I believe
it's better to move the switch outside (besides, I've seen it done this
way somewhere).

   I'm not really convinced by the loop unrolling but I tried to keep it
anyway. I just modified it a bit. If someone's interested in doing
actual performance tests, wants to look at the assembly or knows how
manual loop unrolling must be done, go ahead.


Changelog:

   François Gouget <fgouget at codeweavers.com>

 * unicode/wctomb.c

   wcstombcs_sbcs: Behave more predicatably in case the source and
destination buffers overlap
   Optimize the loop unrolling?


-- 
François Gouget
fgouget at codeweavers.com
-------------- next part --------------
Index: unicode/wctomb.c
===================================================================
RCS file: /home/wine/wine/unicode/wctomb.c,v
retrieving revision 1.5
diff -u -r1.5 wctomb.c
--- unicode/wctomb.c	2000/12/29 03:56:06	1.5
+++ unicode/wctomb.c	2001/03/23 07:46:33
@@ -101,6 +101,7 @@
     const unsigned char  * const uni2cp_low = table->uni2cp_low;
     const unsigned short * const uni2cp_high = table->uni2cp_high;
     int ret = srclen;
+    int srclen16;
 
     if (dstlen < srclen)
     {
@@ -109,34 +110,46 @@
         ret = -1;
     }
 
-    for (;;)
+    for (srclen16=srclen/16;srclen16>0;srclen16--)
     {
-        switch(srclen)
-        {
-        default:
-        case 16: dst[15] = uni2cp_low[uni2cp_high[src[15] >> 8] + (src[15] & 0xff)];
-        case 15: dst[14] = uni2cp_low[uni2cp_high[src[14] >> 8] + (src[14] & 0xff)];
-        case 14: dst[13] = uni2cp_low[uni2cp_high[src[13] >> 8] + (src[13] & 0xff)];
-        case 13: dst[12] = uni2cp_low[uni2cp_high[src[12] >> 8] + (src[12] & 0xff)];
-        case 12: dst[11] = uni2cp_low[uni2cp_high[src[11] >> 8] + (src[11] & 0xff)];
-        case 11: dst[10] = uni2cp_low[uni2cp_high[src[10] >> 8] + (src[10] & 0xff)];
-        case 10: dst[9]  = uni2cp_low[uni2cp_high[src[9]  >> 8] + (src[9]  & 0xff)];
-        case 9:  dst[8]  = uni2cp_low[uni2cp_high[src[8]  >> 8] + (src[8]  & 0xff)];
-        case 8:  dst[7]  = uni2cp_low[uni2cp_high[src[7]  >> 8] + (src[7]  & 0xff)];
-        case 7:  dst[6]  = uni2cp_low[uni2cp_high[src[6]  >> 8] + (src[6]  & 0xff)];
-        case 6:  dst[5]  = uni2cp_low[uni2cp_high[src[5]  >> 8] + (src[5]  & 0xff)];
-        case 5:  dst[4]  = uni2cp_low[uni2cp_high[src[4]  >> 8] + (src[4]  & 0xff)];
-        case 4:  dst[3]  = uni2cp_low[uni2cp_high[src[3]  >> 8] + (src[3]  & 0xff)];
-        case 3:  dst[2]  = uni2cp_low[uni2cp_high[src[2]  >> 8] + (src[2]  & 0xff)];
-        case 2:  dst[1]  = uni2cp_low[uni2cp_high[src[1]  >> 8] + (src[1]  & 0xff)];
-        case 1:  dst[0]  = uni2cp_low[uni2cp_high[src[0]  >> 8] + (src[0]  & 0xff)];
+        *dst++ = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];src++;
+        *dst++ = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];src++;
+        *dst++ = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];src++;
+        *dst++ = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];src++;
+        *dst++ = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];src++;
+        *dst++ = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];src++;
+        *dst++ = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];src++;
+        *dst++ = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];src++;
+        *dst++ = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];src++;
+        *dst++ = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];src++;
+        *dst++ = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];src++;
+        *dst++ = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];src++;
+        *dst++ = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];src++;
+        *dst++ = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];src++;
+        *dst++ = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];src++;
+        *dst++ = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];src++;
+    }
+    switch (srclen & 15)
+    {
+        case 15: *dst++ = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];src++;
+        case 14: *dst++ = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];src++;
+        case 13: *dst++ = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];src++;
+        case 12: *dst++ = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];src++;
+        case 11: *dst++ = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];src++;
+        case 10: *dst++ = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];src++;
+        case 9:  *dst++ = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];src++;
+        case 8:  *dst++ = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];src++;
+        case 7:  *dst++ = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];src++;
+        case 6:  *dst++ = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];src++;
+        case 5:  *dst++ = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];src++;
+        case 4:  *dst++ = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];src++;
+        case 3:  *dst++ = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];src++;
+        case 2:  *dst++ = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];src++;
+        case 1:  *dst++ = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];src++;
         case 0: break;
-        }
-        if (srclen < 16) return ret;
-        dst += 16;
-        src += 16;
-        srclen -= 16;
     }
+
+    return ret;
 }
 
 /* slow version of wcstombs_sbcs that handles the various flags */


More information about the wine-patches mailing list