Another attempt to handle overlapping buffers in WideCharToMultiByte
Dmitry Timoshkov
dmitry at baikal.ru
Sat Jul 16 06:14:22 CDT 2005
Hello,
The new assembly looks quite similar to the old one, so I assume that
it's as fast as it was before. In addition, getting rid of the 2 comparisons
per WideCharToMultiByte call that tested whether the buffers
overlap should slightly improve performance, given the
number of A<->W translations in a typical application run.
Changelog:
Dmitry Timoshkov <dmitry at codeweavers.com>
Another attempt to handle overlapping buffers in WideCharToMultiByte.
diff -up cvs/hq/wine/dlls/kernel/tests/codepage.c wine/dlls/kernel/tests/codepage.c
--- cvs/hq/wine/dlls/kernel/tests/codepage.c Sat Jul 16 14:45:25 2005
+++ wine/dlls/kernel/tests/codepage.c Sat Jul 16 15:55:02 2005
@@ -58,8 +58,8 @@ static void test_negative_source_length(
static void test_overlapped_buffers(void)
{
- static const WCHAR strW[] = {'j','u','s','t',' ','a',' ','t','e','s','t',0};
- static const char strA[] = "just a test";
+ static const WCHAR strW[] = {'j','u','s','t',' ','a',' ','t','e','s','t',' ','s','t','r','i','n','g',' ','l','o','n','g','e','r',' ','1','6',' ','c','h','a','r','a','c','t','e','r','s',0};
+ static const char strA[] = "just a test string longer 16 characters";
char buf[256];
int ret;
diff -up cvs/hq/wine/libs/unicode/wctomb.c wine/libs/unicode/wctomb.c
--- cvs/hq/wine/libs/unicode/wctomb.c Thu Dec 2 14:04:52 2004
+++ wine/libs/unicode/wctomb.c Sat Jul 16 19:42:26 2005
@@ -133,7 +133,7 @@ static inline int wcstombs_sbcs( const s
{
const unsigned char * const uni2cp_low = table->uni2cp_low;
const unsigned short * const uni2cp_high = table->uni2cp_high;
- int ret = srclen;
+ int ret = srclen, inc;
if (dstlen < srclen)
{
@@ -142,44 +142,39 @@ static inline int wcstombs_sbcs( const s
ret = -1;
}
- if (dst <= (const char *)src && dst + 16 > (const char *)src)
- {
- /* overlapping buffers, do it char by char */
- while (srclen--)
- {
- *dst++ = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];
- src++;
- }
- return ret;
- }
+ /* handle remainder first */
+ inc = srclen & 0xf;
for (;;)
{
- switch(srclen)
+ dst += inc;
+ src += inc;
+
+ switch(inc)
{
+ /* do it char by char in order to handle overlapping buffers */
default:
- case 16: dst[15] = uni2cp_low[uni2cp_high[src[15] >> 8] + (src[15] & 0xff)];
- case 15: dst[14] = uni2cp_low[uni2cp_high[src[14] >> 8] + (src[14] & 0xff)];
- case 14: dst[13] = uni2cp_low[uni2cp_high[src[13] >> 8] + (src[13] & 0xff)];
- case 13: dst[12] = uni2cp_low[uni2cp_high[src[12] >> 8] + (src[12] & 0xff)];
- case 12: dst[11] = uni2cp_low[uni2cp_high[src[11] >> 8] + (src[11] & 0xff)];
- case 11: dst[10] = uni2cp_low[uni2cp_high[src[10] >> 8] + (src[10] & 0xff)];
- case 10: dst[9] = uni2cp_low[uni2cp_high[src[9] >> 8] + (src[9] & 0xff)];
- case 9: dst[8] = uni2cp_low[uni2cp_high[src[8] >> 8] + (src[8] & 0xff)];
- case 8: dst[7] = uni2cp_low[uni2cp_high[src[7] >> 8] + (src[7] & 0xff)];
- case 7: dst[6] = uni2cp_low[uni2cp_high[src[6] >> 8] + (src[6] & 0xff)];
- case 6: dst[5] = uni2cp_low[uni2cp_high[src[5] >> 8] + (src[5] & 0xff)];
- case 5: dst[4] = uni2cp_low[uni2cp_high[src[4] >> 8] + (src[4] & 0xff)];
- case 4: dst[3] = uni2cp_low[uni2cp_high[src[3] >> 8] + (src[3] & 0xff)];
- case 3: dst[2] = uni2cp_low[uni2cp_high[src[2] >> 8] + (src[2] & 0xff)];
- case 2: dst[1] = uni2cp_low[uni2cp_high[src[1] >> 8] + (src[1] & 0xff)];
- case 1: dst[0] = uni2cp_low[uni2cp_high[src[0] >> 8] + (src[0] & 0xff)];
+ case 16: dst[-16] = uni2cp_low[uni2cp_high[src[-16] >> 8] + (src[-16] & 0xff)];
+ case 15: dst[-15] = uni2cp_low[uni2cp_high[src[-15] >> 8] + (src[-15] & 0xff)];
+ case 14: dst[-14] = uni2cp_low[uni2cp_high[src[-14] >> 8] + (src[-14] & 0xff)];
+ case 13: dst[-13] = uni2cp_low[uni2cp_high[src[-13] >> 8] + (src[-13] & 0xff)];
+ case 12: dst[-12] = uni2cp_low[uni2cp_high[src[-12] >> 8] + (src[-12] & 0xff)];
+ case 11: dst[-11] = uni2cp_low[uni2cp_high[src[-11] >> 8] + (src[-11] & 0xff)];
+ case 10: dst[-10] = uni2cp_low[uni2cp_high[src[-10] >> 8] + (src[-10] & 0xff)];
+ case 9: dst[-9] = uni2cp_low[uni2cp_high[src[-9] >> 8] + (src[-9] & 0xff)];
+ case 8: dst[-8] = uni2cp_low[uni2cp_high[src[-8] >> 8] + (src[-8] & 0xff)];
+ case 7: dst[-7] = uni2cp_low[uni2cp_high[src[-7] >> 8] + (src[-7] & 0xff)];
+ case 6: dst[-6] = uni2cp_low[uni2cp_high[src[-6] >> 8] + (src[-6] & 0xff)];
+ case 5: dst[-5] = uni2cp_low[uni2cp_high[src[-5] >> 8] + (src[-5] & 0xff)];
+ case 4: dst[-4] = uni2cp_low[uni2cp_high[src[-4] >> 8] + (src[-4] & 0xff)];
+ case 3: dst[-3] = uni2cp_low[uni2cp_high[src[-3] >> 8] + (src[-3] & 0xff)];
+ case 2: dst[-2] = uni2cp_low[uni2cp_high[src[-2] >> 8] + (src[-2] & 0xff)];
+ case 1: dst[-1] = uni2cp_low[uni2cp_high[src[-1] >> 8] + (src[-1] & 0xff)];
case 0: break;
}
if (srclen < 16) return ret;
- dst += 16;
- src += 16;
srclen -= 16;
+ inc = 16; /* from now on srclen is aligned to 16 */
}
}
More information about the wine-patches
mailing list