settlers 3 and dib engine

Piotr Maceluch skrzynka365 at konto.pl
Mon Sep 10 16:20:00 CDT 2007


Jesse Allen wrote:

>On 9/8/07, Martin Owens <doctormo at gmail.com> wrote:
>  
>
>>Does this mean that the current _painfully_ slow Imperialism II, which
>>uses DIB drawing to draw its in-game maps, will work faster? I'm
>>waiting for this functionality and have pledged $20 just in case it
>>can spur on the solution.
>>
>>Best Regards, Martin Owens
>>    
>>
>
>
>I don't know, you'll have to try it yourself. It depends on what the
>app is doing. You can get a checkout of the tree here:
>http://repo.or.cz/w/wine/dibdrv.git
>
>Beware there are probably visual regressions, but it's speed you're
>checking here right? :)
>
>Jesse
>  
>
Hi,
as part of my struggle to understand the drivers I played a little with
winex11.drv and made some simple optimizations. The changes actually
make 'Settlers 3' playable. On my Duron/900 the delay caused by copying
bitmaps is hardly noticeable (it's no longer a 0.5 sec lag; now it looks
like a frame or two is dropped once every few seconds).
If anyone wants to give it a try with S3, there's another problem: a
race condition on WaitFor..Object/SetEvent/ResetEvent. That can be
worked around by simply disabling ResetEvent (a plain
"return STATUS_SUCCESS;" at the beginning of NtResetEvent). I'm trying
to find a more generic solution to the ResetEvent problem, but it seems
to be related to the lack of a thread boost after
WaitFor(Single|Multiple)Object on Wine (which seems like poor design of
the game code) and is quite difficult in general.
As I'm new to Wine/X development, what I'd appreciate most is
critical comments on the attached patch: is omitting XGetPixel/XSetPixel
and reading the XImage data directly a correct solution? Am I allowed to
use inline assembler in this case? Are there any other issues?
Thanks & regards
  Piotr Maceluch


--
----- AlphaNet - najtaniej w sieci! --------
  Odnowienia domen w rewelacyjnych cenach!
 .pl - 65 zl, .com.pl - 50 zl, reg - 20 zl
       http://www.domeny.alpha.pl
--------------------------------------------


-------------- next part --------------
>From 9d26b16c37a08aaf54f66712563cdcd523c1a95a Mon Sep 17 00:00:00 2001
From: Piotr Maceluch <skrzynka365 at konto.pl>
Date: Mon, 10 Sep 2007 22:56:25 +0200
Subject: [PATCH] Optimized copying of 1bit bitmaps to and from XImage data.

The operations for copying a 1bit bitmap to memory and back no longer employ XGetPixel/XSetPixel. Instead, direct memory operations are used. They're implemented as an asm memcpy that swaps the bit order on the fly (the swapping is lookup-table based).
---
 dlls/winex11.drv/dib.c |  137 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 137 insertions(+), 0 deletions(-)

diff --git a/dlls/winex11.drv/dib.c b/dlls/winex11.drv/dib.c
index 00b1f23..0e938b8 100644
--- a/dlls/winex11.drv/dib.c
+++ b/dlls/winex11.drv/dib.c
@@ -58,6 +58,9 @@ static PVOID dibs_handler;
 
 static int ximageDepthTable[32];
 
+static BYTE bit_reversal_lookup_table[0x100]; //bit-order-reversed value for every 8bit value
+static int bit_reversal_lookup_table_initialized = 0;
+
 /* This structure holds the arguments for DIB_SetImageBits() */
 typedef struct
 {
@@ -101,6 +104,9 @@ static INT X11DRV_DIB_Coerce(X_PHYSBITMAP *,INT,BOOL);
 static INT X11DRV_DIB_Lock(X_PHYSBITMAP *,INT,BOOL);
 static void X11DRV_DIB_Unlock(X_PHYSBITMAP *,BOOL);
 
+static void X11DRV_InitBitReversalLookupTable();
+static void X11DRV_MemoryCopyWithBitOrderSwap(void *dest, const void *src, int byteCount);
+
 /* 
   Some of the following helper functions are duplicated in
   dlls/gdi/dib.c
@@ -508,6 +514,30 @@ static void X11DRV_DIB_SetImageBits_1( int lines, const BYTE *srcbits,
     width = min(srcwidth, dstwidth);
 
     /* ==== pal 1 dib -> any bmp format ==== */
+    if(((bmpImage->bits_per_pixel | bmpImage->depth) == 1) &&
+		   (bmpImage->byte_order == bmpImage->bitmap_bit_order))
+    {
+        //optimization for 1to1 copy
+        int y;
+        BYTE* bitmapPtr = ((BYTE*)(bmpImage->data)) + (bmpImage->xoffset >> 3)
+            + ((lines - 1) * bmpImage->bytes_per_line);
+        const BYTE* dataPtr = srcbits;
+
+        TRACE("1 bit data -> 1 bit bmp, copy: width=%i, lines=%i, image->bpl=%i, dest_bpl=%i\n", 
+                width, lines, bmpImage->bytes_per_line, linebytes);
+        
+        X11DRV_InitBitReversalLookupTable();
+
+        for (y = 0; y != lines; y++)
+        { 
+            X11DRV_MemoryCopyWithBitOrderSwap(bitmapPtr, dataPtr, (width + 7) >> 3);
+              
+            dataPtr += linebytes;
+            bitmapPtr -= bmpImage->bytes_per_line;
+        }
+    }
+    else
+    {
     for (h = lines-1; h >=0; h--) {
         srcbyte=srcbits;
         /* FIXME: should avoid putting x<left pixels (minor speed issue) */
@@ -537,6 +567,7 @@ static void X11DRV_DIB_SetImageBits_1( int lines, const BYTE *srcbits,
         }
         srcbits += linebytes;
     }
+    }
 }
 
 /***********************************************************************
@@ -561,6 +592,27 @@ static void X11DRV_DIB_GetImageBits_1( int lines, BYTE *dstbits,
     switch (bmpImage->depth)
     {
     case 1:
+        {
+            int y;
+            const BYTE* bitmapPtr = ((BYTE*)(bmpImage->data)) + (bmpImage->xoffset >> 3)
+                + ((lines - 1) * bmpImage->bytes_per_line);
+            BYTE* dataPtr = dstbits;
+
+            TRACE("1 bit bmp -> 1 bit data, copy: width=%i, lines=%i, image->bpl=%i, dest_bpl=%i\n", 
+                    width, lines, bmpImage->bytes_per_line, linebytes);
+
+            X11DRV_InitBitReversalLookupTable();
+            
+            for (y = 0; y != lines; y++)
+            {
+                //used (width + 7) so rounded down will always contain last pixels in line when ((width % 8) != 0)
+                X11DRV_MemoryCopyWithBitOrderSwap(dataPtr, bitmapPtr, (width + 7) >> 3);
+                  
+                dataPtr += linebytes;
+                bitmapPtr -= bmpImage->bytes_per_line;
+            }
+        }
+        break;
     case 4:
         if (X11DRV_DIB_CheckMask(bmpImage->red_mask,bmpImage->green_mask,bmpImage->blue_mask)
             && srccolors) {
@@ -4880,3 +4932,88 @@ Pixmap X11DRV_DIB_CreatePixmapFromDIB( HGLOBAL hPackedDIB, HDC hdc )
     TRACE("Returning Pixmap %ld\n", pixmap);
     return pixmap;
 }
+
+
+/** Returns given input byte with bit order reversed. */
+static BYTE X11DRV_ReverseBits(BYTE input)
+{
+    BYTE output = 0;
+    int i;
+    for(i = 0; i != 8; i++)
+    {
+        output |= (input & 1);
+        input >>= 1;
+        if(i != 7)
+        {
+            output <<= 1;
+        }
+    }
+    return output;
+}
+
+/* Initializes bit_reversal_lookup_table with bit-order-reversed values for 
+ * every possible byte value. */
+static void X11DRV_InitBitReversalLookupTable()
+{
+    int i;
+  
+    if(bit_reversal_lookup_table_initialized)
+    { 
+        //skip if already initialized
+        return;
+    }
+  
+    for(i = 0; i != 0x100; i++)
+    {
+        bit_reversal_lookup_table[i] = X11DRV_ReverseBits((BYTE)i);
+    }
+
+    bit_reversal_lookup_table_initialized = 1;
+}
+
+/* Copies byteCount bytes from src to dest and changes bit order of copied
+ * data bytes.
+ * FIXME: The function doesn't work when declared as inline or static inline.
+ * */
+static void X11DRV_MemoryCopyWithBitOrderSwap(void *dest, const void *src, int byteCount)
+{
+  __asm__ __volatile__ (
+      "pushl %%ecx\n"
+      "cld\n"
+      "xorl %%edx, %%edx\n"
+      "shr $2, %%ecx\n"
+      "jz 2f\n"
+      
+      "1:\n"
+      "lodsl\n"
+      "movb %%al, %%dl\n"
+      "movb bit_reversal_lookup_table(%%edx),%%al\n"
+      "movb %%ah, %%dl\n"
+      "movb bit_reversal_lookup_table(%%edx),%%ah\n"
+      "bswap %%eax\n"
+      "movb %%al, %%dl\n"
+      "movb bit_reversal_lookup_table(%%edx),%%al\n"
+      "movb %%ah, %%dl\n"
+      "movb bit_reversal_lookup_table(%%edx),%%ah\n"
+      "bswap %%eax\n"
+
+      "stosl\n"
+      "loop 1b\n"
+      
+      "2:\n"
+      "popl %%ecx\n"
+      "andl $3, %%ecx\n"
+      "jz 4f\n"
+
+      "3:\n"
+      "lodsb\n"
+      "movb %%al, %%dl\n"
+      "movb bit_reversal_lookup_table(%%edx),%%al\n"
+      "stosb\n"
+      "loop 3b\n"
+      
+      "4:\n"
+      : //output
+      : "S" (src), "D" (dest), "c" (byteCount) //input
+      : "eax", "edx", "cc" );//cc - condition codes (flags)
+}
-- 
1.5.3.1




More information about the wine-devel mailing list