[PATCH] WineD3D: Optimize the fragment processing description=0A=

Stefan Doesinger stefan at codeweavers.com
Sat Jul 26 10:33:20 CDT 2008


=0A=
This patch modifies struct texture_stage_op for more compact data=0A=
storage. Instead of storing each small value in its own DWORD, the=0A=
values are packed into two DWORD bitfields, thus reducing the size=0A=
of the structure. This reduces the amount of time spent in=0A=
find_ffp_shader, but increases the time spent in gen_ffp_op, because=0A=
the plain assignments are replaced with more expensive bit masking=0A=
operations.=0A=
=0A=
I have tested the performance difference with oprofile and the=0A=
dx8 sdk dolphinvs sample. This program uses only two different=0A=
fragment processing settings, so the current linear search in=0A=
find_ffp_shader should not hurt too much (due to a not-yet-found=0A=
bug, 4 shaders are generated). Of the CPU time spent in wined3d,=0A=
the original code spent 3.5966% in find_ffp_shader and 0.8444% in=0A=
gen_ffp_op. The new code spends 1.0739% in find_ffp_shader and=0A=
0.9429% in gen_ffp_op, so it is an overall improvement. I expect=0A=
the difference to be even bigger in a real-world application,=0A=
even once we've replaced the linear search with a binary search=0A=
or a hashmap.=0A=
---=0A=
 dlls/wined3d/ati_fragment_shader.c |   76 +++++++++++-----------=0A=
 dlls/wined3d/utils.c               |  114 =
++++++++++++++++++--------------=0A=
 dlls/wined3d/wined3d_private.h     |  127 =
+++++++++++++++++++++++++++++++++---=0A=
 3 files changed, 221 insertions(+), 96 deletions(-)=0A=
=0A=
diff --git a/dlls/wined3d/ati_fragment_shader.c =
b/dlls/wined3d/ati_fragment_shader.c=0A=
index eb7837f..44f34e0 100644=0A=
--- a/dlls/wined3d/ati_fragment_shader.c=0A=
+++ b/dlls/wined3d/ati_fragment_shader.c=0A=
@@ -153,7 +153,7 @@ static GLuint register_for_arg(DWORD arg, =
WineD3D_GL_Info *gl_info, unsigned int=0A=
     GLenum ret;=0A=
 =0A=
     if(mod) *mod =3D GL_NONE;=0A=
-    if(arg =3D=3D 0xFFFFFFFF) return -1; /* This is the marker for =
unused registers */=0A=
+    if(arg =3D=3D (carg0_mask >> carg0_shift)) return -1; /* This is =
the marker for unused registers */=0A=
 =0A=
     switch(arg & WINED3DTA_SELECTMASK) {=0A=
         case WINED3DTA_DIFFUSE:=0A=
@@ -216,22 +216,22 @@ static GLuint find_tmpreg(struct texture_stage_op =
op[MAX_TEXTURES]) {=0A=
 =0A=
     memset(tex_used, 0, sizeof(tex_used));=0A=
     for(i =3D 0; i < MAX_TEXTURES; i++) {=0A=
-        if(op[i].cop =3D=3D WINED3DTOP_DISABLE) {=0A=
+        if(ffp_cop(op[i]) =3D=3D WINED3DTOP_DISABLE) {=0A=
             break;=0A=
         }=0A=
 =0A=
         if(lowest_read =3D=3D -1 &&=0A=
-          (op[i].carg1 =3D=3D WINED3DTA_TEMP || op[i].carg2 =3D=3D =
WINED3DTA_TEMP || op[i].carg0 =3D=3D WINED3DTA_TEMP ||=0A=
-           op[i].aarg1 =3D=3D WINED3DTA_TEMP || op[i].aarg2 =3D=3D =
WINED3DTA_TEMP || op[i].aarg0 =3D=3D WINED3DTA_TEMP)) {=0A=
+          (ffp_carg1(op[i]) =3D=3D WINED3DTA_TEMP || ffp_carg2(op[i]) =
=3D=3D WINED3DTA_TEMP || ffp_carg0(op[i]) =3D=3D WINED3DTA_TEMP ||=0A=
+           ffp_aarg1(op[i]) =3D=3D WINED3DTA_TEMP || ffp_aarg2(op[i]) =
=3D=3D WINED3DTA_TEMP || ffp_aarg0(op[i]) =3D=3D WINED3DTA_TEMP)) {=0A=
             lowest_read =3D i;=0A=
         }=0A=
 =0A=
-        if(lowest_write =3D=3D -1 && op[i].dst =3D=3D WINED3DTA_TEMP) {=0A=
+        if(lowest_write =3D=3D -1 && ffp_dst(op[i]) =3D=3D =
WINED3DTA_TEMP) {=0A=
             lowest_write =3D i;=0A=
         }=0A=
 =0A=
-        if(op[i].carg1 =3D=3D WINED3DTA_TEXTURE || op[i].carg2 =3D=3D =
WINED3DTA_TEXTURE || op[i].carg0 =3D=3D WINED3DTA_TEXTURE ||=0A=
-           op[i].aarg1 =3D=3D WINED3DTA_TEXTURE || op[i].aarg2 =3D=3D =
WINED3DTA_TEXTURE || op[i].aarg0 =3D=3D WINED3DTA_TEXTURE) {=0A=
+        if(ffp_carg1(op[i]) =3D=3D WINED3DTA_TEXTURE || =
ffp_carg2(op[i]) =3D=3D WINED3DTA_TEXTURE || ffp_carg0(op[i]) =3D=3D =
WINED3DTA_TEXTURE ||=0A=
+           ffp_aarg1(op[i]) =3D=3D WINED3DTA_TEXTURE || =
ffp_aarg2(op[i]) =3D=3D WINED3DTA_TEXTURE || ffp_aarg0(op[i]) =3D=3D =
WINED3DTA_TEXTURE) {=0A=
             tex_used[i] =3D TRUE;=0A=
         }=0A=
     }=0A=
@@ -290,9 +290,9 @@ static GLuint gen_ati_shader(struct texture_stage_op =
op[MAX_TEXTURES], WineD3D_G=0A=
 =0A=
     /* Pass 1: Generate sampling instructions for perturbation maps */=0A=
       for(stage =3D 0; stage < GL_LIMITS(textures); stage++) {=0A=
-        if(op[stage].cop =3D=3D WINED3DTOP_DISABLE) break;=0A=
-        if(op[stage].cop !=3D WINED3DTOP_BUMPENVMAP &&=0A=
-           op[stage].cop !=3D WINED3DTOP_BUMPENVMAPLUMINANCE) continue;=0A=
+        if(ffp_cop(op[stage]) =3D=3D WINED3DTOP_DISABLE) break;=0A=
+        if(ffp_cop(op[stage]) !=3D WINED3DTOP_BUMPENVMAP &&=0A=
+           ffp_cop(op[stage]) !=3D WINED3DTOP_BUMPENVMAPLUMINANCE) =
continue;=0A=
 =0A=
         TRACE("glSampleMapATI(GL_REG_%d_ATI, GL_TEXTURE_%d_ARB, =
GL_SWIZZLE_STR_ATI)\n",=0A=
               stage, stage);=0A=
@@ -315,9 +315,9 @@ static GLuint gen_ati_shader(struct texture_stage_op =
op[MAX_TEXTURES], WineD3D_G=0A=
 =0A=
     /* Pass 2: Generate perturbation calculations */=0A=
     for(stage =3D 0; stage < GL_LIMITS(textures); stage++) {=0A=
-        if(op[stage].cop =3D=3D WINED3DTOP_DISABLE) break;=0A=
-        if(op[stage].cop !=3D WINED3DTOP_BUMPENVMAP &&=0A=
-           op[stage].cop !=3D WINED3DTOP_BUMPENVMAPLUMINANCE) continue;=0A=
+        if(ffp_cop(op[stage]) =3D=3D WINED3DTOP_DISABLE) break;=0A=
+        if(ffp_cop(op[stage]) !=3D WINED3DTOP_BUMPENVMAP &&=0A=
+           ffp_cop(op[stage]) !=3D WINED3DTOP_BUMPENVMAPLUMINANCE) =
continue;=0A=
 =0A=
         /* Nice thing, we get the color correction for free :-) */=0A=
         if(op[stage].color_correction =3D=3D WINED3DFMT_V8U8) {=0A=
@@ -357,13 +357,13 @@ static GLuint gen_ati_shader(struct =
texture_stage_op op[MAX_TEXTURES], WineD3D_G=0A=
 =0A=
     /* Pass 3: Generate sampling instructions for regular textures */=0A=
     for(stage =3D 0; stage < GL_LIMITS(textures); stage++) {=0A=
-        if(op[stage].cop =3D=3D WINED3DTOP_DISABLE) {=0A=
+        if(ffp_cop(op[stage]) =3D=3D WINED3DTOP_DISABLE) {=0A=
             break;=0A=
         }=0A=
 =0A=
-        if(op[stage].projected =3D=3D proj_none) {=0A=
+        if(ffp_proj(op[stage]) =3D=3D proj_none) {=0A=
             swizzle =3D GL_SWIZZLE_STR_ATI;=0A=
-        } else if(op[stage].projected =3D=3D proj_count3) {=0A=
+        } else if(ffp_proj(op[stage]) =3D=3D proj_count3) {=0A=
             /* TODO: D3DTTFF_COUNT3 | D3DTTFF_PROJECTED would be =
GL_SWIZZLE_STR_DR_ATI.=0A=
              * However, the FFP vertex processing texture transform =
matrix handler does=0A=
              * some transformations in the texture matrix which makes =
the 3rd coordinate=0A=
@@ -377,17 +377,17 @@ static GLuint gen_ati_shader(struct =
texture_stage_op op[MAX_TEXTURES], WineD3D_G=0A=
             swizzle =3D GL_SWIZZLE_STQ_DQ_ATI;=0A=
         }=0A=
 =0A=
-        if((op[stage].carg0 & WINED3DTA_SELECTMASK) =3D=3D =
WINED3DTA_TEXTURE ||=0A=
-           (op[stage].carg1 & WINED3DTA_SELECTMASK) =3D=3D =
WINED3DTA_TEXTURE ||=0A=
-           (op[stage].carg2 & WINED3DTA_SELECTMASK) =3D=3D =
WINED3DTA_TEXTURE ||=0A=
-           (op[stage].aarg0 & WINED3DTA_SELECTMASK) =3D=3D =
WINED3DTA_TEXTURE ||=0A=
-           (op[stage].aarg1 & WINED3DTA_SELECTMASK) =3D=3D =
WINED3DTA_TEXTURE ||=0A=
-           (op[stage].aarg2 & WINED3DTA_SELECTMASK) =3D=3D =
WINED3DTA_TEXTURE ||=0A=
-            op[stage].cop =3D=3D WINED3DTOP_BLENDTEXTUREALPHA) {=0A=
+        if((ffp_carg0(op[stage]) & WINED3DTA_SELECTMASK) =3D=3D =
WINED3DTA_TEXTURE ||=0A=
+           (ffp_carg1(op[stage]) & WINED3DTA_SELECTMASK) =3D=3D =
WINED3DTA_TEXTURE ||=0A=
+           (ffp_carg2(op[stage]) & WINED3DTA_SELECTMASK) =3D=3D =
WINED3DTA_TEXTURE ||=0A=
+           (ffp_aarg0(op[stage]) & WINED3DTA_SELECTMASK) =3D=3D =
WINED3DTA_TEXTURE ||=0A=
+           (ffp_aarg1(op[stage]) & WINED3DTA_SELECTMASK) =3D=3D =
WINED3DTA_TEXTURE ||=0A=
+           (ffp_aarg2(op[stage]) & WINED3DTA_SELECTMASK) =3D=3D =
WINED3DTA_TEXTURE ||=0A=
+            ffp_cop(op[stage]) =3D=3D WINED3DTOP_BLENDTEXTUREALPHA) {=0A=
 =0A=
             if(stage > 0 &&=0A=
-               (op[stage - 1].cop =3D=3D WINED3DTOP_BUMPENVMAP ||=0A=
-                op[stage - 1].cop =3D=3D =
WINED3DTOP_BUMPENVMAPLUMINANCE)) {=0A=
+               (ffp_cop(op[stage - 1]) =3D=3D WINED3DTOP_BUMPENVMAP ||=0A=
+                ffp_cop(op[stage - 1]) =3D=3D =
WINED3DTOP_BUMPENVMAPLUMINANCE)) {=0A=
                 TRACE("glSampleMapATI(GL_REG_%d_ATI, GL_REG_%d_ATI, =
GL_SWIZZLE_STR_ATI)\n",=0A=
                       stage, stage);=0A=
                 GL_EXTCALL(glSampleMapATI(GL_REG_0_ATI + stage,=0A=
@@ -405,7 +405,7 @@ static GLuint gen_ati_shader(struct texture_stage_op =
op[MAX_TEXTURES], WineD3D_G=0A=
 =0A=
     /* Pass 4: Generate the arithmetic instructions */=0A=
     for(stage =3D 0; stage < MAX_TEXTURES; stage++) {=0A=
-        if(op[stage].cop =3D=3D WINED3DTOP_DISABLE) {=0A=
+        if(ffp_cop(op[stage]) =3D=3D WINED3DTOP_DISABLE) {=0A=
             if(stage =3D=3D 0) {=0A=
                 /* Handle complete texture disabling gracefully */=0A=
                 TRACE("glColorFragmentOp1ATI(GL_MOV_ATI, GL_REG_0_ATI, =
GL_NONE, GL_NONE, GL_PRIMARY_COLOR, GL_NONE, GL_NONE)\n");=0A=
@@ -418,7 +418,7 @@ static GLuint gen_ati_shader(struct texture_stage_op =
op[MAX_TEXTURES], WineD3D_G=0A=
             break;=0A=
         }=0A=
 =0A=
-        if(op[stage].dst =3D=3D WINED3DTA_TEMP) {=0A=
+        if(ffp_dst(op[stage]) =3D=3D WINED3DTA_TEMP) {=0A=
             /* If we're writing to D3DTA_TEMP, but never reading from =
it we don't have to write there in the first place.=0A=
              * skip the entire stage, this saves some GPU time=0A=
              */=0A=
@@ -429,14 +429,14 @@ static GLuint gen_ati_shader(struct =
texture_stage_op op[MAX_TEXTURES], WineD3D_G=0A=
             dstreg =3D GL_REG_0_ATI;=0A=
         }=0A=
 =0A=
-        arg0 =3D register_for_arg(op[stage].carg0, gl_info, stage, =
&argmod0, tmparg);=0A=
-        arg1 =3D register_for_arg(op[stage].carg1, gl_info, stage, =
&argmod1, tmparg);=0A=
-        arg2 =3D register_for_arg(op[stage].carg2, gl_info, stage, =
&argmod2, tmparg);=0A=
+        arg0 =3D register_for_arg(ffp_carg0(op[stage]), gl_info, stage, =
&argmod0, tmparg);=0A=
+        arg1 =3D register_for_arg(ffp_carg1(op[stage]), gl_info, stage, =
&argmod1, tmparg);=0A=
+        arg2 =3D register_for_arg(ffp_carg2(op[stage]), gl_info, stage, =
&argmod2, tmparg);=0A=
         dstmod =3D GL_NONE;=0A=
         argmodextra =3D GL_NONE;=0A=
         extrarg =3D GL_NONE;=0A=
 =0A=
-        switch(op[stage].cop) {=0A=
+        switch(ffp_cop(op[stage])) {=0A=
             case WINED3DTOP_SELECTARG2:=0A=
                 arg1 =3D arg2;=0A=
                 argmod1 =3D argmod2;=0A=
@@ -603,17 +603,17 @@ static GLuint gen_ati_shader(struct =
texture_stage_op op[MAX_TEXTURES], WineD3D_G=0A=
                 /* Those are handled in the first pass of the =
shader(generation pass 1 and 2) already */=0A=
                 break;=0A=
 =0A=
-            default: FIXME("Unhandled color operation %d on stage =
%d\n", op[stage].cop, stage);=0A=
+            default: FIXME("Unhandled color operation %d on stage =
%d\n", ffp_cop(op[stage]), stage);=0A=
         }=0A=
 =0A=
-        arg0 =3D register_for_arg(op[stage].aarg0, gl_info, stage, =
&argmod0, tmparg);=0A=
-        arg1 =3D register_for_arg(op[stage].aarg1, gl_info, stage, =
&argmod1, tmparg);=0A=
-        arg2 =3D register_for_arg(op[stage].aarg2, gl_info, stage, =
&argmod2, tmparg);=0A=
+        arg0 =3D register_for_arg(ffp_aarg0(op[stage]), gl_info, stage, =
&argmod0, tmparg);=0A=
+        arg1 =3D register_for_arg(ffp_aarg1(op[stage]), gl_info, stage, =
&argmod1, tmparg);=0A=
+        arg2 =3D register_for_arg(ffp_aarg2(op[stage]), gl_info, stage, =
&argmod2, tmparg);=0A=
         dstmod =3D GL_NONE;=0A=
         argmodextra =3D GL_NONE;=0A=
         extrarg =3D GL_NONE;=0A=
 =0A=
-        switch(op[stage].aop) {=0A=
+        switch(ffp_aop(op[stage])) {=0A=
             case WINED3DTOP_DISABLE:=0A=
                 /* Get the primary color to the output if on stage 0, =
otherwise leave register 0 untouched */=0A=
                 if(stage =3D=3D 0) {=0A=
@@ -765,7 +765,7 @@ static GLuint gen_ati_shader(struct texture_stage_op =
op[MAX_TEXTURES], WineD3D_G=0A=
                 ERR("Application uses an invalid alpha operation\n");=0A=
                 break;=0A=
 =0A=
-            default: FIXME("Unhandled alpha operation %d on stage =
%d\n", op[stage].aop, stage);=0A=
+            default: FIXME("Unhandled alpha operation %d on stage =
%d\n", ffp_aop(op[stage]), stage);=0A=
         }=0A=
     }=0A=
 =0A=
@@ -795,7 +795,7 @@ static void set_tex_op_atifs(DWORD state, =
IWineD3DStateBlockImpl *stateblock, Wi=0A=
         }=0A=
         desc->num_textures_used =3D 0;=0A=
         for(i =3D 0; i < GL_LIMITS(texture_stages); i++) {=0A=
-            if(settings.op[i].cop =3D=3D WINED3DTOP_DISABLE) break;=0A=
+            if(ffp_cop(settings.op[i]) =3D=3D WINED3DTOP_DISABLE) break;=0A=
             desc->num_textures_used =3D i;=0A=
         }=0A=
 =0A=
diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c=0A=
index cbcaa17..43111f6 100644=0A=
--- a/dlls/wined3d/utils.c=0A=
+++ b/dlls/wined3d/utils.c=0A=
@@ -1785,16 +1785,23 @@ void gen_ffp_op(IWineD3DStateBlockImpl =
*stateblock, struct ffp_settings *setting=0A=
     };=0A=
     unsigned int i;=0A=
     DWORD ttff;=0A=
+    DWORD carg0, carg1, carg2, aarg0, aarg1, aarg2, cop, aop;=0A=
 =0A=
     for(i =3D 0; i < GL_LIMITS(texture_stages); i++) {=0A=
         IWineD3DBaseTextureImpl *texture;=0A=
+        settings->op[i].texop_1 =3D 0;=0A=
+        settings->op[i].texop_2 =3D 0;=0A=
         if(stateblock->textureState[i][WINED3DTSS_COLOROP] =3D=3D =
WINED3DTOP_DISABLE) {=0A=
-            settings->op[i].cop =3D WINED3DTOP_DISABLE;=0A=
-            settings->op[i].aop =3D WINED3DTOP_DISABLE;=0A=
-            settings->op[i].carg0 =3D settings->op[i].carg1 =3D =
settings->op[i].carg2 =3D 0xffffffff;=0A=
-            settings->op[i].aarg0 =3D settings->op[i].aarg1 =3D =
settings->op[i].aarg2 =3D 0xffffffff;=0A=
+            ffp_set_cop(&settings->op[i], WINED3DTOP_DISABLE);=0A=
+            ffp_set_aop(&settings->op[i], WINED3DTOP_DISABLE);=0A=
+            ffp_set_carg0(&settings->op[i], carg0_mask >> carg0_shift);=0A=
+            ffp_set_carg1(&settings->op[i], carg1_mask >> carg1_shift);=0A=
+            ffp_set_carg2(&settings->op[i], carg2_mask >> carg2_shift);=0A=
+            ffp_set_aarg0(&settings->op[i], aarg0_mask >> aarg0_shift);=0A=
+            ffp_set_aarg1(&settings->op[i], aarg1_mask >> aarg1_shift);=0A=
+            ffp_set_aarg2(&settings->op[i], aarg2_mask >> aarg2_shift);=0A=
             settings->op[i].color_correction =3D WINED3DFMT_UNKNOWN;=0A=
-            settings->op[i].dst =3D 0xffffffff;=0A=
+            ffp_set_dst(&settings->op[i], WINED3DTA_CURRENT);=0A=
             i++;=0A=
             break;=0A=
         }=0A=
@@ -1803,40 +1810,44 @@ void gen_ffp_op(IWineD3DStateBlockImpl =
*stateblock, struct ffp_settings *setting=0A=
         if(texture) {=0A=
             settings->op[i].color_correction =3D =
texture->baseTexture.shader_conversion_group;=0A=
             if(ignore_textype) {=0A=
-                settings->op[i].tex_type =3D 0;=0A=
+                ffp_set_textype(&settings->op[i], GL_TEXTURE_1D);=0A=
             } else {=0A=
-                settings->op[i].tex_type =3D =
stateblock->textureDimensions[i];=0A=
+                ffp_set_textype(&settings->op[i], =
stateblock->textureDimensions[i]);=0A=
             }=0A=
         } else {=0A=
             settings->op[i].color_correction =3D WINED3DFMT_UNKNOWN;=0A=
-            settings->op[i].tex_type =3D 0;=0A=
+            ffp_set_textype(&settings->op[i], GL_TEXTURE_1D);=0A=
         }=0A=
 =0A=
-        settings->op[i].cop =3D =
stateblock->textureState[i][WINED3DTSS_COLOROP];=0A=
-        settings->op[i].aop =3D =
stateblock->textureState[i][WINED3DTSS_ALPHAOP];=0A=
-=0A=
-        settings->op[i].carg1 =3D (args[settings->op[i].cop] & ARG1) ? =
stateblock->textureState[i][WINED3DTSS_COLORARG1] : 0xffffffff;=0A=
-        settings->op[i].carg2 =3D (args[settings->op[i].cop] & ARG2) ? =
stateblock->textureState[i][WINED3DTSS_COLORARG2] : 0xffffffff;=0A=
-        settings->op[i].carg0 =3D (args[settings->op[i].cop] & ARG0) ? =
stateblock->textureState[i][WINED3DTSS_COLORARG0] : 0xffffffff;=0A=
-=0A=
-        if(is_invalid_op(stateblock->wineD3DDevice, =
i,settings->op[i].cop,=0A=
-                         settings->op[i].carg1, settings->op[i].carg2, =
settings->op[i].carg0)) {=0A=
-            settings->op[i].carg0 =3D 0xffffffff;=0A=
-            settings->op[i].carg2 =3D 0xffffffff;=0A=
-            settings->op[i].carg1 =3D WINED3DTA_CURRENT;=0A=
-            settings->op[i].cop =3D WINED3DTOP_SELECTARG1;=0A=
+        cop =3D stateblock->textureState[i][WINED3DTSS_COLOROP];=0A=
+        carg0 =3D (args[cop] & ARG0) ? =
stateblock->textureState[i][WINED3DTSS_COLORARG0] : (carg0_mask >> =
carg0_shift);=0A=
+        carg1 =3D (args[cop] & ARG1) ? =
stateblock->textureState[i][WINED3DTSS_COLORARG1] : (carg1_mask >> =
carg1_shift);=0A=
+        carg2 =3D (args[cop] & ARG2) ? =
stateblock->textureState[i][WINED3DTSS_COLORARG2] : (carg2_mask >> =
carg2_shift);=0A=
+=0A=
+        if(is_invalid_op(stateblock->wineD3DDevice, i, cop,=0A=
+                         carg1, carg2, carg0)) {=0A=
+            carg0 =3D (carg0_mask >> carg0_shift);=0A=
+            carg2 =3D (carg2_mask >> carg2_shift);=0A=
+            carg1 =3D WINED3DTA_CURRENT;=0A=
+            cop =3D WINED3DTOP_SELECTARG1;=0A=
         }=0A=
 =0A=
-        settings->op[i].aarg1 =3D (args[settings->op[i].aop] & ARG1) ? =
stateblock->textureState[i][WINED3DTSS_ALPHAARG1] : 0xffffffff;=0A=
-        settings->op[i].aarg2 =3D (args[settings->op[i].aop] & ARG2) ? =
stateblock->textureState[i][WINED3DTSS_ALPHAARG2] : 0xffffffff;=0A=
-        settings->op[i].aarg0 =3D (args[settings->op[i].aop] & ARG0) ? =
stateblock->textureState[i][WINED3DTSS_ALPHAARG0] : 0xffffffff;=0A=
-=0A=
-        if(is_invalid_op(stateblock->wineD3DDevice, i, =
settings->op[i].aop,=0A=
-                         settings->op[i].aarg1, settings->op[i].aarg2, =
settings->op[i].aarg0)) {=0A=
-            settings->op[i].aarg0 =3D 0xffffffff;=0A=
-            settings->op[i].aarg2 =3D 0xffffffff;=0A=
-            settings->op[i].aarg1 =3D WINED3DTA_CURRENT;=0A=
-            settings->op[i].aop =3D WINED3DTOP_SELECTARG1;=0A=
+        ffp_set_carg0(&settings->op[i], carg0);=0A=
+        ffp_set_carg1(&settings->op[i], carg1);=0A=
+        ffp_set_carg2(&settings->op[i], carg2);=0A=
+        ffp_set_cop(&settings->op[i], cop);=0A=
+=0A=
+        aop =3D stateblock->textureState[i][WINED3DTSS_ALPHAOP];=0A=
+        aarg1 =3D (args[aop] & ARG1) ? =
stateblock->textureState[i][WINED3DTSS_ALPHAARG1] : (aarg0_mask >> =
aarg0_shift);=0A=
+        aarg2 =3D (args[aop] & ARG2) ? =
stateblock->textureState[i][WINED3DTSS_ALPHAARG2] : (aarg1_mask >> =
aarg1_shift);=0A=
+        aarg0 =3D (args[aop] & ARG0) ? =
stateblock->textureState[i][WINED3DTSS_ALPHAARG0] : (aarg2_mask >> =
aarg2_shift);=0A=
+=0A=
+        if(is_invalid_op(stateblock->wineD3DDevice, i, aop,=0A=
+                         aarg1, aarg2, aarg0)) {=0A=
+            aarg0 =3D (aarg0_mask >> aarg0_shift);=0A=
+            aarg2 =3D (aarg2_mask >> aarg2_shift);=0A=
+            aarg1 =3D WINED3DTA_CURRENT;=0A=
+            aop =3D WINED3DTOP_SELECTARG1;=0A=
         } else if(i =3D=3D 0 && stateblock->textures[0] &&=0A=
                   stateblock->renderState[WINED3DRS_COLORKEYENABLE] &&=0A=
                  (stateblock->textureDimensions[0] =3D=3D GL_TEXTURE_2D =
||=0A=
@@ -1846,42 +1857,47 @@ void gen_ffp_op(IWineD3DStateBlockImpl =
*stateblock, struct ffp_settings *setting=0A=
             if(surf->CKeyFlags & WINEDDSD_CKSRCBLT &&=0A=
                getFormatDescEntry(surf->resource.format, NULL, =
NULL)->alphaMask =3D=3D 0x00000000) {=0A=
 =0A=
-                if(settings->op[0].aop =3D=3D WINED3DTOP_DISABLE) {=0A=
-                   settings->op[0].aarg1 =3D WINED3DTA_TEXTURE;=0A=
-                   settings->op[0].aop =3D WINED3DTOP_SELECTARG1;=0A=
+                if(aop =3D=3D WINED3DTOP_DISABLE) {=0A=
+                   aarg1 =3D WINED3DTA_TEXTURE;=0A=
+                   aop =3D WINED3DTOP_SELECTARG1;=0A=
                 }=0A=
-                else if(settings->op[0].aop =3D=3D =
WINED3DTOP_SELECTARG1 && settings->op[0].aarg1 !=3D WINED3DTA_TEXTURE) {=0A=
+                else if(aop =3D=3D WINED3DTOP_SELECTARG1 && aarg1 !=3D =
WINED3DTA_TEXTURE) {=0A=
                     if =
(stateblock->renderState[WINED3DRS_ALPHABLENDENABLE]) {=0A=
-                        settings->op[0].aarg2 =3D WINED3DTA_TEXTURE;=0A=
-                        settings->op[0].aop =3D WINED3DTOP_MODULATE;=0A=
+                        aarg2 =3D WINED3DTA_TEXTURE;=0A=
+                        aop =3D WINED3DTOP_MODULATE;=0A=
                     }=0A=
-                    else settings->op[0].aarg1 =3D WINED3DTA_TEXTURE;=0A=
+                    else aarg1 =3D WINED3DTA_TEXTURE;=0A=
                 }=0A=
-                else if(settings->op[0].aop =3D=3D =
WINED3DTOP_SELECTARG2 && settings->op[0].aarg2 !=3D WINED3DTA_TEXTURE) {=0A=
+                else if(aop =3D=3D WINED3DTOP_SELECTARG2 && aarg2 !=3D =
WINED3DTA_TEXTURE) {=0A=
                     if =
(stateblock->renderState[WINED3DRS_ALPHABLENDENABLE]) {=0A=
-                        settings->op[0].aarg1 =3D WINED3DTA_TEXTURE;=0A=
-                        settings->op[0].aop =3D WINED3DTOP_MODULATE;=0A=
+                        aarg1 =3D WINED3DTA_TEXTURE;=0A=
+                        aop =3D WINED3DTOP_MODULATE;=0A=
                     }=0A=
-                    else settings->op[0].aarg2 =3D WINED3DTA_TEXTURE;=0A=
+                    else aarg2 =3D WINED3DTA_TEXTURE;=0A=
                 }=0A=
             }=0A=
         }=0A=
 =0A=
-        if(settings->op[i].carg1 =3D=3D WINED3DTA_TEXTURE || =
settings->op[i].carg2 =3D=3D WINED3DTA_TEXTURE || settings->op[i].carg0 =
=3D=3D WINED3DTA_TEXTURE ||=0A=
-           settings->op[i].aarg1 =3D=3D WINED3DTA_TEXTURE || =
settings->op[i].aarg2 =3D=3D WINED3DTA_TEXTURE || settings->op[i].aarg0 =
=3D=3D WINED3DTA_TEXTURE) {=0A=
+        ffp_set_aarg0(&settings->op[i], aarg0);=0A=
+        ffp_set_aarg1(&settings->op[i], aarg1);=0A=
+        ffp_set_aarg2(&settings->op[i], aarg2);=0A=
+        ffp_set_aop(&settings->op[i], aop);=0A=
+=0A=
+        if(carg1 =3D=3D WINED3DTA_TEXTURE || carg2 =3D=3D =
WINED3DTA_TEXTURE || carg0 =3D=3D WINED3DTA_TEXTURE ||=0A=
+           aarg1 =3D=3D WINED3DTA_TEXTURE || aarg2 =3D=3D =
WINED3DTA_TEXTURE || aarg0 =3D=3D WINED3DTA_TEXTURE) {=0A=
             ttff =3D =
stateblock->textureState[i][WINED3DTSS_TEXTURETRANSFORMFLAGS];=0A=
             if(ttff =3D=3D (WINED3DTTFF_PROJECTED | =
WINED3DTTFF_COUNT3)) {=0A=
-                settings->op[i].projected =3D proj_count3;=0A=
+                ffp_set_proj(&settings->op[i], proj_count3);=0A=
             } else if(ttff =3D=3D (WINED3DTTFF_PROJECTED | =
WINED3DTTFF_COUNT4)) {=0A=
-                settings->op[i].projected =3D proj_count4;=0A=
+                ffp_set_proj(&settings->op[i], proj_count4);=0A=
             } else {=0A=
-                settings->op[i].projected =3D proj_none;=0A=
+                ffp_set_proj(&settings->op[i], proj_none);=0A=
             }=0A=
         } else {=0A=
-            settings->op[i].projected =3D proj_none;=0A=
+            ffp_set_proj(&settings->op[i], proj_none);=0A=
         }=0A=
 =0A=
-        settings->op[i].dst =3D =
stateblock->textureState[i][WINED3DTSS_RESULTARG];=0A=
+        ffp_set_dst(&settings->op[i], =
stateblock->textureState[i][WINED3DTSS_RESULTARG]);=0A=
     }=0A=
 =0A=
     /* Clear unsupported stages */=0A=
diff --git a/dlls/wined3d/wined3d_private.h =
b/dlls/wined3d/wined3d_private.h=0A=
index e86bb1e..0d6a08a 100644=0A=
--- a/dlls/wined3d/wined3d_private.h=0A=
+++ b/dlls/wined3d/wined3d_private.h=0A=
@@ -714,25 +714,134 @@ HRESULT tesselate_rectpatch(IWineD3DDeviceImpl =
*This, struct WineD3DRectPatch *p=0A=
 =0A=
 enum projection_types=0A=
 {=0A=
-    proj_none,=0A=
-    proj_count3,=0A=
-    proj_count4=0A=
+    proj_none       =3D 0x0,=0A=
+    proj_count3     =3D 0x1,=0A=
+    proj_count4     =3D 0x2=0A=
 };=0A=
 =0A=
 =
/************************************************************************=
*****=0A=
  * Fixed function pipeline replacements=0A=
  */=0A=
+=0A=
 struct texture_stage_op=0A=
 {=0A=
-    WINED3DTEXTUREOP        cop, aop;=0A=
-    DWORD                   carg1, carg2, carg0;=0A=
-    DWORD                   aarg1, aarg2, aarg0;=0A=
+    DWORD                   texop_1, texop_2;=0A=
     WINED3DFORMAT           color_correction;=0A=
-    DWORD                   tex_type;=0A=
-    DWORD                   dst;=0A=
-    enum projection_types   projected;=0A=
 };=0A=
 =0A=
+enum ffp_textype {=0A=
+    tex_1d =3D 0,=0A=
+    tex_2d =3D 1,=0A=
+    tex_3d =3D 2,=0A=
+    tex_cube =3D 3,=0A=
+    tex_rect =3D 4=0A=
+};=0A=
+=0A=
+/* Fixed function pipeline description setter functions=0A=
+ * cop: color operation=0A=
+ * aop: alpha operation=0A=
+ * carg0, carg1, carg2, aarg0, aarg1, aarg2: color and alpha arguments=0A=
+ * proj: projection type(proj3, proj4, none)=0A=
+ * textype: texture_type(GL_TEXTURE_1D, _2D, _3D, _CUBE, _RECT)=0A=
+ *=0A=
+ * ffp_*(const struct op) returns the values, ffp_set_* sets them. For =
the setters it=0A=
+ * is assumed that the bitfields are empty before setting(clear to 0!), =
and that the=0A=
+ * values that are set fit into the mask sizes=0A=
+ */=0A=
+#define cop_shift       27=0A=
+#define cop_mask        (0x1f << cop_shift)=0A=
+static inline DWORD ffp_cop(const struct texture_stage_op op) { return =
((op.texop_1 & cop_mask) >> cop_shift); }=0A=
+static inline void ffp_set_cop(struct texture_stage_op *op, DWORD cop) {=0A=
+    op->texop_1 |=3D (cop << cop_shift);=0A=
+}=0A=
+#define aop_shift       22=0A=
+#define aop_mask        (0x1f << aop_shift)=0A=
+static inline DWORD ffp_aop(const struct texture_stage_op op) { return =
((op.texop_1 & aop_mask) >> aop_shift); }=0A=
+static inline void ffp_set_aop(struct texture_stage_op *op, DWORD aop) {=0A=
+    op->texop_1 |=3D (aop << aop_shift);=0A=
+}=0A=
+#define carg0_shift     16=0A=
+#define carg0_mask      (0x3f << carg0_shift)=0A=
+static inline DWORD ffp_carg0(const struct texture_stage_op op) { =
return ((op.texop_1 & carg0_mask) >> carg0_shift); }=0A=
+static inline void ffp_set_carg0(struct texture_stage_op *op, DWORD =
arg) {=0A=
+    op->texop_1 |=3D (arg << carg0_shift);=0A=
+}=0A=
+#define carg1_shift     10=0A=
+#define carg1_mask      (0x3f << carg1_shift)=0A=
+static inline DWORD ffp_carg1(const struct texture_stage_op op) { =
return ((op.texop_1 & carg1_mask) >> carg1_shift); }=0A=
+static inline void ffp_set_carg1(struct texture_stage_op *op, DWORD =
arg) {=0A=
+    op->texop_1 |=3D (arg << carg1_shift);=0A=
+}=0A=
+#define carg2_shift     4=0A=
+#define carg2_mask      (0x3f << carg2_shift)=0A=
+static inline DWORD ffp_carg2(const struct texture_stage_op op) { =
return ((op.texop_1 & carg2_mask) >> carg2_shift); }=0A=
+static inline void ffp_set_carg2(struct texture_stage_op *op, DWORD =
arg) {=0A=
+    op->texop_1 |=3D (arg << carg2_shift);=0A=
+}=0A=
+#define textype_shift   1=0A=
+#define textype_mask    (0x3 << textype_shift)=0A=
+static inline DWORD ffp_textype(struct texture_stage_op op) {=0A=
+    switch((op.texop_1 & textype_mask) >> textype_shift) {=0A=
+        case tex_1d: return GL_TEXTURE_1D;=0A=
+        case tex_2d: return GL_TEXTURE_2D;=0A=
+        case tex_3d: return GL_TEXTURE_3D;=0A=
+        case tex_cube: return GL_TEXTURE_CUBE_MAP_ARB;=0A=
+        case tex_rect: return GL_TEXTURE_RECTANGLE_ARB;=0A=
+    }=0A=
+    return -1;=0A=
+}=0A=
+static inline void ffp_set_textype(struct texture_stage_op *op, DWORD =
textype) {=0A=
+    DWORD packaged_textype;=0A=
+    switch(textype) {=0A=
+        case GL_TEXTURE_1D: packaged_textype =3D tex_1d; break;=0A=
+        case GL_TEXTURE_2D: packaged_textype =3D tex_2d; break;=0A=
+        case GL_TEXTURE_3D: packaged_textype =3D tex_3d; break;=0A=
+        case GL_TEXTURE_CUBE_MAP_ARB: packaged_textype =3D tex_cube; =
break;=0A=
+        case GL_TEXTURE_RECTANGLE_ARB: packaged_textype =3D tex_rect; =
break;=0A=
+        default: packaged_textype =3D -1;=0A=
+    }=0A=
+    op->texop_1 |=3D (packaged_textype << textype_shift);=0A=
+}=0A=
+#define ffp_dst_shift   0=0A=
+#define ffp_dst_mask    (0x1 << ffp_dst_shift)=0A=
+static inline DWORD ffp_dst(const struct texture_stage_op op) {=0A=
+    if((op.texop_1 & ffp_dst_mask) >> ffp_dst_shift) {=0A=
+        return WINED3DTA_TEMP;=0A=
+    } else {=0A=
+        return WINED3DTA_CURRENT;=0A=
+    };=0A=
+}=0A=
+static inline void ffp_set_dst(struct texture_stage_op *op, DWORD dst) {=0A=
+    if(dst =3D=3D WINED3DTA_TEMP) {=0A=
+        op->texop_1 |=3D (1 << ffp_dst_shift);=0A=
+    }=0A=
+}=0A=
+=0A=
+#define aarg0_shift     17=0A=
+#define aarg0_mask      (0x3f << aarg0_shift)=0A=
+static inline DWORD ffp_aarg0(const struct texture_stage_op op) { =
return ((op.texop_2 & aarg0_mask) >> aarg0_shift); }=0A=
+static inline void ffp_set_aarg0(struct texture_stage_op *op, DWORD =
arg) {=0A=
+    op->texop_2 |=3D (arg << aarg0_shift);=0A=
+}=0A=
+#define aarg1_shift     11=0A=
+#define aarg1_mask      (0x3f << aarg1_shift)=0A=
+static inline DWORD ffp_aarg1(const struct texture_stage_op op) { =
return ((op.texop_2 & aarg1_mask) >> aarg1_shift); }=0A=
+static inline void ffp_set_aarg1(struct texture_stage_op *op, DWORD =
arg) {=0A=
+    op->texop_2 |=3D (arg << aarg1_shift);=0A=
+}=0A=
+#define aarg2_shift     5=0A=
+#define aarg2_mask      (0x3f << aarg2_shift)=0A=
+static inline DWORD ffp_aarg2(const struct texture_stage_op op) { =
return ((op.texop_2 & aarg2_mask) >> aarg2_shift); }=0A=
+static inline void ffp_set_aarg2(struct texture_stage_op *op, DWORD =
arg) {=0A=
+    op->texop_2 |=3D (arg << aarg2_shift);=0A=
+}=0A=
+#define proj_shift      0=0A=
+#define proj_mask       (0x3 << proj_shift)=0A=
+static inline DWORD ffp_proj(const struct texture_stage_op op) { return =
((op.texop_2 & proj_mask) >> proj_shift); }=0A=
+static inline void ffp_set_proj(struct texture_stage_op *op, DWORD =
proj) {=0A=
+    op->texop_2 |=3D (proj << proj_shift);=0A=
+}=0A=
+=0A=
 struct ffp_settings {=0A=
     struct texture_stage_op     op[MAX_TEXTURES];=0A=
     enum {=0A=
-- =0A=
1.5.4.5=0A=
=0A=

------=_NextPart_000_0028_01C8EFF4.1FAB7C50--




More information about the wine-patches mailing list