Stefan Dösinger : wined3d: Get rid of TMP accesses in texm3x3* instructions.

Alexandre Julliard julliard at winehq.org
Tue May 19 09:23:06 CDT 2009


Module: wine
Branch: master
Commit: ad217029b0348cacfa45ab0d6b656ed431f3d144
URL:    http://source.winehq.org/git/wine.git/?a=commit;h=ad217029b0348cacfa45ab0d6b656ed431f3d144

Author: Stefan Dösinger <stefan at codeweavers.com>
Date:   Thu May  7 19:07:34 2009 +0200

wined3d: Get rid of TMP accesses in texm3x3* instructions.

---

 dlls/wined3d/arb_program_shader.c |   55 +++++++++++++++++++++++-------------
 1 files changed, 35 insertions(+), 20 deletions(-)

diff --git a/dlls/wined3d/arb_program_shader.c b/dlls/wined3d/arb_program_shader.c
index e2b6005..229c323 100644
--- a/dlls/wined3d/arb_program_shader.c
+++ b/dlls/wined3d/arb_program_shader.c
@@ -1294,9 +1294,16 @@ static void pshader_hw_texm3x3pad(const struct wined3d_shader_instruction *ins)
     SHADER_BUFFER *buffer = ins->ctx->buffer;
     SHADER_PARSE_STATE* current_state = &This->baseShader.parse_state;
     char src0_name[50];
+    unsigned int dst;
+
+    /* There are always 2 texm3x3pad instructions followed by one texm3x3[tex,vspec, ...] instruction, with
+     * incrementing ins->dst[0].register_idx numbers. So the pad instruction already knows the final destination
+     * register, and this register is uninitialized (otherwise the assembler complains that it is 'redeclared')
+     */
+    dst = reg + 2 - current_state->current_row;
 
     shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
-    shader_addline(buffer, "DP3 TMP.%c, fragment.texcoord[%u], %s;\n", 'x' + current_state->current_row, reg, src0_name);
+    shader_addline(buffer, "DP3 T%u.%c, fragment.texcoord[%u], %s;\n", dst, 'x' + current_state->current_row, reg, src0_name);
     current_state->texcoord_w[current_state->current_row++] = reg;
 }
 
@@ -1309,15 +1316,17 @@ static void pshader_hw_texm3x3tex(const struct wined3d_shader_instruction *ins)
     SHADER_BUFFER *buffer = ins->ctx->buffer;
     SHADER_PARSE_STATE* current_state = &This->baseShader.parse_state;
     char dst_str[50];
+    char dst_reg[8];
     char src0_name[50];
 
+    sprintf(dst_reg, "T%u", reg);
     shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
-    shader_addline(buffer, "DP3 TMP.z, fragment.texcoord[%u], %s;\n", reg, src0_name);
+    shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name);
 
     /* Sample the texture using the calculated coordinates */
     shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
     flags = reg < MAX_TEXTURES ? deviceImpl->stateBlock->textureState[reg][WINED3DTSS_TEXTURETRANSFORMFLAGS] : 0;
-    shader_hw_sample(ins, reg, dst_str, "TMP", flags & WINED3DTTFF_PROJECTED, FALSE);
+    shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3DTTFF_PROJECTED, FALSE);
     current_state->current_row = 0;
 }
 
@@ -1331,10 +1340,14 @@ static void pshader_hw_texm3x3vspec(const struct wined3d_shader_instruction *ins
     SHADER_PARSE_STATE* current_state = &This->baseShader.parse_state;
     char dst_str[50];
     char src0_name[50];
+    char dst_reg[8];
 
+    /* Get the dst reg without writemask strings. We know this register is uninitialized, so we can use all
+     * components for temporary data storage
+     */
+    sprintf(dst_reg, "T%u", reg);
     shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
-    /* Note: TMP.xy is input here, generated in earlier texm3x3pad instructions */
-    shader_addline(buffer, "DP3 TMP.z, fragment.texcoord[%u], %s;\n", reg, src0_name);
+    shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name);
 
     /* Construct the eye-ray vector from w coordinates */
     shader_addline(buffer, "MOV TB.x, fragment.texcoord[%u].w;\n", current_state->texcoord_w[0]);
@@ -1343,18 +1356,18 @@ static void pshader_hw_texm3x3vspec(const struct wined3d_shader_instruction *ins
 
     /* Calculate reflection vector
      */
-    shader_addline(buffer, "DP3 TMP.w, TMP, TB;\n");
+    shader_addline(buffer, "DP3 %s.w, %s, TB;\n", dst_reg, dst_reg);
     /* The .w is ignored when sampling, so I can use TB.w to calculate dot(N, N) */
-    shader_addline(buffer, "DP3 TB.w, TMP, TMP;\n");
+    shader_addline(buffer, "DP3 TB.w, %s, %s;\n", dst_reg, dst_reg);
     shader_addline(buffer, "RCP TB.w, TB.w;\n");
-    shader_addline(buffer, "MUL TMP.w, TMP.w, TB.w;\n");
-    shader_addline(buffer, "MUL TMP, TMP.w, TMP;\n");
-    shader_addline(buffer, "MAD TMP, coefmul.x, TMP, -TB;\n");
+    shader_addline(buffer, "MUL %s.w, %s.w, TB.w;\n", dst_reg, dst_reg);
+    shader_addline(buffer, "MUL %s, %s.w, %s;\n", dst_reg, dst_reg, dst_reg);
+    shader_addline(buffer, "MAD %s, coefmul.x, %s, -TB;\n", dst_reg, dst_reg);
 
     /* Sample the texture using the calculated coordinates */
     shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
     flags = reg < MAX_TEXTURES ? deviceImpl->stateBlock->textureState[reg][WINED3DTSS_TEXTURETRANSFORMFLAGS] : 0;
-    shader_hw_sample(ins, reg, dst_str, "TMP", flags & WINED3DTTFF_PROJECTED, FALSE);
+    shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3DTTFF_PROJECTED, FALSE);
     current_state->current_row = 0;
 }
 
@@ -1369,11 +1382,13 @@ static void pshader_hw_texm3x3spec(const struct wined3d_shader_instruction *ins)
     char dst_str[50];
     char src0_name[50];
     char src1_name[50];
+    char dst_reg[8];
 
     shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
     shader_arb_get_src_param(ins, &ins->src[0], 1, src1_name);
     /* Note: TMP.xy is input here, generated by two texm3x3pad instructions */
-    shader_addline(buffer, "DP3 TMP.z, fragment.texcoord[%u], %s;\n", reg, src0_name);
+    sprintf(dst_reg, "T%u", reg);
+    shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name);
 
     /* Calculate reflection vector.
      *
@@ -1383,17 +1398,17 @@ static void pshader_hw_texm3x3spec(const struct wined3d_shader_instruction *ins)
      *
      * Which normalizes the normal vector
      */
-    shader_addline(buffer, "DP3 TMP.w, TMP, %s;\n", src1_name);
-    shader_addline(buffer, "DP3 TC.w, TMP, TMP;\n");
+    shader_addline(buffer, "DP3 %s.w, %s, %s;\n", dst_reg, dst_reg, src1_name);
+    shader_addline(buffer, "DP3 TC.w, %s, %s;\n", dst_reg, dst_reg);
     shader_addline(buffer, "RCP TC.w, TC.w;\n");
-    shader_addline(buffer, "MUL TMP.w, TMP.w, TC.w;\n");
-    shader_addline(buffer, "MUL TMP, TMP.w, TMP;\n");
-    shader_addline(buffer, "MAD TMP, coefmul.x, TMP, -%s;\n", src1_name);
+    shader_addline(buffer, "MUL %s.w, %s.w, TC.w;\n", dst_reg, dst_reg);
+    shader_addline(buffer, "MUL %s, %s.w, %s;\n", dst_reg, dst_reg, dst_reg);
+    shader_addline(buffer, "MAD %s, coefmul.x, %s, -%s;\n", dst_reg, dst_reg, src1_name);
 
     /* Sample the texture using the calculated coordinates */
     shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
     flags = reg < MAX_TEXTURES ? deviceImpl->stateBlock->textureState[reg][WINED3DTSS_TEXTURETRANSFORMFLAGS] : 0;
-    shader_hw_sample(ins, reg, dst_str, "TMP", flags & WINED3DTTFF_PROJECTED, FALSE);
+    shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3DTTFF_PROJECTED, FALSE);
     current_state->current_row = 0;
 }
 
@@ -1469,8 +1484,8 @@ static void pshader_hw_texm3x3(const struct wined3d_shader_instruction *ins)
 
     shader_arb_get_dst_param(ins, dst, dst_str);
     shader_arb_get_src_param(ins, &ins->src[0], 0, src0);
-    shader_addline(buffer, "DP3 TMP.z, fragment.texcoord[%u], %s;\n", dst->reg.idx, src0);
-    shader_addline(buffer, "MOV %s, TMP;\n", dst_str);
+    shader_addline(buffer, "DP3 T%u.z, fragment.texcoord[%u], %s;\n", dst->reg.idx, dst->reg.idx, src0);
+    shader_addline(buffer, "MOV %s, T%u;\n", dst_str, dst->reg.idx);
 }
 
 /** Process the WINED3DSIO_TEXM3X2DEPTH instruction in ARB:




More information about the wine-cvs mailing list