Stefan Dösinger : wined3d: Implement the nrm instruction in arb.

Alexandre Julliard julliard at winehq.org
Thu Sep 27 09:27:27 CDT 2007


Module: wine
Branch: master
Commit: daf2290ea4d92a2e4f11f16ddc4a69cb9e1ae064
URL:    http://source.winehq.org/git/wine.git/?a=commit;h=daf2290ea4d92a2e4f11f16ddc4a69cb9e1ae064

Author: Stefan Dösinger <stefan at codeweavers.com>
Date:   Wed Sep  5 20:09:06 2007 +0200

wined3d: Implement the nrm instruction in arb.

---

 dlls/wined3d/arb_program_shader.c |   22 ++++++++++++++++++++++
 dlls/wined3d/baseshader.c         |    5 ++++-
 dlls/wined3d/pixelshader.c        |   11 +----------
 dlls/wined3d/vertexshader.c       |   16 ++++++----------
 dlls/wined3d/wined3d_private.h    |    4 ++++
 5 files changed, 37 insertions(+), 21 deletions(-)

diff --git a/dlls/wined3d/arb_program_shader.c b/dlls/wined3d/arb_program_shader.c
index 4d7dd58..8bc458f 100644
--- a/dlls/wined3d/arb_program_shader.c
+++ b/dlls/wined3d/arb_program_shader.c
@@ -1477,6 +1477,28 @@ void vshader_hw_rsq_rcp(SHADER_OPCODE_ARG* arg) {
     shader_addline(buffer, "%s;\n", tmpLine);
 }
 
+void shader_hw_nrm(SHADER_OPCODE_ARG* arg) {
+    SHADER_BUFFER* buffer = arg->buffer;
+    char dst_name[50];
+    char src_name[50];
+    char dst_wmask[20];
+    DWORD shift = (arg->dst & WINED3DSP_DSTSHIFT_MASK) >> WINED3DSP_DSTSHIFT_SHIFT;
+    BOOL sat = (arg->dst & WINED3DSP_DSTMOD_MASK) & WINED3DSPDM_SATURATE;
+
+    pshader_get_register_name(arg->dst, dst_name);
+    shader_arb_get_write_mask(arg, arg->dst, dst_wmask);
+
+    pshader_gen_input_modifier_line(buffer, arg->src[0], 0, src_name);
+    shader_addline(buffer, "DP3 TMP, %s, %s;\n", src_name, src_name);
+    shader_addline(buffer, "RSQ TMP, TMP.x;\n");
+    /* dst.w = src[0].w * 1 / (src[0].x^2 + src[0].y^2 + src[0].z^2)^(1/2) according to MSDN */
+    shader_addline(buffer, "MUL%s %s%s, %s, TMP;\n", sat ? "_SAT" : "", dst_name, dst_wmask,
+                   src_name);
+
+    if (shift != 0)
+        pshader_gen_output_modifier_line(buffer, FALSE, dst_wmask, shift, dst_name);
+}
+
 /* TODO: merge with pixel shader */
 /* Map the opcode 1-to-1 to the GL code */
 void vshader_hw_map2gl(SHADER_OPCODE_ARG* arg) {
diff --git a/dlls/wined3d/baseshader.c b/dlls/wined3d/baseshader.c
index df9c3e5..555929c 100644
--- a/dlls/wined3d/baseshader.c
+++ b/dlls/wined3d/baseshader.c
@@ -381,6 +381,9 @@ HRESULT shader_get_registers_used(
                     }
                 }
             }
+            if(WINED3DSIO_NRM  == curOpcode->opcode) {
+                reg_maps->usesnrm = 1;
+            }
 
             /* This will loop over all the registers and try to
              * make a bitmask of the ones we're interested in. 
@@ -416,7 +419,7 @@ HRESULT shader_get_registers_used(
 
                 else if (WINED3DSPR_RASTOUT == regtype && reg == 1)
                     reg_maps->fog = 1;
-             }
+            }
         }
     }
 
diff --git a/dlls/wined3d/pixelshader.c b/dlls/wined3d/pixelshader.c
index 10fb732..0c18b26 100644
--- a/dlls/wined3d/pixelshader.c
+++ b/dlls/wined3d/pixelshader.c
@@ -177,16 +177,7 @@ CONST SHADER_OPCODE IWineD3DPixelShaderImpl_shader_ins[] = {
     {WINED3DSIO_CMP,  "cmp",  NULL, 1, 4, pshader_hw_cmp, shader_glsl_cmp, WINED3DPS_VERSION(1,2), WINED3DPS_VERSION(3,0)},
     {WINED3DSIO_POW,  "pow",  "POW", 1, 3, pshader_hw_map2gl, shader_glsl_pow, 0, 0},
     {WINED3DSIO_CRS,  "crs",  "XPD", 1, 3, pshader_hw_map2gl, shader_glsl_cross, 0, 0},
-    /* TODO: xyz normalise can be performed as VS_ARB using one temporary register,
-        DP3 tmp , vec, vec;
-        RSQ tmp, tmp.x;
-        MUL vec.xyz, vec, tmp;
-    but I think this is better because it accounts for w properly.
-        DP3 tmp , vec, vec;
-        RSQ tmp, tmp.x;
-        MUL vec, vec, tmp;
-    */
-    {WINED3DSIO_NRM,      "nrm",      NULL, 1, 2, NULL, shader_glsl_map2gl, 0, 0},
+    {WINED3DSIO_NRM,      "nrm",      NULL, 1, 2, shader_hw_nrm, shader_glsl_map2gl, 0, 0},
     {WINED3DSIO_SINCOS,   "sincos",   NULL, 1, 4, NULL, shader_glsl_sincos, WINED3DPS_VERSION(2,0), WINED3DPS_VERSION(2,1)},
     {WINED3DSIO_SINCOS,   "sincos",   NULL, 1, 2, NULL, shader_glsl_sincos, WINED3DPS_VERSION(3,0), -1},
     {WINED3DSIO_DP2ADD,   "dp2add",   NULL, 1, 4, pshader_hw_dp2add, pshader_glsl_dp2add, WINED3DPS_VERSION(2,0), -1},
diff --git a/dlls/wined3d/vertexshader.c b/dlls/wined3d/vertexshader.c
index 3f8f5bf..8564c06 100644
--- a/dlls/wined3d/vertexshader.c
+++ b/dlls/wined3d/vertexshader.c
@@ -114,16 +114,7 @@ CONST SHADER_OPCODE IWineD3DVertexShaderImpl_shader_ins[] = {
         RCP tmp, vec
         MUL out, tmp, vec*/
     {WINED3DSIO_SGN,  "sgn",  NULL,  1, 2, NULL,                shader_glsl_map2gl, 0, 0},
-    /* TODO: xyz normalise can be performed as VS_ARB using one temporary register,
-        DP3 tmp , vec, vec;
-        RSQ tmp, tmp.x;
-        MUL vec.xyz, vec, tmp;
-    but I think this is better because it accounts for w properly.
-        DP3 tmp , vec, vec;
-        RSQ tmp, tmp.x;
-        MUL vec, vec, tmp;
-    */
-    {WINED3DSIO_NRM,    "nrm",      NULL, 1, 2, NULL, shader_glsl_map2gl, 0, 0},
+    {WINED3DSIO_NRM,    "nrm",      NULL, 1, 2, shader_hw_nrm, shader_glsl_map2gl, 0, 0},
     {WINED3DSIO_SINCOS, "sincos",   NULL, 1, 4, NULL, shader_glsl_sincos, WINED3DVS_VERSION(2,0), WINED3DVS_VERSION(2,1)},
     {WINED3DSIO_SINCOS, "sincos",   NULL, 1, 2, NULL, shader_glsl_sincos, WINED3DVS_VERSION(3,0), -1},
     /* Matrix */
@@ -382,6 +373,11 @@ static VOID IWineD3DVertexShaderImpl_GenerateShader(
             This->baseShader.limits.constant_float = 
                 min(95, This->baseShader.limits.constant_float);
 
+        /* Some instructions need a temporary register. Declare it only when it is actually used. */
+        if(reg_maps->usesnrm) {
+            shader_addline(&buffer, "TEMP TMP;\n");
+        }
+
         /* Base Declarations */
         shader_generate_arb_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION);
 
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
index 70d44e4..a5aa8e3 100644
--- a/dlls/wined3d/wined3d_private.h
+++ b/dlls/wined3d/wined3d_private.h
@@ -1652,6 +1652,7 @@ typedef struct shader_reg_maps {
      * Use 0 as default (bit 31 is always 1 on a valid token) */
     DWORD samplers[max(MAX_FRAGMENT_SAMPLERS, MAX_VERTEX_SAMPLERS)];
     char bumpmat, luminanceparams;
+    char usesnrm;
 
     /* Whether or not a loop is used in this shader */
     char loop;
@@ -1788,6 +1789,9 @@ extern void pshader_hw_texm3x3(SHADER_OPCODE_ARG* arg);
 extern void pshader_hw_texm3x2depth(SHADER_OPCODE_ARG* arg);
 extern void pshader_hw_dp2add(SHADER_OPCODE_ARG* arg);
 
+/* ARB vertex / pixel shader common prototypes */
+extern void shader_hw_nrm(SHADER_OPCODE_ARG* arg);
+
 /* ARB vertex shader prototypes */
 extern void vshader_hw_map2gl(SHADER_OPCODE_ARG* arg);
 extern void vshader_hw_mnxn(SHADER_OPCODE_ARG* arg);




More information about the wine-cvs mailing list