[WINED3D 6/9] Add support for shader model 3.0 I/O registers.

Ivan Gyurdiev ivg231 at gmail.com
Mon Jun 12 01:57:07 CDT 2006


SM 3.0 can pack multiple "semantics" into 12 generic input/output registers.

To support that, declare GLSL temporaries named IN<n> and OUT<n> and use
them as the packed input and output registers, respectively. At the end of
the vertex shader, unpack the OUT temporaries into the proper GL variables.
At the beginning of the pixel shader, pack the GL variables back into the
12 IN registers.

-------------- next part --------------
---

 dlls/wined3d/baseshader.c      |   15 +++++
 dlls/wined3d/glsl_shader.c     |  132 ++++++++++++++++++++++++++++++++++++++--
 dlls/wined3d/pixelshader.c     |   10 +++
 dlls/wined3d/vertexshader.c    |    9 +++
 dlls/wined3d/wined3d_private.h |   15 +++++
 5 files changed, 176 insertions(+), 5 deletions(-)

1be206c4ac17f0c9ac3bb4beac1d45d7c2ab18ed
diff --git a/dlls/wined3d/baseshader.c b/dlls/wined3d/baseshader.c
index 6f53ce2..7877bcd 100644
--- a/dlls/wined3d/baseshader.c
+++ b/dlls/wined3d/baseshader.c
@@ -343,10 +343,13 @@ void shader_get_registers_used(
 
                 if (!pshader)
                     reg_maps->attributes[regnum] = 1;
+                else
+                    reg_maps->packed_input[regnum] = 1;
 
                 shader_parse_decl_usage(reg_maps->semantics_in, usage, param);
 
             } else if (D3DSPR_OUTPUT == regtype) {
+                reg_maps->packed_output[regnum] = 1;
                 shader_parse_decl_usage(reg_maps->semantics_out, usage, param);
             }
 
@@ -723,6 +726,18 @@ void shader_generate_glsl_declarations(
         shader_addline(buffer, "vec4 T%lu = gl_TexCoord[%lu];\n", i, i);
     }
 
+    /* Declare input register temporaries */
+    for (i=0; i < This->baseShader.limits.packed_input; i++) {
+        if (reg_maps->packed_input[i])
+            shader_addline(buffer, "vec4 IN%lu;\n", i);
+    }
+
+    /* Declare output register temporaries */
+    for (i = 0; i < This->baseShader.limits.packed_output; i++) {
+        if (reg_maps->packed_output[i])
+            shader_addline(buffer, "vec4 OUT%lu;\n", i);
+    }
+
     /* Declare temporary variables */
     for(i = 0; i < This->baseShader.limits.temporary; i++) {
         if (reg_maps->temporary[i])
diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c
index 9cc3f2b..017fe5e 100644
--- a/dlls/wined3d/glsl_shader.c
+++ b/dlls/wined3d/glsl_shader.c
@@ -177,10 +177,14 @@ static void shader_glsl_get_register_nam
     break;
     case D3DSPR_INPUT:
         if (pshader) {
-            if (reg==0) {
-                strcpy(tmpStr, "gl_Color");
-            } else {
-                strcpy(tmpStr, "gl_SecondaryColor");
+            /* Pixel shaders >= 3.0 */
+            if (D3DSHADER_VERSION_MAJOR(This->baseShader.hex_version) >= 3)
+                sprintf(tmpStr, "IN%lu", reg);
+             else {
+                if (reg==0)
+                    strcpy(tmpStr, "gl_Color");
+                else
+                    strcpy(tmpStr, "gl_SecondaryColor");
             }
         } else {
             IWineD3DVertexShaderImpl *vshader = (IWineD3DVertexShaderImpl*) arg->shader;
@@ -263,7 +267,11 @@ static void shader_glsl_get_register_nam
         }
     break;
     case D3DSPR_TEXCRDOUT:
-        sprintf(tmpStr, "gl_TexCoord[%lu]", reg);
+        /* Vertex shaders >= 3.0: D3DSPR_OUTPUT */
+        if (D3DSHADER_VERSION_MAJOR(This->baseShader.hex_version) >= 3)
+            sprintf(tmpStr, "OUT%lu", reg);
+        else
+            sprintf(tmpStr, "gl_TexCoord[%lu]", reg);
     break;
     default:
         FIXME("Unhandled register name Type(%ld)\n", regtype);
@@ -795,3 +803,117 @@ void pshader_glsl_texm3x2tex(SHADER_OPCO
     shader_addline(buffer, "tmp0.y = dot(vec3(T%lu), vec3(%s));\n", reg, src0_str);
     shader_addline(buffer, "T%lu = texture2D(mytex%lu, tmp0.st);\n", reg, reg);
 }
+
+/* Append GLSL to buffer that loads the pixel shader IN<n> temporaries from GL
+ * built-in inputs. semantics_in is indexed by declaration usage; zero entries
+ * are skipped. For the rest, the register number comes from the token and the
+ * component suffix from shader_glsl_get_output_register_swizzle(). */
+void pshader_glsl_input_pack(
+   SHADER_BUFFER* buffer,
+   DWORD* semantics_in) {
+   unsigned int i;
+   for (i = 0; i < WINED3DSHADERDECLUSAGE_MAX_USAGE; i++) {
+       DWORD reg = semantics_in[i];
+       unsigned int regnum = reg & D3DSP_REGNUM_MASK;
+       char reg_mask[6];
+
+       /* No register was declared for this usage */
+       if (!reg) continue;
+
+       shader_glsl_get_output_register_swizzle(reg, reg_mask);
+       /* regnum and i are unsigned int, so they are formatted with %u */
+       switch(i) {
+           case WINED3DSHADERDECLUSAGE_DIFFUSE:
+               shader_addline(buffer, "IN%u%s = vec4(gl_Color)%s;\n",
+                   regnum, reg_mask, reg_mask);
+               break;
+
+           case WINED3DSHADERDECLUSAGE_SPECULAR:
+               shader_addline(buffer, "IN%u%s = vec4(gl_SecondaryColor)%s;\n",
+                   regnum, reg_mask, reg_mask);
+               break;
+
+           case WINED3DSHADERDECLUSAGE_TEXCOORD0:
+           case WINED3DSHADERDECLUSAGE_TEXCOORD1:
+           case WINED3DSHADERDECLUSAGE_TEXCOORD2:
+           case WINED3DSHADERDECLUSAGE_TEXCOORD3:
+           case WINED3DSHADERDECLUSAGE_TEXCOORD4:
+           case WINED3DSHADERDECLUSAGE_TEXCOORD5:
+           case WINED3DSHADERDECLUSAGE_TEXCOORD6:
+           case WINED3DSHADERDECLUSAGE_TEXCOORD7:
+               shader_addline(buffer, "IN%u%s = vec4(gl_TexCoord[%u])%s;\n",
+                   regnum, reg_mask, i - WINED3DSHADERDECLUSAGE_TEXCOORD0, reg_mask);
+               break;
+
+           case WINED3DSHADERDECLUSAGE_FOG:
+               shader_addline(buffer, "IN%u%s = vec4(gl_FogFragCoord)%s;\n",
+                   regnum, reg_mask, reg_mask);
+               break;
+
+           default:
+               shader_addline(buffer, "IN%u%s = vec4(unsupported_input)%s;\n",
+                   regnum, reg_mask, reg_mask);
+       }
+   }
+}
+
+/*********************************************
+ * Vertex Shader Specific Code begins here
+ ********************************************/
+
+/* Append GLSL to buffer that copies the vertex shader OUT<n> temporaries into
+ * GL built-in outputs. semantics_out is indexed by declaration usage; zero
+ * entries are skipped. For the rest, the register number comes from the token
+ * and the component suffix from shader_glsl_get_output_register_swizzle(). */
+void vshader_glsl_output_unpack(
+   SHADER_BUFFER* buffer,
+   DWORD* semantics_out) {
+   unsigned int i;
+   for (i = 0; i < WINED3DSHADERDECLUSAGE_MAX_USAGE; i++) {
+       DWORD reg = semantics_out[i];
+       unsigned int regnum = reg & D3DSP_REGNUM_MASK;
+       char reg_mask[6];
+
+       /* No register was declared for this usage */
+       if (!reg) continue;
+       shader_glsl_get_output_register_swizzle(reg, reg_mask);
+       /* regnum and i are unsigned int, so they are formatted with %u */
+       switch(i) {
+           case WINED3DSHADERDECLUSAGE_DIFFUSE:
+               shader_addline(buffer, "gl_FrontColor%s = OUT%u%s;\n", reg_mask, regnum, reg_mask);
+               break;
+
+           case WINED3DSHADERDECLUSAGE_SPECULAR:
+               shader_addline(buffer, "gl_FrontSecondaryColor%s = OUT%u%s;\n", reg_mask, regnum, reg_mask);
+               break;
+
+           case WINED3DSHADERDECLUSAGE_POSITION:
+               shader_addline(buffer, "gl_Position%s = OUT%u%s;\n", reg_mask, regnum, reg_mask);
+               break;
+
+           case WINED3DSHADERDECLUSAGE_TEXCOORD0:
+           case WINED3DSHADERDECLUSAGE_TEXCOORD1:
+           case WINED3DSHADERDECLUSAGE_TEXCOORD2:
+           case WINED3DSHADERDECLUSAGE_TEXCOORD3:
+           case WINED3DSHADERDECLUSAGE_TEXCOORD4:
+           case WINED3DSHADERDECLUSAGE_TEXCOORD5:
+           case WINED3DSHADERDECLUSAGE_TEXCOORD6:
+           case WINED3DSHADERDECLUSAGE_TEXCOORD7:
+               shader_addline(buffer, "gl_TexCoord[%u]%s = OUT%u%s;\n",
+                   i - WINED3DSHADERDECLUSAGE_TEXCOORD0, reg_mask, regnum, reg_mask);
+               break;
+
+           case WINED3DSHADERDECLUSAGE_PSIZE:
+               shader_addline(buffer, "gl_PointSize = OUT%u.x;\n", regnum);
+               break;
+
+           case WINED3DSHADERDECLUSAGE_FOG:
+               /* NOTE(review): gl_FogFragCoord is a float in GLSL; confirm reg_mask is valid on it */
+               shader_addline(buffer, "gl_FogFragCoord%s = OUT%u%s;\n", reg_mask, regnum, reg_mask);
+               break;
+
+           default:
+               shader_addline(buffer, "unsupported_output%s = OUT%u%s;\n", reg_mask, regnum, reg_mask);
+       }
+   }
+}
diff --git a/dlls/wined3d/pixelshader.c b/dlls/wined3d/pixelshader.c
index cf01e50..9b83e62 100644
--- a/dlls/wined3d/pixelshader.c
+++ b/dlls/wined3d/pixelshader.c
@@ -936,6 +936,7 @@ static void pshader_set_limits(
 
       This->baseShader.limits.attributes = 0;
       This->baseShader.limits.address = 0;
+      This->baseShader.limits.packed_output = 0;
 
       switch (This->baseShader.hex_version) {
           case D3DPS_VERSION(1,0):
@@ -947,6 +948,7 @@ static void pshader_set_limits(
                    This->baseShader.limits.constant_int = 0;
                    This->baseShader.limits.constant_bool = 0;
                    This->baseShader.limits.texture = 4;
+                   This->baseShader.limits.packed_input = 0;
                    break;
 
           case D3DPS_VERSION(1,4):
@@ -955,6 +957,7 @@ static void pshader_set_limits(
                    This->baseShader.limits.constant_int = 0;
                    This->baseShader.limits.constant_bool = 0;
                    This->baseShader.limits.texture = 6;
+                   This->baseShader.limits.packed_input = 0;
                    break;
                
           /* FIXME: temporaries must match D3DPSHADERCAPS2_0.NumTemps */ 
@@ -965,6 +968,7 @@ static void pshader_set_limits(
                    This->baseShader.limits.constant_int = 16;
                    This->baseShader.limits.constant_bool = 16;
                    This->baseShader.limits.texture = 8;
+                   This->baseShader.limits.packed_input = 0;
                    break;
 
           case D3DPS_VERSION(3,0):
@@ -973,6 +977,7 @@ static void pshader_set_limits(
                    This->baseShader.limits.constant_int = 16;
                    This->baseShader.limits.constant_bool = 16;
                    This->baseShader.limits.texture = 0;
+                   This->baseShader.limits.packed_input = 12;
                    break;
 
           default: This->baseShader.limits.temporary = 32;
@@ -980,6 +985,7 @@ static void pshader_set_limits(
                    This->baseShader.limits.constant_int = 0;
                    This->baseShader.limits.constant_bool = 0;
                    This->baseShader.limits.texture = 8;
+                   This->baseShader.limits.packed_input = 0;
                    FIXME("Unrecognized pixel shader version %#lx\n", 
                        This->baseShader.hex_version);
       }
@@ -1330,6 +1336,10 @@ #endif
         /* Base Declarations */
         shader_generate_glsl_declarations( (IWineD3DBaseShader*) This, &reg_maps, &buffer);
 
+        /* Pack 3.0 inputs */
+        if (This->baseShader.hex_version >= D3DPS_VERSION(3,0))
+            pshader_glsl_input_pack(&buffer, semantics_in);
+
         /* Base Shader Body */
         shader_generate_main( (IWineD3DBaseShader*) This, &buffer, &reg_maps, pFunction);
 
diff --git a/dlls/wined3d/vertexshader.c b/dlls/wined3d/vertexshader.c
index c116595..04a8337 100644
--- a/dlls/wined3d/vertexshader.c
+++ b/dlls/wined3d/vertexshader.c
@@ -714,6 +714,7 @@ static void vshader_set_limits(
 
       This->baseShader.limits.texture = 0;
       This->baseShader.limits.attributes = 16;
+      This->baseShader.limits.packed_input = 0;
 
       /* Must match D3DCAPS9.MaxVertexShaderConst: at least 256 for vs_2_0 */
       This->baseShader.limits.constant_float = WINED3D_VSHADER_MAX_CONSTANTS;
@@ -725,6 +726,7 @@ static void vshader_set_limits(
                    This->baseShader.limits.constant_bool = 0;
                    This->baseShader.limits.constant_int = 0;
                    This->baseShader.limits.address = 1;
+                   This->baseShader.limits.packed_output = 0;
                    break;
       
           case D3DVS_VERSION(2,0):
@@ -733,6 +735,7 @@ static void vshader_set_limits(
                    This->baseShader.limits.constant_bool = 16;
                    This->baseShader.limits.constant_int = 16;
                    This->baseShader.limits.address = 1;
+                   This->baseShader.limits.packed_output = 0;
                    break;
 
           case D3DVS_VERSION(3,0):
@@ -740,12 +743,14 @@ static void vshader_set_limits(
                    This->baseShader.limits.constant_bool = 32;
                    This->baseShader.limits.constant_int = 32;
                    This->baseShader.limits.address = 1;
+                   This->baseShader.limits.packed_output = 12;
                    break;
 
           default: This->baseShader.limits.temporary = 12;
                    This->baseShader.limits.constant_bool = 0;
                    This->baseShader.limits.constant_int = 0;
                    This->baseShader.limits.address = 1;
+                   This->baseShader.limits.packed_output = 0;
                    FIXME("Unrecognized vertex shader version %#lx\n",
                        This->baseShader.hex_version);
       }
@@ -870,6 +875,10 @@ #endif
         /* Base Shader Body */
         shader_generate_main( (IWineD3DBaseShader*) This, &buffer, &reg_maps, pFunction);
 
+        /* Unpack 3.0 outputs */
+        if (This->baseShader.hex_version >= D3DVS_VERSION(3,0))
+            vshader_glsl_output_unpack(&buffer, semantics_out);
+
         shader_addline(&buffer, "}\n\0");
 
         TRACE("Compiling shader object %u\n", shader_obj);
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
index 91c0082..a53bc1c 100644
--- a/dlls/wined3d/wined3d_private.h
+++ b/dlls/wined3d/wined3d_private.h
@@ -1252,6 +1252,8 @@ struct glsl_shader_prog_link {
 #define MAX_REG_ADDR 1
 #define MAX_REG_TEMP 32
 #define MAX_REG_TEXCRD 8
+#define MAX_REG_INPUT 12
+#define MAX_REG_OUTPUT 12
 #define MAX_ATTRIBS 16
 #define MAX_CONST_F 256
 
@@ -1260,6 +1262,8 @@ typedef struct shader_reg_maps {
     char texcoord[MAX_REG_TEXCRD];          /* pixel < 3.0 */
     char temporary[MAX_REG_TEMP];           /* pixel, vertex */
     char address[MAX_REG_ADDR];             /* vertex */
+    char packed_input[MAX_REG_INPUT];       /* pshader >= 3.0 */
+    char packed_output[MAX_REG_OUTPUT];     /* vertex >= 3.0 */
     char attributes[MAX_ATTRIBS];           /* vertex */
 
     char constantsF[MAX_CONST_F];           /* pixel, vertex */
@@ -1308,6 +1312,8 @@ typedef struct SHADER_LIMITS {
     unsigned int constant_float;
     unsigned int constant_bool;
     unsigned int address;
+    unsigned int packed_output;
+    unsigned int packed_input;
     unsigned int attributes;
 } SHADER_LIMITS;
 
@@ -1347,11 +1353,20 @@ extern void shader_glsl_cnd(SHADER_OPCOD
 extern void shader_glsl_compare(SHADER_OPCODE_ARG* arg);
 extern void shader_glsl_def(SHADER_OPCODE_ARG* arg);
 extern void shader_glsl_cmp(SHADER_OPCODE_ARG* arg);
+
 /** GLSL Pixel Shader Prototypes */
 extern void pshader_glsl_tex(SHADER_OPCODE_ARG* arg);
 extern void pshader_glsl_texcoord(SHADER_OPCODE_ARG* arg);
 extern void pshader_glsl_texm3x2pad(SHADER_OPCODE_ARG* arg);
 extern void pshader_glsl_texm3x2tex(SHADER_OPCODE_ARG* arg);
+/* Appends GLSL to buffer that loads the pixel shader IN temporaries from GL
+ * inputs; semantics_in is the input semantics table, as in the definition. */
+extern void pshader_glsl_input_pack(SHADER_BUFFER* buffer, DWORD* semantics_in);
+
+/** GLSL Vertex Shader Prototypes */
+/* Appends GLSL to buffer that copies the vertex shader OUT temporaries to GL
+ * outputs; semantics_out is the output semantics table. */
+extern void vshader_glsl_output_unpack(SHADER_BUFFER* buffer, DWORD* semantics_out);
 
 /*****************************************************************************
  * IDirect3DBaseShader implementation structure
-- 
1.3.3



More information about the wine-patches mailing list