[D3D8] Implemented Pixel Shaders using ARB_fragment_program

Christian Costa titan.costa at wanadoo.fr
Sat May 8 06:42:29 CDT 2004


Hi,

Here is a patch that implements Pixel Shaders using the
ARB_fragment_program extension.
I used the NV Effects Browser app (thanks, Raphael) and several effects
for testing.
The following two effects seem to work fine:
- Brushed Metal2 (2.0)
- Dot3 Bump Diffuse and Specular (2.0)

TODO:
- add remaining missing instructions
- properly handle the texture target (2D, CUBEMAP, ...)
- optimize code generation

Note:
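Pixel shaders are disabled by default and can be enabled through the
registry by setting the "PixelShaderMode" value to "enabled" under
HKEY_LOCAL_MACHINE\Software\Wine\Direct3D.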

Changelog:
Implemented Pixel Shaders using ARB_fragment_program extension.

Christian Costa   titan.costa at wanadoo.fr

-------------- next part --------------
Index: d3d8_main.c
===================================================================
RCS file: /home/wine/wine/dlls/d3d8/d3d8_main.c,v
retrieving revision 1.16
diff -u -r1.16 d3d8_main.c
--- d3d8_main.c	20 Apr 2004 00:28:48 -0000	1.16
+++ d3d8_main.c	8 May 2004 09:19:23 -0000
@@ -35,7 +35,8 @@
 int num_lock = 0;
 void (*wine_tsx11_lock_ptr)(void) = NULL;
 void (*wine_tsx11_unlock_ptr)(void) = NULL;
-int vs_mode = VS_HW; /* Hardware by default */
+int vs_mode = VS_HW;   /* Hardware by default */
+int ps_mode = PS_NONE; /* Disabled by default */
 
 HRESULT WINAPI D3D8GetSWInfo(void)
 {
@@ -81,22 +82,34 @@
            wine_tsx11_lock_ptr   = (void *)GetProcAddress( mod, "wine_tsx11_lock" );
            wine_tsx11_unlock_ptr = (void *)GetProcAddress( mod, "wine_tsx11_unlock" );
        }
-       if ( !RegOpenKeyA( HKEY_LOCAL_MACHINE, "Software\\Wine\\Direct3D", &hkey) &&
-            !RegQueryValueExA( hkey, "VertexShaderMode", 0, NULL, buffer, &size) )
+       if ( !RegOpenKeyA( HKEY_LOCAL_MACHINE, "Software\\Wine\\Direct3D", &hkey) )
        {
-           if (!strcmp(buffer,"none"))
+           if ( !RegQueryValueExA( hkey, "VertexShaderMode", 0, NULL, buffer, &size) )
            {
-               TRACE("Disable vertex shader\n");
-               vs_mode = VS_NONE;
-	   }
-	   else if (!strcmp(buffer,"emulation"))
+               if (!strcmp(buffer,"none"))
+               {
+                   TRACE("Disable vertex shaders\n");
+                   vs_mode = VS_NONE;
+	       }
+	       else if (!strcmp(buffer,"emulation"))
+               {
+                   TRACE("Force SW vertex shaders\n");
+                   vs_mode = VS_SW;
+               }
+           }
+           if ( !RegQueryValueExA( hkey, "PixelShaderMode", 0, NULL, buffer, &size) )
            {
-               TRACE("Force SW vertex shader\n");
-               vs_mode = VS_SW;
+               if (!strcmp(buffer,"enabled"))
+               {
+                   TRACE("Allow pixel shaders\n");
+                   ps_mode = PS_HW;
+	       }
            }
        }
        if (vs_mode == VS_HW)
-           FIXME("Allow HW vertex shader\n");
+           TRACE("Allow HW vertex shaders\n");
+       if (ps_mode == PS_NONE)
+           TRACE("Disable pixel shaders\n");
     }
     return TRUE;
 }
Index: d3d8_private.h
===================================================================
RCS file: /home/wine/wine/dlls/d3d8/d3d8_private.h,v
retrieving revision 1.52
diff -u -r1.52 d3d8_private.h
--- d3d8_private.h	29 Apr 2004 00:20:18 -0000	1.52
+++ d3d8_private.h	8 May 2004 09:19:34 -0000
@@ -72,6 +72,10 @@
 #define VS_HW   1
 #define VS_SW   2
 
+extern int ps_mode;
+#define PS_NONE 0
+#define PS_HW   1
+
 /* Device caps */
 #define MAX_PALETTES      256
 #define MAX_STREAMS       16
@@ -1262,6 +1266,10 @@
   DWORD* function;
   UINT functionLength;
   DWORD version;
+
+  /** fields for hw pixel shader use */
+  GLuint  prgId;
+
   /* run time datas */
   PSHADERDATA8* data;
   PSHADERINPUTDATA8 input;
@@ -1270,6 +1278,7 @@
 
 /* exported Interfaces */
 extern HRESULT WINAPI IDirect3DPixelShaderImpl_GetFunction(IDirect3DPixelShaderImpl* This, VOID* pData, UINT* pSizeOfData);
+extern HRESULT WINAPI IDirect3DPixelShaderImpl_SetConstantF(IDirect3DPixelShaderImpl* This, UINT StartRegister, CONST FLOAT* pConstantData, UINT Vector4fCount);
 /* internal Interfaces */
 extern DWORD WINAPI IDirect3DPixelShaderImpl_GetVersion(IDirect3DPixelShaderImpl* This);
 /* temporary internal Interfaces */
Index: d3dcore_gl.h
===================================================================
RCS file: /home/wine/wine/dlls/d3d8/d3dcore_gl.h,v
retrieving revision 1.14
diff -u -r1.14 d3dcore_gl.h
--- d3dcore_gl.h	29 Apr 2004 00:20:18 -0000	1.14
+++ d3dcore_gl.h	8 May 2004 09:19:41 -0000
@@ -357,6 +357,25 @@
 typedef void (APIENTRY * PGLFNGETVERTEXATTRIBIVARBPROC) (GLuint index, GLenum pname, GLint *params);
 typedef void (APIENTRY * PGLFNGETVERTEXATTRIBPOINTERVARBPROC) (GLuint index, GLenum pname, GLvoid* *pointer);
 typedef GLboolean (APIENTRY * PGLFNISPROGRAMARBPROC) (GLuint program);
+#ifndef GL_ARB_fragment_program
+#define GL_ARB_fragment_program 1
+#define GL_FRAGMENT_PROGRAM_ARB           0x8804
+#define GL_PROGRAM_ALU_INSTRUCTIONS_ARB   0x8805
+#define GL_PROGRAM_TEX_INSTRUCTIONS_ARB   0x8806
+#define GL_PROGRAM_TEX_INDIRECTIONS_ARB   0x8807
+#define GL_PROGRAM_NATIVE_ALU_INSTRUCTIONS_ARB 0x8808
+#define GL_PROGRAM_NATIVE_TEX_INSTRUCTIONS_ARB 0x8809
+#define GL_PROGRAM_NATIVE_TEX_INDIRECTIONS_ARB 0x880A
+#define GL_MAX_PROGRAM_ALU_INSTRUCTIONS_ARB 0x880B
+#define GL_MAX_PROGRAM_TEX_INSTRUCTIONS_ARB 0x880C
+#define GL_MAX_PROGRAM_TEX_INDIRECTIONS_ARB 0x880D
+#define GL_MAX_PROGRAM_NATIVE_ALU_INSTRUCTIONS_ARB 0x880E
+#define GL_MAX_PROGRAM_NATIVE_TEX_INSTRUCTIONS_ARB 0x880F
+#define GL_MAX_PROGRAM_NATIVE_TEX_INDIRECTIONS_ARB 0x8810
+#define GL_MAX_TEXTURE_COORDS_ARB         0x8871
+#define GL_MAX_TEXTURE_IMAGE_UNITS_ARB    0x8872
+/* All ARB_fragment_program entry points are shared with ARB_vertex_program. */
+#endif
 /* GL_EXT_texture_compression_s3tc */
 #ifndef GL_EXT_texture_compression_s3tc
 #define GL_EXT_texture_compression_s3tc 1
Index: device.c
===================================================================
RCS file: /home/wine/wine/dlls/d3d8/device.c,v
retrieving revision 1.108
diff -u -r1.108 device.c
--- device.c	5 May 2004 01:09:24 -0000	1.108
+++ device.c	8 May 2004 09:20:15 -0000
@@ -4146,6 +4146,7 @@
     *pHandle = 0xFFFFFFFF;
     return res;
 }
+
 HRESULT  WINAPI  IDirect3DDevice8Impl_SetPixelShader(LPDIRECT3DDEVICE8 iface, DWORD Handle) {
     ICOM_THIS(IDirect3DDevice8Impl,iface);
 
@@ -4159,15 +4160,15 @@
         return D3D_OK;
     }
 
-    /* FIXME: Quieten when not being used */
     if (Handle != 0) {
-      FIXME_(d3d_shader)("(%p) : stub %ld\n", This, Handle);
+      TRACE_(d3d_shader)("(%p) : Set pixel shader with handle %lx\n", This, Handle);
     } else {
-      TRACE_(d3d_shader)("(%p) : stub %ld\n", This, Handle);
+      TRACE_(d3d_shader)("(%p) : Remove pixel shader\n", This);
     }
 
     return D3D_OK;
 }
+
 HRESULT  WINAPI  IDirect3DDevice8Impl_GetPixelShader(LPDIRECT3DDEVICE8 iface, DWORD* pHandle) {
     ICOM_THIS(IDirect3DDevice8Impl,iface);
     TRACE_(d3d_shader)("(%p) : GetPixelShader returning %ld\n", This, This->StateBlock->PixelShader);
@@ -4183,9 +4184,15 @@
       return D3DERR_INVALIDCALL;
     }
     object = PixelShaders[Handle - VS_HIGHESTFIXEDFXF];
+    if (NULL == object) {
+      return D3DERR_INVALIDCALL;
+    }
     TRACE_(d3d_shader)("(%p) : freeing PixelShader %p\n", This, object);
     /* TODO: check validity of object before free */
     if (NULL != object->function) HeapFree(GetProcessHeap(), 0, (void *)object->function);
+    if (object->prgId != 0) {
+        GL_EXTCALL(glDeleteProgramsARB( 1, &object->prgId ));
+    }
     HeapFree(GetProcessHeap(), 0, (void *)object->data);
     HeapFree(GetProcessHeap(), 0, (void *)object);
     PixelShaders[Handle - VS_HIGHESTFIXEDFXF] = NULL;
Index: directx.c
===================================================================
RCS file: /home/wine/wine/dlls/d3d8/directx.c,v
retrieving revision 1.67
diff -u -r1.67 directx.c
--- directx.c	2 May 2004 04:22:31 -0000	1.67
+++ directx.c	8 May 2004 09:20:24 -0000
@@ -599,7 +599,8 @@
                           D3DPTEXTURECAPS_ALPHAPALETTE | 
                           D3DPTEXTURECAPS_POW2         | 
                           D3DPTEXTURECAPS_VOLUMEMAP    | 
-                          D3DPTEXTURECAPS_MIPMAP;
+                          D3DPTEXTURECAPS_MIPMAP       |
+                          D3DPTEXTURECAPS_PROJECTED;
 
     if (GL_SUPPORT(ARB_TEXTURE_CUBE_MAP)) {
       pCaps->TextureCaps |= D3DPTEXTURECAPS_CUBEMAP      | 
@@ -667,7 +668,7 @@
 	                    D3DSTENCILCAPS_INCR;
     }
 
-    pCaps->FVFCaps = D3DFVFCAPS_PSIZE | 0x80000;
+    pCaps->FVFCaps = D3DFVFCAPS_PSIZE | 0x0008; /* 8 texture coords */
 
     pCaps->TextureOpCaps =  D3DTEXOPCAPS_ADD         | 
                             D3DTEXOPCAPS_ADDSIGNED   | 
@@ -763,13 +764,13 @@
         pCaps->VertexShaderVersion = 0;
     pCaps->MaxVertexShaderConst = D3D8_VSHADER_MAX_CONSTANTS;
 
-#if 0
-    pCaps->PixelShaderVersion = D3DPS_VERSION(1,1);
-    pCaps->MaxPixelShaderValue = 1.0;
-#else
-    pCaps->PixelShaderVersion = 0;
-    pCaps->MaxPixelShaderValue = 0.0;
-#endif
+    if ((ps_mode == PS_HW) && GL_SUPPORT(ARB_FRAGMENT_PROGRAM) && (DeviceType != D3DDEVTYPE_REF)) {
+        pCaps->PixelShaderVersion = D3DPS_VERSION(1,1);
+        pCaps->MaxPixelShaderValue = 1.0;
+    } else {
+        pCaps->PixelShaderVersion = 0;
+        pCaps->MaxPixelShaderValue = 0.0;
+    }
 
     /* If we created a dummy context, throw it away */
     WineD3DReleaseFakeGLContext(fake_ctx);
Index: drawprim.c
===================================================================
RCS file: /home/wine/wine/dlls/d3d8/drawprim.c,v
retrieving revision 1.20
diff -u -r1.20 drawprim.c
--- drawprim.c	2 May 2004 04:22:31 -0000	1.20
+++ drawprim.c	8 May 2004 09:20:46 -0000
@@ -1466,6 +1466,7 @@
     BOOL                          rc = FALSE;
     DWORD                         fvf = 0;
     IDirect3DVertexShaderImpl    *vertex_shader = NULL;
+    IDirect3DPixelShaderImpl     *pixel_shader = NULL;
     BOOL                          useVertexShaderFunction = FALSE;
     BOOL                          isLightingOn = FALSE;
     Direct3DVertexStridedData     dataLocations;
@@ -1497,6 +1498,28 @@
     /* Ok, we will be updating the screen from here onwards so grab the lock */
     ENTER_GL();
 
+    /* If we will be using a pixel shader, do some initialization for it */
+    if ((pixel_shader = PIXEL_SHADER(This->UpdateStateBlock->PixelShader))) {
+        TRACE("drawing with pixel shader handle %p\n", pixel_shader);
+        memset(&pixel_shader->input, 0, sizeof(PSHADERINPUTDATA8));
+
+        GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, pixel_shader->prgId));
+        checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, pixel_shader->prgId);");
+        glEnable(GL_FRAGMENT_PROGRAM_ARB);
+        checkGLcall("glEnable(GL_FRAGMENT_PROGRAM_ARB);");	
+
+        /* init Constants */
+        if (TRUE == This->UpdateStateBlock->Changed.pixelShaderConstant) {
+            TRACE_(d3d_shader)("pixel shader initializing constants %p\n",pixel_shader);
+            IDirect3DPixelShaderImpl_SetConstantF(pixel_shader, 0, (CONST FLOAT*) &This->UpdateStateBlock->pixelShaderConstant[0], 8);
+        }
+        /* Update the constants */
+        for (i=0; i<D3D8_PSHADER_MAX_CONSTANTS; i++) {
+            GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, i, (GLfloat *)&This->StateBlock->pixelShaderConstant[i]));
+            checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB");
+        }
+    }
+
     /* Setup transform matrices and sort out */
     if (useHW) {
 	/* Lighting is not completely bypassed with ATI drivers although it should be. Mesa is ok from this respect...
@@ -1595,6 +1618,17 @@
         TRACE("Restored lighting to original state\n");
     }
 
+    if (pixel_shader)
+    {
+#if 0
+      GLint errPos;
+      glGetIntegerv( GL_PROGRAM_ERROR_POSITION_ARB, &errPos );
+      if (errPos != -1)
+        FIXME("HW PixelShader Error at position: %d\n%s\n", errPos, glGetString( GL_PROGRAM_ERROR_STRING_ARB) );
+#endif
+      glDisable(GL_FRAGMENT_PROGRAM_ARB);      
+    }
+    
     /* Finshed updating the screen, restore lock */
     LEAVE_GL();
     TRACE("Done all gl drawing\n");
Index: shader.c
===================================================================
RCS file: /home/wine/wine/dlls/d3d8/shader.c,v
retrieving revision 1.25
diff -u -r1.25 shader.c
--- shader.c	28 Apr 2004 00:24:44 -0000	1.25
+++ shader.c	8 May 2004 09:21:32 -0000
@@ -1397,6 +1397,14 @@
   return NULL;
 }
 
+inline static BOOL pshader_is_version_token(DWORD token) {
+  return 0xFFFF0000 == (token & 0xFFFF0000);
+}
+
+inline static BOOL pshader_is_comment_token(DWORD token) {
+  return D3DSIO_COMMENT == (token & D3DSI_OPCODE_MASK);
+}
+
 inline static void pshader_program_dump_opcode(const SHADER_OPCODE* curOpcode, const DWORD code, const DWORD output) {
   if (0 != (code & ~D3DSI_OPCODE_MASK)) {
     DWORD mask = (code & ~D3DSI_OPCODE_MASK);
@@ -1417,14 +1425,12 @@
    */
   if (0 != (output & D3DSP_DSTSHIFT_MASK)) {
     DWORD shift = (output & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
-    if (shift > 0) {
+    if (shift < 8) {
       TRACE("_x%u", 1 << shift);
+    } else {
+      TRACE("_d%u", 1 << (16-shift));
     }
-  } 
-  /**
-   * TODO: fix the divide shifts: d2, d4, d8
-   *  so i have to find a sample
-   */
+  }
   if (0 != (output & D3DSP_DSTMOD_MASK)) {
     DWORD mask = output & D3DSP_DSTMOD_MASK;
     switch (mask) {
@@ -1443,8 +1449,14 @@
   DWORD reg = param & 0x00001FFF;
   DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT);
 
-  if ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG) {
-    TRACE("-");
+  if (input) {
+    if ( ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG) ||
+         ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_BIASNEG) ||
+         ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_SIGNNEG) ||
+         ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_X2NEG) )
+      TRACE("-");
+    else if ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_COMP)
+      TRACE("1-");
   }
   
   switch (regtype << D3DSP_REGTYPE_SHIFT) {
@@ -1531,11 +1543,11 @@
       case D3DSPSM_NEG:     break;
       case D3DSPSM_BIAS:    TRACE("_bias"); break;
       case D3DSPSM_BIASNEG: TRACE("_bias"); break;
-      case D3DSPSM_SIGN:    TRACE("_sign"); break;
-      case D3DSPSM_SIGNNEG: TRACE("_sign"); break;
-      case D3DSPSM_COMP:    TRACE("_comp"); break;
+      case D3DSPSM_SIGN:    TRACE("_bx2"); break;
+      case D3DSPSM_SIGNNEG: TRACE("_bx2"); break;
+      case D3DSPSM_COMP:    break;
       case D3DSPSM_X2:      TRACE("_x2"); break;
-      case D3DSPSM_X2NEG:   TRACE("_bx2"); break;
+      case D3DSPSM_X2NEG:   TRACE("_x2"); break;
       case D3DSPSM_DZ:      TRACE("_dz"); break;
       case D3DSPSM_DW:      TRACE("_dw"); break;
       default:
@@ -1545,15 +1557,177 @@
   }
 }
 
-inline static BOOL pshader_is_version_token(DWORD token) {
-  return 0xFFFF0000 == (token & 0xFFFF0000);
+static int constants[D3D8_PSHADER_MAX_CONSTANTS];
+
+inline static void get_register_name(const DWORD param, char* regstr)
+{
+  static const char* rastout_reg_names[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
+
+  DWORD reg = param & 0x00001FFF;
+  DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT);
+
+  switch (regtype << D3DSP_REGTYPE_SHIFT) {
+  case D3DSPR_TEMP:
+    sprintf(regstr, "R%lu", reg);
+    break;
+  case D3DSPR_INPUT:
+    if (reg==0) {
+       strcpy(regstr, "fragment.color.primary");
+    } else {
+       strcpy(regstr, "fragment.color.secondary");
+    }
+    break;
+  case D3DSPR_CONST:
+    if (constants[reg])
+      sprintf(regstr, "C%lu", reg);
+    else
+      sprintf(regstr, "program.env[%lu]", reg);
+    break;
+  case D3DSPR_TEXTURE: /* case D3DSPR_ADDR: */
+    sprintf(regstr,"T%lu", reg);
+    break;
+  case D3DSPR_RASTOUT:
+    sprintf(regstr, "%s", rastout_reg_names[reg]);
+    break;
+  case D3DSPR_ATTROUT:
+    sprintf(regstr, "oD[%lu]", reg);
+    break;
+  case D3DSPR_TEXCRDOUT:
+    sprintf(regstr, "oT[%lu]", reg);
+    break;
+  default:
+    break;
+  }
+}	
+
+inline static void addline(int* lineNum, char* pgm, char* line)
+{
+  ++(*lineNum);
+  TRACE_(d3d_hw_shader)("GL HW (%u, %u) : %s\n", *lineNum, strlen(pgm), line);
+  strcat(pgm, line);
+  strcat(pgm, "\n");
 }
 
-inline static BOOL pshader_is_comment_token(DWORD token) {
-  return D3DSIO_COMMENT == (token & D3DSI_OPCODE_MASK);
+char* shift_tab[] = {
+  "dummy",     /*  0 (none) */ 
+  "coefmul.x", /*  1 (x2)   */ 
+  "coefmul.y", /*  2 (x4)   */ 
+  "coefmul.z", /*  3 (x8)   */ 
+  "coefmul.w", /*  4 (x16)  */ 
+  "dummy",     /*  5 (x32)  */ 
+  "dummy",     /*  6 (x64)  */ 
+  "dummy",     /*  7 (x128) */ 
+  "dummy",     /*  8 (d256) */ 
+  "dummy",     /*  9 (d128) */ 
+  "dummy",     /* 10 (d64)  */ 
+  "dummy",     /* 11 (d32)  */ 
+  "coefdiv.w", /* 12 (d16)  */ 
+  "coefdiv.z", /* 13 (d8)   */ 
+  "coefdiv.y", /* 14 (d4)   */ 
+  "coefdiv.x"  /* 15 (d2)   */ 
+};
+
+inline static void get_write_mask(const DWORD output_reg, char* write_mask) 
+{
+  *write_mask = 0;
+  if ((output_reg & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
+    if (output_reg & D3DSP_WRITEMASK_0) strcat(write_mask, ".r");
+    if (output_reg & D3DSP_WRITEMASK_1) strcat(write_mask, ".g");
+    if (output_reg & D3DSP_WRITEMASK_2) strcat(write_mask, ".b");
+    if (output_reg & D3DSP_WRITEMASK_3) strcat(write_mask, ".a");
+  }
 }
 
+inline static void get_input_register_swizzle(const DWORD instr, char* swzstring) 
+{
+    static const char swizzle_reg_chars[] = "rgba";
+    DWORD swizzle = (instr & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
+    DWORD swizzle_x = swizzle & 0x03;
+    DWORD swizzle_y = (swizzle >> 2) & 0x03;
+    DWORD swizzle_z = (swizzle >> 4) & 0x03;
+    DWORD swizzle_w = (swizzle >> 6) & 0x03;
+    /**
+     * swizzle bits fields:
+     *  WWZZYYXX
+     */
+    *swzstring = 0;
+    if ((D3DSP_NOSWIZZLE >> D3DSP_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
+      if (swizzle_x == swizzle_y && 
+	  swizzle_x == swizzle_z && 
+	  swizzle_x == swizzle_w) {
+	sprintf(swzstring, ".%c", swizzle_reg_chars[swizzle_x]);
+      } else {
+	sprintf(swzstring, ".%c%c%c%c", 
+	      swizzle_reg_chars[swizzle_x], 
+	      swizzle_reg_chars[swizzle_y], 
+	      swizzle_reg_chars[swizzle_z], 
+	      swizzle_reg_chars[swizzle_w]);
+      }
+    }
+}
+
+inline static void gen_output_modifier_line(int saturate, char* write_mask, int shift, char *regstr, char* line)
+{
+  /* Generate a line that does the output modifier computation */
+  sprintf(line, "MUL%s %s%s, %s, %s;", saturate ? "_SAT" : "", regstr, write_mask, regstr, shift_tab[shift]);
+}
+
+inline static int gen_input_modifier_line(const DWORD instr, int tmpreg, char* outregstr, char* line)
+{
+  /* Generate a line that does the input modifier computation and return the input register to use */
+  static char regstr[256];
+  int insert_line;
+ 
+  /* Assume a new line will be added */
+  insert_line = 1;
+  
+  /* Get register name */
+  get_register_name(instr, regstr);
+  
+  switch (instr & D3DSP_SRCMOD_MASK) {
+    case D3DSPSM_NONE:
+      strcpy(outregstr, regstr);
+      insert_line = 0;
+      break;
+    case D3DSPSM_NEG:
+      sprintf(outregstr, "-%s", regstr);
+      insert_line = 0;
+      break;
+    case D3DSPSM_BIAS:
+      sprintf(line, "ADD T%c, %s, -coefdiv.x;", 'A' + tmpreg, regstr);
+      break;
+    case D3DSPSM_BIASNEG:
+      sprintf(line, "ADD T%c, -%s, coefdiv.x;", 'A' + tmpreg, regstr);
+      break;
+    case D3DSPSM_SIGN:
+      sprintf(line, "MAD T%c, %s, coefmul.x, -one.x;", 'A' + tmpreg, regstr);
+      break;
+    case D3DSPSM_SIGNNEG:
+      sprintf(line, "MAD T%c, %s, -coefmul.x, one.x;", 'A' + tmpreg, regstr);
+      break;
+    case D3DSPSM_COMP:
+      sprintf(line, "SUB T%c, one.x, %s;", 'A' + tmpreg, regstr);
+      break;
+    case D3DSPSM_X2:
+      sprintf(line, "ADD T%c, %s, %s", 'A' + tmpreg, regstr, regstr);
+      break;
+    case D3DSPSM_X2NEG:
+      sprintf(line, "ADD T%c, %s, %s", 'A' + tmpreg, regstr, regstr);
+      break;
+    case D3DSPSM_DZ:
+    case D3DSPSM_DW:
+    default:
+      strcpy(outregstr, regstr);
+      insert_line = 0;
+  }
+
+  if (insert_line) {
+    /* Substitute the register name */
+    sprintf(outregstr, "T%c", 'A' + tmpreg);
+  }
 
+  return insert_line;
+}
 
 /**
  * Pixel Shaders
@@ -1563,6 +1737,305 @@
  * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/d3d/interfaces/IDirect3DPixelShader9/_IDirect3DPixelShader9.asp
  *
  */
+inline static VOID IDirect3DPixelShaderImpl_GenerateProgramArbHW(IDirect3DPixelShaderImpl* pshader, CONST DWORD* pFunction) {
+  const DWORD* pToken = pFunction;
+  const SHADER_OPCODE* curOpcode = NULL;
+  const DWORD* pInstr;
+  DWORD code;
+  DWORD i;
+  int autoparam;
+  unsigned lineNum = 0;
+  char *pgmStr = NULL;
+  char  tmpLine[255];
+  BOOL saturate;
+  IDirect3DDevice8Impl* This = pshader->device;
+
+  for(i = 0; i < D3D8_PSHADER_MAX_CONSTANTS; i++)
+    constants[i] = 0;
+  
+  pgmStr = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, 65535); /* 64kb should be enough */
+  
+  if (NULL != pToken) {
+    while (D3DPS_END() != *pToken) { 
+      if (pshader_is_version_token(*pToken)) { /** version */
+
+        /* Extract version *10 into integer value (ie. 1.0 == 10, 1.1==11 etc */
+        int version = (((*pToken >> 8) & 0x0F) * 10) + (*pToken & 0x0F);
+        int numTemps;
+        int numConstants;
+
+	TRACE_(d3d_hw_shader)("ps.%lu.%lu;\n", (*pToken >> 8) & 0x0F, (*pToken & 0x0F));
+
+        /* Each release of pixel shaders has had different numbers of temp registers */
+        switch (version) {
+        case 10: numTemps=12;
+                 numConstants=8;
+                 strcpy(tmpLine, "!!ARBfp1.0");
+                 break;
+        case 11: numTemps=12; 
+                 numConstants=8;
+                 strcpy(tmpLine, "!!ARBfp1.0");
+                 break;
+        case 20: numTemps=12;
+                 numConstants=8;
+                 strcpy(tmpLine, "!!ARBfp2.0");
+                 FIXME_(d3d_hw_shader)("No work done yet to support ps2.0 in hw\n");
+                 break;
+        case 30: numTemps=32; 
+                 numConstants=8;
+                 strcpy(tmpLine, "!!ARBfp3.0");
+                 FIXME_(d3d_hw_shader)("No work done yet to support ps3.0 in hw\n");
+                 break;
+        default:
+                 numTemps=12;
+                 numConstants=8;
+                 strcpy(tmpLine, "!!ARBfp1.0");
+                 FIXME_(d3d_hw_shader)("Unrecognized pixel shader version!\n");
+        }
+        addline(&lineNum, pgmStr, tmpLine);
+
+        for(i = 0; i < 4; i++) {
+          sprintf(tmpLine, "TEMP T%lu;", i);
+          addline(&lineNum, pgmStr, tmpLine);
+        }
+        for(i = 0; i < 2; i++) {
+          sprintf(tmpLine, "TEMP R%lu;", i);
+          addline(&lineNum, pgmStr, tmpLine);
+        }
+
+        sprintf(tmpLine, "TEMP TMP;");
+        addline(&lineNum, pgmStr, tmpLine);
+        sprintf(tmpLine, "TEMP TA;");
+        addline(&lineNum, pgmStr, tmpLine);
+        sprintf(tmpLine, "TEMP TB;");
+        addline(&lineNum, pgmStr, tmpLine);
+        sprintf(tmpLine, "TEMP TC;");
+        addline(&lineNum, pgmStr, tmpLine);
+
+        strcpy(tmpLine, "PARAM coefdiv = { 0.5, 0.25, 0.125, 0.0625 };");
+        addline(&lineNum, pgmStr, tmpLine);
+        strcpy(tmpLine, "PARAM coefmul = { 2, 4, 8, 16 };");
+        addline(&lineNum, pgmStr, tmpLine);
+        strcpy(tmpLine, "PARAM one = { 1.0, 1.0, 1.0, 1.0 };");
+        addline(&lineNum, pgmStr, tmpLine);
+
+        for(i = 0; i < 4; i++) {
+          sprintf(tmpLine, "MOV T%lu, fragment.texcoord[%lu];", i, i);
+          addline(&lineNum, pgmStr, tmpLine);
+        }
+
+        ++pToken;
+        continue;
+      } 
+      if (pshader_is_comment_token(*pToken)) { /** comment */
+        DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
+        ++pToken;
+        /*TRACE("comment[%ld] ;%s\n", comment_len, (char*)pToken);*/
+        pToken += comment_len;
+        continue;
+      }
+      code = *pToken;
+      pInstr = pToken;
+      curOpcode = pshader_program_get_opcode(code);
+      ++pToken;
+      if (NULL == curOpcode) {
+        /* unknown current opcode ... */
+        while (*pToken & 0x80000000) {
+          TRACE("unrecognized opcode: %08lx\n", *pToken);
+          ++pToken;
+        }
+      } else {
+        autoparam = 1;
+        saturate = FALSE;
+        /* Build opcode for GL fragment_program */
+        switch (curOpcode->opcode) {
+        case D3DSIO_DEF:
+            {
+              DWORD reg = *pToken & 0x00001FFF;
+              sprintf(tmpLine, "PARAM C%lu = { %f, %f, %f, %f };", reg,
+                              *((float*)(pToken+1)),
+                              *((float*)(pToken+2)),
+                              *((float*)(pToken+3)),
+                              *((float*)(pToken+4)) );
+              addline(&lineNum, pgmStr, tmpLine);
+              constants[reg] = 1;
+              autoparam = 0;
+              pToken+=5;
+            }
+            break;
+        case D3DSIO_TEXKILL:
+            strcpy(tmpLine, "KIL");
+            break;
+        case D3DSIO_TEX:
+            {
+              DWORD reg = *pToken & 0x00001FFF;
+              sprintf(tmpLine,"TEX T%lu, T%lu, texture[%lu], 2D;", reg, reg, reg);
+              addline(&lineNum, pgmStr, tmpLine);
+              autoparam = 0;
+              pToken++;
+            }
+            break;
+        case D3DSIO_TEXCOORD:
+            {
+              DWORD reg = *pToken & 0x00001FFF;
+              sprintf(tmpLine, "MOV T%lu, fragment.texcoord[%lu];", reg, reg);
+              addline(&lineNum, pgmStr, tmpLine);
+              autoparam = 0;
+              pToken++;
+            }
+            break;
+        case D3DSIO_TEXM3x2PAD:
+            {
+              DWORD reg = *pToken & 0x00001FFF;
+              char buf[50];
+              if (gen_input_modifier_line(*(pToken+1), 0, buf, tmpLine))
+                addline(&lineNum, pgmStr, tmpLine);
+              sprintf(tmpLine, "DP3 TMP.x, T%lu, %s;", reg, buf);
+              addline(&lineNum, pgmStr, tmpLine);
+              autoparam = 0;
+              pToken += 2;
+            }
+            break;
+        case D3DSIO_TEXM3x2TEX:
+            {
+              DWORD reg = *pToken & 0x00001FFF;
+              char buf[50];
+              if (gen_input_modifier_line(*(pToken+1), 0, buf, tmpLine))
+                addline(&lineNum, pgmStr, tmpLine);
+              sprintf(tmpLine, "DP3 TMP.y, T%lu, %s;", reg, buf);
+              addline(&lineNum, pgmStr, tmpLine);
+              sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;", reg, reg);
+              addline(&lineNum, pgmStr, tmpLine);
+              autoparam = 0;
+              pToken += 2;
+            }
+            break;
+	case D3DSIO_TEXREG2AR:
+            {
+              DWORD reg1 = *pToken & 0x00001FFF;
+              DWORD reg2 = *(pToken+1) & 0x00001FFF;
+              sprintf(tmpLine, "MOV TMP.r, T%lu.a;", reg2);
+              addline(&lineNum, pgmStr, tmpLine);
+              sprintf(tmpLine, "MOV TMP.g, T%lu.r;", reg2);
+              addline(&lineNum, pgmStr, tmpLine);
+              sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;", reg1, reg1);
+              addline(&lineNum, pgmStr, tmpLine);
+              autoparam = 0;
+              pToken+=2;
+            }
+            break;
+        case D3DSIO_TEXREG2GB:
+            {
+              DWORD reg1 = *pToken & 0x00001FFF;
+              DWORD reg2 = *(pToken+1) & 0x00001FFF;
+              sprintf(tmpLine, "MOV TMP.r, T%lu.g;", reg2);
+              addline(&lineNum, pgmStr, tmpLine);
+              sprintf(tmpLine, "MOV TMP.g, T%lu.b;", reg2);
+              addline(&lineNum, pgmStr, tmpLine);
+              sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;", reg1, reg1);
+              addline(&lineNum, pgmStr, tmpLine);
+              autoparam = 0;
+              pToken+=2;
+            }
+            break;
+        case D3DSIO_MOV:
+            strcpy(tmpLine, "MOV");
+            break;
+        case D3DSIO_MUL:
+            strcpy(tmpLine, "MUL");
+            break;
+        case D3DSIO_DP3:
+            strcpy(tmpLine, "DP3");
+            break;
+        case D3DSIO_MAD:
+            strcpy(tmpLine, "MAD");
+            break;
+        case D3DSIO_ADD:
+            strcpy(tmpLine, "ADD");
+            break;
+        case D3DSIO_SUB:
+            strcpy(tmpLine, "SUB");
+            break;
+        default:
+            FIXME_(d3d_hw_shader)("Can't handle opcode %s in hwShader\n", curOpcode->name);
+        }
+        if (0 != (*pToken & D3DSP_DSTMOD_MASK)) {
+          DWORD mask = *pToken & D3DSP_DSTMOD_MASK;
+          switch (mask) {
+          case D3DSPDM_SATURATE: saturate = TRUE; break;
+          default:
+            TRACE("_unhandled_modifier(0x%08lx)", mask);
+          }
+        }
+        if (autoparam && (curOpcode->num_params > 0)) {
+          char regs[3][50];
+          char tmp[256];
+          char swzstring[20];
+	  /* Generate lines that handle input modifier computation */
+          for (i = 1; i < curOpcode->num_params; i++) {
+            if (gen_input_modifier_line(*(pToken+i), i-1, regs[i-1], tmp))
+              addline(&lineNum, pgmStr, tmp);
+          }
+	  /* Handle saturation only when no shift is present in the output modifier */
+          if ((*pToken & D3DSPDM_SATURATE) && (0 == (*pToken & D3DSP_DSTSHIFT_MASK)))
+            strcat(tmpLine,"_SAT");
+          strcat(tmpLine, " ");
+	  /* Handle output register */
+          get_register_name(*pToken, tmp);
+          strcat(tmpLine, tmp);
+          get_write_mask(*pToken, tmp);
+          strcat(tmpLine, tmp);
+	  /* Handle input registers */
+          for (i = 1; i < curOpcode->num_params; i++) {
+            strcat(tmpLine, ", ");
+            strcat(tmpLine, regs[i-1]);
+            get_input_register_swizzle(*(pToken+i), swzstring);
+            strcat(tmpLine, swzstring);
+          }
+          strcat(tmpLine,";");
+          addline(&lineNum, pgmStr, tmpLine);
+          pToken += curOpcode->num_params;
+        }
+        if (curOpcode->num_params > 0) {
+          DWORD param = *(pInstr+1);
+          if (0 != (param & D3DSP_DSTSHIFT_MASK)) {
+            /* Generate a line that handle the output modifier computation */
+            char regstr[100];
+            char write_mask[20];
+            DWORD shift = (param & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
+            get_register_name(param, regstr);
+            get_write_mask(param, write_mask);
+            gen_output_modifier_line(saturate, write_mask, shift, regstr, tmpLine);
+            addline(&lineNum, pgmStr, tmpLine);
+          }
+        }
+      }
+    }
+    strcpy(tmpLine, "MOV result.color, R0;");
+    addline(&lineNum, pgmStr, tmpLine);
+
+    strcpy(tmpLine, "END");
+    addline(&lineNum, pgmStr, tmpLine);
+  }
+
+  /*  Create the hw shader */
+  GL_EXTCALL(glGenProgramsARB(1, &pshader->prgId));
+  TRACE_(d3d_hw_shader)("Creating a hw pixel shader, prg=%d\n", pshader->prgId);
+
+  GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, pshader->prgId));
+
+  /* Create the program and check for errors */
+  GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, strlen(pgmStr), pgmStr));
+  if (glGetError() == GL_INVALID_OPERATION) {
+    GLint errPos;
+    glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
+    FIXME_(d3d_hw_shader)("HW PixelShader Error at position: %d\n%s\n", errPos, glGetString(GL_PROGRAM_ERROR_STRING_ARB));
+    pshader->prgId = -1;
+  }
+  
+  HeapFree(GetProcessHeap(), 0, pgmStr);
+}
+
 inline static VOID IDirect3DPixelShaderImpl_ParseProgram(IDirect3DPixelShaderImpl* pshader, CONST DWORD* pFunction) {
   const DWORD* pToken = pFunction;
   const SHADER_OPCODE* curOpcode = NULL;
@@ -1622,6 +2095,11 @@
   } else {
     pshader->functionLength = 1; /* no Function defined use fixed function vertex processing */
   }
+
+  if (NULL != pFunction) {
+    IDirect3DPixelShaderImpl_GenerateProgramArbHW(pshader, pFunction);
+  }
+
   if (NULL != pFunction) {
     pshader->function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, pshader->functionLength);
     memcpy(pshader->function, pFunction, pshader->functionLength);


More information about the wine-patches mailing list