[PATCH] WineD3D: Add the ability to duplicate GL pixel shaders=0A=

Stefan Doesinger stefan at codeweavers.com
Mon Nov 24 11:55:50 CST 2008


=0A=
Some stateblock parameters have to be compiled into the GL pixel=0A=
shader code, like lines for pixelformat fixups. This leads to=0A=
problems when applications switch those settings, requiring a=0A=
recompilation of the shader. This patch enables wined3d to have=0A=
multiple GL shaders for a D3D shader(pixel shaders only so far)=0A=
to handle this more efficiently.=0A=
=0A=
There are numerous advantages here:=0A=
1) We don't have to be bothered about enabling some color format=0A=
fixups like for U8V8 and R32F which fix some SDK demos but until=0A=
now broke performance in real games rather badly=0A=
=0A=
2) We can handle sRGB writing more efficiently. We don't have to=0A=
bother uniform loading with it, and we can remove the quite=0A=
expensive sRGB code from the shader entirely when sRGB writing is=0A=
off.=0A=
=0A=
3) Finding the shader in the shader array (usually there is just 1=0A=
shader anyway) is most likely just as fast as validating the=0A=
hardcoded assumptions. For GLSL, this extra parameter can be=0A=
included in the program hashmap (by a later patch).=0A=
=0A=
A few remarks:=0A=
We don't save any uniforms for sRGB constant loading because=0A=
hardware usually can't hardcode immediate values into the shader.=0A=
Thus if we use immediate values in the code, the compiler uses a=0A=
uniform silently. Thus I have dropped the render_offscreen and=0A=
height hardcoding checks because they didn't work anyway.=0A=
=0A=
Better packaging of the pixelshader compile arg structure will=0A=
come in a different patch.=0A=
=0A=
We pass this structure to the shader compilation code, but so far=0A=
it doesn't use it. It still reads the values from the stateblock.=0A=
This isn't an immediate problem, so I will fix this in another patch.=0A=
---=0A=
 dlls/wined3d/arb_program_shader.c |  116 +++++++++----------------------=0A=
 dlls/wined3d/baseshader.c         |    3 +-=0A=
 dlls/wined3d/glsl_shader.c        |  111 ++++++++++++-------------------=0A=
 dlls/wined3d/pixelshader.c        |  134 =
++++++++++++------------------------=0A=
 dlls/wined3d/wined3d_private.h    |   21 +++---=0A=
 5 files changed, 134 insertions(+), 251 deletions(-)=0A=
=0A=
diff --git a/dlls/wined3d/arb_program_shader.c =
b/dlls/wined3d/arb_program_shader.c=0A=
index f6cefe3..3a52453 100644=0A=
--- a/dlls/wined3d/arb_program_shader.c=0A=
+++ b/dlls/wined3d/arb_program_shader.c=0A=
@@ -220,32 +220,6 @@ static void shader_arb_load_constants(=0A=
                 =
deviceImpl->activeContext->pshader_const_dirty[psi->luminanceconst[i].con=
st_num] =3D 1;=0A=
             }=0A=
         }=0A=
-=0A=
-        if(((IWineD3DPixelShaderImpl *) pshader)->srgb_enabled &&=0A=
-           !((IWineD3DPixelShaderImpl *) pshader)->srgb_mode_hardcoded) =
{=0A=
-            float comparison[4];=0A=
-            float mul_low[4];=0A=
-=0A=
-            if(stateBlock->renderState[WINED3DRS_SRGBWRITEENABLE]) {=0A=
-                comparison[0] =3D srgb_cmp; comparison[1] =3D srgb_cmp;=0A=
-                comparison[2] =3D srgb_cmp; comparison[3] =3D srgb_cmp;=0A=
-=0A=
-                mul_low[0] =3D srgb_mul_low; mul_low[1] =3D =
srgb_mul_low;=0A=
-                mul_low[2] =3D srgb_mul_low; mul_low[3] =3D =
srgb_mul_low;=0A=
-            } else {=0A=
-                comparison[0] =3D 1.0 / 0.0; comparison[1] =3D 1.0 / =
0.0;=0A=
-                comparison[2] =3D 1.0 / 0.0; comparison[3] =3D 1.0 / =
0.0;=0A=
-=0A=
-                mul_low[0] =3D 1.0; mul_low[1] =3D 1.0;=0A=
-                mul_low[2] =3D 1.0; mul_low[3] =3D 1.0;=0A=
-            }=0A=
-            =
GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, =
psi->srgb_cmp_const, comparison));=0A=
-            =
GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, =
psi->srgb_low_const, mul_low));=0A=
-            checkGLcall("Load sRGB correction constants\n");=0A=
-            =
deviceImpl->activeContext->pshader_const_dirty[psi->srgb_low_const] =3D =
1;=0A=
-            =
deviceImpl->activeContext->pshader_const_dirty[psi->srgb_cmp_const] =3D =
1;=0A=
-=0A=
-        }=0A=
     }=0A=
 }=0A=
 =0A=
@@ -325,49 +299,16 @@ static void shader_generate_arb_declarations(=0A=
     }=0A=
 =0A=
     if(device->stateBlock->renderState[WINED3DRS_SRGBWRITEENABLE] && =
pshader) {=0A=
-        IWineD3DPixelShaderImpl *ps_impl =3D (IWineD3DPixelShaderImpl =
*) This;=0A=
-        /* If there are 2 constants left to use, use them to pass the =
sRGB correction values in. This way=0A=
-         * srgb write correction can be turned on and off dynamically =
without recompilation. Otherwise=0A=
-         * hardcode them. The drawback of hardcoding is that the shader =
needs recompilation to turn sRGB=0A=
-         * off again=0A=
-         */=0A=
-        if(max_constantsF + extra_constants_needed + 1 < =
GL_LIMITS(pshader_constantsF) && FALSE) {=0A=
-            /* The idea is that if srgb is enabled, then disabled, the =
constant loading code=0A=
-             * can effectively disable sRGB correction by passing 1.0 =
and INF as the multiplication=0A=
-             * and comparison constants. If it disables it that way, =
the shader won't be recompiled=0A=
-             * and the code will stay in, so sRGB writing can be turned =
on again by setting the=0A=
-             * constants from the spec=0A=
-             */=0A=
-            ps_impl->srgb_mode_hardcoded =3D 0;=0A=
-            ps_impl->srgb_low_const =3D GL_LIMITS(pshader_constantsF) - =
extra_constants_needed;=0A=
-            ps_impl->srgb_cmp_const =3D GL_LIMITS(pshader_constantsF) - =
extra_constants_needed - 1;=0A=
-            shader_addline(buffer, "PARAM srgb_mul_low =3D =
program.env[%d];\n", ps_impl->srgb_low_const);=0A=
-            shader_addline(buffer, "PARAM srgb_comparison =3D =
program.env[%d];\n", ps_impl->srgb_cmp_const);=0A=
-        } else {=0A=
-            shader_addline(buffer, "PARAM srgb_mul_low =3D {%f, %f, %f, =
1.0};\n",=0A=
-                           srgb_mul_low, srgb_mul_low, srgb_mul_low);=0A=
-            shader_addline(buffer, "PARAM srgb_comparison =3D  {%f, %f, =
%f, %f};\n",=0A=
-                           srgb_cmp, srgb_cmp, srgb_cmp, srgb_cmp);=0A=
-            ps_impl->srgb_mode_hardcoded =3D 1;=0A=
-        }=0A=
-        /* These can be hardcoded, they do not cause any harm because =
no fragment will enter the high=0A=
-         * path if the comparison value is set to INF=0A=
-         */=0A=
+        shader_addline(buffer, "PARAM srgb_mul_low =3D {%f, %f, %f, =
1.0};\n",=0A=
+                        srgb_mul_low, srgb_mul_low, srgb_mul_low);=0A=
+        shader_addline(buffer, "PARAM srgb_comparison =3D  {%f, %f, %f, =
%f};\n",=0A=
+                        srgb_cmp, srgb_cmp, srgb_cmp, srgb_cmp);=0A=
         shader_addline(buffer, "PARAM srgb_pow =3D  {%f, %f, %f, =
1.0};\n",=0A=
                        srgb_pow, srgb_pow, srgb_pow);=0A=
         shader_addline(buffer, "PARAM srgb_mul_hi =3D  {%f, %f, %f, =
1.0};\n",=0A=
                        srgb_mul_high, srgb_mul_high, srgb_mul_high);=0A=
         shader_addline(buffer, "PARAM srgb_sub_hi =3D  {%f, %f, %f, =
0.0};\n",=0A=
                        srgb_sub_high, srgb_sub_high, srgb_sub_high);=0A=
-        ps_impl->srgb_enabled =3D 1;=0A=
-    } else if(pshader) {=0A=
-        IWineD3DPixelShaderImpl *ps_impl =3D (IWineD3DPixelShaderImpl =
*) This;=0A=
-=0A=
-        /* Do not write any srgb fixup into the shader to save shader =
size and processing time.=0A=
-         * As a consequence, we can't toggle srgb write on without =
recompilation=0A=
-         */=0A=
-        ps_impl->srgb_enabled =3D 0;=0A=
-        ps_impl->srgb_mode_hardcoded =3D 1;=0A=
     }=0A=
 =0A=
     /* Load local constants using the program-local space,=0A=
@@ -1898,9 +1839,8 @@ static void shader_arb_select(IWineD3DDevice =
*iface, BOOL usePS, BOOL useVS) {=0A=
         struct ps_compile_args compile_args;=0A=
         TRACE("Using pixel shader\n");=0A=
         find_ps_compile_args((IWineD3DPixelShaderImpl *) =
This->stateBlock->pixelShader, This->stateBlock, &compile_args);=0A=
-        pixelshader_compile(This->stateBlock->pixelShader, =
&compile_args);=0A=
-=0A=
-        priv->current_fprogram_id =3D ((IWineD3DPixelShaderImpl =
*)This->stateBlock->pixelShader)->prgId;=0A=
+        priv->current_fprogram_id =3D =
find_gl_pshader((IWineD3DPixelShaderImpl *) =
This->stateBlock->pixelShader,=0A=
+                                                    &compile_args);=0A=
 =0A=
         /* Bind the fragment program */=0A=
         GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, =
priv->current_fprogram_id));=0A=
@@ -1978,24 +1918,33 @@ static void shader_arb_cleanup(IWineD3DDevice =
*iface) {=0A=
 }=0A=
 =0A=
 static void shader_arb_destroy(IWineD3DBaseShader *iface) {=0A=
-    IWineD3DBaseShaderImpl *This =3D (IWineD3DBaseShaderImpl *) iface;=0A=
-    WineD3D_GL_Info *gl_info =3D &((IWineD3DDeviceImpl *) =
This->baseShader.device)->adapter->gl_info;=0A=
-    char pshader =3D =
shader_is_pshader_version(This->baseShader.hex_version);=0A=
+    IWineD3DBaseShaderImpl *baseShader =3D (IWineD3DBaseShaderImpl *) =
iface;=0A=
+    WineD3D_GL_Info *gl_info =3D &((IWineD3DDeviceImpl *) =
baseShader->baseShader.device)->adapter->gl_info;=0A=
+    char pshader =3D =
shader_is_pshader_version(baseShader->baseShader.hex_version);=0A=
 =0A=
     if(pshader) {=0A=
+        IWineD3DPixelShaderImpl *This =3D (IWineD3DPixelShaderImpl *) =
iface;=0A=
+        UINT i;=0A=
+=0A=
         ENTER_GL();=0A=
-        GL_EXTCALL(glDeleteProgramsARB(1, &((IWineD3DPixelShaderImpl *) =
This)->prgId));=0A=
-        checkGLcall("GL_EXTCALL(glDeleteProgramsARB(1, =
&((IWineD3DPixelShaderImpl *) This)->prgId))");=0A=
-        ((IWineD3DPixelShaderImpl *) This)->prgId =3D 0;=0A=
+        for(i =3D 0; i < This->num_gl_shaders; i++) {=0A=
+            GL_EXTCALL(glDeleteProgramsARB(1, =
&This->gl_shaders[i].prgId));=0A=
+            checkGLcall("GL_EXTCALL(glDeleteProgramsARB(1, =
&This->gl_shaders[i].prgId))");=0A=
+        }=0A=
         LEAVE_GL();=0A=
+        HeapFree(GetProcessHeap(), 0, This->gl_shaders);=0A=
+        This->gl_shaders =3D NULL;=0A=
+        This->num_gl_shaders =3D 0;=0A=
     } else {=0A=
+        IWineD3DVertexShaderImpl *This =3D (IWineD3DVertexShaderImpl *) =
iface;=0A=
+=0A=
         ENTER_GL();=0A=
-        GL_EXTCALL(glDeleteProgramsARB(1, &((IWineD3DVertexShaderImpl =
*) This)->prgId));=0A=
-        checkGLcall("GL_EXTCALL(glDeleteProgramsARB(1, =
&((IWineD3DPixelShaderImpl *) This)->prgId))");=0A=
+        GL_EXTCALL(glDeleteProgramsARB(1, &This->prgId));=0A=
+        checkGLcall("GL_EXTCALL(glDeleteProgramsARB(1, &This->prgId))");=0A=
         ((IWineD3DVertexShaderImpl *) This)->prgId =3D 0;=0A=
         LEAVE_GL();=0A=
     }=0A=
-    This->baseShader.is_compiled =3D FALSE;=0A=
+    baseShader->baseShader.is_compiled =3D FALSE;=0A=
 }=0A=
 =0A=
 static HRESULT shader_arb_alloc(IWineD3DDevice *iface) {=0A=
@@ -2048,13 +1997,14 @@ static void =
arbfp_add_sRGB_correction(SHADER_BUFFER *buffer, const char *fragcol=0A=
     /* [0.0;1.0] clamping. Not needed, this is done implicitly */=0A=
 }=0A=
 =0A=
-static void shader_arb_generate_pshader(IWineD3DPixelShader *iface, =
SHADER_BUFFER *buffer) {=0A=
+static GLuint shader_arb_generate_pshader(IWineD3DPixelShader *iface, =
SHADER_BUFFER *buffer) {=0A=
     IWineD3DPixelShaderImpl *This =3D (IWineD3DPixelShaderImpl *)iface;=0A=
     shader_reg_maps* reg_maps =3D &This->baseShader.reg_maps;=0A=
     CONST DWORD *function =3D This->baseShader.function;=0A=
     const char *fragcolor;=0A=
     WineD3D_GL_Info *gl_info =3D &((IWineD3DDeviceImpl =
*)This->baseShader.device)->adapter->gl_info;=0A=
     local_constant* lconst;=0A=
+    GLuint retval;=0A=
 =0A=
     /*  Create the hw ARB shader */=0A=
     shader_addline(buffer, "!!ARBfp1.0\n");=0A=
@@ -2091,7 +2041,7 @@ static void =
shader_arb_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFFE=0A=
     } else {=0A=
         fragcolor =3D "TMP_COLOR";=0A=
     }=0A=
-    if(This->srgb_enabled) {=0A=
+    if(((IWineD3DDeviceImpl =
*)This->baseShader.device)->stateBlock->renderState[WINED3DRS_SRGBWRITEEN=
ABLE]) {=0A=
         arbfp_add_sRGB_correction(buffer, fragcolor, "TMP", "TMP2", =
"TA", "TB");=0A=
     }=0A=
     if (This->baseShader.hex_version < WINED3DPS_VERSION(3,0)) {=0A=
@@ -2102,12 +2052,12 @@ static void =
shader_arb_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFFE=0A=
     shader_addline(buffer, "END\n");=0A=
 =0A=
     /* TODO: change to resource.glObjectHandle or something like that */=0A=
-    GL_EXTCALL(glGenProgramsARB(1, &This->prgId));=0A=
+    GL_EXTCALL(glGenProgramsARB(1, &retval));=0A=
 =0A=
-    TRACE("Creating a hw pixel shader, prg=3D%d\n", This->prgId);=0A=
-    GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, This->prgId));=0A=
+    TRACE("Creating a hw pixel shader, prg=3D%d\n", retval);=0A=
+    GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, retval));=0A=
 =0A=
-    TRACE("Created hw pixel shader, prg=3D%d\n", This->prgId);=0A=
+    TRACE("Created hw pixel shader, prg=3D%d\n", retval);=0A=
     /* Create the program and check for errors */=0A=
     GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, =
GL_PROGRAM_FORMAT_ASCII_ARB,=0A=
                buffer->bsize, buffer->buffer));=0A=
@@ -2117,7 +2067,7 @@ static void =
shader_arb_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFFE=0A=
         glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);=0A=
         FIXME("HW PixelShader Error at position %d: %s\n",=0A=
               errPos, debugstr_a((const char =
*)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));=0A=
-        This->prgId =3D -1;=0A=
+        retval =3D 0;=0A=
     }=0A=
 =0A=
     /* Load immediate constants */=0A=
@@ -2128,6 +2078,8 @@ static void =
shader_arb_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFFE=0A=
             checkGLcall("glProgramLocalParameter4fvARB");=0A=
         }=0A=
     }=0A=
+=0A=
+    return retval;=0A=
 }=0A=
 =0A=
 static void shader_arb_generate_vshader(IWineD3DVertexShader *iface, =
SHADER_BUFFER *buffer) {=0A=
diff --git a/dlls/wined3d/baseshader.c b/dlls/wined3d/baseshader.c=0A=
index f7c8d35..292fede 100644=0A=
--- a/dlls/wined3d/baseshader.c=0A=
+++ b/dlls/wined3d/baseshader.c=0A=
@@ -1101,8 +1101,9 @@ static void shader_none_destroy(IWineD3DBaseShader =
*iface) {}=0A=
 static HRESULT shader_none_alloc(IWineD3DDevice *iface) {return =
WINED3D_OK;}=0A=
 static void shader_none_free(IWineD3DDevice *iface) {}=0A=
 static BOOL shader_none_dirty_const(IWineD3DDevice *iface) {return =
FALSE;}=0A=
-static void shader_none_generate_pshader(IWineD3DPixelShader *iface, =
SHADER_BUFFER *buffer) {=0A=
+static GLuint shader_none_generate_pshader(IWineD3DPixelShader *iface, =
SHADER_BUFFER *buffer) {=0A=
     FIXME("NONE shader backend asked to generate a pixel shader\n");=0A=
+    return 0;=0A=
 }=0A=
 static void shader_none_generate_vshader(IWineD3DVertexShader *iface, =
SHADER_BUFFER *buffer) {=0A=
     FIXME("NONE shader backend asked to generate a vertex shader\n");=0A=
diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c=0A=
index eeec260..d0d7491 100644=0A=
--- a/dlls/wined3d/glsl_shader.c=0A=
+++ b/dlls/wined3d/glsl_shader.c=0A=
@@ -73,8 +73,6 @@ struct glsl_shader_prog_link {=0A=
     GLhandleARB             bumpenvmat_location[MAX_TEXTURES];=0A=
     GLhandleARB             luminancescale_location[MAX_TEXTURES];=0A=
     GLhandleARB             luminanceoffset_location[MAX_TEXTURES];=0A=
-    GLhandleARB             srgb_comparison_location;=0A=
-    GLhandleARB             srgb_mul_low_location;=0A=
     GLhandleARB             ycorrection_location;=0A=
     GLenum                  vertex_color_clamp;=0A=
     GLhandleARB             vshader;=0A=
@@ -492,28 +490,6 @@ static void shader_glsl_load_constants(=0A=
             }=0A=
         }=0A=
 =0A=
-        if(((IWineD3DPixelShaderImpl *) pshader)->srgb_enabled &&=0A=
-                  !((IWineD3DPixelShaderImpl *) =
pshader)->srgb_mode_hardcoded) {=0A=
-            float comparison[4];=0A=
-            float mul_low[4];=0A=
-=0A=
-            if(stateBlock->renderState[WINED3DRS_SRGBWRITEENABLE]) {=0A=
-                comparison[0] =3D srgb_cmp; comparison[1] =3D srgb_cmp;=0A=
-                comparison[2] =3D srgb_cmp; comparison[3] =3D srgb_cmp;=0A=
-=0A=
-                mul_low[0] =3D srgb_mul_low; mul_low[1] =3D =
srgb_mul_low;=0A=
-                mul_low[2] =3D srgb_mul_low; mul_low[3] =3D =
srgb_mul_low;=0A=
-            } else {=0A=
-                comparison[0] =3D 1.0 / 0.0; comparison[1] =3D 1.0 / =
0.0;=0A=
-                comparison[2] =3D 1.0 / 0.0; comparison[3] =3D 1.0 / =
0.0;=0A=
-=0A=
-                mul_low[0] =3D 1.0; mul_low[1] =3D 1.0;=0A=
-                mul_low[2] =3D 1.0; mul_low[3] =3D 1.0;=0A=
-            }=0A=
-=0A=
-            GL_EXTCALL(glUniform4fvARB(prog->srgb_comparison_location, =
1, comparison));=0A=
-            GL_EXTCALL(glUniform4fvARB(prog->srgb_mul_low_location, 1, =
mul_low));=0A=
-        }=0A=
         if(((IWineD3DPixelShaderImpl *) pshader)->vpos_uniform) {=0A=
             float correction_params[4];=0A=
             if(deviceImpl->render_offscreen) {=0A=
@@ -608,27 +584,10 @@ static void shader_generate_glsl_declarations(=0A=
         }=0A=
 =0A=
         if(device->stateBlock->renderState[WINED3DRS_SRGBWRITEENABLE]) {=0A=
-            ps_impl->srgb_enabled =3D 1;=0A=
-            if(This->baseShader.limits.constant_float + =
extra_constants_needed + 1 < GL_LIMITS(pshader_constantsF)) {=0A=
-                shader_addline(buffer, "uniform vec4 srgb_mul_low;\n");=0A=
-                shader_addline(buffer, "uniform vec4 =
srgb_comparison;\n");=0A=
-                ps_impl->srgb_mode_hardcoded =3D 0;=0A=
-                extra_constants_needed++;=0A=
-            } else {=0A=
-                ps_impl->srgb_mode_hardcoded =3D 1;=0A=
-                shader_addline(buffer, "const vec4 srgb_mul_low =3D =
vec4(%f, %f, %f, %f);\n",=0A=
-                               srgb_mul_low, srgb_mul_low, =
srgb_mul_low, srgb_mul_low);=0A=
-                shader_addline(buffer, "const vec4 srgb_comparison =3D =
vec4(%f, %f, %f, %f);\n",=0A=
-                               srgb_cmp, srgb_cmp, srgb_cmp, srgb_cmp);=0A=
-            }=0A=
-        } else {=0A=
-            IWineD3DPixelShaderImpl *ps_impl =3D =
(IWineD3DPixelShaderImpl *) This;=0A=
-=0A=
-            /* Do not write any srgb fixup into the shader to save =
shader size and processing time.=0A=
-             * As a consequence, we can't toggle srgb write on without =
recompilation=0A=
-             */=0A=
-            ps_impl->srgb_enabled =3D 0;=0A=
-            ps_impl->srgb_mode_hardcoded =3D 1;=0A=
+            shader_addline(buffer, "const vec4 srgb_mul_low =3D =
vec4(%f, %f, %f, %f);\n",=0A=
+                            srgb_mul_low, srgb_mul_low, srgb_mul_low, =
srgb_mul_low);=0A=
+            shader_addline(buffer, "const vec4 srgb_comparison =3D =
vec4(%f, %f, %f, %f);\n",=0A=
+                            srgb_cmp, srgb_cmp, srgb_cmp, srgb_cmp);=0A=
         }=0A=
         if(reg_maps->vpos || reg_maps->usesdsy) {=0A=
             if(This->baseShader.limits.constant_float + =
extra_constants_needed + 1 < GL_LIMITS(pshader_constantsF)) {=0A=
@@ -2798,7 +2757,8 @@ static void pshader_glsl_dp2add(SHADER_OPCODE_ARG* =
arg) {=0A=
 static void pshader_glsl_input_pack(=0A=
    SHADER_BUFFER* buffer,=0A=
    semantic* semantics_in,=0A=
-   IWineD3DPixelShader *iface) {=0A=
+   IWineD3DPixelShader *iface,=0A=
+   enum vertexprocessing_mode vertexprocessing) {=0A=
 =0A=
    unsigned int i;=0A=
    IWineD3DPixelShaderImpl *This =3D (IWineD3DPixelShaderImpl *) iface;=0A=
@@ -2819,7 +2779,7 @@ static void pshader_glsl_input_pack(=0A=
        switch(usage) {=0A=
 =0A=
            case WINED3DDECLUSAGE_TEXCOORD:=0A=
-               if(usage_idx < 8 && This->vertexprocessing =3D=3D =
pretransformed) {=0A=
+               if(usage_idx < 8 && vertexprocessing =3D=3D =
pretransformed) {=0A=
                    shader_addline(buffer, "IN[%u]%s =3D =
gl_TexCoord[%u]%s;\n",=0A=
                                   This->input_reg_map[i], reg_mask, =
usage_idx, reg_mask);=0A=
                } else {=0A=
@@ -3236,8 +3196,7 @@ static void set_glsl_shader_program(IWineD3DDevice =
*iface, BOOL use_ps, BOOL use=0A=
     if(use_ps) {=0A=
         struct ps_compile_args compile_args;=0A=
         =
find_ps_compile_args((IWineD3DPixelShaderImpl*)This->stateBlock->pixelSha=
der, This->stateBlock, &compile_args);=0A=
-        pixelshader_compile(pshader, &compile_args);=0A=
-        pshader_id =3D ((IWineD3DPixelShaderImpl*)pshader)->prgId;=0A=
+        pshader_id =3D find_gl_pshader((IWineD3DPixelShaderImpl *) =
pshader, &compile_args);=0A=
     } else {=0A=
         pshader_id =3D 0;=0A=
     }=0A=
@@ -3347,8 +3306,6 @@ static void set_glsl_shader_program(IWineD3DDevice =
*iface, BOOL use_ps, BOOL use=0A=
 =0A=
 =0A=
     entry->posFixup_location =3D =
GL_EXTCALL(glGetUniformLocationARB(programId, "posFixup"));=0A=
-    entry->srgb_comparison_location =3D =
GL_EXTCALL(glGetUniformLocationARB(programId, "srgb_comparison"));=0A=
-    entry->srgb_mul_low_location =3D =
GL_EXTCALL(glGetUniformLocationARB(programId, "srgb_mul_low"));=0A=
     entry->ycorrection_location =3D =
GL_EXTCALL(glGetUniformLocationARB(programId, "ycorrection"));=0A=
     checkGLcall("Find glsl program uniform locations");=0A=
 =0A=
@@ -3535,7 +3492,8 @@ static void shader_glsl_destroy(IWineD3DBaseShader =
*iface) {=0A=
     IWineD3DDeviceImpl *device =3D (IWineD3DDeviceImpl =
*)This->baseShader.device;=0A=
     struct shader_glsl_priv *priv =3D (struct shader_glsl_priv =
*)device->shader_priv;=0A=
     WineD3D_GL_Info *gl_info =3D &device->adapter->gl_info;=0A=
-    GLuint *prog;=0A=
+    IWineD3DPixelShaderImpl *ps =3D NULL;=0A=
+    IWineD3DVertexShaderImpl *vs =3D NULL;=0A=
 =0A=
     /* Note: Do not use QueryInterface here to find out which shader =
type this is because this code=0A=
      * can be called from IWineD3DBaseShader::Release=0A=
@@ -3543,11 +3501,13 @@ static void =
shader_glsl_destroy(IWineD3DBaseShader *iface) {=0A=
     char pshader =3D =
shader_is_pshader_version(This->baseShader.hex_version);=0A=
 =0A=
     if(pshader) {=0A=
-        prog =3D &((IWineD3DPixelShaderImpl *) This)->prgId;=0A=
+        ps =3D (IWineD3DPixelShaderImpl *) This;=0A=
+        if(ps->num_gl_shaders =3D=3D 0) return;=0A=
     } else {=0A=
-        prog =3D &((IWineD3DVertexShaderImpl *) This)->prgId;=0A=
+        vs =3D (IWineD3DVertexShaderImpl *) This;=0A=
+        if(vs->prgId =3D=3D 0) return;=0A=
     }=0A=
-    if(*prog =3D=3D 0) return;=0A=
+=0A=
     linked_programs =3D &This->baseShader.linked_programs;=0A=
 =0A=
     TRACE("Deleting linked programs\n");=0A=
@@ -3565,11 +3525,28 @@ static void =
shader_glsl_destroy(IWineD3DBaseShader *iface) {=0A=
         }=0A=
     }=0A=
 =0A=
-    TRACE("Deleting shader object %u\n", *prog);=0A=
-    GL_EXTCALL(glDeleteObjectARB(*prog));=0A=
-    checkGLcall("glDeleteObjectARB");=0A=
-    *prog =3D 0;=0A=
-    This->baseShader.is_compiled =3D FALSE;=0A=
+    if(pshader) {=0A=
+        UINT i;=0A=
+=0A=
+        ENTER_GL();=0A=
+        for(i =3D 0; i < ps->num_gl_shaders; i++) {=0A=
+            TRACE("deleting pshader %u\n", ps->gl_shaders[i].prgId);=0A=
+            GL_EXTCALL(glDeleteObjectARB(ps->gl_shaders[i].prgId));=0A=
+            checkGLcall("glDeleteObjectARB");=0A=
+        }=0A=
+        LEAVE_GL();=0A=
+        HeapFree(GetProcessHeap(), 0, ps->gl_shaders);=0A=
+        ps->gl_shaders =3D NULL;=0A=
+        ps->num_gl_shaders =3D 0;=0A=
+    } else {=0A=
+        TRACE("Deleting shader object %u\n", vs->prgId);=0A=
+        ENTER_GL();=0A=
+        GL_EXTCALL(glDeleteObjectARB(vs->prgId));=0A=
+        checkGLcall("glDeleteObjectARB");=0A=
+        LEAVE_GL();=0A=
+        vs->prgId =3D 0;=0A=
+        vs->baseShader.is_compiled =3D FALSE;=0A=
+    }=0A=
 }=0A=
 =0A=
 static unsigned int glsl_program_key_hash(void *key) {=0A=
@@ -3626,7 +3603,7 @@ static BOOL shader_glsl_dirty_const(IWineD3DDevice =
*iface) {=0A=
     return FALSE;=0A=
 }=0A=
 =0A=
-static void shader_glsl_generate_pshader(IWineD3DPixelShader *iface, =
SHADER_BUFFER *buffer) {=0A=
+static GLuint shader_glsl_generate_pshader(IWineD3DPixelShader *iface, =
SHADER_BUFFER *buffer) {=0A=
     IWineD3DPixelShaderImpl *This =3D (IWineD3DPixelShaderImpl *)iface;=0A=
     shader_reg_maps* reg_maps =3D &This->baseShader.reg_maps;=0A=
     CONST DWORD *function =3D This->baseShader.function;=0A=
@@ -3655,13 +3632,9 @@ static void =
shader_glsl_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFF=0A=
     if (This->baseShader.hex_version >=3D WINED3DPS_VERSION(3,0)) {=0A=
 =0A=
         if(((IWineD3DDeviceImpl *) =
This->baseShader.device)->strided_streams.u.s.position_transformed) {=0A=
-            This->vertexprocessing =3D pretransformed;=0A=
-            pshader_glsl_input_pack(buffer, This->semantics_in, iface);=0A=
+            pshader_glsl_input_pack(buffer, This->semantics_in, iface, =
pretransformed);=0A=
         } else if(!use_vs((IWineD3DDeviceImpl *) =
This->baseShader.device)) {=0A=
-            This->vertexprocessing =3D fixedfunction;=0A=
-            pshader_glsl_input_pack(buffer, This->semantics_in, iface);=0A=
-        } else {=0A=
-            This->vertexprocessing =3D vertexshader;=0A=
+            pshader_glsl_input_pack(buffer, This->semantics_in, iface, =
fixedfunction);=0A=
         }=0A=
     }=0A=
 =0A=
@@ -3682,7 +3655,7 @@ static void =
shader_glsl_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFF=0A=
     } else {=0A=
         fragcolor =3D "gl_FragColor";=0A=
     }=0A=
-    if(This->srgb_enabled) {=0A=
+    if(((IWineD3DDeviceImpl =
*)This->baseShader.device)->stateBlock->renderState[WINED3DRS_SRGBWRITEEN=
ABLE]) {=0A=
         shader_addline(buffer, "tmp0.xyz =3D pow(%s.xyz, vec3(%f, %f, =
%f)) * vec3(%f, %f, %f) - vec3(%f, %f, %f);\n",=0A=
                         fragcolor, srgb_pow, srgb_pow, srgb_pow, =
srgb_mul_high, srgb_mul_high, srgb_mul_high,=0A=
                         srgb_sub_high, srgb_sub_high, srgb_sub_high);=0A=
@@ -3711,7 +3684,7 @@ static void =
shader_glsl_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFF=0A=
     print_glsl_info_log(&GLINFO_LOCATION, shader_obj);=0A=
 =0A=
     /* Store the shader object */=0A=
-    This->prgId =3D shader_obj;=0A=
+    return shader_obj;=0A=
 }=0A=
 =0A=
 static void shader_glsl_generate_vshader(IWineD3DVertexShader *iface, =
SHADER_BUFFER *buffer) {=0A=
diff --git a/dlls/wined3d/pixelshader.c b/dlls/wined3d/pixelshader.c=0A=
index 1ce2849..7b2f639 100644=0A=
--- a/dlls/wined3d/pixelshader.c=0A=
+++ b/dlls/wined3d/pixelshader.c=0A=
@@ -272,32 +272,16 @@ static void pshader_set_limits(=0A=
 =0A=
 /** Generate a pixel shader string using either GL_FRAGMENT_PROGRAM_ARB=0A=
     or GLSL and send it to the card */=0A=
-static inline VOID IWineD3DPixelShaderImpl_GenerateShader(=0A=
-    IWineD3DPixelShader *iface) {=0A=
-    IWineD3DPixelShaderImpl *This =3D (IWineD3DPixelShaderImpl *)iface;=0A=
+static inline GLuint IWineD3DPixelShaderImpl_GenerateShader(=0A=
+    IWineD3DPixelShaderImpl *This) {=0A=
     SHADER_BUFFER buffer;=0A=
 =0A=
-#if 0 /* FIXME: Use the buffer that is held by the device, this is ok =
since fixups will be skipped for software shaders=0A=
-        it also requires entering a critical section but cuts down the =
runtime footprint of wined3d and any memory fragmentation that may =
occur... */=0A=
-    if (This->device->fixupVertexBufferSize < SHADER_PGMSIZE) {=0A=
-        HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);=0A=
-        This->fixupVertexBuffer =3D HeapAlloc(GetProcessHeap() , 0, =
SHADER_PGMSIZE);=0A=
-        This->fixupVertexBufferSize =3D PGMSIZE;=0A=
-        This->fixupVertexBuffer[0] =3D 0;=0A=
-    }=0A=
-    buffer.buffer =3D This->device->fixupVertexBuffer;=0A=
-#else=0A=
-    buffer.buffer =3D HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, =
SHADER_PGMSIZE); =0A=
-#endif=0A=
+    buffer.buffer =3D HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, =
SHADER_PGMSIZE);=0A=
     buffer.bsize =3D 0;=0A=
     buffer.lineNo =3D 0;=0A=
     buffer.newline =3D TRUE;=0A=
 =0A=
-    ((IWineD3DDeviceImpl =
*)This->baseShader.device)->shader_backend->shader_generate_pshader(iface=
, &buffer);=0A=
-=0A=
-#if 1 /* if were using the data buffer of device then we don't need to =
free it */=0A=
-  HeapFree(GetProcessHeap(), 0, buffer.buffer);=0A=
-#endif=0A=
+    return ((IWineD3DDeviceImpl =
*)This->baseShader.device)->shader_backend->shader_generate_pshader((IWin=
eD3DPixelShader *) This, &buffer);=0A=
 }=0A=
 =0A=
 static HRESULT WINAPI =
IWineD3DPixelShaderImpl_SetFunction(IWineD3DPixelShader *iface, CONST =
DWORD *pFunction) {=0A=
@@ -384,89 +368,29 @@ static HRESULT WINAPI =
IWineD3DPixelShaderImpl_SetFunction(IWineD3DPixelShader *i=0A=
     return WINED3D_OK;=0A=
 }=0A=
 =0A=
-HRESULT pixelshader_compile(IWineD3DPixelShader *iface, struct =
ps_compile_args *args) {=0A=
-=0A=
-    IWineD3DPixelShaderImpl *This =3D(IWineD3DPixelShaderImpl *)iface;=0A=
-    IWineD3DDeviceImpl *deviceImpl =3D (IWineD3DDeviceImpl*) =
This->baseShader.device;=0A=
+GLuint pixelshader_compile(IWineD3DPixelShaderImpl *This, struct =
ps_compile_args *args) {=0A=
     CONST DWORD *function =3D This->baseShader.function;=0A=
-    UINT i, sampler;=0A=
     HRESULT hr;=0A=
+    GLuint retval;=0A=
 =0A=
-    TRACE("(%p) : function %p\n", iface, function);=0A=
-=0A=
-    /* We're already compiled, but check if any of the hardcoded =
stateblock assumptions=0A=
-     * changed.=0A=
-     */=0A=
-    if (This->baseShader.is_compiled) {=0A=
-        for(i =3D 0; i < This->baseShader.num_sampled_samplers; i++) {=0A=
-            sampler =3D This->baseShader.sampled_samplers[i];=0A=
-            if(args->format_conversion[sampler] !=3D =
This->baseShader.sampled_format[sampler]) {=0A=
-                WARN("Recompiling shader %p due to format change on =
sampler %d\n", This, sampler);=0A=
-                WARN("Old format group %s, new is %s\n",=0A=
-                     =
debug_d3dformat(This->baseShader.sampled_format[sampler]),=0A=
-                     debug_d3dformat(args->format_conversion[sampler]));=0A=
-                goto recompile;=0A=
-            }=0A=
-        }=0A=
-=0A=
-        /* TODO: Check projected textures */=0A=
-        /* TODO: Check texture types(2D, Cube, 3D) */=0A=
-=0A=
-        if(args->srgb_correction !=3D This->srgb_enabled && =
This->srgb_mode_hardcoded) {=0A=
-            WARN("Recompiling shader because srgb correction is =
different and hardcoded\n");=0A=
-            goto recompile;=0A=
-        }=0A=
-        if(This->baseShader.reg_maps.vpos && !This->vpos_uniform) {=0A=
-            if(This->render_offscreen !=3D deviceImpl->render_offscreen =
||=0A=
-               This->height !=3D ((IWineD3DSurfaceImpl *) =
deviceImpl->render_targets[0])->currentDesc.Height) {=0A=
-                WARN("Recompiling shader because vpos is used, hard =
compiled and changed\n");=0A=
-                goto recompile;=0A=
-            }=0A=
-        }=0A=
-        if(This->baseShader.reg_maps.usesdsy && !This->vpos_uniform) {=0A=
-            if(This->render_offscreen ? 0 : 1 !=3D =
deviceImpl->render_offscreen ? 0 : 1) {=0A=
-                WARN("Recompiling shader because dsy is used, hard =
compiled and render_offscreen changed\n");=0A=
-                goto recompile;=0A=
-            }=0A=
-        }=0A=
-        if(This->baseShader.hex_version >=3D WINED3DPS_VERSION(3,0)) {=0A=
-            if(args->vp_mode !=3D This->vertexprocessing) {=0A=
-                WARN("Recompiling shader because the vertex processing =
mode changed\n");=0A=
-                goto recompile;=0A=
-            }=0A=
-        }=0A=
-=0A=
-        return WINED3D_OK;=0A=
+    TRACE("(%p) : function %p\n", This, function);=0A=
 =0A=
-        recompile:=0A=
-        if(This->baseShader.recompile_count > 50) {=0A=
-            FIXME("Shader %p recompiled more than 50 times\n", This);=0A=
-        } else {=0A=
-            This->baseShader.recompile_count++;=0A=
-        }=0A=
-=0A=
-        deviceImpl->shader_backend->shader_destroy((IWineD3DBaseShader =
*) iface);=0A=
-    }=0A=
-=0A=
-    /* We don't need to compile */=0A=
-    if (!function) {=0A=
-        This->baseShader.is_compiled =3D TRUE;=0A=
-        return WINED3D_OK;=0A=
+    hr =3D IWineD3DPixelShader_UpdateSamplers((IWineD3DPixelShader *) =
This);=0A=
+    if(FAILED(hr)) {=0A=
+        ERR("Failed to update sampler information\n");=0A=
+        return 0;=0A=
     }=0A=
 =0A=
-    hr =3D IWineD3DPixelShader_UpdateSamplers(iface);=0A=
-    if(FAILED(hr)) return hr;=0A=
-=0A=
     /* Reset fields tracking stateblock values being hardcoded in the =
shader */=0A=
     This->baseShader.num_sampled_samplers =3D 0;=0A=
 =0A=
     /* Generate the HW shader */=0A=
     TRACE("(%p) : Generating hardware program\n", This);=0A=
-    IWineD3DPixelShaderImpl_GenerateShader(iface);=0A=
+    retval =3D IWineD3DPixelShaderImpl_GenerateShader(This);=0A=
 =0A=
     This->baseShader.is_compiled =3D TRUE;=0A=
 =0A=
-    return WINED3D_OK;=0A=
+    return retval;=0A=
 }=0A=
 =0A=
 static HRESULT WINAPI =
IWineD3DPixelShaderImpl_UpdateSamplers(IWineD3DPixelShader *iface) {=0A=
@@ -532,3 +456,35 @@ void find_ps_compile_args(IWineD3DPixelShaderImpl =
*shader, IWineD3DStateBlockImp=0A=
         args->vp_mode =3D vertexshader;=0A=
     }=0A=
 }=0A=
+=0A=
+/* Find the GL program compiled for args; compile and cache a new one=0A=
+ * if none matches. Returns 0 if the shader array cannot be grown. */=0A=
+GLuint find_gl_pshader(IWineD3DPixelShaderImpl *shader, struct =
ps_compile_args *args) {=0A=
+    UINT i;=0A=
+    struct ps_compiled_shader *new_array;=0A=
+=0A=
+    /* Usually we have very few GL shaders for each d3d shader(just 1 =
or maybe 2),=0A=
+     * so a linear search is more performant than a hashmap */=0A=
+    for(i =3D 0; i < shader->num_gl_shaders; i++) {=0A=
+        if(memcmp(&shader->gl_shaders[i].args, args, sizeof(*args)) =
=3D=3D 0) {=0A=
+            return shader->gl_shaders[i].prgId;=0A=
+        }=0A=
+    }=0A=
+=0A=
+    TRACE("No matching GL shader found, compiling a new shader\n");=0A=
+    if(shader->gl_shaders) {=0A=
+        new_array =3D HeapReAlloc(GetProcessHeap(), 0, =
shader->gl_shaders,=0A=
+                                (shader->num_gl_shaders + 1) * =
sizeof(*new_array));=0A=
+    } else {=0A=
+        new_array =3D HeapAlloc(GetProcessHeap(), 0, =
sizeof(*new_array));=0A=
+    }=0A=
+    /* HeapReAlloc leaves the old block valid on failure; keep it */=0A=
+    if(!new_array) {=0A=
+        ERR("Out of memory\n");=0A=
+        return 0;=0A=
+    }=0A=
+    shader->gl_shaders =3D new_array;=0A=
+    shader->gl_shaders[shader->num_gl_shaders].args =3D *args;=0A=
+    shader->gl_shaders[shader->num_gl_shaders].prgId =3D =
pixelshader_compile(shader, args);=0A=
+    return shader->gl_shaders[shader->num_gl_shaders++].prgId;=0A=
+}=0A=
diff --git a/dlls/wined3d/wined3d_private.h =
b/dlls/wined3d/wined3d_private.h=0A=
index b593354..cb3a26d 100644=0A=
--- a/dlls/wined3d/wined3d_private.h=0A=
+++ b/dlls/wined3d/wined3d_private.h=0A=
@@ -350,7 +350,7 @@ typedef struct {=0A=
     HRESULT (*shader_alloc_private)(IWineD3DDevice *iface);=0A=
     void (*shader_free_private)(IWineD3DDevice *iface);=0A=
     BOOL (*shader_dirtifyable_constants)(IWineD3DDevice *iface);=0A=
-    void (*shader_generate_pshader)(IWineD3DPixelShader *iface, =
SHADER_BUFFER *buffer);=0A=
+    GLuint (*shader_generate_pshader)(IWineD3DPixelShader *iface, =
SHADER_BUFFER *buffer);=0A=
     void (*shader_generate_vshader)(IWineD3DVertexShader *iface, =
SHADER_BUFFER *buffer);=0A=
     void (*shader_get_caps)(WINED3DDEVTYPE devtype, WineD3D_GL_Info =
*gl_info, struct shader_caps *caps);=0A=
     BOOL (*shader_conv_supported)(WINED3DFORMAT conv);=0A=
@@ -2346,6 +2346,13 @@ struct ps_compile_args {=0A=
     BOOL                        srgb_correction;=0A=
     WINED3DFORMAT               =
format_conversion[MAX_FRAGMENT_SAMPLERS];=0A=
     enum vertexprocessing_mode  vp_mode;=0A=
+    /* Projected textures(ps 1.0-1.3) */=0A=
+    /* Texture types(2D, Cube, 3D) in ps 1.x */=0A=
+};=0A=
+=0A=
+struct ps_compiled_shader { /* one cached GL shader variant */=0A=
+    struct ps_compile_args      args; /* compile-time settings */=0A=
+    GLuint                      prgId; /* GL program built for args */=0A=
 };=0A=
 =0A=
 typedef struct IWineD3DPixelShaderImpl {=0A=
@@ -2365,25 +2372,19 @@ typedef struct IWineD3DPixelShaderImpl {=0A=
     int                         declared_in_count;=0A=
 =0A=
     /* The GL shader */=0A=
-    GLuint                          prgId;=0A=
+    struct ps_compiled_shader   *gl_shaders;=0A=
+    UINT                        num_gl_shaders;=0A=
 =0A=
     /* Some information about the shader behavior */=0A=
     struct stb_const_desc       bumpenvmatconst[MAX_TEXTURES];=0A=
     char                        numbumpenvmatconsts;=0A=
     struct stb_const_desc       luminanceconst[MAX_TEXTURES];=0A=
-    char                        srgb_enabled;=0A=
-    char                        srgb_mode_hardcoded;=0A=
-    UINT                        srgb_low_const;=0A=
-    UINT                        srgb_cmp_const;=0A=
     char                        vpos_uniform;=0A=
-    BOOL                        render_offscreen;=0A=
-    UINT                        height;=0A=
-    enum vertexprocessing_mode  vertexprocessing;=0A=
 } IWineD3DPixelShaderImpl;=0A=
 =0A=
 extern const SHADER_OPCODE IWineD3DPixelShaderImpl_shader_ins[];=0A=
 extern const IWineD3DPixelShaderVtbl IWineD3DPixelShader_Vtbl;=0A=
-HRESULT pixelshader_compile(IWineD3DPixelShader *iface, struct =
ps_compile_args *args);=0A=
+GLuint find_gl_pshader(IWineD3DPixelShaderImpl *shader, struct =
ps_compile_args *args);=0A=
 void find_ps_compile_args(IWineD3DPixelShaderImpl *shader, =
IWineD3DStateBlockImpl *stateblock, struct ps_compile_args *args);=0A=
 =0A=
 /* sRGB correction constants */=0A=
-- =0A=
1.5.6.4=0A=
=0A=

------=_NextPart_000_000E_01C94E6D.F503D280--




More information about the wine-patches mailing list