[PATCH] WineD3D: Add the ability to duplicate GL pixel shaders=0A=
Stefan Doesinger
stefan at codeweavers.com
Mon Nov 24 11:55:50 CST 2008
=0A=
Some stateblock parameters have to be compiled into the GL pixel=0A=
shader code, like lines for pixelformat fixups. This leads to=0A=
problems when applications switch those settings, requiring a=0A=
recompilation of the shader. This patch enables wined3d to have=0A=
multiple GL shaders for a D3D shader (pixel shaders only so far)=0A=
to handle this more efficiently.=0A=
=0A=
There are numerous advantages here:=0A=
1) We don't have to be bothered about enabling some color format=0A=
fixups like for U8V8 and R32F which fix some SDK demos but until=0A=
now broke performance in real games rather badly=0A=
=0A=
2) We can handle sRGB writing more efficiently. We don't have to=0A=
bother uniform loading with it, and we can remove the quite=0A=
expensive sRGB code from the shader entirely when sRGB writing is=0A=
off.=0A=
=0A=
3) Finding the shader in the shader array (usually there is just 1=0A=
shader anyway) is most likely just as fast as validating the=0A=
hardcoded assumptions. For GLSL, this extra parameter can be=0A=
included in the program hashmap (by a later patch).=0A=
=0A=
A few remarks:=0A=
We don't save any uniforms for sRGB constant loading because=0A=
hardware usually can't hardcode immediate values into the shader.=0A=
Thus if we use immediate values in the code, the compiler uses a=0A=
uniform silently. Thus I have dropped the render_offscreen and=0A=
height hardcoding checks because they didn't work anyway.=0A=
=0A=
Better packaging of the pixelshader compile arg structure will=0A=
come in a different patch.=0A=
=0A=
We pass this structure to the shader compilation code, but so far=0A=
it doesn't use it. It still reads the values from the stateblock.=0A=
This isn't an immediate problem, so I will fix this in another patch.=0A=
---=0A=
dlls/wined3d/arb_program_shader.c | 116 +++++++++----------------------=0A=
dlls/wined3d/baseshader.c | 3 +-=0A=
dlls/wined3d/glsl_shader.c | 111 ++++++++++++-------------------=0A=
dlls/wined3d/pixelshader.c | 134 =
++++++++++++------------------------=0A=
dlls/wined3d/wined3d_private.h | 21 +++---=0A=
5 files changed, 134 insertions(+), 251 deletions(-)=0A=
=0A=
diff --git a/dlls/wined3d/arb_program_shader.c =
b/dlls/wined3d/arb_program_shader.c=0A=
index f6cefe3..3a52453 100644=0A=
--- a/dlls/wined3d/arb_program_shader.c=0A=
+++ b/dlls/wined3d/arb_program_shader.c=0A=
@@ -220,32 +220,6 @@ static void shader_arb_load_constants(=0A=
=
deviceImpl->activeContext->pshader_const_dirty[psi->luminanceconst[i].con=
st_num] =3D 1;=0A=
}=0A=
}=0A=
-=0A=
- if(((IWineD3DPixelShaderImpl *) pshader)->srgb_enabled &&=0A=
- !((IWineD3DPixelShaderImpl *) pshader)->srgb_mode_hardcoded) =
{=0A=
- float comparison[4];=0A=
- float mul_low[4];=0A=
-=0A=
- if(stateBlock->renderState[WINED3DRS_SRGBWRITEENABLE]) {=0A=
- comparison[0] =3D srgb_cmp; comparison[1] =3D srgb_cmp;=0A=
- comparison[2] =3D srgb_cmp; comparison[3] =3D srgb_cmp;=0A=
-=0A=
- mul_low[0] =3D srgb_mul_low; mul_low[1] =3D =
srgb_mul_low;=0A=
- mul_low[2] =3D srgb_mul_low; mul_low[3] =3D =
srgb_mul_low;=0A=
- } else {=0A=
- comparison[0] =3D 1.0 / 0.0; comparison[1] =3D 1.0 / =
0.0;=0A=
- comparison[2] =3D 1.0 / 0.0; comparison[3] =3D 1.0 / =
0.0;=0A=
-=0A=
- mul_low[0] =3D 1.0; mul_low[1] =3D 1.0;=0A=
- mul_low[2] =3D 1.0; mul_low[3] =3D 1.0;=0A=
- }=0A=
- =
GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, =
psi->srgb_cmp_const, comparison));=0A=
- =
GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, =
psi->srgb_low_const, mul_low));=0A=
- checkGLcall("Load sRGB correction constants\n");=0A=
- =
deviceImpl->activeContext->pshader_const_dirty[psi->srgb_low_const] =3D =
1;=0A=
- =
deviceImpl->activeContext->pshader_const_dirty[psi->srgb_cmp_const] =3D =
1;=0A=
-=0A=
- }=0A=
}=0A=
}=0A=
=0A=
@@ -325,49 +299,16 @@ static void shader_generate_arb_declarations(=0A=
}=0A=
=0A=
if(device->stateBlock->renderState[WINED3DRS_SRGBWRITEENABLE] && =
pshader) {=0A=
- IWineD3DPixelShaderImpl *ps_impl =3D (IWineD3DPixelShaderImpl =
*) This;=0A=
- /* If there are 2 constants left to use, use them to pass the =
sRGB correction values in. This way=0A=
- * srgb write correction can be turned on and off dynamically =
without recompilation. Otherwise=0A=
- * hardcode them. The drawback of hardcoding is that the shader =
needs recompilation to turn sRGB=0A=
- * off again=0A=
- */=0A=
- if(max_constantsF + extra_constants_needed + 1 < =
GL_LIMITS(pshader_constantsF) && FALSE) {=0A=
- /* The idea is that if srgb is enabled, then disabled, the =
constant loading code=0A=
- * can effectively disable sRGB correction by passing 1.0 =
and INF as the multiplication=0A=
- * and comparison constants. If it disables it that way, =
the shader won't be recompiled=0A=
- * and the code will stay in, so sRGB writing can be turned =
on again by setting the=0A=
- * constants from the spec=0A=
- */=0A=
- ps_impl->srgb_mode_hardcoded =3D 0;=0A=
- ps_impl->srgb_low_const =3D GL_LIMITS(pshader_constantsF) - =
extra_constants_needed;=0A=
- ps_impl->srgb_cmp_const =3D GL_LIMITS(pshader_constantsF) - =
extra_constants_needed - 1;=0A=
- shader_addline(buffer, "PARAM srgb_mul_low =3D =
program.env[%d];\n", ps_impl->srgb_low_const);=0A=
- shader_addline(buffer, "PARAM srgb_comparison =3D =
program.env[%d];\n", ps_impl->srgb_cmp_const);=0A=
- } else {=0A=
- shader_addline(buffer, "PARAM srgb_mul_low =3D {%f, %f, %f, =
1.0};\n",=0A=
- srgb_mul_low, srgb_mul_low, srgb_mul_low);=0A=
- shader_addline(buffer, "PARAM srgb_comparison =3D {%f, %f, =
%f, %f};\n",=0A=
- srgb_cmp, srgb_cmp, srgb_cmp, srgb_cmp);=0A=
- ps_impl->srgb_mode_hardcoded =3D 1;=0A=
- }=0A=
- /* These can be hardcoded, they do not cause any harm because =
no fragment will enter the high=0A=
- * path if the comparison value is set to INF=0A=
- */=0A=
+ shader_addline(buffer, "PARAM srgb_mul_low =3D {%f, %f, %f, =
1.0};\n",=0A=
+ srgb_mul_low, srgb_mul_low, srgb_mul_low);=0A=
+ shader_addline(buffer, "PARAM srgb_comparison =3D {%f, %f, %f, =
%f};\n",=0A=
+ srgb_cmp, srgb_cmp, srgb_cmp, srgb_cmp);=0A=
shader_addline(buffer, "PARAM srgb_pow =3D {%f, %f, %f, =
1.0};\n",=0A=
srgb_pow, srgb_pow, srgb_pow);=0A=
shader_addline(buffer, "PARAM srgb_mul_hi =3D {%f, %f, %f, =
1.0};\n",=0A=
srgb_mul_high, srgb_mul_high, srgb_mul_high);=0A=
shader_addline(buffer, "PARAM srgb_sub_hi =3D {%f, %f, %f, =
0.0};\n",=0A=
srgb_sub_high, srgb_sub_high, srgb_sub_high);=0A=
- ps_impl->srgb_enabled =3D 1;=0A=
- } else if(pshader) {=0A=
- IWineD3DPixelShaderImpl *ps_impl =3D (IWineD3DPixelShaderImpl =
*) This;=0A=
-=0A=
- /* Do not write any srgb fixup into the shader to save shader =
size and processing time.=0A=
- * As a consequence, we can't toggle srgb write on without =
recompilation=0A=
- */=0A=
- ps_impl->srgb_enabled =3D 0;=0A=
- ps_impl->srgb_mode_hardcoded =3D 1;=0A=
}=0A=
=0A=
/* Load local constants using the program-local space,=0A=
@@ -1898,9 +1839,8 @@ static void shader_arb_select(IWineD3DDevice =
*iface, BOOL usePS, BOOL useVS) {=0A=
struct ps_compile_args compile_args;=0A=
TRACE("Using pixel shader\n");=0A=
find_ps_compile_args((IWineD3DPixelShaderImpl *) =
This->stateBlock->pixelShader, This->stateBlock, &compile_args);=0A=
- pixelshader_compile(This->stateBlock->pixelShader, =
&compile_args);=0A=
-=0A=
- priv->current_fprogram_id =3D ((IWineD3DPixelShaderImpl =
*)This->stateBlock->pixelShader)->prgId;=0A=
+ priv->current_fprogram_id =3D =
find_gl_pshader((IWineD3DPixelShaderImpl *) =
This->stateBlock->pixelShader,=0A=
+ &compile_args);=0A=
=0A=
/* Bind the fragment program */=0A=
GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, =
priv->current_fprogram_id));=0A=
@@ -1978,24 +1918,33 @@ static void shader_arb_cleanup(IWineD3DDevice =
*iface) {=0A=
}=0A=
=0A=
static void shader_arb_destroy(IWineD3DBaseShader *iface) {=0A=
- IWineD3DBaseShaderImpl *This =3D (IWineD3DBaseShaderImpl *) iface;=0A=
- WineD3D_GL_Info *gl_info =3D &((IWineD3DDeviceImpl *) =
This->baseShader.device)->adapter->gl_info;=0A=
- char pshader =3D =
shader_is_pshader_version(This->baseShader.hex_version);=0A=
+ IWineD3DBaseShaderImpl *baseShader =3D (IWineD3DBaseShaderImpl *) =
iface;=0A=
+ WineD3D_GL_Info *gl_info =3D &((IWineD3DDeviceImpl *) =
baseShader->baseShader.device)->adapter->gl_info;=0A=
+ char pshader =3D =
shader_is_pshader_version(baseShader->baseShader.hex_version);=0A=
=0A=
if(pshader) {=0A=
+ IWineD3DPixelShaderImpl *This =3D (IWineD3DPixelShaderImpl *) =
iface;=0A=
+ UINT i;=0A=
+=0A=
ENTER_GL();=0A=
- GL_EXTCALL(glDeleteProgramsARB(1, &((IWineD3DPixelShaderImpl *) =
This)->prgId));=0A=
- checkGLcall("GL_EXTCALL(glDeleteProgramsARB(1, =
&((IWineD3DPixelShaderImpl *) This)->prgId))");=0A=
- ((IWineD3DPixelShaderImpl *) This)->prgId =3D 0;=0A=
+ for(i =3D 0; i < This->num_gl_shaders; i++) {=0A=
+ GL_EXTCALL(glDeleteProgramsARB(1, =
&This->gl_shaders[i].prgId));=0A=
+ checkGLcall("GL_EXTCALL(glDeleteProgramsARB(1, =
&This->gl_shaders[i].prgId))");=0A=
+ }=0A=
LEAVE_GL();=0A=
+ HeapFree(GetProcessHeap(), 0, This->gl_shaders);=0A=
+ This->gl_shaders =3D NULL;=0A=
+ This->num_gl_shaders =3D 0;=0A=
} else {=0A=
+ IWineD3DVertexShaderImpl *This =3D (IWineD3DVertexShaderImpl *) =
iface;=0A=
+=0A=
ENTER_GL();=0A=
- GL_EXTCALL(glDeleteProgramsARB(1, &((IWineD3DVertexShaderImpl =
*) This)->prgId));=0A=
- checkGLcall("GL_EXTCALL(glDeleteProgramsARB(1, =
&((IWineD3DPixelShaderImpl *) This)->prgId))");=0A=
+ GL_EXTCALL(glDeleteProgramsARB(1, &This->prgId));=0A=
+ checkGLcall("GL_EXTCALL(glDeleteProgramsARB(1, &This->prgId))");=0A=
((IWineD3DVertexShaderImpl *) This)->prgId =3D 0;=0A=
LEAVE_GL();=0A=
}=0A=
- This->baseShader.is_compiled =3D FALSE;=0A=
+ baseShader->baseShader.is_compiled =3D FALSE;=0A=
}=0A=
=0A=
static HRESULT shader_arb_alloc(IWineD3DDevice *iface) {=0A=
@@ -2048,13 +1997,14 @@ static void =
arbfp_add_sRGB_correction(SHADER_BUFFER *buffer, const char *fragcol=0A=
/* [0.0;1.0] clamping. Not needed, this is done implicitly */=0A=
}=0A=
=0A=
-static void shader_arb_generate_pshader(IWineD3DPixelShader *iface, =
SHADER_BUFFER *buffer) {=0A=
+static GLuint shader_arb_generate_pshader(IWineD3DPixelShader *iface, =
SHADER_BUFFER *buffer) {=0A=
IWineD3DPixelShaderImpl *This =3D (IWineD3DPixelShaderImpl *)iface;=0A=
shader_reg_maps* reg_maps =3D &This->baseShader.reg_maps;=0A=
CONST DWORD *function =3D This->baseShader.function;=0A=
const char *fragcolor;=0A=
WineD3D_GL_Info *gl_info =3D &((IWineD3DDeviceImpl =
*)This->baseShader.device)->adapter->gl_info;=0A=
local_constant* lconst;=0A=
+ GLuint retval;=0A=
=0A=
/* Create the hw ARB shader */=0A=
shader_addline(buffer, "!!ARBfp1.0\n");=0A=
@@ -2091,7 +2041,7 @@ static void =
shader_arb_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFFE=0A=
} else {=0A=
fragcolor =3D "TMP_COLOR";=0A=
}=0A=
- if(This->srgb_enabled) {=0A=
+ if(((IWineD3DDeviceImpl =
*)This->baseShader.device)->stateBlock->renderState[WINED3DRS_SRGBWRITEEN=
ABLE]) {=0A=
arbfp_add_sRGB_correction(buffer, fragcolor, "TMP", "TMP2", =
"TA", "TB");=0A=
}=0A=
if (This->baseShader.hex_version < WINED3DPS_VERSION(3,0)) {=0A=
@@ -2102,12 +2052,12 @@ static void =
shader_arb_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFFE=0A=
shader_addline(buffer, "END\n");=0A=
=0A=
/* TODO: change to resource.glObjectHandle or something like that */=0A=
- GL_EXTCALL(glGenProgramsARB(1, &This->prgId));=0A=
+ GL_EXTCALL(glGenProgramsARB(1, &retval));=0A=
=0A=
- TRACE("Creating a hw pixel shader, prg=3D%d\n", This->prgId);=0A=
- GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, This->prgId));=0A=
+ TRACE("Creating a hw pixel shader, prg=3D%d\n", retval);=0A=
+ GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, retval));=0A=
=0A=
- TRACE("Created hw pixel shader, prg=3D%d\n", This->prgId);=0A=
+ TRACE("Created hw pixel shader, prg=3D%d\n", retval);=0A=
/* Create the program and check for errors */=0A=
GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, =
GL_PROGRAM_FORMAT_ASCII_ARB,=0A=
buffer->bsize, buffer->buffer));=0A=
@@ -2117,7 +2067,7 @@ static void =
shader_arb_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFFE=0A=
glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);=0A=
FIXME("HW PixelShader Error at position %d: %s\n",=0A=
errPos, debugstr_a((const char =
*)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));=0A=
- This->prgId =3D -1;=0A=
+ retval =3D 0;=0A=
}=0A=
=0A=
/* Load immediate constants */=0A=
@@ -2128,6 +2078,8 @@ static void =
shader_arb_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFFE=0A=
checkGLcall("glProgramLocalParameter4fvARB");=0A=
}=0A=
}=0A=
+=0A=
+ return retval;=0A=
}=0A=
=0A=
static void shader_arb_generate_vshader(IWineD3DVertexShader *iface, =
SHADER_BUFFER *buffer) {=0A=
diff --git a/dlls/wined3d/baseshader.c b/dlls/wined3d/baseshader.c=0A=
index f7c8d35..292fede 100644=0A=
--- a/dlls/wined3d/baseshader.c=0A=
+++ b/dlls/wined3d/baseshader.c=0A=
@@ -1101,8 +1101,9 @@ static void shader_none_destroy(IWineD3DBaseShader =
*iface) {}=0A=
static HRESULT shader_none_alloc(IWineD3DDevice *iface) {return =
WINED3D_OK;}=0A=
static void shader_none_free(IWineD3DDevice *iface) {}=0A=
static BOOL shader_none_dirty_const(IWineD3DDevice *iface) {return =
FALSE;}=0A=
-static void shader_none_generate_pshader(IWineD3DPixelShader *iface, =
SHADER_BUFFER *buffer) {=0A=
+static GLuint shader_none_generate_pshader(IWineD3DPixelShader *iface, =
SHADER_BUFFER *buffer) {=0A=
FIXME("NONE shader backend asked to generate a pixel shader\n");=0A=
+ return 0;=0A=
}=0A=
static void shader_none_generate_vshader(IWineD3DVertexShader *iface, =
SHADER_BUFFER *buffer) {=0A=
FIXME("NONE shader backend asked to generate a vertex shader\n");=0A=
diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c=0A=
index eeec260..d0d7491 100644=0A=
--- a/dlls/wined3d/glsl_shader.c=0A=
+++ b/dlls/wined3d/glsl_shader.c=0A=
@@ -73,8 +73,6 @@ struct glsl_shader_prog_link {=0A=
GLhandleARB bumpenvmat_location[MAX_TEXTURES];=0A=
GLhandleARB luminancescale_location[MAX_TEXTURES];=0A=
GLhandleARB luminanceoffset_location[MAX_TEXTURES];=0A=
- GLhandleARB srgb_comparison_location;=0A=
- GLhandleARB srgb_mul_low_location;=0A=
GLhandleARB ycorrection_location;=0A=
GLenum vertex_color_clamp;=0A=
GLhandleARB vshader;=0A=
@@ -492,28 +490,6 @@ static void shader_glsl_load_constants(=0A=
}=0A=
}=0A=
=0A=
- if(((IWineD3DPixelShaderImpl *) pshader)->srgb_enabled &&=0A=
- !((IWineD3DPixelShaderImpl *) =
pshader)->srgb_mode_hardcoded) {=0A=
- float comparison[4];=0A=
- float mul_low[4];=0A=
-=0A=
- if(stateBlock->renderState[WINED3DRS_SRGBWRITEENABLE]) {=0A=
- comparison[0] =3D srgb_cmp; comparison[1] =3D srgb_cmp;=0A=
- comparison[2] =3D srgb_cmp; comparison[3] =3D srgb_cmp;=0A=
-=0A=
- mul_low[0] =3D srgb_mul_low; mul_low[1] =3D =
srgb_mul_low;=0A=
- mul_low[2] =3D srgb_mul_low; mul_low[3] =3D =
srgb_mul_low;=0A=
- } else {=0A=
- comparison[0] =3D 1.0 / 0.0; comparison[1] =3D 1.0 / =
0.0;=0A=
- comparison[2] =3D 1.0 / 0.0; comparison[3] =3D 1.0 / =
0.0;=0A=
-=0A=
- mul_low[0] =3D 1.0; mul_low[1] =3D 1.0;=0A=
- mul_low[2] =3D 1.0; mul_low[3] =3D 1.0;=0A=
- }=0A=
-=0A=
- GL_EXTCALL(glUniform4fvARB(prog->srgb_comparison_location, =
1, comparison));=0A=
- GL_EXTCALL(glUniform4fvARB(prog->srgb_mul_low_location, 1, =
mul_low));=0A=
- }=0A=
if(((IWineD3DPixelShaderImpl *) pshader)->vpos_uniform) {=0A=
float correction_params[4];=0A=
if(deviceImpl->render_offscreen) {=0A=
@@ -608,27 +584,10 @@ static void shader_generate_glsl_declarations(=0A=
}=0A=
=0A=
if(device->stateBlock->renderState[WINED3DRS_SRGBWRITEENABLE]) {=0A=
- ps_impl->srgb_enabled =3D 1;=0A=
- if(This->baseShader.limits.constant_float + =
extra_constants_needed + 1 < GL_LIMITS(pshader_constantsF)) {=0A=
- shader_addline(buffer, "uniform vec4 srgb_mul_low;\n");=0A=
- shader_addline(buffer, "uniform vec4 =
srgb_comparison;\n");=0A=
- ps_impl->srgb_mode_hardcoded =3D 0;=0A=
- extra_constants_needed++;=0A=
- } else {=0A=
- ps_impl->srgb_mode_hardcoded =3D 1;=0A=
- shader_addline(buffer, "const vec4 srgb_mul_low =3D =
vec4(%f, %f, %f, %f);\n",=0A=
- srgb_mul_low, srgb_mul_low, =
srgb_mul_low, srgb_mul_low);=0A=
- shader_addline(buffer, "const vec4 srgb_comparison =3D =
vec4(%f, %f, %f, %f);\n",=0A=
- srgb_cmp, srgb_cmp, srgb_cmp, srgb_cmp);=0A=
- }=0A=
- } else {=0A=
- IWineD3DPixelShaderImpl *ps_impl =3D =
(IWineD3DPixelShaderImpl *) This;=0A=
-=0A=
- /* Do not write any srgb fixup into the shader to save =
shader size and processing time.=0A=
- * As a consequence, we can't toggle srgb write on without =
recompilation=0A=
- */=0A=
- ps_impl->srgb_enabled =3D 0;=0A=
- ps_impl->srgb_mode_hardcoded =3D 1;=0A=
+ shader_addline(buffer, "const vec4 srgb_mul_low =3D =
vec4(%f, %f, %f, %f);\n",=0A=
+ srgb_mul_low, srgb_mul_low, srgb_mul_low, =
srgb_mul_low);=0A=
+ shader_addline(buffer, "const vec4 srgb_comparison =3D =
vec4(%f, %f, %f, %f);\n",=0A=
+ srgb_cmp, srgb_cmp, srgb_cmp, srgb_cmp);=0A=
}=0A=
if(reg_maps->vpos || reg_maps->usesdsy) {=0A=
if(This->baseShader.limits.constant_float + =
extra_constants_needed + 1 < GL_LIMITS(pshader_constantsF)) {=0A=
@@ -2798,7 +2757,8 @@ static void pshader_glsl_dp2add(SHADER_OPCODE_ARG* =
arg) {=0A=
static void pshader_glsl_input_pack(=0A=
SHADER_BUFFER* buffer,=0A=
semantic* semantics_in,=0A=
- IWineD3DPixelShader *iface) {=0A=
+ IWineD3DPixelShader *iface,=0A=
+ enum vertexprocessing_mode vertexprocessing) {=0A=
=0A=
unsigned int i;=0A=
IWineD3DPixelShaderImpl *This =3D (IWineD3DPixelShaderImpl *) iface;=0A=
@@ -2819,7 +2779,7 @@ static void pshader_glsl_input_pack(=0A=
switch(usage) {=0A=
=0A=
case WINED3DDECLUSAGE_TEXCOORD:=0A=
- if(usage_idx < 8 && This->vertexprocessing =3D=3D =
pretransformed) {=0A=
+ if(usage_idx < 8 && vertexprocessing =3D=3D =
pretransformed) {=0A=
shader_addline(buffer, "IN[%u]%s =3D =
gl_TexCoord[%u]%s;\n",=0A=
This->input_reg_map[i], reg_mask, =
usage_idx, reg_mask);=0A=
} else {=0A=
@@ -3236,8 +3196,7 @@ static void set_glsl_shader_program(IWineD3DDevice =
*iface, BOOL use_ps, BOOL use=0A=
if(use_ps) {=0A=
struct ps_compile_args compile_args;=0A=
=
find_ps_compile_args((IWineD3DPixelShaderImpl*)This->stateBlock->pixelSha=
der, This->stateBlock, &compile_args);=0A=
- pixelshader_compile(pshader, &compile_args);=0A=
- pshader_id =3D ((IWineD3DPixelShaderImpl*)pshader)->prgId;=0A=
+ pshader_id =3D find_gl_pshader((IWineD3DPixelShaderImpl *) =
pshader, &compile_args);=0A=
} else {=0A=
pshader_id =3D 0;=0A=
}=0A=
@@ -3347,8 +3306,6 @@ static void set_glsl_shader_program(IWineD3DDevice =
*iface, BOOL use_ps, BOOL use=0A=
=0A=
=0A=
entry->posFixup_location =3D =
GL_EXTCALL(glGetUniformLocationARB(programId, "posFixup"));=0A=
- entry->srgb_comparison_location =3D =
GL_EXTCALL(glGetUniformLocationARB(programId, "srgb_comparison"));=0A=
- entry->srgb_mul_low_location =3D =
GL_EXTCALL(glGetUniformLocationARB(programId, "srgb_mul_low"));=0A=
entry->ycorrection_location =3D =
GL_EXTCALL(glGetUniformLocationARB(programId, "ycorrection"));=0A=
checkGLcall("Find glsl program uniform locations");=0A=
=0A=
@@ -3535,7 +3492,8 @@ static void shader_glsl_destroy(IWineD3DBaseShader =
*iface) {=0A=
IWineD3DDeviceImpl *device =3D (IWineD3DDeviceImpl =
*)This->baseShader.device;=0A=
struct shader_glsl_priv *priv =3D (struct shader_glsl_priv =
*)device->shader_priv;=0A=
WineD3D_GL_Info *gl_info =3D &device->adapter->gl_info;=0A=
- GLuint *prog;=0A=
+ IWineD3DPixelShaderImpl *ps =3D NULL;=0A=
+ IWineD3DVertexShaderImpl *vs =3D NULL;=0A=
=0A=
/* Note: Do not use QueryInterface here to find out which shader =
type this is because this code=0A=
* can be called from IWineD3DBaseShader::Release=0A=
@@ -3543,11 +3501,13 @@ static void =
shader_glsl_destroy(IWineD3DBaseShader *iface) {=0A=
char pshader =3D =
shader_is_pshader_version(This->baseShader.hex_version);=0A=
=0A=
if(pshader) {=0A=
- prog =3D &((IWineD3DPixelShaderImpl *) This)->prgId;=0A=
+ ps =3D (IWineD3DPixelShaderImpl *) This;=0A=
+ if(ps->num_gl_shaders =3D=3D 0) return;=0A=
} else {=0A=
- prog =3D &((IWineD3DVertexShaderImpl *) This)->prgId;=0A=
+ vs =3D (IWineD3DVertexShaderImpl *) This;=0A=
+ if(vs->prgId =3D=3D 0) return;=0A=
}=0A=
- if(*prog =3D=3D 0) return;=0A=
+=0A=
linked_programs =3D &This->baseShader.linked_programs;=0A=
=0A=
TRACE("Deleting linked programs\n");=0A=
@@ -3565,11 +3525,28 @@ static void =
shader_glsl_destroy(IWineD3DBaseShader *iface) {=0A=
}=0A=
}=0A=
=0A=
- TRACE("Deleting shader object %u\n", *prog);=0A=
- GL_EXTCALL(glDeleteObjectARB(*prog));=0A=
- checkGLcall("glDeleteObjectARB");=0A=
- *prog =3D 0;=0A=
- This->baseShader.is_compiled =3D FALSE;=0A=
+ if(pshader) {=0A=
+ UINT i;=0A=
+=0A=
+ ENTER_GL();=0A=
+ for(i =3D 0; i < ps->num_gl_shaders; i++) {=0A=
+ TRACE("deleting pshader %u\n", ps->gl_shaders[i].prgId);=0A=
+ GL_EXTCALL(glDeleteObjectARB(ps->gl_shaders[i].prgId));=0A=
+ checkGLcall("glDeleteObjectARB");=0A=
+ }=0A=
+ LEAVE_GL();=0A=
+ HeapFree(GetProcessHeap(), 0, ps->gl_shaders);=0A=
+ ps->gl_shaders =3D NULL;=0A=
+ ps->num_gl_shaders =3D 0;=0A=
+ } else {=0A=
+ TRACE("Deleting shader object %u\n", vs->prgId);=0A=
+ ENTER_GL();=0A=
+ GL_EXTCALL(glDeleteObjectARB(vs->prgId));=0A=
+ checkGLcall("glDeleteObjectARB");=0A=
+ LEAVE_GL();=0A=
+ vs->prgId =3D 0;=0A=
+ vs->baseShader.is_compiled =3D FALSE;=0A=
+ }=0A=
}=0A=
=0A=
static unsigned int glsl_program_key_hash(void *key) {=0A=
@@ -3626,7 +3603,7 @@ static BOOL shader_glsl_dirty_const(IWineD3DDevice =
*iface) {=0A=
return FALSE;=0A=
}=0A=
=0A=
-static void shader_glsl_generate_pshader(IWineD3DPixelShader *iface, =
SHADER_BUFFER *buffer) {=0A=
+static GLuint shader_glsl_generate_pshader(IWineD3DPixelShader *iface, =
SHADER_BUFFER *buffer) {=0A=
IWineD3DPixelShaderImpl *This =3D (IWineD3DPixelShaderImpl *)iface;=0A=
shader_reg_maps* reg_maps =3D &This->baseShader.reg_maps;=0A=
CONST DWORD *function =3D This->baseShader.function;=0A=
@@ -3655,13 +3632,9 @@ static void =
shader_glsl_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFF=0A=
if (This->baseShader.hex_version >=3D WINED3DPS_VERSION(3,0)) {=0A=
=0A=
if(((IWineD3DDeviceImpl *) =
This->baseShader.device)->strided_streams.u.s.position_transformed) {=0A=
- This->vertexprocessing =3D pretransformed;=0A=
- pshader_glsl_input_pack(buffer, This->semantics_in, iface);=0A=
+ pshader_glsl_input_pack(buffer, This->semantics_in, iface, =
pretransformed);=0A=
} else if(!use_vs((IWineD3DDeviceImpl *) =
This->baseShader.device)) {=0A=
- This->vertexprocessing =3D fixedfunction;=0A=
- pshader_glsl_input_pack(buffer, This->semantics_in, iface);=0A=
- } else {=0A=
- This->vertexprocessing =3D vertexshader;=0A=
+ pshader_glsl_input_pack(buffer, This->semantics_in, iface, =
fixedfunction);=0A=
}=0A=
}=0A=
=0A=
@@ -3682,7 +3655,7 @@ static void =
shader_glsl_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFF=0A=
} else {=0A=
fragcolor =3D "gl_FragColor";=0A=
}=0A=
- if(This->srgb_enabled) {=0A=
+ if(((IWineD3DDeviceImpl =
*)This->baseShader.device)->stateBlock->renderState[WINED3DRS_SRGBWRITEEN=
ABLE]) {=0A=
shader_addline(buffer, "tmp0.xyz =3D pow(%s.xyz, vec3(%f, %f, =
%f)) * vec3(%f, %f, %f) - vec3(%f, %f, %f);\n",=0A=
fragcolor, srgb_pow, srgb_pow, srgb_pow, =
srgb_mul_high, srgb_mul_high, srgb_mul_high,=0A=
srgb_sub_high, srgb_sub_high, srgb_sub_high);=0A=
@@ -3711,7 +3684,7 @@ static void =
shader_glsl_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFF=0A=
print_glsl_info_log(&GLINFO_LOCATION, shader_obj);=0A=
=0A=
/* Store the shader object */=0A=
- This->prgId =3D shader_obj;=0A=
+ return shader_obj;=0A=
}=0A=
=0A=
static void shader_glsl_generate_vshader(IWineD3DVertexShader *iface, =
SHADER_BUFFER *buffer) {=0A=
diff --git a/dlls/wined3d/pixelshader.c b/dlls/wined3d/pixelshader.c=0A=
index 1ce2849..7b2f639 100644=0A=
--- a/dlls/wined3d/pixelshader.c=0A=
+++ b/dlls/wined3d/pixelshader.c=0A=
@@ -272,32 +272,16 @@ static void pshader_set_limits(=0A=
=0A=
/** Generate a pixel shader string using either GL_FRAGMENT_PROGRAM_ARB=0A=
or GLSL and send it to the card */=0A=
-static inline VOID IWineD3DPixelShaderImpl_GenerateShader(=0A=
- IWineD3DPixelShader *iface) {=0A=
- IWineD3DPixelShaderImpl *This =3D (IWineD3DPixelShaderImpl *)iface;=0A=
+static inline GLuint IWineD3DPixelShaderImpl_GenerateShader(=0A=
+ IWineD3DPixelShaderImpl *This) {=0A=
SHADER_BUFFER buffer;=0A=
=0A=
-#if 0 /* FIXME: Use the buffer that is held by the device, this is ok =
since fixups will be skipped for software shaders=0A=
- it also requires entering a critical section but cuts down the =
runtime footprint of wined3d and any memory fragmentation that may =
occur... */=0A=
- if (This->device->fixupVertexBufferSize < SHADER_PGMSIZE) {=0A=
- HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);=0A=
- This->fixupVertexBuffer =3D HeapAlloc(GetProcessHeap() , 0, =
SHADER_PGMSIZE);=0A=
- This->fixupVertexBufferSize =3D PGMSIZE;=0A=
- This->fixupVertexBuffer[0] =3D 0;=0A=
- }=0A=
- buffer.buffer =3D This->device->fixupVertexBuffer;=0A=
-#else=0A=
- buffer.buffer =3D HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, =
SHADER_PGMSIZE); =0A=
-#endif=0A=
+ buffer.buffer =3D HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, =
SHADER_PGMSIZE);=0A=
buffer.bsize =3D 0;=0A=
buffer.lineNo =3D 0;=0A=
buffer.newline =3D TRUE;=0A=
=0A=
- ((IWineD3DDeviceImpl =
*)This->baseShader.device)->shader_backend->shader_generate_pshader(iface=
, &buffer);=0A=
-=0A=
-#if 1 /* if were using the data buffer of device then we don't need to =
free it */=0A=
- HeapFree(GetProcessHeap(), 0, buffer.buffer);=0A=
-#endif=0A=
+ return ((IWineD3DDeviceImpl =
*)This->baseShader.device)->shader_backend->shader_generate_pshader((IWin=
eD3DPixelShader *) This, &buffer);=0A=
}=0A=
=0A=
static HRESULT WINAPI =
IWineD3DPixelShaderImpl_SetFunction(IWineD3DPixelShader *iface, CONST =
DWORD *pFunction) {=0A=
@@ -384,89 +368,29 @@ static HRESULT WINAPI =
IWineD3DPixelShaderImpl_SetFunction(IWineD3DPixelShader *i=0A=
return WINED3D_OK;=0A=
}=0A=
=0A=
-HRESULT pixelshader_compile(IWineD3DPixelShader *iface, struct =
ps_compile_args *args) {=0A=
-=0A=
- IWineD3DPixelShaderImpl *This =3D(IWineD3DPixelShaderImpl *)iface;=0A=
- IWineD3DDeviceImpl *deviceImpl =3D (IWineD3DDeviceImpl*) =
This->baseShader.device;=0A=
+GLuint pixelshader_compile(IWineD3DPixelShaderImpl *This, struct =
ps_compile_args *args) {=0A=
CONST DWORD *function =3D This->baseShader.function;=0A=
- UINT i, sampler;=0A=
HRESULT hr;=0A=
+ GLuint retval;=0A=
=0A=
- TRACE("(%p) : function %p\n", iface, function);=0A=
-=0A=
- /* We're already compiled, but check if any of the hardcoded =
stateblock assumptions=0A=
- * changed.=0A=
- */=0A=
- if (This->baseShader.is_compiled) {=0A=
- for(i =3D 0; i < This->baseShader.num_sampled_samplers; i++) {=0A=
- sampler =3D This->baseShader.sampled_samplers[i];=0A=
- if(args->format_conversion[sampler] !=3D =
This->baseShader.sampled_format[sampler]) {=0A=
- WARN("Recompiling shader %p due to format change on =
sampler %d\n", This, sampler);=0A=
- WARN("Old format group %s, new is %s\n",=0A=
- =
debug_d3dformat(This->baseShader.sampled_format[sampler]),=0A=
- debug_d3dformat(args->format_conversion[sampler]));=0A=
- goto recompile;=0A=
- }=0A=
- }=0A=
-=0A=
- /* TODO: Check projected textures */=0A=
- /* TODO: Check texture types(2D, Cube, 3D) */=0A=
-=0A=
- if(args->srgb_correction !=3D This->srgb_enabled && =
This->srgb_mode_hardcoded) {=0A=
- WARN("Recompiling shader because srgb correction is =
different and hardcoded\n");=0A=
- goto recompile;=0A=
- }=0A=
- if(This->baseShader.reg_maps.vpos && !This->vpos_uniform) {=0A=
- if(This->render_offscreen !=3D deviceImpl->render_offscreen =
||=0A=
- This->height !=3D ((IWineD3DSurfaceImpl *) =
deviceImpl->render_targets[0])->currentDesc.Height) {=0A=
- WARN("Recompiling shader because vpos is used, hard =
compiled and changed\n");=0A=
- goto recompile;=0A=
- }=0A=
- }=0A=
- if(This->baseShader.reg_maps.usesdsy && !This->vpos_uniform) {=0A=
- if(This->render_offscreen ? 0 : 1 !=3D =
deviceImpl->render_offscreen ? 0 : 1) {=0A=
- WARN("Recompiling shader because dsy is used, hard =
compiled and render_offscreen changed\n");=0A=
- goto recompile;=0A=
- }=0A=
- }=0A=
- if(This->baseShader.hex_version >=3D WINED3DPS_VERSION(3,0)) {=0A=
- if(args->vp_mode !=3D This->vertexprocessing) {=0A=
- WARN("Recompiling shader because the vertex processing =
mode changed\n");=0A=
- goto recompile;=0A=
- }=0A=
- }=0A=
-=0A=
- return WINED3D_OK;=0A=
+ TRACE("(%p) : function %p\n", This, function);=0A=
=0A=
- recompile:=0A=
- if(This->baseShader.recompile_count > 50) {=0A=
- FIXME("Shader %p recompiled more than 50 times\n", This);=0A=
- } else {=0A=
- This->baseShader.recompile_count++;=0A=
- }=0A=
-=0A=
- deviceImpl->shader_backend->shader_destroy((IWineD3DBaseShader =
*) iface);=0A=
- }=0A=
-=0A=
- /* We don't need to compile */=0A=
- if (!function) {=0A=
- This->baseShader.is_compiled =3D TRUE;=0A=
- return WINED3D_OK;=0A=
+ hr =3D IWineD3DPixelShader_UpdateSamplers((IWineD3DPixelShader *) =
This);=0A=
+ if(FAILED(hr)) {=0A=
+ ERR("Failed to update sampler information\n");=0A=
+ return 0;=0A=
}=0A=
=0A=
- hr =3D IWineD3DPixelShader_UpdateSamplers(iface);=0A=
- if(FAILED(hr)) return hr;=0A=
-=0A=
/* Reset fields tracking stateblock values being hardcoded in the =
shader */=0A=
This->baseShader.num_sampled_samplers =3D 0;=0A=
=0A=
/* Generate the HW shader */=0A=
TRACE("(%p) : Generating hardware program\n", This);=0A=
- IWineD3DPixelShaderImpl_GenerateShader(iface);=0A=
+ retval =3D IWineD3DPixelShaderImpl_GenerateShader(This);=0A=
=0A=
This->baseShader.is_compiled =3D TRUE;=0A=
=0A=
- return WINED3D_OK;=0A=
+ return retval;=0A=
}=0A=
=0A=
static HRESULT WINAPI =
IWineD3DPixelShaderImpl_UpdateSamplers(IWineD3DPixelShader *iface) {=0A=
@@ -532,3 +456,35 @@ void find_ps_compile_args(IWineD3DPixelShaderImpl =
*shader, IWineD3DStateBlockImp=0A=
args->vp_mode =3D vertexshader;=0A=
}=0A=
}=0A=
+=0A=
+GLuint find_gl_pshader(IWineD3DPixelShaderImpl *shader, struct =
ps_compile_args *args) {=0A=
+ UINT i;=0A=
+ struct ps_compiled_shader *old_array;=0A=
+=0A=
+ /* Usually we have very few GL shaders for each d3d shader(just 1 =
or maybe 2),=0A=
+ * so a linear search is more performant than a hashmap=0A=
+ */=0A=
+ for(i =3D 0; i < shader->num_gl_shaders; i++) {=0A=
+ if(memcmp(&shader->gl_shaders[i].args, args, sizeof(*args)) =
=3D=3D 0) {=0A=
+ return shader->gl_shaders[i].prgId;=0A=
+ }=0A=
+ }=0A=
+=0A=
+ TRACE("No matching GL shader found, compiling a new shader\n");=0A=
+ old_array =3D shader->gl_shaders;=0A=
+ if(old_array) {=0A=
+ shader->gl_shaders =3D HeapReAlloc(GetProcessHeap(), 0, =
old_array,=0A=
+ (shader->num_gl_shaders + 1) * =
sizeof(*shader->gl_shaders));=0A=
+ } else {=0A=
+ shader->gl_shaders =3D HeapAlloc(GetProcessHeap(), 0, =
sizeof(*shader->gl_shaders));=0A=
+ }=0A=
+=0A=
+ if(!shader->gl_shaders) {=0A=
+ ERR("Out of memory\n");=0A=
+ return 0;=0A=
+ }=0A=
+=0A=
+ shader->gl_shaders[shader->num_gl_shaders].args =3D *args;=0A=
+ shader->gl_shaders[shader->num_gl_shaders].prgId =3D =
pixelshader_compile(shader, args);=0A=
+ return shader->gl_shaders[shader->num_gl_shaders++].prgId;=0A=
+}=0A=
diff --git a/dlls/wined3d/wined3d_private.h =
b/dlls/wined3d/wined3d_private.h=0A=
index b593354..cb3a26d 100644=0A=
--- a/dlls/wined3d/wined3d_private.h=0A=
+++ b/dlls/wined3d/wined3d_private.h=0A=
@@ -350,7 +350,7 @@ typedef struct {=0A=
HRESULT (*shader_alloc_private)(IWineD3DDevice *iface);=0A=
void (*shader_free_private)(IWineD3DDevice *iface);=0A=
BOOL (*shader_dirtifyable_constants)(IWineD3DDevice *iface);=0A=
- void (*shader_generate_pshader)(IWineD3DPixelShader *iface, =
SHADER_BUFFER *buffer);=0A=
+ GLuint (*shader_generate_pshader)(IWineD3DPixelShader *iface, =
SHADER_BUFFER *buffer);=0A=
void (*shader_generate_vshader)(IWineD3DVertexShader *iface, =
SHADER_BUFFER *buffer);=0A=
void (*shader_get_caps)(WINED3DDEVTYPE devtype, WineD3D_GL_Info =
*gl_info, struct shader_caps *caps);=0A=
BOOL (*shader_conv_supported)(WINED3DFORMAT conv);=0A=
@@ -2346,6 +2346,13 @@ struct ps_compile_args {=0A=
BOOL srgb_correction;=0A=
WINED3DFORMAT =
format_conversion[MAX_FRAGMENT_SAMPLERS];=0A=
enum vertexprocessing_mode vp_mode;=0A=
+ /* Projected textures(ps 1.0-1.3) */=0A=
+ /* Texture types(2D, Cube, 3D) in ps 1.x */=0A=
+};=0A=
+=0A=
+struct ps_compiled_shader {=0A=
+ struct ps_compile_args args;=0A=
+ GLuint prgId;=0A=
};=0A=
=0A=
typedef struct IWineD3DPixelShaderImpl {=0A=
@@ -2365,25 +2372,19 @@ typedef struct IWineD3DPixelShaderImpl {=0A=
int declared_in_count;=0A=
=0A=
/* The GL shader */=0A=
- GLuint prgId;=0A=
+ struct ps_compiled_shader *gl_shaders;=0A=
+ UINT num_gl_shaders;=0A=
=0A=
/* Some information about the shader behavior */=0A=
struct stb_const_desc bumpenvmatconst[MAX_TEXTURES];=0A=
char numbumpenvmatconsts;=0A=
struct stb_const_desc luminanceconst[MAX_TEXTURES];=0A=
- char srgb_enabled;=0A=
- char srgb_mode_hardcoded;=0A=
- UINT srgb_low_const;=0A=
- UINT srgb_cmp_const;=0A=
char vpos_uniform;=0A=
- BOOL render_offscreen;=0A=
- UINT height;=0A=
- enum vertexprocessing_mode vertexprocessing;=0A=
} IWineD3DPixelShaderImpl;=0A=
=0A=
extern const SHADER_OPCODE IWineD3DPixelShaderImpl_shader_ins[];=0A=
extern const IWineD3DPixelShaderVtbl IWineD3DPixelShader_Vtbl;=0A=
-HRESULT pixelshader_compile(IWineD3DPixelShader *iface, struct =
ps_compile_args *args);=0A=
+GLuint find_gl_pshader(IWineD3DPixelShaderImpl *shader, struct =
ps_compile_args *args);=0A=
void find_ps_compile_args(IWineD3DPixelShaderImpl *shader, =
IWineD3DStateBlockImpl *stateblock, struct ps_compile_args *args);=0A=
=0A=
/* sRGB correction constants */=0A=
-- =0A=
1.5.6.4=0A=
=0A=
------=_NextPart_000_000E_01C94E6D.F503D280--
More information about the wine-patches
mailing list