[PATCH 1/3] wined3d: Put local constants into the shader code again (try 2)
Stefan Dösinger
stefan at codeweavers.com
Tue Apr 2 07:54:01 CDT 2013
try 2: Handle inf and nan constants
This provides a small GPU-side performance boost on dx10+ capable Nvidia
cards - about 5% in World of Tanks on my Geforce 9600, and more in a
dedicated test program.
This is essentially a revert of cd7825c89374fb9dd4c20aef2dbfd258713efe6a,
with proper precision.
Optimizations are possible, like using GL_ARB_shader_bit_encoding to
store infs and nans in GLSL programs, or loading only constants
containing infs and nans via uniforms. I have decided against this
because it would create three ways in which constants are loaded (all in
the array for indirect addressing, hardcoded, separate uniforms loaded
at compile time). I expect nan and inf constants to be used rarely, if
at all; see the description in the test patch.
Performance data follows. The numbers are from a stand-alone test
program testing a GLSL shader with 21 uniforms or constant immediate
values. The numbers indicate the change in performance when switching
from uniforms to consts. The test program is available at
https://84.112.174.163/~git/perftest, in the file const_gl/const_gl.cpp.
A d3d version of the same test can be found in const_d3d/const_d3d.cpp.
CPU GPU
Geforce 9600:
Windows: +30% +19%
Linux: +26% +23%
OSX: +1% +23%
Geforce GTX 460:
Windows: ??? +9%
Linux: +4% +15%
Windows CPU fps jumped between 6000 and 9000 fps;
the result is therefore inconclusive.
Radeon X1600:
OSX: +0% -7%
Linux: -2% -2%
Windows: -4% -6%
GL results mirror local vs global const performance in d3d
Radeon HD 5770:
Linux: +0% +0%
Windows: +0% +0%
Intel GMA X3100:
OSX: +8% -25%
Linux: +8% +10%
Windows: -1% +0%
(OSX results don't make sense; high res outperforms low res)
Geforce 7400:
Linux(nouveau) +0% +0%
Windows: Can't disable vsync
Geforce 7600:
Linux(nvidia) +0% +0%
---
dlls/wined3d/glsl_shader.c | 37 ++++---------------------------------
dlls/wined3d/shader.c | 28 ++++++++++++++++++++++------
dlls/wined3d/wined3d_private.h | 1 +
3 files changed, 27 insertions(+), 39 deletions(-)
diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c
index 7d98487..a840a5d 100644
--- a/dlls/wined3d/glsl_shader.c
+++ b/dlls/wined3d/glsl_shader.c
@@ -1253,15 +1253,14 @@ static void shader_generate_glsl_declarations(const struct wined3d_context *cont
shader_addline(buffer, "vec4 tmp0;\n");
shader_addline(buffer, "vec4 tmp1;\n");
- /* Local constants use a different name so they can be loaded once at shader link time
- * They can't be hardcoded into the shader text via LC = {x, y, z, w}; because the
- * float -> string conversion can cause precision loss.
- */
if (!shader->load_local_constsF)
{
LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry)
{
- shader_addline(buffer, "uniform vec4 %s_lc%u;\n", prefix, lconst->idx);
+ const float *value;
+ value = (const float *)lconst->value;
+ shader_addline(buffer, "const vec4 %s_lc%u = vec4(%.8e, %.8e, %.8e, %.8e);\n",
+ prefix, lconst->idx, value[0], value[1], value[2], value[3]);
}
}
@@ -4457,25 +4456,6 @@ static void shader_glsl_generate_fog_code(struct wined3d_shader_buffer *buffer,
}
/* Context activation is done by the caller. */
-static void hardcode_local_constants(const struct wined3d_shader *shader,
- const struct wined3d_gl_info *gl_info, GLhandleARB programId, const char *prefix)
-{
- const struct wined3d_shader_lconst *lconst;
- GLint tmp_loc;
- const float *value;
- char glsl_name[10];
-
- LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry)
- {
- value = (const float *)lconst->value;
- snprintf(glsl_name, sizeof(glsl_name), "%s_lc%u", prefix, lconst->idx);
- tmp_loc = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
- GL_EXTCALL(glUniform4fvARB(tmp_loc, 1, value));
- }
- checkGLcall("Hardcoding local constants");
-}
-
-/* Context activation is done by the caller. */
static GLuint shader_glsl_generate_pshader(const struct wined3d_context *context,
struct wined3d_shader_buffer *buffer, const struct wined3d_shader *shader,
const struct ps_compile_args *args, struct ps_np2fixup_info *np2fixup_info)
@@ -5705,15 +5685,6 @@ static void set_glsl_shader_program(const struct wined3d_context *context, struc
*/
shader_glsl_load_vsamplers(gl_info, device->texUnitMap, programId);
shader_glsl_load_psamplers(gl_info, device->texUnitMap, programId);
-
- /* If the local constants do not have to be loaded with the environment constants,
- * load them now to have them hardcoded in the GLSL program. This saves some CPU cycles
- * later
- */
- if (pshader && !pshader->load_local_constsF)
- hardcode_local_constants(pshader, gl_info, programId, "ps");
- if (vshader && !vshader->load_local_constsF)
- hardcode_local_constants(vshader, gl_info, programId, "vs");
}
/* Context activation is done by the caller. */
diff --git a/dlls/wined3d/shader.c b/dlls/wined3d/shader.c
index 6444896..692e5b2 100644
--- a/dlls/wined3d/shader.c
+++ b/dlls/wined3d/shader.c
@@ -584,15 +584,16 @@ static HRESULT shader_get_registers_used(struct wined3d_shader *shader, const st
else if (ins.handler_idx == WINED3DSIH_DEF)
{
struct wined3d_shader_lconst *lconst = HeapAlloc(GetProcessHeap(), 0, sizeof(*lconst));
+ float *value;
if (!lconst) return E_OUTOFMEMORY;
lconst->idx = ins.dst[0].reg.idx[0].offset;
memcpy(lconst->value, ins.src[0].reg.immconst_data, 4 * sizeof(DWORD));
+ value = (float *)lconst->value;
/* In pixel shader 1.X shaders, the constants are clamped between [-1;1] */
if (shader_version.major == 1 && shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
{
- float *value = (float *)lconst->value;
if (value[0] < -1.0f) value[0] = -1.0f;
else if (value[0] > 1.0f) value[0] = 1.0f;
if (value[1] < -1.0f) value[1] = -1.0f;
@@ -604,6 +605,12 @@ static HRESULT shader_get_registers_used(struct wined3d_shader *shader, const st
}
list_add_head(&shader->constantsF, &lconst->entry);
+
+ if (isinf(value[0]) || isnan(value[0]) || isinf(value[1]) || isnan(value[1])
+ || isinf(value[2]) || isnan(value[2]) || isinf(value[3]) || isnan(value[3]))
+ {
+ shader->lconst_inf_or_nan = TRUE;
+ }
}
else if (ins.handler_idx == WINED3DSIH_DEFI)
{
@@ -1629,6 +1636,7 @@ static HRESULT shader_set_function(struct wined3d_shader *shader, const DWORD *b
list_init(&shader->constantsF);
list_init(&shader->constantsB);
list_init(&shader->constantsI);
+ shader->lconst_inf_or_nan = FALSE;
/* Second pass: figure out which registers are used, what the semantics are, etc. */
hr = shader_get_registers_used(shader, fe,
@@ -1752,12 +1760,20 @@ HRESULT CDECL wined3d_shader_set_local_constants_float(struct wined3d_shader *sh
for (i = start_idx; i < end_idx; ++i)
{
struct wined3d_shader_lconst *lconst = HeapAlloc(GetProcessHeap(), 0, sizeof(*lconst));
+ float *value;
if (!lconst)
return E_OUTOFMEMORY;
lconst->idx = i;
- memcpy(lconst->value, src_data + (i - start_idx) * 4 /* 4 components */, 4 * sizeof(float));
+ value = (float *)lconst->value;
+ memcpy(value, src_data + (i - start_idx) * 4 /* 4 components */, 4 * sizeof(float));
list_add_head(&shader->constantsF, &lconst->entry);
+
+ if (isinf(value[0]) || isnan(value[0]) || isinf(value[1]) || isnan(value[1])
+ || isinf(value[2]) || isnan(value[2]) || isinf(value[3]) || isnan(value[3]))
+ {
+ shader->lconst_inf_or_nan = TRUE;
+ }
}
return WINED3D_OK;
@@ -1916,8 +1932,8 @@ static HRESULT vertexshader_init(struct wined3d_shader *shader, struct wined3d_d
vertexshader_set_limits(shader);
- shader->load_local_constsF = reg_maps->usesrelconstF
- && !list_empty(&shader->constantsF);
+ shader->load_local_constsF = (reg_maps->usesrelconstF && !list_empty(&shader->constantsF)) ||
+ shader->lconst_inf_or_nan;
return WINED3D_OK;
}
@@ -1964,7 +1980,7 @@ static HRESULT geometryshader_init(struct wined3d_shader *shader, struct wined3d
geometryshader_set_limits(shader);
- shader->load_local_constsF = FALSE;
+ shader->load_local_constsF = shader->lconst_inf_or_nan;
return WINED3D_OK;
}
@@ -2253,7 +2269,7 @@ static HRESULT pixelshader_init(struct wined3d_shader *shader, struct wined3d_de
}
}
- shader->load_local_constsF = FALSE;
+ shader->load_local_constsF = shader->lconst_inf_or_nan;
return WINED3D_OK;
}
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
index 2a11286..bdba525 100644
--- a/dlls/wined3d/wined3d_private.h
+++ b/dlls/wined3d/wined3d_private.h
@@ -2618,6 +2618,7 @@ struct wined3d_shader
struct list constantsF;
struct list constantsI;
struct wined3d_shader_reg_maps reg_maps;
+ BOOL lconst_inf_or_nan;
struct wined3d_shader_signature_element input_signature[max(MAX_ATTRIBS, MAX_REG_INPUT)];
struct wined3d_shader_signature_element output_signature[MAX_REG_OUTPUT];
--
1.8.1.5
More information about the wine-patches
mailing list