[PATCH 1/3] wined3d: Put local constants into the shader code again (try 2)

Stefan Dösinger stefan at codeweavers.com
Tue Apr 2 07:54:01 CDT 2013


try 2: Handle inf and nan constants

This provides a small GPU-side performance boost on dx10+ capable Nvidia
cards - about 5% in World of Tanks on my Geforce 9600, and more in a
dedicated test program.

This is essentially a revert of cd7825c89374fb9dd4c20aef2dbfd258713efe6a,
with proper precision.

Optimizations are possible, like using GL_ARB_shader_bit_encoding to
store infs and nans in GLSL programs, or loading only constants
containing infs and nans via uniforms. I have decided against this
because it would create three ways in which constants are loaded (all in
the array for indirect addressing, hardcoded, separate uniforms loaded
at compile time). I expect nan and inf constants to be used rarely, if
at all, see the description in the test patch.

Performance data follows. The numbers are from a stand-alone test
program testing a GLSL shader with 21 uniforms or constant immediate
values. The numbers indicate the change in performance when switching
from uniforms to consts. The test program is available at
https://84.112.174.163/~git/perftest file const_gl/const_gl.cpp.
A d3d version of the same test can be found in const_d3d/const_d3d.cpp

                        CPU     GPU
Geforce 9600:
        Windows:        +30%    +19%
        Linux:          +26%    +23%
        OSX:		+1%	+23%

Geforce GTX 460:
        Windows:        ???     +9%
        Linux:          +4%     +15%
Windows CPU fps jumping between 6000 and 9000 fps,
result therefore inconclusive.

Radeon X1600:
        OSX:            +0%     -7%
        Linux:          -2%     -2%
        Windows:        -4%     -6%
GL results mirror local vs global const performance in d3d

Radeon HD 5770:
        Linux:          +0%     +0%
        Windows:        +0%     +0%

Intel GMA X3100:
        OSX:            +8%     -25%
        Linux:          +8%     +10%
        Windows:        -1%     +0%
(OSX results don't make sense; high res outperforms low res)

Geforce 7400:
        Linux(nouveau)  +0%     +0%
        Windows: Can't disable vsync

Geforce 7600:
        Linux(nvidia)   +0%     +0%
---
 dlls/wined3d/glsl_shader.c     | 37 ++++---------------------------------
 dlls/wined3d/shader.c          | 28 ++++++++++++++++++++++------
 dlls/wined3d/wined3d_private.h |  1 +
 3 files changed, 27 insertions(+), 39 deletions(-)

diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c
index 7d98487..a840a5d 100644
--- a/dlls/wined3d/glsl_shader.c
+++ b/dlls/wined3d/glsl_shader.c
@@ -1253,15 +1253,14 @@ static void shader_generate_glsl_declarations(const struct wined3d_context *cont
     shader_addline(buffer, "vec4 tmp0;\n");
     shader_addline(buffer, "vec4 tmp1;\n");
 
-    /* Local constants use a different name so they can be loaded once at shader link time
-     * They can't be hardcoded into the shader text via LC = {x, y, z, w}; because the
-     * float -> string conversion can cause precision loss.
-     */
     if (!shader->load_local_constsF)
     {
         LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry)
         {
-            shader_addline(buffer, "uniform vec4 %s_lc%u;\n", prefix, lconst->idx);
+            const float *value;
+            value = (const float *)lconst->value;
+            shader_addline(buffer, "const vec4 %s_lc%u = vec4(%.8e, %.8e, %.8e, %.8e);\n",
+                    prefix, lconst->idx, value[0], value[1], value[2], value[3]);
         }
     }
 
@@ -4457,25 +4456,6 @@ static void shader_glsl_generate_fog_code(struct wined3d_shader_buffer *buffer,
 }
 
 /* Context activation is done by the caller. */
-static void hardcode_local_constants(const struct wined3d_shader *shader,
-        const struct wined3d_gl_info *gl_info, GLhandleARB programId, const char *prefix)
-{
-    const struct wined3d_shader_lconst *lconst;
-    GLint tmp_loc;
-    const float *value;
-    char glsl_name[10];
-
-    LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry)
-    {
-        value = (const float *)lconst->value;
-        snprintf(glsl_name, sizeof(glsl_name), "%s_lc%u", prefix, lconst->idx);
-        tmp_loc = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
-        GL_EXTCALL(glUniform4fvARB(tmp_loc, 1, value));
-    }
-    checkGLcall("Hardcoding local constants");
-}
-
-/* Context activation is done by the caller. */
 static GLuint shader_glsl_generate_pshader(const struct wined3d_context *context,
         struct wined3d_shader_buffer *buffer, const struct wined3d_shader *shader,
         const struct ps_compile_args *args, struct ps_np2fixup_info *np2fixup_info)
@@ -5705,15 +5685,6 @@ static void set_glsl_shader_program(const struct wined3d_context *context, struc
      */
     shader_glsl_load_vsamplers(gl_info, device->texUnitMap, programId);
     shader_glsl_load_psamplers(gl_info, device->texUnitMap, programId);
-
-    /* If the local constants do not have to be loaded with the environment constants,
-     * load them now to have them hardcoded in the GLSL program. This saves some CPU cycles
-     * later
-     */
-    if (pshader && !pshader->load_local_constsF)
-        hardcode_local_constants(pshader, gl_info, programId, "ps");
-    if (vshader && !vshader->load_local_constsF)
-        hardcode_local_constants(vshader, gl_info, programId, "vs");
 }
 
 /* Context activation is done by the caller. */
diff --git a/dlls/wined3d/shader.c b/dlls/wined3d/shader.c
index 6444896..692e5b2 100644
--- a/dlls/wined3d/shader.c
+++ b/dlls/wined3d/shader.c
@@ -584,15 +584,16 @@ static HRESULT shader_get_registers_used(struct wined3d_shader *shader, const st
         else if (ins.handler_idx == WINED3DSIH_DEF)
         {
             struct wined3d_shader_lconst *lconst = HeapAlloc(GetProcessHeap(), 0, sizeof(*lconst));
+            float *value;
             if (!lconst) return E_OUTOFMEMORY;
 
             lconst->idx = ins.dst[0].reg.idx[0].offset;
             memcpy(lconst->value, ins.src[0].reg.immconst_data, 4 * sizeof(DWORD));
+            value = (float *)lconst->value;
 
             /* In pixel shader 1.X shaders, the constants are clamped between [-1;1] */
             if (shader_version.major == 1 && shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
             {
-                float *value = (float *)lconst->value;
                 if (value[0] < -1.0f) value[0] = -1.0f;
                 else if (value[0] > 1.0f) value[0] = 1.0f;
                 if (value[1] < -1.0f) value[1] = -1.0f;
@@ -604,6 +605,12 @@ static HRESULT shader_get_registers_used(struct wined3d_shader *shader, const st
             }
 
             list_add_head(&shader->constantsF, &lconst->entry);
+
+            if (isinf(value[0]) || isnan(value[0]) || isinf(value[1]) || isnan(value[1])
+                    || isinf(value[2]) || isnan(value[2]) || isinf(value[3]) || isnan(value[3]))
+            {
+                shader->lconst_inf_or_nan = TRUE;
+            }
         }
         else if (ins.handler_idx == WINED3DSIH_DEFI)
         {
@@ -1629,6 +1636,7 @@ static HRESULT shader_set_function(struct wined3d_shader *shader, const DWORD *b
     list_init(&shader->constantsF);
     list_init(&shader->constantsB);
     list_init(&shader->constantsI);
+    shader->lconst_inf_or_nan = FALSE;
 
     /* Second pass: figure out which registers are used, what the semantics are, etc. */
     hr = shader_get_registers_used(shader, fe,
@@ -1752,12 +1760,20 @@ HRESULT CDECL wined3d_shader_set_local_constants_float(struct wined3d_shader *sh
     for (i = start_idx; i < end_idx; ++i)
     {
         struct wined3d_shader_lconst *lconst = HeapAlloc(GetProcessHeap(), 0, sizeof(*lconst));
+        float *value;
         if (!lconst)
             return E_OUTOFMEMORY;
 
         lconst->idx = i;
-        memcpy(lconst->value, src_data + (i - start_idx) * 4 /* 4 components */, 4 * sizeof(float));
+        value = (float *)lconst->value;
+        memcpy(value, src_data + (i - start_idx) * 4 /* 4 components */, 4 * sizeof(float));
         list_add_head(&shader->constantsF, &lconst->entry);
+
+        if (isinf(value[0]) || isnan(value[0]) || isinf(value[1]) || isnan(value[1])
+                || isinf(value[2]) || isnan(value[2]) || isinf(value[3]) || isnan(value[3]))
+        {
+            shader->lconst_inf_or_nan = TRUE;
+        }
     }
 
     return WINED3D_OK;
@@ -1916,8 +1932,8 @@ static HRESULT vertexshader_init(struct wined3d_shader *shader, struct wined3d_d
 
     vertexshader_set_limits(shader);
 
-    shader->load_local_constsF = reg_maps->usesrelconstF
-            && !list_empty(&shader->constantsF);
+    shader->load_local_constsF = (reg_maps->usesrelconstF && !list_empty(&shader->constantsF)) ||
+            shader->lconst_inf_or_nan;
 
     return WINED3D_OK;
 }
@@ -1964,7 +1980,7 @@ static HRESULT geometryshader_init(struct wined3d_shader *shader, struct wined3d
 
     geometryshader_set_limits(shader);
 
-    shader->load_local_constsF = FALSE;
+    shader->load_local_constsF = shader->lconst_inf_or_nan;
 
     return WINED3D_OK;
 }
@@ -2253,7 +2269,7 @@ static HRESULT pixelshader_init(struct wined3d_shader *shader, struct wined3d_de
         }
     }
 
-    shader->load_local_constsF = FALSE;
+    shader->load_local_constsF = shader->lconst_inf_or_nan;
 
     return WINED3D_OK;
 }
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
index 2a11286..bdba525 100644
--- a/dlls/wined3d/wined3d_private.h
+++ b/dlls/wined3d/wined3d_private.h
@@ -2618,6 +2618,7 @@ struct wined3d_shader
     struct list constantsF;
     struct list constantsI;
     struct wined3d_shader_reg_maps reg_maps;
+    BOOL lconst_inf_or_nan;
 
     struct wined3d_shader_signature_element input_signature[max(MAX_ATTRIBS, MAX_REG_INPUT)];
     struct wined3d_shader_signature_element output_signature[MAX_REG_OUTPUT];
-- 
1.8.1.5




More information about the wine-patches mailing list