From 0d060807864d6373f1180f113055fa97c70cc951 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20D=C3=B6singer?= Date: Sun, 1 Nov 2009 17:03:39 +0100 Subject: [PATCH 1/8] WineD3D: Create clip coords only if they are used If GL vertex shader clipping is supported (GLSL or NVvp), we currently always calculate clipping results in the shader. They're gracefully discarded by OpenGL, but we need 6 constants / uniforms to calculate unused values. This change frees up 6 vertex shader constants / uniforms at the expense of duplicating GL shaders if clipping is enabled and disabled. Fixes a regression in Guild Wars on GeForce 5 to 7 class GPUs. --- dlls/wined3d/arb_program_shader.c | 13 ++++++++++--- dlls/wined3d/glsl_shader.c | 13 ++++++++++--- dlls/wined3d/vertexshader.c | 1 + dlls/wined3d/wined3d_private.h | 3 ++- 4 files changed, 23 insertions(+), 7 deletions(-) diff --git a/dlls/wined3d/arb_program_shader.c b/dlls/wined3d/arb_program_shader.c index 2066dda..9129758 100644 --- a/dlls/wined3d/arb_program_shader.c +++ b/dlls/wined3d/arb_program_shader.c @@ -648,7 +648,10 @@ static DWORD shader_generate_arb_declarations(IWineD3DBaseShader *iface, const s if(use_nv_clip(gl_info) && ctx->target_version >= NV2) { - clip_limit = gl_info->limits.clipplanes; + if(ctx->cur_vs_args->super.clip_enabled) + clip_limit = gl_info->limits.clipplanes; + else + clip_limit = 0; } else { @@ -2942,9 +2945,12 @@ static void vshader_add_footer(IWineD3DVertexShaderImpl *This, struct wined3d_sh if(use_nv_clip(gl_info) && priv_ctx->target_version >= NV2) { - for(i = 0; i < priv_ctx->vs_clipplanes; i++) + if(args->super.clip_enabled) { - shader_addline(buffer, "DP4 result.clip[%u].x, TMP_OUT, state.clip[%u].plane;\n", i, i); + for(i = 0; i < priv_ctx->vs_clipplanes; i++) + { + shader_addline(buffer, "DP4 result.clip[%u].x, TMP_OUT, state.clip[%u].plane;\n", i, i); + } } } else if(args->boolclip.clip_texcoord) @@ -4076,6 +4082,7 @@ static struct arb_ps_compiled_shader 
*find_arb_pshader(IWineD3DPixelShaderImpl * static inline BOOL vs_args_equal(const struct arb_vs_compile_args *stored, const struct arb_vs_compile_args *new, const DWORD use_map, BOOL skip_int) { if((stored->super.swizzle_map & use_map) != new->super.swizzle_map) return FALSE; + if(stored->super.clip_enabled != new->super.clip_enabled) return FALSE; if(stored->super.fog_src != new->super.fog_src) return FALSE; if(stored->boolclip_compare != new->boolclip_compare) return FALSE; if(stored->ps_signature != new->ps_signature) return FALSE; diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c index 995fbcc..11a80b2 100644 --- a/dlls/wined3d/glsl_shader.c +++ b/dlls/wined3d/glsl_shader.c @@ -855,9 +855,13 @@ static void shader_generate_glsl_declarations(const struct wined3d_context *cont * The shader code only uses 0.5, 2.0, 1.0, 128 and -128 in vertex shader code, so one vec4 should be enough * (Unfortunately the Nvidia driver doesn't store 128 and -128 in one float). * - * Writing gl_ClipPos requires one uniform for each clipplane as well. + * Writing gl_ClipVertex requires one uniform for each clipplane as well. */ - max_constantsF = gl_info->limits.glsl_vs_float_constants - 3 - gl_info->limits.clipplanes; + max_constantsF = gl_info->limits.glsl_vs_float_constants - 3; + if(ctx_priv->cur_vs_args->clip_enabled) + { + max_constantsF -= gl_info->limits.clipplanes; + } max_constantsF -= count_bits(This->baseShader.reg_maps.integer_constants); /* Strictly speaking a bool only uses one scalar, but the nvidia(Linux) compiler doesn't pack them properly, * so each scalar requires a full vec4. 
We could work around this by packing the booleans ourselves, but @@ -3874,7 +3878,9 @@ static GLuint shader_glsl_generate_vshader(const struct wined3d_context *context */ shader_addline(buffer, "gl_Position.y = gl_Position.y * posFixup.y;\n"); shader_addline(buffer, "gl_Position.xy += posFixup.zw * gl_Position.ww;\n"); - shader_addline(buffer, "gl_ClipVertex = gl_Position;\n"); + if(args->clip_enabled) { + shader_addline(buffer, "gl_ClipVertex = gl_Position;\n"); + } /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c * @@ -3966,6 +3972,7 @@ static GLhandleARB find_glsl_pshader(const struct wined3d_context *context, static inline BOOL vs_args_equal(const struct vs_compile_args *stored, const struct vs_compile_args *new, const DWORD use_map) { if((stored->swizzle_map & use_map) != new->swizzle_map) return FALSE; + if((stored->clip_enabled) != new->clip_enabled) return FALSE; return stored->fog_src == new->fog_src; } diff --git a/dlls/wined3d/vertexshader.c b/dlls/wined3d/vertexshader.c index 14779a5..63d8c9b 100644 --- a/dlls/wined3d/vertexshader.c +++ b/dlls/wined3d/vertexshader.c @@ -356,6 +356,7 @@ static const IWineD3DVertexShaderVtbl IWineD3DVertexShader_Vtbl = void find_vs_compile_args(IWineD3DVertexShaderImpl *shader, IWineD3DStateBlockImpl *stateblock, struct vs_compile_args *args) { args->fog_src = stateblock->renderState[WINED3DRS_FOGTABLEMODE] == WINED3DFOG_NONE ? 
VS_FOG_COORD : VS_FOG_Z; + args->clip_enabled = stateblock->renderState[WINED3DRS_CLIPPING] && stateblock->renderState[WINED3DRS_CLIPPLANEENABLE]; args->swizzle_map = ((IWineD3DDeviceImpl *)shader->baseShader.device)->strided_streams.swizzle_map; } diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index 3af2703..f423733 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -695,7 +695,8 @@ enum fog_src_type { }; struct vs_compile_args { - WORD fog_src; + BYTE fog_src; + BYTE clip_enabled; WORD swizzle_map; /* MAX_ATTRIBS, 16 */ }; -- 1.6.4.4