From 4a8b794ab7ac058345b52d1b8edd8ba6bc2af6da Mon Sep 17 00:00:00 2001 From: Tobias Jakobi Date: Sat, 18 Apr 2009 15:11:18 +0200 Subject: [wined3d] enable constant packing for NP2 texcoord fixup Previously, every texture that was flagged for NP2 fixup used a vec2 uniform in the shader to store its texture dimensions. It turns out that GLSL compilers just map a vec2 to a vec4, essentially wasting 2 floats for each such uniform. The new code uses only vec4 uniforms, but packs the dimension info of 2 textures into a single uniform. --- dlls/wined3d/glsl_shader.c | 63 +++++++++++++++++++++++++++++++++++-------- 1 files changed, 51 insertions(+), 12 deletions(-) diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c index 0e6c024..d5a1b80 100644 --- a/dlls/wined3d/glsl_shader.c +++ b/dlls/wined3d/glsl_shader.c @@ -100,7 +100,7 @@ struct glsl_shader_prog_link { GLint vuniformI_locations[MAX_CONST_I]; GLint puniformI_locations[MAX_CONST_I]; GLint posFixup_location; - GLint np2Fixup_location[MAX_FRAGMENT_SAMPLERS]; + GLint np2Fixup_location[MAX_FRAGMENT_SAMPLERS / 2]; GLint bumpenvmat_location[MAX_TEXTURES]; GLint luminancescale_location[MAX_TEXTURES]; GLint luminanceoffset_location[MAX_TEXTURES]; @@ -511,15 +511,25 @@ static void shader_glsl_load_np2fixup_constants( const IWineD3DStateBlockImpl* stateBlock = (const IWineD3DStateBlockImpl*) deviceImpl->stateBlock; for (i = 0; fixup; fixup >>= 1, ++i) { - if (-1 != prog->np2Fixup_location[i]) { + const UINT idx = prog->np2Fixup_data->idx[i]; + + if (-1 != prog->np2Fixup_location[idx]) { const IWineD3DBaseTextureImpl* const tex = (const IWineD3DBaseTextureImpl*) stateBlock->textures[i]; + GLfloat* tex_dim = &prog->np2Fixup_data->const_cache[idx * 4]; + if (!tex) { FIXME("Nonexistent texture is flagged for NP2 texcoord fixup\n"); continue; + } + + if (prog->np2Fixup_data->swz & (1 << i)) { + tex_dim[2] = tex->baseTexture.pow2Matrix[0]; tex_dim[3] = tex->baseTexture.pow2Matrix[5]; } else { - const float tex_dim[2] =
{tex->baseTexture.pow2Matrix[0], tex->baseTexture.pow2Matrix[5]}; - GL_EXTCALL(glUniform2fvARB(prog->np2Fixup_location[i], 1, tex_dim)); + tex_dim[0] = tex->baseTexture.pow2Matrix[0]; tex_dim[1] = tex->baseTexture.pow2Matrix[5]; } + + /* FIXME: can we drop the call when the dimensions don't change? */ + GL_EXTCALL(glUniform4fvARB(prog->np2Fixup_location[idx], 1, tex_dim)); } } } @@ -843,6 +853,15 @@ static void shader_generate_glsl_declarations(IWineD3DBaseShader *iface, const s /* Declare uniforms for NP2 texcoord fixup */ if (pshader && ps_args->np2_fixup) { + IWineD3DPixelShaderImpl* ps = (IWineD3DPixelShaderImpl*) This; + struct ps_np2fixup_t* const fixup = ps->cur_np2fixup; + UINT cur = 0; + + /* FIXME: is initialization necessary? */ + memset(fixup->idx, 0, sizeof(unsigned char) * MAX_FRAGMENT_SAMPLERS); + fixup->swz = 0; + fixup->num_consts = 0; + /* NP2/RECT textures in OpenGL use texcoords in the range [0,width]x[0,height] * while D3D has them in the (normalized) [0,1]x[0,1] range. * samplerNP2Fixup stores texture dimensions and is updated through @@ -859,9 +878,26 @@ static void shader_generate_glsl_declarations(IWineD3DBaseShader *iface, const s continue; } - shader_addline(buffer, "uniform vec2 %csamplerNP2Fixup%u;\n", prefix, i); + if (cur % 2) { + fixup->swz |= (1 << i); + } else { + ++fixup->num_consts; + shader_addline(buffer, "uniform vec4 %csamplerNP2Fixup%u;\n", prefix, fixup->num_consts - 1); + } + + fixup->idx[i] = fixup->num_consts - 1; + ++cur; } } + + if (fixup->const_cache) { + FIXME("NP2 texcoord fixup constant cache was not released properly.\n"); + HeapFree(GetProcessHeap(), 0, fixup->const_cache); + } + + /* FIXME: do we need to zero the memory? 
*/ + fixup->const_cache = + HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(GLfloat) * fixup->num_consts * 4); } /* Declare address variables */ @@ -1572,7 +1608,7 @@ static void PRINTF_ATTR(6, 7) shader_glsl_gen_sample_code(const struct wined3d_s if (shader_is_pshader_version(ins->ctx->reg_maps->shader_version)) { - IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)ins->ctx->shader; + const IWineD3DPixelShaderImpl* const This = (IWineD3DPixelShaderImpl *)ins->ctx->shader; fixup = This->cur_args->color_fixup[sampler]; sampler_base = "Psampler"; @@ -1600,7 +1636,10 @@ static void PRINTF_ATTR(6, 7) shader_glsl_gen_sample_code(const struct wined3d_s shader_addline(ins->ctx->buffer, ", %s)%s);\n", bias, dst_swizzle); } else { if (np2_fixup) { - shader_addline(ins->ctx->buffer, " * PsamplerNP2Fixup%u)%s);\n", sampler, dst_swizzle); + const IWineD3DPixelShaderImpl* const This = (IWineD3DPixelShaderImpl *)ins->ctx->shader; + + shader_addline(ins->ctx->buffer, " * PsamplerNP2Fixup%u.%s)%s);\n", This->cur_np2fixup->idx[sampler], + (This->cur_np2fixup->swz & (1 << sampler)) ? "zw" : "xy", dst_swizzle); } else { shader_addline(ins->ctx->buffer, ")%s);\n", dst_swizzle); } @@ -3613,14 +3652,14 @@ static void set_glsl_shader_program(IWineD3DDevice *iface, BOOL use_ps, BOOL use } if (use_ps && ps_compile_args.np2_fixup) { - char name[32]; - for (i = 0; i < MAX_FRAGMENT_SAMPLERS; ++i) { - if (ps_compile_args.np2_fixup & (1 << i)) { + if (entry->np2Fixup_data) { + char name[32]; + for (i = 0; i < entry->np2Fixup_data->num_consts; ++i) { sprintf(name, "PsamplerNP2Fixup%u", i); entry->np2Fixup_location[i] = GL_EXTCALL(glGetUniformLocationARB(programId, name)); - } else { - entry->np2Fixup_location[i] = -1; } + } else { + FIXME("NP2 texcoord fixup needed for this pixelshader, but no fixup data found."); } } -- 1.6.0.6