[PATCH v7 4/6] wined3d: Implement Fetch4 GLSL shader generation
Daniel Ansorregui
mailszeros at gmail.com
Sun Feb 24 17:37:28 CST 2019
- Implement fetch4 for all texture sampling in "shader_glsl_gen_sample_code"
by storing two extra fields (fetch4_enabled, fetch4_projected) in "glsl_sample_function"
- FIXME: Vertex shader texldl is not implemented yet, since ps_compile_args is
not accessible there. Maybe the fetch4 flag should be moved to another place.
It probably does not work on Windows anyway.
- Trigger pixel shader (PS) regeneration on FETCH4 state changes by storing a per-sampler flag in the context
- Add a fetch4 bitmask (one bit per fragment sampler) to ps_compile_args
Signed-off-by: Daniel Ansorregui <mailszeros at gmail.com>
---
dlls/wined3d/cs.c | 5 ++++-
dlls/wined3d/device.c | 4 ++++
dlls/wined3d/glsl_shader.c | 41 +++++++++++++++++++++++++++++++---
dlls/wined3d/shader.c | 11 +++++++++
dlls/wined3d/state.c | 12 ++++++++++
dlls/wined3d/wined3d_private.h | 15 +++++++++++--
6 files changed, 82 insertions(+), 6 deletions(-)
diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c
index 3de29d5f94..9f87f3c0dc 100644
--- a/dlls/wined3d/cs.c
+++ b/dlls/wined3d/cs.c
@@ -1372,7 +1372,10 @@ static void wined3d_cs_exec_set_texture(struct wined3d_cs *cs, const void *data)
if (!prev || wined3d_texture_gl(op->texture)->target != wined3d_texture_gl(prev)->target
|| (!is_same_fixup(new_format->color_fixup, old_format->color_fixup)
&& !(can_use_texture_swizzle(gl_info, new_format) && can_use_texture_swizzle(gl_info, old_format)))
- || (new_fmt_flags & WINED3DFMT_FLAG_SHADOW) != (old_fmt_flags & WINED3DFMT_FLAG_SHADOW))
+ || (new_fmt_flags & WINED3DFMT_FLAG_SHADOW) != (old_fmt_flags & WINED3DFMT_FLAG_SHADOW)
+ || ((new_fmt_flags & WINED3DFMT_FLAG_ALLOW_FETCH4) != (old_fmt_flags & WINED3DFMT_FLAG_ALLOW_FETCH4)
+ && cs->state.sampler_states[op->texture->sampler][WINED3D_SAMP_MIPMAP_LOD_BIAS]
+ == MAKEFOURCC('G','E','T','4')))
device_invalidate_state(cs->device, STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL));
if (!prev && op->stage < d3d_info->limits.ffp_blend_stages)
diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c
index 4caa1e8c45..0e91ac1ad9 100644
--- a/dlls/wined3d/device.c
+++ b/dlls/wined3d/device.c
@@ -2124,7 +2124,11 @@ void CDECL wined3d_device_set_sampler_state(struct wined3d_device *device,
device, sampler_idx, debug_d3dsamplerstate(state), value);
if (sampler_idx >= WINED3DVERTEXTEXTURESAMPLER0 && sampler_idx <= WINED3DVERTEXTEXTURESAMPLER3)
+ {
sampler_idx -= (WINED3DVERTEXTEXTURESAMPLER0 - WINED3D_MAX_FRAGMENT_SAMPLERS);
+ if (state == WINED3D_SAMP_MIPMAP_LOD_BIAS && value == MAKEFOURCC('G','E','T','4'))
+ FIXME("Unsupported FETCH4 and Vertex Texture Sampler");
+ }
if (sampler_idx >= ARRAY_SIZE(device->state.sampler_states))
{
diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c
index 3ec54e3418..dbf9d5ce57 100644
--- a/dlls/wined3d/glsl_shader.c
+++ b/dlls/wined3d/glsl_shader.c
@@ -104,6 +104,8 @@ struct glsl_sample_function
enum wined3d_data_type data_type;
BOOL output_single_component;
unsigned int offset_size;
+ BOOL fetch4_enabled;
+ BOOL fetch4_projected;
};
enum heap_node_op
@@ -3618,6 +3620,7 @@ static void shader_glsl_get_sample_function(const struct wined3d_shader_context
BOOL lod = flags & WINED3D_GLSL_SAMPLE_LOD;
BOOL grad = flags & WINED3D_GLSL_SAMPLE_GRAD;
BOOL offset = flags & WINED3D_GLSL_SAMPLE_OFFSET;
+ BOOL gather = !shadow && priv->cur_ps_args && priv->cur_ps_args->fetch4 & (1u << sampler_idx);
const char *base = "texture", *type_part = "", *suffix = "";
unsigned int coord_size, deriv_size;
@@ -3663,6 +3666,16 @@ static void shader_glsl_get_sample_function(const struct wined3d_shader_context
type_part = "";
}
+ sample_function->fetch4_projected = projected;
+ sample_function->fetch4_enabled = gather;
+ if (gather)
+ {
+ base = "texture";
+ type_part = "Gather";
+ suffix = "";
+ projected = lod = grad = offset = FALSE;
+ }
+
sample_function->name = string_buffer_get(priv->string_buffers);
string_buffer_sprintf(sample_function->name, "%s%s%s%s%s%s", base, type_part, projected ? "Proj" : "",
lod ? "Lod" : grad ? "Grad" : "", offset ? "Offset" : "", suffix);
@@ -3797,13 +3810,18 @@ static void PRINTF_ATTR(9, 10) shader_glsl_gen_sample_code(const struct wined3d_
const char *coord_reg_fmt, ...)
{
const struct wined3d_shader_version *version = &ins->ctx->reg_maps->shader_version;
+ const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
+ DWORD merged_swizzle = swizzle;
char dst_swizzle[6];
struct color_fixup_desc fixup;
BOOL np2_fixup = FALSE;
va_list args;
int ret;
- shader_glsl_swizzle_to_str(swizzle, FALSE, ins->dst[0].write_mask, dst_swizzle);
+ /* Merge swizzle requested with the fetch4 swizzle */
+ if (sample_function->fetch4_enabled)
+ merged_swizzle = wined3d_merge_swizzle(WINED3DSP_FETCH4_SWIZZLE, swizzle);
+ shader_glsl_swizzle_to_str(merged_swizzle, FALSE, ins->dst[0].write_mask, dst_swizzle);
/* If ARB_texture_swizzle is supported we don't need to do anything here.
* We actually rely on it for vertex shaders and SM4+. */
@@ -3841,7 +3859,6 @@ static void PRINTF_ATTR(9, 10) shader_glsl_gen_sample_code(const struct wined3d_
if (np2_fixup)
{
- const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
const unsigned char idx = priv->cur_np2fixup_info->idx[sampler_bind_idx];
switch (shader_glsl_get_write_mask_size(sample_function->coord_mask))
@@ -3864,7 +3881,21 @@ static void PRINTF_ATTR(9, 10) shader_glsl_gen_sample_code(const struct wined3d_
break;
}
}
- if (dx && dy)
+ if (sample_function->fetch4_enabled)
+ {
+ if (sample_function->fetch4_projected)
+ {
+ struct wined3d_string_buffer *reg_name = string_buffer_get(priv->string_buffers);
+ shader_glsl_get_register_name(&ins->src[0].reg, ins->src[0].reg.data_type, reg_name, NULL, ins->ctx);
+ shader_addline(ins->ctx->buffer, " / %s.w", reg_name->buffer);
+ string_buffer_release(priv->string_buffers, reg_name);
+ }
+
+ /* Correct the fetch4 0.5 texel offset */
+ shader_addline(ins->ctx->buffer, " + (vec2(0.5) / textureSize(%s_sampler%u, 0).xy)",
+ shader_glsl_get_prefix(version->type), sampler_bind_idx);
+ }
+ else if (dx && dy)
shader_addline(ins->ctx->buffer, ", %s, %s", dx, dy);
else if (bias)
shader_addline(ins->ctx->buffer, ", %s", bias);
@@ -5402,6 +5433,10 @@ static void shader_glsl_tex(const struct wined3d_shader_instruction *ins)
}
}
+ /* When fetch4 is active, projection is done manually */
+ if (priv->cur_ps_args->fetch4 & (1u << resource_idx))
+ mask = 0;
+
shader_glsl_get_sample_function(ins->ctx, resource_idx, resource_idx, sample_flags, &sample_function);
mask |= sample_function.coord_mask;
sample_function.coord_mask = mask;
diff --git a/dlls/wined3d/shader.c b/dlls/wined3d/shader.c
index 83a793d522..01716c77b2 100644
--- a/dlls/wined3d/shader.c
+++ b/dlls/wined3d/shader.c
@@ -4093,6 +4093,17 @@ void find_ps_compile_args(const struct wined3d_state *state, const struct wined3
}
}
+ for (i = 0; i < MAX_FRAGMENT_SAMPLERS; ++i)
+ {
+ if (state->sampler_states[i][WINED3D_SAMP_MIPMAP_LOD_BIAS] == MAKEFOURCC('G','E','T','4')
+ && state->textures[i]
+ && state->textures[i]->resource.format_flags & WINED3DFMT_FLAG_ALLOW_FETCH4
+ && shader->reg_maps.resource_info[i].type == WINED3D_SHADER_RESOURCE_TEXTURE_2D)
+ args->fetch4 |= 1 << i;
+ else
+ args->fetch4 &= ~(1 << i);
+ }
+
if (context->d3d_info->limits.varying_count < wined3d_max_compat_varyings(context->gl_info))
{
const struct wined3d_shader *vs = state->shader[WINED3D_SHADER_TYPE_VERTEX];
diff --git a/dlls/wined3d/state.c b/dlls/wined3d/state.c
index f049ad9ebf..f37bd45c32 100644
--- a/dlls/wined3d/state.c
+++ b/dlls/wined3d/state.c
@@ -3615,6 +3615,8 @@ static void sampler(struct wined3d_context *context, const struct wined3d_state
if (state->textures[sampler_idx])
{
struct wined3d_texture_gl *texture_gl = wined3d_texture_gl(state->textures[sampler_idx]);
+ BOOL fetch4 = state->sampler_states[sampler_idx][WINED3D_SAMP_MIPMAP_LOD_BIAS]
+ == MAKEFOURCC('G','E','T','4');
const DWORD *sampler_states = state->sampler_states[sampler_idx];
struct wined3d_device *device = context->device;
BOOL srgb = is_srgb_enabled(sampler_states);
@@ -3650,6 +3652,16 @@ static void sampler(struct wined3d_context *context, const struct wined3d_state
/* Trigger shader constant reloading (for NP2 texcoord fixup) */
if (!(texture_gl->t.flags & WINED3D_TEXTURE_POW2_MAT_IDENT))
context->constant_update_mask |= WINED3D_SHADER_CONST_PS_NP2_FIXUP;
+
+ /* Trigger pixel shader recompilation for FETCH4 changes */
+ if (((context->last_was_fetch4 >> sampler_idx) & 0x1) ^ fetch4)
+ {
+ if (fetch4)
+ context->last_was_fetch4 |= 1 << sampler_idx;
+ else
+ context->last_was_fetch4 &= ~(1 << sampler_idx);
+ context->shader_update_mask = (1u << WINED3D_SHADER_TYPE_PIXEL);
+ }
}
else
{
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
index 7933c4f547..b438b699b6 100644
--- a/dlls/wined3d/wined3d_private.h
+++ b/dlls/wined3d/wined3d_private.h
@@ -521,7 +521,16 @@ enum wined3d_immconst_type
WINED3D_IMMCONST_VEC4,
};
-#define WINED3DSP_NOSWIZZLE (0u | (1u << 2) | (2u << 4) | (3u << 6))
+#define WINED3DSP_NOSWIZZLE (0u | (1u << 2) | (2u << 4) | (3u << 6))
+#define WINED3DSP_FETCH4_SWIZZLE (2u | (0u << 2) | (1u << 4) | (3u << 6)) /* zxyw */
+
+static inline DWORD wined3d_merge_swizzle(const DWORD a, const DWORD b) /* Compose two swizzles; each packs four 2-bit component selectors into bits 0-7. */
+{ /* Output selector i is a[b[i]]: b's selector i picks which of a's selectors to copy. */
+ return (((a >> ((b >> 0) & 0x3) * 2) & 0x3) << 0) /* selector 0 (bits 0-1); '*' binds tighter than '>>', so a is shifted by 2 * b[0] */
+ + (((a >> ((b >> 2) & 0x3) * 2) & 0x3) << 2) /* selector 1 (bits 2-3) */
+ + (((a >> ((b >> 4) & 0x3) * 2) & 0x3) << 4) /* selector 2 (bits 4-5) */
+ + (((a >> ((b >> 6) & 0x3) * 2) & 0x3) << 6); /* selector 3 (bits 6-7); the four fields never overlap, so '+' acts like '|' */
+}
enum wined3d_shader_src_modifier
{
@@ -1361,7 +1370,8 @@ struct ps_compile_args
DWORD alpha_test_func : 3;
DWORD render_offscreen : 1;
DWORD rt_alpha_swizzle : 8; /* MAX_RENDER_TARGET_VIEWS, 8 */
- DWORD padding : 18;
+ WORD fetch4 : 16; /* MAX_FRAGMENT_SAMPLERS, 16 */
+ DWORD padding : 2;
};
enum fog_src_type
@@ -1896,6 +1906,7 @@ struct wined3d_context
DWORD last_was_ffp_blit : 1;
DWORD last_was_blit : 1;
DWORD last_was_ckey : 1;
+ DWORD last_was_fetch4 : 16; /* MAX_FRAGMENT_SAMPLERS */
DWORD fog_coord : 1;
DWORD fog_enabled : 1;
DWORD current : 1;
--
2.17.1
More information about the wine-devel
mailing list