[PATCH v7 4/6] wined3d: Implement Fetch4 GLSL shader generation

Daniel Ansorregui mailszeros at gmail.com
Sun Feb 24 17:37:28 CST 2019


- Implement fetch4 for all texture sampling at "shader_glsl_gen_sample_code"
  by storing extra arguments in "glsl_sample_function"
- FIXME: Fetch4 is not yet implemented for vertex shader texldl, because
  ps_compile_args is not accessible there. The flag may need to move elsewhere.
  This combination probably does not work on Windows anyway.
- Trigger PS re-generation at FETCH4 state change, by storing a context flag
- Add a ps_compile_args flag for fetch4

Signed-off-by: Daniel Ansorregui <mailszeros at gmail.com>
---
 dlls/wined3d/cs.c              |  5 ++++-
 dlls/wined3d/device.c          |  4 ++++
 dlls/wined3d/glsl_shader.c     | 41 +++++++++++++++++++++++++++++++---
 dlls/wined3d/shader.c          | 11 +++++++++
 dlls/wined3d/state.c           | 12 ++++++++++
 dlls/wined3d/wined3d_private.h | 15 +++++++++++--
 6 files changed, 82 insertions(+), 6 deletions(-)

diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c
index 3de29d5f94..9f87f3c0dc 100644
--- a/dlls/wined3d/cs.c
+++ b/dlls/wined3d/cs.c
@@ -1372,7 +1372,10 @@ static void wined3d_cs_exec_set_texture(struct wined3d_cs *cs, const void *data)
         if (!prev || wined3d_texture_gl(op->texture)->target != wined3d_texture_gl(prev)->target
                 || (!is_same_fixup(new_format->color_fixup, old_format->color_fixup)
                 && !(can_use_texture_swizzle(gl_info, new_format) && can_use_texture_swizzle(gl_info, old_format)))
-                || (new_fmt_flags & WINED3DFMT_FLAG_SHADOW) != (old_fmt_flags & WINED3DFMT_FLAG_SHADOW))
+                || (new_fmt_flags & WINED3DFMT_FLAG_SHADOW) != (old_fmt_flags & WINED3DFMT_FLAG_SHADOW)
+                || ((new_fmt_flags & WINED3DFMT_FLAG_ALLOW_FETCH4) != (old_fmt_flags & WINED3DFMT_FLAG_ALLOW_FETCH4)
+                && cs->state.sampler_states[op->texture->sampler][WINED3D_SAMP_MIPMAP_LOD_BIAS]
+                == MAKEFOURCC('G','E','T','4')))
             device_invalidate_state(cs->device, STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL));
 
         if (!prev && op->stage < d3d_info->limits.ffp_blend_stages)
diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c
index 4caa1e8c45..0e91ac1ad9 100644
--- a/dlls/wined3d/device.c
+++ b/dlls/wined3d/device.c
@@ -2124,7 +2124,11 @@ void CDECL wined3d_device_set_sampler_state(struct wined3d_device *device,
             device, sampler_idx, debug_d3dsamplerstate(state), value);
 
     if (sampler_idx >= WINED3DVERTEXTEXTURESAMPLER0 && sampler_idx <= WINED3DVERTEXTEXTURESAMPLER3)
+    {
         sampler_idx -= (WINED3DVERTEXTEXTURESAMPLER0 - WINED3D_MAX_FRAGMENT_SAMPLERS);
+        if (state == WINED3D_SAMP_MIPMAP_LOD_BIAS && value == MAKEFOURCC('G','E','T','4'))
+            FIXME("Unsupported FETCH4 and Vertex Texture Sampler\n");
+    }
 
     if (sampler_idx >= ARRAY_SIZE(device->state.sampler_states))
     {
diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c
index 3ec54e3418..dbf9d5ce57 100644
--- a/dlls/wined3d/glsl_shader.c
+++ b/dlls/wined3d/glsl_shader.c
@@ -104,6 +104,8 @@ struct glsl_sample_function
     enum wined3d_data_type data_type;
     BOOL output_single_component;
     unsigned int offset_size;
+    BOOL fetch4_enabled;
+    BOOL fetch4_projected;
 };
 
 enum heap_node_op
@@ -3618,6 +3620,7 @@ static void shader_glsl_get_sample_function(const struct wined3d_shader_context
     BOOL lod = flags & WINED3D_GLSL_SAMPLE_LOD;
     BOOL grad = flags & WINED3D_GLSL_SAMPLE_GRAD;
     BOOL offset = flags & WINED3D_GLSL_SAMPLE_OFFSET;
+    BOOL gather = !shadow && priv->cur_ps_args && priv->cur_ps_args->fetch4 & (1u << sampler_idx);
     const char *base = "texture", *type_part = "", *suffix = "";
     unsigned int coord_size, deriv_size;
 
@@ -3663,6 +3666,16 @@ static void shader_glsl_get_sample_function(const struct wined3d_shader_context
         type_part = "";
     }
 
+    sample_function->fetch4_projected = projected;
+    sample_function->fetch4_enabled = gather;
+    if (gather)
+    {
+        base = "texture";
+        type_part = "Gather";
+        suffix = "";
+        projected = lod = grad = offset = FALSE;
+    }
+
     sample_function->name = string_buffer_get(priv->string_buffers);
     string_buffer_sprintf(sample_function->name, "%s%s%s%s%s%s", base, type_part, projected ? "Proj" : "",
             lod ? "Lod" : grad ? "Grad" : "", offset ? "Offset" : "", suffix);
@@ -3797,13 +3810,18 @@ static void PRINTF_ATTR(9, 10) shader_glsl_gen_sample_code(const struct wined3d_
         const char *coord_reg_fmt, ...)
 {
     const struct wined3d_shader_version *version = &ins->ctx->reg_maps->shader_version;
+    const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
+    DWORD merged_swizzle = swizzle;
     char dst_swizzle[6];
     struct color_fixup_desc fixup;
     BOOL np2_fixup = FALSE;
     va_list args;
     int ret;
 
-    shader_glsl_swizzle_to_str(swizzle, FALSE, ins->dst[0].write_mask, dst_swizzle);
+    /* Merge swizzle requested with the fetch4 swizzle */
+    if (sample_function->fetch4_enabled)
+        merged_swizzle = wined3d_merge_swizzle(WINED3DSP_FETCH4_SWIZZLE, swizzle);
+    shader_glsl_swizzle_to_str(merged_swizzle, FALSE, ins->dst[0].write_mask, dst_swizzle);
 
     /* If ARB_texture_swizzle is supported we don't need to do anything here.
      * We actually rely on it for vertex shaders and SM4+. */
@@ -3841,7 +3859,6 @@ static void PRINTF_ATTR(9, 10) shader_glsl_gen_sample_code(const struct wined3d_
 
     if (np2_fixup)
     {
-        const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
         const unsigned char idx = priv->cur_np2fixup_info->idx[sampler_bind_idx];
 
         switch (shader_glsl_get_write_mask_size(sample_function->coord_mask))
@@ -3864,7 +3881,21 @@ static void PRINTF_ATTR(9, 10) shader_glsl_gen_sample_code(const struct wined3d_
                 break;
         }
     }
-    if (dx && dy)
+    if (sample_function->fetch4_enabled)
+    {
+        if (sample_function->fetch4_projected)
+        {
+            struct wined3d_string_buffer *reg_name = string_buffer_get(priv->string_buffers);
+            shader_glsl_get_register_name(&ins->src[0].reg, ins->src[0].reg.data_type, reg_name, NULL, ins->ctx);
+            shader_addline(ins->ctx->buffer, " / %s.w", reg_name->buffer);
+            string_buffer_release(priv->string_buffers, reg_name);
+        }
+
+        /* Correct the fetch4 0.5 texel offset */
+        shader_addline(ins->ctx->buffer, " + (vec2(0.5) / textureSize(%s_sampler%u, 0).xy)",
+                shader_glsl_get_prefix(version->type), sampler_bind_idx);
+    }
+    else if (dx && dy)
         shader_addline(ins->ctx->buffer, ", %s, %s", dx, dy);
     else if (bias)
         shader_addline(ins->ctx->buffer, ", %s", bias);
@@ -5402,6 +5433,10 @@ static void shader_glsl_tex(const struct wined3d_shader_instruction *ins)
         }
     }
 
+    /* When fetch4 is active, projection is done manually */
+    if (priv->cur_ps_args->fetch4 & (1u << resource_idx))
+        mask = 0;
+
     shader_glsl_get_sample_function(ins->ctx, resource_idx, resource_idx, sample_flags, &sample_function);
     mask |= sample_function.coord_mask;
     sample_function.coord_mask = mask;
diff --git a/dlls/wined3d/shader.c b/dlls/wined3d/shader.c
index 83a793d522..01716c77b2 100644
--- a/dlls/wined3d/shader.c
+++ b/dlls/wined3d/shader.c
@@ -4093,6 +4093,17 @@ void find_ps_compile_args(const struct wined3d_state *state, const struct wined3
         }
     }
 
+    for (i = 0; i < WINED3D_MAX_FRAGMENT_SAMPLERS; ++i) /* One fetch4 bit per fragment sampler. */
+    {
+        if (state->sampler_states[i][WINED3D_SAMP_MIPMAP_LOD_BIAS] == MAKEFOURCC('G','E','T','4')
+                && state->textures[i]
+                && (state->textures[i]->resource.format_flags & WINED3DFMT_FLAG_ALLOW_FETCH4)
+                && shader->reg_maps.resource_info[i].type == WINED3D_SHADER_RESOURCE_TEXTURE_2D)
+            args->fetch4 |= 1u << i;
+        else
+            args->fetch4 &= ~(1u << i);
+    }
+
     if (context->d3d_info->limits.varying_count < wined3d_max_compat_varyings(context->gl_info))
     {
         const struct wined3d_shader *vs = state->shader[WINED3D_SHADER_TYPE_VERTEX];
diff --git a/dlls/wined3d/state.c b/dlls/wined3d/state.c
index f049ad9ebf..f37bd45c32 100644
--- a/dlls/wined3d/state.c
+++ b/dlls/wined3d/state.c
@@ -3615,6 +3615,8 @@ static void sampler(struct wined3d_context *context, const struct wined3d_state
     if (state->textures[sampler_idx])
     {
         struct wined3d_texture_gl *texture_gl = wined3d_texture_gl(state->textures[sampler_idx]);
+        BOOL fetch4 = state->sampler_states[sampler_idx][WINED3D_SAMP_MIPMAP_LOD_BIAS]
+                == MAKEFOURCC('G','E','T','4');
         const DWORD *sampler_states = state->sampler_states[sampler_idx];
         struct wined3d_device *device = context->device;
         BOOL srgb = is_srgb_enabled(sampler_states);
@@ -3650,6 +3652,16 @@ static void sampler(struct wined3d_context *context, const struct wined3d_state
         /* Trigger shader constant reloading (for NP2 texcoord fixup) */
         if (!(texture_gl->t.flags & WINED3D_TEXTURE_POW2_MAT_IDENT))
             context->constant_update_mask |= WINED3D_SHADER_CONST_PS_NP2_FIXUP;
+
+        /* Trigger pixel shader recompilation for FETCH4 changes */
+        if (((context->last_was_fetch4 >> sampler_idx) & 0x1) ^ fetch4)
+        {
+            if (fetch4)
+                context->last_was_fetch4 |= 1u << sampler_idx;
+            else
+                context->last_was_fetch4 &= ~(1u << sampler_idx);
+            context->shader_update_mask |= 1u << WINED3D_SHADER_TYPE_PIXEL; /* Keep other pending stages. */
+        }
     }
     else
     {
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
index 7933c4f547..b438b699b6 100644
--- a/dlls/wined3d/wined3d_private.h
+++ b/dlls/wined3d/wined3d_private.h
@@ -521,7 +521,16 @@ enum wined3d_immconst_type
     WINED3D_IMMCONST_VEC4,
 };
 
-#define WINED3DSP_NOSWIZZLE (0u | (1u << 2) | (2u << 4) | (3u << 6))
+#define WINED3DSP_NOSWIZZLE      (0u | (1u << 2) | (2u << 4) | (3u << 6))
+#define WINED3DSP_FETCH4_SWIZZLE (2u | (0u << 2) | (1u << 4) | (3u << 6)) /* zxyw */
+
+static inline DWORD wined3d_merge_swizzle(const DWORD a, const DWORD b)
+{
+    DWORD merged = 0, shift; /* Result component i is the component of a selected by b[i]. */
+    for (shift = 0; shift < 8; shift += 2) /* Two bits per swizzle component. */
+        merged |= ((a >> (((b >> shift) & 0x3) * 2)) & 0x3) << shift;
+    return merged;
+}
 
 enum wined3d_shader_src_modifier
 {
@@ -1361,7 +1370,8 @@ struct ps_compile_args
     DWORD alpha_test_func : 3;
     DWORD render_offscreen : 1;
     DWORD rt_alpha_swizzle : 8; /* MAX_RENDER_TARGET_VIEWS, 8 */
-    DWORD padding : 18;
+    DWORD fetch4 : 16; /* MAX_FRAGMENT_SAMPLERS, 16; DWORD keeps it in the same storage unit. */
+    DWORD padding : 2;
 };
 
 enum fog_src_type
@@ -1896,6 +1906,7 @@ struct wined3d_context
     DWORD last_was_ffp_blit : 1;
     DWORD last_was_blit : 1;
     DWORD last_was_ckey : 1;
+    DWORD last_was_fetch4 : 16; /* MAX_FRAGMENT_SAMPLERS */
     DWORD fog_coord : 1;
     DWORD fog_enabled : 1;
     DWORD current : 1;
-- 
2.17.1




More information about the wine-devel mailing list