[PATCH v5 2/4] wined3d: Add fetch4 to shader FFP generate texture stage

Daniel Ansorregui mailszeros at gmail.com
Mon Feb 11 07:48:37 CST 2019


- Add a format flag to indicate FETCH4 support in textures
- The implementation follows the AMD implementation and swizzle:
  projection is allowed and a 0.5 texel offset is added

Signed-off-by: Daniel Ansorregui <mailszeros at gmail.com>
---
 dlls/wined3d/glsl_shader.c     | 43 ++++++++++++++++++++++++++++------
 dlls/wined3d/utils.c           | 27 +++++++++++++++++++++
 dlls/wined3d/wined3d_private.h |  4 +++-
 3 files changed, 66 insertions(+), 8 deletions(-)

diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c
index 3298a604fd..1950db06a9 100644
--- a/dlls/wined3d/glsl_shader.c
+++ b/dlls/wined3d/glsl_shader.c
@@ -9711,6 +9711,8 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv *
         shader_addline(buffer, "#extension GL_ARB_shading_language_420pack : enable\n");
     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
         shader_addline(buffer, "#extension GL_ARB_texture_rectangle : enable\n");
+    if (gl_info->supported[ARB_TEXTURE_GATHER])
+        shader_addline(buffer, "#extension GL_ARB_texture_gather : enable\n");
 
     if (!needs_legacy_glsl_syntax(gl_info))
     {
@@ -9851,6 +9853,9 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv *
     for (stage = 0; stage < MAX_TEXTURES && settings->op[stage].cop != WINED3D_TOP_DISABLE; ++stage)
     {
         const char *texture_function, *coord_mask;
+        struct wined3d_string_buffer offset;
+        BOOL fetch4 = settings->op[stage].fetch4;
+        BOOL fetch4_proj = FALSE;
         BOOL proj;
 
         if (!(tex_map & (1u << stage)))
@@ -9870,7 +9875,6 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv *
             FIXME("Unexpected projection mode %d\n", settings->op[stage].projected);
             proj = TRUE;
         }
-
         if (settings->op[stage].tex_type == WINED3D_GL_RES_TYPE_TEX_CUBE)
             proj = FALSE;
 
@@ -9879,6 +9883,7 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv *
             case WINED3D_GL_RES_TYPE_TEX_1D:
                 texture_function = "texture1D";
                 coord_mask = "x";
+                fetch4 = FALSE;
                 break;
             case WINED3D_GL_RES_TYPE_TEX_2D:
                 texture_function = "texture2D";
@@ -9887,6 +9892,9 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv *
             case WINED3D_GL_RES_TYPE_TEX_3D:
                 texture_function = "texture3D";
                 coord_mask = "xyz";
+                if (fetch4)
+                    FIXME("Unsupported Fetch4 and texture3D sampling");
+                fetch4 = FALSE;
                 break;
             case WINED3D_GL_RES_TYPE_TEX_CUBE:
                 texture_function = "textureCube";
@@ -9901,11 +9909,24 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv *
                 texture_function = "";
                 coord_mask = "xyzw";
                 proj = FALSE;
+                fetch4 = FALSE;
                 break;
         }
         if (!legacy_syntax)
             texture_function = "texture";
 
+        string_buffer_init(&offset);
+        if (fetch4)
+        {
+            texture_function = "textureGather";
+            /* Apply a 0.5 texel offset, as in the AMD implementation */
+            shader_addline(&offset, " + (vec2(0.5) / textureSize(ps_sampler%u, 0).xy)", stage);
+
+            /* When projection is needed on fetch4 we have to apply it manually by dividing by .w */
+            fetch4_proj = proj;
+            proj = FALSE;
+        }
+
         if (stage > 0
                 && (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP
                 || settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE))
@@ -9936,8 +9957,8 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv *
                 shader_addline(buffer, "ret = ffp_texcoord[%u] + ret.xyxy;\n", stage);
             }
 
-            shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ret.%s%s);\n",
-                    stage, texture_function, proj ? "Proj" : "", stage, coord_mask, proj ? "w" : "");
+            shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ret.%s%s%s%s);\n", stage, texture_function,
+                    proj ? "Proj" : "", stage, coord_mask, proj ? "w" : "", fetch4_proj ? " / ret.w" : "", offset.buffer);
 
             if (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE)
                 shader_addline(buffer, "tex%u *= clamp(tex%u.z * bumpenv_lum_scale%u + bumpenv_lum_offset%u, 0.0, 1.0);\n",
@@ -9945,14 +9966,22 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv *
         }
         else if (settings->op[stage].projected == WINED3D_PROJECTION_COUNT3)
         {
-            shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].xyz);\n",
-                    stage, texture_function, proj ? "Proj" : "", stage, stage);
+            shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].xyz%s);\n", stage,
+                    texture_function, proj ? "Proj" : "", stage, stage, offset.buffer);
         }
         else
         {
-            shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].%s%s);\n",
-                    stage, texture_function, proj ? "Proj" : "", stage, stage, coord_mask, proj ? "w" : "");
+            shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].%s%s", stage,
+                    texture_function, proj ? "Proj" : "", stage, stage, coord_mask, proj ? "w" : "");
+            if (fetch4_proj)
+                shader_addline(buffer, " / ffp_texcoord[%u].w", stage);
+            shader_addline(buffer, "%s);\n", offset.buffer);
         }
+        string_buffer_clear(&offset);
+
+        /* Match FETCH4 swizzle with textureGather swizzle */
+        if (fetch4)
+            shader_addline(buffer, "tex%u = tex%u.zxyw;\n", stage, stage);
 
         string_buffer_sprintf(tex_reg_name, "tex%u", stage);
         shader_glsl_color_correction_ext(buffer, tex_reg_name->buffer, WINED3DSP_WRITEMASK_ALL,
diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c
index 7b42202213..b0cae022c6 100644
--- a/dlls/wined3d/utils.c
+++ b/dlls/wined3d/utils.c
@@ -342,6 +342,19 @@ static const struct wined3d_format_base_flags format_base_flags[] =
     {WINED3DFMT_RESZ,                 WINED3DFMT_FLAG_EXTENSION},
 };
 
+/* List of texture formats where fetch4 can be enabled.
+ * Only available if the ARB_TEXTURE_GATHER extension is present. */
+static const enum wined3d_format_id wined3d_format_fetch4_enabled[] =
+{
+    WINED3DFMT_L8_UNORM,
+    WINED3DFMT_L16_UNORM,
+    WINED3DFMT_R16_FLOAT,
+    WINED3DFMT_R16,
+    WINED3DFMT_R32_FLOAT,
+    WINED3DFMT_A8_UNORM,
+    WINED3DFMT_INTZ,
+};
+
 static void rgb888_from_rgb565(WORD rgb565, BYTE *r, BYTE *g, BYTE *b)
 {
     BYTE c;
@@ -2120,6 +2133,15 @@ static BOOL init_format_base_info(struct wined3d_adapter *adapter)
         format_set_flag(format, format_base_flags[i].flags);
     }
 
+    if (adapter->gl_info.supported[ARB_TEXTURE_GATHER])
+        for (i = 0; i < ARRAY_SIZE(wined3d_format_fetch4_enabled); ++i)
+        {
+            if (!(format = get_format_internal(adapter, wined3d_format_fetch4_enabled[i])))
+                return FALSE;
+
+            format_set_flag(format, WINED3DFMT_FLAG_ALLOW_FETCH4);
+        }
+
     return TRUE;
 }
 
@@ -5780,6 +5802,7 @@ void gen_ffp_frag_op(const struct wined3d_context *context, const struct wined3d
             settings->op[i].tmp_dst = 0;
             settings->op[i].tex_type = WINED3D_GL_RES_TYPE_TEX_1D;
             settings->op[i].projected = WINED3D_PROJECTION_NONE;
+            settings->op[i].fetch4 = FALSE;
             i++;
             break;
         }
@@ -5923,6 +5946,10 @@ void gen_ffp_frag_op(const struct wined3d_context *context, const struct wined3d
         settings->op[i].aarg1 = aarg1;
         settings->op[i].aarg2 = aarg2;
         settings->op[i].tmp_dst = state->texture_states[i][WINED3D_TSS_RESULT_ARG] == WINED3DTA_TEMP;
+        settings->op[i].fetch4 = (state->textures[i]
+                && state->sampler_states[i][WINED3D_SAMP_MIPMAP_LOD_BIAS] == MAKEFOURCC('G','E','T','4')
+                && state->textures[i]->resource.format_flags & WINED3DFMT_FLAG_ALLOW_FETCH4
+                && settings->op[i].tex_type & (WINED3D_GL_RES_TYPE_TEX_2D | WINED3D_GL_RES_TYPE_TEX_RECT));
     }
 
     /* Clear unsupported stages */
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
index 1e3ec28d6b..4224461142 100644
--- a/dlls/wined3d/wined3d_private.h
+++ b/dlls/wined3d/wined3d_private.h
@@ -2747,7 +2747,8 @@ struct texture_stage_op
     unsigned                tex_type : 3;
     unsigned                tmp_dst : 1;
     unsigned                projected : 2;
-    unsigned                padding : 10;
+    unsigned                fetch4 : 1;
+    unsigned                padding : 9;
 };
 
 struct ffp_frag_settings
@@ -4434,6 +4435,7 @@ extern enum wined3d_format_id pixelformat_for_depth(DWORD depth) DECLSPEC_HIDDEN
 #define WINED3DFMT_FLAG_VERTEX_ATTRIBUTE            0x01000000
 #define WINED3DFMT_FLAG_BLIT                        0x02000000
 #define WINED3DFMT_FLAG_MAPPABLE                    0x04000000
+#define WINED3DFMT_FLAG_ALLOW_FETCH4                0x08000000
 
 struct wined3d_rational
 {
-- 
2.17.1




More information about the wine-devel mailing list