[PATCH vkd3d v3 05/12] vkd3d-shader/hlsl: Add Gather methods with offset and tests.

Zebediah Figura (she/her) zfigura at codeweavers.com
Fri Dec 17 17:12:51 CST 2021


On 12/17/21 13:12, Francisco Casas wrote:
> Signed-off-by: Francisco Casas <fcasas at codeweavers.com>
> ---
>   Makefile.am                          |   4 +
>   libs/vkd3d-shader/hlsl.c             |   4 +
>   libs/vkd3d-shader/hlsl.h             |   4 +
>   libs/vkd3d-shader/hlsl.y             | 109 +++++++++++++++++++++++++++
>   libs/vkd3d-shader/hlsl_sm4.c         |  67 ++++++++++++++++
>   tests/hlsl-gather-offset.shader_test |  93 +++++++++++++++++++++++
>   tests/hlsl-gather.shader_test        | 107 ++++++++++++++++++++++++++
>   7 files changed, 388 insertions(+)
>   create mode 100644 tests/hlsl-gather-offset.shader_test
>   create mode 100644 tests/hlsl-gather.shader_test
> 
> diff --git a/Makefile.am b/Makefile.am
> index 16848f5a..749df363 100644
> --- a/Makefile.am
> +++ b/Makefile.am
> @@ -68,6 +68,8 @@ vkd3d_shader_tests = \
>   	tests/hlsl-duplicate-modifiers.shader_test \
>   	tests/hlsl-for.shader_test \
>   	tests/hlsl-function-overload.shader_test \
> +	tests/hlsl-gather-offset.shader_test \
> +	tests/hlsl-gather.shader_test \
>   	tests/hlsl-invalid.shader_test \
>   	tests/hlsl-majority-pragma.shader_test \
>   	tests/hlsl-majority-typedef.shader_test \
> @@ -294,6 +296,8 @@ XFAIL_TESTS = \
>   	tests/hlsl-duplicate-modifiers.shader_test \
>   	tests/hlsl-for.shader_test \
>   	tests/hlsl-function-overload.shader_test \
> +	tests/hlsl-gather-offset.shader_test \
> +	tests/hlsl-gather.shader_test \
>   	tests/hlsl-majority-pragma.shader_test \
>   	tests/hlsl-majority-typedef.shader_test \
>   	tests/hlsl-nested-arrays.shader_test \
> diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c
> index b426e461..82820daf 100644
> --- a/libs/vkd3d-shader/hlsl.c
> +++ b/libs/vkd3d-shader/hlsl.c
> @@ -1277,6 +1277,10 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru
>       {
>           [HLSL_RESOURCE_LOAD] = "load_resource",
>           [HLSL_RESOURCE_SAMPLE] = "sample",
> +        [HLSL_RESOURCE_GATHER_RED] = "gather4r",
> +        [HLSL_RESOURCE_GATHER_GREEN] = "gather4g",
> +        [HLSL_RESOURCE_GATHER_BLUE] = "gather4b",
> +        [HLSL_RESOURCE_GATHER_ALPHA] = "gather4a",
>       };
>   
>       vkd3d_string_buffer_printf(buffer, "%s(resource = ", type_names[load->load_type]);

Just to nitpick, how about "gather_r" or "gather_red"? Just because the 
4 is there in DXBC doesn't mean it has any semantic meaning.

> diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h
> index 2396adb4..49fa8d9d 100644
> --- a/libs/vkd3d-shader/hlsl.h
> +++ b/libs/vkd3d-shader/hlsl.h
> @@ -378,6 +378,10 @@ enum hlsl_resource_load_type
>   {
>       HLSL_RESOURCE_LOAD,
>       HLSL_RESOURCE_SAMPLE,
> +    HLSL_RESOURCE_GATHER_RED,
> +    HLSL_RESOURCE_GATHER_GREEN,
> +    HLSL_RESOURCE_GATHER_BLUE,
> +    HLSL_RESOURCE_GATHER_ALPHA,
>   };
>   
>   struct hlsl_ir_resource_load
> diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y
> index 33bfee5b..9aa0138d 100644
> --- a/libs/vkd3d-shader/hlsl.y
> +++ b/libs/vkd3d-shader/hlsl.y
> @@ -1930,6 +1930,115 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hl
>           list_add_tail(instrs, &load->node.entry);
>           return true;
>       }
> +    else if (!strcmp(name, "Gather") || !strcmp(name, "GatherRed") || !strcmp(name, "GatherBlue")
> +            || !strcmp(name, "GatherGreen") || !strcmp(name, "GatherAlpha"))
> +    {
> +        const unsigned int sampler_dim = sampler_dim_count(object_type->sampler_dim);
> +        enum hlsl_resource_load_type load_type;
> +        const struct hlsl_type *sampler_type;
> +        struct hlsl_ir_resource_load *load;
> +        struct hlsl_ir_node *offset = NULL;
> +        struct hlsl_ir_load *sampler_load;
> +        struct hlsl_type *result_type;
> +        struct hlsl_ir_node *coords;
> +        int status_out_arg;
> +        int read_channel;
> +
> +        if (!strcmp(name, "Gather") || !strcmp(name, "GatherRed"))
> +        {
> +            load_type = HLSL_RESOURCE_GATHER_RED;
> +            read_channel = 0;
> +        }
> +        else if (!strcmp(name, "GatherGreen"))
> +        {
> +            load_type = HLSL_RESOURCE_GATHER_GREEN;
> +            read_channel = 1;
> +        }
> +        else if (!strcmp(name, "GatherBlue"))
> +        {
> +            load_type = HLSL_RESOURCE_GATHER_BLUE;
> +            read_channel = 2;
> +        }
> +        else if (!strcmp(name, "GatherAlpha"))
> +        {
> +            load_type = HLSL_RESOURCE_GATHER_ALPHA;
> +            read_channel = 3;
> +        }
> +        else {
> +            assert(!"Unexpected Gather method.");
> +        }

I think the assert is a bit superfluous.

On the other hand, when this kind of code pattern comes up, I tend to 
prefer using a helper, along the lines of

else if (!strcmp(name, "GatherGreen"))
{
     add_gather(HLSL_RESOURCE_GATHER_GREEN, 1, ...);
}

which avoids relying on the implicit agreement between the outer check 
on the method name and the inner if/else chain.

> +
> +
> +        if (!strcmp(name, "Gather"))
> +        {
> +            if (params->args_count != 2 && params->args_count != 3)
> +            {
> +                hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT,
> +                        "Wrong number of arguments to method 'Gather': expected 2 or 3, but got %u.", params->args_count);
> +                return false;
> +            }
> +        }
> +        else if (params->args_count < 2 || params->args_count == 5 || params->args_count > 7)
> +        {
> +            hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT,
> +                    "Wrong number of arguments to method '%s': expected 2,3,4,6 or 7, but got %u.", name, params->args_count);
> +            return false;
> +        }
> +
> +        status_out_arg = -1;
> +        if (params->args_count == 4)
> +            status_out_arg = 3;
> +        if (params->args_count == 7)
> +            status_out_arg = 6;
> +        if (status_out_arg != -1)
> +            FIXME("Ignoring 'status' output parameter.\n");

This probably should be hlsl_fixme().

> +
> +        if (params->args_count == 6 || params->args_count == 7)
> +            FIXME("Ignoring multiple offset parameters.\n");

And this one almost certainly should be hlsl_fixme() as well.

> +
> +        if (params->args_count == 3 || params->args_count == 4)
> +        {
> +            if (!(offset = add_implicit_conversion(ctx, instrs, params->args[2],
> +                    hlsl_get_vector_type(ctx, HLSL_TYPE_INT, sampler_dim), loc)))
> +                offset = params->args[2];
> +        }
> +
> +        sampler_type = params->args[0]->data_type;
> +        if (sampler_type->type != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER
> +                || sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC)
> +        {
> +            struct vkd3d_string_buffer *string;
> +
> +            if ((string = hlsl_type_to_string(ctx, sampler_type)))
> +                hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
> +                        "Wrong type for argument 0 of %s(): expected 'sampler', but got '%s'.", name, string->buffer);
> +            hlsl_release_string_buffer(ctx, string);
> +            return false;
> +        }
> +
> +        if (read_channel >= object_type->e.resource_format->dimx)
> +        {
> +            hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
> +                    "Method %s() requires at least %d channels.", name, read_channel+1);
> +            return false;
> +        }
> +
> +        result_type = hlsl_get_vector_type(ctx, object_type->e.resource_format->base_type, 4);
> +
> +        /* Only HLSL_IR_LOAD can return an object. */
> +        sampler_load = hlsl_ir_load(params->args[0]);
> +
> +        if (!(coords = add_implicit_conversion(ctx, instrs, params->args[1],
> +                hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc)))
> +            coords = params->args[1];
> +
> +        if (!(load = hlsl_new_resource_load(ctx, result_type,
> +                load_type, object_load->src.var, object_load->src.offset.node,
> +                sampler_load->src.var, sampler_load->src.offset.node, coords, offset, loc)))
> +            return false;
> +        list_add_tail(instrs, &load->node.entry);
> +        return true;
> +    }
>       else
>       {
>           struct vkd3d_string_buffer *string;
> diff --git a/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d-shader/hlsl_sm4.c
> index 11e1f59a..be35d125 100644
> --- a/libs/vkd3d-shader/hlsl_sm4.c
> +++ b/libs/vkd3d-shader/hlsl_sm4.c
> @@ -1654,6 +1654,56 @@ static void write_sm4_loop(struct hlsl_ctx *ctx,
>       write_sm4_instruction(buffer, &instr);
>   }
>   
> +
> +static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
> +        const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst,
> +        const struct hlsl_deref *resource, const struct hlsl_deref *sampler, const struct hlsl_ir_node *coords,
> +        unsigned int swizzle, const struct hlsl_ir_node *texel_offset)
> +{
> +    struct sm4_instruction instr;
> +    unsigned int writemask;
> +    int n_srcs = 0;

I would just leave out this variable and use "instr.src_count" directly.

> +
> +    memset(&instr, 0, sizeof(instr));
> +
> +    /* TODO: possible optimization, if the offset ranges from -8 to 7, an VKD3D_SM4_OP_GATHER4 with
> +    an aoffimmi modifier can be used. */
> +    instr.opcode = texel_offset? VKD3D_SM5_OP_GATHER4_PO : VKD3D_SM4_OP_GATHER4;
> +
> +    sm4_register_from_node(&instr.dsts[0].reg, &instr.dsts[0].writemask, NULL, dst);
> +    instr.dst_count = 1;
> +
> +    sm4_register_from_node(&instr.srcs[n_srcs].reg, &writemask, &instr.srcs[n_srcs].swizzle_type, coords);
> +    instr.srcs[n_srcs].swizzle = hlsl_swizzle_from_writemask(writemask);
> +    n_srcs++;
> +
> +    if (texel_offset)
> +    {
> +        sm4_register_from_node(&instr.srcs[n_srcs].reg, &writemask, &instr.srcs[n_srcs].swizzle_type, texel_offset);
> +        instr.srcs[n_srcs].swizzle = hlsl_swizzle_from_writemask(writemask);
> +
> +        n_srcs++;
> +    }
> +
> +    sm4_register_from_deref(ctx, &instr.srcs[n_srcs].reg, &writemask, &instr.srcs[n_srcs].swizzle_type,
> +        resource, resource_type);
> +    instr.srcs[n_srcs].swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), instr.dsts[0].writemask);
> +    n_srcs++;
> +
> +    sm4_register_from_deref(ctx, &instr.srcs[n_srcs].reg, &writemask, NULL,
> +        sampler, sampler->var->data_type);
> +    instr.srcs[n_srcs].reg.type = VKD3D_SM4_RT_SAMPLER;
> +    instr.srcs[n_srcs].reg.dim = VKD3D_SM4_DIMENSION_VEC4;
> +    instr.srcs[n_srcs].swizzle_type = VKD3D_SM4_SWIZZLE_SCALAR;
> +    instr.srcs[n_srcs].swizzle = swizzle;
> +    n_srcs++;
> +
> +    instr.src_count = n_srcs;
> +
> +    write_sm4_instruction(buffer, &instr);
> +}
> +
> +
>   static void write_sm4_resource_load(struct hlsl_ctx *ctx,
>           struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_load *load)
>   {
> @@ -1694,6 +1744,23 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx,
>               write_sm4_sample(ctx, buffer, resource_type, &load->node, &load->resource, &load->sampler,
>                       coords, texel_offset);
>               break;
> +
> +        case HLSL_RESOURCE_GATHER_RED:
> +            write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource,
> +                    &load->sampler, coords, HLSL_SWIZZLE(X, X, X, X), texel_offset);
> +            break;
> +        case HLSL_RESOURCE_GATHER_GREEN:
> +            write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource,
> +                    &load->sampler, coords, HLSL_SWIZZLE(Y, Y, Y, Y), texel_offset);
> +            break;
> +        case HLSL_RESOURCE_GATHER_BLUE:
> +            write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource,
> +                    &load->sampler, coords, HLSL_SWIZZLE(Z, Z, Z, Z), texel_offset);
> +            break;
> +        case HLSL_RESOURCE_GATHER_ALPHA:
> +            write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource,
> +                    &load->sampler, coords, HLSL_SWIZZLE(W, W, W, W), texel_offset);
> +            break;

Hmm, looking at this, it is a little tempting to add a separate field 
(e.g. for the channel) to the resource load instead of having four 
separate load types. Have you considered that approach?



More information about the wine-devel mailing list