[PATCH vkd3d v7 4/6] vkd3d-shader/hlsl: Perform a copy propagation pass.

Matteo Bruni matteo.mystral at gmail.com
Thu Nov 18 11:28:23 CST 2021


On Wed, Nov 17, 2021 at 9:47 AM Giovanni Mascellani
<gmascellani at codeweavers.com> wrote:
>
> Signed-off-by: Giovanni Mascellani <gmascellani at codeweavers.com>
> Signed-off-by: Zebediah Figura <zfigura at codeweavers.com>
> ---
>  libs/vkd3d-shader/hlsl_codegen.c | 254 ++++++++++++++++++++++++++++++-
>  1 file changed, 253 insertions(+), 1 deletion(-)
> diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c
> index a0154e3b..42422220 100644
> --- a/libs/vkd3d-shader/hlsl_codegen.c
> +++ b/libs/vkd3d-shader/hlsl_codegen.c
> @@ -237,6 +237,253 @@ static void replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new)
>      hlsl_free_instr(old);
>  }
>
> +/* The copy propagation pass scans the code trying to reconstruct, for
> + * each load, which is the store that last wrote to that
> + * variable. When this happens, the load can be replaced with the node
> + * from which the variable was stored.
> + *
> + * In order to do that, the pass keeps a map of all the variables it
> + * has already seen; for each variable, a pointer to a node and a
> + * component index is kept for each of the registers that belong to
> + * the variable. This means that, at that point of the scan, that
> + * register was last stored to from that component of that node. The
> + * pointer can be NULL if information for that register could not be
> + * gathered statically (either because a non-constant offset is used,
> + * or because control flow forces us to drop information).
> + *
> + * When scanning through a store, data for the stored-to variable is
> + * updated. When scanning through a load, it is checked if all the
> + * registers involved in the load come from a single node. In such
> + * case, the store can be replaced with a swizzle based on that
> + * node. */

Time to nitpick the comments...

The first paragraph seems a bit hard to read to me. I'm not a native
speaker (suggestions welcome!) but I'd rewrite it differently, maybe
something like:
"The purpose of the copy propagation pass is to get rid of
intermediate copies of a variable, replacing each use of a variable
with a reference to the instruction that initially defined its current
value."

Technically at this point of the compilation process there is no
notion of registers so I'd avoid using that word. I think "component"
works okay to refer to the individual scalar elements of a variable.
You can use "destination variable" when discussing the stored
variable.

In general, as already mentioned, getting too much into the details in
those "overview" comments doesn't seem useful to me. I'd rather have
some specific comment right next to the relevant piece of code, if
necessary.

> +struct copy_propagation_value
> +{
> +    struct hlsl_ir_node *node;
> +    unsigned int component;
> +};
> +
> +struct copy_propagation_variable
> +{
> +    struct rb_entry entry;
> +    struct hlsl_ir_var *var;
> +    struct copy_propagation_value *values;
> +};

I still haven't gotten warm to these names. What about
copy_propagation_definition (or just copy_propagation_def)?

> +
> +struct copy_propagation_state
> +{
> +    struct rb_tree variables;
> +};
> +
> +static int copy_propagation_variable_compare(const void *key, const struct rb_entry *entry)
> +{
> +    struct copy_propagation_variable *variable = RB_ENTRY_VALUE(entry, struct copy_propagation_variable, entry);
> +    uintptr_t key_int = (uintptr_t)key, entry_int = (uintptr_t)variable->var;
> +
> +    return (key_int > entry_int) - (key_int < entry_int);
> +}
> +
> +static void copy_propagation_variable_destroy(struct rb_entry *entry, void *context)
> +{
> +    struct copy_propagation_variable *variable = RB_ENTRY_VALUE(entry, struct copy_propagation_variable, entry);
> +
> +    vkd3d_free(variable);
> +}
> +
> +static struct copy_propagation_variable *copy_propagation_get_variable(struct copy_propagation_state *state,
> +        struct hlsl_ir_var *var)
> +{
> +    struct rb_entry *entry = rb_get(&state->variables, var);
> +
> +    if (entry)
> +        return RB_ENTRY_VALUE(entry, struct copy_propagation_variable, entry);
> +    else
> +        return NULL;
> +}
> +
> +static struct copy_propagation_variable *copy_propagation_create_variable(struct hlsl_ctx *ctx,
> +        struct copy_propagation_state *state, struct hlsl_ir_var *var)
> +{
> +    struct rb_entry *entry = rb_get(&state->variables, var);
> +    struct copy_propagation_variable *variable;
> +    int res;
> +
> +    if (entry)
> +        return RB_ENTRY_VALUE(entry, struct copy_propagation_variable, entry);
> +
> +    variable = hlsl_alloc(ctx, sizeof(*variable));
> +    if (!variable)
> +        return NULL;
> +
> +    variable->var = var;
> +    variable->values = hlsl_alloc(ctx, sizeof(*variable->values) * var->data_type->reg_size);
> +    if (!variable->values)
> +    {
> +        vkd3d_free(variable);
> +        return NULL;
> +    }

If we want to go ahead with this dynamic / arbitrary variable size
thing (which I still don't like, but I give up), at least let's avoid
a separate allocation for the values, i.e.

struct copy_propagation_variable
{
    struct rb_entry entry;
    struct hlsl_ir_var *var;
    struct copy_propagation_value values[];
};

variable = hlsl_alloc(ctx, offsetof(struct copy_propagation_variable,
values[var->data_type->reg_size]);

> +
> +    res = rb_put(&state->variables, var, &variable->entry);
> +    assert(!res);
> +
> +    return variable;
> +}
> +
> +static void copy_propagation_invalidate_whole_variable(struct copy_propagation_variable *variable)
> +{
> +    TRACE("Invalidate variable %s.\n", variable->var->name);
> +    memset(variable->values, 0, sizeof(*variable->values) * variable->var->data_type->reg_size);
> +}
> +
> +static void copy_propagation_set_value(struct copy_propagation_variable *variable, unsigned int offset,
> +        unsigned char writemask, struct hlsl_ir_node *node)
> +{
> +    static const char swizzle_letters[] = {'x', 'y', 'z', 'w'};
> +
> +    unsigned int i, j = 0;
> +
> +    for (i = 0; i < 4; ++i)
> +    {
> +        if (writemask & (1u << i))
> +        {
> +            TRACE("Variable %s[%d] is written by instruction %p.%c.\n",
> +                    variable->var->name, offset + i, node, swizzle_letters[i]);

I think you can use debug_hlsl_writemask() here (and if not, probably adapt it).

Is there any case where offset + i can be negative?

> +            variable->values[offset + i].node = node;
> +            variable->values[offset + i].component = j++;
> +        }
> +    }
> +}
> +
> +static struct hlsl_ir_node *copy_propagation_find_replacement(struct copy_propagation_variable *variable,
> +        unsigned int offset, unsigned int count, unsigned int *swizzle)

I think I prefer my original suggestion of _get_ (or something else)
rather than _find_; the point being that this function isn't really
finding anything.

> +{
> +    struct hlsl_ir_node *node = NULL;
> +    unsigned int i;
> +
> +    assert(offset + count <= variable->var->data_type->reg_size);
> +
> +    *swizzle = 0;
> +
> +    for (i = 0; i < count; ++i)
> +    {
> +        if (!node)
> +            node = variable->values[offset + i].node;
> +        else if (node != variable->values[offset + i].node)
> +            return NULL;
> +        *swizzle |= variable->values[offset + i].component << (2 * i);
> +    }
> +
> +    return node;
> +}
> +
> +static bool copy_propagation_analyze_load(struct hlsl_ctx *ctx, struct hlsl_ir_load *load,
> +        struct copy_propagation_state *state)
> +{
> +    static const char swizzle_letters[] = {'x', 'y', 'z', 'w'};
> +
> +    struct hlsl_ir_node *node = &load->node, *new_node;
> +    struct copy_propagation_variable *variable;
> +    struct hlsl_type *type = node->data_type;
> +    unsigned int offset, swizzle;
> +    struct hlsl_deref *src = &load->src;
> +    struct hlsl_ir_var *var = src->var;
> +    struct hlsl_ir_swizzle *swizzle_node;

Nitpick, declaration order.

> +
> +    if (type->type != HLSL_CLASS_SCALAR && type->type != HLSL_CLASS_VECTOR)
> +        return false;
> +
> +    if (!hlsl_offset_from_deref(src, &offset))
> +        return false;
> +
> +    variable = copy_propagation_get_variable(state, var);
> +    if (!variable)
> +        return false;
> +
> +    new_node = copy_propagation_find_replacement(variable, offset, type->dimx, &swizzle);
> +
> +    TRACE("Load from %s[%d-%d] reconstructed as instruction %p.%c%c%c%c.\n",
> +            var->name, offset, offset + type->dimx, new_node,
> +            swizzle_letters[swizzle % 4], swizzle_letters[(swizzle >> 2) % 4],
> +            swizzle_letters[(swizzle >> 4) % 4], swizzle_letters[(swizzle >> 6) % 4]);

Here a debug_hlsl_swizzle() would come in handy. It should be pretty
simple to split it out of dump_ir_swizzle().

> +
> +    if (!new_node)
> +        return false;
> +
> +    if (!(swizzle_node = hlsl_new_swizzle(ctx, swizzle, type->dimx, new_node, &node->loc)))
> +        return false;
> +    list_add_before(&node->entry, &swizzle_node->node.entry);
> +
> +    replace_node(node, &swizzle_node->node);
> +
> +    return true;
> +}
> +
> +static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_store *store,
> +        struct copy_propagation_state *state)
> +{
> +    struct copy_propagation_variable *variable;
> +    struct hlsl_deref *lhs = &store->lhs;
> +    struct hlsl_ir_var *var = lhs->var;
> +    unsigned int offset;
> +
> +    variable = copy_propagation_create_variable(ctx, state, var);
> +    if (!variable)
> +        return;
> +
> +    if (hlsl_offset_from_deref(lhs, &offset))
> +        copy_propagation_set_value(variable, offset, store->writemask, store->rhs.node);
> +    else
> +        copy_propagation_invalidate_whole_variable(variable);
> +}
> +
> +static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_block *block,
> +        struct copy_propagation_state *state)
> +{
> +    struct hlsl_ir_node *instr, *next;
> +    bool progress = false;
> +
> +    LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry)
> +    {
> +        switch (instr->type)
> +        {
> +            case HLSL_IR_LOAD:
> +                progress |= copy_propagation_analyze_load(ctx, hlsl_ir_load(instr), state);
> +                break;
> +
> +            case HLSL_IR_STORE:
> +                copy_propagation_record_store(ctx, hlsl_ir_store(instr), state);
> +                break;
> +
> +            case HLSL_IR_IF:
> +                FIXME("Copy propagation doesn't support conditionals yet, leaving.\n");
> +                return progress;
> +
> +            case HLSL_IR_LOOP:
> +                FIXME("Copy propagation doesn't support loops yet, leaving.\n");
> +                return progress;
> +
> +            default:
> +                break;
> +        }
> +    }
> +
> +    return progress;
> +}
> +
> +static bool copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block)
> +{
> +    struct copy_propagation_state state;
> +    bool progress;
> +
> +    rb_init(&state.variables, copy_propagation_variable_compare);
> +
> +    progress = copy_propagation_transform_block(ctx, block, &state);
> +
> +    rb_destroy(&state.variables, copy_propagation_variable_destroy, NULL);
> +
> +    return progress;
> +}

This and copy_propagation_transform_block() are a bit of a
reimplementation of transform_ir(). It should be possible to extend
transform_ir() to handle this new pass instead.

I'd introduce a struct like

struct transform_pass
{
    bool (*initialize)(struct hlsl_ctx *hlsl_ctx, void **ctx);
    bool (*process_instruction)(void *ctx, struct hlsl_ir_node *, void *);
    void (*destroy)(void *ctx);
};

and update transform_ir() and the existing passes to make use of it. I
imagine we could have some generic initialize() implementation simply
doing *ctx = hlsl_ctx; and an empty destroy() implementation to be
used when there is no particular context to carry around (like in the
existing passes).



More information about the wine-devel mailing list