[PATCH vkd3d 2/5] vkd3d-shader/hlsl: Perform a copy propagation pass.

Zebediah Figura zfigura at codeweavers.com
Tue Nov 9 16:21:14 CST 2021


On 11/9/21 3:44 AM, Giovanni Mascellani wrote:
> Signed-off-by: Giovanni Mascellani <gmascellani at codeweavers.com>
> ---
> Pretty sure this (and the following ones) will require some more
> tweaking before being accepted! :-)
> 
>   libs/vkd3d-shader/hlsl_codegen.c | 254 ++++++++++++++++++++++++++++++-
>   1 file changed, 253 insertions(+), 1 deletion(-)
> 

Overall this patch looks a lot better than I was afraid of. It's a lot 
of code that's intimidating to review, but once you ignore the rbtree 
boilerplate it's simple enough and seems about in line with what I 
expect. There's quite a few things that I think can be simplified, but 
the basic structure seems sound, so nice work.

The CF part is probably going to be a lot trickier, but fortunately I 
think that this patch alone is the one that really matters. Frankly, if 
we were to take this patch, and then a second patch that does copy-prop 
on interior CF blocks with a fresh copy_propagation_state(), we might 
even cover enough that it's not even worrying about CF...

> diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c
> index 0ee8ab55..8db8bfc9 100644
> --- a/libs/vkd3d-shader/hlsl_codegen.c
> +++ b/libs/vkd3d-shader/hlsl_codegen.c
> @@ -237,6 +237,253 @@ static void replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new)
>       hlsl_free_instr(old);
>   }
>   
> +/* struct copy_propagation_state represents the accumulated knowledge
> + * of the copy propagation pass while it scans through the code. Field
> + * "variables" is a tree whose elements have type struct
> + * copy_propagation_varible, and represent each of the variables the
> + * pass has already encountered (except those with special semantics,
> + * which are ignored). For each variable, the array "values" (whose
> + * length is the register size of the variable) represent which node
> + * and which index inside that node (i.e., which of the at most four
> + * entries of a vector) provided that value last time. Field "node"
> + * can be NULL, meaning that the pass was not able to statically
> + * determine the node.
> + */

This comment feels a bit too low-level? I dunno, comments like this are 
hard to review, but my inclination is to describe what you're doing at a 
high level, along the lines of "we track the last known value of each 
component of each variable", and let the code itself explain the details.

> +
> +struct copy_propagation_value
> +{
> +    struct hlsl_ir_node *node;
> +    unsigned int index;

I think the term we want is "component", not "index".

> +};
> +
> +struct copy_propagation_variable
> +{
> +    struct rb_entry entry;
> +    struct hlsl_ir_var *var;
> +    struct copy_propagation_value *values;
> +};
> +
> +struct copy_propagation_state
> +{
> +    struct rb_tree variables;
> +};
> +
> +static int copy_propagation_variable_compare(const void *key, const struct rb_entry *entry)
> +{
> +    struct copy_propagation_variable *variable = RB_ENTRY_VALUE(entry, struct copy_propagation_variable, entry);
> +    uintptr_t key_int = (uintptr_t)key, entry_int = (uintptr_t)variable->var;
> +
> +    if (key_int < entry_int)
> +        return -1;
> +    else if (key_int > entry_int)
> +        return 1;
> +    else
> +        return 0;

Could we just modify the rbtree implementation to use uintptr_t instead, 
and then do a direct subtraction?

> +}
> +
> +static void copy_propagation_variable_destroy(struct rb_entry *entry, void *context)
> +{
> +    struct copy_propagation_variable *variable = RB_ENTRY_VALUE(entry, struct copy_propagation_variable, entry);
> +
> +    vkd3d_free(variable);
> +}
> +
> +static struct copy_propagation_variable *copy_propagation_get_variable(struct hlsl_ctx *ctx,
> +        struct copy_propagation_state *state, struct hlsl_ir_var *var)
> +{
> +    struct rb_entry *entry = rb_get(&state->variables, var);
> +    struct copy_propagation_variable *variable;
> +    int res;
> +
> +    if (entry)
> +        return RB_ENTRY_VALUE(entry, struct copy_propagation_variable, entry);
> +
> +    variable = hlsl_alloc(ctx, sizeof(*variable));
> +    if (!variable)
> +        return NULL;
> +
> +    variable->var = var;
> +    variable->values = hlsl_alloc(ctx, sizeof(*variable->values) * var->data_type->reg_size);
> +    if (!variable->values)
> +    {
> +        vkd3d_free(variable);
> +        return NULL;
> +    }
> +
> +    res = rb_put(&state->variables, var, &variable->entry);
> +    assert(!res);

Although this is a bit awkward because most of the function is only 
relevant for stores, not loads, and I think it would actually be better 
to reflect that in the code, one way or another (so we can do less effort).

> +
> +    return variable;
> +}
> +
> +static void copy_propagation_set_value(struct copy_propagation_variable *variable, unsigned int offset,
> +        unsigned char writemask, struct hlsl_ir_node *node)
> +{
> +    unsigned int index;
> +
> +    for (index = 0; index < 4; ++index)
> +    {
> +        if (writemask & (1u << index))
> +        {
> +            if (TRACE_ON())
> +            {
> +                char buf[32];
> +                if (!node)
> +                    sprintf(buf, "(nil)");

Can that happen?

> +                else if (node->index)
> +                    sprintf(buf, "@%u", node->index);
> +                else
> +                    sprintf(buf, "%p", node);

Can that happen?

> +                TRACE("variable %s[%d] is written by %p[%d]\n", variable->var->name, offset + index, buf, index);

This trace doesn't really match the usual format, but more saliently, 
the hardcoded buffer is ugly. Assuming the other two cases really can 
happen, I'd rather see individual traces spelled out.

Same thing below.

> +            }
> +            variable->values[offset + index].node = node;
> +            variable->values[offset + index].index = index;
> +        }
> +    }
> +}
> +
> +/* Check if locations [offset, offset+count) in variable were all
> + * written from the same node. If so return the node the corresponding
> + * indices, otherwise return NULL (and undefined indices). */
> +static struct hlsl_ir_node *copy_propagation_reconstruct_node(struct copy_propagation_variable *variable,
> +        unsigned int offset, unsigned int count, unsigned int indices[4])

"indices" is really just a swizzle; can we return it as such?

> +{
> +    struct hlsl_ir_node *node = NULL;
> +    unsigned int i;
> +
> +    assert(offset + count <= variable->var->data_type->reg_size);
> +
> +    for (i = 0; i < count; ++i)
> +    {
> +        if (!node)
> +            node = variable->values[offset + i].node;
> +        else if (node != variable->values[offset + i].node)
> +            return NULL;
> +        indices[i] = variable->values[offset + i].index;
> +    }
> +
> +    return node;
> +}

This is really obviously the right thing to do, yet it still confused me 
a *lot* when trying to review it.

That might just be me, but if not, I guess the function name and comment 
could use work. Explaining *why* the function is necessary would 
probably help.

> +
> +static bool copy_propagation_load(struct hlsl_ctx *ctx, struct hlsl_ir_load *load,

Can you please try to name functions using a verb? There's three more 
examples below.

> +        struct copy_propagation_state *state)
> +{
> +    struct hlsl_ir_node *node = &load->node, *new_node;
> +    struct copy_propagation_variable *variable;
> +    struct hlsl_type *type = node->data_type;
> +    unsigned int offset, indices[4] = {};

{} isn't portable, unfortunately.

> +    struct hlsl_deref *src = &load->src;
> +    struct hlsl_ir_var *var = src->var;
> +    struct hlsl_ir_swizzle *swizzle;
> +    DWORD s;
> +
> +    if (var->is_input_semantic || var->is_output_semantic || var->is_uniform)
> +        return false;

For input semantics and uniforms: yeah, but do we need to? The important 
question is "can we reconstruct a store for this variable", and this 
should already be false.

For output semantics, we should never get here in the first place.

> +
> +    if (type->type != HLSL_CLASS_SCALAR && type->type != HLSL_CLASS_VECTOR)
> +        return false;
> +
> +    offset = hlsl_offset_from_deref(src);

The problem with hlsl_offset_from_deref(), and the reason I probably 
should have fought against 62b25bc52b in its current form, is that it 
not only doesn't deal with non-constant offsets, but doesn't really give 
the caller a way to bail either. We should probably be returning bool 
from it, and then aborting here.

> +
> +    variable = copy_propagation_get_variable(ctx, state, var);
> +    if (!variable)
> +        return false;
> +
> +    new_node = copy_propagation_reconstruct_node(variable, offset, type->dimx, indices);
> +
> +    if (TRACE_ON())
> +    {
> +        char buf[32];
> +        if (!new_node)
> +            sprintf(buf, "(nil)");

Is this useful to trace?

> +        else if (new_node->index)
> +            sprintf(buf, "@%u", new_node->index);
> +        else
> +            sprintf(buf, "%p", new_node);

Can this happen?

> +        TRACE("load from %s[%d-%d] reconstructed to %s[%d %d %d %d]\n", var->name, offset,
> +              offset + type->dimx, buf, indices[0], indices[1], indices[2], indices[3]);
> +    }
> +
> +    if (!new_node)
> +        return false;
> +
> +    s = indices[0] | indices[1] << 2 | indices[2] << 4 | indices[3] << 6;
> +    if (!(swizzle = hlsl_new_swizzle(ctx, s, type->dimx, new_node, &node->loc)))
> +        return false;
> +    list_add_before(&node->entry, &swizzle->node.entry);
> +
> +    replace_node(node, &swizzle->node);
> +
> +    return true;
> +}
> +
> +static bool copy_propagation_store(struct hlsl_ctx *ctx, struct hlsl_ir_store *store,
> +        struct copy_propagation_state *state)
> +{
> +    struct copy_propagation_variable *variable;
> +    struct hlsl_deref *lhs = &store->lhs;
> +    struct hlsl_ir_var *var = lhs->var;
> +
> +    if (var->is_input_semantic || var->is_output_semantic || var->is_uniform)
> +        return false;

Same deal as in copy_propagation_load(), but inverted.

> +
> +    variable = copy_propagation_get_variable(ctx, state, var);
> +    if (!variable)
> +        return false;
> +
> +    copy_propagation_set_value(variable, hlsl_offset_from_deref(lhs), store->writemask, store->rhs.node);
> +
> +    return false;
> +}

Shouldn't this function just return void?

> +
> +static bool copy_propagation_recursive(struct hlsl_ctx *ctx, struct hlsl_block *block,
> +        struct copy_propagation_state *state)
> +{
> +    struct hlsl_ir_node *instr, *next;
> +    bool progress = false;
> +
> +    LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry)
> +    {
> +        switch (instr->type)
> +        {
> +            case HLSL_IR_LOAD:
> +                progress |= copy_propagation_load(ctx, hlsl_ir_load(instr), state);
> +                break;
> +
> +            case HLSL_IR_STORE:
> +                progress |= copy_propagation_store(ctx, hlsl_ir_store(instr), state);
> +                break;
> +
> +            case HLSL_IR_IF:
> +                FIXME("Copy propagation doesn't support conditionals yet, leaving.\n");
> +                return progress;
> +
> +            case HLSL_IR_LOOP:
> +                FIXME("Copy propagation doesn't support loops yet, leaving.\n");
> +                return progress;
> +
> +            default:
> +                break;
> +        }
> +    }
> +
> +    return progress;
> +}
> +
> +static bool copy_propagation_pass(struct hlsl_ctx *ctx, struct hlsl_block *block)
> +{
> +    struct copy_propagation_state state;
> +    bool progress;
> +
> +    rb_init(&state.variables, copy_propagation_variable_compare);
> +
> +    progress = copy_propagation_recursive(ctx, block, &state);
> +
> +    rb_destroy(&state.variables, copy_propagation_variable_destroy, NULL);
> +
> +    return progress;
> +}
> +
>   static bool is_vec1(const struct hlsl_type *type)
>   {
>       return (type->type == HLSL_CLASS_SCALAR) || (type->type == HLSL_CLASS_VECTOR && type->dimx == 1);
> @@ -1354,7 +1601,12 @@ int hlsl_emit_dxbc(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun
>           progress |= transform_ir(ctx, split_struct_copies, body, NULL);
>       }
>       while (progress);
> -    while (transform_ir(ctx, fold_constants, body, NULL));
> +    do
> +    {
> +        progress = transform_ir(ctx, fold_constants, body, NULL);
> +        progress |= copy_propagation_pass(ctx, body);

This probably isn't worth examining, but I'm curious why constant 
folding is useful after copy-prop.

> +    }
> +    while (progress);
>   
>       if (ctx->profile->major_version < 4)
>           transform_ir(ctx, lower_division, body, NULL);
> 



More information about the wine-devel mailing list