[PATCH vkd3d 2/6] vkd3d-shader/hlsl: Write SM4 store instructions.

Fri Aug 27 04:52:19 CDT 2021

On Fri, Aug 27, 2021 at 11:01 AM Giovanni Mascellani
<gmascellani at codeweavers.com> wrote:
>
> Hi,
>
> Il 24/08/21 08:18, Zebediah Figura ha scritto:
> > Signed-off-by: Zebediah Figura <zfigura at codeweavers.com>
> > ---
> >   libs/vkd3d-shader/hlsl_sm4.c | 116 ++++++++++++++++++++++++++++++++++-
> >   1 file changed, 114 insertions(+), 2 deletions(-)
> >
> > diff --git a/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d-shader/hlsl_sm4.c
> > index 15927671..2e304c5a 100644
> > --- a/libs/vkd3d-shader/hlsl_sm4.c
> > +++ b/libs/vkd3d-shader/hlsl_sm4.c
> > @@ -591,6 +591,7 @@ static unsigned int sm4_swizzle_type(enum vkd3d_sm4_register_type type)
> >       {
> >           case VKD3D_SM4_RT_CONSTBUFFER:
> >           case VKD3D_SM4_RT_INPUT:
> > +        case VKD3D_SM4_RT_TEMP:
> >               return VKD3D_SM4_SWIZZLE_VEC4;
> >
> >           default:
> > @@ -599,6 +600,64 @@ static unsigned int sm4_swizzle_type(enum vkd3d_sm4_register_type type)
> >       }
> >   }
> >
> > +static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *reg,
> > +        unsigned int *writemask, const struct hlsl_deref *deref, const struct hlsl_type *data_type)
> > +{
> > +    const struct hlsl_ir_var *var = deref->var;
> > +
> > +    if (var->is_output_semantic)
> > +    {
> > +        bool has_idx;
> > +
> > +        if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, true, &reg->type, &has_idx))
> > +        {
> > +            if (has_idx)
> > +            {
> > +                reg->idx[0] = var->semantic.index;
> > +                reg->idx_count = 1;
> > +            }
> > +
> > +            if (reg->type == VKD3D_SM4_RT_DEPTHOUT)
> > +                reg->dim = VKD3D_SM4_DIMENSION_SCALAR;
> > +            else
> > +                reg->dim = VKD3D_SM4_DIMENSION_VEC4;
> > +            *writemask = (1u << data_type->dimx) - 1;
> > +        }
> > +        else
> > +        {
> > +            struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(deref, data_type);
> > +
> > +            assert(hlsl_reg.allocated);
> > +            reg->type = VKD3D_SM4_RT_OUTPUT;
> > +            reg->dim = VKD3D_SM4_DIMENSION_VEC4;
> > +            reg->idx[0] = hlsl_reg.id;
> > +            reg->idx_count = 1;
> > +            *writemask = hlsl_reg.writemask;
> > +        }
> > +    }
> > +    else
> > +    {
> > +        struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(deref, data_type);
> > +
> > +        assert(hlsl_reg.allocated);
> > +        reg->type = VKD3D_SM4_RT_TEMP;
> > +        reg->dim = VKD3D_SM4_DIMENSION_VEC4;
> > +        reg->idx[0] = hlsl_reg.id;
> > +        reg->idx_count = 1;
> > +        *writemask = hlsl_reg.writemask;
> > +    }
> > +}
> > +
> > +static void sm4_register_from_node(struct sm4_register *reg, unsigned int *writemask, const struct hlsl_ir_node *instr)
> > +{
> > +    assert(instr->reg.allocated);
> > +    reg->type = VKD3D_SM4_RT_TEMP;
> > +    reg->dim = VKD3D_SM4_DIMENSION_VEC4;
> > +    reg->idx[0] = instr->reg.id;
> > +    reg->idx_count = 1;
> > +    *writemask = instr->reg.writemask;
> > +}
> > +
>
> I think I would use "node" as variable name instead of "instr", so there
> is one less term equivalence to keep in the mind when reading the code.
> But it's not a big deal.

Actually we're moving away from node to instruction. Node is a
historical artifact of the original IR which was tree-based and it
doesn't quite fit nowadays.

>
> >   static uint32_t sm4_encode_register(const struct sm4_register *reg)
> >   {
> >       return (reg->type << VKD3D_SM4_REGISTER_TYPE_SHIFT)
> > @@ -785,11 +844,40 @@ static void write_sm4_ret(struct vkd3d_bytecode_buffer *buffer)
> >       write_sm4_instruction(buffer, &instr);
> >   }
> >
> > -static void write_sm4_shdr(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc)
> > +static void write_sm4_store(struct hlsl_ctx *ctx,
> > +        struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_store *store)
> > +{
> > +    const struct hlsl_ir_node *rhs = store->rhs.node;
> > +    struct sm4_instruction instr;
> > +    unsigned int writemask;
> > +
> > +    if (store->lhs.var->data_type->type == HLSL_CLASS_MATRIX)
> > +    {
> > +        hlsl_fixme(ctx, store->node.loc, "Store to a matrix variable.\n");
> > +        return;
> > +    }
> > +
> > +    memset(&instr, 0, sizeof(instr));
> > +    instr.opcode = VKD3D_SM4_OP_MOV;
> > +
> > +    sm4_register_from_deref(ctx, &instr.dst.reg, &writemask, &store->lhs, rhs->data_type);
> > +    instr.dst.writemask = hlsl_combine_writemasks(writemask, store->writemask);
> > +    instr.has_dst = 1;
> > +
> > +    sm4_register_from_node(&instr.srcs[0].reg, &writemask, rhs);
> > +    instr.srcs[0].swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), instr.dst.writemask);
> > +    instr.src_count = 1;
> > +
> > +    write_sm4_instruction(buffer, &instr);
> > +}
> > +
> > +static void write_sm4_shdr(struct hlsl_ctx *ctx,
> > +        const struct hlsl_ir_function_decl *entry_func, struct dxbc_writer *dxbc)
> >   {
> >       const struct hlsl_profile_info *profile = ctx->profile;
> >       struct vkd3d_bytecode_buffer buffer = {0};
> >       const struct hlsl_buffer *cbuffer;
> > +    const struct hlsl_ir_node *instr;
> >       const struct hlsl_ir_var *var;
> >       size_t token_count_position;
> >
> > @@ -824,6 +912,30 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc)
> >       if (ctx->temp_count)
> >           write_sm4_dcl_temps(&buffer, ctx->temp_count);
> >
> > +    LIST_FOR_EACH_ENTRY(instr, entry_func->body, struct hlsl_ir_node, entry)
> > +    {
> > +        if (instr->data_type)
> > +        {
> > +            if (instr->data_type->type == HLSL_CLASS_MATRIX)
> > +            {
> > +                FIXME("Matrix operations need to be lowered.\n");
> > +                break;
> > +            }
> > +
> > +            assert(instr->data_type->type == HLSL_CLASS_SCALAR || instr->data_type->type == HLSL_CLASS_VECTOR);
> > +        }
> > +
> > +        switch (instr->type)
> > +        {
> > +            case HLSL_IR_STORE:
> > +                write_sm4_store(ctx, &buffer, hlsl_ir_store(instr));
> > +                break;
> > +
> > +            default:
> > +                FIXME("Unhandled instruction type %s.\n", hlsl_node_type_to_string(instr->type));
> > +        }
> > +    }
> > +
> >       write_sm4_ret(&buffer);
> >
> >       set_u32(&buffer, token_count_position, bytecode_get_size(&buffer) / sizeof(uint32_t));
>
> I would split this part to a dedicated function that writes a block of
> instructions, and which we can reuse when we implement codegen for loops
> and conditions. The declaration might be something like:
>
> static void write_sm4_block(struct hlsl_ctx *ctx, struct
> vkd3d_bytecode_buffer *buffer, const struct list *instrs);
>
> Also, I would split the change in two commits: one that introduces
> write_sm4_block (including the check on matrix class) and another one
> that really implements store.

You don't want to introduce dead code though, so making that kind of
split at this point is a bit awkward.

Personally I'm generally for introducing shared helpers as they become
necessary rather than upfront. For example, if at the time of
implementing conditional or loop instructions, as it's likely, some
existing code parts can be nicely reused, a new helper can be created
by factoring out the relevant code. An upside of waiting for a second
user is that you're sure to make the helper(s) you know you need
rather than what you now think you'll need at some point in the
future.
One case where you want to split code early is when functions are
getting too big to be manageable otherwise. That doesn't seem to be
the case here though.