[v2 PATCH 7/7] d3d10/effect: Add initial support for indexing expressions.

Nikolay Sivov nsivov at codeweavers.com
Fri Nov 5 12:09:20 CDT 2021



On 11/5/21 7:32 PM, Matteo Bruni wrote:
> On Mon, Nov 1, 2021 at 11:52 AM Nikolay Sivov <nsivov at codeweavers.com> wrote:
>> Signed-off-by: Nikolay Sivov <nsivov at codeweavers.com>
>> ---
>>  dlls/d3d10/effect.c       | 556 +++++++++++++++++++++++++++++++++++++-
>>  dlls/d3d10/tests/effect.c | 114 ++++++++
>>  2 files changed, 669 insertions(+), 1 deletion(-)
>>
>> diff --git a/dlls/d3d10/effect.c b/dlls/d3d10/effect.c
>> index 78a3d0c386a..3ae2ad4f481 100644
>> --- a/dlls/d3d10/effect.c
>> +++ b/dlls/d3d10/effect.c
>> @@ -30,6 +30,9 @@ WINE_DEFAULT_DEBUG_CHANNEL(d3d10);
>>      ((DWORD)(ch2) << 16) | ((DWORD)(ch3) << 24 ))
>>  #define TAG_DXBC MAKE_TAG('D', 'X', 'B', 'C')
>>  #define TAG_FX10 MAKE_TAG('F', 'X', '1', '0')
>> +#define TAG_FXLC MAKE_TAG('F', 'X', 'L', 'C')
>> +#define TAG_CLI4 MAKE_TAG('C', 'L', 'I', '4')
>> +#define TAG_CTAB MAKE_TAG('C', 'T', 'A', 'B')
>>
>>  #define D3D10_FX10_TYPE_COLUMN_SHIFT    11
>>  #define D3D10_FX10_TYPE_COLUMN_MASK     (0x7 << D3D10_FX10_TYPE_COLUMN_SHIFT)
>> @@ -180,6 +183,89 @@ static enum d3d10_effect_container_type get_var_container_type(const struct d3d1
>>      }
>>  }
>>
>> +struct preshader_instr
>> +{
>> +    unsigned int comp_count : 16;
>> +    unsigned int reserved   :  4;
>> +    unsigned int opcode     : 11;
>> +    unsigned int scalar     :  1;
>> +};
> I see now what's going on with this i.e. this is used to represent
> instructions as encoded in the bytecode but also after changing
> "their" opcode with "ours". Is that just to speed up execution?

The only reason is to be able to use index access, and not to look up by
opcode value.

For practical purposes, it's definitely fine to keep it even more simple
for initial version,
and not patch original field. The only real world use of this that I saw
so far was a single "ftou(var)" call.

I'd prefer smaller initial version to something too complex to matter in
practice.

>
> I find this kind of setup a bit ugly. I do wonder what it would look
> like if this went all the way e.g. what about using this structure
> only for parsing and a separate "unpacked" struct preshader_instr for
> execution? Probably a fixed-size one, including the operands (and
> operands count) right in the same structure. It would be slightly more
> tricky at parse time (e.g. you won't know how much memory you'll need
> before parsing the whole preshader) while making the execution
> slightly more straightforward.
> Thinking about it, what I'm suggesting is probably not a good tradeoff
> and I'd be hard pressed to ask you to make this type of change anyway.
> I guess I'm mostly curious if you tried something like that (or
> otherwise something different) before settling on the current
> solution.

I haven't tried, but I looked at d3dx9 variant, and that does some
unnecessary work I think,
by that kind of repacking of original blob to some internal structures.
We can do it either way,
but I don't see a point of inventing different structure, that doesn't
offer anything comparing to raw blob.

>
>> +enum preshader_op
>> +{
>> +    D3D10_PRESHADER_OP_FTOU = 0,
>> +    D3D10_PRESHADER_OP_ADD,
>> +    D3D10_PRESHADER_OP_LAST_OP,
> D3D10_PRESHADER_OP_COUNT maybe? Also not great...
> Maybe just get rid of it and use ~0u or something for the sentinel value?

Commented above, we could get rid of this entirely for now and bsearch
(or not, because there is only 2 ops for now, and even ADD was added for
testing purposes).

>
>> +};
>> +
>> +struct preshader_op_info
>> +{
>> +    unsigned short idx;
>> +    unsigned short opcode;
>> +    char name[8];
>> +    pres_op_func func;
>> +};
> I already mentioned that this is a bit confusing: idx is "our" opcode,
> while opcode is "dxbc"'s opcode. Just getting rid of idx, as you wrote
> in your previous reply, works for me.

Ok.

>
>> +enum d3d10_reg_table_type
>> +{
>> +    D3D10_REG_TABLE_CONSTANTS = 1,
>> +    D3D10_REG_TABLE_CB = 2,
>> +    D3D10_REG_TABLE_RESULT = 4,
>> +    D3D10_REG_TABLE_TEMP = 7,
>> +    D3D10_REG_TABLE_LAST,
> For this one D3D10_REG_TABLE_COUNT works.
>
>> +};
>> +
>> +struct d3d10_effect_preshader
>> +{
>> +    struct d3d10_reg_table reg_tables[D3D10_REG_TABLE_LAST];
>> +    ID3D10Blob *code;
>> +
>> +    struct d3d10_ctab_var *vars;
>> +    unsigned int vars_count;
>> +};
>> +
>> +struct d3d10_preshader_parse_context
>> +{
>> +    struct d3d10_effect_preshader *preshader;
>> +    struct d3d10_effect *effect;
>> +};
>> +
>>  struct d3d10_effect_prop_dependency
>>  {
>>      unsigned int id;
>> @@ -192,11 +278,117 @@ struct d3d10_effect_prop_dependency
>>              struct d3d10_effect_variable *v;
>>              unsigned int offset;
>>          } var;
>> +        struct
>> +        {
>> +            struct d3d10_effect_variable *v;
>> +            struct d3d10_effect_preshader index;
>> +        } index_expr;
>>      } u;
>>  };
> I still think that we can get rid of the "u" name for the union. It
> doesn't need to happen in this patch though.
Will do.
>
>> +static HRESULT d3d10_effect_preshader_eval(struct d3d10_effect_preshader *p)
>> +{
>> +    unsigned int i, j, regt, offset, instr_count, input_count;
>> +    const DWORD *ip = ID3D10Blob_GetBufferPointer(p->code);
>> +    float *dst, *args[4], *retval;
>> +    struct preshader_instr ins;
>> +
>> +    dst = d3d10_effect_preshader_get_reg_ptr(p, D3D10_REG_TABLE_RESULT, 0);
>> +    memset(dst, 0, sizeof(float) * p->reg_tables[D3D10_REG_TABLE_RESULT].count);
>> +
>> +    /* Update constant buffer */
>> +    dst = d3d10_effect_preshader_get_reg_ptr(p, D3D10_REG_TABLE_CB, 0);
>> +    for (i = 0; i < p->vars_count; ++i)
>> +    {
>> +        struct d3d10_ctab_var *v = &p->vars[i];
>> +        memcpy(dst + v->offset, v->v->buffer->u.buffer.local_buffer, v->length * sizeof(*dst))
>> +    }
>> +
>> +    instr_count = *ip++;
>> +
>> +    for (i = 0; i < instr_count; ++i)
>> +    {
>> +        *(DWORD *)&ins = *ip++;
>> +        input_count = *ip++;
>> +
>> +        if (input_count > ARRAY_SIZE(args))
>> +        {
>> +            FIXME("Unexpected argument count %u.\n", input_count);
>> +            return E_FAIL;
>> +        }
>> +
>> +        /* Arguments */
>> +        for (j = 0; j < input_count; ++j)
>> +        {
>> +            ip++; /* TODO: currently ignored field */
>> +            regt = *ip++;
>> +            offset = *ip++;
>> +
>> +            args[j] = d3d10_effect_preshader_get_reg_ptr(p, regt, offset);
>> +        }
>> +
>> +        ip++; /* TODO: currently ignored field */
>> +        regt = *ip++;
>> +        offset = *ip++;
>> +        retval = d3d10_effect_preshader_get_reg_ptr(p, regt, offset);
>> +
>> +        *retval = preshader_ops[ins.opcode].func(args, input_count);
>> +    }
>> +
>> +    return S_OK;
>> +}
> Maybe add to the TODO comments an indication of what the ignored field
> is? If it makes sense.
Sure, it means the same thing as in d3dx9 - indexed access.
>
>> +
>>  static void d3d10_effect_clear_prop_dependencies(struct d3d10_effect_prop_dependencies *d)
>>  {
>> +    unsigned int i;
>> +
>> +    for (i = 0; i < d->count; ++i)
>> +    {
>> +        struct d3d10_effect_prop_dependency *dep = &d->entries[i];
>> +        switch (dep->operation)
>> +        {
>> +            case D3D10_EOO_INDEX_EXPRESSION:
>> +                d3d10_effect_preshader_clear(&dep->u.index_expr.index);
>> +                break;
>> +            default:
>> +                ;
>> +        }
>> +    }
>>      heap_free(d->entries);
>>      memset(d, 0, sizeof(*d));
>>  }
> I think you can drop the default: entirely without triggering warnings.
>
>> @@ -546,6 +741,38 @@ static void d3d10_effect_update_dependent_props(struct d3d10_effect_prop_depende
>>
>>                  break;
>>
>> +            case D3D10_EOO_INDEX_EXPRESSION:
>> +
>> +                v = d->u.index_expr.v;
>> +
>> +                if (FAILED(hr = d3d10_effect_preshader_eval(&d->u.index_expr.index)))
>> +                {
>> +                    WARN("Failed to evaluate index expression, hr %#x.\n", hr);
>> +                    return;
>> +                }
>> +
>> +                variable_idx = *d->u.index_expr.index.reg_tables[D3D10_REG_TABLE_RESULT].u.dword;
>> +
>> +                if (variable_idx >= v->type->element_count)
>> +                {
>> +                    WARN("Expression evaluated to invalid index value %u, array %s of size %u.\n",
>> +                            variable_idx, debugstr_a(v->name), v->type->element_count);
>> +                    return;
>> +                }
>> +
>> +                switch (property_info->type)
>> +                {
>> +                    case D3D10_SVT_VERTEXSHADER:
>> +                    case D3D10_SVT_PIXELSHADER:
>> +                    case D3D10_SVT_GEOMETRYSHADER:
>> +                        ((void **)dst)[d->idx] = v;
>> +                        *dst_index = variable_idx;
>> +                        break;
>> +                    default:
>> +                        ((void **)dst)[d->idx] = &v->elements[variable_idx];
>> +                }
>> +                break;
>> +
>>              default:
>>                  FIXME("Unsupported property update for %u.\n", d->operation);
>>          }
> Again mostly wondering: does D3D10_EOO_INDEX_EXPRESSION apply to other
> object types, like textures?
> Similarly, have you seen any case where d->idx is not 0 for shaders?
> What does it even mean a non-zero idx in that case?

That's a good question. I copied this from other similar places, but you
might be right, and it's meaningless here, because it's always 0.

There are no object-typed properties that are arrays, and indexed
expression I believe is used only for arrays of objects,
e.g. d->idx (== 2) would matter for cases like this

BlendState blend_state { RenderTargetWriteMask[2] = <expression> };

But that would compile to another type of operation - value expression,
you can't use buffer numeric variable array as right hand value here, like

RenderTargetWriteMask[2] = value[idx_var + 1];

it would be expanded to full expression returning result and not to
value[<expression>], with expression returning index.

I have to test that again, but most likely it's possible to get rid of
d->idx there.

>
>> +static HRESULT parse_fx10_fxlc(void *ctx, const char *data, unsigned int data_size)
>> +{
>> +    struct d3d10_preshader_parse_context *context = ctx;
>> +    struct d3d10_effect_preshader *p = context->preshader;
>> +    uint32_t ins_count;
>> +    const char *ptr;
>> +    unsigned int i;
>> +    HRESULT hr;
>> +
>> +    if (data_size % sizeof(DWORD))
>> +    {
>> +        WARN("FXLC size misaligned %u.\n", data_size);
>> +        return E_FAIL;
>> +    }
>> +
>> +    /* Parse through bytecode copy, so we can patch opcodes. */
>> +    if (FAILED(hr = D3DCreateBlob(data_size, &p->code)))
>> +        return hr;
>> +    memcpy(ID3D10Blob_GetBufferPointer(p->code), data, data_size);
>> +
>> +    ptr = ID3D10Blob_GetBufferPointer(p->code);
> Not a big deal, but you could move up this assignment to avoid
> duplicating the GetBufferPointer() call.
>
>> +    read_dword(&ptr, &ins_count);
>> +    TRACE("%u instructions.\n", ins_count);
>> +
>> +    for (i = 0; i < ins_count; ++i)
>> +    {
>> +        if (FAILED(hr = parse_fx10_preshader_instr(p, &ptr)))
>> +        {
>> +            WARN("Failed to parse instruction %u.\n", i);
>> +            return hr;
>> +        }
>> +    }
>> +
>> +    if (FAILED(hr = d3d10_reg_table_allocate(&p->reg_tables[D3D10_REG_TABLE_RESULT],
>> +            p->reg_tables[D3D10_REG_TABLE_RESULT].count))) return hr;
>> +    if (FAILED(hr = d3d10_reg_table_allocate(&p->reg_tables[D3D10_REG_TABLE_TEMP],
>> +            p->reg_tables[D3D10_REG_TABLE_TEMP].count))) return hr;
> All the calls to d3d10_reg_table_allocate() but one pass table->count
> as count value, which is then initialized again with the same value by
> the function. That works but it doesn't look particularly nice.
>
>> +static HRESULT parse_fx10_ctab(void *ctx, const char *data, unsigned int data_size)
>> +{
>> +    struct d3d10_preshader_parse_context *context = ctx;
>> +    struct d3d10_effect_preshader *p = context->preshader;
>> +    struct ctab_header
>> +    {
>> +        DWORD size;
>> +        DWORD creator;
>> +        DWORD version;
>> +        DWORD constants;
>> +        DWORD constantinfo;
>> +        DWORD flags;
>> +        DWORD target;
>> +    } header;
>> +    struct ctab_const_info
>> +    {
>> +        DWORD name;
>> +        WORD register_set;
>> +        WORD register_index;
>> +        WORD register_count;
>> +        WORD reserved;
>> +        DWORD typeinfo;
>> +        DWORD default_value;
>> +    } *info;
>> +    unsigned int i, cb_reg_count = 0;
>> +    const char *ptr = data;
>> +    const char *name;
>> +    size_t name_len;
>> +    HRESULT hr;
>> +
>> +    if (data_size < sizeof(header))
>> +    {
>> +        WARN("Invalid constant table size %u.\n", data_size);
>> +        return E_FAIL;
>> +    }
>> +
>> +    read_dword(&ptr, &header.size);
>> +    read_dword(&ptr, &header.creator);
>> +    read_dword(&ptr, &header.version);
>> +    read_dword(&ptr, &header.constants);
>> +    read_dword(&ptr, &header.constantinfo);
>> +    read_dword(&ptr, &header.flags);
>> +    read_dword(&ptr, &header.target);
>> +
>> +    if (!require_space(header.constantinfo, header.constants, sizeof(*info) / sizeof(DWORD),
>> +            data_size))
> I think you want plain sizeof(*info) there, without any division.
>
>> diff --git a/dlls/d3d10/tests/effect.c b/dlls/d3d10/tests/effect.c
>> index da644d8d9db..c2ed9c6071e 100644
>> --- a/dlls/d3d10/tests/effect.c
>> +++ b/dlls/d3d10/tests/effect.c
>> @@ -7801,6 +7801,119 @@ static void test_effect_dynamic_numeric_field(void)
>>      ok(!refcount, "Device has %u references left.\n", refcount);
>>  }
>>
>> +#if 0
>> +float4 g_var;
>> +PixelShader ps_array[2];
>> +VertexShader vs_array[2];
>> +technique10 tech
>> +{
>> +    pass p0
>> +    {
>> +        SetPixelShader( ps_array[g_var.z] );
>> +        SetVertexShader( vs_array[g_var.x + 0.1f] );
>> +    }
>> +}
>> +#endif
>> +static DWORD fx_test_index_expression[] =
>> +{
>> +    0x43425844, 0x83787296, 0xf866b7a9, 0x7b56930c, 0xdc9d061a, 0x00000001, 0x000003e9, 0x00000001,
>> +    0x00000024, 0x30315846, 0x000003bd, 0xfeff1001, 0x00000001, 0x00000001, 0x00000002, 0x00000000,
>> +    0x00000000, 0x00000000, 0x00000001, 0x000002cd, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
>> +    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000000, 0x6f6c4724,
>> +    0x736c6162, 0x6f6c6600, 0x00347461, 0x0000000d, 0x00000001, 0x00000000, 0x00000010, 0x00000010,
>> +    0x00000010, 0x0000210a, 0x61765f67, 0x69500072, 0x536c6578, 0x65646168, 0x00360072, 0x00020000,
>> +    0x00020000, 0x00000000, 0x00000000, 0x00000000, 0x00050000, 0x73700000, 0x7272615f, 0x56007961,
>> +    0x65747265, 0x61685378, 0x00726564, 0x00000067, 0x00000002, 0x00000002, 0x00000000, 0x00000000,
>> +    0x00000000, 0x00000006, 0x615f7376, 0x79617272, 0x63657400, 0x30700068, 0x0000ec00, 0x42584400,
>> +    0x990a9743, 0xd17834e6, 0x40de477e, 0xf476a79f, 0x00000101, 0x0000ec00, 0x00000300, 0x00002c00,
>> +    0x0000a800, 0x0000b400, 0x41544300, 0x00007442, 0x00001c00, 0x00004b00, 0x58040000, 0x00000146,
>> +    0x00001c00, 0x00010000, 0x00004800, 0x00003000, 0x00000200, 0x00000100, 0x00003800, 0x00000000,
>> +    0x765f6700, 0xab007261, 0x030001ab, 0x04000100, 0x00000100, 0x00000000, 0x00787400, 0x7263694d,
>> +    0x666f736f, 0x52282074, 0x4c482029, 0x53204c53, 0x65646168, 0x6f432072, 0x6c69706d, 0x31207265,
>> +    0x00312e30, 0x494c43ab, 0x00000434, 0x00000000, 0x4c584600, 0x00003043, 0x00000100, 0x30000100,
>> +    0x00000113, 0x00000000, 0x00000200, 0x00000200, 0x00000000, 0x00000400, 0x00000000, 0xf0f0f000,
>> +    0x0f0f0ff0, 0x00ffff0f, 0x00005e00, 0x0000a100, 0x00012800, 0x42584400, 0x78de2e43, 0x31414e7a,
>> +    0xf69158cd, 0x416c97b6, 0x00000192, 0x00012800, 0x00000300, 0x00002c00, 0x0000a800, 0x0000c400,
>> +    0x41544300, 0x00007442, 0x00001c00, 0x00004b00, 0x58040000, 0x00000146, 0x00001c00, 0x00010000,
>> +    0x00004800, 0x00003000, 0x00000200, 0x00000100, 0x00003800, 0x00000000, 0x765f6700, 0xab007261,
>> +    0x030001ab, 0x04000100, 0x00000100, 0x00000000, 0x00787400, 0x7263694d, 0x666f736f, 0x52282074,
>> +    0x4c482029, 0x53204c53, 0x65646168, 0x6f432072, 0x6c69706d, 0x31207265, 0x00312e30, 0x494c43ab,
>> +    0x00001434, 0x00000400, 0xcccccd00, 0x0000003d, 0x00000000, 0x00000000, 0x4c584600, 0x00005c43,
>> +    0x00000200, 0x40000100, 0x00000220, 0x00000000, 0x00000200, 0x00000000, 0x00000000, 0x00000100,
>> +    0x00000000, 0x00000000, 0x00000700, 0x00000000, 0x30000100, 0x00000113, 0x00000000, 0x00000700,
>> +    0x00000000, 0x00000000, 0x00000400, 0x00000000, 0xf0f0f000, 0x0f0f0ff0, 0x00ffff0f, 0x00009000,
>> +    0x00019900, 0x00000400, 0x00001000, 0x00000000, 0x00000100, 0xffffff00, 0x000000ff, 0x00003000,
>> +    0x00001400, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00005e00, 0x00004200,
>> +    0x00000000, 0xffffff00, 0x000000ff, 0x00000000, 0x00000000, 0x00009000, 0x00007400, 0x00000000,
>> +    0xffffff00, 0x000000ff, 0x00000000, 0x00000000, 0x00009900, 0x00000100, 0x00000000, 0x00009e00,
>> +    0x00000200, 0x00000000, 0x00000700, 0x00000000, 0x00000500, 0x00019100, 0x00000600, 0x00000000,
>> +    0x00000500, 0x0002c500, 0x00000000,
>> +};
>> +
>> +static void test_effect_index_expression(void)
>> +{
>> +    ID3D10EffectTechnique *tech;
>> +    ID3D10EffectPass *pass;
>> +    ID3D10Effect *effect;
>> +    ID3D10Device *device;
>> +    ULONG refcount;
>> +    HRESULT hr;
>> +    D3D10_PASS_SHADER_DESC shader_desc;
>> +    ID3D10EffectVectorVariable *vector;
>> +    ID3D10EffectVariable *v;
>> +    float val[4];
> Nitpick, order of the declarations.
> Nice test otherwise.




More information about the wine-devel mailing list