[PATCH] d3dx9: Improve performance and memory usage in preshader constants setting.

Matteo Bruni matteo.mystral at gmail.com
Mon May 30 16:24:43 CDT 2016


2016-05-26 20:28 GMT+02:00 Paul Gofman <gofmanp at gmail.com>:
> Signed-off-by: Paul Gofman <gofmanp at gmail.com>
> ---
>  dlls/d3dx9_36/d3dx9_private.h |  25 +++-
>  dlls/d3dx9_36/preshader.c     | 259 +++++++++++++++++++++++++++++++++++-------
>  2 files changed, 237 insertions(+), 47 deletions(-)
>
> diff --git a/dlls/d3dx9_36/d3dx9_private.h b/dlls/d3dx9_36/d3dx9_private.h
> index 9938d83..970ef2f 100644
> --- a/dlls/d3dx9_36/d3dx9_private.h
> +++ b/dlls/d3dx9_36/d3dx9_private.h
> @@ -129,14 +129,33 @@ enum pres_reg_tables
>      PRES_REGTAB_FIRST_SHADER = PRES_REGTAB_CONST,
>  };
>
> +enum const_param_copy_state_type
> +{
> +    COPY_STATE_TYPE_PARAM,
> +    COPY_STATE_TYPE_TABLE,
> +    COPY_STATE_TYPE_PARAM_OFFSET,
> +    COPY_STATE_TYPE_TABLE_OFFSET,
> +    COPY_STATE_TYPE_COPY_COUNT

The name of the last enumerator is a bit ambiguous (it might be
mistaken to be a _COUNT entry == last actual enumerator + 1). I would
either change the name a bit or move it from the last place. Although
this might be moot, see below.

> +struct d3dx_const_copy_state

I'm not entirely happy with the name, maybe
d3dx_const_param_eval_output or something in that fashion?

> @@ -811,10 +820,10 @@ static void d3dx_free_const_tab(struct d3dx_const_tab *ctab)
>  {
>      HeapFree(GetProcessHeap(), 0, ctab->inputs);
>      HeapFree(GetProcessHeap(), 0, ctab->inputs_param);
> -    if (ctab->ctab)
> -        ID3DXConstantTable_Release(ctab->ctab);
> +    HeapFree(GetProcessHeap(), 0, ctab->const_set);
>  }
>
> +
>  static void d3dx_free_preshader(struct d3dx_preshader *pres)

That extra blank line doesn't seem necessary.

> @@ -835,14 +844,181 @@ void d3dx_free_param_eval(struct d3dx_param_eval *peval)
>      HeapFree(GetProcessHeap(), 0, peval);
>  }
>
> -static HRESULT set_constants_param(struct d3dx_regstore *rs, struct d3dx_const_tab *const_tab,
> -        D3DXHANDLE hc, struct d3dx_parameter *param)
> +static void set_constants(struct d3dx_regstore *rs, struct d3dx_const_tab *const_tab)
> +{
> +    unsigned int i;
> +    struct d3dx_const_copy_state set_state;
> +
> +    memset(&set_state, 0, sizeof(set_state));
> +    for (i = 0; i < const_tab->const_set_count; ++i)
> +    {
> +        switch (const_tab->const_set[i].copy_state_type)
> +        {
> +            case COPY_STATE_TYPE_PARAM:
> +                set_state.param = const_tab->const_set[i].state_change.param;
> +                break;
> +            case COPY_STATE_TYPE_TABLE:
> +                set_state.table = const_tab->const_set[i].state_change.value;
> +                break;
> +            case COPY_STATE_TYPE_PARAM_OFFSET:
> +                set_state.param_offset = const_tab->const_set[i].state_change.value;
> +                break;
> +            case COPY_STATE_TYPE_TABLE_OFFSET:
> +                set_state.table_offset = const_tab->const_set[i].state_change.value;
> +                break;
> +            case COPY_STATE_TYPE_COPY_COUNT:
> +            {
> +                unsigned int count;
> +                struct d3dx_parameter *param;
> +                enum pres_value_type table_type;
> +                unsigned int j;
> +
> +                count = const_tab->const_set[i].state_change.value;
> +                param = set_state.param;
> +                table_type = table_info[set_state.table].type;
> +                if ((set_state.table_offset + count - 1) / table_info[set_state.table].reg_component_count
> +                        >= rs->table_sizes[set_state.table])
> +                {
> +                    FIXME("Insufficient table space allocated.\n");
> +                    break;
> +                }
> +                if ((param->type == D3DXPT_FLOAT && table_type == PRES_VT_FLOAT)
> +                        || (param->type == D3DXPT_INT && table_type == PRES_VT_INT)
> +                        || (param->type == D3DXPT_BOOL && table_type == PRES_VT_BOOL))
> +                {
> +                    regstore_set_values(rs, set_state.table, (unsigned int *)param->data + set_state.param_offset,
> +                            set_state.table_offset, count);
> +                }
> +                else
> +                {
> +                    for (j = 0; j < count; ++j)
> +                    {
> +                        unsigned int out;
> +                        unsigned int *in = (unsigned int *)param->data + set_state.param_offset + j;
> +                        switch (table_info[set_state.table].type)
> +                        {
> +                            case PRES_VT_FLOAT: set_number(&out, D3DXPT_FLOAT, in, param->type); break;
> +                            case PRES_VT_INT: set_number(&out, D3DXPT_INT, in, param->type); break;
> +                            case PRES_VT_BOOL: set_number(&out, D3DXPT_BOOL, in, param->type); break;
> +                            default:
> +                                FIXME("Unexpected type %#x.\n", table_info[set_state.table].type);
> +                                break;
> +                        }
> +                        regstore_set_values(rs, set_state.table, &out, set_state.table_offset + j, 1);
> +                    }
> +                }
> +                set_state.param_offset += count;
> +                set_state.table_offset += count;
> +                break;
> +            }
> +        }
> +    }
> +}

This seems overly complicated and fragile.
Instead of this kind of "5 instructions bytecode" isn't it easier to
just store and then lookup a struct with all the info you need? I
guess the end result would be something akin to storing an array of
struct d3dx_const_copy_state in place of struct d3dx_const_param_set.
I don't know how the code would exactly look like but I would start by
moving building all the struct d3dx_const_copy_param to init time. BTW
the coalescing you currently do in add_const_set() could be
potentially done as a separate pass after you generate all the
"d3dx_const_copy_state-like" structures (but still at init time),
which might be simpler.

Does this make sense? Am I missing something?

> +static HRESULT add_const_set_value(struct d3dx_const_tab *const_tab, unsigned int value,
> +        unsigned int *current_value, enum const_param_copy_state_type type)
> +{
> +    struct d3dx_const_param_set set;
> +    HRESULT hr;
> +
> +    if (*current_value != value)
> +    {
> +        *current_value = value;
> +        set.copy_state_type = type;
> +        set.state_change.value = value;
> +        if (FAILED(hr=append_const_set(const_tab, &set)))

Whitespace.

> @@ -931,21 +1107,17 @@ static HRESULT set_constants_param(struct d3dx_regstore *rs, struct d3dx_const_t
>      major_stride = max(minor, table_info[table].reg_component_count);
>      n = min(major * major_stride,
>              desc.RegisterCount * table_info[table].reg_component_count + major_stride - 1) / major_stride;
> +
>      for (i = 0; i < n; ++i)
>      {
>          for (j = 0; j < minor; ++j)
>          {
> -            unsigned int out;
> -            unsigned int *in;
>              unsigned int offset;
>
>              offset = start_offset + i * major_stride + j;
> -            if (offset / table_info[table].reg_component_count >= rs->table_sizes[table])
> -            {
> -                if (table_info[table].reg_component_count != 1)
> -                    FIXME("Output offset exceeds table size, name %s, component %u.\n", desc.Name, i);
> +            if ((offset - start_offset) / table_info[table].reg_component_count >= desc.RegisterCount)
>                  break;

Maybe keeping a WARN or something makes sense here?



More information about the wine-devel mailing list