[PATCH] d3dx9: Improve performance and memory usage in preshader constants setting.

Paul Gofman gofmanp at gmail.com
Thu May 26 13:28:21 CDT 2016


Signed-off-by: Paul Gofman <gofmanp at gmail.com>
---
 dlls/d3dx9_36/d3dx9_private.h |  25 +++-
 dlls/d3dx9_36/preshader.c     | 259 +++++++++++++++++++++++++++++++++++-------
 2 files changed, 237 insertions(+), 47 deletions(-)

diff --git a/dlls/d3dx9_36/d3dx9_private.h b/dlls/d3dx9_36/d3dx9_private.h
index 9938d83..970ef2f 100644
--- a/dlls/d3dx9_36/d3dx9_private.h
+++ b/dlls/d3dx9_36/d3dx9_private.h
@@ -129,14 +129,33 @@ enum pres_reg_tables
     PRES_REGTAB_FIRST_SHADER = PRES_REGTAB_CONST,
 };
 
+enum const_param_copy_state_type
+{
+    COPY_STATE_TYPE_PARAM,          /* state_change.param: new source parameter */
+    COPY_STATE_TYPE_TABLE,          /* state_change.value: new destination register table */
+    COPY_STATE_TYPE_PARAM_OFFSET,   /* state_change.value: new offset into the parameter data */
+    COPY_STATE_TYPE_TABLE_OFFSET,   /* state_change.value: new offset into the destination table */
+    COPY_STATE_TYPE_COPY_COUNT      /* state_change.value: number of values to copy */
+};
+
+struct d3dx_const_param_set    /* one entry of the recorded constant copy program */
+{
+    enum const_param_copy_state_type copy_state_type;   /* selects which union member is meaningful */
+    union
+    {
+        struct d3dx_parameter *param;   /* read for COPY_STATE_TYPE_PARAM entries */
+        unsigned int value;             /* read for every other entry type */
+    } state_change;
+};
+
 struct d3dx_const_tab
 {
     unsigned int input_count;
     D3DXCONSTANT_DESC *inputs;
     struct d3dx_parameter **inputs_param;
-    ID3DXConstantTable *ctab;
-    /* TODO: do not keep input constant structure
-       (use it only at the parse stage) */
+    unsigned int const_set_count;           /* number of used entries in const_set */
+    unsigned int const_set_size;            /* allocated capacity of const_set, in entries */
+    struct d3dx_const_param_set *const_set; /* copy program filled in via init_set_constants() */
     const enum pres_reg_tables *regset2table;
 };
 
diff --git a/dlls/d3dx9_36/preshader.c b/dlls/d3dx9_36/preshader.c
index 78f3bdb..6074dff 100644
--- a/dlls/d3dx9_36/preshader.c
+++ b/dlls/d3dx9_36/preshader.c
@@ -206,6 +206,13 @@ struct d3dx_pres_ins
     struct d3dx_pres_operand output;
 };
 
+struct d3dx_const_copy_state    /* running state used while recording and replaying const_set */
+{
+    struct d3dx_parameter *param;   /* current source parameter */
+    unsigned int table;             /* current destination register table */
+    unsigned int param_offset, table_offset;    /* next source / destination offsets */
+};
+
 static unsigned int get_reg_offset(unsigned int table, unsigned int offset)
 {
     return offset / table_info[table].reg_component_count;
@@ -213,6 +220,8 @@ static unsigned int get_reg_offset(unsigned int table, unsigned int offset)
 
 #define PRES_BITMASK_BLOCK_SIZE (sizeof(unsigned int) * 8)
 
+static HRESULT init_set_constants(struct d3dx_const_tab *const_tab, ID3DXConstantTable *ctab);
+
 static HRESULT regstore_alloc_table(struct d3dx_regstore *rs, unsigned int table)
 {
     unsigned int size;
@@ -485,7 +494,6 @@ static HRESULT get_constants_desc(unsigned int *byte_code, struct d3dx_const_tab
     unsigned int i;
 
     out->inputs = cdesc = NULL;
-    out->ctab = NULL;
     out->inputs_param = NULL;
     out->input_count = 0;
     inputs_param = NULL;
@@ -529,9 +537,9 @@ static HRESULT get_constants_desc(unsigned int *byte_code, struct d3dx_const_tab
     out->input_count = desc.Constants;
     out->inputs = cdesc;
     out->inputs_param = inputs_param;
-    out->ctab = ctab;
-    return D3D_OK;
-
+    hr = init_set_constants(out, ctab);
+    ID3DXConstantTable_Release(ctab);
+    return hr;
 err_out:
     HeapFree(GetProcessHeap(), 0, cdesc);
     HeapFree(GetProcessHeap(), 0, inputs_param);
@@ -682,6 +690,8 @@ static HRESULT parse_preshader(struct d3dx_preshader *pres, unsigned int *ptr, u
         p = ptr_next;
     }
 
+    pres->inputs.regset2table = pres_regset2table;
+
     saved_word = *ptr;
     *ptr = 0xfffe0000;
     hr = get_constants_desc(ptr, &pres->inputs, base);
@@ -689,8 +699,6 @@ static HRESULT parse_preshader(struct d3dx_preshader *pres, unsigned int *ptr, u
     if (FAILED(hr))
         return hr;
 
-    pres->inputs.regset2table = pres_regset2table;
-
     pres->regs.table_sizes[PRES_REGTAB_IMMED] = const_count;
 
     for (i = 0; i < pres->ins_count; ++i)
@@ -797,6 +805,7 @@ void d3dx_create_param_eval(struct d3dx9_base_effect *base_effect, void *byte_co
             dump_registers(&peval->shader_inputs);
         }
     }
+
     *peval_out = peval;
     TRACE("Created parameter evaluator %p.\n", *peval_out);
     return;
@@ -811,10 +820,10 @@ static void d3dx_free_const_tab(struct d3dx_const_tab *ctab)
 {
     HeapFree(GetProcessHeap(), 0, ctab->inputs);
     HeapFree(GetProcessHeap(), 0, ctab->inputs_param);
-    if (ctab->ctab)
-        ID3DXConstantTable_Release(ctab->ctab);
+    HeapFree(GetProcessHeap(), 0, ctab->const_set);
 }
 
+
 static void d3dx_free_preshader(struct d3dx_preshader *pres)
 {
     HeapFree(GetProcessHeap(), 0, pres->ins);
@@ -835,14 +844,181 @@ void d3dx_free_param_eval(struct d3dx_param_eval *peval)
     HeapFree(GetProcessHeap(), 0, peval);
 }
 
-static HRESULT set_constants_param(struct d3dx_regstore *rs, struct d3dx_const_tab *const_tab,
-        D3DXHANDLE hc, struct d3dx_parameter *param)
+/* Replays the copy program recorded in const_tab->const_set into the register store rs.
+ * PARAM, TABLE and *_OFFSET entries only update the copy state; each COPY_COUNT entry
+ * copies that many values (converting when types differ) and advances both offsets. */
+static void set_constants(struct d3dx_regstore *rs, struct d3dx_const_tab *const_tab)
+{
+    unsigned int i;
+    struct d3dx_const_copy_state set_state;
+
+    memset(&set_state, 0, sizeof(set_state));
+    for (i = 0; i < const_tab->const_set_count; ++i)
+    {
+        switch (const_tab->const_set[i].copy_state_type)
+        {
+            case COPY_STATE_TYPE_PARAM:
+                set_state.param = const_tab->const_set[i].state_change.param;
+                break;
+            case COPY_STATE_TYPE_TABLE:
+                set_state.table = const_tab->const_set[i].state_change.value;
+                break;
+            case COPY_STATE_TYPE_PARAM_OFFSET:
+                set_state.param_offset = const_tab->const_set[i].state_change.value;
+                break;
+            case COPY_STATE_TYPE_TABLE_OFFSET:
+                set_state.table_offset = const_tab->const_set[i].state_change.value;
+                break;
+            case COPY_STATE_TYPE_COPY_COUNT:
+            {
+                unsigned int count = const_tab->const_set[i].state_change.value;
+                struct d3dx_parameter *param = set_state.param;
+                enum pres_value_type table_type = table_info[set_state.table].type;
+                unsigned int j;
+
+                if ((set_state.table_offset + count - 1) / table_info[set_state.table].reg_component_count
+                        >= rs->table_sizes[set_state.table])
+                {
+                    FIXME("Insufficient table space allocated.\n");
+                }
+                else if ((param->type == D3DXPT_FLOAT && table_type == PRES_VT_FLOAT)
+                        || (param->type == D3DXPT_INT && table_type == PRES_VT_INT)
+                        || (param->type == D3DXPT_BOOL && table_type == PRES_VT_BOOL))
+                {
+                    regstore_set_values(rs, set_state.table, (unsigned int *)param->data + set_state.param_offset,
+                            set_state.table_offset, count);
+                }
+                else
+                {
+                    for (j = 0; j < count; ++j)
+                    {
+                        unsigned int out;
+                        unsigned int *in = (unsigned int *)param->data + set_state.param_offset + j;
+                        switch (table_type)
+                        {
+                            case PRES_VT_FLOAT: set_number(&out, D3DXPT_FLOAT, in, param->type); break;
+                            case PRES_VT_INT: set_number(&out, D3DXPT_INT, in, param->type); break;
+                            case PRES_VT_BOOL: set_number(&out, D3DXPT_BOOL, in, param->type); break;
+                            default:
+                                FIXME("Unexpected type %#x.\n", table_type);
+                                continue; /* 'out' was never written; do not store garbage. */
+                        }
+                        regstore_set_values(rs, set_state.table, &out, set_state.table_offset + j, 1);
+                    }
+                }
+                /* Advance even when the copy was skipped: later entries were recorded relative to it. */
+                set_state.param_offset += count;
+                set_state.table_offset += count;
+                break;
+            }
+        }
+    }
+}
+
+#define INITIAL_CONST_SET_SIZE 16
+
+/* Appends a copy of *set to const_tab->const_set, growing the array when it is full
+ * (INITIAL_CONST_SET_SIZE entries at first, doubling afterwards).
+ * Returns D3D_OK, or E_OUTOFMEMORY if the array cannot be grown; in that case the
+ * existing array and counters are left unchanged. */
+static HRESULT append_const_set(struct d3dx_const_tab *const_tab, struct d3dx_const_param_set *set)
+{
+    struct d3dx_const_param_set *entries;
+    unsigned int capacity;
+
+    if (const_tab->const_set_count >= const_tab->const_set_size)
+    {
+        if (const_tab->const_set_size)
+        {
+            capacity = const_tab->const_set_size * 2;
+            entries = HeapReAlloc(GetProcessHeap(), 0, const_tab->const_set,
+                    sizeof(*const_tab->const_set) * capacity);
+        }
+        else
+        {
+            capacity = INITIAL_CONST_SET_SIZE;
+            entries = HeapAlloc(GetProcessHeap(), 0, sizeof(*const_tab->const_set) * capacity);
+        }
+        if (!entries)
+        {
+            ERR("Out of memory.\n");
+            return E_OUTOFMEMORY;
+        }
+        const_tab->const_set = entries;
+        const_tab->const_set_size = capacity;
+    }
+    const_tab->const_set[const_tab->const_set_count] = *set;
+    ++const_tab->const_set_count;
+    return D3D_OK;
+}
+
+/* Records a state change of the given type when value differs from *current_value,
+ * updating *current_value first. Returns D3D_OK, or the failure code of the append. */
+static HRESULT add_const_set_value(struct d3dx_const_tab *const_tab, unsigned int value,
+        unsigned int *current_value, enum const_param_copy_state_type type)
+{
+    struct d3dx_const_param_set entry;
+    HRESULT hr;
+
+    if (*current_value == value)
+        return D3D_OK;
+    *current_value = value;
+    entry.copy_state_type = type;
+    entry.state_change.value = value;
+    hr = append_const_set(const_tab, &entry);
+    return FAILED(hr) ? hr : D3D_OK;
+}
+
+static HRESULT add_const_set(struct d3dx_const_tab *const_tab, struct d3dx_const_copy_state *set_state,
+        struct d3dx_parameter *param, unsigned int table, unsigned int param_offset,
+        unsigned int table_offset)
+{   /* Records the copy of one value from param at param_offset to table at table_offset. */
+    struct d3dx_const_param_set set;
+    HRESULT hr;
+
+    if (set_state->param == param && set_state->table == table
+            && set_state->param_offset == param_offset && set_state->table_offset == table_offset)
+    {   /* Contiguous with the previous copy: extend it instead of appending. */
+        set_state->param_offset++;
+        set_state->table_offset++;
+        const_tab->const_set[const_tab->const_set_count - 1].state_change.value++;
+        return D3D_OK; /* NOTE(review): the increment above assumes the last entry is a COPY_COUNT */
+    }
+    if (set_state->param != param)  /* emit a PARAM entry only when the source changes */
+    {
+        set_state->param = param;
+        set.copy_state_type = COPY_STATE_TYPE_PARAM;
+        set.state_change.param = param;
+        if (FAILED(hr=append_const_set(const_tab, &set)))
+            return hr;
+    }
+    if (FAILED(hr = add_const_set_value(const_tab, table,
+            &set_state->table, COPY_STATE_TYPE_TABLE)))    /* no entry is added if unchanged */
+        return hr;
+    if (FAILED(hr = add_const_set_value(const_tab, param_offset,
+            &set_state->param_offset, COPY_STATE_TYPE_PARAM_OFFSET)))
+        return hr;
+    if (FAILED(hr = add_const_set_value(const_tab, table_offset,
+            &set_state->table_offset, COPY_STATE_TYPE_TABLE_OFFSET)))
+        return hr;
+
+    set.copy_state_type = COPY_STATE_TYPE_COPY_COUNT;   /* start a new run of length 1 */
+    set.state_change.value = 1;
+    if (FAILED(hr=append_const_set(const_tab, &set)))
+        return hr;
+    set_state->param_offset++;  /* tracked state now points just past the copied value */
+    set_state->table_offset++;
+    return D3D_OK;
+}
+
+static HRESULT init_set_constants_param(struct d3dx_const_tab *const_tab, ID3DXConstantTable *ctab,
+        D3DXHANDLE hc, struct d3dx_parameter *param, struct d3dx_const_copy_state *set_state)
 {
-    ID3DXConstantTable *ctab = const_tab->ctab;
     D3DXCONSTANT_DESC desc;
     unsigned int const_count, param_count, i, j, n, table, start_offset;
     unsigned int minor, major, major_stride, param_offset;
     BOOL transpose, get_element;
+    HRESULT hr;
 
     if (FAILED(get_ctab_constant_desc(ctab, hc, &desc)))
         return D3DERR_INVALIDCALL;
@@ -872,7 +1048,7 @@ static HRESULT set_constants_param(struct d3dx_regstore *rs, struct d3dx_const_t
     }
     if (const_count)
     {
-        HRESULT hr, ret;
+        HRESULT ret;
         D3DXHANDLE hc_element;
 
         ret = D3D_OK;
@@ -889,7 +1065,7 @@ static HRESULT set_constants_param(struct d3dx_regstore *rs, struct d3dx_const_t
             }
             else
             {
-                hr = set_constants_param(rs, const_tab, hc_element, &param->members[i]);
+                hr = init_set_constants_param(const_tab, ctab, hc_element, &param->members[i], set_state);
             }
             if (FAILED(hr))
                 ret = hr;
@@ -931,21 +1107,17 @@ static HRESULT set_constants_param(struct d3dx_regstore *rs, struct d3dx_const_t
     major_stride = max(minor, table_info[table].reg_component_count);
     n = min(major * major_stride,
             desc.RegisterCount * table_info[table].reg_component_count + major_stride - 1) / major_stride;
+
     for (i = 0; i < n; ++i)
     {
         for (j = 0; j < minor; ++j)
         {
-            unsigned int out;
-            unsigned int *in;
             unsigned int offset;
 
             offset = start_offset + i * major_stride + j;
-            if (offset / table_info[table].reg_component_count >= rs->table_sizes[table])
-            {
-                if (table_info[table].reg_component_count != 1)
-                    FIXME("Output offset exceeds table size, name %s, component %u.\n", desc.Name, i);
+            if ((offset - start_offset) / table_info[table].reg_component_count >= desc.RegisterCount)
                 break;
-            }
+
             if (transpose)
                 param_offset = i + j * major;
             else
@@ -955,41 +1127,31 @@ static HRESULT set_constants_param(struct d3dx_regstore *rs, struct d3dx_const_t
                 WARN("Parameter data is too short, name %s, component %u.\n", desc.Name, i);
                 break;
             }
-
-            in = (unsigned int *)param->data + param_offset;
-            /* TODO: store data transfer / convert operation instead of performing an operation
-                from here, to move this to parsing stage */
-            switch (table_info[table].type)
-            {
-                case PRES_VT_FLOAT: set_number(&out, D3DXPT_FLOAT, in, param->type); break;
-                case PRES_VT_INT: set_number(&out, D3DXPT_INT, in, param->type); break;
-                case PRES_VT_BOOL: set_number(&out, D3DXPT_BOOL, in, param->type); break;
-                default:
-                    FIXME("Unexpected type %#x.\n", table_info[table].type);
-                    break;
-            }
-            regstore_set_values(rs, table, &out, offset, 1);
+            if (FAILED(hr = add_const_set(const_tab, set_state, param, table, param_offset, offset)))
+                return hr;
         }
     }
-
     return D3D_OK;
 }
 
-static HRESULT set_constants(struct d3dx_regstore *rs, struct d3dx_const_tab *const_tab)
+static HRESULT init_set_constants(struct d3dx_const_tab *const_tab, ID3DXConstantTable *ctab)
 {
     unsigned int i;
     HRESULT hr, ret;
     D3DXHANDLE hc;
+    struct d3dx_const_copy_state set_state;
+
+    memset(&set_state, 0, sizeof(set_state));
 
     ret = D3D_OK;
     for (i = 0; i < const_tab->input_count; ++i)
     {
         if (!const_tab->inputs_param[i] || const_tab->inputs_param[i]->class == D3DXPC_OBJECT)
             continue;
-        hc = ID3DXConstantTable_GetConstant(const_tab->ctab, NULL, i);
+        hc = ID3DXConstantTable_GetConstant(ctab, NULL, i);
         if (hc)
         {
-            hr = set_constants_param(rs, const_tab, hc, const_tab->inputs_param[i]);
+            hr = init_set_constants_param(const_tab, ctab, hc, const_tab->inputs_param[i], &set_state);
         }
         else
         {
@@ -999,6 +1161,18 @@ static HRESULT set_constants(struct d3dx_regstore *rs, struct d3dx_const_tab *co
         if (FAILED(hr))
             ret = hr;
     }
+
+    if (const_tab->const_set_count)
+    {
+        /* Trim to the final size; on failure keep the still-valid original buffer (no leak). */
+        struct d3dx_const_param_set *trimmed = HeapReAlloc(GetProcessHeap(), 0,
+                const_tab->const_set, sizeof(*const_tab->const_set) * const_tab->const_set_count);
+        if (trimmed)
+        {
+            const_tab->const_set = trimmed;
+            const_tab->const_set_size = const_tab->const_set_count;
+        }
+    }
     return ret;
 }
 
@@ -1074,8 +1248,7 @@ HRESULT d3dx_evaluate_parameter(struct d3dx_param_eval *peval, const struct d3dx
 
     TRACE("peval %p, param %p, param_value %p.\n", peval, param, param_value);
 
-    if (FAILED(hr = set_constants(&peval->pres.regs, &peval->pres.inputs)))
-        return hr;
+    set_constants(&peval->pres.regs, &peval->pres.inputs);
 
     if (FAILED(hr = execute_preshader(&peval->pres)))
         return hr;
@@ -1175,13 +1348,11 @@ HRESULT d3dx_param_eval_set_shader_constants(struct IDirect3DDevice9 *device, st
 
     TRACE("device %p, peval %p, param_type %u.\n", device, peval, peval->param_type);
 
-    if (FAILED(hr = set_constants(rs, &pres->inputs)))
-        return hr;
+    set_constants(rs, &pres->inputs);
     if (FAILED(hr = execute_preshader(pres)))
         return hr;
-    if (FAILED(hr = set_constants(rs, &peval->shader_inputs)))
-        return hr;
 
+    set_constants(rs, &peval->shader_inputs);
     result = D3D_OK;
     for (i = 0; i < ARRAY_SIZE(set_tables); ++i)
     {
-- 
2.5.5




More information about the wine-patches mailing list