[v2] d3dx9: Improve performance and memory usage in preshader constants setting.
Paul Gofman
gofmanp at gmail.com
Thu Jun 9 11:09:18 CDT 2016
Signed-off-by: Paul Gofman <gofmanp at gmail.com>
---
Changes
- Use a simpler approach for constants setting.
dlls/d3dx9_36/d3dx9_private.h | 15 ++-
dlls/d3dx9_36/preshader.c | 250 ++++++++++++++++++++++++++++--------------
2 files changed, 177 insertions(+), 88 deletions(-)
diff --git a/dlls/d3dx9_36/d3dx9_private.h b/dlls/d3dx9_36/d3dx9_private.h
index 9938d83..add73bf 100644
--- a/dlls/d3dx9_36/d3dx9_private.h
+++ b/dlls/d3dx9_36/d3dx9_private.h
@@ -129,14 +129,23 @@ enum pres_reg_tables
PRES_REGTAB_FIRST_SHADER = PRES_REGTAB_CONST,
};
+/* One cached constant binding: which parameter is copied into which
+ * registers. Entries are created by init_set_constants_param() while the
+ * constant table is parsed and are replayed by set_constants(). */
+struct d3dx_const_param_eval_output
+{
+ /* Source parameter; set_constants() reads its data, type, class,
+ * rows, columns and bytes fields. */
+ struct d3dx_parameter *param;
+ /* Destination register table, used as an index into table_info[] and
+ * looked up through regset2table from the constant's RegisterSet. */
+ unsigned int table;
+ /* Class of the shader constant (desc.Class); compared against
+ * param->class to decide whether a matrix has to be transposed. */
+ enum D3DXPARAMETER_CLASS constant_class;
+ /* First destination register (desc.RegisterIndex). */
+ unsigned int register_index;
+ /* Number of destination registers (desc.RegisterCount). */
+ unsigned int register_count;
+};
+
struct d3dx_const_tab
{
unsigned int input_count;
D3DXCONSTANT_DESC *inputs;
struct d3dx_parameter **inputs_param;
- ID3DXConstantTable *ctab;
- /* TODO: do not keep input constant structure
- (use it only at the parse stage) */
+ /* Number of entries of const_set currently in use. */
+ unsigned int const_set_count;
+ /* Number of entries const_set has room for. */
+ unsigned int const_set_size;
+ /* Heap array of cached constant bindings; replaces the constant table
+ * reference that used to be kept here. Released in d3dx_free_const_tab(). */
+ struct d3dx_const_param_eval_output *const_set;
const enum pres_reg_tables *regset2table;
};
diff --git a/dlls/d3dx9_36/preshader.c b/dlls/d3dx9_36/preshader.c
index 78f3bdb..970134c 100644
--- a/dlls/d3dx9_36/preshader.c
+++ b/dlls/d3dx9_36/preshader.c
@@ -213,6 +213,8 @@ static unsigned int get_reg_offset(unsigned int table, unsigned int offset)
#define PRES_BITMASK_BLOCK_SIZE (sizeof(unsigned int) * 8)
+static HRESULT init_set_constants(struct d3dx_const_tab *const_tab, ID3DXConstantTable *ctab);
+
static HRESULT regstore_alloc_table(struct d3dx_regstore *rs, unsigned int table)
{
unsigned int size;
@@ -485,7 +487,6 @@ static HRESULT get_constants_desc(unsigned int *byte_code, struct d3dx_const_tab
unsigned int i;
out->inputs = cdesc = NULL;
- out->ctab = NULL;
out->inputs_param = NULL;
out->input_count = 0;
inputs_param = NULL;
@@ -529,9 +530,9 @@ static HRESULT get_constants_desc(unsigned int *byte_code, struct d3dx_const_tab
out->input_count = desc.Constants;
out->inputs = cdesc;
out->inputs_param = inputs_param;
- out->ctab = ctab;
- return D3D_OK;
-
+ hr = init_set_constants(out, ctab);
+ ID3DXConstantTable_Release(ctab);
+ return hr;
err_out:
HeapFree(GetProcessHeap(), 0, cdesc);
HeapFree(GetProcessHeap(), 0, inputs_param);
@@ -682,6 +683,8 @@ static HRESULT parse_preshader(struct d3dx_preshader *pres, unsigned int *ptr, u
p = ptr_next;
}
+ pres->inputs.regset2table = pres_regset2table;
+
saved_word = *ptr;
*ptr = 0xfffe0000;
hr = get_constants_desc(ptr, &pres->inputs, base);
@@ -689,8 +692,6 @@ static HRESULT parse_preshader(struct d3dx_preshader *pres, unsigned int *ptr, u
if (FAILED(hr))
return hr;
- pres->inputs.regset2table = pres_regset2table;
-
pres->regs.table_sizes[PRES_REGTAB_IMMED] = const_count;
for (i = 0; i < pres->ins_count; ++i)
@@ -811,8 +812,7 @@ static void d3dx_free_const_tab(struct d3dx_const_tab *ctab)
{
HeapFree(GetProcessHeap(), 0, ctab->inputs);
HeapFree(GetProcessHeap(), 0, ctab->inputs_param);
- if (ctab->ctab)
- ID3DXConstantTable_Release(ctab->ctab);
+ HeapFree(GetProcessHeap(), 0, ctab->const_set);
}
static void d3dx_free_preshader(struct d3dx_preshader *pres)
@@ -835,14 +835,136 @@ void d3dx_free_param_eval(struct d3dx_param_eval *peval)
HeapFree(GetProcessHeap(), 0, peval);
}
-static HRESULT set_constants_param(struct d3dx_regstore *rs, struct d3dx_const_tab *const_tab,
+/*
+ * Copy every cached constant binding of const_tab into the register store.
+ *
+ * rs         register store receiving the values.
+ * const_tab  constant table whose const_set array lists, for each binding,
+ *            the source parameter and the destination table / register range.
+ *
+ * For each binding the parameter data is written starting at register_index,
+ * converted to the value type of the destination table and transposed when
+ * the parameter and the constant disagree on row / column major layout.
+ * Nothing is returned: components that fall outside the destination table or
+ * the parameter data are logged and skipped.
+ */
+static void set_constants(struct d3dx_regstore *rs, struct d3dx_const_tab *const_tab)
+{
+    unsigned int const_idx;
+
+    for (const_idx = 0; const_idx < const_tab->const_set_count; ++const_idx)
+    {
+        struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[const_idx];
+        unsigned int table = const_set->table;
+        struct d3dx_parameter *param = const_set->param;
+        enum pres_value_type table_type = table_info[table].type;
+        /* Components per register of the destination table; loop invariant. */
+        unsigned int reg_component_count = table_info[table].reg_component_count;
+        unsigned int i, j, n, start_offset;
+        unsigned int minor, major, major_stride, param_offset;
+        BOOL transpose;
+        unsigned int count;
+
+        /* Transpose only when one side is row major and the other column major. */
+        transpose = (const_set->constant_class == D3DXPC_MATRIX_COLUMNS && param->class == D3DXPC_MATRIX_ROWS)
+                || (param->class == D3DXPC_MATRIX_COLUMNS && const_set->constant_class == D3DXPC_MATRIX_ROWS);
+        if (const_set->constant_class == D3DXPC_MATRIX_COLUMNS)
+        {
+            major = param->columns;
+            minor = param->rows;
+        }
+        else
+        {
+            major = param->rows;
+            minor = param->columns;
+        }
+        start_offset = const_set->register_index * reg_component_count;
+        /* Every major line starts on a register boundary at least. */
+        major_stride = max(minor, reg_component_count);
+        /* Number of major lines to write, limited by the register count. */
+        n = min(major * major_stride,
+                const_set->register_count * reg_component_count + major_stride - 1) / major_stride;
+        count = n * minor;
+
+        /* Fast path: same value type, no transpose, tightly packed lines that
+         * exactly fill the register range, and enough source data. The
+         * parameter data can then be copied in a single call. */
+        if (((param->type == D3DXPT_FLOAT && table_type == PRES_VT_FLOAT)
+                || (param->type == D3DXPT_INT && table_type == PRES_VT_INT)
+                || (param->type == D3DXPT_BOOL && table_type == PRES_VT_BOOL))
+                && !transpose && minor == major_stride
+                && count == reg_component_count * const_set->register_count
+                && count * sizeof(unsigned int) <= param->bytes)
+        {
+            regstore_set_values(rs, table, param->data, start_offset, count);
+            continue;
+        }
+
+        /* Slow path: convert and store one component at a time. */
+        for (i = 0; i < n; ++i)
+        {
+            for (j = 0; j < minor; ++j)
+            {
+                unsigned int out;
+                unsigned int *in;
+                unsigned int offset;
+
+                offset = start_offset + i * major_stride + j;
+                if (offset / reg_component_count >= rs->table_sizes[table])
+                {
+                    if (reg_component_count != 1)
+                        FIXME("Output offset exceeds table size, name %s, component %u.\n",
+                                debugstr_a(param->name), i);
+                    break;
+                }
+                if (transpose)
+                    param_offset = i + j * major;
+                else
+                    param_offset = i * minor + j;
+                if (param_offset * sizeof(unsigned int) >= param->bytes)
+                {
+                    WARN("Parameter data is too short, name %s, component %u.\n", debugstr_a(param->name), i);
+                    break;
+                }
+
+                in = (unsigned int *)param->data + param_offset;
+                switch (table_type)
+                {
+                    case PRES_VT_FLOAT: set_number(&out, D3DXPT_FLOAT, in, param->type); break;
+                    case PRES_VT_INT: set_number(&out, D3DXPT_INT, in, param->type); break;
+                    case PRES_VT_BOOL: set_number(&out, D3DXPT_BOOL, in, param->type); break;
+                    default:
+                        FIXME("Unexpected type %#x.\n", table_info[table].type);
+                        /* No conversion ran, so 'out' holds no value: skip the
+                         * store instead of writing an uninitialized word. */
+                        continue;
+                }
+                regstore_set_values(rs, table, &out, offset, 1);
+            }
+        }
+    }
+}
+
+#define INITIAL_CONST_SET_SIZE 16
+
+/*
+ * Append a copy of *set to const_tab->const_set.
+ *
+ * When the array is full it is grown first: INITIAL_CONST_SET_SIZE entries
+ * for the very first allocation, twice the current size afterwards.
+ *
+ * Returns D3D_OK on success, or E_OUTOFMEMORY when the array cannot be
+ * grown; in that case const_tab is left unchanged.
+ */
+static HRESULT append_const_set(struct d3dx_const_tab *const_tab, struct d3dx_const_param_eval_output *set)
+{
+    struct d3dx_const_param_eval_output *grown;
+    unsigned int capacity;
+
+    /* Common case: there is still a free slot. */
+    if (const_tab->const_set_count < const_tab->const_set_size)
+    {
+        const_tab->const_set[const_tab->const_set_count++] = *set;
+        return D3D_OK;
+    }
+
+    if (const_tab->const_set_size)
+    {
+        capacity = const_tab->const_set_size * 2;
+        grown = HeapReAlloc(GetProcessHeap(), 0, const_tab->const_set, sizeof(*grown) * capacity);
+    }
+    else
+    {
+        /* Nothing allocated yet: the array pointer is not read here. */
+        capacity = INITIAL_CONST_SET_SIZE;
+        grown = HeapAlloc(GetProcessHeap(), 0, sizeof(*grown) * capacity);
+    }
+    if (!grown)
+    {
+        ERR("Out of memory.\n");
+        return E_OUTOFMEMORY;
+    }
+
+    const_tab->const_set = grown;
+    const_tab->const_set_size = capacity;
+    const_tab->const_set[const_tab->const_set_count++] = *set;
+    return D3D_OK;
+}
+
+static HRESULT init_set_constants_param(struct d3dx_const_tab *const_tab, ID3DXConstantTable *ctab,
D3DXHANDLE hc, struct d3dx_parameter *param)
{
- ID3DXConstantTable *ctab = const_tab->ctab;
D3DXCONSTANT_DESC desc;
- unsigned int const_count, param_count, i, j, n, table, start_offset;
- unsigned int minor, major, major_stride, param_offset;
- BOOL transpose, get_element;
+ unsigned int const_count, param_count, i;
+ BOOL get_element;
+ struct d3dx_const_param_eval_output const_set;
+ HRESULT hr;
if (FAILED(get_ctab_constant_desc(ctab, hc, &desc)))
return D3DERR_INVALIDCALL;
@@ -872,7 +994,7 @@ static HRESULT set_constants_param(struct d3dx_regstore *rs, struct d3dx_const_t
}
if (const_count)
{
- HRESULT hr, ret;
+ HRESULT ret;
D3DXHANDLE hc_element;
ret = D3D_OK;
@@ -889,7 +1011,7 @@ static HRESULT set_constants_param(struct d3dx_regstore *rs, struct d3dx_const_t
}
else
{
- hr = set_constants_param(rs, const_tab, hc_element, &param->members[i]);
+ hr = init_set_constants_param(const_tab, ctab, hc_element, &param->members[i]);
}
if (FAILED(hr))
ret = hr;
@@ -897,85 +1019,34 @@ static HRESULT set_constants_param(struct d3dx_regstore *rs, struct d3dx_const_t
return ret;
}
- transpose = (desc.Class == D3DXPC_MATRIX_COLUMNS && param->class == D3DXPC_MATRIX_ROWS)
- || (param->class == D3DXPC_MATRIX_COLUMNS && desc.Class == D3DXPC_MATRIX_ROWS);
- if (desc.Class == D3DXPC_MATRIX_COLUMNS)
- {
- major = param->columns;
- minor = param->rows;
- }
- else
- {
- major = param->rows;
- minor = param->columns;
- }
-
TRACE("Constant %s, rows %u, columns %u, class %u, bytes %u.\n",
debugstr_a(desc.Name), desc.Rows, desc.Columns, desc.Class, desc.Bytes);
- TRACE("Parameter %s, rows %u, columns %u, class %u, flags %#x, bytes %u, transpose %#x.\n",
+ TRACE("Parameter %s, rows %u, columns %u, class %u, flags %#x, bytes %u.\n",
debugstr_a(param->name), param->rows, param->columns, param->class,
- param->flags, param->bytes, transpose);
+ param->flags, param->bytes);
+ const_set.param = param;
+ const_set.constant_class = desc.Class;
if (desc.RegisterSet >= ARRAY_SIZE(shad_regset2table))
{
FIXME("Unknown register set %u.\n", desc.RegisterSet);
return D3DERR_INVALIDCALL;
}
- table = const_tab->regset2table[desc.RegisterSet];
- if (table >= PRES_REGTAB_COUNT)
+ const_set.register_index = desc.RegisterIndex;
+ const_set.table = const_tab->regset2table[desc.RegisterSet];
+ if (const_set.table >= PRES_REGTAB_COUNT)
{
ERR("Unexpected register set %u.\n", desc.RegisterSet);
return D3DERR_INVALIDCALL;
}
- start_offset = desc.RegisterIndex * table_info[table].reg_component_count;
- major_stride = max(minor, table_info[table].reg_component_count);
- n = min(major * major_stride,
- desc.RegisterCount * table_info[table].reg_component_count + major_stride - 1) / major_stride;
- for (i = 0; i < n; ++i)
- {
- for (j = 0; j < minor; ++j)
- {
- unsigned int out;
- unsigned int *in;
- unsigned int offset;
-
- offset = start_offset + i * major_stride + j;
- if (offset / table_info[table].reg_component_count >= rs->table_sizes[table])
- {
- if (table_info[table].reg_component_count != 1)
- FIXME("Output offset exceeds table size, name %s, component %u.\n", desc.Name, i);
- break;
- }
- if (transpose)
- param_offset = i + j * major;
- else
- param_offset = i * minor + j;
- if (param_offset * sizeof(unsigned int) >= param->bytes)
- {
- WARN("Parameter data is too short, name %s, component %u.\n", desc.Name, i);
- break;
- }
-
- in = (unsigned int *)param->data + param_offset;
- /* TODO: store data transfer / convert operation instead of performing an operation
- from here, to move this to parsing stage */
- switch (table_info[table].type)
- {
- case PRES_VT_FLOAT: set_number(&out, D3DXPT_FLOAT, in, param->type); break;
- case PRES_VT_INT: set_number(&out, D3DXPT_INT, in, param->type); break;
- case PRES_VT_BOOL: set_number(&out, D3DXPT_BOOL, in, param->type); break;
- default:
- FIXME("Unexpected type %#x.\n", table_info[table].type);
- break;
- }
- regstore_set_values(rs, table, &out, offset, 1);
- }
- }
+ const_set.register_count = desc.RegisterCount;
+ if (FAILED(hr = append_const_set(const_tab, &const_set)))
+ return hr;
return D3D_OK;
}
-static HRESULT set_constants(struct d3dx_regstore *rs, struct d3dx_const_tab *const_tab)
+static HRESULT init_set_constants(struct d3dx_const_tab *const_tab, ID3DXConstantTable *ctab)
{
unsigned int i;
HRESULT hr, ret;
@@ -986,10 +1057,10 @@ static HRESULT set_constants(struct d3dx_regstore *rs, struct d3dx_const_tab *co
{
if (!const_tab->inputs_param[i] || const_tab->inputs_param[i]->class == D3DXPC_OBJECT)
continue;
- hc = ID3DXConstantTable_GetConstant(const_tab->ctab, NULL, i);
+ hc = ID3DXConstantTable_GetConstant(ctab, NULL, i);
if (hc)
{
- hr = set_constants_param(rs, const_tab, hc, const_tab->inputs_param[i]);
+ hr = init_set_constants_param(const_tab, ctab, hc, const_tab->inputs_param[i]);
}
else
{
@@ -999,6 +1070,18 @@ static HRESULT set_constants(struct d3dx_regstore *rs, struct d3dx_const_tab *co
if (FAILED(hr))
ret = hr;
}
+
+ if (const_tab->const_set_count)
+ {
+ const_tab->const_set = HeapReAlloc(GetProcessHeap(), 0, const_tab->const_set,
+ sizeof(*const_tab->const_set) * const_tab->const_set_count);
+ if (!const_tab->const_set)
+ {
+ ERR("Out of memory.\n");
+ return E_OUTOFMEMORY;
+ }
+ const_tab->const_set_size = const_tab->const_set_count;
+ }
return ret;
}
@@ -1074,8 +1157,7 @@ HRESULT d3dx_evaluate_parameter(struct d3dx_param_eval *peval, const struct d3dx
TRACE("peval %p, param %p, param_value %p.\n", peval, param, param_value);
- if (FAILED(hr = set_constants(&peval->pres.regs, &peval->pres.inputs)))
- return hr;
+ set_constants(&peval->pres.regs, &peval->pres.inputs);
if (FAILED(hr = execute_preshader(&peval->pres)))
return hr;
@@ -1175,13 +1257,11 @@ HRESULT d3dx_param_eval_set_shader_constants(struct IDirect3DDevice9 *device, st
TRACE("device %p, peval %p, param_type %u.\n", device, peval, peval->param_type);
- if (FAILED(hr = set_constants(rs, &pres->inputs)))
- return hr;
+ set_constants(rs, &pres->inputs);
if (FAILED(hr = execute_preshader(pres)))
return hr;
- if (FAILED(hr = set_constants(rs, &peval->shader_inputs)))
- return hr;
+ set_constants(rs, &peval->shader_inputs);
result = D3D_OK;
for (i = 0; i < ARRAY_SIZE(set_tables); ++i)
{
--
2.5.5
More information about the wine-patches mailing list