[PATCH] d3dx9: Improve performance and memory usage in preshader constants setting.
Paul Gofman
gofmanp at gmail.com
Thu May 26 13:28:21 CDT 2016
Signed-off-by: Paul Gofman <gofmanp at gmail.com>
---
dlls/d3dx9_36/d3dx9_private.h | 25 +++-
dlls/d3dx9_36/preshader.c | 259 +++++++++++++++++++++++++++++++++++-------
2 files changed, 237 insertions(+), 47 deletions(-)
diff --git a/dlls/d3dx9_36/d3dx9_private.h b/dlls/d3dx9_36/d3dx9_private.h
index 9938d83..970ef2f 100644
--- a/dlls/d3dx9_36/d3dx9_private.h
+++ b/dlls/d3dx9_36/d3dx9_private.h
@@ -129,14 +129,33 @@ enum pres_reg_tables
PRES_REGTAB_FIRST_SHADER = PRES_REGTAB_CONST,
};
+enum const_param_copy_state_type /* Kinds of entries in a d3dx_const_param_set list, as interpreted by set_constants(). */
+{
+ COPY_STATE_TYPE_PARAM, /* state_change.param becomes the current source parameter. */
+ COPY_STATE_TYPE_TABLE, /* state_change.value becomes the current destination register table. */
+ COPY_STATE_TYPE_PARAM_OFFSET, /* state_change.value becomes the current offset (in unsigned ints) into the parameter data. */
+ COPY_STATE_TYPE_TABLE_OFFSET, /* state_change.value becomes the current component offset into the register table. */
+ COPY_STATE_TYPE_COPY_COUNT /* Copy state_change.value components from parameter to table; both offsets then move past them. */
+};
+
+struct d3dx_const_param_set /* One entry of the constant copy list stored in struct d3dx_const_tab. */
+{
+ enum const_param_copy_state_type copy_state_type; /* Selects how state_change is interpreted. */
+ union
+ {
+ struct d3dx_parameter *param; /* Written for COPY_STATE_TYPE_PARAM entries. */
+ unsigned int value; /* Written for table, offset and copy count entries. */
+ } state_change;
+};
+
struct d3dx_const_tab
{
unsigned int input_count;
D3DXCONSTANT_DESC *inputs;
struct d3dx_parameter **inputs_param;
- ID3DXConstantTable *ctab;
- /* TODO: do not keep input constant structure
- (use it only at the parse stage) */
+ unsigned int const_set_count; /* Number of const_set entries in use. */
+ unsigned int const_set_size; /* Number of const_set entries allocated. */
+ struct d3dx_const_param_set *const_set; /* Copy list built by init_set_constants() and replayed by set_constants(). */
const enum pres_reg_tables *regset2table;
};
diff --git a/dlls/d3dx9_36/preshader.c b/dlls/d3dx9_36/preshader.c
index 78f3bdb..6074dff 100644
--- a/dlls/d3dx9_36/preshader.c
+++ b/dlls/d3dx9_36/preshader.c
@@ -206,6 +206,13 @@ struct d3dx_pres_ins
struct d3dx_pres_operand output;
};
+struct d3dx_const_copy_state /* Running state used while building or replaying a constant copy list. */
+{
+ struct d3dx_parameter *param; /* Current source parameter. */
+ unsigned int table; /* Index of the current destination register table. */
+ unsigned int param_offset, table_offset; /* Next unsigned int in param->data / next component in the table. */
+};
+
static unsigned int get_reg_offset(unsigned int table, unsigned int offset)
{
return offset / table_info[table].reg_component_count;
@@ -213,6 +220,8 @@ static unsigned int get_reg_offset(unsigned int table, unsigned int offset)
#define PRES_BITMASK_BLOCK_SIZE (sizeof(unsigned int) * 8)
+static HRESULT init_set_constants(struct d3dx_const_tab *const_tab, ID3DXConstantTable *ctab);
+
static HRESULT regstore_alloc_table(struct d3dx_regstore *rs, unsigned int table)
{
unsigned int size;
@@ -485,7 +494,6 @@ static HRESULT get_constants_desc(unsigned int *byte_code, struct d3dx_const_tab
unsigned int i;
out->inputs = cdesc = NULL;
- out->ctab = NULL;
out->inputs_param = NULL;
out->input_count = 0;
inputs_param = NULL;
@@ -529,9 +537,9 @@ static HRESULT get_constants_desc(unsigned int *byte_code, struct d3dx_const_tab
out->input_count = desc.Constants;
out->inputs = cdesc;
out->inputs_param = inputs_param;
- out->ctab = ctab;
- return D3D_OK;
-
+ hr = init_set_constants(out, ctab);
+ ID3DXConstantTable_Release(ctab);
+ return hr;
err_out:
HeapFree(GetProcessHeap(), 0, cdesc);
HeapFree(GetProcessHeap(), 0, inputs_param);
@@ -682,6 +690,8 @@ static HRESULT parse_preshader(struct d3dx_preshader *pres, unsigned int *ptr, u
p = ptr_next;
}
+ pres->inputs.regset2table = pres_regset2table;
+
saved_word = *ptr;
*ptr = 0xfffe0000;
hr = get_constants_desc(ptr, &pres->inputs, base);
@@ -689,8 +699,6 @@ static HRESULT parse_preshader(struct d3dx_preshader *pres, unsigned int *ptr, u
if (FAILED(hr))
return hr;
- pres->inputs.regset2table = pres_regset2table;
-
pres->regs.table_sizes[PRES_REGTAB_IMMED] = const_count;
for (i = 0; i < pres->ins_count; ++i)
@@ -797,6 +805,7 @@ void d3dx_create_param_eval(struct d3dx9_base_effect *base_effect, void *byte_co
dump_registers(&peval->shader_inputs);
}
}
+
*peval_out = peval;
TRACE("Created parameter evaluator %p.\n", *peval_out);
return;
@@ -811,10 +820,10 @@ static void d3dx_free_const_tab(struct d3dx_const_tab *ctab)
{
HeapFree(GetProcessHeap(), 0, ctab->inputs);
HeapFree(GetProcessHeap(), 0, ctab->inputs_param);
- if (ctab->ctab)
- ID3DXConstantTable_Release(ctab->ctab);
+ HeapFree(GetProcessHeap(), 0, ctab->const_set);
}
+
static void d3dx_free_preshader(struct d3dx_preshader *pres)
{
HeapFree(GetProcessHeap(), 0, pres->ins);
@@ -835,14 +844,181 @@ void d3dx_free_param_eval(struct d3dx_param_eval *peval)
HeapFree(GetProcessHeap(), 0, peval);
}
-static HRESULT set_constants_param(struct d3dx_regstore *rs, struct d3dx_const_tab *const_tab,
- D3DXHANDLE hc, struct d3dx_parameter *param)
+/* Replay the precomputed copy list of const_tab into the register store rs.
+ *
+ * Parameter, table and offset entries only update the running copy state.
+ * A COPY_STATE_TYPE_COPY_COUNT entry transfers that many components from the
+ * current parameter to the current table, going through set_number() when the
+ * parameter type does not match the value type of the table.
+ *
+ * The list builder omits offset entries whenever the offsets simply continue
+ * after the previous copy, so both offsets are advanced after every copy
+ * entry, including one that is skipped for lack of table space. Otherwise a
+ * skipped copy would shift every following copy to the wrong location. */
+static void set_constants(struct d3dx_regstore *rs, struct d3dx_const_tab *const_tab)
+{
+    unsigned int i;
+    struct d3dx_const_copy_state set_state;
+
+    memset(&set_state, 0, sizeof(set_state));
+    for (i = 0; i < const_tab->const_set_count; ++i)
+    {
+        switch (const_tab->const_set[i].copy_state_type)
+        {
+            case COPY_STATE_TYPE_PARAM:
+                set_state.param = const_tab->const_set[i].state_change.param;
+                break;
+            case COPY_STATE_TYPE_TABLE:
+                set_state.table = const_tab->const_set[i].state_change.value;
+                break;
+            case COPY_STATE_TYPE_PARAM_OFFSET:
+                set_state.param_offset = const_tab->const_set[i].state_change.value;
+                break;
+            case COPY_STATE_TYPE_TABLE_OFFSET:
+                set_state.table_offset = const_tab->const_set[i].state_change.value;
+                break;
+            case COPY_STATE_TYPE_COPY_COUNT:
+            {
+                unsigned int count;
+                struct d3dx_parameter *param;
+                enum pres_value_type table_type;
+                unsigned int j;
+
+                count = const_tab->const_set[i].state_change.value;
+                param = set_state.param;
+                table_type = table_info[set_state.table].type;
+                if ((set_state.table_offset + count - 1) / table_info[set_state.table].reg_component_count
+                        >= rs->table_sizes[set_state.table])
+                {
+                    FIXME("Insufficient table space allocated.\n");
+                }
+                else if ((param->type == D3DXPT_FLOAT && table_type == PRES_VT_FLOAT)
+                        || (param->type == D3DXPT_INT && table_type == PRES_VT_INT)
+                        || (param->type == D3DXPT_BOOL && table_type == PRES_VT_BOOL))
+                {
+                    /* Types match: the whole run can be stored without conversion. */
+                    regstore_set_values(rs, set_state.table, (unsigned int *)param->data + set_state.param_offset,
+                            set_state.table_offset, count);
+                }
+                else
+                {
+                    for (j = 0; j < count; ++j)
+                    {
+                        /* Initialized so that the unexpected type path below does not
+                         * store an indeterminate value. */
+                        unsigned int out = 0;
+                        unsigned int *in = (unsigned int *)param->data + set_state.param_offset + j;
+
+                        switch (table_type)
+                        {
+                            case PRES_VT_FLOAT: set_number(&out, D3DXPT_FLOAT, in, param->type); break;
+                            case PRES_VT_INT: set_number(&out, D3DXPT_INT, in, param->type); break;
+                            case PRES_VT_BOOL: set_number(&out, D3DXPT_BOOL, in, param->type); break;
+                            default:
+                                FIXME("Unexpected type %#x.\n", table_type);
+                                break;
+                        }
+                        regstore_set_values(rs, set_state.table, &out, set_state.table_offset + j, 1);
+                    }
+                }
+                /* Advance unconditionally to stay in step with the state tracked
+                 * when the list was built. */
+                set_state.param_offset += count;
+                set_state.table_offset += count;
+                break;
+            }
+        }
+    }
+}
+
+#define INITIAL_CONST_SET_SIZE 16
+
+/* Append a copy of *set to the copy list of const_tab.
+ *
+ * The backing array starts at INITIAL_CONST_SET_SIZE entries and doubles each
+ * time it is full. Returns D3D_OK on success or E_OUTOFMEMORY when the array
+ * cannot be (re)allocated, in which case the list is left unchanged. */
+static HRESULT append_const_set(struct d3dx_const_tab *const_tab, struct d3dx_const_param_set *set)
+{
+    struct d3dx_const_param_set *entries;
+    unsigned int capacity;
+
+    if (const_tab->const_set_count >= const_tab->const_set_size)
+    {
+        if (const_tab->const_set_size)
+        {
+            capacity = const_tab->const_set_size * 2;
+            entries = HeapReAlloc(GetProcessHeap(), 0, const_tab->const_set,
+                    sizeof(*const_tab->const_set) * capacity);
+        }
+        else
+        {
+            capacity = INITIAL_CONST_SET_SIZE;
+            entries = HeapAlloc(GetProcessHeap(), 0, sizeof(*const_tab->const_set) * capacity);
+        }
+        if (!entries)
+        {
+            ERR("Out of memory.\n");
+            return E_OUTOFMEMORY;
+        }
+        const_tab->const_set = entries;
+        const_tab->const_set_size = capacity;
+    }
+
+    const_tab->const_set[const_tab->const_set_count] = *set;
+    ++const_tab->const_set_count;
+    return D3D_OK;
+}
+
+/* Record a change of one integer state value (table or offset) in the copy list.
+ *
+ * Nothing is appended when value already equals *current_value. Otherwise
+ * *current_value is updated first and an entry of the given type carrying
+ * value is appended. Returns D3D_OK, or the failure code of append_const_set(). */
+static HRESULT add_const_set_value(struct d3dx_const_tab *const_tab, unsigned int value,
+        unsigned int *current_value, enum const_param_copy_state_type type)
+{
+    struct d3dx_const_param_set entry;
+    HRESULT hr;
+
+    if (*current_value == value)
+        return D3D_OK;
+
+    *current_value = value;
+    entry.copy_state_type = type;
+    entry.state_change.value = value;
+    hr = append_const_set(const_tab, &entry);
+    if (FAILED(hr))
+        return hr;
+    return D3D_OK;
+}
+
+/* Add the copy of a single component (param data at param_offset to table at
+ * table_offset) to the copy list of const_tab.
+ *
+ * set_state tracks the state the list will have produced so far. When the
+ * requested copy continues exactly where that state points, the count stored
+ * in the last list entry is incremented instead of appending anything.
+ * Otherwise entries are appended for each piece of state that differs,
+ * followed by a copy count entry of 1. Returns D3D_OK or the failure code of
+ * the first append that fails. */
+static HRESULT add_const_set(struct d3dx_const_tab *const_tab, struct d3dx_const_copy_state *set_state,
+        struct d3dx_parameter *param, unsigned int table, unsigned int param_offset,
+        unsigned int table_offset)
+{
+    struct d3dx_const_param_set entry;
+    BOOL continues_run;
+    HRESULT hr;
+
+    continues_run = set_state->param == param && set_state->table == table
+            && set_state->param_offset == param_offset && set_state->table_offset == table_offset;
+    if (continues_run)
+    {
+        /* Assumes the last appended entry is the copy count of the run being extended. */
+        ++const_tab->const_set[const_tab->const_set_count - 1].state_change.value;
+        ++set_state->param_offset;
+        ++set_state->table_offset;
+        return D3D_OK;
+    }
+
+    if (set_state->param != param)
+    {
+        set_state->param = param;
+        entry.copy_state_type = COPY_STATE_TYPE_PARAM;
+        entry.state_change.param = param;
+        hr = append_const_set(const_tab, &entry);
+        if (FAILED(hr))
+            return hr;
+    }
+
+    hr = add_const_set_value(const_tab, table, &set_state->table, COPY_STATE_TYPE_TABLE);
+    if (FAILED(hr))
+        return hr;
+
+    hr = add_const_set_value(const_tab, param_offset, &set_state->param_offset, COPY_STATE_TYPE_PARAM_OFFSET);
+    if (FAILED(hr))
+        return hr;
+
+    hr = add_const_set_value(const_tab, table_offset, &set_state->table_offset, COPY_STATE_TYPE_TABLE_OFFSET);
+    if (FAILED(hr))
+        return hr;
+
+    entry.copy_state_type = COPY_STATE_TYPE_COPY_COUNT;
+    entry.state_change.value = 1;
+    hr = append_const_set(const_tab, &entry);
+    if (FAILED(hr))
+        return hr;
+
+    ++set_state->param_offset;
+    ++set_state->table_offset;
+    return D3D_OK;
+}
+
+static HRESULT init_set_constants_param(struct d3dx_const_tab *const_tab, ID3DXConstantTable *ctab,
+ D3DXHANDLE hc, struct d3dx_parameter *param, struct d3dx_const_copy_state *set_state)
{
- ID3DXConstantTable *ctab = const_tab->ctab;
D3DXCONSTANT_DESC desc;
unsigned int const_count, param_count, i, j, n, table, start_offset;
unsigned int minor, major, major_stride, param_offset;
BOOL transpose, get_element;
+ HRESULT hr;
if (FAILED(get_ctab_constant_desc(ctab, hc, &desc)))
return D3DERR_INVALIDCALL;
@@ -872,7 +1048,7 @@ static HRESULT set_constants_param(struct d3dx_regstore *rs, struct d3dx_const_t
}
if (const_count)
{
- HRESULT hr, ret;
+ HRESULT ret;
D3DXHANDLE hc_element;
ret = D3D_OK;
@@ -889,7 +1065,7 @@ static HRESULT set_constants_param(struct d3dx_regstore *rs, struct d3dx_const_t
}
else
{
- hr = set_constants_param(rs, const_tab, hc_element, &param->members[i]);
+ hr = init_set_constants_param(const_tab, ctab, hc_element, &param->members[i], set_state);
}
if (FAILED(hr))
ret = hr;
@@ -931,21 +1107,17 @@ static HRESULT set_constants_param(struct d3dx_regstore *rs, struct d3dx_const_t
major_stride = max(minor, table_info[table].reg_component_count);
n = min(major * major_stride,
desc.RegisterCount * table_info[table].reg_component_count + major_stride - 1) / major_stride;
+
for (i = 0; i < n; ++i)
{
for (j = 0; j < minor; ++j)
{
- unsigned int out;
- unsigned int *in;
unsigned int offset;
offset = start_offset + i * major_stride + j;
- if (offset / table_info[table].reg_component_count >= rs->table_sizes[table])
- {
- if (table_info[table].reg_component_count != 1)
- FIXME("Output offset exceeds table size, name %s, component %u.\n", desc.Name, i);
+ if ((offset - start_offset) / table_info[table].reg_component_count >= desc.RegisterCount)
break;
- }
+
if (transpose)
param_offset = i + j * major;
else
@@ -955,41 +1127,31 @@ static HRESULT set_constants_param(struct d3dx_regstore *rs, struct d3dx_const_t
WARN("Parameter data is too short, name %s, component %u.\n", desc.Name, i);
break;
}
-
- in = (unsigned int *)param->data + param_offset;
- /* TODO: store data transfer / convert operation instead of performing an operation
- from here, to move this to parsing stage */
- switch (table_info[table].type)
- {
- case PRES_VT_FLOAT: set_number(&out, D3DXPT_FLOAT, in, param->type); break;
- case PRES_VT_INT: set_number(&out, D3DXPT_INT, in, param->type); break;
- case PRES_VT_BOOL: set_number(&out, D3DXPT_BOOL, in, param->type); break;
- default:
- FIXME("Unexpected type %#x.\n", table_info[table].type);
- break;
- }
- regstore_set_values(rs, table, &out, offset, 1);
+ if (FAILED(hr = add_const_set(const_tab, set_state, param, table, param_offset, offset)))
+ return hr;
}
}
-
return D3D_OK;
}
-static HRESULT set_constants(struct d3dx_regstore *rs, struct d3dx_const_tab *const_tab)
+static HRESULT init_set_constants(struct d3dx_const_tab *const_tab, ID3DXConstantTable *ctab)
{
unsigned int i;
HRESULT hr, ret;
D3DXHANDLE hc;
+ struct d3dx_const_copy_state set_state;
+
+ memset(&set_state, 0, sizeof(set_state));
ret = D3D_OK;
for (i = 0; i < const_tab->input_count; ++i)
{
if (!const_tab->inputs_param[i] || const_tab->inputs_param[i]->class == D3DXPC_OBJECT)
continue;
- hc = ID3DXConstantTable_GetConstant(const_tab->ctab, NULL, i);
+ hc = ID3DXConstantTable_GetConstant(ctab, NULL, i);
if (hc)
{
- hr = set_constants_param(rs, const_tab, hc, const_tab->inputs_param[i]);
+ hr = init_set_constants_param(const_tab, ctab, hc, const_tab->inputs_param[i], &set_state);
}
else
{
@@ -999,6 +1161,18 @@ static HRESULT set_constants(struct d3dx_regstore *rs, struct d3dx_const_tab *co
if (FAILED(hr))
ret = hr;
}
+
+ if (const_tab->const_set_count)
+ {
+ const_tab->const_set = HeapReAlloc(GetProcessHeap(), 0, const_tab->const_set,
+ sizeof(*const_tab->const_set) * const_tab->const_set_count);
+ if (!const_tab->const_set)
+ {
+ ERR("Out of memory.\n");
+ return E_OUTOFMEMORY;
+ }
+ const_tab->const_set_size = const_tab->const_set_count;
+ }
return ret;
}
@@ -1074,8 +1248,7 @@ HRESULT d3dx_evaluate_parameter(struct d3dx_param_eval *peval, const struct d3dx
TRACE("peval %p, param %p, param_value %p.\n", peval, param, param_value);
- if (FAILED(hr = set_constants(&peval->pres.regs, &peval->pres.inputs)))
- return hr;
+ set_constants(&peval->pres.regs, &peval->pres.inputs);
if (FAILED(hr = execute_preshader(&peval->pres)))
return hr;
@@ -1175,13 +1348,11 @@ HRESULT d3dx_param_eval_set_shader_constants(struct IDirect3DDevice9 *device, st
TRACE("device %p, peval %p, param_type %u.\n", device, peval, peval->param_type);
- if (FAILED(hr = set_constants(rs, &pres->inputs)))
- return hr;
+ set_constants(rs, &pres->inputs);
if (FAILED(hr = execute_preshader(pres)))
return hr;
- if (FAILED(hr = set_constants(rs, &peval->shader_inputs)))
- return hr;
+ set_constants(rs, &peval->shader_inputs);
result = D3D_OK;
for (i = 0; i < ARRAY_SIZE(set_tables); ++i)
{
--
2.5.5
More information about the wine-patches
mailing list