[PATCH 3/5] wined3d: Optimize scanning changed shader constants in wined3d_device_apply_stateblock().
Matteo Bruni
mbruni at codeweavers.com
Mon Feb 10 13:35:51 CST 2020
Signed-off-by: Matteo Bruni <mbruni at codeweavers.com>
---
It turns out 7654d58b047be3a54d814d890bf1a26374559c83 caused a pretty
sizeable performance regression (which gets a lot more significant
once wined3d performance gets better - I have patches in that
area). Let's start fixing this.
dlls/wined3d/device.c | 151 ++++++++++++++++++++----------------------
1 file changed, 72 insertions(+), 79 deletions(-)
diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c
index 2297d7de916..87684dc1c46 100644
--- a/dlls/wined3d/device.c
+++ b/dlls/wined3d/device.c
@@ -3830,12 +3830,67 @@ struct wined3d_texture * CDECL wined3d_device_get_texture(const struct wined3d_d
return device->state.textures[stage];
}
+typedef HRESULT (CDECL *wined3d_device_shader_constant_setter)(struct wined3d_device *device,
+ unsigned int start_idx, unsigned int count, const void *constants); /* callers cast the type-specific constant setters to this common signature */
+ /* Walk a bitmap of changed constants and call shader_constant_setter once per run of consecutive set bits, passing the run's first index, its length and &data[first index * stride]. */
+static void device_apply_shader_constants(struct wined3d_device *device,
+ const struct wined3d_stateblock_state *state, /* not referenced in this function's body */
+ DWORD *bitmap, unsigned int bit_count, const void *data, unsigned int stride,
+ wined3d_device_shader_constant_setter shader_constant_setter)
+{
+ const unsigned int word_bit_count = sizeof(DWORD) * CHAR_BIT; /* number of bits in one bitmap word */
+ unsigned int i, j, idx, start, last;
+ const BYTE *byte_data = data; /* byte view of data, so that stride is applied as a byte offset */
+ DWORD map;
+ /* start / last hold the first and last index of the run being accumulated; ~0u means no run is open. */
+ start = last = ~0u;
+ for (i = 0; i < (bit_count + word_bit_count - 1) / word_bit_count; ++i) /* one pass per bitmap word; bit_count is rounded up and bits of the last word past bit_count are not masked off */
+ {
+ map = bitmap[i];
+ /* Fast path: a word with every bit set extends or opens a run without scanning individual bits. */
+ if (map == ~0u)
+ {
+ if (last != ~0u && last != i * word_bit_count - 1) /* an open run ends before the bit preceding this word: flush it */
+ {
+ shader_constant_setter(device, start, last - start + 1, &byte_data[start * stride]);
+ start = i * word_bit_count;
+ }
+ if (start == ~0u) /* no run open yet: this word starts one */
+ start = i * word_bit_count;
+ last = i * word_bit_count + word_bit_count - 1; /* the run now reaches the last bit of this word */
+ continue;
+ }
+ while (map)
+ {
+ j = wined3d_bit_scan(&map); /* presumably returns the lowest set bit index and clears it in map - confirm against wined3d_bit_scan() */
+ idx = i * word_bit_count + j; /* index of this bit across the whole bitmap */
+
+ if (start == ~0u)
+ {
+ start = last = idx;
+ }
+ else if (last != idx - 1) /* gap since the previous set bit: flush the open run and start a new one here */
+ {
+ shader_constant_setter(device, start, last - start + 1, &byte_data[start * stride]);
+ start = last = idx;
+ }
+ else
+ {
+ last = idx;
+ }
+ }
+ }
+ if (start != ~0u) /* flush the run still open after the last word, if any */
+ shader_constant_setter(device, start, last - start + 1, &byte_data[start * stride]);
+}
+
void CDECL wined3d_device_apply_stateblock(struct wined3d_device *device,
struct wined3d_stateblock *stateblock)
{
const struct wined3d_d3d_info *d3d_info = &stateblock->device->adapter->d3d_info;
const struct wined3d_stateblock_state *state = &stateblock->stateblock_state;
- unsigned int i, j, count;
+ unsigned int i, j;
+ DWORD map;
TRACE("device %p, stateblock %p.\n", device, stateblock);
@@ -3844,89 +3899,27 @@ void CDECL wined3d_device_apply_stateblock(struct wined3d_device *device,
if (stateblock->changed.pixelShader)
wined3d_device_set_pixel_shader(device, state->ps);
- count = 0;
- for (i = 0; i < d3d_info->limits.vs_uniform_count; ++i)
- {
- if (stateblock->changed.vs_consts_f[i >> 5] & (1u << (i & 0x1f)))
- ++count;
- else if (count)
- {
- wined3d_device_set_vs_consts_f(device, i - count, count, state->vs_consts_f + i - count);
- count = 0;
- }
- }
- if (count)
- wined3d_device_set_vs_consts_f(device, i - count, count, state->vs_consts_f + i - count);
+ device_apply_shader_constants(device, state, stateblock->changed.vs_consts_f, d3d_info->limits.vs_uniform_count,
+ state->vs_consts_f, sizeof(*state->vs_consts_f), (wined3d_device_shader_constant_setter)wined3d_device_set_vs_consts_f);
- count = 0;
- for (i = 0; i < WINED3D_MAX_CONSTS_B; ++i)
- {
- if (stateblock->changed.vertexShaderConstantsB & (1u << i))
- ++count;
- else if (count)
- {
- wined3d_device_set_vs_consts_b(device, i - count, count, state->vs_consts_b + i - count);
- count = 0;
- }
- }
- if (count)
- wined3d_device_set_vs_consts_b(device, i - count, count, state->vs_consts_b + i - count);
+ map = stateblock->changed.vertexShaderConstantsB;
+ device_apply_shader_constants(device, state, &map, WINED3D_MAX_CONSTS_B, state->vs_consts_b, sizeof(*state->vs_consts_b),
+ (wined3d_device_shader_constant_setter)wined3d_device_set_vs_consts_b);
- count = 0;
- for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i)
- {
- if (stateblock->changed.vertexShaderConstantsI & (1u << i))
- ++count;
- else if (count)
- {
- wined3d_device_set_vs_consts_i(device, i - count, count, state->vs_consts_i + i - count);
- count = 0;
- }
- }
- if (count)
- wined3d_device_set_vs_consts_i(device, i - count, count, state->vs_consts_i + i - count);
+ map = stateblock->changed.vertexShaderConstantsI;
+ device_apply_shader_constants(device, state, &map, WINED3D_MAX_CONSTS_I, state->vs_consts_i, sizeof(*state->vs_consts_i),
+ (wined3d_device_shader_constant_setter)wined3d_device_set_vs_consts_i);
- count = 0;
- for (i = 0; i < d3d_info->limits.ps_uniform_count; ++i)
- {
- if (stateblock->changed.ps_consts_f[i >> 5] & (1u << (i & 0x1f)))
- ++count;
- else if (count)
- {
- wined3d_device_set_ps_consts_f(device, i - count, count, state->ps_consts_f + i - count);
- count = 0;
- }
- }
- if (count)
- wined3d_device_set_ps_consts_f(device, i - count, count, state->ps_consts_f + i - count);
+ device_apply_shader_constants(device, state, stateblock->changed.ps_consts_f, d3d_info->limits.ps_uniform_count,
+ state->ps_consts_f, sizeof(*state->ps_consts_f), (wined3d_device_shader_constant_setter)wined3d_device_set_ps_consts_f);
- count = 0;
- for (i = 0; i < WINED3D_MAX_CONSTS_B; ++i)
- {
- if (stateblock->changed.pixelShaderConstantsB & (1u << i))
- ++count;
- else if (count)
- {
- wined3d_device_set_ps_consts_b(device, i - count, count, state->ps_consts_b + i - count);
- count = 0;
- }
- }
- if (count)
- wined3d_device_set_ps_consts_b(device, i - count, count, state->ps_consts_b + i - count);
+ map = stateblock->changed.pixelShaderConstantsB;
+ device_apply_shader_constants(device, state, &map, WINED3D_MAX_CONSTS_B, state->ps_consts_b, sizeof(*state->ps_consts_b),
+ (wined3d_device_shader_constant_setter)wined3d_device_set_ps_consts_b);
- count = 0;
- for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i)
- {
- if (stateblock->changed.pixelShaderConstantsI & (1u << i))
- ++count;
- else if (count)
- {
- wined3d_device_set_ps_consts_i(device, i - count, count, state->ps_consts_i + i - count);
- count = 0;
- }
- }
- if (count)
- wined3d_device_set_ps_consts_i(device, i - count, count, state->ps_consts_i + i - count);
+ map = stateblock->changed.pixelShaderConstantsI;
+ device_apply_shader_constants(device, state, &map, WINED3D_MAX_CONSTS_I, state->ps_consts_i, sizeof(*state->ps_consts_i),
+ (wined3d_device_shader_constant_setter)wined3d_device_set_ps_consts_i);
for (i = 0; i < ARRAY_SIZE(state->light_state->light_map); ++i)
{
--
2.24.1
More information about the wine-devel
mailing list