[PATCH v2 4/5] wined3d: Optimize scanning changed shader constants in wined3d_device_apply_stateblock().
Matteo Bruni
mbruni at codeweavers.com
Thu Feb 20 15:40:16 CST 2020
Signed-off-by: Matteo Bruni <mbruni at codeweavers.com>
---
v2: Introduce wined3d_bitmap_ffs() and wined3d_bitmap_ffz(), implement
wined3d_apply_shader_constants() on top of them. Tweak a few
additional details while at it, mostly to reduce the amount of changes
necessary when using the same function for
wined3d_stateblock_capture() and wined3d_stateblock_apply() in
followup patches.
dlls/wined3d/device.c | 171 ++++++++++++++++++++++--------------------
1 file changed, 91 insertions(+), 80 deletions(-)
diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c
index 3cf621ff46e..4520058eb60 100644
--- a/dlls/wined3d/device.c
+++ b/dlls/wined3d/device.c
@@ -3832,16 +3832,87 @@ struct wined3d_texture * CDECL wined3d_device_get_texture(const struct wined3d_d
return device->state.textures[stage];
}
+/* Find the first set bit of a bitmap at or after bit index "start".
+ *
+ * bitmap: array of 32-bit words; bit index i is looked up in word i / 32.
+ * start:  index of the first bit to examine (inclusive).
+ * count:  total number of bits in the bitmap (i.e. it doesn't depend on
+ *         start).
+ *
+ * Returns the index of the first set bit in [start, count), or ~0u when
+ * there is none. */
+static unsigned int wined3d_bitmap_ffs(const uint32_t *bitmap, unsigned int start, unsigned int count)
+{
+    const unsigned int word_bit_count = sizeof(*bitmap) * CHAR_BIT;
+    const uint32_t *end = bitmap + (count + word_bit_count - 1) / word_bit_count;
+    const uint32_t *ptr = bitmap + start / word_bit_count;
+    unsigned int idx;
+    uint32_t map;
+
+    if (ptr >= end)
+        return ~0u;
+
+    /* Only the first word needs masking: drop the bits strictly below
+     * "start". The shift amount is start % 32, i.e. always less than the
+     * word width, and bit "start" itself is kept. */
+    map = *ptr & (~(uint32_t)0 << (start % word_bit_count));
+    while (!map)
+    {
+        if (++ptr == end)
+            return ~0u;
+        map = *ptr;
+    }
+
+    /* wined3d_bit_scan() is presumably returning the index of the lowest set
+     * bit in the word -- defined elsewhere, confirm. */
+    idx = (ptr - bitmap) * word_bit_count + wined3d_bit_scan(&map);
+
+    /* Bits set in the unused tail of the last word are not part of the
+     * bitmap. */
+    return idx < count ? idx : ~0u;
+}
+
+/* Find the first clear (zero) bit of a bitmap at or after bit index "start".
+ *
+ * bitmap: array of 32-bit words; bit index i is looked up in word i / 32.
+ * start:  index of the first bit to examine (inclusive).
+ * count:  total number of bits in the bitmap, independent of start.
+ *
+ * Returns the index of the first clear bit in [start, count), or ~0u when
+ * every bit in that range is set. */
+static unsigned int wined3d_bitmap_ffz(const uint32_t *bitmap, unsigned int start, unsigned int count)
+{
+    const unsigned int word_bit_count = sizeof(*bitmap) * CHAR_BIT;
+    const uint32_t *end = bitmap + (count + word_bit_count - 1) / word_bit_count;
+    const uint32_t *ptr = bitmap + start / word_bit_count;
+    unsigned int idx;
+    uint32_t map;
+
+    if (ptr >= end)
+        return ~0u;
+
+    /* Search the complemented words for a set bit. Only the first word needs
+     * masking: drop the bits strictly below "start" (shift amount is
+     * start % 32, always less than the word width). */
+    map = ~*ptr & (~(uint32_t)0 << (start % word_bit_count));
+    while (!map)
+    {
+        if (++ptr == end)
+            return ~0u;
+        map = ~*ptr;
+    }
+
+    /* wined3d_bit_scan() is presumably returning the index of the lowest set
+     * bit in the word -- defined elsewhere, confirm. */
+    idx = (ptr - bitmap) * word_bit_count + wined3d_bit_scan(&map);
+
+    /* A zero found only in the unused tail of the last word is not a bit of
+     * the bitmap; report "not found" instead. */
+    return idx < count ? idx : ~0u;
+}
+
+/* Common shape of the shader constant setters handed to
+ * wined3d_apply_shader_constants(): device, index of the first constant,
+ * number of constants, pointer to the constant data. */
+typedef HRESULT (CDECL *wined3d_state_shader_constant_setter)(struct wined3d_device *device,
+        unsigned int start_idx, unsigned int count, void *constants);
+
+/* Walk "bitmap" and call "shader_constant_setter" once for every run of
+ * consecutive set bits.
+ *
+ * device:    passed through to the setter.
+ * state:     not used by this function.
+ * bitmap:    one bit per constant; a set bit selects the constant.
+ * bit_count: number of bits in "bitmap".
+ * data:      base of the constant array; element i starts at byte offset
+ *            i * stride.
+ * stride:    size in bytes of a single constant.
+ *
+ * The setter's return value is ignored. */
+static void wined3d_apply_shader_constants(struct wined3d_device *device,
+        struct wined3d_stateblock_state *state,
+        const DWORD *bitmap, unsigned int bit_count, void *data, unsigned int stride,
+        wined3d_state_shader_constant_setter shader_constant_setter)
+{
+    unsigned int run_start, run_end;
+    BYTE *base = data;
+
+    for (run_start = wined3d_bitmap_ffs(bitmap, 0, bit_count); run_start != ~0u;
+            run_start = wined3d_bitmap_ffs(bitmap, run_end + 1, bit_count))
+    {
+        /* The run extends up to (not including) the next clear bit, or to
+         * the end of the bitmap if there is none. */
+        run_end = wined3d_bitmap_ffz(bitmap, run_start + 1, bit_count);
+        if (run_end == ~0u)
+            run_end = bit_count;
+
+        shader_constant_setter(device, run_start, run_end - run_start, base + run_start * stride);
+    }
+}
+
void CDECL wined3d_device_apply_stateblock(struct wined3d_device *device,
struct wined3d_stateblock *stateblock)
{
- const struct wined3d_d3d_info *d3d_info = &stateblock->device->adapter->d3d_info;
const struct wined3d_stateblock_state *state = &stateblock->stateblock_state;
const struct wined3d_saved_states *changed = &stateblock->changed;
struct wined3d_blend_state *blend_state;
struct wined3d_color colour;
- unsigned int i, j, count;
BOOL set_blend_state;
+ unsigned int i, j;
+ DWORD map;
TRACE("device %p, stateblock %p.\n", device, stateblock);
@@ -3850,89 +3921,29 @@ void CDECL wined3d_device_apply_stateblock(struct wined3d_device *device,
if (changed->pixelShader)
wined3d_device_set_pixel_shader(device, state->ps);
- count = 0;
- for (i = 0; i < d3d_info->limits.vs_uniform_count; ++i)
- {
- if (wined3d_bitmap_is_set(changed->vs_consts_f, i))
- ++count;
- else if (count)
- {
- wined3d_device_set_vs_consts_f(device, i - count, count, state->vs_consts_f + i - count);
- count = 0;
- }
- }
- if (count)
- wined3d_device_set_vs_consts_f(device, i - count, count, state->vs_consts_f + i - count);
+ wined3d_apply_shader_constants(device, NULL, changed->vs_consts_f, WINED3D_MAX_VS_CONSTS_F,
+ (void *)state->vs_consts_f, sizeof(*state->vs_consts_f),
+ (wined3d_state_shader_constant_setter)wined3d_device_set_vs_consts_f);
- count = 0;
- for (i = 0; i < WINED3D_MAX_CONSTS_B; ++i)
- {
- if (changed->vertexShaderConstantsB & (1u << i))
- ++count;
- else if (count)
- {
- wined3d_device_set_vs_consts_b(device, i - count, count, state->vs_consts_b + i - count);
- count = 0;
- }
- }
- if (count)
- wined3d_device_set_vs_consts_b(device, i - count, count, state->vs_consts_b + i - count);
+ map = changed->vertexShaderConstantsB;
+ wined3d_apply_shader_constants(device, NULL, &map, WINED3D_MAX_CONSTS_B, (void *)state->vs_consts_b,
+ sizeof(*state->vs_consts_b), (wined3d_state_shader_constant_setter)wined3d_device_set_vs_consts_b);
- count = 0;
- for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i)
- {
- if (changed->vertexShaderConstantsI & (1u << i))
- ++count;
- else if (count)
- {
- wined3d_device_set_vs_consts_i(device, i - count, count, state->vs_consts_i + i - count);
- count = 0;
- }
- }
- if (count)
- wined3d_device_set_vs_consts_i(device, i - count, count, state->vs_consts_i + i - count);
+ map = changed->vertexShaderConstantsI;
+ wined3d_apply_shader_constants(device, NULL, &map, WINED3D_MAX_CONSTS_I, (void *)state->vs_consts_i,
+ sizeof(*state->vs_consts_i), (wined3d_state_shader_constant_setter)wined3d_device_set_vs_consts_i);
- count = 0;
- for (i = 0; i < d3d_info->limits.ps_uniform_count; ++i)
- {
- if (wined3d_bitmap_is_set(changed->ps_consts_f, i))
- ++count;
- else if (count)
- {
- wined3d_device_set_ps_consts_f(device, i - count, count, state->ps_consts_f + i - count);
- count = 0;
- }
- }
- if (count)
- wined3d_device_set_ps_consts_f(device, i - count, count, state->ps_consts_f + i - count);
+ wined3d_apply_shader_constants(device, NULL, changed->ps_consts_f, WINED3D_MAX_PS_CONSTS_F,
+ (void *)state->ps_consts_f, sizeof(*state->ps_consts_f),
+ (wined3d_state_shader_constant_setter)wined3d_device_set_ps_consts_f);
- count = 0;
- for (i = 0; i < WINED3D_MAX_CONSTS_B; ++i)
- {
- if (changed->pixelShaderConstantsB & (1u << i))
- ++count;
- else if (count)
- {
- wined3d_device_set_ps_consts_b(device, i - count, count, state->ps_consts_b + i - count);
- count = 0;
- }
- }
- if (count)
- wined3d_device_set_ps_consts_b(device, i - count, count, state->ps_consts_b + i - count);
+ map = changed->pixelShaderConstantsB;
+ wined3d_apply_shader_constants(device, NULL, &map, WINED3D_MAX_CONSTS_B, (void *)state->ps_consts_b,
+ sizeof(*state->ps_consts_b), (wined3d_state_shader_constant_setter)wined3d_device_set_ps_consts_b);
- count = 0;
- for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i)
- {
- if (changed->pixelShaderConstantsI & (1u << i))
- ++count;
- else if (count)
- {
- wined3d_device_set_ps_consts_i(device, i - count, count, state->ps_consts_i + i - count);
- count = 0;
- }
- }
- if (count)
- wined3d_device_set_ps_consts_i(device, i - count, count, state->ps_consts_i + i - count);
+ map = changed->pixelShaderConstantsI;
+ wined3d_apply_shader_constants(device, NULL, &map, WINED3D_MAX_CONSTS_I, (void *)state->ps_consts_i,
+ sizeof(*state->ps_consts_i), (wined3d_state_shader_constant_setter)wined3d_device_set_ps_consts_i);
for (i = 0; i < ARRAY_SIZE(state->light_state->light_map); ++i)
{
--
2.24.1
More information about the wine-devel
mailing list