[PATCH 3/5] wined3d: Optimize scanning changed shader constants in wined3d_device_apply_stateblock().
Matteo Bruni
mbruni at codeweavers.com
Thu Feb 27 06:31:11 CST 2020
Signed-off-by: Matteo Bruni <mbruni at codeweavers.com>
---
v2: Introduce wined3d_bitmap_ffs() and wined3d_bitmap_ffz(), implement
wined3d_apply_shader_constants() on top of them. Tweak a few
additional details while at it, mostly to reduce the number of changes
necessary when using the same function for
wined3d_stateblock_capture() and wined3d_stateblock_apply() in
follow-up patches.
v3: Introduce wined3d_bitmap_get_range(), get rid of
wined3d_apply_shader_constants(), simplify wined3d_bitmap_ffs(), and
make both wined3d_bitmap_ffs() and wined3d_bitmap_ffz() share common
code. (Thanks Henri!)
dlls/wined3d/device.c | 105 ++++++++++++---------------------
dlls/wined3d/wined3d_private.h | 74 +++++++++++++++++++----
2 files changed, 101 insertions(+), 78 deletions(-)
diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c
index 3cf621ff46e..fa675f57290 100644
--- a/dlls/wined3d/device.c
+++ b/dlls/wined3d/device.c
@@ -3835,13 +3835,14 @@ struct wined3d_texture * CDECL wined3d_device_get_texture(const struct wined3d_d
void CDECL wined3d_device_apply_stateblock(struct wined3d_device *device,
struct wined3d_stateblock *stateblock)
{
- const struct wined3d_d3d_info *d3d_info = &stateblock->device->adapter->d3d_info;
const struct wined3d_stateblock_state *state = &stateblock->stateblock_state;
const struct wined3d_saved_states *changed = &stateblock->changed;
struct wined3d_blend_state *blend_state;
struct wined3d_color colour;
- unsigned int i, j, count;
+ struct wined3d_range range;
+ unsigned int i, j, start;
BOOL set_blend_state;
+ DWORD map;
TRACE("device %p, stateblock %p.\n", device, stateblock);
@@ -3850,89 +3851,57 @@ void CDECL wined3d_device_apply_stateblock(struct wined3d_device *device,
if (changed->pixelShader)
wined3d_device_set_pixel_shader(device, state->ps);
- count = 0;
- for (i = 0; i < d3d_info->limits.vs_uniform_count; ++i)
+ for (start = 0; ; start = range.offset + range.size)
{
- if (wined3d_bitmap_is_set(changed->vs_consts_f, i))
- ++count;
- else if (count)
- {
- wined3d_device_set_vs_consts_f(device, i - count, count, state->vs_consts_f + i - count);
- count = 0;
- }
+ if (!wined3d_bitmap_get_range(changed->vs_consts_f, WINED3D_MAX_VS_CONSTS_F, start, &range))
+ break;
+
+ wined3d_device_set_vs_consts_f(device, range.offset, range.size, &state->vs_consts_f[range.offset]);
}
- if (count)
- wined3d_device_set_vs_consts_f(device, i - count, count, state->vs_consts_f + i - count);
- count = 0;
- for (i = 0; i < WINED3D_MAX_CONSTS_B; ++i)
+ map = changed->vertexShaderConstantsI;
+ for (start = 0; ; start = range.offset + range.size)
{
- if (changed->vertexShaderConstantsB & (1u << i))
- ++count;
- else if (count)
- {
- wined3d_device_set_vs_consts_b(device, i - count, count, state->vs_consts_b + i - count);
- count = 0;
- }
+ if (!wined3d_bitmap_get_range(&map, WINED3D_MAX_CONSTS_I, start, &range))
+ break;
+
+ wined3d_device_set_vs_consts_i(device, range.offset, range.size, &state->vs_consts_i[range.offset]);
}
- if (count)
- wined3d_device_set_vs_consts_b(device, i - count, count, state->vs_consts_b + i - count);
- count = 0;
- for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i)
+ map = changed->vertexShaderConstantsB;
+ for (start = 0; ; start = range.offset + range.size)
{
- if (changed->vertexShaderConstantsI & (1u << i))
- ++count;
- else if (count)
- {
- wined3d_device_set_vs_consts_i(device, i - count, count, state->vs_consts_i + i - count);
- count = 0;
- }
+ if (!wined3d_bitmap_get_range(&map, WINED3D_MAX_CONSTS_B, start, &range))
+ break;
+
+ wined3d_device_set_vs_consts_b(device, range.offset, range.size, &state->vs_consts_b[range.offset]);
}
- if (count)
- wined3d_device_set_vs_consts_i(device, i - count, count, state->vs_consts_i + i - count);
- count = 0;
- for (i = 0; i < d3d_info->limits.ps_uniform_count; ++i)
+ for (start = 0; ; start = range.offset + range.size)
{
- if (wined3d_bitmap_is_set(changed->ps_consts_f, i))
- ++count;
- else if (count)
- {
- wined3d_device_set_ps_consts_f(device, i - count, count, state->ps_consts_f + i - count);
- count = 0;
- }
+ if (!wined3d_bitmap_get_range(changed->ps_consts_f, WINED3D_MAX_PS_CONSTS_F, start, &range))
+ break;
+
+ wined3d_device_set_ps_consts_f(device, range.offset, range.size, &state->ps_consts_f[range.offset]);
}
- if (count)
- wined3d_device_set_ps_consts_f(device, i - count, count, state->ps_consts_f + i - count);
- count = 0;
- for (i = 0; i < WINED3D_MAX_CONSTS_B; ++i)
+ map = changed->pixelShaderConstantsI;
+ for (start = 0; ; start = range.offset + range.size)
{
- if (changed->pixelShaderConstantsB & (1u << i))
- ++count;
- else if (count)
- {
- wined3d_device_set_ps_consts_b(device, i - count, count, state->ps_consts_b + i - count);
- count = 0;
- }
+ if (!wined3d_bitmap_get_range(&map, WINED3D_MAX_CONSTS_I, start, &range))
+ break;
+
+ wined3d_device_set_ps_consts_i(device, range.offset, range.size, &state->ps_consts_i[range.offset]);
}
- if (count)
- wined3d_device_set_ps_consts_b(device, i - count, count, state->ps_consts_b + i - count);
- count = 0;
- for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i)
+ map = changed->pixelShaderConstantsB;
+ for (start = 0; ; start = range.offset + range.size)
{
- if (changed->pixelShaderConstantsI & (1u << i))
- ++count;
- else if (count)
- {
- wined3d_device_set_ps_consts_i(device, i - count, count, state->ps_consts_i + i - count);
- count = 0;
- }
+ if (!wined3d_bitmap_get_range(&map, WINED3D_MAX_CONSTS_B, start, &range))
+ break;
+
+ wined3d_device_set_ps_consts_b(device, range.offset, range.size, &state->ps_consts_b[range.offset]);
}
- if (count)
- wined3d_device_set_ps_consts_i(device, i - count, count, state->ps_consts_i + i - count);
for (i = 0; i < ARRAY_SIZE(state->light_state->light_map); ++i)
{
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
index b6042cd6179..15e376c8328 100644
--- a/dlls/wined3d/wined3d_private.h
+++ b/dlls/wined3d/wined3d_private.h
@@ -3196,13 +3196,13 @@ struct wined3d_state
struct wined3d_shader_resource_view *shader_resource_view[WINED3D_SHADER_TYPE_COUNT][MAX_SHADER_RESOURCE_VIEWS];
struct wined3d_unordered_access_view *unordered_access_view[WINED3D_PIPELINE_COUNT][MAX_UNORDERED_ACCESS_VIEWS];
- BOOL vs_consts_b[WINED3D_MAX_CONSTS_B];
- struct wined3d_ivec4 vs_consts_i[WINED3D_MAX_CONSTS_I];
struct wined3d_vec4 vs_consts_f[WINED3D_MAX_VS_CONSTS_F];
+ struct wined3d_ivec4 vs_consts_i[WINED3D_MAX_CONSTS_I];
+ BOOL vs_consts_b[WINED3D_MAX_CONSTS_B];
- BOOL ps_consts_b[WINED3D_MAX_CONSTS_B];
- struct wined3d_ivec4 ps_consts_i[WINED3D_MAX_CONSTS_I];
struct wined3d_vec4 ps_consts_f[WINED3D_MAX_PS_CONSTS_F];
+ struct wined3d_ivec4 ps_consts_i[WINED3D_MAX_CONSTS_I];
+ BOOL ps_consts_b[WINED3D_MAX_CONSTS_B];
struct wined3d_texture *textures[WINED3D_MAX_COMBINED_SAMPLERS];
DWORD sampler_states[WINED3D_MAX_COMBINED_SAMPLERS][WINED3D_HIGHEST_SAMPLER_STATE + 1];
@@ -3914,6 +3914,12 @@ struct wined3d_vertex_declaration
struct wined3d_saved_states
{
+ DWORD vs_consts_f[WINED3D_MAX_VS_CONSTS_F >> 5];
+ WORD vertexShaderConstantsI; /* WINED3D_MAX_CONSTS_I, 16 */
+ WORD vertexShaderConstantsB; /* WINED3D_MAX_CONSTS_B, 16 */
+ DWORD ps_consts_f[WINED3D_MAX_PS_CONSTS_F >> 5];
+ WORD pixelShaderConstantsI; /* WINED3D_MAX_CONSTS_I, 16 */
+ WORD pixelShaderConstantsB; /* WINED3D_MAX_CONSTS_B, 16 */
DWORD transform[(WINED3D_HIGHEST_TRANSFORM_STATE >> 5) + 1];
WORD streamSource; /* WINED3D_MAX_STREAMS, 16 */
WORD streamFreq; /* WINED3D_MAX_STREAMS, 16 */
@@ -3921,12 +3927,6 @@ struct wined3d_saved_states
DWORD textureState[WINED3D_MAX_TEXTURES]; /* WINED3D_HIGHEST_TEXTURE_STATE + 1, 18 */
WORD samplerState[WINED3D_MAX_COMBINED_SAMPLERS]; /* WINED3D_HIGHEST_SAMPLER_STATE + 1, 14 */
DWORD clipplane; /* WINED3D_MAX_USER_CLIP_PLANES, 32 */
- WORD pixelShaderConstantsB; /* WINED3D_MAX_CONSTS_B, 16 */
- WORD pixelShaderConstantsI; /* WINED3D_MAX_CONSTS_I, 16 */
- DWORD ps_consts_f[WINED3D_MAX_PS_CONSTS_F >> 5];
- WORD vertexShaderConstantsB; /* WINED3D_MAX_CONSTS_B, 16 */
- WORD vertexShaderConstantsI; /* WINED3D_MAX_CONSTS_I, 16 */
- DWORD vs_consts_f[WINED3D_MAX_VS_CONSTS_F >> 5];
DWORD textures : 20; /* WINED3D_MAX_COMBINED_SAMPLERS, 20 */
DWORD indices : 1;
DWORD material : 1;
@@ -5325,6 +5325,60 @@ static inline BOOL wined3d_bitmap_is_set(const uint32_t *map, unsigned int idx)
return map[idx >> 5] & (1u << (idx & 0x1f));
}
+static inline unsigned int wined3d_bitmap_ffs_xor(const uint32_t *bitmap, unsigned int bit_count,
+ unsigned int start, uint32_t xor_mask)
+{
+ const unsigned int word_bit_count = sizeof(*bitmap) * CHAR_BIT;
+ const uint32_t *ptr, *end_ptr;
+ uint32_t map, mask;
+
+ assert(bit_count < word_bit_count || !(bit_count % word_bit_count));
+
+ ptr = bitmap + start / word_bit_count;
+ end_ptr = bitmap + (bit_count + word_bit_count - 1) / word_bit_count;
+
+ if (ptr >= end_ptr)
+ return ~0u;
+
+ mask = ~0u << start % word_bit_count;
+ map = (*ptr ^ xor_mask) & mask;
+ while (!map)
+ {
+ if (++ptr == end_ptr)
+ return ~0u;
+ map = *ptr ^ xor_mask;
+ }
+ return (ptr - bitmap) * word_bit_count + wined3d_bit_scan(&map);
+}
+
+static inline unsigned int wined3d_bitmap_ffs(const uint32_t *bitmap, unsigned int bit_count, unsigned int start)
+{
+ return wined3d_bitmap_ffs_xor(bitmap, bit_count, start, 0);
+}
+
+static inline unsigned int wined3d_bitmap_ffz(const uint32_t *bitmap, unsigned int bit_count, unsigned int start)
+{
+ return wined3d_bitmap_ffs_xor(bitmap, bit_count, start, ~0u);
+}
+
+static inline BOOL wined3d_bitmap_get_range(const DWORD *bitmap, unsigned int bit_count,
+ unsigned int start, struct wined3d_range *range)
+{
+ unsigned int range_start, range_end;
+
+ range_start = wined3d_bitmap_ffs(bitmap, bit_count, start);
+ if (range_start == ~0u)
+ return FALSE;
+
+ range_end = wined3d_bitmap_ffz(bitmap, bit_count, range_start + 1);
+ if (range_end == ~0u)
+ range_end = bit_count;
+
+ range->offset = range_start;
+ range->size = range_end - range_start;
+ return TRUE;
+}
+
/* The WNDCLASS-Name for the fake window which we use to retrieve the GL capabilities */
#define WINED3D_OPENGL_WINDOW_CLASS_NAME "WineD3D_OpenGL"
--
2.24.1
More information about the wine-devel
mailing list