[PATCH 3/5] wined3d: Optimize scanning changed shader constants in wined3d_device_apply_stateblock().

Matteo Bruni mbruni at codeweavers.com
Thu Feb 27 06:31:11 CST 2020


Signed-off-by: Matteo Bruni <mbruni at codeweavers.com>
---
v2: Introduce wined3d_bitmap_ffs() and wined3d_bitmap_ffz(), implement
wined3d_apply_shader_constants() on top of them. Tweak a few
additional details while at it, mostly to reduce the number of changes
necessary when using the same function for
wined3d_stateblock_capture() and wined3d_stateblock_apply() in
follow-up patches.
v3: Introduce wined3d_bitmap_get_range(), get rid of
wined3d_apply_shader_constants(), simplify wined3d_bitmap_ffs(), make
both wined3d_bitmap_ffs() and wined3d_bitmap_ffz() use the same common
code. (Thanks Henri!)

 dlls/wined3d/device.c          | 105 ++++++++++++---------------------
 dlls/wined3d/wined3d_private.h |  74 +++++++++++++++++++----
 2 files changed, 101 insertions(+), 78 deletions(-)

diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c
index 3cf621ff46e..fa675f57290 100644
--- a/dlls/wined3d/device.c
+++ b/dlls/wined3d/device.c
@@ -3835,13 +3835,14 @@ struct wined3d_texture * CDECL wined3d_device_get_texture(const struct wined3d_d
 void CDECL wined3d_device_apply_stateblock(struct wined3d_device *device,
         struct wined3d_stateblock *stateblock)
 {
-    const struct wined3d_d3d_info *d3d_info = &stateblock->device->adapter->d3d_info;
     const struct wined3d_stateblock_state *state = &stateblock->stateblock_state;
     const struct wined3d_saved_states *changed = &stateblock->changed;
     struct wined3d_blend_state *blend_state;
     struct wined3d_color colour;
-    unsigned int i, j, count;
+    struct wined3d_range range;
+    unsigned int i, j, start;
     BOOL set_blend_state;
+    DWORD map;
 
     TRACE("device %p, stateblock %p.\n", device, stateblock);
 
@@ -3850,89 +3851,57 @@ void CDECL wined3d_device_apply_stateblock(struct wined3d_device *device,
     if (changed->pixelShader)
         wined3d_device_set_pixel_shader(device, state->ps);
 
-    count = 0;
-    for (i = 0; i < d3d_info->limits.vs_uniform_count; ++i)
+    for (start = 0; ; start = range.offset + range.size)
     {
-        if (wined3d_bitmap_is_set(changed->vs_consts_f, i))
-            ++count;
-        else if (count)
-        {
-            wined3d_device_set_vs_consts_f(device, i - count, count, state->vs_consts_f + i - count);
-            count = 0;
-        }
+        if (!wined3d_bitmap_get_range(changed->vs_consts_f, WINED3D_MAX_VS_CONSTS_F, start, &range))
+            break;
+
+        wined3d_device_set_vs_consts_f(device, range.offset, range.size, &state->vs_consts_f[range.offset]);
     }
-    if (count)
-        wined3d_device_set_vs_consts_f(device, i - count, count, state->vs_consts_f + i - count);
 
-    count = 0;
-    for (i = 0; i < WINED3D_MAX_CONSTS_B; ++i)
+    map = changed->vertexShaderConstantsI;
+    for (start = 0; ; start = range.offset + range.size)
     {
-        if (changed->vertexShaderConstantsB & (1u << i))
-            ++count;
-        else if (count)
-        {
-            wined3d_device_set_vs_consts_b(device, i - count, count, state->vs_consts_b + i - count);
-            count = 0;
-        }
+        if (!wined3d_bitmap_get_range(&map, WINED3D_MAX_CONSTS_I, start, &range))
+            break;
+
+        wined3d_device_set_vs_consts_i(device, range.offset, range.size, &state->vs_consts_i[range.offset]);
     }
-    if (count)
-        wined3d_device_set_vs_consts_b(device, i - count, count, state->vs_consts_b + i - count);
 
-    count = 0;
-    for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i)
+    map = changed->vertexShaderConstantsB;
+    for (start = 0; ; start = range.offset + range.size)
     {
-        if (changed->vertexShaderConstantsI & (1u << i))
-            ++count;
-        else if (count)
-        {
-            wined3d_device_set_vs_consts_i(device, i - count, count, state->vs_consts_i + i - count);
-            count = 0;
-        }
+        if (!wined3d_bitmap_get_range(&map, WINED3D_MAX_CONSTS_B, start, &range))
+            break;
+
+        wined3d_device_set_vs_consts_b(device, range.offset, range.size, &state->vs_consts_b[range.offset]);
     }
-    if (count)
-        wined3d_device_set_vs_consts_i(device, i - count, count, state->vs_consts_i + i - count);
 
-    count = 0;
-    for (i = 0; i < d3d_info->limits.ps_uniform_count; ++i)
+    for (start = 0; ; start = range.offset + range.size)
     {
-        if (wined3d_bitmap_is_set(changed->ps_consts_f, i))
-            ++count;
-        else if (count)
-        {
-            wined3d_device_set_ps_consts_f(device, i - count, count, state->ps_consts_f + i - count);
-            count = 0;
-        }
+        if (!wined3d_bitmap_get_range(changed->ps_consts_f, WINED3D_MAX_PS_CONSTS_F, start, &range))
+            break;
+
+        wined3d_device_set_ps_consts_f(device, range.offset, range.size, &state->ps_consts_f[range.offset]);
     }
-    if (count)
-        wined3d_device_set_ps_consts_f(device, i - count, count, state->ps_consts_f + i - count);
 
-    count = 0;
-    for (i = 0; i < WINED3D_MAX_CONSTS_B; ++i)
+    map = changed->pixelShaderConstantsI;
+    for (start = 0; ; start = range.offset + range.size)
     {
-        if (changed->pixelShaderConstantsB & (1u << i))
-            ++count;
-        else if (count)
-        {
-            wined3d_device_set_ps_consts_b(device, i - count, count, state->ps_consts_b + i - count);
-            count = 0;
-        }
+        if (!wined3d_bitmap_get_range(&map, WINED3D_MAX_CONSTS_I, start, &range))
+            break;
+
+        wined3d_device_set_ps_consts_i(device, range.offset, range.size, &state->ps_consts_i[range.offset]);
     }
-    if (count)
-        wined3d_device_set_ps_consts_b(device, i - count, count, state->ps_consts_b + i - count);
 
-    count = 0;
-    for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i)
+    map = changed->pixelShaderConstantsB;
+    for (start = 0; ; start = range.offset + range.size)
     {
-        if (changed->pixelShaderConstantsI & (1u << i))
-            ++count;
-        else if (count)
-        {
-            wined3d_device_set_ps_consts_i(device, i - count, count, state->ps_consts_i + i - count);
-            count = 0;
-        }
+        if (!wined3d_bitmap_get_range(&map, WINED3D_MAX_CONSTS_B, start, &range))
+            break;
+
+        wined3d_device_set_ps_consts_b(device, range.offset, range.size, &state->ps_consts_b[range.offset]);
     }
-    if (count)
-        wined3d_device_set_ps_consts_i(device, i - count, count, state->ps_consts_i + i - count);
 
     for (i = 0; i < ARRAY_SIZE(state->light_state->light_map); ++i)
     {
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
index b6042cd6179..15e376c8328 100644
--- a/dlls/wined3d/wined3d_private.h
+++ b/dlls/wined3d/wined3d_private.h
@@ -3196,13 +3196,13 @@ struct wined3d_state
     struct wined3d_shader_resource_view *shader_resource_view[WINED3D_SHADER_TYPE_COUNT][MAX_SHADER_RESOURCE_VIEWS];
     struct wined3d_unordered_access_view *unordered_access_view[WINED3D_PIPELINE_COUNT][MAX_UNORDERED_ACCESS_VIEWS];
 
-    BOOL vs_consts_b[WINED3D_MAX_CONSTS_B];
-    struct wined3d_ivec4 vs_consts_i[WINED3D_MAX_CONSTS_I];
     struct wined3d_vec4 vs_consts_f[WINED3D_MAX_VS_CONSTS_F];
+    struct wined3d_ivec4 vs_consts_i[WINED3D_MAX_CONSTS_I];
+    BOOL vs_consts_b[WINED3D_MAX_CONSTS_B];
 
-    BOOL ps_consts_b[WINED3D_MAX_CONSTS_B];
-    struct wined3d_ivec4 ps_consts_i[WINED3D_MAX_CONSTS_I];
     struct wined3d_vec4 ps_consts_f[WINED3D_MAX_PS_CONSTS_F];
+    struct wined3d_ivec4 ps_consts_i[WINED3D_MAX_CONSTS_I];
+    BOOL ps_consts_b[WINED3D_MAX_CONSTS_B];
 
     struct wined3d_texture *textures[WINED3D_MAX_COMBINED_SAMPLERS];
     DWORD sampler_states[WINED3D_MAX_COMBINED_SAMPLERS][WINED3D_HIGHEST_SAMPLER_STATE + 1];
@@ -3914,6 +3914,12 @@ struct wined3d_vertex_declaration
 
 struct wined3d_saved_states
 {
+    DWORD vs_consts_f[WINED3D_MAX_VS_CONSTS_F >> 5];
+    WORD vertexShaderConstantsI;                /* WINED3D_MAX_CONSTS_I, 16 */
+    WORD vertexShaderConstantsB;                /* WINED3D_MAX_CONSTS_B, 16 */
+    DWORD ps_consts_f[WINED3D_MAX_PS_CONSTS_F >> 5];
+    WORD pixelShaderConstantsI;                 /* WINED3D_MAX_CONSTS_I, 16 */
+    WORD pixelShaderConstantsB;                 /* WINED3D_MAX_CONSTS_B, 16 */
     DWORD transform[(WINED3D_HIGHEST_TRANSFORM_STATE >> 5) + 1];
     WORD streamSource;                          /* WINED3D_MAX_STREAMS, 16 */
     WORD streamFreq;                            /* WINED3D_MAX_STREAMS, 16 */
@@ -3921,12 +3927,6 @@ struct wined3d_saved_states
     DWORD textureState[WINED3D_MAX_TEXTURES];   /* WINED3D_HIGHEST_TEXTURE_STATE + 1, 18 */
     WORD samplerState[WINED3D_MAX_COMBINED_SAMPLERS];   /* WINED3D_HIGHEST_SAMPLER_STATE + 1, 14 */
     DWORD clipplane;                            /* WINED3D_MAX_USER_CLIP_PLANES, 32 */
-    WORD pixelShaderConstantsB;                 /* WINED3D_MAX_CONSTS_B, 16 */
-    WORD pixelShaderConstantsI;                 /* WINED3D_MAX_CONSTS_I, 16 */
-    DWORD ps_consts_f[WINED3D_MAX_PS_CONSTS_F >> 5];
-    WORD vertexShaderConstantsB;                /* WINED3D_MAX_CONSTS_B, 16 */
-    WORD vertexShaderConstantsI;                /* WINED3D_MAX_CONSTS_I, 16 */
-    DWORD vs_consts_f[WINED3D_MAX_VS_CONSTS_F >> 5];
     DWORD textures : 20;                        /* WINED3D_MAX_COMBINED_SAMPLERS, 20 */
     DWORD indices : 1;
     DWORD material : 1;
@@ -5325,6 +5325,60 @@ static inline BOOL wined3d_bitmap_is_set(const uint32_t *map, unsigned int idx)
     return map[idx >> 5] & (1u << (idx & 0x1f));
 }
 
+static inline unsigned int wined3d_bitmap_ffs_xor(const uint32_t *bitmap, unsigned int bit_count,
+        unsigned int start, uint32_t xor_mask)
+{ /* Scan (bitmap ^ xor_mask) for a set bit at index >= start; ~0u means none was found. */
+    const unsigned int word_bit_count = sizeof(*bitmap) * CHAR_BIT; /* bits per bitmap word */
+    const uint32_t *ptr, *end_ptr;
+    uint32_t map, mask;
+
+    assert(bit_count < word_bit_count || !(bit_count % word_bit_count)); /* under one word, or whole words */
+
+    ptr = bitmap + start / word_bit_count; /* word containing bit 'start' */
+    end_ptr = bitmap + (bit_count + word_bit_count - 1) / word_bit_count; /* one past the last word */
+
+    if (ptr >= end_ptr) /* 'start' lies beyond the words covered by bit_count */
+        return ~0u;
+
+    mask = ~0u << start % word_bit_count; /* drops the bits below 'start' in the first word */
+    map = (*ptr ^ xor_mask) & mask;
+    while (!map) /* nothing in this word, move on to the next one */
+    {
+        if (++ptr == end_ptr)
+            return ~0u;
+        map = *ptr ^ xor_mask; /* words after the first are examined in full */
+    }
+    return (ptr - bitmap) * word_bit_count + wined3d_bit_scan(&map); /* assumes lowest set bit index - TODO confirm */
+}
+
+static inline unsigned int wined3d_bitmap_ffs(const uint32_t *bitmap, unsigned int bit_count, unsigned int start)
+{ /* Search the bitmap for a set bit at index >= start; ~0u if none is found. */
+    return wined3d_bitmap_ffs_xor(bitmap, bit_count, start, 0); /* xor with 0 leaves the words as they are */
+}
+
+static inline unsigned int wined3d_bitmap_ffz(const uint32_t *bitmap, unsigned int bit_count, unsigned int start)
+{ /* Search the bitmap for a clear bit at index >= start; ~0u if none is found. */
+    return wined3d_bitmap_ffs_xor(bitmap, bit_count, start, ~0u); /* xor with ~0u inverts: clear bits scan as set */
+}
+
+static inline BOOL wined3d_bitmap_get_range(const DWORD *bitmap, unsigned int bit_count,
+        unsigned int start, struct wined3d_range *range)
+{ /* Report a run of set bits beginning at index >= start in *range; FALSE when no set bit is found. */
+    unsigned int range_start, range_end;
+
+    range_start = wined3d_bitmap_ffs(bitmap, bit_count, start);
+    if (range_start == ~0u) /* no set bit left, *range is not written */
+        return FALSE;
+
+    range_end = wined3d_bitmap_ffz(bitmap, bit_count, range_start + 1); /* clear bit that ends the run */
+    if (range_end == ~0u) /* no clear bit found: the run extends to bit_count */
+        range_end = bit_count;
+
+    range->offset = range_start;
+    range->size = range_end - range_start; /* number of bits in the run */
+    return TRUE;
+}
+
 /* The WNDCLASS-Name for the fake window which we use to retrieve the GL capabilities */
 #define WINED3D_OPENGL_WINDOW_CLASS_NAME "WineD3D_OpenGL"
 
-- 
2.24.1




More information about the wine-devel mailing list