[PATCH 3/5] wined3d: Optimize scanning changed shader constants in wined3d_device_apply_stateblock().

Matteo Bruni mbruni at codeweavers.com
Mon Feb 10 13:35:51 CST 2020


Signed-off-by: Matteo Bruni <mbruni at codeweavers.com>
---
It turns out 7654d58b047be3a54d814d890bf1a26374559c83 caused a pretty
sizeable performance regression (which gets a lot more significant
once wined3d performance gets better - I have patches in that
area). Let's start fixing this.

 dlls/wined3d/device.c | 151 ++++++++++++++++++++----------------------
 1 file changed, 72 insertions(+), 79 deletions(-)

diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c
index 2297d7de916..87684dc1c46 100644
--- a/dlls/wined3d/device.c
+++ b/dlls/wined3d/device.c
@@ -3830,12 +3830,67 @@ struct wined3d_texture * CDECL wined3d_device_get_texture(const struct wined3d_d
     return device->state.textures[stage];
 }
 
+typedef HRESULT (CDECL *wined3d_device_shader_constant_setter)(struct wined3d_device *device,
+        unsigned int start_idx, unsigned int count, const void *constants); /* Common signature that the callers cast the typed constant setters to. */

+static void device_apply_shader_constants(struct wined3d_device *device, /* Calls shader_constant_setter once per run of consecutive set bits in bitmap. */
+        const struct wined3d_stateblock_state *state, /* NOTE(review): not referenced anywhere in the body. */
+        DWORD *bitmap, unsigned int bit_count, const void *data, unsigned int stride, /* Bit N set => element N of data (stride bytes per element) is passed to the setter. */
+        wined3d_device_shader_constant_setter shader_constant_setter) /* The HRESULT it returns is ignored here. */
+{
+    const unsigned int word_bit_count = sizeof(DWORD) * CHAR_BIT; /* Number of bits in one bitmap word. */
+    unsigned int i, j, idx, start, last; /* start / last: first and last index of the run currently being accumulated. */
+    const BYTE *byte_data = data; /* Byte view of data, so that start * stride is a byte offset. */
+    DWORD map;

+    start = last = ~0u; /* ~0u is the "no run is open" sentinel. */
+    for (i = 0; i < (bit_count + word_bit_count - 1) / word_bit_count; ++i) /* Word count is bit_count rounded up to whole words. */
+    {
+        map = bitmap[i]; /* NOTE(review): bits at or above bit_count are not masked off; presumably callers never set them - confirm. */

+        if (map == ~0u) /* Every bit of this word is set: extend the run by a whole word without scanning bits. */
+        {
+            if (last != ~0u && last != i * word_bit_count - 1) /* A run is open but does not end right before this word. */
+            {
+                shader_constant_setter(device, start, last - start + 1, &byte_data[start * stride]); /* Flush the open run. */
+                start = i * word_bit_count; /* The new run begins at this word's first bit. */
+            }
+            if (start == ~0u)
+                start = i * word_bit_count; /* No run open yet: begin at this word's first bit. */
+            last = i * word_bit_count + word_bit_count - 1; /* The run now reaches this word's last bit. */
+            continue;
+        }
+        while (map) /* Partially set word: visit the set bits one at a time. */
+        {
+            j = wined3d_bit_scan(&map); /* Presumably returns the lowest set bit index and clears it in map - helper is defined elsewhere, confirm. */
+            idx = i * word_bit_count + j; /* Absolute constant index of this bit. */

+            if (start == ~0u)
+            {
+                start = last = idx; /* First set bit seen: open a run of length one. */
+            }
+            else if (last != idx - 1) /* Gap between the open run and this bit. */
+            {
+                shader_constant_setter(device, start, last - start + 1, &byte_data[start * stride]); /* Flush the open run. */
+                start = last = idx; /* Open a new run at this bit. */
+            }
+            else
+            {
+                last = idx; /* Adjacent bit: grow the open run by one. */
+            }
+        }
+    }
+    if (start != ~0u) /* Flush the run that was still open when the bitmap ended. */
+        shader_constant_setter(device, start, last - start + 1, &byte_data[start * stride]);
+}
+
 void CDECL wined3d_device_apply_stateblock(struct wined3d_device *device,
         struct wined3d_stateblock *stateblock)
 {
     const struct wined3d_d3d_info *d3d_info = &stateblock->device->adapter->d3d_info;
     const struct wined3d_stateblock_state *state = &stateblock->stateblock_state;
-    unsigned int i, j, count;
+    unsigned int i, j;
+    DWORD map;
 
     TRACE("device %p, stateblock %p.\n", device, stateblock);
 
@@ -3844,89 +3899,27 @@ void CDECL wined3d_device_apply_stateblock(struct wined3d_device *device,
     if (stateblock->changed.pixelShader)
         wined3d_device_set_pixel_shader(device, state->ps);
 
-    count = 0;
-    for (i = 0; i < d3d_info->limits.vs_uniform_count; ++i)
-    {
-        if (stateblock->changed.vs_consts_f[i >> 5] & (1u << (i & 0x1f)))
-            ++count;
-        else if (count)
-        {
-            wined3d_device_set_vs_consts_f(device, i - count, count, state->vs_consts_f + i - count);
-            count = 0;
-        }
-    }
-    if (count)
-        wined3d_device_set_vs_consts_f(device, i - count, count, state->vs_consts_f + i - count);
+    device_apply_shader_constants(device, state, stateblock->changed.vs_consts_f, d3d_info->limits.vs_uniform_count,
+            state->vs_consts_f, sizeof(*state->vs_consts_f), (wined3d_device_shader_constant_setter)wined3d_device_set_vs_consts_f);
 
-    count = 0;
-    for (i = 0; i < WINED3D_MAX_CONSTS_B; ++i)
-    {
-        if (stateblock->changed.vertexShaderConstantsB & (1u << i))
-            ++count;
-        else if (count)
-        {
-            wined3d_device_set_vs_consts_b(device, i - count, count, state->vs_consts_b + i - count);
-            count = 0;
-        }
-    }
-    if (count)
-        wined3d_device_set_vs_consts_b(device, i - count, count, state->vs_consts_b + i - count);
+    map = stateblock->changed.vertexShaderConstantsB;
+    device_apply_shader_constants(device, state, &map, WINED3D_MAX_CONSTS_B, state->vs_consts_b, sizeof(*state->vs_consts_b),
+            (wined3d_device_shader_constant_setter)wined3d_device_set_vs_consts_b);
 
-    count = 0;
-    for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i)
-    {
-        if (stateblock->changed.vertexShaderConstantsI & (1u << i))
-            ++count;
-        else if (count)
-        {
-            wined3d_device_set_vs_consts_i(device, i - count, count, state->vs_consts_i + i - count);
-            count = 0;
-        }
-    }
-    if (count)
-        wined3d_device_set_vs_consts_i(device, i - count, count, state->vs_consts_i + i - count);
+    map = stateblock->changed.vertexShaderConstantsI;
+    device_apply_shader_constants(device, state, &map, WINED3D_MAX_CONSTS_I, state->vs_consts_i, sizeof(*state->vs_consts_i),
+            (wined3d_device_shader_constant_setter)wined3d_device_set_vs_consts_i);
 
-    count = 0;
-    for (i = 0; i < d3d_info->limits.ps_uniform_count; ++i)
-    {
-        if (stateblock->changed.ps_consts_f[i >> 5] & (1u << (i & 0x1f)))
-            ++count;
-        else if (count)
-        {
-            wined3d_device_set_ps_consts_f(device, i - count, count, state->ps_consts_f + i - count);
-            count = 0;
-        }
-    }
-    if (count)
-        wined3d_device_set_ps_consts_f(device, i - count, count, state->ps_consts_f + i - count);
+    device_apply_shader_constants(device, state, stateblock->changed.ps_consts_f, d3d_info->limits.ps_uniform_count,
+            state->ps_consts_f, sizeof(*state->ps_consts_f), (wined3d_device_shader_constant_setter)wined3d_device_set_ps_consts_f);
 
-    count = 0;
-    for (i = 0; i < WINED3D_MAX_CONSTS_B; ++i)
-    {
-        if (stateblock->changed.pixelShaderConstantsB & (1u << i))
-            ++count;
-        else if (count)
-        {
-            wined3d_device_set_ps_consts_b(device, i - count, count, state->ps_consts_b + i - count);
-            count = 0;
-        }
-    }
-    if (count)
-        wined3d_device_set_ps_consts_b(device, i - count, count, state->ps_consts_b + i - count);
+    map = stateblock->changed.pixelShaderConstantsB;
+    device_apply_shader_constants(device, state, &map, WINED3D_MAX_CONSTS_B, state->ps_consts_b, sizeof(*state->ps_consts_b),
+            (wined3d_device_shader_constant_setter)wined3d_device_set_ps_consts_b);
 
-    count = 0;
-    for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i)
-    {
-        if (stateblock->changed.pixelShaderConstantsI & (1u << i))
-            ++count;
-        else if (count)
-        {
-            wined3d_device_set_ps_consts_i(device, i - count, count, state->ps_consts_i + i - count);
-            count = 0;
-        }
-    }
-    if (count)
-        wined3d_device_set_ps_consts_i(device, i - count, count, state->ps_consts_i + i - count);
+    map = stateblock->changed.pixelShaderConstantsI;
+    device_apply_shader_constants(device, state, &map, WINED3D_MAX_CONSTS_I, state->ps_consts_i, sizeof(*state->ps_consts_i),
+            (wined3d_device_shader_constant_setter)wined3d_device_set_ps_consts_i);
 
     for (i = 0; i < ARRAY_SIZE(state->light_state->light_map); ++i)
     {
-- 
2.24.1




More information about the wine-devel mailing list