[PATCH 5/5] wined3d: Implement UAV clears on the Vulkan backend.

Jan Sikorski jsikorski at codeweavers.com
Tue Aug 10 03:34:30 CDT 2021


Based on the vkd3d implementation.

Signed-off-by: Jan Sikorski <jsikorski at codeweavers.com>
---
 dlls/wined3d/adapter_vk.c            |   2 +
 dlls/wined3d/uav_clear_shaders.inc.c | 365 +++++++++++++++++++++++++++
 dlls/wined3d/view.c                  | 217 +++++++++++++---
 dlls/wined3d/wined3d_private.h       |  22 ++
 4 files changed, 571 insertions(+), 35 deletions(-)
 create mode 100644 dlls/wined3d/uav_clear_shaders.inc.c

diff --git a/dlls/wined3d/adapter_vk.c b/dlls/wined3d/adapter_vk.c
index c7da02865ea..7d96fd59eab 100644
--- a/dlls/wined3d/adapter_vk.c
+++ b/dlls/wined3d/adapter_vk.c
@@ -719,6 +719,7 @@ static HRESULT adapter_vk_init_3d(struct wined3d_device *device)
     wined3d_device_create_default_samplers(device, &context_vk->c);
     wined3d_device_vk_create_null_resources(device_vk, context_vk);
     wined3d_device_vk_create_null_views(device_vk, context_vk);
+    wined3d_device_vk_uav_clear_state_init(device_vk);
 
     return WINED3D_OK;
 }
@@ -740,6 +741,7 @@ static void adapter_vk_uninit_3d_cs(void *object)
         device->shader_backend->shader_destroy(shader);
     }
 
+    wined3d_device_vk_uav_clear_state_cleanup(device_vk);
     device->blitter->ops->blitter_destroy(device->blitter, NULL);
     device->shader_backend->shader_free_private(device, &context_vk->c);
     wined3d_device_vk_destroy_null_views(device_vk, context_vk);
diff --git a/dlls/wined3d/uav_clear_shaders.inc.c b/dlls/wined3d/uav_clear_shaders.inc.c
new file mode 100644
index 00000000000..6cb3c808578
--- /dev/null
+++ b/dlls/wined3d/uav_clear_shaders.inc.c
@@ -0,0 +1,365 @@
+static const uint32_t cs_uav_clear_buffer_float_code[] =
+{
+#if 0
+    RWBuffer<float4> dst;
+
+    struct
+    {
+        float4 clear_value;
+        int2 dst_offset;
+        int2 dst_extent;
+    } u_info;
+
+    [numthreads(128, 1, 1)]
+    void main(int3 thread_id : SV_DispatchThreadID)
+    {
+        if (thread_id.x < u_info.dst_extent.x)
+            dst[u_info.dst_offset.x + thread_id.x] = u_info.clear_value;
+    }
+#endif
+    0x43425844, 0xe114ba61, 0xff6a0d0b, 0x7b25c8f4, 0xfcf7cf22, 0x00000001, 0x0000010c, 0x00000003,
+    0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
+    0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000b8, 0x00050050, 0x0000002e, 0x0100086a,
+    0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400089c, 0x0011e000, 0x00000000, 0x00005555,
+    0x0200005f, 0x00020012, 0x02000068, 0x00000001, 0x0400009b, 0x00000080, 0x00000001, 0x00000001,
+    0x07000022, 0x00100012, 0x00000000, 0x0002000a, 0x0020802a, 0x00000000, 0x00000001, 0x0304001f,
+    0x0010000a, 0x00000000, 0x0700001e, 0x00100012, 0x00000000, 0x0002000a, 0x0020800a, 0x00000000,
+    0x00000001, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100006, 0x00000000, 0x00208e46, 0x00000000,
+    0x00000000, 0x01000015, 0x0100003e,
+};
+
+static const uint32_t cs_uav_clear_buffer_uint_code[] =
+{
+#if 0
+    RWBuffer<uint4> dst;
+
+    struct
+    {
+        uint4 clear_value;
+        int2 dst_offset;
+        int2 dst_extent;
+    } u_info;
+
+    [numthreads(128, 1, 1)]
+    void main(int3 thread_id : SV_DispatchThreadID)
+    {
+        if (thread_id.x < u_info.dst_extent.x)
+            dst[u_info.dst_offset.x + thread_id.x] = u_info.clear_value;
+    }
+#endif
+    0x43425844, 0x3afd0cfd, 0x5145c166, 0x5b9f76b8, 0xa73775cd, 0x00000001, 0x0000010c, 0x00000003,
+    0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
+    0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000b8, 0x00050050, 0x0000002e, 0x0100086a,
+    0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400089c, 0x0011e000, 0x00000000, 0x00004444,
+    0x0200005f, 0x00020012, 0x02000068, 0x00000001, 0x0400009b, 0x00000080, 0x00000001, 0x00000001,
+    0x07000022, 0x00100012, 0x00000000, 0x0002000a, 0x0020802a, 0x00000000, 0x00000001, 0x0304001f,
+    0x0010000a, 0x00000000, 0x0700001e, 0x00100012, 0x00000000, 0x0002000a, 0x0020800a, 0x00000000,
+    0x00000001, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100006, 0x00000000, 0x00208e46, 0x00000000,
+    0x00000000, 0x01000015, 0x0100003e,
+};
+
+static const uint32_t cs_uav_clear_1d_array_float_code[] =
+{
+#if 0
+    RWTexture1DArray<float4> dst;
+
+    struct
+    {
+        float4 clear_value;
+        int2 dst_offset;
+        int2 dst_extent;
+    } u_info;
+
+    [numthreads(64, 1, 1)]
+    void main(int3 thread_id : SV_DispatchThreadID)
+    {
+        if (thread_id.x < u_info.dst_extent.x)
+            dst[int2(u_info.dst_offset.x + thread_id.x, thread_id.y)] = u_info.clear_value;
+    }
+#endif
+    0x43425844, 0x3d73bc2d, 0x2b635f3d, 0x6bf98e92, 0xbe0aa5d9, 0x00000001, 0x0000011c, 0x00000003,
+    0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
+    0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000c8, 0x00050050, 0x00000032, 0x0100086a,
+    0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400389c, 0x0011e000, 0x00000000, 0x00005555,
+    0x0200005f, 0x00020032, 0x02000068, 0x00000001, 0x0400009b, 0x00000040, 0x00000001, 0x00000001,
+    0x07000022, 0x00100012, 0x00000000, 0x0002000a, 0x0020802a, 0x00000000, 0x00000001, 0x0304001f,
+    0x0010000a, 0x00000000, 0x0700001e, 0x00100012, 0x00000000, 0x0002000a, 0x0020800a, 0x00000000,
+    0x00000001, 0x04000036, 0x001000e2, 0x00000000, 0x00020556, 0x080000a4, 0x0011e0f2, 0x00000000,
+    0x00100e46, 0x00000000, 0x00208e46, 0x00000000, 0x00000000, 0x01000015, 0x0100003e,
+};
+
+static const uint32_t cs_uav_clear_1d_array_uint_code[] =
+{
+#if 0
+    RWTexture1DArray<uint4> dst;
+
+    struct
+    {
+        uint4 clear_value;
+        int2 dst_offset;
+        int2 dst_extent;
+    } u_info;
+
+    [numthreads(64, 1, 1)]
+    void main(int3 thread_id : SV_DispatchThreadID)
+    {
+        if (thread_id.x < u_info.dst_extent.x)
+            dst[int2(u_info.dst_offset.x + thread_id.x, thread_id.y)] = u_info.clear_value;
+    }
+#endif
+    0x43425844, 0x2f0ca457, 0x72068b34, 0xd9dadc2b, 0xd3178c3e, 0x00000001, 0x0000011c, 0x00000003,
+    0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
+    0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000c8, 0x00050050, 0x00000032, 0x0100086a,
+    0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400389c, 0x0011e000, 0x00000000, 0x00004444,
+    0x0200005f, 0x00020032, 0x02000068, 0x00000001, 0x0400009b, 0x00000040, 0x00000001, 0x00000001,
+    0x07000022, 0x00100012, 0x00000000, 0x0002000a, 0x0020802a, 0x00000000, 0x00000001, 0x0304001f,
+    0x0010000a, 0x00000000, 0x0700001e, 0x00100012, 0x00000000, 0x0002000a, 0x0020800a, 0x00000000,
+    0x00000001, 0x04000036, 0x001000e2, 0x00000000, 0x00020556, 0x080000a4, 0x0011e0f2, 0x00000000,
+    0x00100e46, 0x00000000, 0x00208e46, 0x00000000, 0x00000000, 0x01000015, 0x0100003e,
+};
+
+static const uint32_t cs_uav_clear_1d_float_code[] =
+{
+#if 0
+    RWTexture1D<float4> dst;
+
+    struct
+    {
+        float4 clear_value;
+        int2 dst_offset;
+        int2 dst_extent;
+    } u_info;
+
+    [numthreads(64, 1, 1)]
+    void main(int3 thread_id : SV_DispatchThreadID)
+    {
+        if (thread_id.x < u_info.dst_extent.x)
+            dst[u_info.dst_offset.x + thread_id.x] = u_info.clear_value;
+    }
+#endif
+    0x43425844, 0x05266503, 0x4b97006f, 0x01a5cc63, 0xe617d0a1, 0x00000001, 0x0000010c, 0x00000003,
+    0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
+    0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000b8, 0x00050050, 0x0000002e, 0x0100086a,
+    0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400109c, 0x0011e000, 0x00000000, 0x00005555,
+    0x0200005f, 0x00020012, 0x02000068, 0x00000001, 0x0400009b, 0x00000040, 0x00000001, 0x00000001,
+    0x07000022, 0x00100012, 0x00000000, 0x0002000a, 0x0020802a, 0x00000000, 0x00000001, 0x0304001f,
+    0x0010000a, 0x00000000, 0x0700001e, 0x00100012, 0x00000000, 0x0002000a, 0x0020800a, 0x00000000,
+    0x00000001, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100006, 0x00000000, 0x00208e46, 0x00000000,
+    0x00000000, 0x01000015, 0x0100003e,
+};
+
+static const uint32_t cs_uav_clear_1d_uint_code[] =
+{
+#if 0
+    RWTexture1D<uint4> dst;
+
+    struct
+    {
+        uint4 clear_value;
+        int2 dst_offset;
+        int2 dst_extent;
+    } u_info;
+
+    [numthreads(64, 1, 1)]
+    void main(int3 thread_id : SV_DispatchThreadID)
+    {
+        if (thread_id.x < u_info.dst_extent.x)
+            dst[u_info.dst_offset.x + thread_id.x] = u_info.clear_value;
+    }
+#endif
+    0x43425844, 0x19d5c8f2, 0x3ca4ac24, 0x9e258499, 0xf0463fd6, 0x00000001, 0x0000010c, 0x00000003,
+    0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
+    0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000b8, 0x00050050, 0x0000002e, 0x0100086a,
+    0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400109c, 0x0011e000, 0x00000000, 0x00004444,
+    0x0200005f, 0x00020012, 0x02000068, 0x00000001, 0x0400009b, 0x00000040, 0x00000001, 0x00000001,
+    0x07000022, 0x00100012, 0x00000000, 0x0002000a, 0x0020802a, 0x00000000, 0x00000001, 0x0304001f,
+    0x0010000a, 0x00000000, 0x0700001e, 0x00100012, 0x00000000, 0x0002000a, 0x0020800a, 0x00000000,
+    0x00000001, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100006, 0x00000000, 0x00208e46, 0x00000000,
+    0x00000000, 0x01000015, 0x0100003e,
+};
+
+static const uint32_t cs_uav_clear_2d_array_float_code[] =
+{
+#if 0
+    RWTexture2DArray<float4> dst;
+
+    struct
+    {
+        float4 clear_value;
+        int2 dst_offset;
+        int2 dst_extent;
+    } u_info;
+
+    [numthreads(8, 8, 1)]
+    void main(int3 thread_id : SV_DispatchThreadID)
+    {
+        if (all(thread_id.xy < u_info.dst_extent.xy))
+            dst[int3(u_info.dst_offset.xy + thread_id.xy, thread_id.z)] = u_info.clear_value;
+    }
+#endif
+    0x43425844, 0x924d2d2c, 0xb9166376, 0x99f83871, 0x8ef65025, 0x00000001, 0x00000138, 0x00000003,
+    0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
+    0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000e4, 0x00050050, 0x00000039, 0x0100086a,
+    0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400409c, 0x0011e000, 0x00000000, 0x00005555,
+    0x0200005f, 0x00020072, 0x02000068, 0x00000001, 0x0400009b, 0x00000008, 0x00000008, 0x00000001,
+    0x07000022, 0x00100032, 0x00000000, 0x00020046, 0x00208ae6, 0x00000000, 0x00000001, 0x07000001,
+    0x00100012, 0x00000000, 0x0010001a, 0x00000000, 0x0010000a, 0x00000000, 0x0304001f, 0x0010000a,
+    0x00000000, 0x0700001e, 0x00100032, 0x00000000, 0x00020046, 0x00208046, 0x00000000, 0x00000001,
+    0x04000036, 0x001000c2, 0x00000000, 0x00020aa6, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100e46,
+    0x00000000, 0x00208e46, 0x00000000, 0x00000000, 0x01000015, 0x0100003e,
+};
+
+static const uint32_t cs_uav_clear_2d_array_uint_code[] =
+{
+#if 0
+    RWTexture2DArray<uint4> dst;
+
+    struct
+    {
+        uint4 clear_value;
+        int2 dst_offset;
+        int2 dst_extent;
+    } u_info;
+
+    [numthreads(8, 8, 1)]
+    void main(int3 thread_id : SV_DispatchThreadID)
+    {
+        if (all(thread_id.xy < u_info.dst_extent.xy))
+            dst[int3(u_info.dst_offset.xy + thread_id.xy, thread_id.z)] = u_info.clear_value;
+    }
+#endif
+    0x43425844, 0xa92219d4, 0xa2c5e47d, 0x0d308500, 0xf32197b4, 0x00000001, 0x00000138, 0x00000003,
+    0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
+    0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000e4, 0x00050050, 0x00000039, 0x0100086a,
+    0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400409c, 0x0011e000, 0x00000000, 0x00004444,
+    0x0200005f, 0x00020072, 0x02000068, 0x00000001, 0x0400009b, 0x00000008, 0x00000008, 0x00000001,
+    0x07000022, 0x00100032, 0x00000000, 0x00020046, 0x00208ae6, 0x00000000, 0x00000001, 0x07000001,
+    0x00100012, 0x00000000, 0x0010001a, 0x00000000, 0x0010000a, 0x00000000, 0x0304001f, 0x0010000a,
+    0x00000000, 0x0700001e, 0x00100032, 0x00000000, 0x00020046, 0x00208046, 0x00000000, 0x00000001,
+    0x04000036, 0x001000c2, 0x00000000, 0x00020aa6, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100e46,
+    0x00000000, 0x00208e46, 0x00000000, 0x00000000, 0x01000015, 0x0100003e,
+};
+
+static const uint32_t cs_uav_clear_2d_float_code[] =
+{
+#if 0
+    RWTexture2D<float4> dst;
+
+    struct
+    {
+        float4 clear_value;
+        int2 dst_offset;
+        int2 dst_extent;
+    } u_info;
+
+    [numthreads(8, 8, 1)]
+    void main(int3 thread_id : SV_DispatchThreadID)
+    {
+        if (all(thread_id.xy < u_info.dst_extent.xy))
+            dst[u_info.dst_offset.xy + thread_id.xy] = u_info.clear_value;
+    }
+#endif
+    0x43425844, 0x6e735b3f, 0x7348c4fa, 0xb3634e42, 0x50e2d99b, 0x00000001, 0x00000128, 0x00000003,
+    0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
+    0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000d4, 0x00050050, 0x00000035, 0x0100086a,
+    0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400189c, 0x0011e000, 0x00000000, 0x00005555,
+    0x0200005f, 0x00020032, 0x02000068, 0x00000001, 0x0400009b, 0x00000008, 0x00000008, 0x00000001,
+    0x07000022, 0x00100032, 0x00000000, 0x00020046, 0x00208ae6, 0x00000000, 0x00000001, 0x07000001,
+    0x00100012, 0x00000000, 0x0010001a, 0x00000000, 0x0010000a, 0x00000000, 0x0304001f, 0x0010000a,
+    0x00000000, 0x0700001e, 0x001000f2, 0x00000000, 0x00020546, 0x00208546, 0x00000000, 0x00000001,
+    0x080000a4, 0x0011e0f2, 0x00000000, 0x00100e46, 0x00000000, 0x00208e46, 0x00000000, 0x00000000,
+    0x01000015, 0x0100003e,
+};
+
+static const uint32_t cs_uav_clear_2d_uint_code[] =
+{
+#if 0
+    RWTexture2D<uint4> dst;
+
+    struct
+    {
+        uint4 clear_value;
+        int2 dst_offset;
+        int2 dst_extent;
+    } u_info;
+
+    [numthreads(8, 8, 1)]
+    void main(int3 thread_id : SV_DispatchThreadID)
+    {
+        if (all(thread_id.xy < u_info.dst_extent.xy))
+            dst[u_info.dst_offset.xy + thread_id.xy] = u_info.clear_value;
+    }
+#endif
+    0x43425844, 0xf01db5dd, 0xc7dc5e55, 0xb017c1a8, 0x55abd52d, 0x00000001, 0x00000128, 0x00000003,
+    0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
+    0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000d4, 0x00050050, 0x00000035, 0x0100086a,
+    0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400189c, 0x0011e000, 0x00000000, 0x00004444,
+    0x0200005f, 0x00020032, 0x02000068, 0x00000001, 0x0400009b, 0x00000008, 0x00000008, 0x00000001,
+    0x07000022, 0x00100032, 0x00000000, 0x00020046, 0x00208ae6, 0x00000000, 0x00000001, 0x07000001,
+    0x00100012, 0x00000000, 0x0010001a, 0x00000000, 0x0010000a, 0x00000000, 0x0304001f, 0x0010000a,
+    0x00000000, 0x0700001e, 0x001000f2, 0x00000000, 0x00020546, 0x00208546, 0x00000000, 0x00000001,
+    0x080000a4, 0x0011e0f2, 0x00000000, 0x00100e46, 0x00000000, 0x00208e46, 0x00000000, 0x00000000,
+    0x01000015, 0x0100003e,
+};
+
+static const uint32_t cs_uav_clear_3d_float_code[] =
+{
+#if 0
+    RWTexture3D<float4> dst;
+
+    struct
+    {
+        float4 clear_value;
+        int2 dst_offset;
+        int2 dst_extent;
+    } u_info;
+
+    [numthreads(8, 8, 1)]
+    void main(int3 thread_id : SV_DispatchThreadID)
+    {
+        if (all(thread_id.xy < u_info.dst_extent.xy))
+            dst[int3(u_info.dst_offset.xy, 0) + thread_id.xyz] = u_info.clear_value;
+    }
+#endif
+    0x43425844, 0x5d8f36a0, 0x30fa86a5, 0xfec7f2ef, 0xdfd76cbb, 0x00000001, 0x00000138, 0x00000003,
+    0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
+    0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000e4, 0x00050050, 0x00000039, 0x0100086a,
+    0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400289c, 0x0011e000, 0x00000000, 0x00005555,
+    0x0200005f, 0x00020072, 0x02000068, 0x00000001, 0x0400009b, 0x00000008, 0x00000008, 0x00000001,
+    0x07000022, 0x00100032, 0x00000000, 0x00020046, 0x00208ae6, 0x00000000, 0x00000001, 0x07000001,
+    0x00100012, 0x00000000, 0x0010001a, 0x00000000, 0x0010000a, 0x00000000, 0x0304001f, 0x0010000a,
+    0x00000000, 0x0700001e, 0x00100032, 0x00000000, 0x00020046, 0x00208046, 0x00000000, 0x00000001,
+    0x04000036, 0x001000c2, 0x00000000, 0x00020aa6, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100e46,
+    0x00000000, 0x00208e46, 0x00000000, 0x00000000, 0x01000015, 0x0100003e,
+};
+
+static const uint32_t cs_uav_clear_3d_uint_code[] =
+{
+#if 0
+    RWTexture3D<uint4> dst;
+
+    struct
+    {
+        uint4 clear_value;
+        int2 dst_offset;
+        int2 dst_extent;
+    } u_info;
+
+    [numthreads(8, 8, 1)]
+    void main(int3 thread_id : SV_DispatchThreadID)
+    {
+        if (all(thread_id.xy < u_info.dst_extent.xy))
+            dst[int3(u_info.dst_offset.xy, 0) + thread_id.xyz] = u_info.clear_value;
+    }
+#endif
+    0x43425844, 0x5b9c95b1, 0xc9bde4e3, 0x9aaff806, 0x24a1d264, 0x00000001, 0x00000138, 0x00000003,
+    0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
+    0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000e4, 0x00050050, 0x00000039, 0x0100086a,
+    0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400289c, 0x0011e000, 0x00000000, 0x00004444,
+    0x0200005f, 0x00020072, 0x02000068, 0x00000001, 0x0400009b, 0x00000008, 0x00000008, 0x00000001,
+    0x07000022, 0x00100032, 0x00000000, 0x00020046, 0x00208ae6, 0x00000000, 0x00000001, 0x07000001,
+    0x00100012, 0x00000000, 0x0010001a, 0x00000000, 0x0010000a, 0x00000000, 0x0304001f, 0x0010000a,
+    0x00000000, 0x0700001e, 0x00100032, 0x00000000, 0x00020046, 0x00208046, 0x00000000, 0x00000001,
+    0x04000036, 0x001000c2, 0x00000000, 0x00020aa6, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100e46,
+    0x00000000, 0x00208e46, 0x00000000, 0x00000000, 0x01000015, 0x0100003e,
+};
diff --git a/dlls/wined3d/view.c b/dlls/wined3d/view.c
index 665661e39b1..b1dc33fd1e8 100644
--- a/dlls/wined3d/view.c
+++ b/dlls/wined3d/view.c
@@ -1726,67 +1726,214 @@ HRESULT wined3d_unordered_access_view_gl_init(struct wined3d_unordered_access_vi
     return hr;
 }
 
+struct wined3d_uav_clear_constants_vk
+{
+    VkClearColorValue color;
+    VkOffset2D offset;
+    VkExtent2D extent;
+};
+
+static void STDMETHODCALLTYPE wined3d_uav_clear_object_destroyed(void *parent)
+{
+}
+
+static struct wined3d_parent_ops wined3d_uav_clear_ops =
+{
+    wined3d_uav_clear_object_destroyed
+};
+
+static bool create_shader(struct wined3d_device *device, const uint32_t *byte_code, size_t byte_code_size,
+        struct wined3d_shader **shader)
+{
+    struct wined3d_shader_desc shader_desc;
+    HRESULT result;
+
+    shader_desc.byte_code = byte_code;
+    shader_desc.byte_code_size = byte_code_size;
+
+    result = wined3d_shader_create_cs(device, &shader_desc, NULL, &wined3d_uav_clear_ops, shader);
+    if (FAILED(result))
+        WARN("Failed to initialize shader: %#x\n", result);
+
+    return SUCCEEDED(result);
+}
+
+#include "uav_clear_shaders.inc.c"
+
+void wined3d_device_vk_uav_clear_state_init(struct wined3d_device_vk *device_vk)
+{
+    struct wined3d_context_vk *context_vk = &device_vk->context_vk;
+    struct wined3d_device *device = &device_vk->d;
+    struct wined3d_uav_clear_state_vk *state = &device_vk->uav_clear_state;
+
+    create_shader(device, cs_uav_clear_buffer_float_code, sizeof(cs_uav_clear_buffer_float_code),
+            &state->float_shaders.buffer);
+    create_shader(device, cs_uav_clear_buffer_uint_code, sizeof(cs_uav_clear_buffer_uint_code),
+            &state->uint_shaders.buffer);
+    create_shader(device, cs_uav_clear_1d_array_float_code, sizeof(cs_uav_clear_1d_array_float_code),
+            &state->float_shaders.image_1d);
+    create_shader(device, cs_uav_clear_1d_array_uint_code, sizeof(cs_uav_clear_1d_array_uint_code),
+            &state->uint_shaders.image_1d);
+    create_shader(device, cs_uav_clear_1d_float_code, sizeof(cs_uav_clear_1d_float_code),
+            &state->float_shaders.image_1d_array);
+    create_shader(device, cs_uav_clear_1d_uint_code, sizeof(cs_uav_clear_1d_uint_code),
+            &state->uint_shaders.image_1d_array);
+    create_shader(device, cs_uav_clear_2d_float_code, sizeof(cs_uav_clear_2d_float_code),
+            &state->float_shaders.image_2d);
+    create_shader(device, cs_uav_clear_2d_uint_code, sizeof(cs_uav_clear_2d_uint_code),
+            &state->uint_shaders.image_2d);
+    create_shader(device, cs_uav_clear_2d_array_float_code, sizeof(cs_uav_clear_2d_array_float_code),
+            &state->float_shaders.image_2d_array);
+    create_shader(device, cs_uav_clear_2d_array_uint_code, sizeof(cs_uav_clear_2d_array_uint_code),
+            &state->uint_shaders.image_2d_array);
+    create_shader(device, cs_uav_clear_3d_float_code, sizeof(cs_uav_clear_3d_float_code),
+            &state->float_shaders.image_3d);
+    create_shader(device, cs_uav_clear_3d_uint_code, sizeof(cs_uav_clear_3d_uint_code),
+            &state->uint_shaders.image_3d);
+
+    wined3d_context_vk_create_bo(context_vk, sizeof(struct wined3d_uav_clear_constants_vk),
+        VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+        &state->constants_bo);
+}
+
+void wined3d_device_vk_uav_clear_state_cleanup(struct wined3d_device_vk *device_vk)
+{
+    struct wined3d_context_vk *context_vk = &device_vk->context_vk;
+    struct wined3d_uav_clear_state_vk *state = &device_vk->uav_clear_state;
+
+    wined3d_context_vk_destroy_bo(context_vk, &state->constants_bo);
+
+    if (state->float_shaders.buffer)
+        wined3d_shader_decref(state->float_shaders.buffer);
+    if (state->uint_shaders.buffer)
+        wined3d_shader_decref(state->uint_shaders.buffer);
+    if (state->float_shaders.image_1d)
+        wined3d_shader_decref(state->float_shaders.image_1d);
+    if (state->uint_shaders.image_1d)
+        wined3d_shader_decref(state->uint_shaders.image_1d);
+    if (state->float_shaders.image_1d_array)
+        wined3d_shader_decref(state->float_shaders.image_1d_array);
+    if (state->uint_shaders.image_1d_array)
+        wined3d_shader_decref(state->uint_shaders.image_1d_array);
+    if (state->float_shaders.image_2d)
+        wined3d_shader_decref(state->float_shaders.image_2d);
+    if (state->uint_shaders.image_2d)
+        wined3d_shader_decref(state->uint_shaders.image_2d);
+    if (state->float_shaders.image_2d_array)
+        wined3d_shader_decref(state->float_shaders.image_2d_array);
+    if (state->uint_shaders.image_2d_array)
+        wined3d_shader_decref(state->uint_shaders.image_2d_array);
+    if (state->float_shaders.image_3d)
+        wined3d_shader_decref(state->float_shaders.image_3d);
+    if (state->uint_shaders.image_3d)
+        wined3d_shader_decref(state->uint_shaders.image_3d);
+}
+
 void wined3d_unordered_access_view_vk_clear(struct wined3d_unordered_access_view_vk *view_vk,
         const struct wined3d_uvec4 *clear_value, struct wined3d_context_vk *context_vk, bool fp)
 {
+    struct wined3d_view_desc *view_desc = &view_vk->v.desc;
+    struct wined3d_uav_clear_constants_vk constants = {0};
+    struct wined3d_device *device = context_vk->c.device;
+    struct wined3d_shader_thread_group_size group_count;
+    struct wined3d_uav_clear_shaders_vk *shaders;
     const struct wined3d_vk_info *vk_info;
-    const struct wined3d_format *format;
-    struct wined3d_buffer_vk *buffer_vk;
+    struct wined3d_bo_address bo_address;
+    struct wined3d_device_vk *device_vk;
+    struct wined3d_bo_vk *constants_bo;
     struct wined3d_resource *resource;
-    VkCommandBuffer vk_command_buffer;
     VkBufferMemoryBarrier vk_barrier;
-    VkAccessFlags access_mask;
-    unsigned int offset, size;
+    struct wined3d_shader *shader;
+    struct wined3d_range bo_range;
+    void *mapped_constants_bo;
+    DWORD uav_location;
+    bool is_array;
 
-    TRACE("view_vk %p, clear_value %s, context_vk %p, fp %#x.\n", view_vk, debug_uvec4(clear_value), context_vk, fp);
+    device_vk = wined3d_device_vk(device);
+    shaders = fp ? &device_vk->uav_clear_state.float_shaders : &device_vk->uav_clear_state.uint_shaders;
 
     resource = view_vk->v.resource;
-    if (resource->type != WINED3D_RTYPE_BUFFER)
+    is_array = view_desc->flags & WINED3D_VIEW_TEXTURE_ARRAY;
+
+    switch (resource->type)
     {
-        FIXME("Not implemented for %s resources.\n", debug_d3dresourcetype(resource->type));
-        return;
+        case WINED3D_RTYPE_BUFFER:     shader = shaders->buffer; break;
+        case WINED3D_RTYPE_TEXTURE_1D: shader = is_array ? shaders->image_1d_array : shaders->image_1d; break;
+        case WINED3D_RTYPE_TEXTURE_2D: shader = is_array ? shaders->image_2d_array : shaders->image_2d; break;
+        case WINED3D_RTYPE_TEXTURE_3D: shader = shaders->image_3d; break;
+
+        default:
+            ERR("Unhandled resource type %s.\n", debug_d3dresourcetype(resource->type));
+            return;
     }
 
-    format = view_vk->v.format;
-    if (format->id != WINED3DFMT_R32_UINT && format->id != WINED3DFMT_R32_SINT)
+    if (!shader)
     {
-        FIXME("Not implemented for format %s.\n", debug_d3dformat(format->id));
+        ERR("Shader was not correctly initialized.\n");
         return;
     }
 
-    vk_info = context_vk->vk_info;
-    buffer_vk = wined3d_buffer_vk(buffer_from_resource(resource));
-    wined3d_buffer_load_location(&buffer_vk->b, &context_vk->c, WINED3D_LOCATION_BUFFER);
-    wined3d_buffer_invalidate_location(&buffer_vk->b, ~WINED3D_LOCATION_BUFFER);
+    if (resource->type == WINED3D_RTYPE_BUFFER)
+        uav_location = WINED3D_LOCATION_BUFFER;
+    else
+        uav_location = WINED3D_LOCATION_TEXTURE_RGB;
 
-    get_buffer_view_range(&buffer_vk->b, &view_vk->v.desc, format, &offset, &size);
+    wined3d_view_load_location(resource, view_desc, &context_vk->c, uav_location);
+    wined3d_unordered_access_view_invalidate_location(&view_vk->v, ~uav_location);
 
-    if (!(vk_command_buffer = wined3d_context_vk_get_command_buffer(context_vk)))
-        return;
-    wined3d_context_vk_end_current_render_pass(context_vk);
+    constants.color.uint32[0] = clear_value->x;
+    constants.color.uint32[1] = clear_value->y;
+    constants.color.uint32[2] = clear_value->z;
+    constants.color.uint32[3] = clear_value->w;
+
+    constants.extent.width  = resource->width;
+    constants.extent.height = resource->height;
+
+    group_count = shader->u.cs.thread_group_size;
+
+    if (resource->type != WINED3D_RTYPE_BUFFER)
+    {
+        constants.extent.width  >>= view_desc->u.texture.level_idx;
+        constants.extent.height >>= view_desc->u.texture.level_idx;
+        group_count.z = (view_desc->u.texture.layer_count + group_count.z - 1) / group_count.z;
+    }
+
+    group_count.x = (constants.extent.width  + group_count.x - 1) / group_count.x;
+    group_count.y = (constants.extent.height + group_count.y - 1) / group_count.y;
+
+    constants_bo = &device_vk->uav_clear_state.constants_bo;
+    bo_address.buffer_object = (uintptr_t)constants_bo;
+    bo_address.addr = NULL;
+
+    mapped_constants_bo = wined3d_context_map_bo_address(&context_vk->c, &bo_address,
+            sizeof(constants), WINED3D_MAP_WRITE | WINED3D_MAP_DISCARD);
+    memcpy(mapped_constants_bo, &constants, sizeof(constants));
+
+    bo_range.offset = 0;
+    bo_range.size = sizeof(constants);
+    wined3d_context_unmap_bo_address(&context_vk->c, &bo_address, 1, &bo_range);
+
+    vk_info = context_vk->vk_info;
 
-    access_mask = vk_access_mask_from_bind_flags(buffer_vk->b.resource.bind_flags);
     vk_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
     vk_barrier.pNext = NULL;
-    vk_barrier.srcAccessMask = access_mask;
-    vk_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+    vk_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
+    vk_barrier.dstAccessMask = VK_ACCESS_UNIFORM_READ_BIT;
     vk_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
     vk_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-    vk_barrier.buffer = buffer_vk->bo.vk_buffer;
-    vk_barrier.offset = buffer_vk->bo.buffer_offset + offset;
-    vk_barrier.size = size;
-    VK_CALL(vkCmdPipelineBarrier(vk_command_buffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
-            VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, &vk_barrier, 0, NULL));
+    vk_barrier.buffer = constants_bo->vk_buffer;
+    vk_barrier.offset = constants_bo->buffer_offset;
+    vk_barrier.size = constants_bo->size;
+    VK_CALL(vkCmdPipelineBarrier(wined3d_context_vk_get_command_buffer(context_vk),
+            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+            0, 0, NULL, 1, &vk_barrier, 0, NULL));
 
-    VK_CALL(vkCmdFillBuffer(vk_command_buffer, buffer_vk->bo.vk_buffer,
-            buffer_vk->bo.buffer_offset + offset, size, clear_value->x));
+    wined3d_unordered_access_view_vk_barrier(view_vk, context_vk, WINED3D_BIND_UNORDERED_ACCESS);
 
-    vk_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
-    vk_barrier.dstAccessMask = access_mask;
-    VK_CALL(vkCmdPipelineBarrier(vk_command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
-            VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, NULL, 1, &vk_barrier, 0, NULL));
+    device->adapter->shader_backend->shader_run_compute(group_count.x, group_count.y, group_count.z,
+            &context_vk->c, shader, constants_bo, view_vk);
 
-    wined3d_context_vk_reference_bo(context_vk, &buffer_vk->bo);
+    context_invalidate_compute_state(&context_vk->c, STATE_COMPUTE_SHADER);
 }
 
 void wined3d_unordered_access_view_vk_update(struct wined3d_unordered_access_view_vk *uav_vk,
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
index 5ffcaa1f8db..1023e64cfd5 100644
--- a/dlls/wined3d/wined3d_private.h
+++ b/dlls/wined3d/wined3d_private.h
@@ -4007,6 +4007,23 @@ void wined3d_allocator_cleanup(struct wined3d_allocator *allocator) DECLSPEC_HID
 bool wined3d_allocator_init(struct wined3d_allocator *allocator,
         size_t pool_count, const struct wined3d_allocator_ops *allocator_ops) DECLSPEC_HIDDEN;
 
+struct wined3d_uav_clear_shaders_vk
+{
+    struct wined3d_shader *buffer;
+    struct wined3d_shader *image_1d;
+    struct wined3d_shader *image_1d_array;
+    struct wined3d_shader *image_2d;
+    struct wined3d_shader *image_2d_array;
+    struct wined3d_shader *image_3d;
+};
+
+struct wined3d_uav_clear_state_vk
+{
+    struct wined3d_uav_clear_shaders_vk float_shaders;
+    struct wined3d_uav_clear_shaders_vk uint_shaders;
+    struct wined3d_bo_vk constants_bo;
+};
+
 struct wined3d_device_vk
 {
     struct wined3d_device d;
@@ -4024,6 +4041,8 @@ struct wined3d_device_vk
     struct wined3d_null_views_vk null_views_vk;
 
     struct wined3d_allocator allocator;
+
+    struct wined3d_uav_clear_state_vk uav_clear_state;
 };
 
 static inline struct wined3d_device_vk *wined3d_device_vk(struct wined3d_device *device)
@@ -4040,6 +4059,9 @@ void wined3d_device_vk_destroy_null_resources(struct wined3d_device_vk *device_v
 void wined3d_device_vk_destroy_null_views(struct wined3d_device_vk *device_vk,
         struct wined3d_context_vk *context_vk) DECLSPEC_HIDDEN;
 
+void wined3d_device_vk_uav_clear_state_init(struct wined3d_device_vk *device_vk) DECLSPEC_HIDDEN;
+void wined3d_device_vk_uav_clear_state_cleanup(struct wined3d_device_vk *device_vk) DECLSPEC_HIDDEN;
+
 static inline float wined3d_alpha_ref(const struct wined3d_state *state)
 {
     return (state->render_states[WINED3D_RS_ALPHAREF] & 0xff) / 255.0f;
-- 
2.30.2




More information about the wine-devel mailing list