[PATCH vkd3d 3/4] vkd3d: Replace descriptor mutex with per-descriptor spinlocks.
Conor McCarthy
cmccarthy at codeweavers.com
Tue Sep 21 01:00:09 CDT 2021
Greatly improves performance in various games that update or copy a
large number of descriptors per frame, by avoiding the high overhead
of pthread_mutex_{un}lock on every descriptor write or copy.
Based on vkd3d-proton patches by Hans-Kristian Arntzen, Philip
Rebohle and Georg Lehmann.
Signed-off-by: Conor McCarthy <cmccarthy at codeweavers.com>
---
configure.ac | 1 +
include/private/vkd3d_common.h | 33 +++++++++++++++++++++++++++++++++
libs/vkd3d/device.c | 10 +---------
libs/vkd3d/resource.c | 18 ++++++++----------
libs/vkd3d/vkd3d_private.h | 17 ++---------------
5 files changed, 45 insertions(+), 34 deletions(-)
diff --git a/configure.ac b/configure.ac
index d296dfd4..546b6c57 100644
--- a/configure.ac
+++ b/configure.ac
@@ -131,6 +131,7 @@ VKD3D_CHECK_FUNC([HAVE_BUILTIN_POPCOUNT], [__builtin_popcount], [__builtin_popco
VKD3D_CHECK_FUNC([HAVE_BUILTIN_ADD_OVERFLOW], [__builtin_add_overflow], [__builtin_add_overflow(0, 0, (int *)0)])
VKD3D_CHECK_FUNC([HAVE_SYNC_ADD_AND_FETCH], [__sync_add_and_fetch], [__sync_add_and_fetch((int *)0, 0)])
VKD3D_CHECK_FUNC([HAVE_SYNC_SUB_AND_FETCH], [__sync_sub_and_fetch], [__sync_sub_and_fetch((int *)0, 0)])
+VKD3D_CHECK_FUNC([HAVE_SYNC_LOCK_TEST_AND_SET], [__sync_lock_test_and_set], [__sync_lock_test_and_set((int *)0, 0)])
VKD3D_CHECK_PTHREAD_SETNAME_NP
diff --git a/include/private/vkd3d_common.h b/include/private/vkd3d_common.h
index 8d1ca397..1ae43d8f 100644
--- a/include/private/vkd3d_common.h
+++ b/include/private/vkd3d_common.h
@@ -28,6 +28,10 @@
#include <stdbool.h>
#include <stdint.h>
+#ifdef __SSE2__
+#include <emmintrin.h>
+#endif
+
#ifdef _MSC_VER
#include <intrin.h>
#endif
@@ -211,6 +215,18 @@ static inline LONG InterlockedDecrement(LONG volatile *x)
# else
# error "InterlockedDecrement() not implemented for this platform"
# endif
+
+# if HAVE_SYNC_LOCK_TEST_AND_SET
+static inline LONG InterlockedExchange(LONG volatile *ptr, LONG val)
+{
+ return __sync_lock_test_and_set(ptr, val);
+}
+# define vkd3d_spinlock_unlock(lock) __sync_lock_release(lock)
+# else
+# error "spinlocks not implemented for this platform"
+# endif /* HAVE_SYNC_LOCK_TEST_AND_SET */
+#else
+# define vkd3d_spinlock_unlock(lock) InterlockedExchange(lock, 0)
#endif /* _WIN32 */
#if HAVE_SYNC_ADD_AND_FETCH
@@ -222,6 +238,23 @@ static inline LONG InterlockedDecrement(LONG volatile *x)
# error "atomic_add_fetch() not implemented for this platform"
#endif /* HAVE_SYNC_ADD_AND_FETCH */
+typedef LONG vkd3d_spinlock_t;
+
+static inline void vkd3d_spinlock_acquire(vkd3d_spinlock_t *lock)
+{
+ while (InterlockedExchange(lock, 1))
+ {
+#ifdef __SSE2__
+ _mm_pause();
+#endif
+ }
+}
+
+static inline void vkd3d_spinlock_release(vkd3d_spinlock_t *lock)
+{
+ vkd3d_spinlock_unlock(lock);
+}
+
static inline void vkd3d_parse_version(const char *version, int *major, int *minor)
{
*major = atoi(version);
diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c
index 0fadb521..28a01331 100644
--- a/libs/vkd3d/device.c
+++ b/libs/vkd3d/device.c
@@ -2213,7 +2213,6 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface)
{
struct d3d12_device *device = impl_from_ID3D12Device(iface);
ULONG refcount = InterlockedDecrement(&device->refcount);
- size_t i;
TRACE("%p decreasing refcount to %u.\n", device, refcount);
@@ -2231,8 +2230,6 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface)
vkd3d_fence_worker_stop(&device->fence_worker, device);
d3d12_device_destroy_pipeline_cache(device);
d3d12_device_destroy_vkd3d_queues(device);
- for (i = 0; i < ARRAY_SIZE(device->desc_mutex); ++i)
- pthread_mutex_destroy(&device->desc_mutex[i]);
VK_CALL(vkDestroyDevice(device->vk_device, NULL));
if (device->parent)
IUnknown_Release(device->parent);
@@ -3135,8 +3132,7 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface,
struct d3d12_device *device = impl_from_ID3D12Device(iface);
unsigned int dst_range_idx, dst_idx, src_range_idx, src_idx;
unsigned int dst_range_size, src_range_size;
- const struct d3d12_desc *src;
- struct d3d12_desc *dst;
+ struct d3d12_desc *src, *dst;
TRACE("iface %p, dst_descriptor_range_count %u, dst_descriptor_range_offsets %p, "
"dst_descriptor_range_sizes %p, src_descriptor_range_count %u, "
@@ -3692,7 +3688,6 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
{
const struct vkd3d_vk_device_procs *vk_procs;
HRESULT hr;
- size_t i;
device->ID3D12Device_iface.lpVtbl = &d3d12_device_vtbl;
device->refcount = 1;
@@ -3731,9 +3726,6 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
vkd3d_render_pass_cache_init(&device->render_pass_cache);
vkd3d_gpu_va_allocator_init(&device->gpu_va_allocator);
- for (i = 0; i < ARRAY_SIZE(device->desc_mutex); ++i)
- pthread_mutex_init(&device->desc_mutex[i], NULL);
-
if ((device->parent = create_info->parent))
IUnknown_AddRef(device->parent);
diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c
index 2c6c07c7..f3cbb684 100644
--- a/libs/vkd3d/resource.c
+++ b/libs/vkd3d/resource.c
@@ -2122,21 +2122,21 @@ void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *sr
struct d3d12_device *device)
{
struct d3d12_desc destroy_desc;
- pthread_mutex_t *mutex;
destroy_desc.u.view = NULL;
- mutex = d3d12_device_get_descriptor_mutex(device, dst);
- pthread_mutex_lock(mutex);
+ vkd3d_spinlock_acquire(&dst->spinlock);
/* Nothing to do for VKD3D_DESCRIPTOR_MAGIC_CBV. */
if ((dst->magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW)
&& !InterlockedDecrement(&dst->u.view->refcount))
destroy_desc = *dst;
- *dst = *src;
+ dst->magic = src->magic;
+ dst->vk_descriptor_type = src->vk_descriptor_type;
+ dst->u = src->u;
- pthread_mutex_unlock(mutex);
+ vkd3d_spinlock_release(&dst->spinlock);
/* Destroy the view after unlocking to reduce wait time. */
if (destroy_desc.u.view)
@@ -2150,12 +2150,11 @@ static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_devic
d3d12_desc_write_atomic(descriptor, &null_desc, device);
}
-void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src,
+void d3d12_desc_copy(struct d3d12_desc *dst, struct d3d12_desc *src,
struct d3d12_device *device)
{
bool needs_update = true;
struct d3d12_desc tmp;
- pthread_mutex_t *mutex;
assert(dst != src);
@@ -2181,14 +2180,13 @@ void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src,
if (needs_update)
{
- mutex = d3d12_device_get_descriptor_mutex(device, src);
- pthread_mutex_lock(mutex);
+ vkd3d_spinlock_acquire(&src->spinlock);
if (src->magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW)
vkd3d_view_incref(src->u.view);
tmp = *src;
- pthread_mutex_unlock(mutex);
+ vkd3d_spinlock_release(&src->spinlock);
d3d12_desc_write_atomic(dst, &tmp, device);
}
diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h
index 9829e0aa..78e503ae 100644
--- a/libs/vkd3d/vkd3d_private.h
+++ b/libs/vkd3d/vkd3d_private.h
@@ -517,6 +517,7 @@ struct d3d12_desc
{
uint32_t magic;
VkDescriptorType vk_descriptor_type;
+ vkd3d_spinlock_t spinlock;
union
{
VkDescriptorBufferInfo vk_cbv_info;
@@ -534,7 +535,7 @@ static inline struct d3d12_desc *d3d12_desc_from_gpu_handle(D3D12_GPU_DESCRIPTOR
return (struct d3d12_desc *)(intptr_t)gpu_handle.ptr;
}
-void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device);
+void d3d12_desc_copy(struct d3d12_desc *dst, struct d3d12_desc *src, struct d3d12_device *device);
void d3d12_desc_create_cbv(struct d3d12_desc *descriptor,
struct d3d12_device *device, const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc);
void d3d12_desc_create_srv(struct d3d12_desc *descriptor,
@@ -1134,7 +1135,6 @@ struct d3d12_device
struct vkd3d_fence_worker fence_worker;
pthread_mutex_t mutex;
- pthread_mutex_t desc_mutex[8];
struct vkd3d_render_pass_cache render_pass_cache;
VkPipelineCache vk_pipeline_cache;
@@ -1200,19 +1200,6 @@ static inline unsigned int d3d12_device_get_descriptor_handle_increment_size(str
return ID3D12Device_GetDescriptorHandleIncrementSize(&device->ID3D12Device_iface, descriptor_type);
}
-static inline pthread_mutex_t *d3d12_device_get_descriptor_mutex(struct d3d12_device *device,
- const struct d3d12_desc *descriptor)
-{
- STATIC_ASSERT(!(ARRAY_SIZE(device->desc_mutex) & (ARRAY_SIZE(device->desc_mutex) - 1)));
- uintptr_t idx = (uintptr_t)descriptor;
-
- idx ^= idx >> 12;
- idx ^= idx >> 6;
- idx ^= idx >> 3;
-
- return &device->desc_mutex[idx & (ARRAY_SIZE(device->desc_mutex) - 1)];
-}
-
/* utils */
enum vkd3d_format_type
{
--
2.32.0
More information about the wine-devel
mailing list