[PATCH vkd3d 2/4] vkd3d: Avoid holding mutex while waiting for fences.

Józef Kucia joseph.kucia at gmail.com
Thu Jun 13 06:47:34 CDT 2019


From: Józef Kucia <jkucia at codeweavers.com>

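We maintain separate arrays for newly enqueued fences, which are protected by
the worker mutex, and for fences owned by the fence worker thread. The worker
thread moves enqueued fences into its own arrays while holding the mutex, and
then waits for the GPU fences without holding it.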

Signed-off-by: Józef Kucia <jkucia at codeweavers.com>
---
 include/private/vkd3d_common.h |  10 +-
 libs/vkd3d/command.c           | 164 +++++++++++++++++++++++----------
 libs/vkd3d/vkd3d_private.h     |  15 +++
 3 files changed, 137 insertions(+), 52 deletions(-)

diff --git a/include/private/vkd3d_common.h b/include/private/vkd3d_common.h
index 244209243594..16c31e633ab1 100644
--- a/include/private/vkd3d_common.h
+++ b/include/private/vkd3d_common.h
@@ -118,7 +118,7 @@ static inline LONG InterlockedIncrement(LONG volatile *x)
     return __sync_add_and_fetch(x, 1);
 }
 # else
-#  error "InterlockedIncrement not implemented for this platform"
+#  error "InterlockedIncrement() not implemented for this platform"
 # endif  /* HAVE_SYNC_ADD_AND_FETCH */
 
 # if HAVE_SYNC_SUB_AND_FETCH
@@ -127,8 +127,14 @@ static inline LONG InterlockedDecrement(LONG volatile *x)
     return __sync_sub_and_fetch(x, 1);
 }
 # else
-#  error "InterlockedDecrement not implemented for this platform"
+#  error "InterlockedDecrement() not implemented for this platform"
 # endif
 #endif  /* _WIN32 */
 
+#if HAVE_SYNC_ADD_AND_FETCH
+# define atomic_add_fetch(ptr, val) __sync_add_and_fetch(ptr, val)
+#else
+# error "atomic_add_fetch() not implemented for this platform"
+#endif  /* HAVE_SYNC_ADD_AND_FETCH */
+
 #endif  /* __VKD3D_COMMON_H */
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c
index 25c8005dc445..7634e71b32ee 100644
--- a/libs/vkd3d/command.c
+++ b/libs/vkd3d/command.c
@@ -257,27 +257,22 @@ static HRESULT vkd3d_enqueue_gpu_fence(struct vkd3d_fence_worker *worker,
         return hresult_from_errno(rc);
     }
 
-    if (!vkd3d_array_reserve((void **)&worker->vk_fences, &worker->vk_fences_size,
-            worker->fence_count + 1, sizeof(*worker->vk_fences)))
-    {
-        ERR("Failed to add GPU fence.\n");
-        pthread_mutex_unlock(&worker->mutex);
-        return E_OUTOFMEMORY;
-    }
-    if (!vkd3d_array_reserve((void **)&worker->fences, &worker->fences_size,
-            worker->fence_count + 1, sizeof(*worker->fences)))
+    if (!vkd3d_array_reserve((void **)&worker->enqueued_fences, &worker->enqueued_fences_size,
+            worker->enqueued_fence_count + 1, sizeof(*worker->enqueued_fences)))
     {
         ERR("Failed to add GPU fence.\n");
         pthread_mutex_unlock(&worker->mutex);
         return E_OUTOFMEMORY;
     }
 
-    worker->vk_fences[worker->fence_count] = vk_fence;
-    worker->fences[worker->fence_count].fence = fence;
-    worker->fences[worker->fence_count].value = value;
-    worker->fences[worker->fence_count].queue = queue;
-    worker->fences[worker->fence_count].queue_sequence_number = queue_sequence_number;
-    ++worker->fence_count;
+    worker->enqueued_fences[worker->enqueued_fence_count].vk_fence = vk_fence;
+    worker->enqueued_fences[worker->enqueued_fence_count].fence = fence;
+    worker->enqueued_fences[worker->enqueued_fence_count].value = value;
+    worker->enqueued_fences[worker->enqueued_fence_count].queue = queue;
+    worker->enqueued_fences[worker->enqueued_fence_count].queue_sequence_number = queue_sequence_number;
+    ++worker->enqueued_fence_count;
+
+    InterlockedIncrement(&fence->pending_worker_operation_count);
 
     pthread_cond_signal(&worker->cond);
     pthread_mutex_unlock(&worker->mutex);
@@ -287,37 +282,73 @@ static HRESULT vkd3d_enqueue_gpu_fence(struct vkd3d_fence_worker *worker,
 
 static void vkd3d_fence_worker_remove_fence(struct vkd3d_fence_worker *worker, struct d3d12_fence *fence)
 {
-    struct d3d12_device *device = worker->device;
-    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
-    unsigned int i, j;
+    LONG count;
     int rc;
 
+    if (!(count = atomic_add_fetch(&fence->pending_worker_operation_count, 0)))
+        return;
+
+    WARN("Waiting for %u pending fence operations (fence %p).\n", count, fence);
+
     if ((rc = pthread_mutex_lock(&worker->mutex)))
     {
         ERR("Failed to lock mutex, error %d.\n", rc);
         return;
     }
 
-    for (i = 0, j = 0; i < worker->fence_count; ++i)
+    while ((count = atomic_add_fetch(&fence->pending_worker_operation_count, 0)))
     {
-        if (worker->fences[i].fence == fence)
-        {
-            VK_CALL(vkDestroyFence(device->vk_device, worker->vk_fences[i], NULL));
-            continue;
-        }
+        TRACE("Still waiting for %u pending fence operations (fence %p).\n", count, fence);
 
-        if (i != j)
-        {
-            worker->vk_fences[j] = worker->vk_fences[i];
-            worker->fences[j] = worker->fences[i];
-        }
-        ++j;
+        worker->pending_fence_destruction = true;
+        pthread_cond_signal(&worker->cond);
+
+        pthread_cond_wait(&worker->fence_destruction_cond, &worker->mutex);
     }
-    worker->fence_count = j;
+
+    TRACE("Removed fence %p.\n", fence);
 
     pthread_mutex_unlock(&worker->mutex);
 }
 
+static void vkd3d_fence_worker_move_enqueued_fences_locked(struct vkd3d_fence_worker *worker)
+{
+    unsigned int i;
+    size_t count;
+    bool ret;
+
+    if (!worker->enqueued_fence_count)
+        return;
+
+    count = worker->fence_count + worker->enqueued_fence_count;
+
+    ret = vkd3d_array_reserve((void **)&worker->vk_fences, &worker->vk_fences_size,
+            count, sizeof(*worker->vk_fences));
+    ret &= vkd3d_array_reserve((void **)&worker->fences, &worker->fences_size,
+            count, sizeof(*worker->fences));
+    if (!ret)
+    {
+        ERR("Failed to reserve memory.\n");
+        return;
+    }
+
+    for (i = 0; i < worker->enqueued_fence_count; ++i)
+    {
+        struct vkd3d_enqueued_fence *current = &worker->enqueued_fences[i];
+
+        worker->vk_fences[worker->fence_count] = current->vk_fence;
+
+        worker->fences[worker->fence_count].fence = current->fence;
+        worker->fences[worker->fence_count].value = current->value;
+        worker->fences[worker->fence_count].queue = current->queue;
+        worker->fences[worker->fence_count].queue_sequence_number = current->queue_sequence_number;
+
+        ++worker->fence_count;
+    }
+    assert(worker->fence_count == count);
+    worker->enqueued_fence_count = 0;
+}
+
 static void vkd3d_wait_for_gpu_fences(struct vkd3d_fence_worker *worker)
 {
     struct d3d12_device *device = worker->device;
@@ -351,6 +382,8 @@ static void vkd3d_wait_for_gpu_fences(struct vkd3d_fence_worker *worker)
             if (FAILED(hr = d3d12_fence_signal(current->fence, current->value, vk_fence)))
                 ERR("Failed to signal D3D12 fence, hr %#x.\n", hr);
 
+            InterlockedDecrement(&current->fence->pending_worker_operation_count);
+
             vkd3d_queue_update_sequence_number(current->queue, current->queue_sequence_number, device);
             continue;
         }
@@ -373,35 +406,48 @@ static void *vkd3d_fence_worker_main(void *arg)
     struct vkd3d_fence_worker *worker = arg;
     int rc;
 
-    vkd3d_set_thread_name("vkd3d_worker");
+    vkd3d_set_thread_name("vkd3d_fence");
 
     for (;;)
     {
-        if ((rc = pthread_mutex_lock(&worker->mutex)))
-        {
-            ERR("Failed to lock mutex, error %d.\n", rc);
-            return NULL;
-        }
+        vkd3d_wait_for_gpu_fences(worker);
 
-        if (worker->should_exit && !worker->fence_count)
+        if (!worker->fence_count || atomic_add_fetch(&worker->enqueued_fence_count, 0))
         {
-            pthread_mutex_unlock(&worker->mutex);
-            break;
-        }
+            if ((rc = pthread_mutex_lock(&worker->mutex)))
+            {
+                ERR("Failed to lock mutex, error %d.\n", rc);
+                break;
+            }
 
-        if (!worker->fence_count)
-        {
-            if ((rc = pthread_cond_wait(&worker->cond, &worker->mutex)))
+            if (worker->pending_fence_destruction)
             {
-                ERR("Failed to wait on condition variable, error %d.\n", rc);
-                pthread_mutex_unlock(&worker->mutex);
-                return NULL;
+                pthread_cond_broadcast(&worker->fence_destruction_cond);
+                worker->pending_fence_destruction = false;
             }
-        }
 
-        vkd3d_wait_for_gpu_fences(worker);
+            if (worker->enqueued_fence_count)
+            {
+                vkd3d_fence_worker_move_enqueued_fences_locked(worker);
+            }
+            else
+            {
+                if (worker->should_exit)
+                {
+                    pthread_mutex_unlock(&worker->mutex);
+                    break;
+                }
 
-        pthread_mutex_unlock(&worker->mutex);
+                if ((rc = pthread_cond_wait(&worker->cond, &worker->mutex)))
+                {
+                    ERR("Failed to wait on condition variable, error %d.\n", rc);
+                    pthread_mutex_unlock(&worker->mutex);
+                    break;
+                }
+            }
+
+            pthread_mutex_unlock(&worker->mutex);
+        }
     }
 
     return NULL;
@@ -416,8 +462,13 @@ HRESULT vkd3d_fence_worker_start(struct vkd3d_fence_worker *worker,
     TRACE("worker %p.\n", worker);
 
     worker->should_exit = false;
+    worker->pending_fence_destruction = false;
     worker->device = device;
 
+    worker->enqueued_fence_count = 0;
+    worker->enqueued_fences = NULL;
+    worker->enqueued_fences_size = 0;
+
     worker->fence_count = 0;
 
     worker->vk_fences = NULL;
@@ -438,11 +489,20 @@ HRESULT vkd3d_fence_worker_start(struct vkd3d_fence_worker *worker,
         return hresult_from_errno(rc);
     }
 
+    if ((rc = pthread_cond_init(&worker->fence_destruction_cond, NULL)))
+    {
+        ERR("Failed to initialize condition variable, error %d.\n", rc);
+        pthread_mutex_destroy(&worker->mutex);
+        pthread_cond_destroy(&worker->cond);
+        return hresult_from_errno(rc);
+    }
+
     if (FAILED(hr = vkd3d_create_thread(device->vkd3d_instance,
             vkd3d_fence_worker_main, worker, &worker->thread)))
     {
         pthread_mutex_destroy(&worker->mutex);
         pthread_cond_destroy(&worker->cond);
+        pthread_cond_destroy(&worker->fence_destruction_cond);
     }
 
     return hr;
@@ -472,7 +532,9 @@ HRESULT vkd3d_fence_worker_stop(struct vkd3d_fence_worker *worker,
 
     pthread_mutex_destroy(&worker->mutex);
     pthread_cond_destroy(&worker->cond);
+    pthread_cond_destroy(&worker->fence_destruction_cond);
 
+    vkd3d_free(worker->enqueued_fences);
     vkd3d_free(worker->vk_fences);
     vkd3d_free(worker->fences);
 
@@ -1047,6 +1109,8 @@ static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device *
 
     memset(fence->old_vk_fences, 0, sizeof(fence->old_vk_fences));
 
+    fence->pending_worker_operation_count = 0;
+
     if (FAILED(hr = vkd3d_private_store_init(&fence->private_store)))
     {
         pthread_mutex_destroy(&fence->mutex);
diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h
index 9f00d327b73b..3af8a9594a5e 100644
--- a/libs/vkd3d/vkd3d_private.h
+++ b/libs/vkd3d/vkd3d_private.h
@@ -156,7 +156,20 @@ struct vkd3d_fence_worker
     union vkd3d_thread_handle thread;
     pthread_mutex_t mutex;
     pthread_cond_t cond;
+    pthread_cond_t fence_destruction_cond;
     bool should_exit;
+    bool pending_fence_destruction;
+
+    size_t enqueued_fence_count;
+    struct vkd3d_enqueued_fence
+    {
+        VkFence vk_fence;
+        struct d3d12_fence *fence;
+        uint64_t value;
+        struct vkd3d_queue *queue;
+        uint64_t queue_sequence_number;
+    } *enqueued_fences;
+    size_t enqueued_fences_size;
 
     size_t fence_count;
     VkFence *vk_fences;
@@ -317,6 +330,8 @@ struct d3d12_fence
     struct list semaphores;
     unsigned int semaphore_count;
 
+    LONG pending_worker_operation_count;
+
     VkFence old_vk_fences[VKD3D_MAX_VK_SYNC_OBJECTS];
 
     struct d3d12_device *device;
-- 
2.21.0




More information about the wine-devel mailing list