[PATCH vkd3d] vkd3d: Use Vulkan timeline semaphores for D3D12 fences.

Conor McCarthy cmccarthy at codeweavers.com
Tue Jan 25 08:34:42 CST 2022


If KHR_timeline_semaphore is not available, the old implementation
will be used.

Based on a vkd3d-proton patch by Hans-Kristian Arntzen.

Signed-off-by: Conor McCarthy <cmccarthy at codeweavers.com>
---
 libs/vkd3d/command.c       | 479 +++++++++++++++++++++++++++++++++----
 libs/vkd3d/device.c        |  14 ++
 libs/vkd3d/vkd3d_private.h |  11 +
 libs/vkd3d/vulkan_procs.h  |   5 +
 tests/d3d12.c              |  11 +-
 5 files changed, 463 insertions(+), 57 deletions(-)

diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c
index 2cf1eba2..3203cb51 100644
--- a/libs/vkd3d/command.c
+++ b/libs/vkd3d/command.c
@@ -268,6 +268,7 @@ static HRESULT vkd3d_enqueue_gpu_fence(struct vkd3d_fence_worker *worker,
     }
 
     worker->enqueued_fences[worker->enqueued_fence_count].vk_fence = vk_fence;
+    worker->enqueued_fences[worker->enqueued_fence_count].vk_semaphore = VK_NULL_HANDLE;
     waiting_fence = &worker->enqueued_fences[worker->enqueued_fence_count].waiting_fence;
     waiting_fence->fence = fence;
     waiting_fence->value = value;
@@ -317,6 +318,7 @@ static void vkd3d_fence_worker_remove_fence(struct vkd3d_fence_worker *worker, s
 static void vkd3d_fence_worker_move_enqueued_fences_locked(struct vkd3d_fence_worker *worker)
 {
     unsigned int i;
+    bool timeline;
     size_t count;
     bool ret;
 
@@ -325,8 +327,18 @@ static void vkd3d_fence_worker_move_enqueued_fences_locked(struct vkd3d_fence_wo
 
     count = worker->fence_count + worker->enqueued_fence_count;
 
-    ret = vkd3d_array_reserve((void **)&worker->vk_fences, &worker->vk_fences_size,
-            count, sizeof(*worker->vk_fences));
+    if ((timeline = worker->device->vk_info.KHR_timeline_semaphore))
+    {
+        ret = vkd3d_array_reserve((void **) &worker->vk_semaphores, &worker->vk_semaphores_size,
+                count, sizeof(*worker->vk_semaphores));
+        ret &= vkd3d_array_reserve((void **) &worker->semaphore_wait_values, &worker->semaphore_wait_values_size,
+                count, sizeof(*worker->semaphore_wait_values));
+    }
+    else
+    {
+        ret = vkd3d_array_reserve((void **)&worker->vk_fences, &worker->vk_fences_size,
+                count, sizeof(*worker->vk_fences));
+    }
     ret &= vkd3d_array_reserve((void **)&worker->fences, &worker->fences_size,
             count, sizeof(*worker->fences));
     if (!ret)
@@ -339,7 +351,16 @@ static void vkd3d_fence_worker_move_enqueued_fences_locked(struct vkd3d_fence_wo
     {
         struct vkd3d_enqueued_fence *current = &worker->enqueued_fences[i];
 
-        worker->vk_fences[worker->fence_count] = current->vk_fence;
+        if (timeline)
+        {
+            worker->vk_semaphores[worker->fence_count] = current->vk_semaphore;
+            worker->semaphore_wait_values[worker->fence_count] = current->waiting_fence.value;
+        }
+        else
+        {
+            worker->vk_fences[worker->fence_count] = current->vk_fence;
+        }
+
         worker->fences[worker->fence_count] = current->waiting_fence;
         ++worker->fence_count;
     }
@@ -347,6 +368,66 @@ static void vkd3d_fence_worker_move_enqueued_fences_locked(struct vkd3d_fence_wo
     worker->enqueued_fence_count = 0;
 }
 
+/* Wait until any queued timeline semaphore reaches its wait value, then signal each D3D12 fence
+ * whose value was reached and compact the still-pending entries to the front of the wait arrays. */
+static void vkd3d_wait_for_gpu_timeline_semaphores(struct vkd3d_fence_worker *worker)
+{
+    struct d3d12_device *device = worker->device;
+    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
+    VkSemaphoreWaitInfoKHR wait_info;
+    unsigned int src, dst = 0;
+    uint64_t counter_value;
+    HRESULT hr;
+    int vr;
+
+    if (!worker->fence_count)
+        return;
+
+    wait_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO_KHR;
+    wait_info.pNext = NULL;
+    wait_info.flags = VK_SEMAPHORE_WAIT_ANY_BIT_KHR;
+    wait_info.semaphoreCount = worker->fence_count;
+    wait_info.pSemaphores = worker->vk_semaphores;
+    wait_info.pValues = worker->semaphore_wait_values;
+
+    vr = VK_CALL(vkWaitSemaphoresKHR(device->vk_device, &wait_info, ~(uint64_t)0));
+    if (vr == VK_TIMEOUT)
+        return;
+    if (vr != VK_SUCCESS)
+    {
+        ERR("Failed to wait for Vulkan timeline semaphores, vr %d.\n", vr);
+        return;
+    }
+
+    for (src = 0; src < worker->fence_count; ++src)
+    {
+        struct vkd3d_waiting_fence *waiting = &worker->fences[src];
+
+        vr = VK_CALL(vkGetSemaphoreCounterValueKHR(device->vk_device, worker->vk_semaphores[src], &counter_value));
+        if (vr < 0)
+        {
+            ERR("Failed to get Vulkan semaphore status, vr %d.\n", vr);
+        }
+        else if (counter_value >= waiting->value)
+        {
+            TRACE("Signaling fence %p value %#"PRIx64".\n", waiting->fence, waiting->value);
+            if (FAILED(hr = d3d12_fence_signal(waiting->fence, counter_value, VK_NULL_HANDLE)))
+                ERR("Failed to signal D3D12 fence, hr %#x.\n", hr);
+            InterlockedDecrement(&waiting->fence->pending_worker_operation_count);
+            continue;
+        }
+
+        if (src != dst)
+        {
+            worker->vk_semaphores[dst] = worker->vk_semaphores[src];
+            worker->semaphore_wait_values[dst] = worker->semaphore_wait_values[src];
+            worker->fences[dst] = *waiting;
+        }
+        ++dst;
+    }
+    worker->fence_count = dst;
+}
+
 static void vkd3d_wait_for_gpu_fences(struct vkd3d_fence_worker *worker)
 {
     struct d3d12_device *device = worker->device;
@@ -402,13 +483,19 @@ static void vkd3d_wait_for_gpu_fences(struct vkd3d_fence_worker *worker)
 static void *vkd3d_fence_worker_main(void *arg)
 {
     struct vkd3d_fence_worker *worker = arg;
+    bool timeline;
     int rc;
 
     vkd3d_set_thread_name("vkd3d_fence");
 
+    timeline = worker->device->vk_info.KHR_timeline_semaphore;
+
     for (;;)
     {
-        vkd3d_wait_for_gpu_fences(worker);
+        if (timeline)
+            vkd3d_wait_for_gpu_timeline_semaphores(worker);
+        else
+            vkd3d_wait_for_gpu_fences(worker);
 
         if (!worker->fence_count || atomic_add_fetch(&worker->enqueued_fence_count, 0))
         {
@@ -473,6 +560,10 @@ HRESULT vkd3d_fence_worker_start(struct vkd3d_fence_worker *worker,
     worker->vk_fences_size = 0;
     worker->fences = NULL;
     worker->fences_size = 0;
+    worker->vk_semaphores = NULL;
+    worker->vk_semaphores_size = 0;
+    worker->semaphore_wait_values = NULL;
+    worker->semaphore_wait_values_size = 0;
 
     if ((rc = pthread_mutex_init(&worker->mutex, NULL)))
     {
@@ -535,6 +626,8 @@ HRESULT vkd3d_fence_worker_stop(struct vkd3d_fence_worker *worker,
     vkd3d_free(worker->enqueued_fences);
     vkd3d_free(worker->vk_fences);
     vkd3d_free(worker->fences);
+    vkd3d_free(worker->vk_semaphores);
+    vkd3d_free(worker->semaphore_wait_values);
 
     return S_OK;
 }
@@ -684,6 +777,7 @@ static void d3d12_fence_destroy_vk_objects(struct d3d12_fence *fence)
     }
 
     d3d12_fence_garbage_collect_vk_semaphores_locked(fence, true);
+    VK_CALL(vkDestroySemaphore(device->vk_device, fence->timeline_semaphore, NULL));
 
     pthread_mutex_unlock(&fence->mutex);
 }
@@ -802,31 +896,21 @@ static HRESULT d3d12_fence_add_vk_semaphore(struct d3d12_fence *fence,
     return hr;
 }
 
-static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkFence vk_fence)
+static bool d3d12_fence_signal_external_events_locked(struct d3d12_fence *fence)
 {
     struct d3d12_device *device = fence->device;
-    struct vkd3d_signaled_semaphore *current;
     bool signal_null_event_cond = false;
     unsigned int i, j;
-    int rc;
-
-    if ((rc = pthread_mutex_lock(&fence->mutex)))
-    {
-        ERR("Failed to lock mutex, error %d.\n", rc);
-        return hresult_from_errno(rc);
-    }
-
-    fence->value = value;
 
     for (i = 0, j = 0; i < fence->event_count; ++i)
     {
         struct vkd3d_waiting_event *current = &fence->events[i];
 
-        if (current->value <= value)
+        if (current->value <= fence->value)
         {
             if (current->event)
             {
-                fence->device->signal_event(current->event);
+                device->signal_event(current->event);
             }
             else
             {
@@ -841,9 +925,36 @@ static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkF
             ++j;
         }
     }
+
     fence->event_count = j;
 
-    if (signal_null_event_cond)
+    return signal_null_event_cond;
+}
+
+static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkFence vk_fence)
+{
+    struct d3d12_device *device = fence->device;
+    struct vkd3d_signaled_semaphore *current;
+    unsigned int i;
+    int rc;
+
+    if ((rc = pthread_mutex_lock(&fence->mutex)))
+    {
+        ERR("Failed to lock mutex, error %d.\n", rc);
+        return hresult_from_errno(rc);
+    }
+
+    if (value < fence->value && device->vk_info.KHR_timeline_semaphore)
+    {
+        FIXME("Fence values must be monotonically increasing. Fence %p, was %"PRIx64", now %"PRIx64".\n",
+                fence, fence->value, value);
+    }
+    else
+    {
+        fence->value = value;
+    }
+
+    if (d3d12_fence_signal_external_events_locked(fence))
         pthread_cond_broadcast(&fence->null_event_cond);
 
     if (vk_fence)
@@ -1069,12 +1180,71 @@ static HRESULT STDMETHODCALLTYPE d3d12_fence_SetEventOnCompletion(ID3D12Fence *i
     return S_OK;
 }
 
+/* Signal a timeline-semaphore-backed fence from the CPU and notify any waiters whose value has
+ * been reached. Takes fence->mutex itself. */
+static HRESULT d3d12_fence_signal_cpu_timeline_semaphore(struct d3d12_fence *fence, uint64_t value)
+{
+    struct d3d12_device *device = fence->device;
+    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
+    VkSemaphoreSignalInfoKHR info;
+    VkResult vr = VK_SUCCESS;
+    int rc;
+
+    if ((rc = pthread_mutex_lock(&fence->mutex)))
+    {
+        ERR("Failed to lock mutex, error %d.\n", rc);
+        return hresult_from_errno(rc);
+    }
+
+    /* We must only signal a value which is greater than the current value. A pending GPU signal
+     * (pending_timeline_value) might be blocked by another synchronization primitive, so compare with
+     * fence->value, which the fence wait thread updates. This is racy, but there is no way to avoid it. */
+    if (value > fence->value)
+    {
+        /* Sanity check against the delta limit. */
+        if (value - fence->value > device->vk_info.timeline_semaphore_properties.maxTimelineSemaphoreValueDifference)
+        {
+            FIXME("Timeline semaphore delta is %"PRIu64", but implementation only supports a delta of %"PRIu64".\n",
+                    value - fence->value, device->vk_info.timeline_semaphore_properties.maxTimelineSemaphoreValueDifference);
+        }
+
+        info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO_KHR;
+        info.pNext = NULL;
+        info.semaphore = fence->timeline_semaphore;
+        info.value = value;
+        if ((vr = VK_CALL(vkSignalSemaphoreKHR(device->vk_device, &info))) >= 0)
+        {
+            fence->value = value;
+            if (value > fence->pending_timeline_value)
+                fence->pending_timeline_value = value;
+        }
+        else
+        {
+            ERR("Failed to signal timeline semaphore, vr %d.\n", vr);
+        }
+    }
+    else if (value != fence->value)
+    {
+        FIXME("Attempting to signal fence %p with %"PRIu64", but value is currently %"PRIu64".\n",
+                fence, value, fence->value);
+    }
+
+    /* Wake waiters on null_event_cond when requested, as d3d12_fence_signal() does. */
+    if (d3d12_fence_signal_external_events_locked(fence))
+        pthread_cond_broadcast(&fence->null_event_cond);
+
+    pthread_mutex_unlock(&fence->mutex);
+    return hresult_from_vk_result(vr);
+}
+
 static HRESULT STDMETHODCALLTYPE d3d12_fence_Signal(ID3D12Fence *iface, UINT64 value)
 {
     struct d3d12_fence *fence = impl_from_ID3D12Fence(iface);
 
     TRACE("iface %p, value %#"PRIx64".\n", iface, value);
 
+    if (fence->timeline_semaphore) /* Only created when KHR_timeline_semaphore is available. */
+        return d3d12_fence_signal_cpu_timeline_semaphore(fence, value);
     return d3d12_fence_signal(fence, value, VK_NULL_HANDLE);
 }
 
@@ -1105,6 +1275,34 @@ static struct d3d12_fence *unsafe_impl_from_ID3D12Fence(ID3D12Fence *iface)
     return impl_from_ID3D12Fence(iface);
 }
 
+/* Create the Vulkan timeline semaphore backing "fence", with "initial_value" as its initial value. */
+static HRESULT d3d12_fence_init_timeline_semaphore(struct d3d12_fence *fence, struct d3d12_device *device,
+        uint64_t initial_value)
+{
+    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
+    VkSemaphoreTypeCreateInfoKHR timeline_info;
+    VkSemaphoreCreateInfo create_info;
+    VkResult vr;
+
+    timeline_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR;
+    timeline_info.pNext = NULL;
+    timeline_info.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE_KHR;
+    timeline_info.initialValue = initial_value;
+
+    create_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
+    create_info.pNext = &timeline_info;
+    create_info.flags = 0;
+
+    if ((vr = VK_CALL(vkCreateSemaphore(device->vk_device, &create_info, NULL, &fence->timeline_semaphore))) >= 0)
+    {
+        fence->pending_timeline_value = initial_value;
+        return S_OK;
+    }
+
+    WARN("Failed to create timeline semaphore, vr %d.\n", vr);
+    return hresult_from_vk_result(vr);
+}
+
 static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device *device,
         UINT64 initial_value, D3D12_FENCE_FLAGS flags)
 {
@@ -1136,6 +1334,11 @@ static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device *
     fence->events_size = 0;
     fence->event_count = 0;
 
+    fence->timeline_semaphore = NULL;
+    if (device->vk_info.KHR_timeline_semaphore && FAILED(hr = d3d12_fence_init_timeline_semaphore(fence,
+            device, initial_value)))
+        return hr;
+
     list_init(&fence->semaphores);
     fence->semaphore_count = 0;
 
@@ -6000,18 +6203,97 @@ static void STDMETHODCALLTYPE d3d12_command_queue_EndEvent(ID3D12CommandQueue *i
     FIXME("iface %p stub!\n", iface);
 }
 
+/* Record a GPU signal of "value" on a timeline-backed fence. Returns true if the caller should
+ * submit the signal, and false if it would not advance the fence or the mutex could not be locked. */
+static bool d3d12_fence_update_gpu_signal_timeline_semaphore(struct d3d12_fence *fence, uint64_t value)
+{
+    const uint64_t max_delta = fence->device->vk_info.timeline_semaphore_properties.maxTimelineSemaphoreValueDifference;
+    bool need_signal;
+    int rc;
+
+    if ((rc = pthread_mutex_lock(&fence->mutex)))
+    {
+        ERR("Failed to lock mutex, error %d.\n", rc);
+        return false;
+    }
+
+    if (value <= fence->pending_timeline_value)
+    {
+        /* Signalling a value which does not increase the pending value: warn, but do not treat it
+         * as an error since it might work. The pending signal might be blocked by another fence,
+         * so base the decision on the actual, currently visible count value. */
+        FIXME("Fence %p is being signalled non-monotonically. Old pending value %"PRIu64", new pending value %"PRIu64".\n",
+                fence, fence->pending_timeline_value, value);
+        need_signal = value > fence->value;
+    }
+    else
+    {
+        /* Sanity check against the delta limit. Use the current fence value. */
+        if (value - fence->value > max_delta)
+        {
+            FIXME("Timeline semaphore delta is %"PRIu64", but implementation only supports a delta of %"PRIu64".\n",
+                    value - fence->value, max_delta);
+        }
+
+        fence->pending_timeline_value = value;
+        need_signal = true;
+    }
+
+    pthread_mutex_unlock(&fence->mutex);
+
+    return need_signal;
+}
+
+/* Hand a wait for "vk_semaphore" reaching "value" to the fence worker, which then signals "fence". */
+static HRESULT vkd3d_enqueue_timeline_semaphore(struct vkd3d_fence_worker *worker, VkSemaphore vk_semaphore,
+        struct d3d12_fence *fence, uint64_t value, struct vkd3d_queue *queue)
+{
+    struct vkd3d_enqueued_fence *enqueued;
+    int rc;
+
+    TRACE("worker %p, fence %p, value %#"PRIx64".\n", worker, fence, value);
+
+    if ((rc = pthread_mutex_lock(&worker->mutex)))
+    {
+        ERR("Failed to lock mutex, error %d.\n", rc);
+        return hresult_from_errno(rc);
+    }
+
+    if (!vkd3d_array_reserve((void **)&worker->enqueued_fences, &worker->enqueued_fences_size,
+            worker->enqueued_fence_count + 1, sizeof(*worker->enqueued_fences)))
+    {
+        ERR("Failed to add GPU timeline semaphore.\n");
+        pthread_mutex_unlock(&worker->mutex);
+        return E_OUTOFMEMORY;
+    }
+
+    enqueued = &worker->enqueued_fences[worker->enqueued_fence_count];
+    enqueued->vk_fence = VK_NULL_HANDLE; /* Unused here; cleared like vk_semaphore in vkd3d_enqueue_gpu_fence(). */
+    enqueued->vk_semaphore = vk_semaphore;
+    enqueued->waiting_fence.fence = fence;
+    enqueued->waiting_fence.value = value;
+    enqueued->waiting_fence.queue = queue;
+    ++worker->enqueued_fence_count;
+    InterlockedIncrement(&fence->pending_worker_operation_count);
+
+    pthread_cond_signal(&worker->cond);
+    pthread_mutex_unlock(&worker->mutex);
+    return S_OK;
+}
+
 static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue *iface,
         ID3D12Fence *fence_iface, UINT64 value)
 {
     struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface);
+    VkTimelineSemaphoreSubmitInfoKHR timeline_submit_info;
     const struct vkd3d_vk_device_procs *vk_procs;
     VkSemaphore vk_semaphore = VK_NULL_HANDLE;
     VkFence vk_fence = VK_NULL_HANDLE;
     struct vkd3d_queue *vkd3d_queue;
+    uint64_t sequence_number = 0;
     struct d3d12_device *device;
     struct d3d12_fence *fence;
     VkSubmitInfo submit_info;
-    uint64_t sequence_number;
     VkQueue vk_queue;
     VkResult vr;
     HRESULT hr;
@@ -6024,10 +6306,27 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue *
 
     fence = unsafe_impl_from_ID3D12Fence(fence_iface);
 
-    if ((vr = d3d12_fence_create_vk_fence(fence, &vk_fence)) < 0)
+    if (device->vk_info.KHR_timeline_semaphore)
     {
-        WARN("Failed to create Vulkan fence, vr %d.\n", vr);
-        goto fail_vkresult;
+        if (d3d12_fence_update_gpu_signal_timeline_semaphore(fence, value))
+        {
+            vk_semaphore = fence->timeline_semaphore;
+            assert(vk_semaphore);
+        }
+        else
+        {
+            /* If we are not incrementing the counter, this is a noop since we cannot
+             * signal a timeline semaphore non-monotonically in Vulkan. */
+            return S_OK;
+        }
+    }
+    else
+    {
+        if ((vr = d3d12_fence_create_vk_fence(fence, &vk_fence)) < 0)
+        {
+            WARN("Failed to create Vulkan fence, vr %d.\n", vr);
+            goto fail_vkresult;
+        }
     }
 
     if (!(vk_queue = vkd3d_queue_acquire(vkd3d_queue)))
@@ -6037,7 +6336,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue *
         goto fail;
     }
 
-    if ((vr = vkd3d_queue_create_vk_semaphore_locked(vkd3d_queue, device, &vk_semaphore)) < 0)
+    if (!device->vk_info.KHR_timeline_semaphore && (vr = vkd3d_queue_create_vk_semaphore_locked(vkd3d_queue,
+            device, &vk_semaphore)) < 0)
     {
         ERR("Failed to create Vulkan semaphore, vr %d.\n", vr);
         vk_semaphore = VK_NULL_HANDLE;
@@ -6053,7 +6353,19 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue *
     submit_info.signalSemaphoreCount = vk_semaphore ? 1 : 0;
     submit_info.pSignalSemaphores = &vk_semaphore;
 
-    if ((vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, vk_fence))) >= 0)
+    if (device->vk_info.KHR_timeline_semaphore)
+    {
+        timeline_submit_info.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR;
+        timeline_submit_info.pNext = NULL;
+        timeline_submit_info.pSignalSemaphoreValues = &value;
+        timeline_submit_info.signalSemaphoreValueCount = 1;
+        timeline_submit_info.waitSemaphoreValueCount = 0;
+        timeline_submit_info.pWaitSemaphoreValues = NULL;
+        submit_info.pNext = &timeline_submit_info;
+    }
+
+    vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, vk_fence));
+    if (!device->vk_info.KHR_timeline_semaphore && vr >= 0)
     {
         sequence_number = ++vkd3d_queue->submitted_sequence_number;
 
@@ -6070,26 +6382,34 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue *
         goto fail_vkresult;
     }
 
-    if (vk_semaphore && SUCCEEDED(hr = d3d12_fence_add_vk_semaphore(fence, vk_semaphore, vk_fence, value)))
-        vk_semaphore = VK_NULL_HANDLE;
-
-    vr = VK_CALL(vkGetFenceStatus(device->vk_device, vk_fence));
-    if (vr == VK_NOT_READY)
-    {
-        if (SUCCEEDED(hr = vkd3d_enqueue_gpu_fence(&device->fence_worker, vk_fence, fence, value, vkd3d_queue, sequence_number)))
-            vk_fence = VK_NULL_HANDLE;
-    }
-    else if (vr == VK_SUCCESS)
+    if (device->vk_info.KHR_timeline_semaphore)
     {
-        TRACE("Already signaled %p, value %#"PRIx64".\n", fence, value);
-        hr = d3d12_fence_signal(fence, value, vk_fence);
-        vk_fence = VK_NULL_HANDLE;
-        vkd3d_queue_update_sequence_number(vkd3d_queue, sequence_number, device);
+        if (SUCCEEDED(hr = vkd3d_enqueue_timeline_semaphore(&device->fence_worker, vk_semaphore, fence, value, vkd3d_queue)))
+            vk_semaphore = VK_NULL_HANDLE;
     }
     else
     {
-        FIXME("Failed to get fence status, vr %d.\n", vr);
-        hr = hresult_from_vk_result(vr);
+        if (vk_semaphore && SUCCEEDED(hr = d3d12_fence_add_vk_semaphore(fence, vk_semaphore, vk_fence, value)))
+            vk_semaphore = VK_NULL_HANDLE;
+
+        vr = VK_CALL(vkGetFenceStatus(device->vk_device, vk_fence));
+        if (vr == VK_NOT_READY)
+        {
+            if (SUCCEEDED(hr = vkd3d_enqueue_gpu_fence(&device->fence_worker, vk_fence, fence, value, vkd3d_queue, sequence_number)))
+                vk_fence = VK_NULL_HANDLE;
+        }
+        else if (vr == VK_SUCCESS)
+        {
+            TRACE("Already signaled %p, value %#"PRIx64".\n", fence, value);
+            hr = d3d12_fence_signal(fence, value, vk_fence);
+            vk_fence = VK_NULL_HANDLE;
+            vkd3d_queue_update_sequence_number(vkd3d_queue, sequence_number, device);
+        }
+        else
+        {
+            FIXME("Failed to get fence status, vr %d.\n", vr);
+            hr = hresult_from_vk_result(vr);
+        }
     }
 
     if (vk_fence || vk_semaphore)
@@ -6105,32 +6425,27 @@ fail_vkresult:
     hr = hresult_from_vk_result(vr);
 fail:
     VK_CALL(vkDestroyFence(device->vk_device, vk_fence, NULL));
-    VK_CALL(vkDestroySemaphore(device->vk_device, vk_semaphore, NULL));
+    if (!device->vk_info.KHR_timeline_semaphore)
+        VK_CALL(vkDestroySemaphore(device->vk_device, vk_semaphore, NULL));
     return hr;
 }
 
-static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *iface,
-        ID3D12Fence *fence_iface, UINT64 value)
+static HRESULT d3d12_command_queue_wait_binary_semaphore(struct d3d12_command_queue *command_queue,
+        struct d3d12_fence *fence, uint64_t value)
 {
     static const VkPipelineStageFlagBits wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
-    struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface);
     const struct vkd3d_vk_device_procs *vk_procs;
     struct vkd3d_signaled_semaphore *semaphore;
     uint64_t completed_value = 0;
     struct vkd3d_queue *queue;
-    struct d3d12_fence *fence;
     VkSubmitInfo submit_info;
     VkQueue vk_queue;
     VkResult vr;
     HRESULT hr;
 
-    TRACE("iface %p, fence %p, value %#"PRIx64".\n", iface, fence_iface, value);
-
     vk_procs = &command_queue->device->vk_procs;
     queue = command_queue->vkd3d_queue;
 
-    fence = unsafe_impl_from_ID3D12Fence(fence_iface);
-
     semaphore = d3d12_fence_acquire_vk_semaphore(fence, value, &completed_value);
     if (!semaphore && completed_value >= value)
     {
@@ -6208,6 +6523,72 @@ fail:
     return hr;
 }
 
+/* Submit an empty batch that makes the command queue wait for the timeline semaphore of "fence"
+ * to reach "value". */
+static HRESULT d3d12_command_queue_wait_timeline_semaphore(struct d3d12_command_queue *command_queue,
+        struct d3d12_fence *fence, uint64_t value)
+{
+    static const VkPipelineStageFlagBits wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
+    const struct vkd3d_vk_device_procs *vk_procs = &command_queue->device->vk_procs;
+    struct vkd3d_queue *queue = command_queue->vkd3d_queue;
+    VkTimelineSemaphoreSubmitInfoKHR timeline_info;
+    VkSubmitInfo submit_info;
+    VkQueue vk_queue;
+    VkResult vr;
+
+    assert(fence->timeline_semaphore);
+
+    /* The value to wait for is supplied by the timeline structure chained to the submission. */
+    timeline_info.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR;
+    timeline_info.pNext = NULL;
+    timeline_info.waitSemaphoreValueCount = 1;
+    timeline_info.pWaitSemaphoreValues = &value;
+    timeline_info.signalSemaphoreValueCount = 0;
+    timeline_info.pSignalSemaphoreValues = NULL;
+
+    /* No command buffers and nothing to signal: the submission consists of the wait only. */
+    submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+    submit_info.pNext = &timeline_info;
+    submit_info.waitSemaphoreCount = 1;
+    submit_info.pWaitSemaphores = &fence->timeline_semaphore;
+    submit_info.pWaitDstStageMask = &wait_stage_mask;
+    submit_info.commandBufferCount = 0;
+    submit_info.pCommandBuffers = NULL;
+    submit_info.signalSemaphoreCount = 0;
+    submit_info.pSignalSemaphores = NULL;
+
+    if (!(vk_queue = vkd3d_queue_acquire(queue)))
+    {
+        ERR("Failed to acquire queue %p.\n", queue);
+        return E_FAIL;
+    }
+
+    vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, VK_NULL_HANDLE));
+
+    vkd3d_queue_release(queue);
+
+    if (vr >= 0)
+        return S_OK;
+
+    WARN("Failed to submit wait operation, vr %d.\n", vr);
+    return hresult_from_vk_result(vr);
+}
+
+/* ID3D12CommandQueue::Wait(): use a timeline semaphore wait if available, else binary semaphores. */
+static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *iface,
+        ID3D12Fence *fence_iface, UINT64 value)
+{
+    struct d3d12_command_queue *queue = impl_from_ID3D12CommandQueue(iface);
+    struct d3d12_fence *d3d12_fence = unsafe_impl_from_ID3D12Fence(fence_iface);
+
+    TRACE("iface %p, fence %p, value %#"PRIx64".\n", iface, fence_iface, value);
+
+    if (queue->device->vk_info.KHR_timeline_semaphore)
+        return d3d12_command_queue_wait_timeline_semaphore(queue, d3d12_fence, value);
+    FIXME_ONCE("KHR_timeline_semaphore is not available. Some wait commands may be unsupported.\n");
+    return d3d12_command_queue_wait_binary_semaphore(queue, d3d12_fence, value);
+}
+
 static HRESULT STDMETHODCALLTYPE d3d12_command_queue_GetTimestampFrequency(ID3D12CommandQueue *iface,
         UINT64 *frequency)
 {
diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c
index 4bcb5efc..bc5cad76 100644
--- a/libs/vkd3d/device.c
+++ b/libs/vkd3d/device.c
@@ -129,6 +129,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] =
     VK_EXTENSION(KHR_MAINTENANCE3, KHR_maintenance3),
     VK_EXTENSION(KHR_PUSH_DESCRIPTOR, KHR_push_descriptor),
     VK_EXTENSION(KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE, KHR_sampler_mirror_clamp_to_edge),
+    VK_EXTENSION(KHR_TIMELINE_SEMAPHORE, KHR_timeline_semaphore),
     /* EXT extensions */
     VK_EXTENSION(EXT_CALIBRATED_TIMESTAMPS, EXT_calibrated_timestamps),
     VK_EXTENSION(EXT_CONDITIONAL_RENDERING, EXT_conditional_rendering),
@@ -683,6 +684,7 @@ struct vkd3d_physical_device_info
     VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties;
     VkPhysicalDeviceTransformFeedbackPropertiesEXT xfb_properties;
     VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT vertex_divisor_properties;
+    VkPhysicalDeviceTimelineSemaphorePropertiesKHR timeline_semaphore_properties;
 
     VkPhysicalDeviceProperties2KHR properties2;
 
@@ -694,6 +696,7 @@ struct vkd3d_physical_device_info
     VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT texel_buffer_alignment_features;
     VkPhysicalDeviceTransformFeedbackFeaturesEXT xfb_features;
     VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT vertex_divisor_features;
+    VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore_features;
 
     VkPhysicalDeviceFeatures2 features2;
 };
@@ -705,10 +708,12 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i
     VkPhysicalDeviceDescriptorIndexingPropertiesEXT *descriptor_indexing_properties;
     VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *vertex_divisor_properties;
     VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *buffer_alignment_properties;
+    VkPhysicalDeviceTimelineSemaphorePropertiesKHR *timeline_semaphore_properties;
     VkPhysicalDeviceDescriptorIndexingFeaturesEXT *descriptor_indexing_features;
     VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *vertex_divisor_features;
     VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *buffer_alignment_features;
     VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *demote_features;
+    VkPhysicalDeviceTimelineSemaphoreFeaturesKHR *timeline_semaphore_features;
     VkPhysicalDeviceDepthClipEnableFeaturesEXT *depth_clip_features;
     VkPhysicalDeviceMaintenance3Properties *maintenance3_properties;
     VkPhysicalDeviceTransformFeedbackPropertiesEXT *xfb_properties;
@@ -727,6 +732,8 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i
     buffer_alignment_properties = &info->texel_buffer_alignment_properties;
     vertex_divisor_features = &info->vertex_divisor_features;
     vertex_divisor_properties = &info->vertex_divisor_properties;
+    timeline_semaphore_features = &info->timeline_semaphore_features;
+    timeline_semaphore_properties = &info->timeline_semaphore_properties;
     xfb_features = &info->xfb_features;
     xfb_properties = &info->xfb_properties;
 
@@ -746,6 +753,8 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i
     vk_prepend_struct(&info->features2, xfb_features);
     vertex_divisor_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT;
     vk_prepend_struct(&info->features2, vertex_divisor_features);
+    timeline_semaphore_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR;
+    vk_prepend_struct(&info->features2, timeline_semaphore_features);
 
     if (vulkan_info->KHR_get_physical_device_properties2)
         VK_CALL(vkGetPhysicalDeviceFeatures2KHR(physical_device, &info->features2));
@@ -764,6 +773,8 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i
     vk_prepend_struct(&info->properties2, xfb_properties);
     vertex_divisor_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT;
     vk_prepend_struct(&info->properties2, vertex_divisor_properties);
+    timeline_semaphore_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_PROPERTIES_KHR;
+    vk_prepend_struct(&info->properties2, timeline_semaphore_properties);
 
     if (vulkan_info->KHR_get_physical_device_properties2)
         VK_CALL(vkGetPhysicalDeviceProperties2KHR(physical_device, &info->properties2));
@@ -1312,6 +1323,7 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device,
     vulkan_info->rasterization_stream = physical_device_info->xfb_properties.transformFeedbackRasterizationStreamSelect;
     vulkan_info->transform_feedback_queries = physical_device_info->xfb_properties.transformFeedbackQueries;
     vulkan_info->max_vertex_attrib_divisor = max(physical_device_info->vertex_divisor_properties.maxVertexAttribDivisor, 1);
+    vulkan_info->timeline_semaphore_properties = physical_device_info->timeline_semaphore_properties;
 
     device->feature_options.DoublePrecisionFloatShaderOps = features->shaderFloat64;
     device->feature_options.OutputMergerLogicOp = features->logicOp;
@@ -1434,6 +1446,8 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device,
         vulkan_info->EXT_shader_demote_to_helper_invocation = false;
     if (!physical_device_info->texel_buffer_alignment_features.texelBufferAlignment)
         vulkan_info->EXT_texel_buffer_alignment = false;
+    if (!physical_device_info->timeline_semaphore_features.timelineSemaphore)
+        vulkan_info->KHR_timeline_semaphore = false;
 
     vulkan_info->texel_buffer_alignment_properties = physical_device_info->texel_buffer_alignment_properties;
 
diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h
index 136b0203..088d4312 100644
--- a/libs/vkd3d/vkd3d_private.h
+++ b/libs/vkd3d/vkd3d_private.h
@@ -107,6 +107,7 @@ struct vkd3d_vulkan_info
     bool KHR_maintenance3;
     bool KHR_push_descriptor;
     bool KHR_sampler_mirror_clamp_to_edge;
+    bool KHR_timeline_semaphore;
     /* EXT device extensions */
     bool EXT_calibrated_timestamps;
     bool EXT_conditional_rendering;
@@ -130,6 +131,8 @@ struct vkd3d_vulkan_info
 
     VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties;
 
+    VkPhysicalDeviceTimelineSemaphorePropertiesKHR timeline_semaphore_properties;
+
     unsigned int shader_extension_count;
     enum vkd3d_shader_spirv_extension shader_extensions[VKD3D_MAX_SHADER_EXTENSIONS];
 
@@ -196,6 +199,7 @@ struct vkd3d_fence_worker
     struct vkd3d_enqueued_fence
     {
         VkFence vk_fence;
+        VkSemaphore vk_semaphore;
         struct vkd3d_waiting_fence waiting_fence;
     } *enqueued_fences;
     size_t enqueued_fences_size;
@@ -205,6 +209,10 @@ struct vkd3d_fence_worker
     size_t vk_fences_size;
     struct vkd3d_waiting_fence *fences;
     size_t fences_size;
+    VkSemaphore *vk_semaphores;
+    size_t vk_semaphores_size;
+    uint64_t *semaphore_wait_values;
+    size_t semaphore_wait_values_size;
 
     struct d3d12_device *device;
 };
@@ -378,6 +386,9 @@ struct d3d12_fence
     size_t events_size;
     size_t event_count;
 
+    VkSemaphore timeline_semaphore;
+    uint64_t pending_timeline_value;
+
     struct list semaphores;
     unsigned int semaphore_count;
 
diff --git a/libs/vkd3d/vulkan_procs.h b/libs/vkd3d/vulkan_procs.h
index 60556735..34e0ab4b 100644
--- a/libs/vkd3d/vulkan_procs.h
+++ b/libs/vkd3d/vulkan_procs.h
@@ -195,6 +195,11 @@ VK_DEVICE_EXT_PFN(vkGetDescriptorSetLayoutSupportKHR)
 /* VK_KHR_push_descriptor */
 VK_DEVICE_EXT_PFN(vkCmdPushDescriptorSetKHR)
 
+/* VK_KHR_timeline_semaphore */
+VK_DEVICE_EXT_PFN(vkGetSemaphoreCounterValueKHR)
+VK_DEVICE_EXT_PFN(vkWaitSemaphoresKHR)
+VK_DEVICE_EXT_PFN(vkSignalSemaphoreKHR)
+
 /* VK_EXT_calibrated_timestamps */
 VK_DEVICE_EXT_PFN(vkGetCalibratedTimestampsEXT)
 
diff --git a/tests/d3d12.c b/tests/d3d12.c
index 82f353a7..b5a9b89f 100644
--- a/tests/d3d12.c
+++ b/tests/d3d12.c
@@ -33238,7 +33238,9 @@ static void test_queue_wait(void)
     command_list = context.list;
     queue = context.queue;
 
-    queue2 = create_command_queue(device, D3D12_COMMAND_LIST_TYPE_DIRECT, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL);
+    /* 'queue2' must not map to the same command queue as 'queue', or Wait() before GPU signal will fail.
+     * Using a compute queue fixes this on most hardware, but it may still fail on low-spec hardware. */
+    queue2 = create_command_queue(device, D3D12_COMMAND_LIST_TYPE_COMPUTE, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL);
 
     event = create_event();
     ok(event, "Failed to create event.\n");
@@ -33303,12 +33305,6 @@ static void test_queue_wait(void)
     check_readback_data_uint(&rb, NULL, 0xff00ff00, 0);
     release_resource_readback(&rb);
 
-    if (!vkd3d_test_platform_is_windows())
-    {
-        skip("Wait() is not implemented yet.\n"); /* FIXME */
-        goto skip_tests;
-    }
-
     /* Wait() before CPU signal */
     update_buffer_data(cb, 0, sizeof(blue), &blue);
     queue_wait(queue, fence, 2);
@@ -33384,7 +33380,6 @@ static void test_queue_wait(void)
     check_readback_data_uint(&rb, NULL, 0xff00ff00, 0);
     release_resource_readback(&rb);
 
-skip_tests:
     /* Signal() and Wait() in the same command queue */
     update_buffer_data(cb, 0, sizeof(blue), &blue);
     queue_signal(queue, fence, 7);
-- 
2.34.1




More information about the wine-devel mailing list