[PATCH 2/4] wined3d: Use the default queue index for resource fencing.

Stefan Dösinger stefan at codeweavers.com
Wed Feb 23 10:01:42 CST 2022


This removes the tracking work entirely from the CS thread and replaces
interlocked operations with regular writes on the main thread. It also avoids
accessing access_count in read-write mode from two threads.

Signed-off-by: Stefan Dösinger <stefan at codeweavers.com>
---
 dlls/wined3d/cs.c              |  4 --
 dlls/wined3d/wined3d_private.h | 96 +++++++++++++++++++++++++++++-----
 2 files changed, 82 insertions(+), 18 deletions(-)

diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c
index 3eb4f39429a..10d7b4b8996 100644
--- a/dlls/wined3d/cs.c
+++ b/dlls/wined3d/cs.c
@@ -2751,8 +2751,6 @@ static void wined3d_cs_exec_update_sub_resource(struct wined3d_cs *cs, const voi
                 op->sub_resource_idx, context, &op->bo, box, op->row_pitch, op->slice_pitch);
 
     context_release(context);
-
-    wined3d_resource_release(resource);
 }
 
 void wined3d_device_context_emit_update_sub_resource(struct wined3d_device_context *context,
@@ -2792,8 +2790,6 @@ void wined3d_device_context_emit_update_sub_resource(struct wined3d_device_conte
     op->row_pitch = row_pitch;
     op->slice_pitch = slice_pitch;
 
-    wined3d_device_context_acquire_resource(context, resource);
-
     wined3d_device_context_submit(context, WINED3D_CS_QUEUE_MAP);
     /* The data pointer may go away, so we need to wait until it is read.
      * Copying the data may be faster if it's small. */
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
index 7c03e8a2c20..d797b26fff0 100644
--- a/dlls/wined3d/wined3d_private.h
+++ b/dlls/wined3d/wined3d_private.h
@@ -4314,7 +4314,7 @@ struct wined3d_resource
     LONG ref;
     LONG bind_count;
     LONG map_count;
-    LONG access_count;
+    ULONG access_time;
     struct wined3d_device *device;
     enum wined3d_resource_type type;
     enum wined3d_gl_resource_type gl_type;
@@ -4358,17 +4358,6 @@ static inline ULONG wined3d_resource_decref(struct wined3d_resource *resource)
     return resource->resource_ops->resource_decref(resource);
 }
 
-static inline void wined3d_resource_acquire(struct wined3d_resource *resource)
-{
-    InterlockedIncrement(&resource->access_count);
-}
-
-static inline void wined3d_resource_release(struct wined3d_resource *resource)
-{
-    LONG refcount = InterlockedDecrement(&resource->access_count);
-    assert(refcount >= 0);
-}
-
 static inline HRESULT wined3d_resource_get_sub_resource_desc(struct wined3d_resource *resource,
         unsigned int sub_resource_idx, struct wined3d_sub_resource_desc *desc)
 {
@@ -5140,15 +5129,94 @@ void wined3d_device_context_emit_update_sub_resource(struct wined3d_device_conte
 HRESULT wined3d_device_context_emit_unmap(struct wined3d_device_context *context,
         struct wined3d_resource *resource, unsigned int sub_resource_idx) DECLSPEC_HIDDEN;
 
-static inline void wined3d_resource_wait_idle(struct wined3d_resource *resource)
+static inline void wined3d_resource_acquire(struct wined3d_resource *resource)
+{
+    /* Stamp the resource with the current write head of the default CS queue. */
+    resource->access_time = resource->device->cs->queue[WINED3D_CS_QUEUE_DEFAULT].head;
+}
+
+static inline void wined3d_resource_release(struct wined3d_resource *resource)
+{ /* Deliberately empty: this version does no per-resource work on release. */
+}
+
+static inline void wined3d_resource_wait_idle(const struct wined3d_resource *resource)
 {
     const struct wined3d_cs *cs = resource->device->cs;
+    ULONG access_time, tail, head;
 
     if (!cs->thread || cs->thread_id == GetCurrentThreadId())
         return;
 
-    while (InterlockedCompareExchange(&resource->access_count, 0, 0))
+    /* A resource is considered busy between queueing a command that reads it and the execution of that
+     * command. We use the head and tail pointer of the default CS queue for tracking the access time.
+     * We can't track commands on the map queue this way. If a map command is handled asynchronously the
+     * resource fencing needs to be handled some other way.
+     *
+     * The queue head and tail will wrap around when the 32 bit ULONG is exhausted. We therefore need to
+     * handle a few cases:
+     *
+     * A...access_time in the resource
+     * H...queue write head
+     * T...queue read tail
+     *
+     * Case 1:
+     * |.....T------A-----H..........|
+     * The resource is busy because the access time is between head and tail. No wrap-around has happened.
+     *
+     * Case 2:
+     * |..A.....T---------H..........|
+     * The resource is idle, the last command using it has been executed.
+     *
+     * Case 3:
+     * |........T---------H.....A....|
+     * The resource is idle, the last command using it has been executed and the head and tail have since
+     * wrapped around.
+     *
+     * Case 4:
+     * |--A---H.................T----|
+     * Resource is busy. Head has wrapped around, tail not yet, so head < tail and A < head or A >= tail.
+     *
+     * Case 5:
+     * |------H....A............T----|
+     * Resource is idle. Head has wrapped around, tail not yet, and head <= A < tail.
+     *
+     * Case 6:
+     *          A
+     *          T
+     * |........H....................|
+     *
+     * Queue is empty, resource therefore idle.
+     *
+     * It is possible that a resource has not been used for a long time and is idle, but the head and
+     * tail wrapped around in such a way that the previously set access time falls between head and tail.
+     * In this case we will incorrectly wait for the resource. Because we use the entire 32 bits of the
+     * counters and not just the bits needed to address the actual queue memory, this should happen rarely.
+     * If it turns out to be a problem we can switch to 64 bit counters or attempt to somehow mark the
+     * access time of resources invalid. CS packets are at least 4 byte aligned, so we could use the lower
+     * 2 bits in access_time for such a marker.
+     *
+     * Note that the access time is set before the command is submitted, so we have to wait until the
+     * tail is bigger than access_time, not equal. */
+    access_time = resource->access_time;
+    head = cs->queue[WINED3D_CS_QUEUE_DEFAULT].head;
+    while (1)
+    {
+        tail = *(volatile ULONG *)&cs->queue[WINED3D_CS_QUEUE_DEFAULT].tail;
+        if (head == tail) /* Case 6, queue empty. */
+            break;
+
+        if (head > tail)
+        {
+            if (access_time >= head || access_time < tail) /* Case 2, 3. */
+                break;
+        }
+        else if (access_time >= head && access_time < tail) /* Case 5: head <= A < tail. */
+        {
+            break;
+        }
+        /* Case 1, 4 - busy, wait a little. */
         YieldProcessor();
+    }
 }
 
 /* TODO: Add tests and support for FLOAT16_4 POSITIONT, D3DCOLOR position, other
-- 
2.34.1




More information about the wine-devel mailing list