[PATCH 5/5] wined3d: Allow the command stream thread to run asynchronously.

Henri Verbeet hverbeet at codeweavers.com
Tue May 2 04:35:30 CDT 2017


This gives a modest performance improvement in CPU-limited scenarios,
especially ones where a significant amount of time is spent in the graphics
driver. There's certainly still more room for improvement; resource maps in
particular will currently cause stalls.

Signed-off-by: Henri Verbeet <hverbeet at codeweavers.com>
---
 dlls/wined3d/cs.c              | 46 +++++++++++++++++++++++++++++++++++++++---
 dlls/wined3d/device.c          |  6 ++++++
 dlls/wined3d/swapchain.c       | 10 +++++++++
 dlls/wined3d/texture.c         | 11 ++++++++++
 dlls/wined3d/wined3d_private.h |  2 ++
 5 files changed, 72 insertions(+), 3 deletions(-)

diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c
index 951520e..91a99b7 100644
--- a/dlls/wined3d/cs.c
+++ b/dlls/wined3d/cs.c
@@ -434,6 +434,8 @@ static void wined3d_cs_exec_present(struct wined3d_cs *cs, const void *data)
     {
         wined3d_resource_release(&swapchain->back_buffers[i]->resource);
     }
+
+    InterlockedDecrement(&cs->pending_presents);
 }
 
 void wined3d_cs_emit_present(struct wined3d_cs *cs, struct wined3d_swapchain *swapchain,
@@ -441,6 +443,7 @@ void wined3d_cs_emit_present(struct wined3d_cs *cs, struct wined3d_swapchain *sw
 {
     struct wined3d_cs_present *op;
     unsigned int i;
+    LONG pending;
 
     op = cs->ops->require_space(cs, sizeof(*op));
     op->opcode = WINED3D_CS_OP_PRESENT;
@@ -450,6 +453,8 @@ void wined3d_cs_emit_present(struct wined3d_cs *cs, struct wined3d_swapchain *sw
     op->dst_rect = *dst_rect;
     op->flags = flags;
 
+    pending = InterlockedIncrement(&cs->pending_presents);
+
     wined3d_resource_acquire(&swapchain->front_buffer->resource);
     for (i = 0; i < swapchain->desc.backbuffer_count; ++i)
     {
@@ -457,6 +462,15 @@ void wined3d_cs_emit_present(struct wined3d_cs *cs, struct wined3d_swapchain *sw
     }
 
     cs->ops->submit(cs);
+
+    /* Limit input latency by limiting the number of presents that we can get
+     * ahead of the worker thread. We have a constant limit here, but
+     * IDXGIDevice1 allows tuning this. */
+    while (pending > 1)
+    {
+        wined3d_pause();
+        pending = InterlockedCompareExchange(&cs->pending_presents, 0, 0);
+    }
 }
 
 static void wined3d_cs_exec_clear(struct wined3d_cs *cs, const void *data)
@@ -1856,6 +1870,10 @@ HRESULT wined3d_cs_map(struct wined3d_cs *cs, struct wined3d_resource *resource,
     struct wined3d_cs_map *op;
     HRESULT hr;
 
+    /* Mapping resources from the worker thread isn't an issue by itself, but
+     * increasing the map count would be visible to applications. */
+    wined3d_not_from_cs(cs);
+
     op = cs->ops->require_space(cs, sizeof(*op));
     op->opcode = WINED3D_CS_OP_MAP;
     op->resource = resource;
@@ -1866,6 +1884,7 @@ HRESULT wined3d_cs_map(struct wined3d_cs *cs, struct wined3d_resource *resource,
     op->hr = &hr;
 
     cs->ops->submit(cs);
+    cs->ops->finish(cs);
 
     return hr;
 }
@@ -1883,6 +1902,8 @@ HRESULT wined3d_cs_unmap(struct wined3d_cs *cs, struct wined3d_resource *resourc
     struct wined3d_cs_unmap *op;
     HRESULT hr;
 
+    wined3d_not_from_cs(cs);
+
     op = cs->ops->require_space(cs, sizeof(*op));
     op->opcode = WINED3D_CS_OP_UNMAP;
     op->resource = resource;
@@ -1890,6 +1911,7 @@ HRESULT wined3d_cs_unmap(struct wined3d_cs *cs, struct wined3d_resource *resourc
     op->hr = &hr;
 
     cs->ops->submit(cs);
+    cs->ops->finish(cs);
 
     return hr;
 }
@@ -2040,6 +2062,8 @@ void wined3d_cs_emit_blt_sub_resource(struct wined3d_cs *cs, struct wined3d_reso
         wined3d_resource_acquire(src_resource);
 
     cs->ops->submit(cs);
+    if (flags & WINED3D_BLT_SYNCHRONOUS)
+        cs->ops->finish(cs);
 }
 
 static void wined3d_cs_exec_update_sub_resource(struct wined3d_cs *cs, const void *data)
@@ -2119,6 +2143,9 @@ void wined3d_cs_emit_update_sub_resource(struct wined3d_cs *cs, struct wined3d_r
     wined3d_resource_acquire(resource);
 
     cs->ops->submit(cs);
+    /* The data pointer may go away, so we need to wait until it is read.
+     * Copying the data may be faster if it's small. */
+    cs->ops->finish(cs);
 }
 
 static void wined3d_cs_exec_add_dirty_texture_region(struct wined3d_cs *cs, const void *data)
@@ -2165,6 +2192,7 @@ static void wined3d_cs_emit_stop(struct wined3d_cs *cs)
     op->opcode = WINED3D_CS_OP_STOP;
 
     cs->ops->submit(cs);
+    cs->ops->finish(cs);
 }
 
 static void (* const wined3d_cs_op_handlers[])(struct wined3d_cs *cs, const void *data) =
@@ -2261,10 +2289,15 @@ static void wined3d_cs_st_submit(struct wined3d_cs *cs)
         HeapFree(GetProcessHeap(), 0, data);
 }
 
+static void wined3d_cs_st_finish(struct wined3d_cs *cs)
+{
+}
+
 static const struct wined3d_cs_ops wined3d_cs_st_ops =
 {
     wined3d_cs_st_require_space,
     wined3d_cs_st_submit,
+    wined3d_cs_st_finish,
     wined3d_cs_st_push_constants,
 };
 
@@ -2288,9 +2321,6 @@ static void wined3d_cs_mt_submit(struct wined3d_cs *cs)
 
     if (InterlockedCompareExchange(&cs->waiting_for_event, FALSE, TRUE))
         SetEvent(cs->event);
-
-    while (!wined3d_cs_queue_is_empty(queue))
-        wined3d_pause();
 }
 
 static void *wined3d_cs_mt_require_space(struct wined3d_cs *cs, size_t size)
@@ -2358,10 +2388,20 @@ static void *wined3d_cs_mt_require_space(struct wined3d_cs *cs, size_t size)
     return packet->data;
 }
 
+static void wined3d_cs_mt_finish(struct wined3d_cs *cs)
+{
+    if (cs->thread_id == GetCurrentThreadId())
+        return wined3d_cs_st_finish(cs);
+
+    while (!wined3d_cs_queue_is_empty(&cs->queue))
+        wined3d_pause();
+}
+
 static const struct wined3d_cs_ops wined3d_cs_mt_ops =
 {
     wined3d_cs_mt_require_space,
     wined3d_cs_mt_submit,
+    wined3d_cs_mt_finish,
     wined3d_cs_mt_push_constants,
 };
 
diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c
index 0985d62..1ff0969 100644
--- a/dlls/wined3d/device.c
+++ b/dlls/wined3d/device.c
@@ -997,6 +997,7 @@ static void wined3d_device_delete_opengl_contexts_cs(void *object)
 static void wined3d_device_delete_opengl_contexts(struct wined3d_device *device)
 {
     wined3d_cs_destroy_object(device->cs, wined3d_device_delete_opengl_contexts_cs, device);
+    device->cs->ops->finish(device->cs);
 }
 
 static void wined3d_device_create_primary_opengl_context_cs(void *object)
@@ -1035,6 +1036,7 @@ static void wined3d_device_create_primary_opengl_context_cs(void *object)
 static HRESULT wined3d_device_create_primary_opengl_context(struct wined3d_device *device)
 {
     wined3d_cs_init_object(device->cs, wined3d_device_create_primary_opengl_context_cs, device);
+    device->cs->ops->finish(device->cs);
     if (!device->swapchains[0]->num_contexts)
         return E_FAIL;
 
@@ -1179,6 +1181,8 @@ HRESULT CDECL wined3d_device_uninit_3d(struct wined3d_device *device)
     if (!device->d3d_initialized)
         return WINED3DERR_INVALIDCALL;
 
+    device->cs->ops->finish(device->cs);
+
     if (device->logo_texture)
         wined3d_texture_decref(device->logo_texture);
     if (device->cursor_texture)
@@ -4558,6 +4562,8 @@ HRESULT CDECL wined3d_device_reset(struct wined3d_device *device,
     TRACE("device %p, swapchain_desc %p, mode %p, callback %p, reset_state %#x.\n",
             device, swapchain_desc, mode, callback, reset_state);
 
+    device->cs->ops->finish(device->cs);
+
     if (!(swapchain = wined3d_device_get_swapchain(device, 0)))
     {
         ERR("Failed to get the first implicit swapchain.\n");
diff --git a/dlls/wined3d/swapchain.c b/dlls/wined3d/swapchain.c
index 476259b..b3a3020 100644
--- a/dlls/wined3d/swapchain.c
+++ b/dlls/wined3d/swapchain.c
@@ -66,6 +66,7 @@ static void swapchain_cleanup(struct wined3d_swapchain *swapchain)
     }
 
     wined3d_cs_destroy_object(swapchain->device->cs, wined3d_swapchain_destroy_object, swapchain);
+    swapchain->device->cs->ops->finish(swapchain->device->cs);
 
     /* Restore the screen resolution if we rendered in fullscreen.
      * This will restore the screen resolution to what it was before creating
@@ -113,6 +114,10 @@ ULONG CDECL wined3d_swapchain_decref(struct wined3d_swapchain *swapchain)
 
     if (!refcount)
     {
+        struct wined3d_device *device = swapchain->device;
+
+        device->cs->ops->finish(device->cs);
+
         swapchain_cleanup(swapchain);
         swapchain->parent_ops->wined3d_object_destroyed(swapchain->parent);
         HeapFree(GetProcessHeap(), 0, swapchain);
@@ -896,6 +901,7 @@ static HRESULT swapchain_init(struct wined3d_swapchain *swapchain, struct wined3
         }
 
         wined3d_cs_init_object(device->cs, wined3d_swapchain_cs_init, swapchain);
+        device->cs->ops->finish(device->cs);
 
         if (!swapchain->context[0])
         {
@@ -992,6 +998,7 @@ err:
     }
 
     wined3d_cs_destroy_object(swapchain->device->cs, wined3d_swapchain_destroy_object, swapchain);
+    swapchain->device->cs->ops->finish(device->cs);
 
     if (swapchain->front_buffer)
     {
@@ -1184,6 +1191,7 @@ HRESULT CDECL wined3d_swapchain_resize_buffers(struct wined3d_swapchain *swapcha
         unsigned int width, unsigned int height, enum wined3d_format_id format_id,
         enum wined3d_multisample_type multisample_type, unsigned int multisample_quality)
 {
+    struct wined3d_device *device = swapchain->device;
     BOOL update_desc = FALSE;
 
     TRACE("swapchain %p, buffer_count %u, width %u, height %u, format %s, "
@@ -1196,6 +1204,8 @@ HRESULT CDECL wined3d_swapchain_resize_buffers(struct wined3d_swapchain *swapcha
     if (buffer_count && buffer_count != swapchain->desc.backbuffer_count)
         FIXME("Cannot change the back buffer count yet.\n");
 
+    device->cs->ops->finish(device->cs);
+
     if (!width || !height)
     {
         /* The application is requesting that either the swapchain width or
diff --git a/dlls/wined3d/texture.c b/dlls/wined3d/texture.c
index dbfdc3f..d6bda47 100644
--- a/dlls/wined3d/texture.c
+++ b/dlls/wined3d/texture.c
@@ -1327,6 +1327,7 @@ HRESULT CDECL wined3d_texture_update_desc(struct wined3d_texture *texture, UINT
     if (surface->dc)
     {
         wined3d_cs_destroy_object(device->cs, texture2d_destroy_dc, surface);
+        device->cs->ops->finish(device->cs);
         create_dib = TRUE;
     }
 
@@ -1387,7 +1388,10 @@ HRESULT CDECL wined3d_texture_update_desc(struct wined3d_texture *texture, UINT
     wined3d_texture_invalidate_location(texture, 0, ~valid_location);
 
     if (create_dib)
+    {
         wined3d_cs_init_object(device->cs, texture2d_create_dc, surface);
+        device->cs->ops->finish(device->cs);
+    }
 
     return WINED3D_OK;
 }
@@ -2243,6 +2247,7 @@ static HRESULT texture_init(struct wined3d_texture *texture, const struct wined3
             if ((desc->usage & WINED3DUSAGE_OWNDC) || (device->wined3d->flags & WINED3D_NO3D))
             {
                 wined3d_cs_init_object(device->cs, texture2d_create_dc, surface);
+                device->cs->ops->finish(device->cs);
                 if (!surface->dc)
                 {
                     wined3d_texture_cleanup_sync(texture);
@@ -3037,7 +3042,10 @@ HRESULT CDECL wined3d_texture_get_dc(struct wined3d_texture *texture, unsigned i
         return WINED3DERR_INVALIDCALL;
 
     if (!surface->dc)
+    {
         wined3d_cs_init_object(device->cs, texture2d_create_dc, surface);
+        device->cs->ops->finish(device->cs);
+    }
     if (!surface->dc)
         return WINED3DERR_INVALIDCALL;
 
@@ -3081,7 +3089,10 @@ HRESULT CDECL wined3d_texture_release_dc(struct wined3d_texture *texture, unsign
     }
 
     if (!(texture->resource.usage & WINED3DUSAGE_OWNDC) && !(device->wined3d->flags & WINED3D_NO3D))
+    {
         wined3d_cs_destroy_object(device->cs, texture2d_destroy_dc, surface);
+        device->cs->ops->finish(device->cs);
+    }
 
     --sub_resource->map_count;
     if (!--texture->resource.map_count && texture->update_map_binding)
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
index 628ef9a..2e0e8e9 100644
--- a/dlls/wined3d/wined3d_private.h
+++ b/dlls/wined3d/wined3d_private.h
@@ -3250,6 +3250,7 @@ struct wined3d_cs_ops
 {
     void *(*require_space)(struct wined3d_cs *cs, size_t size);
     void (*submit)(struct wined3d_cs *cs);
+    void (*finish)(struct wined3d_cs *cs);
     void (*push_constants)(struct wined3d_cs *cs, enum wined3d_push_constants p,
             unsigned int start_idx, unsigned int count, const void *constants);
 };
@@ -3271,6 +3272,7 @@ struct wined3d_cs
 
     HANDLE event;
     BOOL waiting_for_event;
+    LONG pending_presents;
 };
 
 struct wined3d_cs *wined3d_cs_create(struct wined3d_device *device) DECLSPEC_HIDDEN;
-- 
2.1.4




More information about the wine-patches mailing list