[PATCH 5/5] wined3d: Allow the command stream thread to run asynchronously.
Henri Verbeet
hverbeet at codeweavers.com
Tue May 2 04:35:30 CDT 2017
This gives a modest performance improvement in CPU-limited scenarios,
especially ones where a significant amount of time is spent in the graphics
driver. There's certainly still more room for improvement; resource maps in
particular will currently cause stalls.
Signed-off-by: Henri Verbeet <hverbeet at codeweavers.com>
---
dlls/wined3d/cs.c | 46 +++++++++++++++++++++++++++++++++++++++---
dlls/wined3d/device.c | 6 ++++++
dlls/wined3d/swapchain.c | 10 +++++++++
dlls/wined3d/texture.c | 11 ++++++++++
dlls/wined3d/wined3d_private.h | 2 ++
5 files changed, 72 insertions(+), 3 deletions(-)
diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c
index 951520e..91a99b7 100644
--- a/dlls/wined3d/cs.c
+++ b/dlls/wined3d/cs.c
@@ -434,6 +434,8 @@ static void wined3d_cs_exec_present(struct wined3d_cs *cs, const void *data)
{
wined3d_resource_release(&swapchain->back_buffers[i]->resource);
}
+
+ InterlockedDecrement(&cs->pending_presents);
}
void wined3d_cs_emit_present(struct wined3d_cs *cs, struct wined3d_swapchain *swapchain,
@@ -441,6 +443,7 @@ void wined3d_cs_emit_present(struct wined3d_cs *cs, struct wined3d_swapchain *sw
{
struct wined3d_cs_present *op;
unsigned int i;
+ LONG pending;
op = cs->ops->require_space(cs, sizeof(*op));
op->opcode = WINED3D_CS_OP_PRESENT;
@@ -450,6 +453,8 @@ void wined3d_cs_emit_present(struct wined3d_cs *cs, struct wined3d_swapchain *sw
op->dst_rect = *dst_rect;
op->flags = flags;
+ pending = InterlockedIncrement(&cs->pending_presents);
+
wined3d_resource_acquire(&swapchain->front_buffer->resource);
for (i = 0; i < swapchain->desc.backbuffer_count; ++i)
{
@@ -457,6 +462,15 @@ void wined3d_cs_emit_present(struct wined3d_cs *cs, struct wined3d_swapchain *sw
}
cs->ops->submit(cs);
+
+ /* Limit input latency by limiting the number of presents that we can get
+ * ahead of the worker thread. We have a constant limit here, but
+ * IDXGIDevice1 allows tuning this. */
+ while (pending > 1)
+ {
+ wined3d_pause();
+ pending = InterlockedCompareExchange(&cs->pending_presents, 0, 0);
+ }
}
static void wined3d_cs_exec_clear(struct wined3d_cs *cs, const void *data)
@@ -1856,6 +1870,10 @@ HRESULT wined3d_cs_map(struct wined3d_cs *cs, struct wined3d_resource *resource,
struct wined3d_cs_map *op;
HRESULT hr;
+ /* Mapping resources from the worker thread isn't an issue by itself, but
+ * increasing the map count would be visible to applications. */
+ wined3d_not_from_cs(cs);
+
op = cs->ops->require_space(cs, sizeof(*op));
op->opcode = WINED3D_CS_OP_MAP;
op->resource = resource;
@@ -1866,6 +1884,7 @@ HRESULT wined3d_cs_map(struct wined3d_cs *cs, struct wined3d_resource *resource,
op->hr = &hr;
cs->ops->submit(cs);
+ cs->ops->finish(cs);
return hr;
}
@@ -1883,6 +1902,8 @@ HRESULT wined3d_cs_unmap(struct wined3d_cs *cs, struct wined3d_resource *resourc
struct wined3d_cs_unmap *op;
HRESULT hr;
+ wined3d_not_from_cs(cs);
+
op = cs->ops->require_space(cs, sizeof(*op));
op->opcode = WINED3D_CS_OP_UNMAP;
op->resource = resource;
@@ -1890,6 +1911,7 @@ HRESULT wined3d_cs_unmap(struct wined3d_cs *cs, struct wined3d_resource *resourc
op->hr = &hr;
cs->ops->submit(cs);
+ cs->ops->finish(cs);
return hr;
}
@@ -2040,6 +2062,8 @@ void wined3d_cs_emit_blt_sub_resource(struct wined3d_cs *cs, struct wined3d_reso
wined3d_resource_acquire(src_resource);
cs->ops->submit(cs);
+ if (flags & WINED3D_BLT_SYNCHRONOUS)
+ cs->ops->finish(cs);
}
static void wined3d_cs_exec_update_sub_resource(struct wined3d_cs *cs, const void *data)
@@ -2119,6 +2143,9 @@ void wined3d_cs_emit_update_sub_resource(struct wined3d_cs *cs, struct wined3d_r
wined3d_resource_acquire(resource);
cs->ops->submit(cs);
+ /* The data pointer may go away, so we need to wait until it is read.
+ * Copying the data may be faster if it's small. */
+ cs->ops->finish(cs);
}
static void wined3d_cs_exec_add_dirty_texture_region(struct wined3d_cs *cs, const void *data)
@@ -2165,6 +2192,7 @@ static void wined3d_cs_emit_stop(struct wined3d_cs *cs)
op->opcode = WINED3D_CS_OP_STOP;
cs->ops->submit(cs);
+ cs->ops->finish(cs);
}
static void (* const wined3d_cs_op_handlers[])(struct wined3d_cs *cs, const void *data) =
@@ -2261,10 +2289,15 @@ static void wined3d_cs_st_submit(struct wined3d_cs *cs)
HeapFree(GetProcessHeap(), 0, data);
}
+static void wined3d_cs_st_finish(struct wined3d_cs *cs)
+{
+}
+
static const struct wined3d_cs_ops wined3d_cs_st_ops =
{
wined3d_cs_st_require_space,
wined3d_cs_st_submit,
+ wined3d_cs_st_finish,
wined3d_cs_st_push_constants,
};
@@ -2288,9 +2321,6 @@ static void wined3d_cs_mt_submit(struct wined3d_cs *cs)
if (InterlockedCompareExchange(&cs->waiting_for_event, FALSE, TRUE))
SetEvent(cs->event);
-
- while (!wined3d_cs_queue_is_empty(queue))
- wined3d_pause();
}
static void *wined3d_cs_mt_require_space(struct wined3d_cs *cs, size_t size)
@@ -2358,10 +2388,20 @@ static void *wined3d_cs_mt_require_space(struct wined3d_cs *cs, size_t size)
return packet->data;
}
+static void wined3d_cs_mt_finish(struct wined3d_cs *cs)
+{
+ if (cs->thread_id == GetCurrentThreadId())
+ return wined3d_cs_st_finish(cs);
+
+ while (!wined3d_cs_queue_is_empty(&cs->queue))
+ wined3d_pause();
+}
+
static const struct wined3d_cs_ops wined3d_cs_mt_ops =
{
wined3d_cs_mt_require_space,
wined3d_cs_mt_submit,
+ wined3d_cs_mt_finish,
wined3d_cs_mt_push_constants,
};
diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c
index 0985d62..1ff0969 100644
--- a/dlls/wined3d/device.c
+++ b/dlls/wined3d/device.c
@@ -997,6 +997,7 @@ static void wined3d_device_delete_opengl_contexts_cs(void *object)
static void wined3d_device_delete_opengl_contexts(struct wined3d_device *device)
{
wined3d_cs_destroy_object(device->cs, wined3d_device_delete_opengl_contexts_cs, device);
+ device->cs->ops->finish(device->cs);
}
static void wined3d_device_create_primary_opengl_context_cs(void *object)
@@ -1035,6 +1036,7 @@ static void wined3d_device_create_primary_opengl_context_cs(void *object)
static HRESULT wined3d_device_create_primary_opengl_context(struct wined3d_device *device)
{
wined3d_cs_init_object(device->cs, wined3d_device_create_primary_opengl_context_cs, device);
+ device->cs->ops->finish(device->cs);
if (!device->swapchains[0]->num_contexts)
return E_FAIL;
@@ -1179,6 +1181,8 @@ HRESULT CDECL wined3d_device_uninit_3d(struct wined3d_device *device)
if (!device->d3d_initialized)
return WINED3DERR_INVALIDCALL;
+ device->cs->ops->finish(device->cs);
+
if (device->logo_texture)
wined3d_texture_decref(device->logo_texture);
if (device->cursor_texture)
@@ -4558,6 +4562,8 @@ HRESULT CDECL wined3d_device_reset(struct wined3d_device *device,
TRACE("device %p, swapchain_desc %p, mode %p, callback %p, reset_state %#x.\n",
device, swapchain_desc, mode, callback, reset_state);
+ device->cs->ops->finish(device->cs);
+
if (!(swapchain = wined3d_device_get_swapchain(device, 0)))
{
ERR("Failed to get the first implicit swapchain.\n");
diff --git a/dlls/wined3d/swapchain.c b/dlls/wined3d/swapchain.c
index 476259b..b3a3020 100644
--- a/dlls/wined3d/swapchain.c
+++ b/dlls/wined3d/swapchain.c
@@ -66,6 +66,7 @@ static void swapchain_cleanup(struct wined3d_swapchain *swapchain)
}
wined3d_cs_destroy_object(swapchain->device->cs, wined3d_swapchain_destroy_object, swapchain);
+ swapchain->device->cs->ops->finish(swapchain->device->cs);
/* Restore the screen resolution if we rendered in fullscreen.
* This will restore the screen resolution to what it was before creating
@@ -113,6 +114,10 @@ ULONG CDECL wined3d_swapchain_decref(struct wined3d_swapchain *swapchain)
if (!refcount)
{
+ struct wined3d_device *device = swapchain->device;
+
+ device->cs->ops->finish(device->cs);
+
swapchain_cleanup(swapchain);
swapchain->parent_ops->wined3d_object_destroyed(swapchain->parent);
HeapFree(GetProcessHeap(), 0, swapchain);
@@ -896,6 +901,7 @@ static HRESULT swapchain_init(struct wined3d_swapchain *swapchain, struct wined3
}
wined3d_cs_init_object(device->cs, wined3d_swapchain_cs_init, swapchain);
+ device->cs->ops->finish(device->cs);
if (!swapchain->context[0])
{
@@ -992,6 +998,7 @@ err:
}
wined3d_cs_destroy_object(swapchain->device->cs, wined3d_swapchain_destroy_object, swapchain);
+ swapchain->device->cs->ops->finish(device->cs);
if (swapchain->front_buffer)
{
@@ -1184,6 +1191,7 @@ HRESULT CDECL wined3d_swapchain_resize_buffers(struct wined3d_swapchain *swapcha
unsigned int width, unsigned int height, enum wined3d_format_id format_id,
enum wined3d_multisample_type multisample_type, unsigned int multisample_quality)
{
+ struct wined3d_device *device = swapchain->device;
BOOL update_desc = FALSE;
TRACE("swapchain %p, buffer_count %u, width %u, height %u, format %s, "
@@ -1196,6 +1204,8 @@ HRESULT CDECL wined3d_swapchain_resize_buffers(struct wined3d_swapchain *swapcha
if (buffer_count && buffer_count != swapchain->desc.backbuffer_count)
FIXME("Cannot change the back buffer count yet.\n");
+ device->cs->ops->finish(device->cs);
+
if (!width || !height)
{
/* The application is requesting that either the swapchain width or
diff --git a/dlls/wined3d/texture.c b/dlls/wined3d/texture.c
index dbfdc3f..d6bda47 100644
--- a/dlls/wined3d/texture.c
+++ b/dlls/wined3d/texture.c
@@ -1327,6 +1327,7 @@ HRESULT CDECL wined3d_texture_update_desc(struct wined3d_texture *texture, UINT
if (surface->dc)
{
wined3d_cs_destroy_object(device->cs, texture2d_destroy_dc, surface);
+ device->cs->ops->finish(device->cs);
create_dib = TRUE;
}
@@ -1387,7 +1388,10 @@ HRESULT CDECL wined3d_texture_update_desc(struct wined3d_texture *texture, UINT
wined3d_texture_invalidate_location(texture, 0, ~valid_location);
if (create_dib)
+ {
wined3d_cs_init_object(device->cs, texture2d_create_dc, surface);
+ device->cs->ops->finish(device->cs);
+ }
return WINED3D_OK;
}
@@ -2243,6 +2247,7 @@ static HRESULT texture_init(struct wined3d_texture *texture, const struct wined3
if ((desc->usage & WINED3DUSAGE_OWNDC) || (device->wined3d->flags & WINED3D_NO3D))
{
wined3d_cs_init_object(device->cs, texture2d_create_dc, surface);
+ device->cs->ops->finish(device->cs);
if (!surface->dc)
{
wined3d_texture_cleanup_sync(texture);
@@ -3037,7 +3042,10 @@ HRESULT CDECL wined3d_texture_get_dc(struct wined3d_texture *texture, unsigned i
return WINED3DERR_INVALIDCALL;
if (!surface->dc)
+ {
wined3d_cs_init_object(device->cs, texture2d_create_dc, surface);
+ device->cs->ops->finish(device->cs);
+ }
if (!surface->dc)
return WINED3DERR_INVALIDCALL;
@@ -3081,7 +3089,10 @@ HRESULT CDECL wined3d_texture_release_dc(struct wined3d_texture *texture, unsign
}
if (!(texture->resource.usage & WINED3DUSAGE_OWNDC) && !(device->wined3d->flags & WINED3D_NO3D))
+ {
wined3d_cs_destroy_object(device->cs, texture2d_destroy_dc, surface);
+ device->cs->ops->finish(device->cs);
+ }
--sub_resource->map_count;
if (!--texture->resource.map_count && texture->update_map_binding)
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
index 628ef9a..2e0e8e9 100644
--- a/dlls/wined3d/wined3d_private.h
+++ b/dlls/wined3d/wined3d_private.h
@@ -3250,6 +3250,7 @@ struct wined3d_cs_ops
{
void *(*require_space)(struct wined3d_cs *cs, size_t size);
void (*submit)(struct wined3d_cs *cs);
+ void (*finish)(struct wined3d_cs *cs);
void (*push_constants)(struct wined3d_cs *cs, enum wined3d_push_constants p,
unsigned int start_idx, unsigned int count, const void *constants);
};
@@ -3271,6 +3272,7 @@ struct wined3d_cs
HANDLE event;
BOOL waiting_for_event;
+ LONG pending_presents;
};
struct wined3d_cs *wined3d_cs_create(struct wined3d_device *device) DECLSPEC_HIDDEN;
--
2.1.4
More information about the wine-patches mailing list