[PATCH 4/5] wined3d: Introduce a multi-threaded command stream implementation.

Henri Verbeet hverbeet at codeweavers.com
Wed Apr 12 17:11:01 CDT 2017


The primary purpose of this patch is to serialise draws from multiple threads,
without the overhead of extra flushes that "StrictDrawOrdering" imposes.
With additional work, offloading state processing and driver overhead to a
separate thread may also allow for improved performance in some applications,
but that's not a goal of this patch.

Signed-off-by: Henri Verbeet <hverbeet at codeweavers.com>
---
 dlls/wined3d/context.c         |   6 +
 dlls/wined3d/cs.c              | 410 ++++++++++++++++++++++++++++++++++++-----
 dlls/wined3d/device.c          |   6 +
 dlls/wined3d/query.c           |  45 ++++-
 dlls/wined3d/wined3d_main.c    |   4 +
 dlls/wined3d/wined3d_private.h |  59 +++++-
 6 files changed, 472 insertions(+), 58 deletions(-)

diff --git a/dlls/wined3d/context.c b/dlls/wined3d/context.c
index 48b9347..4d1c2a8 100644
--- a/dlls/wined3d/context.c
+++ b/dlls/wined3d/context.c
@@ -1704,6 +1704,8 @@ struct wined3d_context *context_create(struct wined3d_swapchain *swapchain,
 
     TRACE("swapchain %p, target %p, window %p.\n", swapchain, target, swapchain->win_handle);
 
+    wined3d_from_cs(device->cs);
+
     ret = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*ret));
     if (!ret)
         return NULL;
@@ -2106,6 +2108,8 @@ void context_destroy(struct wined3d_device *device, struct wined3d_context *cont
 
     TRACE("Destroying ctx %p\n", context);
 
+    wined3d_from_cs(device->cs);
+
     /* We delay destroying a context when it is active. The context_release()
      * function invokes context_destroy() again while leaving the last level. */
     if (context->level)
@@ -3841,6 +3845,8 @@ struct wined3d_context *context_acquire(const struct wined3d_device *device,
 
     TRACE("device %p, texture %p, sub_resource_idx %u.\n", device, texture, sub_resource_idx);
 
+    wined3d_from_cs(device->cs);
+
     if (current_context && current_context->destroyed)
         current_context = NULL;
 
diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c
index 6d413d4..43352eb 100644
--- a/dlls/wined3d/cs.c
+++ b/dlls/wined3d/cs.c
@@ -26,6 +26,7 @@ WINE_DEFAULT_DEBUG_CHANNEL(d3d);
 
 enum wined3d_cs_op
 {
+    WINED3D_CS_OP_NOP,
     WINED3D_CS_OP_PRESENT,
     WINED3D_CS_OP_CLEAR,
     WINED3D_CS_OP_DISPATCH,
@@ -57,6 +58,7 @@ enum wined3d_cs_op
     WINED3D_CS_OP_SET_MATERIAL,
     WINED3D_CS_OP_SET_LIGHT,
     WINED3D_CS_OP_SET_LIGHT_ENABLE,
+    WINED3D_CS_OP_PUSH_CONSTANTS,
     WINED3D_CS_OP_RESET_STATE,
     WINED3D_CS_OP_CALLBACK,
     WINED3D_CS_OP_QUERY_ISSUE,
@@ -67,6 +69,18 @@ enum wined3d_cs_op
     WINED3D_CS_OP_BLT_SUB_RESOURCE,
     WINED3D_CS_OP_UPDATE_SUB_RESOURCE,
     WINED3D_CS_OP_ADD_DIRTY_TEXTURE_REGION,
+    WINED3D_CS_OP_STOP,
+};
+
+struct wined3d_cs_packet
+{
+    size_t size;
+    BYTE data[1];
+};
+
+struct wined3d_cs_nop
+{
+    enum wined3d_cs_op opcode;
 };
 
 struct wined3d_cs_present
@@ -306,6 +320,15 @@ struct wined3d_cs_set_light_enable
     BOOL enable;
 };
 
+struct wined3d_cs_push_constants
+{
+    enum wined3d_cs_op opcode;
+    enum wined3d_push_constants type;
+    unsigned int start_idx;
+    unsigned int count;
+    BYTE constants[1];
+};
+
 struct wined3d_cs_reset_state
 {
     enum wined3d_cs_op opcode;
@@ -386,6 +409,15 @@ struct wined3d_cs_add_dirty_texture_region
     unsigned int layer;
 };
 
+struct wined3d_cs_stop
+{
+    enum wined3d_cs_op opcode;
+};
+
+static void wined3d_cs_exec_nop(struct wined3d_cs *cs, const void *data)
+{
+}
+
 static void wined3d_cs_exec_present(struct wined3d_cs *cs, const void *data)
 {
     const struct wined3d_cs_present *op = data;
@@ -1598,6 +1630,73 @@ void wined3d_cs_emit_set_light_enable(struct wined3d_cs *cs, unsigned int idx, B
     cs->ops->submit(cs);
 }
 
+static const struct
+{
+    size_t offset;
+    size_t size;
+    DWORD mask;
+}
+wined3d_cs_push_constant_info[] =
+{
+    /* WINED3D_PUSH_CONSTANTS_VS_F */
+    {FIELD_OFFSET(struct wined3d_state, vs_consts_f), sizeof(struct wined3d_vec4),  WINED3D_SHADER_CONST_VS_F},
+    /* WINED3D_PUSH_CONSTANTS_PS_F */
+    {FIELD_OFFSET(struct wined3d_state, ps_consts_f), sizeof(struct wined3d_vec4),  WINED3D_SHADER_CONST_PS_F},
+    /* WINED3D_PUSH_CONSTANTS_VS_I */
+    {FIELD_OFFSET(struct wined3d_state, vs_consts_i), sizeof(struct wined3d_ivec4), WINED3D_SHADER_CONST_VS_I},
+    /* WINED3D_PUSH_CONSTANTS_PS_I */
+    {FIELD_OFFSET(struct wined3d_state, ps_consts_i), sizeof(struct wined3d_ivec4), WINED3D_SHADER_CONST_PS_I},
+    /* WINED3D_PUSH_CONSTANTS_VS_B */
+    {FIELD_OFFSET(struct wined3d_state, vs_consts_b), sizeof(BOOL),                 WINED3D_SHADER_CONST_VS_B},
+    /* WINED3D_PUSH_CONSTANTS_PS_B */
+    {FIELD_OFFSET(struct wined3d_state, ps_consts_b), sizeof(BOOL),                 WINED3D_SHADER_CONST_PS_B},
+};
+
+static void wined3d_cs_st_push_constants(struct wined3d_cs *cs, enum wined3d_push_constants p,
+        unsigned int start_idx, unsigned int count, const void *constants)
+{
+    struct wined3d_device *device = cs->device;
+    unsigned int context_count;
+    unsigned int i;
+    size_t offset;
+
+    if (p == WINED3D_PUSH_CONSTANTS_VS_F)
+        device->shader_backend->shader_update_float_vertex_constants(device, start_idx, count);
+    else if (p == WINED3D_PUSH_CONSTANTS_PS_F)
+        device->shader_backend->shader_update_float_pixel_constants(device, start_idx, count);
+
+    offset = wined3d_cs_push_constant_info[p].offset + start_idx * wined3d_cs_push_constant_info[p].size;
+    memcpy((BYTE *)&cs->state + offset, constants, count * wined3d_cs_push_constant_info[p].size);
+    for (i = 0, context_count = device->context_count; i < context_count; ++i)
+    {
+        device->contexts[i]->constant_update_mask |= wined3d_cs_push_constant_info[p].mask;
+    }
+}
+
+static void wined3d_cs_exec_push_constants(struct wined3d_cs *cs, const void *data)
+{
+    const struct wined3d_cs_push_constants *op = data;
+
+    wined3d_cs_st_push_constants(cs, op->type, op->start_idx, op->count, op->constants);
+}
+
+static void wined3d_cs_mt_push_constants(struct wined3d_cs *cs, enum wined3d_push_constants p,
+        unsigned int start_idx, unsigned int count, const void *constants)
+{
+    struct wined3d_cs_push_constants *op;
+    size_t size;
+
+    size = count * wined3d_cs_push_constant_info[p].size;
+    op = cs->ops->require_space(cs, FIELD_OFFSET(struct wined3d_cs_push_constants, constants[size]));
+    op->opcode = WINED3D_CS_OP_PUSH_CONSTANTS;
+    op->type = p;
+    op->start_idx = start_idx;
+    op->count = count;
+    memcpy(op->constants, constants, size);
+
+    cs->ops->submit(cs);
+}
+
 static void wined3d_cs_exec_reset_state(struct wined3d_cs *cs, const void *data)
 {
     struct wined3d_adapter *adapter = cs->device->adapter;
@@ -1651,8 +1750,39 @@ static void wined3d_cs_exec_query_issue(struct wined3d_cs *cs, const void *data)
 {
     const struct wined3d_cs_query_issue *op = data;
     struct wined3d_query *query = op->query;
+    BOOL poll;
+
+    poll = query->query_ops->query_issue(query, op->flags);
 
-    query->query_ops->query_issue(query, op->flags);
+    if (!cs->thread)
+        return;
+
+    if (poll && list_empty(&query->poll_list_entry))
+    {
+        list_add_tail(&cs->query_poll_list, &query->poll_list_entry);
+        return;
+    }
+
+    /* This can happen if occlusion queries are restarted. This discards the
+     * old result, since polling it could result in a GL error. */
+    if ((op->flags & WINED3DISSUE_BEGIN) && !poll && !list_empty(&query->poll_list_entry))
+    {
+        list_remove(&query->poll_list_entry);
+        list_init(&query->poll_list_entry);
+        InterlockedIncrement(&query->counter_retrieved);
+        return;
+    }
+
+    /* This can happen when an occlusion query is ended without being started,
+     * in which case we don't want to poll, but still have to counter-balance
+     * the increment of the main counter.
+     *
+     * This can also happen if an event query is re-issued before the first
+     * fence was reached. In this case the query is already in the list and
+     * the poll function will check the new fence. We have to counter-balance
+     * the discarded increment. */
+    if (op->flags & WINED3DISSUE_END)
+        InterlockedIncrement(&query->counter_retrieved);
 }
 
 void wined3d_cs_emit_query_issue(struct wined3d_cs *cs, struct wined3d_query *query, DWORD flags)
@@ -2026,8 +2156,19 @@ void wined3d_cs_emit_add_dirty_texture_region(struct wined3d_cs *cs,
     cs->ops->submit(cs);
 }
 
+static void wined3d_cs_emit_stop(struct wined3d_cs *cs)
+{
+    struct wined3d_cs_stop *op;
+
+    op = cs->ops->require_space(cs, sizeof(*op));
+    op->opcode = WINED3D_CS_OP_STOP;
+
+    cs->ops->submit(cs);
+}
+
 static void (* const wined3d_cs_op_handlers[])(struct wined3d_cs *cs, const void *data) =
 {
+    /* WINED3D_CS_OP_NOP                        */ wined3d_cs_exec_nop,
     /* WINED3D_CS_OP_PRESENT                    */ wined3d_cs_exec_present,
     /* WINED3D_CS_OP_CLEAR                      */ wined3d_cs_exec_clear,
     /* WINED3D_CS_OP_DISPATCH                   */ wined3d_cs_exec_dispatch,
@@ -2059,6 +2200,7 @@ static void (* const wined3d_cs_op_handlers[])(struct wined3d_cs *cs, const void
     /* WINED3D_CS_OP_SET_MATERIAL               */ wined3d_cs_exec_set_material,
     /* WINED3D_CS_OP_SET_LIGHT                  */ wined3d_cs_exec_set_light,
     /* WINED3D_CS_OP_SET_LIGHT_ENABLE           */ wined3d_cs_exec_set_light_enable,
+    /* WINED3D_CS_OP_PUSH_CONSTANTS             */ wined3d_cs_exec_push_constants,
     /* WINED3D_CS_OP_RESET_STATE                */ wined3d_cs_exec_reset_state,
     /* WINED3D_CS_OP_CALLBACK                   */ wined3d_cs_exec_callback,
     /* WINED3D_CS_OP_QUERY_ISSUE                */ wined3d_cs_exec_query_issue,
@@ -2107,7 +2249,10 @@ static void wined3d_cs_st_submit(struct wined3d_cs *cs)
     cs->start = cs->end;
 
     opcode = *(const enum wined3d_cs_op *)&data[start];
-    wined3d_cs_op_handlers[opcode](cs, &data[start]);
+    if (opcode >= WINED3D_CS_OP_STOP)
+        ERR("Invalid opcode %#x.\n", opcode);
+    else
+        wined3d_cs_op_handlers[opcode](cs, &data[start]);
 
     if (cs->data == data)
         cs->start = cs->end = start;
@@ -2115,56 +2260,200 @@ static void wined3d_cs_st_submit(struct wined3d_cs *cs)
         HeapFree(GetProcessHeap(), 0, data);
 }
 
-static void wined3d_cs_st_push_constants(struct wined3d_cs *cs, enum wined3d_push_constants p,
-        unsigned int start_idx, unsigned int count, const void *constants)
+static const struct wined3d_cs_ops wined3d_cs_st_ops =
 {
-    struct wined3d_device *device = cs->device;
-    unsigned int context_count;
-    unsigned int i;
-    size_t offset;
+    wined3d_cs_st_require_space,
+    wined3d_cs_st_submit,
+    wined3d_cs_st_push_constants,
+};
+
+static BOOL wined3d_cs_queue_is_empty(const struct wined3d_cs_queue *queue)
+{
+    return *(volatile LONG *)&queue->head == queue->tail;
+}
 
-    static const struct
+static void wined3d_cs_mt_submit(struct wined3d_cs *cs)
+{
+    struct wined3d_cs_queue *queue = &cs->queue;
+    struct wined3d_cs_packet *packet;
+    size_t packet_size;
+
+    if (cs->thread_id == GetCurrentThreadId())
+        return wined3d_cs_st_submit(cs);
+
+    packet = (struct wined3d_cs_packet *)&queue->data[queue->head];
+    packet_size = FIELD_OFFSET(struct wined3d_cs_packet, data[packet->size]);
+    InterlockedExchange(&queue->head, (queue->head + packet_size) & (WINED3D_CS_QUEUE_SIZE - 1));
+
+    if (InterlockedCompareExchange(&cs->waiting_for_event, FALSE, TRUE))
+        SetEvent(cs->event);
+
+    while (!wined3d_cs_queue_is_empty(queue))
+        wined3d_pause();
+}
+
+static void *wined3d_cs_mt_require_space(struct wined3d_cs *cs, size_t size)
+{
+    struct wined3d_cs_queue *queue = &cs->queue;
+    size_t queue_size = ARRAY_SIZE(queue->data);
+    size_t header_size, packet_size, remaining;
+    struct wined3d_cs_packet *packet;
+
+    if (cs->thread_id == GetCurrentThreadId())
+        return wined3d_cs_st_require_space(cs, size);
+
+    header_size = FIELD_OFFSET(struct wined3d_cs_packet, data[0]);
+    size = (size + header_size - 1) & ~(header_size - 1);
+    packet_size = FIELD_OFFSET(struct wined3d_cs_packet, data[size]);
+    if (packet_size >= WINED3D_CS_QUEUE_SIZE)
     {
-        size_t offset;
-        size_t size;
-        DWORD mask;
+        ERR("Packet size %lu >= queue size %u.\n",
+                (unsigned long)packet_size, WINED3D_CS_QUEUE_SIZE);
+        return NULL;
     }
-    push_constant_info[] =
+
+    remaining = queue_size - queue->head;
+    if (remaining < packet_size)
     {
-        /* WINED3D_PUSH_CONSTANTS_VS_F */
-        {FIELD_OFFSET(struct wined3d_state, vs_consts_f), sizeof(struct wined3d_vec4),  WINED3D_SHADER_CONST_VS_F},
-        /* WINED3D_PUSH_CONSTANTS_PS_F */
-        {FIELD_OFFSET(struct wined3d_state, ps_consts_f), sizeof(struct wined3d_vec4),  WINED3D_SHADER_CONST_PS_F},
-        /* WINED3D_PUSH_CONSTANTS_VS_I */
-        {FIELD_OFFSET(struct wined3d_state, vs_consts_i), sizeof(struct wined3d_ivec4), WINED3D_SHADER_CONST_VS_I},
-        /* WINED3D_PUSH_CONSTANTS_PS_I */
-        {FIELD_OFFSET(struct wined3d_state, ps_consts_i), sizeof(struct wined3d_ivec4), WINED3D_SHADER_CONST_PS_I},
-        /* WINED3D_PUSH_CONSTANTS_VS_B */
-        {FIELD_OFFSET(struct wined3d_state, vs_consts_b), sizeof(BOOL),                 WINED3D_SHADER_CONST_VS_B},
-        /* WINED3D_PUSH_CONSTANTS_PS_B */
-        {FIELD_OFFSET(struct wined3d_state, ps_consts_b), sizeof(BOOL),                 WINED3D_SHADER_CONST_PS_B},
-    };
+        size_t nop_size = remaining - header_size;
+        struct wined3d_cs_nop *nop;
 
-    if (p == WINED3D_PUSH_CONSTANTS_VS_F)
-        device->shader_backend->shader_update_float_vertex_constants(device, start_idx, count);
-    else if (p == WINED3D_PUSH_CONSTANTS_PS_F)
-        device->shader_backend->shader_update_float_pixel_constants(device, start_idx, count);
+        TRACE("Inserting a nop for %lu + %lu bytes.\n",
+                (unsigned long)header_size, (unsigned long)nop_size);
 
-    offset = push_constant_info[p].offset + start_idx * push_constant_info[p].size;
-    memcpy((BYTE *)&cs->state + offset, constants, count * push_constant_info[p].size);
-    for (i = 0, context_count = device->context_count; i < context_count; ++i)
+        nop = wined3d_cs_mt_require_space(cs, nop_size);
+        if (nop_size)
+            nop->opcode = WINED3D_CS_OP_NOP;
+
+        wined3d_cs_mt_submit(cs);
+        assert(!queue->head);
+    }
+
+    for (;;)
     {
-        device->contexts[i]->constant_update_mask |= push_constant_info[p].mask;
+        LONG tail = *(volatile LONG *)&queue->tail;
+        LONG head = queue->head;
+        LONG new_pos;
+
+        /* Empty. */
+        if (head == tail)
+            break;
+        new_pos = (head + packet_size) & (WINED3D_CS_QUEUE_SIZE - 1);
+        /* Head ahead of tail. We checked the remaining size above, so we only
+         * need to make sure we don't make head equal to tail. */
+        if (head > tail && (new_pos != tail))
+            break;
+        /* Tail ahead of head. Make sure the new head is before the tail as
+         * well. Note that new_pos is 0 when it's at the end of the queue. */
+        if (new_pos < tail && new_pos)
+            break;
+
+        TRACE("Waiting for free space. Head %u, tail %u, packet size %lu.\n",
+                head, tail, (unsigned long)packet_size);
     }
+
+    packet = (struct wined3d_cs_packet *)&queue->data[queue->head];
+    packet->size = size;
+    return packet->data;
 }
 
-static const struct wined3d_cs_ops wined3d_cs_st_ops =
+static const struct wined3d_cs_ops wined3d_cs_mt_ops =
 {
-    wined3d_cs_st_require_space,
-    wined3d_cs_st_submit,
-    wined3d_cs_st_push_constants,
+    wined3d_cs_mt_require_space,
+    wined3d_cs_mt_submit,
+    wined3d_cs_mt_push_constants,
 };
 
+static void poll_queries(struct wined3d_cs *cs)
+{
+    struct wined3d_query *query, *cursor;
+
+    LIST_FOR_EACH_ENTRY_SAFE(query, cursor, &cs->query_poll_list, struct wined3d_query, poll_list_entry)
+    {
+        if (!query->query_ops->query_poll(query, 0))
+            continue;
+
+        list_remove(&query->poll_list_entry);
+        list_init(&query->poll_list_entry);
+        InterlockedIncrement(&query->counter_retrieved);
+    }
+}
+
+static void wined3d_cs_wait_event(struct wined3d_cs *cs)
+{
+    InterlockedExchange(&cs->waiting_for_event, TRUE);
+
+    /* The main thread might have enqueued a command and blocked on it after
+     * the CS thread decided to enter wined3d_cs_wait_event(), but before
+     * "waiting_for_event" was set.
+     *
+     * Likewise, we can race with the main thread when resetting
+     * "waiting_for_event", in which case we would need to call
+     * WaitForSingleObject() because the main thread called SetEvent(). */
+    if (!wined3d_cs_queue_is_empty(&cs->queue)
+            && InterlockedCompareExchange(&cs->waiting_for_event, FALSE, TRUE))
+        return;
+
+    WaitForSingleObject(cs->event, INFINITE);
+}
+
+static DWORD WINAPI wined3d_cs_run(void *ctx)
+{
+    struct wined3d_cs_packet *packet;
+    struct wined3d_cs_queue *queue;
+    unsigned int spin_count = 0;
+    struct wined3d_cs *cs = ctx;
+    enum wined3d_cs_op opcode;
+    unsigned int poll = 0;
+    LONG tail;
+
+    TRACE("Started.\n");
+
+    queue = &cs->queue;
+    list_init(&cs->query_poll_list);
+    cs->thread_id = GetCurrentThreadId();
+    for (;;)
+    {
+        if (++poll == WINED3D_CS_QUERY_POLL_INTERVAL)
+        {
+            poll_queries(cs);
+            poll = 0;
+        }
+
+        if (wined3d_cs_queue_is_empty(queue))
+        {
+            if (++spin_count >= WINED3D_CS_SPIN_COUNT && list_empty(&cs->query_poll_list))
+                wined3d_cs_wait_event(cs);
+            continue;
+        }
+        spin_count = 0;
+
+        tail = queue->tail;
+        packet = (struct wined3d_cs_packet *)&queue->data[tail];
+        if (packet->size)
+        {
+            opcode = *(const enum wined3d_cs_op *)packet->data;
+
+            if (opcode >= WINED3D_CS_OP_STOP)
+            {
+                if (opcode > WINED3D_CS_OP_STOP)
+                    ERR("Invalid opcode %#x.\n", opcode);
+                break;
+            }
+
+            wined3d_cs_op_handlers[opcode](cs, packet->data);
+        }
+
+        tail += FIELD_OFFSET(struct wined3d_cs_packet, data[packet->size]);
+        tail &= (WINED3D_CS_QUEUE_SIZE - 1);
+        InterlockedExchange(&queue->tail, tail);
+    }
+
+    queue->tail = queue->head = 0;
+    TRACE("Stopped.\n");
+    return 0;
+}
+
 struct wined3d_cs *wined3d_cs_create(struct wined3d_device *device)
 {
     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
@@ -2173,6 +2462,9 @@ struct wined3d_cs *wined3d_cs_create(struct wined3d_device *device)
     if (!(cs = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*cs))))
         return NULL;
 
+    cs->ops = &wined3d_cs_st_ops;
+    cs->device = device;
+
     if (!(cs->fb.render_targets = wined3d_calloc(gl_info->limits.buffers, sizeof(*cs->fb.render_targets))))
     {
         HeapFree(GetProcessHeap(), 0, cs);
@@ -2182,19 +2474,38 @@ struct wined3d_cs *wined3d_cs_create(struct wined3d_device *device)
     state_init(&cs->state, &cs->fb, gl_info, &device->adapter->d3d_info,
             WINED3D_STATE_NO_REF | WINED3D_STATE_INIT_DEFAULT);
 
-    cs->ops = &wined3d_cs_st_ops;
-    cs->device = device;
-
     cs->data_size = WINED3D_INITIAL_CS_SIZE;
     if (!(cs->data = HeapAlloc(GetProcessHeap(), 0, cs->data_size)))
+        goto fail;
+
+    if (wined3d_settings.cs_multithreaded
+            && !RtlIsCriticalSectionLockedByThread(NtCurrentTeb()->Peb->LoaderLock))
     {
-        state_cleanup(&cs->state);
-        HeapFree(GetProcessHeap(), 0, cs->fb.render_targets);
-        HeapFree(GetProcessHeap(), 0, cs);
-        return NULL;
+        cs->ops = &wined3d_cs_mt_ops;
+
+        if (!(cs->event = CreateEventW(NULL, FALSE, FALSE, NULL)))
+        {
+            ERR("Failed to create command stream event.\n");
+            HeapFree(GetProcessHeap(), 0, cs->data);
+            goto fail;
+        }
+
+        if (!(cs->thread = CreateThread(NULL, 0, wined3d_cs_run, cs, 0, NULL)))
+        {
+            ERR("Failed to create wined3d command stream thread.\n");
+            CloseHandle(cs->event);
+            HeapFree(GetProcessHeap(), 0, cs->data);
+            goto fail;
+        }
     }
 
     return cs;
+
+fail:
+    state_cleanup(&cs->state);
+    HeapFree(GetProcessHeap(), 0, cs->fb.render_targets);
+    HeapFree(GetProcessHeap(), 0, cs);
+    return NULL;
 }
 
 void wined3d_cs_destroy(struct wined3d_cs *cs)
@@ -2202,5 +2513,14 @@ void wined3d_cs_destroy(struct wined3d_cs *cs)
     state_cleanup(&cs->state);
     HeapFree(GetProcessHeap(), 0, cs->fb.render_targets);
     HeapFree(GetProcessHeap(), 0, cs->data);
+
+    if (cs->thread)
+    {
+        wined3d_cs_emit_stop(cs);
+        CloseHandle(cs->thread);
+        if (!CloseHandle(cs->event))
+            ERR("Closing event failed.\n");
+    }
+
     HeapFree(GetProcessHeap(), 0, cs);
 }
diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c
index 83965e7..7e90be5 100644
--- a/dlls/wined3d/device.c
+++ b/dlls/wined3d/device.c
@@ -4715,6 +4715,8 @@ void device_resource_add(struct wined3d_device *device, struct wined3d_resource
 {
     TRACE("device %p, resource %p.\n", device, resource);
 
+    wined3d_not_from_cs(device->cs);
+
     list_add_head(&device->resources, &resource->resource_list_entry);
 }
 
@@ -4722,6 +4724,8 @@ static void device_resource_remove(struct wined3d_device *device, struct wined3d
 {
     TRACE("device %p, resource %p.\n", device, resource);
 
+    wined3d_not_from_cs(device->cs);
+
     list_remove(&resource->resource_list_entry);
 }
 
@@ -4896,6 +4900,8 @@ void device_invalidate_state(const struct wined3d_device *device, DWORD state)
     BYTE shift;
     UINT i;
 
+    wined3d_from_cs(device->cs);
+
     if (STATE_IS_COMPUTE(state))
     {
         for (i = 0; i < device->context_count; ++i)
diff --git a/dlls/wined3d/query.c b/dlls/wined3d/query.c
index 685c5ce..79e6176 100644
--- a/dlls/wined3d/query.c
+++ b/dlls/wined3d/query.c
@@ -37,6 +37,7 @@ static void wined3d_query_init(struct wined3d_query *query, struct wined3d_devic
     query->data = data;
     query->data_size = data_size;
     query->query_ops = query_ops;
+    list_init(&query->poll_list_entry);
 }
 
 static struct wined3d_event_query *wined3d_event_query_from_query(struct wined3d_query *query)
@@ -260,6 +261,9 @@ static void wined3d_query_destroy_object(void *object)
 {
     struct wined3d_query *query = object;
 
+    if (!list_empty(&query->poll_list_entry))
+        list_remove(&query->poll_list_entry);
+
     /* Queries are specific to the GL context that created them. Not
      * deleting the query will obviously leak it, but that's still better
      * than potentially deleting a different query with the same id in this
@@ -328,8 +332,17 @@ HRESULT CDECL wined3d_query_get_data(struct wined3d_query *query,
         return WINED3DERR_INVALIDCALL;
     }
 
-    if (!query->query_ops->query_poll(query, flags))
+    if (!query->device->cs->thread)
+    {
+        if (!query->query_ops->query_poll(query, flags))
+            return S_FALSE;
+    }
+    else if (query->counter_main != query->counter_retrieved)
+    {
+        if (flags & WINED3DGETDATA_FLUSH)
+            wined3d_cs_emit_flush(query->device->cs);
         return S_FALSE;
+    }
 
     if (data)
         memcpy(data, query->data, min(data_size, query->data_size));
@@ -348,6 +361,9 @@ HRESULT CDECL wined3d_query_issue(struct wined3d_query *query, DWORD flags)
 {
     TRACE("query %p, flags %#x.\n", query, flags);
 
+    if (flags & WINED3DISSUE_END)
+        ++query->counter_main;
+
     wined3d_cs_emit_query_issue(query->device->cs, query, flags);
 
     if (flags & WINED3DISSUE_BEGIN)
@@ -449,7 +465,7 @@ enum wined3d_query_type CDECL wined3d_query_get_type(const struct wined3d_query
     return query->type;
 }
 
-static void wined3d_event_query_ops_issue(struct wined3d_query *query, DWORD flags)
+static BOOL wined3d_event_query_ops_issue(struct wined3d_query *query, DWORD flags)
 {
     TRACE("query %p, flags %#x.\n", query, flags);
 
@@ -458,20 +474,24 @@ static void wined3d_event_query_ops_issue(struct wined3d_query *query, DWORD fla
         struct wined3d_event_query *event_query = wined3d_event_query_from_query(query);
 
         wined3d_event_query_issue(event_query, query->device);
+        return TRUE;
     }
     else if (flags & WINED3DISSUE_BEGIN)
     {
         /* Started implicitly at query creation. */
         ERR("Event query issued with START flag - what to do?\n");
     }
+
+    return FALSE;
 }
 
-static void wined3d_occlusion_query_ops_issue(struct wined3d_query *query, DWORD flags)
+static BOOL wined3d_occlusion_query_ops_issue(struct wined3d_query *query, DWORD flags)
 {
     struct wined3d_occlusion_query *oq = wined3d_occlusion_query_from_query(query);
     struct wined3d_device *device = query->device;
     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
     struct wined3d_context *context;
+    BOOL poll = FALSE;
 
     TRACE("query %p, flags %#x.\n", query, flags);
 
@@ -479,7 +499,7 @@ static void wined3d_occlusion_query_ops_issue(struct wined3d_query *query, DWORD
      * restart. */
     if (flags & WINED3DISSUE_BEGIN)
     {
-        if (query->state == QUERY_BUILDING)
+        if (oq->started)
         {
             if ((context = context_reacquire(device, oq->context)))
             {
@@ -506,13 +526,14 @@ static void wined3d_occlusion_query_ops_issue(struct wined3d_query *query, DWORD
         checkGLcall("glBeginQuery()");
 
         context_release(context);
+        oq->started = TRUE;
     }
     if (flags & WINED3DISSUE_END)
     {
         /* MSDN says END on a non-building occlusion query returns an error,
          * but our tests show that it returns OK. But OpenGL doesn't like it,
          * so avoid generating an error. */
-        if (query->state == QUERY_BUILDING)
+        if (oq->started)
         {
             if ((context = context_reacquire(device, oq->context)))
             {
@@ -520,13 +541,17 @@ static void wined3d_occlusion_query_ops_issue(struct wined3d_query *query, DWORD
                 checkGLcall("glEndQuery()");
 
                 context_release(context);
+                poll = TRUE;
             }
             else
             {
                 FIXME("Wrong thread, can't end query.\n");
             }
         }
+        oq->started = FALSE;
     }
+
+    return poll;
 }
 
 static BOOL wined3d_timestamp_query_ops_poll(struct wined3d_query *query, DWORD flags)
@@ -565,7 +590,7 @@ static BOOL wined3d_timestamp_query_ops_poll(struct wined3d_query *query, DWORD
     return available;
 }
 
-static void wined3d_timestamp_query_ops_issue(struct wined3d_query *query, DWORD flags)
+static BOOL wined3d_timestamp_query_ops_issue(struct wined3d_query *query, DWORD flags)
 {
     struct wined3d_timestamp_query *tq = wined3d_timestamp_query_from_query(query);
     const struct wined3d_gl_info *gl_info;
@@ -587,7 +612,11 @@ static void wined3d_timestamp_query_ops_issue(struct wined3d_query *query, DWORD
         GL_EXTCALL(glQueryCounter(tq->id, GL_TIMESTAMP));
         checkGLcall("glQueryCounter()");
         context_release(context);
+
+        return TRUE;
     }
+
+    return FALSE;
 }
 
 static BOOL wined3d_timestamp_disjoint_query_ops_poll(struct wined3d_query *query, DWORD flags)
@@ -597,9 +626,11 @@ static BOOL wined3d_timestamp_disjoint_query_ops_poll(struct wined3d_query *quer
     return TRUE;
 }
 
-static void wined3d_timestamp_disjoint_query_ops_issue(struct wined3d_query *query, DWORD flags)
+static BOOL wined3d_timestamp_disjoint_query_ops_issue(struct wined3d_query *query, DWORD flags)
 {
     TRACE("query %p, flags %#x.\n", query, flags);
+
+    return FALSE;
 }
 
 static const struct wined3d_query_ops event_query_ops =
diff --git a/dlls/wined3d/wined3d_main.c b/dlls/wined3d/wined3d_main.c
index c59a956..f662c3a 100644
--- a/dlls/wined3d/wined3d_main.c
+++ b/dlls/wined3d/wined3d_main.c
@@ -72,6 +72,7 @@ static CRITICAL_SECTION wined3d_wndproc_cs = {&wined3d_wndproc_cs_debug, -1, 0,
  * where appropriate. */
 struct wined3d_settings wined3d_settings =
 {
+    FALSE,          /* No multithreaded CS by default. */
     MAKEDWORD_VERSION(1, 0), /* Default to legacy OpenGL */
     TRUE,           /* Use of GLSL enabled by default */
     ORM_FBO,        /* Use FBOs to do offscreen rendering */
@@ -204,6 +205,8 @@ static BOOL wined3d_dll_init(HINSTANCE hInstDLL)
 
     if (hkey || appkey)
     {
+        if (!get_config_key_dword(hkey, appkey, "csmt", &wined3d_settings.cs_multithreaded))
+            ERR_(winediag)("Setting multithreaded command stream to %#x.\n", wined3d_settings.cs_multithreaded);
         if (!get_config_key_dword(hkey, appkey, "MaxVersionGL", &tmpvalue))
         {
             if (tmpvalue != wined3d_settings.max_gl_version)
@@ -300,6 +303,7 @@ static BOOL wined3d_dll_init(HINSTANCE hInstDLL)
         if (!get_config_key(hkey, appkey, "StrictDrawOrdering", buffer, size)
                 && !strcmp(buffer,"enabled"))
         {
+            ERR_(winediag)("\"StrictDrawOrdering\" is deprecated, please use \"csmt\" instead.\n");
             TRACE("Enforcing strict draw ordering.\n");
             wined3d_settings.strict_draw_ordering = TRUE;
         }
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
index 74b61fb..6f1ac37 100644
--- a/dlls/wined3d/wined3d_private.h
+++ b/dlls/wined3d/wined3d_private.h
@@ -31,6 +31,7 @@
 #define WINE_GLAPI
 #endif
 
+#include <assert.h>
 #include <stdarg.h>
 #include <math.h>
 #include <limits.h>
@@ -361,6 +362,13 @@ static inline unsigned int wined3d_popcount(unsigned int x)
 #endif
 }
 
+static inline void wined3d_pause(void)
+{
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
+    __asm__ __volatile__( "rep;nop" : : : "memory" );
+#endif
+}
+
 #define ORM_BACKBUFFER  0
 #define ORM_FBO         1
 
@@ -371,6 +379,7 @@ static inline unsigned int wined3d_popcount(unsigned int x)
  * values in wined3d_main.c as well. */
 struct wined3d_settings
 {
+    unsigned int cs_multithreaded;
     DWORD max_gl_version;
     BOOL glslRequested;
     int offscreen_rendering_mode;
@@ -1564,7 +1573,7 @@ enum wined3d_query_state
 struct wined3d_query_ops
 {
     BOOL (*query_poll)(struct wined3d_query *query, DWORD flags);
-    void (*query_issue)(struct wined3d_query *query, DWORD flags);
+    BOOL (*query_issue)(struct wined3d_query *query, DWORD flags);
 };
 
 struct wined3d_query
@@ -1578,6 +1587,9 @@ struct wined3d_query
     const void *data;
     DWORD data_size;
     const struct wined3d_query_ops *query_ops;
+
+    LONG counter_main, counter_retrieved;
+    struct list poll_list_entry;
 };
 
 union wined3d_gl_query_object
@@ -1619,6 +1631,7 @@ struct wined3d_occlusion_query
     GLuint id;
     struct wined3d_context *context;
     UINT64 samples;
+    BOOL started;
 };
 
 struct wined3d_timestamp_query
@@ -2806,11 +2819,6 @@ static inline void wined3d_resource_release(struct wined3d_resource *resource)
     InterlockedDecrement(&resource->access_count);
 }
 
-static inline void wined3d_resource_wait_idle(struct wined3d_resource *resource)
-{
-    while (InterlockedCompareExchange(&resource->access_count, 0, 0));
-}
-
 void resource_cleanup(struct wined3d_resource *resource) DECLSPEC_HIDDEN;
 HRESULT resource_init(struct wined3d_resource *resource, struct wined3d_device *device,
         enum wined3d_resource_type type, const struct wined3d_format *format,
@@ -3220,6 +3228,16 @@ enum wined3d_push_constants
     WINED3D_PUSH_CONSTANTS_PS_B,
 };
 
+#define WINED3D_CS_QUERY_POLL_INTERVAL  10u
+#define WINED3D_CS_QUEUE_SIZE           0x100000u
+#define WINED3D_CS_SPIN_COUNT           10000000u
+
+struct wined3d_cs_queue
+{
+    LONG head, tail;
+    BYTE data[WINED3D_CS_QUEUE_SIZE];
+};
+
 struct wined3d_cs_ops
 {
     void *(*require_space)(struct wined3d_cs *cs, size_t size);
@@ -3234,9 +3252,16 @@ struct wined3d_cs
     struct wined3d_device *device;
     struct wined3d_fb_state fb;
     struct wined3d_state state;
+    HANDLE thread;
+    DWORD thread_id;
 
+    struct wined3d_cs_queue queue;
     size_t data_size, start, end;
     void *data;
+    struct list query_poll_list;
+
+    HANDLE event;
+    BOOL waiting_for_event;
 };
 
 struct wined3d_cs *wined3d_cs_create(struct wined3d_device *device) DECLSPEC_HIDDEN;
@@ -3327,6 +3352,17 @@ static inline void wined3d_cs_push_constants(struct wined3d_cs *cs, enum wined3d
     cs->ops->push_constants(cs, p, start_idx, count, constants);
 }
 
+static inline void wined3d_resource_wait_idle(struct wined3d_resource *resource)
+{
+    const struct wined3d_cs *cs = resource->device->cs;
+
+    if (!cs->thread || cs->thread_id == GetCurrentThreadId())
+        return;
+
+    while (InterlockedCompareExchange(&resource->access_count, 0, 0))
+        wined3d_pause();
+}
+
 /* TODO: Add tests and support for FLOAT16_4 POSITIONT, D3DCOLOR position, other
  * fixed function semantics as D3DCOLOR or FLOAT16 */
 enum wined3d_buffer_conversion_type
@@ -4012,6 +4048,17 @@ static inline struct wined3d_surface *context_get_rt_surface(const struct wined3
     return texture->sub_resources[context->current_rt.sub_resource_idx].u.surface;
 }
 
+static inline void wined3d_from_cs(struct wined3d_cs *cs)
+{
+    if (cs->thread)
+        assert(cs->thread_id == GetCurrentThreadId());
+}
+
+static inline void wined3d_not_from_cs(struct wined3d_cs *cs)
+{
+    assert(cs->thread_id != GetCurrentThreadId());
+}
+
 /* The WNDCLASS-Name for the fake window which we use to retrieve the GL capabilities */
 #define WINED3D_OPENGL_WINDOW_CLASS_NAME "WineD3D_OpenGL"
 
-- 
2.1.4




More information about the wine-patches mailing list