[PATCH 2/2] wined3d: Do not call queue_is_empty in wined3d_cs_mt_finish (v3).
Stefan Dösinger
stefan at codeweavers.com
Sun Aug 27 16:22:42 CDT 2017
Without an explicit volatile, aarch64 gcc will copy queue->tail into a
register outside the loop and then continuously compare two registers
against each other. The *(volatile LONG *)& cast forces gcc to re-read the
memory on every iteration. wined3d_cs_queue_is_empty reads only
queue->head through such a cast, not queue->tail. Therefore,
queue_is_empty as it is written will only work from the CS thread.
GCC does not generate any instructions to invalidate ARM CPU caches,
regardless of whether a volatile * access is used.
x86_64 gcc will always read both comparands from memory inside the loop,
so the volatile has no effect there.
Signed-off-by: Stefan Dösinger <stefan at codeweavers.com>
---
Version 3: Update the explanation, the issue is compiler generated code,
not CPU caches. Swap the parameters of wined3d_cs_queue_is_empty.
Version 2: Add a wined3d_from_cs assertion to wined3d_cs_queue_is_empty.
Making finish a special case instead of reading both head and tail
through volatile * in wined3d_cs_queue_is_empty is measurably faster on
ARM (260 vs 240 fps in a simple CPU-limited test case). It
(unsurprisingly) makes no difference on x86.
To reproduce the bug, run the d3d9 tests (make test) on ARM.
---
dlls/wined3d/cs.c | 13 +++++++------
dlls/wined3d/wined3d_private.h | 2 +-
2 files changed, 8 insertions(+), 7 deletions(-)
diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c
index 4a405d0..bef4795 100644
--- a/dlls/wined3d/cs.c
+++ b/dlls/wined3d/cs.c
@@ -2413,8 +2413,9 @@ static const struct wined3d_cs_ops wined3d_cs_st_ops =
wined3d_cs_st_push_constants,
};
-static BOOL wined3d_cs_queue_is_empty(const struct wined3d_cs_queue *queue)
+static BOOL wined3d_cs_queue_is_empty(const struct wined3d_cs *cs, const struct wined3d_cs_queue *queue)
{
+ wined3d_from_cs(cs);
return *(volatile LONG *)&queue->head == queue->tail;
}
@@ -2513,7 +2514,7 @@ static void wined3d_cs_mt_finish(struct wined3d_cs *cs, enum wined3d_cs_queue_id
if (cs->thread_id == GetCurrentThreadId())
return wined3d_cs_st_finish(cs, queue_id);
- while (!wined3d_cs_queue_is_empty(&cs->queue[queue_id]))
+ while (cs->queue[queue_id].head != *(volatile LONG *)&cs->queue[queue_id].tail)
wined3d_pause();
}
@@ -2551,8 +2552,8 @@ static void wined3d_cs_wait_event(struct wined3d_cs *cs)
* Likewise, we can race with the main thread when resetting
* "waiting_for_event", in which case we would need to call
* WaitForSingleObject() because the main thread called SetEvent(). */
- if (!(wined3d_cs_queue_is_empty(&cs->queue[WINED3D_CS_QUEUE_DEFAULT])
- && wined3d_cs_queue_is_empty(&cs->queue[WINED3D_CS_QUEUE_MAP]))
+ if (!(wined3d_cs_queue_is_empty(cs, &cs->queue[WINED3D_CS_QUEUE_DEFAULT])
+ && wined3d_cs_queue_is_empty(cs, &cs->queue[WINED3D_CS_QUEUE_MAP]))
&& InterlockedCompareExchange(&cs->waiting_for_event, FALSE, TRUE))
return;
@@ -2582,10 +2583,10 @@ static DWORD WINAPI wined3d_cs_run(void *ctx)
}
queue = &cs->queue[WINED3D_CS_QUEUE_MAP];
- if (wined3d_cs_queue_is_empty(queue))
+ if (wined3d_cs_queue_is_empty(cs, queue))
{
queue = &cs->queue[WINED3D_CS_QUEUE_DEFAULT];
- if (wined3d_cs_queue_is_empty(queue))
+ if (wined3d_cs_queue_is_empty(cs, queue))
{
if (++spin_count >= WINED3D_CS_SPIN_COUNT && list_empty(&cs->query_poll_list))
wined3d_cs_wait_event(cs);
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
index 74043d0..1a196a5 100644
--- a/dlls/wined3d/wined3d_private.h
+++ b/dlls/wined3d/wined3d_private.h
@@ -4277,7 +4277,7 @@ static inline struct wined3d_surface *context_get_rt_surface(const struct wined3
return texture->sub_resources[context->current_rt.sub_resource_idx].u.surface;
}
-static inline void wined3d_from_cs(struct wined3d_cs *cs)
+static inline void wined3d_from_cs(const struct wined3d_cs *cs)
{
if (cs->thread)
assert(cs->thread_id == GetCurrentThreadId());
--
2.10.2
More information about the wine-patches
mailing list