[PATCH 2/5] ntdll: Reimplement Win32 futexes on top of thread-ID alerts.

Zebediah Figura zfigura at codeweavers.com
Wed Nov 17 20:18:54 CST 2021


Signed-off-by: Zebediah Figura <zfigura at codeweavers.com>
---
 dlls/ntdll/sync.c        | 175 ++++++++++++++++++++++++++++++++++++++-
 dlls/ntdll/unix/loader.c |   3 -
 dlls/ntdll/unix/sync.c   | 162 ------------------------------------
 dlls/ntdll/unixlib.h     |   6 +-
 4 files changed, 173 insertions(+), 173 deletions(-)

diff --git a/dlls/ntdll/sync.c b/dlls/ntdll/sync.c
index bfb30661864..78c68ddbe38 100644
--- a/dlls/ntdll/sync.c
+++ b/dlls/ntdll/sync.c
@@ -34,11 +34,18 @@
 #include "windef.h"
 #include "winternl.h"
 #include "wine/debug.h"
+#include "wine/list.h"
 #include "ntdll_misc.h"
 
 WINE_DEFAULT_DEBUG_CHANNEL(sync);
 WINE_DECLARE_DEBUG_CHANNEL(relay);
 
+static const char *debugstr_timeout( const LARGE_INTEGER *timeout )
+{
+    if (!timeout) return "(infinite)";
+    return wine_dbgstr_longlong( timeout->QuadPart );
+}
+
 /******************************************************************
  *              RtlRunOnceInitialize (NTDLL.@)
  */
@@ -863,13 +870,113 @@ NTSTATUS WINAPI RtlSleepConditionVariableSRW( RTL_CONDITION_VARIABLE *variable,
     return status;
 }
 
+/* RtlWaitOnAddress() and RtlWakeAddress*(), hereafter referred to as "Win32
+ * futexes", offer futex-like semantics with a variable set of address sizes,
+ * but are limited to a single process. They are also fair—the documentation
+ * specifies this, and tests bear it out.
+ *
+ * On Windows they are implemented using NtAlertThreadByThreadId and
+ * NtWaitForAlertByThreadId, which manipulate a single flag (similar to an
+ * auto-reset event) per thread. This can be tested by attempting to wake a
+ * thread waiting in RtlWaitOnAddress() via NtAlertThreadByThreadId.
+ */
+
+struct futex_entry
+{
+    struct list entry;
+    const void *addr;
+    DWORD tid;
+};
+
+struct futex_queue
+{
+    struct list queue;
+    LONG lock;
+};
+
+static struct futex_queue futex_queues[256];
+
+static struct futex_queue *get_futex_queue( const void *addr )
+{
+    ULONG_PTR val = (ULONG_PTR)addr;
+
+    return &futex_queues[(val >> 4) % ARRAY_SIZE(futex_queues)];
+}
+
+static void spin_lock( LONG *lock )
+{
+    while (InterlockedCompareExchange( lock, -1, 0 ))
+        YieldProcessor();
+}
+
+static void spin_unlock( LONG *lock )
+{
+    InterlockedExchange( lock, 0 );
+}
+
+static BOOL compare_addr( const void *addr, const void *cmp, SIZE_T size )
+{
+    switch (size)
+    {
+        case 1:
+            return (*(const UCHAR *)addr == *(const UCHAR *)cmp);
+        case 2:
+            return (*(const USHORT *)addr == *(const USHORT *)cmp);
+        case 4:
+            return (*(const ULONG *)addr == *(const ULONG *)cmp);
+        case 8:
+            return (*(const ULONG64 *)addr == *(const ULONG64 *)cmp);
+    }
+
+    return FALSE;
+}
+
 /***********************************************************************
  *           RtlWaitOnAddress   (NTDLL.@)
  */
 NTSTATUS WINAPI RtlWaitOnAddress( const void *addr, const void *cmp, SIZE_T size,
                                   const LARGE_INTEGER *timeout )
 {
-    return unix_funcs->RtlWaitOnAddress( addr, cmp, size, timeout );
+    struct futex_queue *queue = get_futex_queue( addr );
+    struct futex_entry entry;
+    NTSTATUS ret;
+
+    TRACE("addr %p cmp %p size %#Ix timeout %s\n", addr, cmp, size, debugstr_timeout( timeout ));
+
+    if (size != 1 && size != 2 && size != 4 && size != 8)
+        return STATUS_INVALID_PARAMETER;
+
+    entry.addr = addr;
+    entry.tid = GetCurrentThreadId();
+
+    spin_lock( &queue->lock );
+
+    /* Do the comparison inside of the spinlock, to reduce spurious wakeups. */
+
+    if (!compare_addr( addr, cmp, size ))
+    {
+        spin_unlock( &queue->lock );
+        return STATUS_SUCCESS;
+    }
+
+    if (!queue->queue.next)
+        list_init( &queue->queue );
+    list_add_tail( &queue->queue, &entry.entry );
+
+    spin_unlock( &queue->lock );
+
+    ret = NtWaitForAlertByThreadId( NULL, timeout );
+
+    spin_lock( &queue->lock );
+    /* We may have already been removed by a call to RtlWakeAddressSingle(). */
+    if (entry.addr)
+        list_remove( &entry.entry );
+    spin_unlock( &queue->lock );
+
+    TRACE("returning %#x\n", ret);
+
+    if (ret == STATUS_ALERTED) ret = STATUS_SUCCESS;
+    return ret;
 }
 
 /***********************************************************************
@@ -877,7 +984,37 @@ NTSTATUS WINAPI RtlWaitOnAddress( const void *addr, const void *cmp, SIZE_T size
  */
 void WINAPI RtlWakeAddressAll( const void *addr )
 {
-    return unix_funcs->RtlWakeAddressAll( addr );
+    struct futex_queue *queue = get_futex_queue( addr );
+    unsigned int count = 0, i;
+    struct futex_entry *entry;
+    DWORD tids[256];
+
+    TRACE("%p\n", addr);
+
+    if (!addr) return;
+
+    spin_lock( &queue->lock );
+
+    if (!queue->queue.next)
+        list_init(&queue->queue);
+
+    LIST_FOR_EACH_ENTRY( entry, &queue->queue, struct futex_entry, entry )
+    {
+        if (entry->addr == addr)
+        {
+            /* Try to buffer wakes, so that we don't make a system call while
+             * holding a spinlock. */
+            if (count < ARRAY_SIZE(tids))
+                tids[count++] = entry->tid;
+            else
+                NtAlertThreadByThreadId( (HANDLE)(DWORD_PTR)entry->tid );
+        }
+    }
+
+    spin_unlock( &queue->lock );
+
+    for (i = 0; i < count; ++i)
+        NtAlertThreadByThreadId( (HANDLE)(DWORD_PTR)tids[i] );
 }
 
 /***********************************************************************
@@ -885,5 +1022,37 @@ void WINAPI RtlWakeAddressAll( const void *addr )
  */
 void WINAPI RtlWakeAddressSingle( const void *addr )
 {
-    return unix_funcs->RtlWakeAddressSingle( addr );
+    struct futex_queue *queue = get_futex_queue( addr );
+    struct futex_entry *entry;
+    DWORD tid = 0;
+
+    TRACE("%p\n", addr);
+
+    if (!addr) return;
+
+    spin_lock( &queue->lock );
+
+    if (!queue->queue.next)
+        list_init(&queue->queue);
+
+    LIST_FOR_EACH_ENTRY( entry, &queue->queue, struct futex_entry, entry )
+    {
+        if (entry->addr == addr)
+        {
+            /* Try to buffer wakes, so that we don't make a system call while
+             * holding a spinlock. */
+            tid = entry->tid;
+
+            /* Remove this entry from the queue, so that a simultaneous call to
+             * RtlWakeAddressSingle() will not also wake it—two simultaneous
+             * calls must wake at least two waiters if they exist. */
+            entry->addr = NULL;
+            list_remove( &entry->entry );
+            break;
+        }
+    }
+
+    spin_unlock( &queue->lock );
+
+    if (tid) NtAlertThreadByThreadId( (HANDLE)(DWORD_PTR)tid );
 }
diff --git a/dlls/ntdll/unix/loader.c b/dlls/ntdll/unix/loader.c
index 7f36dc42579..05ae264297a 100644
--- a/dlls/ntdll/unix/loader.c
+++ b/dlls/ntdll/unix/loader.c
@@ -2145,9 +2145,6 @@ static struct unix_funcs unix_funcs =
     NtCurrentTeb,
 #endif
     RtlGetSystemTimePrecise,
-    RtlWaitOnAddress,
-    RtlWakeAddressAll,
-    RtlWakeAddressSingle,
     fast_RtlpWaitForCriticalSection,
     fast_RtlpUnWaitCriticalSection,
     fast_RtlDeleteCriticalSection,
diff --git a/dlls/ntdll/unix/sync.c b/dlls/ntdll/unix/sync.c
index cdc63c8a6ef..47a0a03b45f 100644
--- a/dlls/ntdll/unix/sync.c
+++ b/dlls/ntdll/unix/sync.c
@@ -73,10 +73,6 @@ WINE_DEFAULT_DEBUG_CHANNEL(sync);
 
 HANDLE keyed_event = 0;
 
-static const LARGE_INTEGER zero_timeout;
-
-static pthread_mutex_t addr_mutex = PTHREAD_MUTEX_INITIALIZER;
-
 static const char *debugstr_timeout( const LARGE_INTEGER *timeout )
 {
     if (!timeout) return "(infinite)";
@@ -186,24 +182,6 @@ static void timespec_from_timeout( struct timespec *timespec, const LARGE_INTEGE
 #endif
 
 
-static BOOL compare_addr( const void *addr, const void *cmp, SIZE_T size )
-{
-    switch (size)
-    {
-        case 1:
-            return (*(const UCHAR *)addr == *(const UCHAR *)cmp);
-        case 2:
-            return (*(const USHORT *)addr == *(const USHORT *)cmp);
-        case 4:
-            return (*(const ULONG *)addr == *(const ULONG *)cmp);
-        case 8:
-            return (*(const ULONG64 *)addr == *(const ULONG64 *)cmp);
-    }
-
-    return FALSE;
-}
-
-
 /* create a struct security_descriptor and contained information in one contiguous piece of memory */
 NTSTATUS alloc_object_attributes( const OBJECT_ATTRIBUTES *attr, struct object_attributes **ret,
                                   data_size_t *ret_len )
@@ -2977,71 +2955,6 @@ NTSTATUS CDECL fast_RtlWakeConditionVariable( RTL_CONDITION_VARIABLE *variable,
     return STATUS_SUCCESS;
 }
 
-
-/* We can't map addresses to futex directly, because an application can wait on
- * 8 bytes, and we can't pass all 8 as the compare value to futex(). Instead we
- * map all addresses to a small fixed table of futexes. This may result in
- * spurious wakes, but the application is already expected to handle those. */
-
-static int addr_futex_table[256];
-
-static inline int *hash_addr( const void *addr )
-{
-    ULONG_PTR val = (ULONG_PTR)addr;
-
-    return &addr_futex_table[(val >> 2) & 255];
-}
-
-static inline NTSTATUS fast_wait_addr( const void *addr, const void *cmp, SIZE_T size,
-                                       const LARGE_INTEGER *timeout )
-{
-    int *futex;
-    int val;
-    struct timespec timespec;
-    int ret;
-
-    if (!use_futexes())
-        return STATUS_NOT_IMPLEMENTED;
-
-    futex = hash_addr( addr );
-
-    /* We must read the previous value of the futex before checking the value
-     * of the address being waited on. That way, if we receive a wake between
-     * now and waiting on the futex, we know that val will have changed.
-     * Use an atomic load so that memory accesses are ordered between this read
-     * and the increment below. */
-    val = InterlockedCompareExchange( futex, 0, 0 );
-    if (!compare_addr( addr, cmp, size ))
-        return STATUS_SUCCESS;
-
-    if (timeout)
-    {
-        timespec_from_timeout( &timespec, timeout );
-        ret = futex_wait( futex, val, &timespec );
-    }
-    else
-        ret = futex_wait( futex, val, NULL );
-
-    if (ret == -1 && errno == ETIMEDOUT)
-        return STATUS_TIMEOUT;
-    return STATUS_SUCCESS;
-}
-
-static inline NTSTATUS fast_wake_addr( const void *addr )
-{
-    int *futex;
-
-    if (!use_futexes())
-        return STATUS_NOT_IMPLEMENTED;
-
-    futex = hash_addr( addr );
-
-    InterlockedIncrement( futex );
-
-    futex_wake( futex, INT_MAX );
-    return STATUS_SUCCESS;
-}
-
 #else
 
 NTSTATUS CDECL fast_RtlTryAcquireSRWLockExclusive( RTL_SRWLOCK *lock )
@@ -3084,79 +2997,4 @@ NTSTATUS CDECL fast_wait_cv( RTL_CONDITION_VARIABLE *variable, const void *value
     return STATUS_NOT_IMPLEMENTED;
 }
 
-static inline NTSTATUS fast_wait_addr( const void *addr, const void *cmp, SIZE_T size,
-                                       const LARGE_INTEGER *timeout )
-{
-    return STATUS_NOT_IMPLEMENTED;
-}
-
-static inline NTSTATUS fast_wake_addr( const void *addr )
-{
-    return STATUS_NOT_IMPLEMENTED;
-}
-
 #endif
-
-
-/***********************************************************************
- *           RtlWaitOnAddress   (NTDLL.@)
- */
-NTSTATUS WINAPI RtlWaitOnAddress( const void *addr, const void *cmp, SIZE_T size,
-                                  const LARGE_INTEGER *timeout )
-{
-    select_op_t select_op;
-    NTSTATUS ret;
-    timeout_t abs_timeout = timeout ? timeout->QuadPart : TIMEOUT_INFINITE;
-
-    if (size != 1 && size != 2 && size != 4 && size != 8)
-        return STATUS_INVALID_PARAMETER;
-
-    if ((ret = fast_wait_addr( addr, cmp, size, timeout )) != STATUS_NOT_IMPLEMENTED)
-        return ret;
-
-    mutex_lock( &addr_mutex );
-    if (!compare_addr( addr, cmp, size ))
-    {
-        mutex_unlock( &addr_mutex );
-        return STATUS_SUCCESS;
-    }
-
-    if (abs_timeout < 0)
-    {
-        LARGE_INTEGER now;
-
-        NtQueryPerformanceCounter( &now, NULL );
-        abs_timeout -= now.QuadPart;
-    }
-
-    select_op.keyed_event.op     = SELECT_KEYED_EVENT_WAIT;
-    select_op.keyed_event.handle = wine_server_obj_handle( keyed_event );
-    select_op.keyed_event.key    = wine_server_client_ptr( addr );
-
-    return server_select( &select_op, sizeof(select_op.keyed_event), SELECT_INTERRUPTIBLE,
-                          abs_timeout, NULL, &addr_mutex, NULL );
-}
-
-/***********************************************************************
- *           RtlWakeAddressAll    (NTDLL.@)
- */
-void WINAPI RtlWakeAddressAll( const void *addr )
-{
-    if (fast_wake_addr( addr ) != STATUS_NOT_IMPLEMENTED) return;
-
-    mutex_lock( &addr_mutex );
-    while (NtReleaseKeyedEvent( 0, addr, 0, &zero_timeout ) == STATUS_SUCCESS) {}
-    mutex_unlock( &addr_mutex );
-}
-
-/***********************************************************************
- *           RtlWakeAddressSingle (NTDLL.@)
- */
-void WINAPI RtlWakeAddressSingle( const void *addr )
-{
-    if (fast_wake_addr( addr ) != STATUS_NOT_IMPLEMENTED) return;
-
-    mutex_lock( &addr_mutex );
-    NtReleaseKeyedEvent( 0, addr, 0, &zero_timeout );
-    mutex_unlock( &addr_mutex );
-}
diff --git a/dlls/ntdll/unixlib.h b/dlls/ntdll/unixlib.h
index 62030d91cdb..ded05c40e17 100644
--- a/dlls/ntdll/unixlib.h
+++ b/dlls/ntdll/unixlib.h
@@ -26,7 +26,7 @@
 struct _DISPATCHER_CONTEXT;
 
 /* increment this when you change the function table */
-#define NTDLL_UNIXLIB_VERSION 128
+#define NTDLL_UNIXLIB_VERSION 129
 
 struct unix_funcs
 {
@@ -37,10 +37,6 @@ struct unix_funcs
 
     /* other Win32 API functions */
     LONGLONG      (WINAPI *RtlGetSystemTimePrecise)(void);
-    NTSTATUS      (WINAPI *RtlWaitOnAddress)( const void *addr, const void *cmp, SIZE_T size,
-                                              const LARGE_INTEGER *timeout );
-    void          (WINAPI *RtlWakeAddressAll)( const void *addr );
-    void          (WINAPI *RtlWakeAddressSingle)( const void *addr );
 
     /* fast locks */
     NTSTATUS      (CDECL *fast_RtlpWaitForCriticalSection)( RTL_CRITICAL_SECTION *crit, int timeout );
-- 
2.33.0




More information about the wine-devel mailing list