[PATCH 2/3] ntdll: Add a futex-based implementation of SRW locks.

Zebediah Figura zfigura at codeweavers.com
Mon Jun 10 10:07:53 CDT 2019


Signed-off-by: Zebediah Figura <zfigura at codeweavers.com>
---
This was written for Shadow of the Tomb Raider, which makes heavy use of SRW
locks. However, it's not particularly clear that it improves performance there.

 dlls/ntdll/sync.c | 310 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 303 insertions(+), 7 deletions(-)

diff --git a/dlls/ntdll/sync.c b/dlls/ntdll/sync.c
index 9c62f8eb6c5f..c7a96b63c3ae 100644
--- a/dlls/ntdll/sync.c
+++ b/dlls/ntdll/sync.c
@@ -61,7 +61,7 @@
 #include "wine/debug.h"
 #include "ntdll_misc.h"
 
-WINE_DEFAULT_DEBUG_CHANNEL(ntdll);
+WINE_DEFAULT_DEBUG_CHANNEL(sync);
 
 HANDLE keyed_event = NULL;
 
@@ -71,17 +71,31 @@ static const LARGE_INTEGER zero_timeout;
 
 #ifdef __linux__
 
-static int wait_op = 128; /*FUTEX_WAIT|FUTEX_PRIVATE_FLAG*/
-static int wake_op = 129; /*FUTEX_WAKE|FUTEX_PRIVATE_FLAG*/
+#define FUTEX_WAIT 0
+#define FUTEX_WAKE 1
+#define FUTEX_WAIT_BITSET 9
+#define FUTEX_WAKE_BITSET 10
+
+static int futex_private = 128;
 
 static inline int futex_wait( const int *addr, int val, struct timespec *timeout )
 {
-    return syscall( __NR_futex, addr, wait_op, val, timeout, 0, 0 );
+    return syscall( __NR_futex, addr, FUTEX_WAIT | futex_private, val, timeout, 0, 0 );
 }
 
 static inline int futex_wake( const int *addr, int val )
 {
-    return syscall( __NR_futex, addr, wake_op, val, NULL, 0, 0 );
+    return syscall( __NR_futex, addr, FUTEX_WAKE | futex_private, val, NULL, 0, 0 );
+}
+
+static inline int futex_wait_bitset( const int *addr, int val, struct timespec *timeout, int mask )
+{
+    return syscall( __NR_futex, addr, FUTEX_WAIT_BITSET | futex_private, val, timeout, 0, mask );
+}
+
+static inline int futex_wake_bitset( const int *addr, int val, int mask )
+{
+    return syscall( __NR_futex, addr, FUTEX_WAKE_BITSET | futex_private, val, NULL, 0, mask );
 }
 
 static inline int use_futexes(void)
@@ -93,8 +107,7 @@ static inline int use_futexes(void)
         futex_wait( &supported, 10, NULL );
         if (errno == ENOSYS)
         {
-            wait_op = 0; /*FUTEX_WAIT*/
-            wake_op = 1; /*FUTEX_WAKE*/
+            futex_private = 0;
             futex_wait( &supported, 10, NULL );
         }
         supported = (errno != ENOSYS);
@@ -1642,6 +1655,266 @@ DWORD WINAPI RtlRunOnceExecuteOnce( RTL_RUN_ONCE *once, PRTL_RUN_ONCE_INIT_FN fu
     return RtlRunOnceComplete( once, 0, context ? *context : NULL );
 }
 
+#ifdef __linux__
+
+/* Futex-based SRW lock implementation:
+ *
+ * Since we can rely on the kernel to release all threads and don't need to
+ * worry about NtReleaseKeyedEvent(), we can simplify the layout a bit. The
+ * layout looks like this:
+ *
+ *    31 - Exclusive lock bit, set if the resource is owned exclusively.
+ * 30-16 - Number of exclusive waiters. Unlike the fallback implementation,
+ *         this does not include the thread owning the lock, or shared threads
+ *         waiting on the lock.
+ *  15-0 - Number of shared owners. Unlike the fallback implementation, this
+ *         does not include the number of shared threads waiting on the lock.
+ *         Thus the state [1, x, >=1] will never occur.
+ */
+
+#define SRWLOCK_FUTEX_EXCLUSIVE_LOCK_BIT        0x80000000
+#define SRWLOCK_FUTEX_EXCLUSIVE_WAITERS_MASK    0x7fff0000
+#define SRWLOCK_FUTEX_EXCLUSIVE_WAITERS_INC     0x00010000
+#define SRWLOCK_FUTEX_SHARED_OWNERS_MASK        0x0000ffff
+#define SRWLOCK_FUTEX_SHARED_OWNERS_INC         0x00000001
+
+/* Futex bitmasks; these are independent from the bits in the lock itself. */
+#define SRWLOCK_FUTEX_BITSET_EXCLUSIVE  1
+#define SRWLOCK_FUTEX_BITSET_SHARED     2
+
+static NTSTATUS fast_try_acquire_srw_exclusive( RTL_SRWLOCK *lock )
+{
+    int old, new;
+    NTSTATUS ret;
+
+    if (!use_futexes()) return STATUS_NOT_IMPLEMENTED;
+
+    do
+    {
+        old = *(int *)lock;
+
+        if (!(old & SRWLOCK_FUTEX_EXCLUSIVE_LOCK_BIT)
+                && !(old & SRWLOCK_FUTEX_SHARED_OWNERS_MASK))
+        {
+            /* Not locked exclusive or shared. We can try to grab it. */
+            new = old | SRWLOCK_FUTEX_EXCLUSIVE_LOCK_BIT;
+            ret = STATUS_SUCCESS;
+        }
+        else
+        {
+            new = old;
+            ret = STATUS_TIMEOUT;
+        }
+    } while (interlocked_cmpxchg( (int *)lock, new, old ) != old);
+
+    return ret;
+}
+
+static NTSTATUS fast_acquire_srw_exclusive( RTL_SRWLOCK *lock )
+{
+    int old, new;
+    BOOLEAN wait;
+
+    if (!use_futexes()) return STATUS_NOT_IMPLEMENTED;
+
+    /* Atomically increment the exclusive waiter count. */
+    do
+    {
+        old = *(int *)lock;
+        new = old + SRWLOCK_FUTEX_EXCLUSIVE_WAITERS_INC;
+        assert(new & SRWLOCK_FUTEX_EXCLUSIVE_WAITERS_MASK);
+    } while (interlocked_cmpxchg( (int *)lock, new, old ) != old);
+
+    for (;;)
+    {
+        do
+        {
+            old = *(int *)lock;
+
+            if (!(old & SRWLOCK_FUTEX_EXCLUSIVE_LOCK_BIT)
+                    && !(old & SRWLOCK_FUTEX_SHARED_OWNERS_MASK))
+            {
+                /* Not locked exclusive or shared. We can try to grab it. */
+                new = old | SRWLOCK_FUTEX_EXCLUSIVE_LOCK_BIT;
+                assert(old & SRWLOCK_FUTEX_EXCLUSIVE_WAITERS_MASK);
+                new -= SRWLOCK_FUTEX_EXCLUSIVE_WAITERS_INC;
+                wait = FALSE;
+            }
+            else
+            {
+                new = old;
+                wait = TRUE;
+            }
+        } while (interlocked_cmpxchg( (int *)lock, new, old ) != old);
+
+        if (!wait)
+            return STATUS_SUCCESS;
+
+        futex_wait_bitset( (int *)lock, new, NULL, SRWLOCK_FUTEX_BITSET_EXCLUSIVE );
+    }
+
+    return STATUS_SUCCESS;
+}
+
+static NTSTATUS fast_try_acquire_srw_shared( RTL_SRWLOCK *lock )
+{
+    int new, old;
+    NTSTATUS ret;
+
+    if (!use_futexes()) return STATUS_NOT_IMPLEMENTED;
+
+    do
+    {
+        old = *(int *)lock;
+
+        if (!(old & SRWLOCK_FUTEX_EXCLUSIVE_LOCK_BIT)
+                && !(old & SRWLOCK_FUTEX_EXCLUSIVE_WAITERS_MASK))
+        {
+            /* Not locked exclusive, and no exclusive waiters. We can try to
+             * grab it. */
+            new = old + SRWLOCK_FUTEX_SHARED_OWNERS_INC;
+            assert(new & SRWLOCK_FUTEX_SHARED_OWNERS_MASK);
+            ret = STATUS_SUCCESS;
+        }
+        else
+        {
+            new = old;
+            ret = STATUS_TIMEOUT;
+        }
+    } while (interlocked_cmpxchg( (int *)lock, new, old ) != old);
+
+    return ret;
+}
+
+static NTSTATUS fast_acquire_srw_shared( RTL_SRWLOCK *lock )
+{
+    int old, new;
+    BOOLEAN wait;
+
+    if (!use_futexes()) return STATUS_NOT_IMPLEMENTED;
+
+    for (;;)
+    {
+        do
+        {
+            old = *(int *)lock;
+
+            if (!(old & SRWLOCK_FUTEX_EXCLUSIVE_LOCK_BIT)
+                    && !(old & SRWLOCK_FUTEX_EXCLUSIVE_WAITERS_MASK))
+            {
+                /* Not locked exclusive, and no exclusive waiters. We can try
+                 * to grab it. */
+                new = old + SRWLOCK_FUTEX_SHARED_OWNERS_INC;
+                assert(new & SRWLOCK_FUTEX_SHARED_OWNERS_MASK);
+                wait = FALSE;
+            }
+            else
+            {
+                new = old;
+                wait = TRUE;
+            }
+        } while (interlocked_cmpxchg( (int *)lock, new, old ) != old);
+
+        if (!wait)
+            return STATUS_SUCCESS;
+
+        futex_wait_bitset( (int *)lock, new, NULL, SRWLOCK_FUTEX_BITSET_SHARED );
+    }
+
+    return STATUS_SUCCESS;
+}
+
+static NTSTATUS fast_release_srw_exclusive( RTL_SRWLOCK *lock )
+{
+    int old, new;
+
+    if (!use_futexes()) return STATUS_NOT_IMPLEMENTED;
+
+    do
+    {
+        old = *(int *)lock;
+
+        if (!(old & SRWLOCK_FUTEX_EXCLUSIVE_LOCK_BIT))
+        {
+            ERR("Lock %p is not owned exclusive! (%#x)\n", lock, *(int *)lock);
+            return STATUS_RESOURCE_NOT_OWNED;
+        }
+
+        new = old & ~SRWLOCK_FUTEX_EXCLUSIVE_LOCK_BIT;
+    } while (interlocked_cmpxchg( (int *)lock, new, old ) != old);
+
+    if (new & SRWLOCK_FUTEX_EXCLUSIVE_WAITERS_MASK)
+        futex_wake_bitset( (int *)lock, 1, SRWLOCK_FUTEX_BITSET_EXCLUSIVE );
+    else
+        futex_wake_bitset( (int *)lock, INT_MAX, SRWLOCK_FUTEX_BITSET_SHARED );
+
+    return STATUS_SUCCESS;
+}
+
+static NTSTATUS fast_release_srw_shared( RTL_SRWLOCK *lock )
+{
+    int old, new;
+
+    if (!use_futexes()) return STATUS_NOT_IMPLEMENTED;
+
+    do
+    {
+        old = *(int *)lock;
+
+        if (old & SRWLOCK_FUTEX_EXCLUSIVE_LOCK_BIT)
+        {
+            ERR("Lock %p is owned exclusive! (%#x)\n", lock, *(int *)lock);
+            return STATUS_RESOURCE_NOT_OWNED;
+        }
+        else if (!(old & SRWLOCK_FUTEX_SHARED_OWNERS_MASK))
+        {
+            ERR("Lock %p is not owned shared! (%#x)\n", lock, *(int *)lock);
+            return STATUS_RESOURCE_NOT_OWNED;
+        }
+
+        new = old - SRWLOCK_FUTEX_SHARED_OWNERS_INC;
+    } while (interlocked_cmpxchg( (int *)lock, new, old ) != old);
+
+    /* Optimization: only bother waking if there are actually exclusive waiters. */
+    if (!(new & SRWLOCK_FUTEX_SHARED_OWNERS_MASK) && (new & SRWLOCK_FUTEX_EXCLUSIVE_WAITERS_MASK))
+        futex_wake_bitset( (int *)lock, 1, SRWLOCK_FUTEX_BITSET_EXCLUSIVE );
+
+    return STATUS_SUCCESS;
+}
+
+#else
+
+static NTSTATUS fast_try_acquire_srw_exclusive( RTL_SRWLOCK *lock )
+{
+    return STATUS_NOT_IMPLEMENTED;
+}
+
+static NTSTATUS fast_acquire_srw_exclusive( RTL_SRWLOCK *lock )
+{
+    return STATUS_NOT_IMPLEMENTED;
+}
+
+static NTSTATUS fast_try_acquire_srw_shared( RTL_SRWLOCK *lock )
+{
+    return STATUS_NOT_IMPLEMENTED;
+}
+
+static NTSTATUS fast_acquire_srw_shared( RTL_SRWLOCK *lock )
+{
+    return STATUS_NOT_IMPLEMENTED;
+}
+
+static NTSTATUS fast_release_srw_exclusive( RTL_SRWLOCK *lock )
+{
+    return STATUS_NOT_IMPLEMENTED;
+}
+
+static NTSTATUS fast_release_srw_shared( RTL_SRWLOCK *lock )
+{
+    return STATUS_NOT_IMPLEMENTED;
+}
+
+#endif
 
 /* SRW locks implementation
  *
@@ -1789,6 +2062,9 @@ void WINAPI RtlInitializeSRWLock( RTL_SRWLOCK *lock )
  */
 void WINAPI RtlAcquireSRWLockExclusive( RTL_SRWLOCK *lock )
 {
+    if (fast_acquire_srw_exclusive( lock ) != STATUS_NOT_IMPLEMENTED)
+        return;
+
     if (srwlock_lock_exclusive( (unsigned int *)&lock->Ptr, SRWLOCK_RES_EXCLUSIVE ))
         NtWaitForKeyedEvent( 0, srwlock_key_exclusive(lock), FALSE, NULL );
 }
@@ -1803,6 +2079,10 @@ void WINAPI RtlAcquireSRWLockExclusive( RTL_SRWLOCK *lock )
 void WINAPI RtlAcquireSRWLockShared( RTL_SRWLOCK *lock )
 {
     unsigned int val, tmp;
+
+    if (fast_acquire_srw_shared( lock ) != STATUS_NOT_IMPLEMENTED)
+        return;
+
     /* Acquires a shared lock. If it's currently not possible to add elements to
      * the shared queue, then request exclusive access instead. */
     for (val = *(unsigned int *)&lock->Ptr;; val = tmp)
@@ -1833,6 +2113,9 @@ void WINAPI RtlAcquireSRWLockShared( RTL_SRWLOCK *lock )
  */
 void WINAPI RtlReleaseSRWLockExclusive( RTL_SRWLOCK *lock )
 {
+    if (fast_release_srw_exclusive( lock ) != STATUS_NOT_IMPLEMENTED)
+        return;
+
     srwlock_leave_exclusive( lock, srwlock_unlock_exclusive( (unsigned int *)&lock->Ptr,
                              - SRWLOCK_RES_EXCLUSIVE ) - SRWLOCK_RES_EXCLUSIVE );
 }
@@ -1842,6 +2125,9 @@ void WINAPI RtlReleaseSRWLockExclusive( RTL_SRWLOCK *lock )
  */
 void WINAPI RtlReleaseSRWLockShared( RTL_SRWLOCK *lock )
 {
+    if (fast_release_srw_shared( lock ) != STATUS_NOT_IMPLEMENTED)
+        return;
+
     srwlock_leave_shared( lock, srwlock_lock_exclusive( (unsigned int *)&lock->Ptr,
                           - SRWLOCK_RES_SHARED ) - SRWLOCK_RES_SHARED );
 }
@@ -1855,6 +2141,11 @@ void WINAPI RtlReleaseSRWLockShared( RTL_SRWLOCK *lock )
  */
 BOOLEAN WINAPI RtlTryAcquireSRWLockExclusive( RTL_SRWLOCK *lock )
 {
+    NTSTATUS ret;
+
+    if ((ret = fast_try_acquire_srw_exclusive( lock )) != STATUS_NOT_IMPLEMENTED)
+        return (ret == STATUS_SUCCESS);
+
     return interlocked_cmpxchg( (int *)&lock->Ptr, SRWLOCK_MASK_IN_EXCLUSIVE |
                                 SRWLOCK_RES_EXCLUSIVE, 0 ) == 0;
 }
@@ -1865,6 +2156,11 @@ BOOLEAN WINAPI RtlTryAcquireSRWLockExclusive( RTL_SRWLOCK *lock )
 BOOLEAN WINAPI RtlTryAcquireSRWLockShared( RTL_SRWLOCK *lock )
 {
     unsigned int val, tmp;
+    NTSTATUS ret;
+
+    if ((ret = fast_try_acquire_srw_shared( lock )) != STATUS_NOT_IMPLEMENTED)
+        return (ret == STATUS_SUCCESS);
+
     for (val = *(unsigned int *)&lock->Ptr;; val = tmp)
     {
         if (val & SRWLOCK_MASK_EXCLUSIVE_QUEUE)
-- 
2.20.1




More information about the wine-devel mailing list