[PATCH 2/3] ntdll: Add a futex-based implementation of SRW locks.
Zebediah Figura
zfigura@codeweavers.com
Mon Jun 10 10:07:53 CDT 2019
Signed-off-by: Zebediah Figura <zfigura@codeweavers.com>
---
This was written for Shadow of the Tomb Raider, which makes heavy use of SRW
locks. However, it's not particularly clear that it improves performance there.
dlls/ntdll/sync.c | 310 ++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 303 insertions(+), 7 deletions(-)
diff --git a/dlls/ntdll/sync.c b/dlls/ntdll/sync.c
index 9c62f8eb6c5f..c7a96b63c3ae 100644
--- a/dlls/ntdll/sync.c
+++ b/dlls/ntdll/sync.c
@@ -61,7 +61,7 @@
#include "wine/debug.h"
#include "ntdll_misc.h"
-WINE_DEFAULT_DEBUG_CHANNEL(ntdll);
+WINE_DEFAULT_DEBUG_CHANNEL(sync);
HANDLE keyed_event = NULL;
@@ -71,17 +71,31 @@ static const LARGE_INTEGER zero_timeout;
#ifdef __linux__
-static int wait_op = 128; /*FUTEX_WAIT|FUTEX_PRIVATE_FLAG*/
-static int wake_op = 129; /*FUTEX_WAKE|FUTEX_PRIVATE_FLAG*/
+#define FUTEX_WAIT 0
+#define FUTEX_WAKE 1
+#define FUTEX_WAIT_BITSET 9
+#define FUTEX_WAKE_BITSET 10
+
+static int futex_private = 128;
static inline int futex_wait( const int *addr, int val, struct timespec *timeout )
{
- return syscall( __NR_futex, addr, wait_op, val, timeout, 0, 0 );
+ return syscall( __NR_futex, addr, FUTEX_WAIT | futex_private, val, timeout, 0, 0 );
}
static inline int futex_wake( const int *addr, int val )
{
- return syscall( __NR_futex, addr, wake_op, val, NULL, 0, 0 );
+ return syscall( __NR_futex, addr, FUTEX_WAKE | futex_private, val, NULL, 0, 0 );
+}
+
+static inline int futex_wait_bitset( const int *addr, int val, struct timespec *timeout, int mask )
+{
+ return syscall( __NR_futex, addr, FUTEX_WAIT_BITSET | futex_private, val, timeout, 0, mask );
+}
+
+static inline int futex_wake_bitset( const int *addr, int val, int mask )
+{
+ return syscall( __NR_futex, addr, FUTEX_WAKE_BITSET | futex_private, val, NULL, 0, mask );
}
static inline int use_futexes(void)
@@ -93,8 +107,7 @@ static inline int use_futexes(void)
futex_wait( &supported, 10, NULL );
if (errno == ENOSYS)
{
- wait_op = 0; /*FUTEX_WAIT*/
- wake_op = 1; /*FUTEX_WAKE*/
+ futex_private = 0;
futex_wait( &supported, 10, NULL );
}
supported = (errno != ENOSYS);
@@ -1642,6 +1655,266 @@ DWORD WINAPI RtlRunOnceExecuteOnce( RTL_RUN_ONCE *once, PRTL_RUN_ONCE_INIT_FN fu
return RtlRunOnceComplete( once, 0, context ? *context : NULL );
}
+#ifdef __linux__
+
+/* Futex-based SRW lock implementation:
+ *
+ * Since we can rely on the kernel to release all threads and don't need to
+ * worry about NtReleaseKeyedEvent(), we can simplify the layout a bit. The
+ * layout looks like this:
+ *
+ * 31 - Exclusive lock bit, set if the resource is owned exclusively.
+ * 30-16 - Number of exclusive waiters. Unlike the fallback implementation,
+ * this does not include the thread owning the lock, or shared threads
+ * waiting on the lock.
+ * 15-0 - Number of shared owners. Unlike the fallback implementation, this
+ * does not include the number of shared threads waiting on the lock.
+ * Thus the state [1, x, >=1] will never occur.
+ */
+
+#define SRWLOCK_FUTEX_EXCLUSIVE_LOCK_BIT 0x80000000
+#define SRWLOCK_FUTEX_EXCLUSIVE_WAITERS_MASK 0x7fff0000
+#define SRWLOCK_FUTEX_EXCLUSIVE_WAITERS_INC 0x00010000
+#define SRWLOCK_FUTEX_SHARED_OWNERS_MASK 0x0000ffff
+#define SRWLOCK_FUTEX_SHARED_OWNERS_INC 0x00000001
+
+/* Futex bitmasks; these are independent from the bits in the lock itself. */
+#define SRWLOCK_FUTEX_BITSET_EXCLUSIVE 1
+#define SRWLOCK_FUTEX_BITSET_SHARED 2
+
+static NTSTATUS fast_try_acquire_srw_exclusive( RTL_SRWLOCK *lock )
+{
+ int old, new;
+ NTSTATUS ret;
+
+ if (!use_futexes()) return STATUS_NOT_IMPLEMENTED;
+
+ do
+ {
+ old = *(int *)lock;
+
+ if (!(old & SRWLOCK_FUTEX_EXCLUSIVE_LOCK_BIT)
+ && !(old & SRWLOCK_FUTEX_SHARED_OWNERS_MASK))
+ {
+ /* Not locked exclusive or shared. We can try to grab it. */
+ new = old | SRWLOCK_FUTEX_EXCLUSIVE_LOCK_BIT;
+ ret = STATUS_SUCCESS;
+ }
+ else
+ {
+ new = old;
+ ret = STATUS_TIMEOUT;
+ }
+ } while (interlocked_cmpxchg( (int *)lock, new, old ) != old);
+
+ return ret;
+}
+
+static NTSTATUS fast_acquire_srw_exclusive( RTL_SRWLOCK *lock )
+{
+ int old, new;
+ BOOLEAN wait;
+
+ if (!use_futexes()) return STATUS_NOT_IMPLEMENTED;
+
+ /* Atomically increment the exclusive waiter count. */
+ do
+ {
+ old = *(int *)lock;
+ new = old + SRWLOCK_FUTEX_EXCLUSIVE_WAITERS_INC;
+ assert(new & SRWLOCK_FUTEX_EXCLUSIVE_WAITERS_MASK);
+ } while (interlocked_cmpxchg( (int *)lock, new, old ) != old);
+
+ for (;;)
+ {
+ do
+ {
+ old = *(int *)lock;
+
+ if (!(old & SRWLOCK_FUTEX_EXCLUSIVE_LOCK_BIT)
+ && !(old & SRWLOCK_FUTEX_SHARED_OWNERS_MASK))
+ {
+ /* Not locked exclusive or shared. We can try to grab it. */
+ new = old | SRWLOCK_FUTEX_EXCLUSIVE_LOCK_BIT;
+ assert(old & SRWLOCK_FUTEX_EXCLUSIVE_WAITERS_MASK);
+ new -= SRWLOCK_FUTEX_EXCLUSIVE_WAITERS_INC;
+ wait = FALSE;
+ }
+ else
+ {
+ new = old;
+ wait = TRUE;
+ }
+ } while (interlocked_cmpxchg( (int *)lock, new, old ) != old);
+
+ if (!wait)
+ return STATUS_SUCCESS;
+
+ futex_wait_bitset( (int *)lock, new, NULL, SRWLOCK_FUTEX_BITSET_EXCLUSIVE );
+ }
+
+ return STATUS_SUCCESS;
+}
+
+static NTSTATUS fast_try_acquire_srw_shared( RTL_SRWLOCK *lock )
+{
+ int new, old;
+ NTSTATUS ret;
+
+ if (!use_futexes()) return STATUS_NOT_IMPLEMENTED;
+
+ do
+ {
+ old = *(int *)lock;
+
+ if (!(old & SRWLOCK_FUTEX_EXCLUSIVE_LOCK_BIT)
+ && !(old & SRWLOCK_FUTEX_EXCLUSIVE_WAITERS_MASK))
+ {
+ /* Not locked exclusive, and no exclusive waiters. We can try to
+ * grab it. */
+ new = old + SRWLOCK_FUTEX_SHARED_OWNERS_INC;
+ assert(new & SRWLOCK_FUTEX_SHARED_OWNERS_MASK);
+ ret = STATUS_SUCCESS;
+ }
+ else
+ {
+ new = old;
+ ret = STATUS_TIMEOUT;
+ }
+ } while (interlocked_cmpxchg( (int *)lock, new, old ) != old);
+
+ return ret;
+}
+
+static NTSTATUS fast_acquire_srw_shared( RTL_SRWLOCK *lock )
+{
+ int old, new;
+ BOOLEAN wait;
+
+ if (!use_futexes()) return STATUS_NOT_IMPLEMENTED;
+
+ for (;;)
+ {
+ do
+ {
+ old = *(int *)lock;
+
+ if (!(old & SRWLOCK_FUTEX_EXCLUSIVE_LOCK_BIT)
+ && !(old & SRWLOCK_FUTEX_EXCLUSIVE_WAITERS_MASK))
+ {
+ /* Not locked exclusive, and no exclusive waiters. We can try
+ * to grab it. */
+ new = old + SRWLOCK_FUTEX_SHARED_OWNERS_INC;
+ assert(new & SRWLOCK_FUTEX_SHARED_OWNERS_MASK);
+ wait = FALSE;
+ }
+ else
+ {
+ new = old;
+ wait = TRUE;
+ }
+ } while (interlocked_cmpxchg( (int *)lock, new, old ) != old);
+
+ if (!wait)
+ return STATUS_SUCCESS;
+
+ futex_wait_bitset( (int *)lock, new, NULL, SRWLOCK_FUTEX_BITSET_SHARED );
+ }
+
+ return STATUS_SUCCESS;
+}
+
+static NTSTATUS fast_release_srw_exclusive( RTL_SRWLOCK *lock )
+{
+ int old, new;
+
+ if (!use_futexes()) return STATUS_NOT_IMPLEMENTED;
+
+ do
+ {
+ old = *(int *)lock;
+
+ if (!(old & SRWLOCK_FUTEX_EXCLUSIVE_LOCK_BIT))
+ {
+ ERR("Lock %p is not owned exclusive! (%#x)\n", lock, *(int *)lock);
+ return STATUS_RESOURCE_NOT_OWNED;
+ }
+
+ new = old & ~SRWLOCK_FUTEX_EXCLUSIVE_LOCK_BIT;
+ } while (interlocked_cmpxchg( (int *)lock, new, old ) != old);
+
+ if (new & SRWLOCK_FUTEX_EXCLUSIVE_WAITERS_MASK)
+ futex_wake_bitset( (int *)lock, 1, SRWLOCK_FUTEX_BITSET_EXCLUSIVE );
+ else
+ futex_wake_bitset( (int *)lock, INT_MAX, SRWLOCK_FUTEX_BITSET_SHARED );
+
+ return STATUS_SUCCESS;
+}
+
+static NTSTATUS fast_release_srw_shared( RTL_SRWLOCK *lock )
+{
+ int old, new;
+
+ if (!use_futexes()) return STATUS_NOT_IMPLEMENTED;
+
+ do
+ {
+ old = *(int *)lock;
+
+ if (old & SRWLOCK_FUTEX_EXCLUSIVE_LOCK_BIT)
+ {
+ ERR("Lock %p is owned exclusive! (%#x)\n", lock, *(int *)lock);
+ return STATUS_RESOURCE_NOT_OWNED;
+ }
+ else if (!(old & SRWLOCK_FUTEX_SHARED_OWNERS_MASK))
+ {
+ ERR("Lock %p is not owned shared! (%#x)\n", lock, *(int *)lock);
+ return STATUS_RESOURCE_NOT_OWNED;
+ }
+
+ new = old - SRWLOCK_FUTEX_SHARED_OWNERS_INC;
+ } while (interlocked_cmpxchg( (int *)lock, new, old ) != old);
+
+ /* Optimization: only bother waking if there are actually exclusive waiters. */
+ if (!(new & SRWLOCK_FUTEX_SHARED_OWNERS_MASK) && (new & SRWLOCK_FUTEX_EXCLUSIVE_WAITERS_MASK))
+ futex_wake_bitset( (int *)lock, 1, SRWLOCK_FUTEX_BITSET_EXCLUSIVE );
+
+ return STATUS_SUCCESS;
+}
+
+#else
+
+static NTSTATUS fast_try_acquire_srw_exclusive( RTL_SRWLOCK *lock )
+{
+ return STATUS_NOT_IMPLEMENTED;
+}
+
+static NTSTATUS fast_acquire_srw_exclusive( RTL_SRWLOCK *lock )
+{
+ return STATUS_NOT_IMPLEMENTED;
+}
+
+static NTSTATUS fast_try_acquire_srw_shared( RTL_SRWLOCK *lock )
+{
+ return STATUS_NOT_IMPLEMENTED;
+}
+
+static NTSTATUS fast_acquire_srw_shared( RTL_SRWLOCK *lock )
+{
+ return STATUS_NOT_IMPLEMENTED;
+}
+
+static NTSTATUS fast_release_srw_exclusive( RTL_SRWLOCK *lock )
+{
+ return STATUS_NOT_IMPLEMENTED;
+}
+
+static NTSTATUS fast_release_srw_shared( RTL_SRWLOCK *lock )
+{
+ return STATUS_NOT_IMPLEMENTED;
+}
+
+#endif
/* SRW locks implementation
*
@@ -1789,6 +2062,9 @@ void WINAPI RtlInitializeSRWLock( RTL_SRWLOCK *lock )
*/
void WINAPI RtlAcquireSRWLockExclusive( RTL_SRWLOCK *lock )
{
+ if (fast_acquire_srw_exclusive( lock ) != STATUS_NOT_IMPLEMENTED)
+ return;
+
if (srwlock_lock_exclusive( (unsigned int *)&lock->Ptr, SRWLOCK_RES_EXCLUSIVE ))
NtWaitForKeyedEvent( 0, srwlock_key_exclusive(lock), FALSE, NULL );
}
@@ -1803,6 +2079,10 @@ void WINAPI RtlAcquireSRWLockExclusive( RTL_SRWLOCK *lock )
void WINAPI RtlAcquireSRWLockShared( RTL_SRWLOCK *lock )
{
unsigned int val, tmp;
+
+ if (fast_acquire_srw_shared( lock ) != STATUS_NOT_IMPLEMENTED)
+ return;
+
/* Acquires a shared lock. If it's currently not possible to add elements to
* the shared queue, then request exclusive access instead. */
for (val = *(unsigned int *)&lock->Ptr;; val = tmp)
@@ -1833,6 +2113,9 @@ void WINAPI RtlAcquireSRWLockShared( RTL_SRWLOCK *lock )
*/
void WINAPI RtlReleaseSRWLockExclusive( RTL_SRWLOCK *lock )
{
+ if (fast_release_srw_exclusive( lock ) != STATUS_NOT_IMPLEMENTED)
+ return;
+
srwlock_leave_exclusive( lock, srwlock_unlock_exclusive( (unsigned int *)&lock->Ptr,
- SRWLOCK_RES_EXCLUSIVE ) - SRWLOCK_RES_EXCLUSIVE );
}
@@ -1842,6 +2125,9 @@ void WINAPI RtlReleaseSRWLockExclusive( RTL_SRWLOCK *lock )
*/
void WINAPI RtlReleaseSRWLockShared( RTL_SRWLOCK *lock )
{
+ if (fast_release_srw_shared( lock ) != STATUS_NOT_IMPLEMENTED)
+ return;
+
srwlock_leave_shared( lock, srwlock_lock_exclusive( (unsigned int *)&lock->Ptr,
- SRWLOCK_RES_SHARED ) - SRWLOCK_RES_SHARED );
}
@@ -1855,6 +2141,11 @@ void WINAPI RtlReleaseSRWLockShared( RTL_SRWLOCK *lock )
*/
BOOLEAN WINAPI RtlTryAcquireSRWLockExclusive( RTL_SRWLOCK *lock )
{
+ NTSTATUS ret;
+
+ if ((ret = fast_try_acquire_srw_exclusive( lock )) != STATUS_NOT_IMPLEMENTED)
+ return (ret == STATUS_SUCCESS);
+
return interlocked_cmpxchg( (int *)&lock->Ptr, SRWLOCK_MASK_IN_EXCLUSIVE |
SRWLOCK_RES_EXCLUSIVE, 0 ) == 0;
}
@@ -1865,6 +2156,11 @@ BOOLEAN WINAPI RtlTryAcquireSRWLockExclusive( RTL_SRWLOCK *lock )
BOOLEAN WINAPI RtlTryAcquireSRWLockShared( RTL_SRWLOCK *lock )
{
unsigned int val, tmp;
+ NTSTATUS ret;
+
+ if ((ret = fast_try_acquire_srw_shared( lock )) != STATUS_NOT_IMPLEMENTED)
+ return (ret == STATUS_SUCCESS);
+
for (val = *(unsigned int *)&lock->Ptr;; val = tmp)
{
if (val & SRWLOCK_MASK_EXCLUSIVE_QUEUE)
--
2.20.1
More information about the wine-devel
mailing list