[PATCH 2/5] ntdll: Reimplement FLS data management on top of up to date structures.

Paul Gofman pgofman at codeweavers.com
Fri Oct 2 05:16:46 CDT 2020


Win10 18132 introduced an increased FLS data limit and entirely changed
the underlying structure.

Signed-off-by: Paul Gofman <pgofman at codeweavers.com>
---
 dlls/kernelbase/thread.c |   2 +-
 dlls/ntdll/loader.c      |   6 +-
 dlls/ntdll/ntdll_misc.h  |   3 +
 dlls/ntdll/thread.c      | 244 ++++++++++++++++++++++++++++++++-------
 include/winternl.h       |  28 ++++-
 5 files changed, 233 insertions(+), 50 deletions(-)

diff --git a/dlls/kernelbase/thread.c b/dlls/kernelbase/thread.c
index f2b746856e1..ffa05f8fe81 100644
--- a/dlls/kernelbase/thread.c
+++ b/dlls/kernelbase/thread.c
@@ -762,7 +762,7 @@ struct fiber_data
     CONTEXT               context;           /* 14/30 fiber context */
     DWORD                 flags;             /*       fiber flags */
     LPFIBER_START_ROUTINE start;             /*       start routine */
-    void                **fls_slots;         /*       fiber storage slots */
+    void                 *fls_slots;         /*       fiber storage slots */
 };
 
 extern void WINAPI switch_fiber( CONTEXT *old, CONTEXT *new );
diff --git a/dlls/ntdll/loader.c b/dlls/ntdll/loader.c
index cd27ce5658b..1d5ffc33f5d 100644
--- a/dlls/ntdll/loader.c
+++ b/dlls/ntdll/loader.c
@@ -168,7 +168,6 @@ static RTL_CRITICAL_SECTION peb_lock = { &peb_critsect_debug, -1, 0, 0, 0, 0 };
 static PEB_LDR_DATA ldr = { sizeof(ldr), TRUE };
 static RTL_BITMAP tls_bitmap;
 static RTL_BITMAP tls_expansion_bitmap;
-static RTL_BITMAP fls_bitmap;
 
 static WINE_MODREF *cached_modref;
 static WINE_MODREF *current_modref;
@@ -4005,7 +4004,6 @@ static NTSTATUS process_init(void)
     peb->FastPebLock        = &peb_lock;
     peb->TlsBitmap          = &tls_bitmap;
     peb->TlsExpansionBitmap = &tls_expansion_bitmap;
-    peb->FlsBitmap          = &fls_bitmap;
     peb->LoaderLock         = &loader_section;
     peb->OSMajorVersion     = 5;
     peb->OSMinorVersion     = 1;
@@ -4014,13 +4012,11 @@ static NTSTATUS process_init(void)
     peb->SessionId          = 1;
     peb->ProcessHeap        = RtlCreateHeap( HEAP_GROWABLE, NULL, 0, 0, NULL, NULL );
 
-    InitializeListHead( &peb->FlsListHead );
     RtlInitializeBitMap( &tls_bitmap, peb->TlsBitmapBits, sizeof(peb->TlsBitmapBits) * 8 );
     RtlInitializeBitMap( &tls_expansion_bitmap, peb->TlsExpansionBitmapBits,
                          sizeof(peb->TlsExpansionBitmapBits) * 8 );
-    RtlInitializeBitMap( &fls_bitmap, peb->FlsBitmapBits, sizeof(peb->FlsBitmapBits) * 8 );
     RtlSetBits( peb->TlsBitmap, 0, 1 ); /* TLS index 0 is reserved and should be initialized to NULL. */
-    RtlSetBits( peb->FlsBitmap, 0, 1 );
+    init_global_fls_data();
 
     InitializeListHead( &ldr.InLoadOrderModuleList );
     InitializeListHead( &ldr.InMemoryOrderModuleList );
diff --git a/dlls/ntdll/ntdll_misc.h b/dlls/ntdll/ntdll_misc.h
index 9e905a1bdc0..19bf574a446 100644
--- a/dlls/ntdll/ntdll_misc.h
+++ b/dlls/ntdll/ntdll_misc.h
@@ -127,4 +127,7 @@ static inline void ascii_to_unicode( WCHAR *dst, const char *src, size_t len )
     while (len--) *dst++ = (unsigned char)*src++;
 }
 
+/* FLS data */
+extern void init_global_fls_data(void) DECLSPEC_HIDDEN;
+
 #endif
diff --git a/dlls/ntdll/thread.c b/dlls/ntdll/thread.c
index 57292043582..a4509d82328 100644
--- a/dlls/ntdll/thread.c
+++ b/dlls/ntdll/thread.c
@@ -34,6 +34,7 @@
 #include "wine/exception.h"
 
 WINE_DECLARE_DEBUG_CHANNEL(relay);
+WINE_DECLARE_DEBUG_CHANNEL(thread);
 
 struct _KUSER_SHARED_DATA *user_shared_data = (void *)0x7ffe0000;
 
@@ -253,41 +254,128 @@ TEB_ACTIVE_FRAME * WINAPI RtlGetFrame(void)
  ***********************************************************************/
 
 
+static GLOBAL_FLS_DATA fls_data;
+
+#define MAX_FLS_DATA_COUNT 0xff0
+/* Initialize the list head kept in the global FLS data; called from process_init(). */
+void DECLSPEC_HIDDEN init_global_fls_data(void)
+{
+    InitializeListHead( &fls_data.fls_list_head );
+}
+/* Take the lock used around accesses to fls_data (the PEB lock). */
+static void lock_fls_data(void)
+{
+    RtlAcquirePebLock();
+}
+/* Release the lock taken by lock_fls_data(). */
+static void unlock_fls_data(void)
+{
+    RtlReleasePebLock();
+}
+/* Number of entries in the given chunk: 0x10 for chunk 0, doubling with every next chunk. */
+static unsigned int fls_chunk_size( unsigned int chunk_index )
+{
+    return 0x10 << chunk_index;
+}
+/* Convert a (chunk index, index inside that chunk) pair to a global FLS index. */
+static unsigned int fls_index_from_chunk_index( unsigned int chunk_index, unsigned int index )
+{
+    return 0x10 * ((1 << chunk_index) - 1) + index;
+}
+/* Split a global FLS index: returns the chunk index, stores the index inside the chunk. */
+static unsigned int fls_chunk_index_from_index( unsigned int index, unsigned int *index_in_chunk )
+{
+    unsigned int chunk_index = 0;
+    /* No upper bound is applied here: the callers range-check index before calling. */
+    while (index >= fls_chunk_size( chunk_index ))
+        index -= fls_chunk_size( chunk_index++ );
+
+    *index_in_chunk = index;
+    return chunk_index;
+}
+/* Allocate a zeroed TEB_FLS_DATA and append it to the global list; returns NULL on failure. */
+static TEB_FLS_DATA * fls_alloc_data(void)
+{
+    TEB_FLS_DATA *fls;
+
+    if (!(fls = RtlAllocateHeap( GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*fls) )))
+        return NULL;
+
+    lock_fls_data();
+    InsertTailList( &fls_data.fls_list_head, &fls->fls_list_entry );
+    unlock_fls_data();
+
+    return fls;
+}
+
+
 /***********************************************************************
  *              RtlFlsAlloc  (NTDLL.@)
  */
 NTSTATUS WINAPI DECLSPEC_HOTPATCH RtlFlsAlloc( PFLS_CALLBACK_FUNCTION callback, ULONG *ret_index )
 {
-    PEB * const peb = NtCurrentTeb()->Peb;
-    NTSTATUS status = STATUS_NO_MEMORY;
-    DWORD index;
+    unsigned int chunk_index, index, i;
+    FLS_INFO_CHUNK *chunk;
+    TEB_FLS_DATA *fls;
 
-    RtlAcquirePebLock();
-    if (peb->FlsCallback ||
-        (peb->FlsCallback = RtlAllocateHeap( GetProcessHeap(), HEAP_ZERO_MEMORY,
-                                        8 * sizeof(peb->FlsBitmapBits) * sizeof(void*) )))
+    if (!(fls = NtCurrentTeb()->FlsSlots)
+            && !(NtCurrentTeb()->FlsSlots = fls = fls_alloc_data()))
+        return STATUS_NO_MEMORY;
+    /* Pick the first chunk that is either not allocated yet or still has a free entry. */
+    lock_fls_data();
+    for (i = 0; i < ARRAY_SIZE(fls_data.fls_callback_chunks); ++i)
+    {
+        if (!fls_data.fls_callback_chunks[i] || fls_data.fls_callback_chunks[i]->count < fls_chunk_size( i ))
+            break;
+    }
+
+    if ((chunk_index = i) == ARRAY_SIZE(fls_data.fls_callback_chunks))
     {
-        index = RtlFindClearBitsAndSet( peb->FlsBitmap, 1, 1 );
-        if (index != ~0U)
+        unlock_fls_data();
+        return STATUS_NO_MEMORY;
+    }
+
+    if ((chunk = fls_data.fls_callback_chunks[chunk_index]))
+    {
+        for (index = 0; index < fls_chunk_size( chunk_index ); ++index)
+            if (!chunk->callbacks[index].callback)
+                break;
+        assert( index < fls_chunk_size( chunk_index )); /* count < size, so a free entry exists. */
+    }
+    else
+    {
+        fls_data.fls_callback_chunks[chunk_index] = chunk = RtlAllocateHeap( GetProcessHeap(), HEAP_ZERO_MEMORY,
+                offsetof(FLS_INFO_CHUNK, callbacks) + sizeof(*chunk->callbacks) * fls_chunk_size( chunk_index ));
+        if (!chunk)
         {
-            if (!NtCurrentTeb()->FlsSlots &&
-                !(NtCurrentTeb()->FlsSlots = RtlAllocateHeap( GetProcessHeap(), HEAP_ZERO_MEMORY,
-                                                        8 * sizeof(peb->FlsBitmapBits) * sizeof(void*) )))
-            {
-                RtlClearBits( peb->FlsBitmap, index, 1 );
-            }
-            else
-            {
-                NtCurrentTeb()->FlsSlots[index] = 0; /* clear the value */
-                peb->FlsCallback[index] = callback;
-                status = STATUS_SUCCESS;
-            }
+            unlock_fls_data();
+            return STATUS_NO_MEMORY;
+        }
+
+        if (chunk_index)
+        {
+            index = 0;
+        }
+        else
+        {
+            chunk->count = 1; /* FLS index 0 is prohibited. */
+            chunk->callbacks[0].callback = (void *)~(ULONG_PTR)0;
+            index = 1;
         }
     }
-    RtlReleasePebLock();
-    if (!status)
-        *ret_index = index;
-    return status;
+
+    ++chunk->count;
+    chunk->callbacks[index].callback = callback ? callback : (void *)~(ULONG_PTR)0;
+
+    if ((*ret_index = fls_index_from_chunk_index( chunk_index, index )) > fls_data.fls_high_index)
+        fls_data.fls_high_index = *ret_index;
+
+    unlock_fls_data();
+    /* Clear the value the current thread holds in the new slot, if its data chunk exists. */
+    if (fls->fls_data_chunks[chunk_index])
+        fls->fls_data_chunks[chunk_index][index + 1] = NULL;
+
+    return STATUS_SUCCESS;
 }
 
 
@@ -296,20 +384,51 @@ NTSTATUS WINAPI DECLSPEC_HOTPATCH RtlFlsAlloc( PFLS_CALLBACK_FUNCTION callback,
  */
 NTSTATUS WINAPI DECLSPEC_HOTPATCH RtlFlsFree( ULONG index )
 {
-    NTSTATUS status;
+    unsigned int chunk_index, idx;
+    FLS_INFO_CHUNK *chunk;
+    TEB_FLS_DATA *fls;
 
-    RtlAcquirePebLock();
-    if (RtlAreBitsSet( NtCurrentTeb()->Peb->FlsBitmap, index, 1 ))
+    lock_fls_data();
+
+    if (!index || index > fls_data.fls_high_index)
+    {
+        unlock_fls_data();
+        return STATUS_INVALID_PARAMETER;
+    }
+
+    chunk_index = fls_chunk_index_from_index( index, &idx );
+    if (!(chunk = fls_data.fls_callback_chunks[chunk_index])
+            || !chunk->callbacks[idx].callback)
+    {
+        unlock_fls_data();
+        return STATUS_INVALID_PARAMETER;
+    }
+
+    if ((fls = NtCurrentTeb()->FlsSlots) && fls->fls_data_chunks[chunk_index])
     {
-        RtlClearBits( NtCurrentTeb()->Peb->FlsBitmap, index, 1 );
         /* FIXME: call Fls callback */
-        /* FIXME: add equivalent of ThreadZeroTlsCell here */
-        if (NtCurrentTeb()->FlsSlots) NtCurrentTeb()->FlsSlots[index] = 0;
-        status = STATUS_SUCCESS;
+        fls->fls_data_chunks[chunk_index][idx + 1] = NULL;
     }
-    else status = STATUS_INVALID_PARAMETER;
-    RtlReleasePebLock();
-    return status;
+
+    --chunk->count;
+    chunk->callbacks[idx].callback = NULL;
+    /* When the highest index is freed, move fls_high_index down to the next allocated one. */
+    if (index == fls_data.fls_high_index)
+    {
+        while (--fls_data.fls_high_index)
+        {
+            if (idx)
+                --idx;
+            else
+                idx = fls_chunk_size( --chunk_index ) - 1;
+            /* idx may have moved into the previous chunk, so look the chunk up again. */
+            if (fls_data.fls_callback_chunks[chunk_index]->callbacks[idx].callback)
+                break;
+        }
+    }
+
+    unlock_fls_data();
+    return STATUS_SUCCESS;
 }
 
 
@@ -318,15 +437,25 @@ NTSTATUS WINAPI DECLSPEC_HOTPATCH RtlFlsFree( ULONG index )
  */
 NTSTATUS WINAPI DECLSPEC_HOTPATCH RtlFlsSetValue( ULONG index, void *data )
 {
-    if (!index || index >= 8 * sizeof(NtCurrentTeb()->Peb->FlsBitmapBits))
+    unsigned int chunk_index, idx;
+    TEB_FLS_DATA *fls;
+    /* Only the index range is checked here, not whether the index is currently allocated. */
+    if (!index || index >= MAX_FLS_DATA_COUNT)
         return STATUS_INVALID_PARAMETER;
 
-    if (!NtCurrentTeb()->FlsSlots &&
-        !(NtCurrentTeb()->FlsSlots = RtlAllocateHeap( GetProcessHeap(), HEAP_ZERO_MEMORY,
-                                        8 * sizeof(NtCurrentTeb()->Peb->FlsBitmapBits) * sizeof(void*) )))
+    if (!(fls = NtCurrentTeb()->FlsSlots)
+            && !(NtCurrentTeb()->FlsSlots = fls = fls_alloc_data()))
+        return STATUS_NO_MEMORY;
+
+    chunk_index = fls_chunk_index_from_index( index, &idx );
+    /* Data chunks are allocated on first use with one extra leading element: values start at [1]. */
+    if (!fls->fls_data_chunks[chunk_index] &&
+            !(fls->fls_data_chunks[chunk_index] = RtlAllocateHeap( GetProcessHeap(), HEAP_ZERO_MEMORY,
+            (fls_chunk_size( chunk_index ) + 1) * sizeof(*fls->fls_data_chunks[chunk_index]) )))
         return STATUS_NO_MEMORY;
 
-    NtCurrentTeb()->FlsSlots[index] = data;
+    fls->fls_data_chunks[chunk_index][idx + 1] = data;
+
     return STATUS_SUCCESS;
 }
 
@@ -336,11 +465,15 @@ NTSTATUS WINAPI DECLSPEC_HOTPATCH RtlFlsSetValue( ULONG index, void *data )
  */
 NTSTATUS WINAPI DECLSPEC_HOTPATCH RtlFlsGetValue( ULONG index, void **data )
 {
-    if (!index || index >= 8 * sizeof(NtCurrentTeb()->Peb->FlsBitmapBits) || !NtCurrentTeb()->FlsSlots)
+    unsigned int chunk_index, idx;
+    TEB_FLS_DATA *fls;
+    /* Fails if the thread has no FLS block yet; a data chunk never allocated reads back as NULL. */
+    if (!index || index >= MAX_FLS_DATA_COUNT || !(fls = NtCurrentTeb()->FlsSlots))
         return STATUS_INVALID_PARAMETER;
 
-    *data = NtCurrentTeb()->FlsSlots[index];
+    chunk_index = fls_chunk_index_from_index( index, &idx );
 
+    *data = fls->fls_data_chunks[chunk_index] ? fls->fls_data_chunks[chunk_index][idx + 1] : NULL;
     return STATUS_SUCCESS;
 }
 
@@ -350,6 +483,31 @@ NTSTATUS WINAPI DECLSPEC_HOTPATCH RtlFlsGetValue( ULONG index, void **data )
  */
 void WINAPI DECLSPEC_HOTPATCH RtlProcessFlsData( void *teb_fls_data, ULONG flags )
 {
+    TEB_FLS_DATA *fls = teb_fls_data;
+    unsigned int i;
+    /* flags: 1 - unlink the block from the global list, 2 - free its data chunks and the block. */
+    TRACE_(thread)( "teb_fls_data %p, flags %#x.\n", teb_fls_data, flags );
+
+    if (flags & ~3)
+        FIXME_(thread)( "Unknown flags %#x.\n", flags );
+
+    if (!fls)
+        return;
+
+    if (flags & 1)
+    {
+        lock_fls_data();
+        /* Not using RemoveEntryList() as according to the tests Windows does not zero the list entry here. */
+        fls->fls_list_entry.Flink->Blink = fls->fls_list_entry.Blink;
+        fls->fls_list_entry.Blink->Flink = fls->fls_list_entry.Flink;
+        unlock_fls_data();
+    }
+
     if (flags & 2)
-        RtlFreeHeap( GetProcessHeap(), 0, teb_fls_data );
+    {
+        for (i = 0; i < ARRAY_SIZE(fls->fls_data_chunks); ++i)
+            RtlFreeHeap( GetProcessHeap(), 0, fls->fls_data_chunks[i] );
+
+        RtlFreeHeap( GetProcessHeap(), 0, fls );
+    }
 }
diff --git a/include/winternl.h b/include/winternl.h
index 75324635ed7..89ea58c2d93 100644
--- a/include/winternl.h
+++ b/include/winternl.h
@@ -265,6 +265,32 @@ typedef struct _TEB_ACTIVE_FRAME_EX
     void            *ExtensionIdentifier;
 } TEB_ACTIVE_FRAME_EX, *PTEB_ACTIVE_FRAME_EX;
 
+typedef struct _FLS_CALLBACK
+{
+    void                  *unknown;
+    PFLS_CALLBACK_FUNCTION callback; /* ~0 if the index is allocated with a NULL callback, NULL if the FLS index is free. */
+} FLS_CALLBACK, *PFLS_CALLBACK;
+
+typedef struct _FLS_INFO_CHUNK
+{
+    ULONG           count;         /* number of allocated FLS indexes in the chunk. */
+    FLS_CALLBACK    callbacks[1];  /* 0x10 entries for chunk 0; every following chunk
+                                    * is twice the size of the previous one. */
+} FLS_INFO_CHUNK, *PFLS_INFO_CHUNK;
+
+typedef struct _GLOBAL_FLS_DATA
+{
+    FLS_INFO_CHUNK *fls_callback_chunks[8]; /* callback chunks, allocated on demand. */
+    LIST_ENTRY      fls_list_head;          /* list of the TEB_FLS_DATA blocks. */
+    ULONG           fls_high_index;         /* highest FLS index currently allocated. */
+} GLOBAL_FLS_DATA, *PGLOBAL_FLS_DATA;
+
+typedef struct _TEB_FLS_DATA
+{
+    LIST_ENTRY      fls_list_entry;      /* entry in GLOBAL_FLS_DATA.fls_list_head. */
+    void          **fls_data_chunks[8];  /* per-chunk value arrays; in-chunk index i is stored at [i + 1]. */
+} TEB_FLS_DATA, *PTEB_FLS_DATA;
+
 #define TEB_ACTIVE_FRAME_CONTEXT_FLAG_EXTENDED 0x00000001
 #define TEB_ACTIVE_FRAME_FLAG_EXTENDED         0x00000001
 
@@ -446,7 +472,7 @@ typedef struct _TEB
     ULONG                        HeapVirtualAffinity;               /* fa8/17b0 */
     PVOID                        CurrentTransactionHandle;          /* fac/17b8 */
     TEB_ACTIVE_FRAME            *ActiveFrame;                       /* fb0/17c0 */
-    PVOID                       *FlsSlots;                          /* fb4/17c8 */
+    TEB_FLS_DATA                *FlsSlots;                          /* fb4/17c8 */
     PVOID                        PreferredLanguages;                /* fb8/17d0 */
     PVOID                        UserPrefLanguages;                 /* fbc/17d8 */
     PVOID                        MergedPrefLanguages;               /* fc0/17e0 */
-- 
2.26.2




More information about the wine-devel mailing list