[PATCH 3/3] ntdll: Optimize get_vprot_range_size() for big ranges.

Paul Gofman pgofman at codeweavers.com
Thu Sep 16 12:06:39 CDT 2021


Signed-off-by: Paul Gofman <pgofman at codeweavers.com>
---
    This patch greatly reduces the overhead of scanning huge ranges for a
    change in the masked vprot byte.
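    Not part of the patch, just to illustrate the technique it uses: the scan
    is split into an unaligned head handled byte by byte, an aligned middle
    part where the reference protection byte and the mask are broadcast into
    64-bit words with a 0x0101010101010101 multiply so that eight vprot bytes
    are compared per iteration, and a byte-wise tail.  A self-contained sketch
    with made-up names (scan_equal_bytes(), a flat prot[] array instead of the
    ntdll vprot tables; memcpy loads are used here where the patch can read
    aligned words directly):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Return how many leading bytes of prot[0..size-1] have the same masked
     * value as prot[0] (size must be nonzero).  Head and tail are scanned per
     * byte, the aligned middle eight bytes at a time, mirroring the patch. */
    static size_t scan_equal_bytes( const uint8_t *prot, size_t size, uint8_t mask )
    {
        const uint64_t splat = 0x0101010101010101ull;
        uint64_t ref_word  = splat * (uint64_t)(prot[0] & mask);
        uint64_t mask_word = splat * (uint64_t)mask;
        size_t i = 0, head, middle_end;

        /* Byte-wise head until prot + i is 8-byte aligned. */
        head = (8 - ((uintptr_t)prot & 7)) & 7;
        if (head > size) head = size;
        for (; i < head; ++i)
            if ((prot[i] ^ prot[0]) & mask) return i;

        /* Word-wise middle: a zero byte in "changed" means the masked values match. */
        middle_end = head + ((size - head) & ~(size_t)7);
        for (; i < middle_end; i += 8)
        {
            uint64_t word, changed;
            memcpy( &word, prot + i, 8 );  /* the patch reads aligned words directly */
            changed = (word & mask_word) ^ ref_word;
            if (changed)
            {
                unsigned int j = 0;
                while (!((const uint8_t *)&changed)[j]) ++j;  /* byte j <-> prot[i + j] */
                return i + j;
            }
        }

        /* Byte-wise tail. */
        for (; i < size; ++i)
            if ((prot[i] ^ prot[0]) & mask) return i;
        return size;
    }

    int main(void)
    {
        uint8_t prot[64] = { 0 };
        memset( prot, 0x07, 19 );  /* 19 "pages" with vprot 0x07, the rest 0x00 */
        printf( "%u\n", (unsigned int)scan_equal_bytes( prot, sizeof(prot), 0x0f ) );  /* prints 19 */
        return 0;
    }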
    In particular, it reduces the CPU overhead in DeathLoop, which reserves a
    huge memory area, commits some pages within it, and then frequently calls
    VirtualQuery() on addresses inside the reserved area.
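    The following is only a sketch of that usage pattern (the sizes and
    protection values are made up, not taken from the game): a large
    reservation with only a few committed pages means that VirtualQuery() on
    an address deep inside the reservation has to find the end of a long run
    of pages with identical protection, which is exactly the scan this patch
    speeds up.

    #include <windows.h>
    #include <stdio.h>

    int main(void)
    {
        MEMORY_BASIC_INFORMATION mbi;
        SYSTEM_INFO si;
        char *base;

        GetSystemInfo( &si );

        /* Reserve a large range, then commit only a few pages at its start. */
        base = VirtualAlloc( NULL, (SIZE_T)1 << 30, MEM_RESERVE, PAGE_NOACCESS );
        if (!base) return 1;
        VirtualAlloc( base, 16 * si.dwPageSize, MEM_COMMIT, PAGE_READWRITE );

        /* Querying an address deep inside the reservation makes ntdll determine
         * how far the run of identically protected (reserved) pages extends. */
        if (VirtualQuery( base + ((SIZE_T)512 << 20), &mbi, sizeof(mbi) ))
            printf( "state %#lx, region size %llu\n",
                    mbi.State, (unsigned long long)mbi.RegionSize );

        VirtualFree( base, 0, MEM_RELEASE );
        return 0;
    }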

 dlls/ntdll/unix/virtual.c | 79 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 74 insertions(+), 5 deletions(-)

diff --git a/dlls/ntdll/unix/virtual.c b/dlls/ntdll/unix/virtual.c
index 14ea3b11143..fce256a8267 100644
--- a/dlls/ntdll/unix/virtual.c
+++ b/dlls/ntdll/unix/virtual.c
@@ -947,17 +947,86 @@ static BYTE get_page_vprot( const void *addr )
  *           get_vprot_range_size
  *
  * Return the size of the region with equal masked protection byte.
+ * Also return the protection byte of the first page in *vprot.
  * The function assumes that base and size are page aligned and
  * base + size does not wrap around. */
 static SIZE_T get_vprot_range_size( BYTE *base, SIZE_T size, BYTE mask, BYTE *vprot )
 {
-    BYTE *addr;
+#define BYTES_IN_WORD sizeof(UINT64)
+    SIZE_T i, start_idx, end_idx, aligned_start_idx, aligned_end_idx, count;
+    static const UINT_PTR index_align_mask = BYTES_IN_WORD - 1;
+    UINT64 vprot_word, mask_word, changed_word;
+    const BYTE *vprot_ptr;
+    unsigned int j;
+#ifdef _WIN64
+    size_t idx_page;
+#endif
+    size_t idx;
+
+
+    TRACE("base %p, size %p, mask %#x.\n", base, (void *)size, mask);
 
-    *vprot = get_page_vprot( base );
-    for (addr = base + page_size; addr != base + size; addr += page_size)
-        if ((*vprot ^ get_page_vprot( addr )) & mask) break;
+    start_idx = (size_t)base >> page_shift;
+    end_idx = start_idx + (size >> page_shift);
+    idx = start_idx;
+#ifdef _WIN64
+    end_idx = min( end_idx, pages_vprot_size << pages_vprot_shift );
+    if (end_idx <= start_idx) return size;
+    idx_page = idx >> pages_vprot_shift;
+    idx &= pages_vprot_mask;
+    vprot_ptr = pages_vprot[idx_page++];
+#else
+    vprot_ptr = pages_vprot;
+#endif
+
+    aligned_start_idx = (start_idx + index_align_mask) & ~index_align_mask;
+    if (aligned_start_idx > end_idx) aligned_start_idx = end_idx;
+
+    aligned_end_idx = end_idx & ~index_align_mask;
+    if (aligned_end_idx < aligned_start_idx) aligned_end_idx = aligned_start_idx;
+
+    /* The page count in a zero level page table on x64 is a multiple of BYTES_IN_WORD,
+     * so we don't have to worry about crossing the block boundary on unaligned idx values. */
+    *vprot = vprot_ptr[idx];
+    count = aligned_start_idx - start_idx;
+
+    for (i = 0; i < count; ++i)
+        if ((*vprot ^ vprot_ptr[idx++]) & mask) return i << page_shift;
+
+    count += aligned_end_idx - aligned_start_idx;
+    vprot_word = 0x101010101010101ull * *vprot;
+    mask_word = 0x101010101010101ull * mask;
+    for (; i < count; i += 8)
+    {
+#ifdef _WIN64
+        if (idx >> pages_vprot_shift)
+        {
+            idx = 0;
+            vprot_ptr = pages_vprot[idx_page++];
+        }
+#endif
+        changed_word = (vprot_word ^ *(UINT64 *)(vprot_ptr + idx)) & mask_word;
+        if (changed_word)
+        {
+            for (j = 0; !((BYTE *)&changed_word)[j]; ++j) ++i;
+            return i << page_shift;
+        }
+        idx += 8;
+    }
+
+#ifdef _WIN64
+    if (aligned_end_idx != end_idx && (idx >> pages_vprot_shift))
+    {
+        idx = 0;
+        vprot_ptr = pages_vprot[idx_page++];
+    }
+#endif
+    count += end_idx - aligned_end_idx;
+    for (; i < count; ++i)
+        if ((*vprot ^ vprot_ptr[idx++]) & mask) return i << page_shift;
 
-    return addr - base;
+    return *vprot & mask ? count << page_shift : size;
+#undef BYTES_IN_WORD
 }
 
 /***********************************************************************
-- 
2.31.1