[PATCH v2 3/3] ntdll: Optimize get_vprot_range_size() for big ranges.
Paul Gofman
pgofman at codeweavers.com
Thu Sep 16 12:50:05 CDT 2021
Signed-off-by: Paul Gofman <pgofman at codeweavers.com>
---
This patch greatly reduces the overhead of scanning huge ranges for a change
in vprot.
It improves performance in Deathloop, which reserves a huge memory area,
then commits some pages from within it, and often calls VirtualQuery() for
addresses inside the allocated area.
dlls/ntdll/unix/virtual.c | 80 ++++++++++++++++++++++++++++++++++++---
1 file changed, 75 insertions(+), 5 deletions(-)
diff --git a/dlls/ntdll/unix/virtual.c b/dlls/ntdll/unix/virtual.c
index 14ea3b11143..96ecce3daf4 100644
--- a/dlls/ntdll/unix/virtual.c
+++ b/dlls/ntdll/unix/virtual.c
@@ -947,17 +947,87 @@ static BYTE get_page_vprot( const void *addr )
* get_vprot_range_size
*
* Return the size of the region with equal masked protection byte.
 * The function assumes that base and size are page aligned and
 * base + size does not wrap around. */
static SIZE_T get_vprot_range_size( BYTE *base, SIZE_T size, BYTE mask, BYTE *vprot )
{
    SIZE_T i, start_idx, end_idx, aligned_start_idx, aligned_end_idx, count;
    static const UINT_PTR index_align_mask = 7;  /* scan 8 vprot bytes (one UINT64) at a time */
    UINT64 vprot_word, mask_word, changed_word;
    const BYTE *vprot_ptr;
#ifdef _WIN64
    size_t idx_page;  /* index of the current second-level vprot table page */
#endif
    unsigned int j;
    size_t idx;

    TRACE("base %p, size %p, mask %#x.\n", base, (void *)size, mask);

    start_idx = (size_t)base >> page_shift;
    end_idx = start_idx + (size >> page_shift);
    idx = start_idx;
#ifdef _WIN64
    /* Clamp to the allocated part of the two-level vprot table; pages past its
     * end implicitly have vprot 0 and are accounted for by the final return. */
    end_idx = min( end_idx, pages_vprot_size << pages_vprot_shift );
    if (end_idx <= start_idx)
    {
        *vprot = 0;
        return size;
    }
    idx_page = idx >> pages_vprot_shift;
    idx &= pages_vprot_mask;
    vprot_ptr = pages_vprot[idx_page++];
#else
    vprot_ptr = pages_vprot;
#endif

    /* Split [start_idx, end_idx) into an unaligned head, an 8-aligned middle
     * scanned one UINT64 at a time, and an unaligned tail. */
    aligned_start_idx = (start_idx + index_align_mask) & ~index_align_mask;
    if (aligned_start_idx > end_idx) aligned_start_idx = end_idx;

    aligned_end_idx = end_idx & ~index_align_mask;
    if (aligned_end_idx < aligned_start_idx) aligned_end_idx = aligned_start_idx;

    /* Page count in zero level page table on x64 is at least the multiples of 8
     * so we don't have to worry about crossing the boundary on unaligned idx values. */
    *vprot = vprot_ptr[idx];
    count = aligned_start_idx - start_idx;
    for (i = 0; i < count; ++i)
        if ((*vprot ^ vprot_ptr[idx++]) & mask) return i << page_shift;

    count += aligned_end_idx - aligned_start_idx;
    vprot_word = 0x101010101010101ull * *vprot;  /* *vprot replicated into all 8 bytes */
    mask_word  = 0x101010101010101ull * mask;
    for (; i < count; i += 8)
    {
#ifdef _WIN64
        if (idx >> pages_vprot_shift)  /* crossed into the next second-level page */
        {
            idx = 0;
            vprot_ptr = pages_vprot[idx_page++];
        }
#endif
        /* idx is 8-aligned here.  NOTE(review): the BYTE* -> UINT64* access
         * relies on the build disabling strict aliasing — confirm the build
         * flags (Wine builds with -fno-strict-aliasing / -fgnu89-inline). */
        changed_word = (vprot_word ^ *(UINT64 *)(vprot_ptr + idx)) & mask_word;
        if (changed_word)
        {
            /* Locate the first differing byte; byte 0 is the lowest-addressed
             * page on little-endian targets. */
            for (j = 0; !((BYTE *)&changed_word)[j]; ++j) ++i;
            return i << page_shift;
        }
        idx += 8;
    }

#ifdef _WIN64
    if (aligned_end_idx != end_idx && (idx >> pages_vprot_shift))
    {
        idx = 0;
        vprot_ptr = pages_vprot[idx_page];
    }
#endif
    count += end_idx - aligned_end_idx;
    for (; i < count; ++i)
        if ((*vprot ^ vprot_ptr[idx++]) & mask) return i << page_shift;

    /* No change found up to end_idx.  If *vprot has bits set in mask, any pages
     * beyond the allocated table (implicit vprot 0) differ, so the run stops at
     * end_idx; otherwise those pages match too and the whole range is uniform.
     * On 32-bit end_idx is never clamped, so both branches yield size. */
    return *vprot & mask ? count << page_shift : size;
}
/***********************************************************************
--
2.31.1
More information about the wine-devel
mailing list