[PATCH v4 3/3] ntdll: Optimize get_vprot_range_size() for big ranges.
Paul Gofman
pgofman at codeweavers.com
Mon Sep 20 14:38:51 CDT 2021
Signed-off-by: Paul Gofman <pgofman at codeweavers.com>
---
dlls/ntdll/unix/virtual.c | 67 ++++++++++++++++++++++++++++++++++++---
1 file changed, 62 insertions(+), 5 deletions(-)
diff --git a/dlls/ntdll/unix/virtual.c b/dlls/ntdll/unix/virtual.c
index fef66061d4a..b8213fd74f5 100644
--- a/dlls/ntdll/unix/virtual.c
+++ b/dlls/ntdll/unix/virtual.c
@@ -947,17 +947,74 @@ static BYTE get_page_vprot( const void *addr )
* get_vprot_range_size
*
* Return the size of the region with equal masked protection byte.
+ * mask selects which protection bits must match across the range.
* The function assumes that base and size are page aligned and
* base + size does not wrap around. */
static SIZE_T get_vprot_range_size( char *base, SIZE_T size, BYTE mask, BYTE *vprot )
{
- char *addr;
+#define BYTES_IN_WORD sizeof(UINT64)
+ static const UINT_PTR index_align_mask = BYTES_IN_WORD - 1;
+ static const UINT64 word_from_byte = 0x101010101010101ull;
+ SIZE_T i, start_idx, end_idx, aligned_start_idx;
+ UINT64 vprot_word, mask_word, changed_word;
+ const BYTE *vprot_ptr;
+#ifdef _WIN64
+ size_t idx_page;
+#endif
+ unsigned int j;
+ size_t idx;
+
+ TRACE("base %p, size %p, mask %#x.\n", base, (void *)size, mask);
+
+ start_idx = (size_t)base >> page_shift;
+ end_idx = start_idx + (size >> page_shift);
+ idx = start_idx;
+#ifdef _WIN64
+ end_idx = min( end_idx, pages_vprot_size << pages_vprot_shift );
+ if (end_idx <= start_idx)
+ {
+ *vprot = 0;
+ return size;
+ }
+ idx_page = idx >> pages_vprot_shift;
+ idx &= pages_vprot_mask;
+ vprot_ptr = pages_vprot[idx_page];
+#else
+ vprot_ptr = pages_vprot;
+#endif
+
+ aligned_start_idx = (start_idx + index_align_mask) & ~index_align_mask;
+ if (aligned_start_idx > end_idx) aligned_start_idx = end_idx;
- *vprot = get_page_vprot( base );
- for (addr = base + page_size; addr != base + size; addr += page_size)
- if ((*vprot ^ get_page_vprot( addr )) & mask) break;
+ /* The page count in a zero-level page table on x64 is a multiple of BYTES_IN_WORD,
+ * so we don't have to worry about crossing the boundary on unaligned idx values. */
+ *vprot = vprot_ptr[idx];
+
+ for (i = start_idx; i < aligned_start_idx; ++i)
+ if ((*vprot ^ vprot_ptr[idx++]) & mask) return (i - start_idx) << page_shift;
+
+ vprot_word = word_from_byte * *vprot;
+ mask_word = word_from_byte * mask;
+ for (; i < end_idx; i += BYTES_IN_WORD)
+ {
+#ifdef _WIN64
+ if (idx >> pages_vprot_shift)
+ {
+ idx = 0;
+ vprot_ptr = pages_vprot[++idx_page];
+ }
+#endif
+ changed_word = (vprot_word ^ *(UINT64 *)(vprot_ptr + idx)) & mask_word;
+ if (changed_word)
+ {
+ for (j = 0; i < end_idx && !((BYTE *)&changed_word)[j]; ++j) ++i;
+ return (i - start_idx) << page_shift;
+ }
+ idx += BYTES_IN_WORD;
+ }
- return addr - base;
+ return *vprot & mask ? (end_idx - start_idx) << page_shift : size;
+#undef BYTES_IN_WORD
}
/***********************************************************************
--
2.31.1
More information about the wine-devel
mailing list