Signed-off-by: Paul Gofman [email protected] --- dlls/ntdll/unix/virtual.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/dlls/ntdll/unix/virtual.c b/dlls/ntdll/unix/virtual.c index 883e5cff927..7b33b3e82ac 100644 --- a/dlls/ntdll/unix/virtual.c +++ b/dlls/ntdll/unix/virtual.c @@ -1033,6 +1033,7 @@ static void* try_map_free_area( void *base, void *end, ptrdiff_t step, step == 0) break; start = (char *)start + step; + step *= 2; }
return NULL;
Signed-off-by: Paul Gofman [email protected] --- dlls/ntdll/unix/virtual.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/dlls/ntdll/unix/virtual.c b/dlls/ntdll/unix/virtual.c index 7b33b3e82ac..13775d654bd 100644 --- a/dlls/ntdll/unix/virtual.c +++ b/dlls/ntdll/unix/virtual.c @@ -188,7 +188,11 @@ static BYTE *pages_vprot; #endif
static struct file_view *view_block_start, *view_block_end, *next_free_view; +#ifdef _WIN64 +static const size_t view_block_size = 0x200000; +#else static const size_t view_block_size = 0x100000; +#endif static void *preload_reserve_start; static void *preload_reserve_end; static BOOL force_exec_prot; /* whether to force PROT_EXEC on all PROT_READ mmaps */
Windows allocates virtual memory strictly bottom up or top down depending on the requested flags. Modern Linux VM allocator always allocates memory top down. Some applications break if the allocated memory addresses are from higher memory than they expect.
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=48175 Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=46568 Signed-off-by: Paul Gofman [email protected] --- dlls/ntdll/unix/virtual.c | 417 ++++++++++++++++++-------------------- 1 file changed, 201 insertions(+), 216 deletions(-)
diff --git a/dlls/ntdll/unix/virtual.c b/dlls/ntdll/unix/virtual.c index 13775d654bd..5d626ee85c4 100644 --- a/dlls/ntdll/unix/virtual.c +++ b/dlls/ntdll/unix/virtual.c @@ -967,44 +967,6 @@ static struct file_view *find_view_range( const void *addr, size_t size ) }
-/*********************************************************************** - * find_view_inside_range - * - * Find first (resp. last, if top_down) view inside a range. - * virtual_mutex must be held by caller. - */ -static struct wine_rb_entry *find_view_inside_range( void **base_ptr, void **end_ptr, int top_down ) -{ - struct wine_rb_entry *first = NULL, *ptr = views_tree.root; - void *base = *base_ptr, *end = *end_ptr; - - /* find the first (resp. last) view inside the range */ - while (ptr) - { - struct file_view *view = WINE_RB_ENTRY_VALUE( ptr, struct file_view, entry ); - if ((char *)view->base + view->size >= (char *)end) - { - end = min( end, view->base ); - ptr = ptr->left; - } - else if (view->base <= base) - { - base = max( (char *)base, (char *)view->base + view->size ); - ptr = ptr->right; - } - else - { - first = ptr; - ptr = top_down ? ptr->right : ptr->left; - } - } - - *base_ptr = base; - *end_ptr = end; - return first; -} - - /*********************************************************************** * try_map_free_area * @@ -1043,110 +1005,6 @@ static void* try_map_free_area( void *base, void *end, ptrdiff_t step, return NULL; }
- -/*********************************************************************** - * map_free_area - * - * Find a free area between views inside the specified range and map it. - * virtual_mutex must be held by caller. - */ -static void *map_free_area( void *base, void *end, size_t size, int top_down, int unix_prot ) -{ - struct wine_rb_entry *first = find_view_inside_range( &base, &end, top_down ); - ptrdiff_t step = top_down ? -(granularity_mask + 1) : (granularity_mask + 1); - void *start; - - if (top_down) - { - start = ROUND_ADDR( (char *)end - size, granularity_mask ); - if (start >= end || start < base) return NULL; - - while (first) - { - struct file_view *view = WINE_RB_ENTRY_VALUE( first, struct file_view, entry ); - if ((start = try_map_free_area( (char *)view->base + view->size, (char *)start + size, step, - start, size, unix_prot ))) break; - start = ROUND_ADDR( (char *)view->base - size, granularity_mask ); - /* stop if remaining space is not large enough */ - if (!start || start >= end || start < base) return NULL; - first = wine_rb_prev( first ); - } - } - else - { - start = ROUND_ADDR( (char *)base + granularity_mask, granularity_mask ); - if (!start || start >= end || (char *)end - (char *)start < size) return NULL; - - while (first) - { - struct file_view *view = WINE_RB_ENTRY_VALUE( first, struct file_view, entry ); - if ((start = try_map_free_area( start, view->base, step, - start, size, unix_prot ))) break; - start = ROUND_ADDR( (char *)view->base + view->size + granularity_mask, granularity_mask ); - /* stop if remaining space is not large enough */ - if (!start || start >= end || (char *)end - (char *)start < size) return NULL; - first = wine_rb_next( first ); - } - } - - if (!first) - return try_map_free_area( base, end, step, start, size, unix_prot ); - - return start; -} - - -/*********************************************************************** - * find_reserved_free_area - * - * Find a free area between views inside the specified range. - * virtual_mutex must be held by caller. - * The range must be inside the preloader reserved range. - */ -static void *find_reserved_free_area( void *base, void *end, size_t size, int top_down ) -{ - struct range_entry *range; - void *start; - - base = ROUND_ADDR( (char *)base + granularity_mask, granularity_mask ); - end = (char *)ROUND_ADDR( (char *)end - size, granularity_mask ) + size; - - if (top_down) - { - start = (char *)end - size; - range = free_ranges_lower_bound( start ); - assert(range != free_ranges_end && range->end >= start); - - if ((char *)range->end - (char *)start < size) start = ROUND_ADDR( (char *)range->end - size, granularity_mask ); - do - { - if (start >= end || start < base || (char *)end - (char *)start < size) return NULL; - if (start < range->end && start >= range->base && (char *)range->end - (char *)start >= size) break; - if (--range < free_ranges) return NULL; - start = ROUND_ADDR( (char *)range->end - size, granularity_mask ); - } - while (1); - } - else - { - start = base; - range = free_ranges_lower_bound( start ); - assert(range != free_ranges_end && range->end >= start); - - if (start < range->base) start = ROUND_ADDR( (char *)range->base + granularity_mask, granularity_mask ); - do - { - if (start >= end || start < base || (char *)end - (char *)start < size) return NULL; - if (start < range->end && start >= range->base && (char *)range->end - (char *)start >= size) break; - if (++range == free_ranges_end) return NULL; - start = ROUND_ADDR( (char *)range->base + granularity_mask, granularity_mask ); - } - while (1); - } - return start; -} - - /*********************************************************************** * add_reserved_area * @@ -1315,8 +1173,7 @@ static void delete_view( struct file_view *view ) /* [in] View */ { if (!(view->protect & VPROT_SYSTEM)) unmap_area( view->base, view->size ); set_page_vprot( view->base, view->size, 0 ); - if (mmap_is_in_reserved_area( view->base, view->size )) - free_ranges_remove_view( view ); + free_ranges_remove_view( view ); wine_rb_remove( &views_tree, &view->entry ); *(struct file_view **)view = next_free_view; next_free_view = view; @@ -1364,8 +1221,7 @@ static NTSTATUS create_view( struct file_view **view_ret, void *base, size_t siz set_page_vprot( base, size, vprot );
wine_rb_put( &views_tree, view->base, &view->entry ); - if (mmap_is_in_reserved_area( view->base, view->size )) - free_ranges_insert_view( view ); + free_ranges_insert_view( view );
*view_ret = view;
@@ -1590,54 +1446,219 @@ static inline void *unmap_extra_space( void *ptr, size_t total_size, size_t want return ptr; }
- struct alloc_area { + char *map_area_start, *map_area_end, *result; size_t size; - int top_down; - void *limit; - void *result; + ptrdiff_t step; + int unix_prot; + BOOL top_down; };
-/*********************************************************************** - * alloc_reserved_area_callback - * - * Try to map some space inside a reserved area. Callback for mmap_enum_reserved_areas. - */ -static int CDECL alloc_reserved_area_callback( void *start, SIZE_T size, void *arg ) +static int CDECL alloc_area_in_reserved_or_between_callback( void *start, SIZE_T size, void *arg ) { - struct alloc_area *alloc = arg; - void *end = (char *)start + size; - - if (start < address_space_start) start = address_space_start; - if (is_beyond_limit( start, size, alloc->limit )) end = alloc->limit; - if (start >= end) return 0; + char *intersect_start, *intersect_end; + char *end = (char *)start + size; + struct alloc_area *area = arg; + char *alloc_start;
- /* make sure we don't touch the preloader reserved range */ - if (preload_reserve_end >= start) + if (area->top_down) { - if (preload_reserve_end >= end) + if (area->map_area_start >= end) + return 1; + + if (area->map_area_end <= (char *)start) + return 0; + + intersect_start = max((char *)start, area->map_area_start); + intersect_end = min((char *)end, area->map_area_end); + + assert(ROUND_ADDR(intersect_start, granularity_mask) == intersect_start); + assert(ROUND_ADDR(intersect_end + granularity_mask - 1, granularity_mask) == intersect_end); + + alloc_start = ROUND_ADDR( (char *)area->map_area_end - size, granularity_mask ); + + if (alloc_start >= intersect_end) { - if (preload_reserve_start <= start) return 0; /* no space in that area */ - if (preload_reserve_start < end) end = preload_reserve_start; + if ((area->result = try_map_free_area( area->map_area_start, alloc_start + size, area->step, + alloc_start, area->size, area->unix_prot ))) + return 1; } - else if (preload_reserve_start <= start) start = preload_reserve_end; - else + + alloc_start = ROUND_ADDR( intersect_end - area->size, granularity_mask ); + if (alloc_start >= intersect_start) { - /* range is split in two by the preloader reservation, try first part */ - if ((alloc->result = find_reserved_free_area( start, preload_reserve_start, alloc->size, - alloc->top_down ))) + if ((area->result = wine_anon_mmap( alloc_start, area->size, + area->unix_prot, MAP_FIXED )) != alloc_start) + ERR("Could not map in reserved area, alloc_start %p, size %p.\n", + alloc_start, (void *)area->size); + return 1; + } + + area->map_area_end = intersect_start; + if (area->map_area_end - area->map_area_start < area->size) + return 1; + } + else + { + if (area->map_area_end <= (char *)start) + return 1; + + if (area->map_area_start >= (char *)end) + return 0; + + intersect_start = max((char *)start, area->map_area_start); + intersect_end = min((char *)end, area->map_area_end); + + assert(ROUND_ADDR(intersect_start, granularity_mask) == intersect_start); + assert(ROUND_ADDR(intersect_end + granularity_mask - 1, granularity_mask) == intersect_end); + + if (intersect_start - area->map_area_start >= area->size) + { + if ((area->result = try_map_free_area( area->map_area_start, intersect_start, area->step, + area->map_area_start, area->size, area->unix_prot ))) return 1; - /* then fall through to try second part */ - start = preload_reserve_end; } + + if (intersect_end - intersect_start >= area->size) + { + if ((area->result = wine_anon_mmap( intersect_start, area->size, area->unix_prot, MAP_FIXED )) + != intersect_start) + ERR("Could not map in reserved area.\n"); + return 1; + } + area->map_area_start = intersect_end; + if (area->map_area_end - area->map_area_start < area->size) + return 1; } - if ((alloc->result = find_reserved_free_area( start, end, alloc->size, alloc->top_down ))) - return 1;
return 0; }
+static void *alloc_free_area_in_range( struct alloc_area *area, char *base, char *end ) +{ + char *start; + + TRACE("range %p-%p.\n", base, end); + + if (base >= end) + return NULL; + + area->map_area_start = base; + area->map_area_end = end; + + if (area->top_down) + { + start = ROUND_ADDR( end - area->size, granularity_mask ); + if (start >= end || start < base) + return NULL; + } + else + { + start = ROUND_ADDR( base + granularity_mask, granularity_mask ); + if (!start || start >= end || (char *)end - (char *)start < area->size) + return NULL; + } + + mmap_enum_reserved_areas( alloc_area_in_reserved_or_between_callback, area, area->top_down ); + + if (area->result) + return area->result; + + if (area->top_down) + { + start = ROUND_ADDR( area->map_area_end - area->size, granularity_mask ); + if (start >= area->map_area_end || start < area->map_area_start) + return NULL; + + return try_map_free_area( area->map_area_start, start + area->size, area->step, + start, area->size, area->unix_prot ); + } + else + { + start = ROUND_ADDR( area->map_area_start + granularity_mask, granularity_mask ); + if (!start || start >= area->map_area_end + || area->map_area_end - start < area->size) + return NULL; + + return try_map_free_area( start, area->map_area_end, area->step, + start, area->size, area->unix_prot ); + } +} + +static void *alloc_free_area( void *limit, size_t size, BOOL top_down, int unix_prot ) +{ + struct range_entry *range, *ranges_start, *ranges_end; + char *reserve_start, *reserve_end; + struct alloc_area area; + char *base, *end; + int ranges_inc; + + TRACE("limit %p, size %p, top_down %#x.\n", limit, (void *)size, top_down); + + if (top_down) + { + ranges_start = free_ranges_end - 1; + ranges_end = free_ranges - 1; + ranges_inc = -1; + } + else + { + ranges_start = free_ranges; + ranges_end = free_ranges_end; + ranges_inc = 1; + } + + memset( &area, 0, sizeof(area) ); + area.step = top_down ? -(granularity_mask + 1) : (granularity_mask + 1); + area.size = size; + area.top_down = top_down; + area.unix_prot = unix_prot; + + reserve_start = ROUND_ADDR( (char *)preload_reserve_start, granularity_mask ); + reserve_end = ROUND_ADDR( (char *)preload_reserve_end + granularity_mask, granularity_mask ); + + for (range = ranges_start; range != ranges_end; range += ranges_inc) + { + base = range->base; + end = range->end; + + TRACE("range %p-%p.\n", base, end); + + if (base < (char *)address_space_start) + base = (char *)address_space_start; + if (end > (char *)ROUND_ADDR( limit, granularity_mask )) + end = ROUND_ADDR( limit, granularity_mask ); + + if (reserve_end >= base) + { + if (reserve_end >= end) + { + if (reserve_start <= base) + continue; /* no space in that area */ + if (reserve_start < end) + end = reserve_start; + } + else if (reserve_start <= base) + { + base = reserve_end; + } + else + { + /* range is split in two by the preloader reservation, try first part. */ + if ((area.result = alloc_free_area_in_range( &area, base, reserve_start ))) + return area.result; + /* then fall through to try second part. */ + base = reserve_end; + } + } + + if ((area.result = alloc_free_area_in_range( &area, base, end ))) + return area.result; + } + return NULL; +} + /*********************************************************************** * map_fixed_area * @@ -1715,48 +1736,11 @@ static NTSTATUS map_view( struct file_view **view_ret, void *base, size_t size, if (status != STATUS_SUCCESS) return status; ptr = base; } - else + else if (!(ptr = alloc_free_area( (void*)(get_zero_bits_64_mask( zero_bits_64 ) + & (UINT_PTR)user_space_limit), size, top_down, get_unix_prot( vprot ) ))) { - size_t view_size = size + granularity_mask + 1; - struct alloc_area alloc; - - alloc.size = size; - alloc.top_down = top_down; - alloc.limit = (void*)(get_zero_bits_64_mask( zero_bits_64 ) & (UINT_PTR)user_space_limit); - - if (mmap_enum_reserved_areas( alloc_reserved_area_callback, &alloc, top_down )) - { - ptr = alloc.result; - TRACE( "got mem in reserved area %p-%p\n", ptr, (char *)ptr + size ); - if (wine_anon_mmap( ptr, size, get_unix_prot(vprot), MAP_FIXED ) != ptr) - return STATUS_INVALID_PARAMETER; - goto done; - } - - if (zero_bits_64) - { - if (!(ptr = map_free_area( address_space_start, alloc.limit, size, - top_down, get_unix_prot(vprot) ))) - return STATUS_NO_MEMORY; - TRACE( "got mem with map_free_area %p-%p\n", ptr, (char *)ptr + size ); - goto done; - } - - for (;;) - { - if ((ptr = wine_anon_mmap( NULL, view_size, get_unix_prot(vprot), 0 )) == (void *)-1) - { - if (errno == ENOMEM) return STATUS_NO_MEMORY; - return STATUS_INVALID_PARAMETER; - } - TRACE( "got mem with anon mmap %p-%p\n", ptr, (char *)ptr + size ); - /* if we got something beyond the user limit, unmap it and retry */ - if (is_beyond_limit( ptr, view_size, user_space_limit )) add_reserved_area( ptr, view_size ); - else break; - } - ptr = unmap_extra_space( ptr, view_size, size ); + return STATUS_NO_MEMORY; } -done: status = create_view( view_ret, ptr, size, vprot ); if (status != STATUS_SUCCESS) unmap_area( ptr, size ); return status; @@ -2392,6 +2376,7 @@ void virtual_init(void) if (preload_reserve_start) address_space_start = min( address_space_start, preload_reserve_start ); } + TRACE("preload reserve %p-%p.\n", preload_reserve_start, preload_reserve_end); }
size = teb_size + max( MINSIGSTKSZ, 8192 );
On 2020-07-23 18:32, Paul Gofman wrote:
Windows allocates virtual memory strictly bottom up or top down depending on the requested flags. Modern Linux VM allocator always allocates memory top down. Some applications break if the allocated memory addresses are from higher memory than they expect.
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=48175 Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=46568 Signed-off-by: Paul Gofman [email protected]
dlls/ntdll/unix/virtual.c | 417 ++++++++++++++++++-------------------- 1 file changed, 201 insertions(+), 216 deletions(-)
diff --git a/dlls/ntdll/unix/virtual.c b/dlls/ntdll/unix/virtual.c index 13775d654bd..5d626ee85c4 100644 --- a/dlls/ntdll/unix/virtual.c +++ b/dlls/ntdll/unix/virtual.c @@ -967,44 +967,6 @@ static struct file_view *find_view_range( const void *addr, size_t size ) }
-/***********************************************************************
find_view_inside_range
- Find first (resp. last, if top_down) view inside a range.
- virtual_mutex must be held by caller.
- */
-static struct wine_rb_entry *find_view_inside_range( void **base_ptr, void **end_ptr, int top_down ) -{
- struct wine_rb_entry *first = NULL, *ptr = views_tree.root;
- void *base = *base_ptr, *end = *end_ptr;
- /* find the first (resp. last) view inside the range */
- while (ptr)
- {
struct file_view *view = WINE_RB_ENTRY_VALUE( ptr, struct file_view, entry );
if ((char *)view->base + view->size >= (char *)end)
{
end = min( end, view->base );
ptr = ptr->left;
}
else if (view->base <= base)
{
base = max( (char *)base, (char *)view->base + view->size );
ptr = ptr->right;
}
else
{
first = ptr;
ptr = top_down ? ptr->right : ptr->left;
}
- }
- *base_ptr = base;
- *end_ptr = end;
- return first;
-}
- /***********************************************************************
try_map_free_area
@@ -1043,110 +1005,6 @@ static void* try_map_free_area( void *base, void *end, ptrdiff_t step, return NULL; }
-/***********************************************************************
map_free_area
- Find a free area between views inside the specified range and map it.
- virtual_mutex must be held by caller.
- */
-static void *map_free_area( void *base, void *end, size_t size, int top_down, int unix_prot ) -{
- struct wine_rb_entry *first = find_view_inside_range( &base, &end, top_down );
- ptrdiff_t step = top_down ? -(granularity_mask + 1) : (granularity_mask + 1);
- void *start;
- if (top_down)
- {
start = ROUND_ADDR( (char *)end - size, granularity_mask );
if (start >= end || start < base) return NULL;
while (first)
{
struct file_view *view = WINE_RB_ENTRY_VALUE( first, struct file_view, entry );
if ((start = try_map_free_area( (char *)view->base + view->size, (char *)start + size, step,
start, size, unix_prot ))) break;
start = ROUND_ADDR( (char *)view->base - size, granularity_mask );
/* stop if remaining space is not large enough */
if (!start || start >= end || start < base) return NULL;
first = wine_rb_prev( first );
}
- }
- else
- {
start = ROUND_ADDR( (char *)base + granularity_mask, granularity_mask );
if (!start || start >= end || (char *)end - (char *)start < size) return NULL;
while (first)
{
struct file_view *view = WINE_RB_ENTRY_VALUE( first, struct file_view, entry );
if ((start = try_map_free_area( start, view->base, step,
start, size, unix_prot ))) break;
start = ROUND_ADDR( (char *)view->base + view->size + granularity_mask, granularity_mask );
/* stop if remaining space is not large enough */
if (!start || start >= end || (char *)end - (char *)start < size) return NULL;
first = wine_rb_next( first );
}
- }
- if (!first)
return try_map_free_area( base, end, step, start, size, unix_prot );
- return start;
-}
-/***********************************************************************
find_reserved_free_area
- Find a free area between views inside the specified range.
- virtual_mutex must be held by caller.
- The range must be inside the preloader reserved range.
- */
-static void *find_reserved_free_area( void *base, void *end, size_t size, int top_down ) -{
- struct range_entry *range;
- void *start;
- base = ROUND_ADDR( (char *)base + granularity_mask, granularity_mask );
- end = (char *)ROUND_ADDR( (char *)end - size, granularity_mask ) + size;
- if (top_down)
- {
start = (char *)end - size;
range = free_ranges_lower_bound( start );
assert(range != free_ranges_end && range->end >= start);
if ((char *)range->end - (char *)start < size) start = ROUND_ADDR( (char *)range->end - size, granularity_mask );
do
{
if (start >= end || start < base || (char *)end - (char *)start < size) return NULL;
if (start < range->end && start >= range->base && (char *)range->end - (char *)start >= size) break;
if (--range < free_ranges) return NULL;
start = ROUND_ADDR( (char *)range->end - size, granularity_mask );
}
while (1);
- }
- else
- {
start = base;
range = free_ranges_lower_bound( start );
assert(range != free_ranges_end && range->end >= start);
if (start < range->base) start = ROUND_ADDR( (char *)range->base + granularity_mask, granularity_mask );
do
{
if (start >= end || start < base || (char *)end - (char *)start < size) return NULL;
if (start < range->end && start >= range->base && (char *)range->end - (char *)start >= size) break;
if (++range == free_ranges_end) return NULL;
start = ROUND_ADDR( (char *)range->base + granularity_mask, granularity_mask );
}
while (1);
- }
- return start;
-}
- /***********************************************************************
add_reserved_area
@@ -1315,8 +1173,7 @@ static void delete_view( struct file_view *view ) /* [in] View */ { if (!(view->protect & VPROT_SYSTEM)) unmap_area( view->base, view->size ); set_page_vprot( view->base, view->size, 0 );
- if (mmap_is_in_reserved_area( view->base, view->size ))
free_ranges_remove_view( view );
- free_ranges_remove_view( view ); wine_rb_remove( &views_tree, &view->entry ); *(struct file_view **)view = next_free_view; next_free_view = view;
@@ -1364,8 +1221,7 @@ static NTSTATUS create_view( struct file_view **view_ret, void *base, size_t siz set_page_vprot( base, size, vprot );
wine_rb_put( &views_tree, view->base, &view->entry );
- if (mmap_is_in_reserved_area( view->base, view->size ))
free_ranges_insert_view( view );
free_ranges_insert_view( view );
*view_ret = view;
@@ -1590,54 +1446,219 @@ static inline void *unmap_extra_space( void *ptr, size_t total_size, size_t want return ptr; }
- struct alloc_area {
- char *map_area_start, *map_area_end, *result; size_t size;
- int top_down;
- void *limit;
- void *result;
- ptrdiff_t step;
- int unix_prot;
- BOOL top_down; };
-/***********************************************************************
alloc_reserved_area_callback
- Try to map some space inside a reserved area. Callback for mmap_enum_reserved_areas.
- */
-static int CDECL alloc_reserved_area_callback( void *start, SIZE_T size, void *arg ) +static int CDECL alloc_area_in_reserved_or_between_callback( void *start, SIZE_T size, void *arg ) {
- struct alloc_area *alloc = arg;
- void *end = (char *)start + size;
- if (start < address_space_start) start = address_space_start;
- if (is_beyond_limit( start, size, alloc->limit )) end = alloc->limit;
- if (start >= end) return 0;
- char *intersect_start, *intersect_end;
- char *end = (char *)start + size;
- struct alloc_area *area = arg;
- char *alloc_start;
- /* make sure we don't touch the preloader reserved range */
- if (preload_reserve_end >= start)
- if (area->top_down) {
if (preload_reserve_end >= end)
if (area->map_area_start >= end)
return 1;
if (area->map_area_end <= (char *)start)
return 0;
intersect_start = max((char *)start, area->map_area_start);
intersect_end = min((char *)end, area->map_area_end);
assert(ROUND_ADDR(intersect_start, granularity_mask) == intersect_start);
assert(ROUND_ADDR(intersect_end + granularity_mask - 1, granularity_mask) == intersect_end);
alloc_start = ROUND_ADDR( (char *)area->map_area_end - size, granularity_mask );
if (alloc_start >= intersect_end) {
if (preload_reserve_start <= start) return 0; /* no space in that area */
if (preload_reserve_start < end) end = preload_reserve_start;
if ((area->result = try_map_free_area( area->map_area_start, alloc_start + size, area->step,
alloc_start, area->size, area->unix_prot )))
return 1; }
else if (preload_reserve_start <= start) start = preload_reserve_end;
else
alloc_start = ROUND_ADDR( intersect_end - area->size, granularity_mask );
if (alloc_start >= intersect_start) {
/* range is split in two by the preloader reservation, try first part */
if ((alloc->result = find_reserved_free_area( start, preload_reserve_start, alloc->size,
alloc->top_down )))
if ((area->result = wine_anon_mmap( alloc_start, area->size,
area->unix_prot, MAP_FIXED )) != alloc_start)
ERR("Could not map in reserved area, alloc_start %p, size %p.\n",
alloc_start, (void *)area->size);
return 1;
}
area->map_area_end = intersect_start;
if (area->map_area_end - area->map_area_start < area->size)
return 1;
- }
- else
- {
if (area->map_area_end <= (char *)start)
return 1;
if (area->map_area_start >= (char *)end)
return 0;
intersect_start = max((char *)start, area->map_area_start);
intersect_end = min((char *)end, area->map_area_end);
assert(ROUND_ADDR(intersect_start, granularity_mask) == intersect_start);
assert(ROUND_ADDR(intersect_end + granularity_mask - 1, granularity_mask) == intersect_end);
if (intersect_start - area->map_area_start >= area->size)
{
if ((area->result = try_map_free_area( area->map_area_start, intersect_start, area->step,
area->map_area_start, area->size, area->unix_prot ))) return 1;
/* then fall through to try second part */
start = preload_reserve_end; }
if (intersect_end - intersect_start >= area->size)
{
if ((area->result = wine_anon_mmap( intersect_start, area->size, area->unix_prot, MAP_FIXED ))
!= intersect_start)
ERR("Could not map in reserved area.\n");
return 1;
}
area->map_area_start = intersect_end;
if (area->map_area_end - area->map_area_start < area->size)
return 1; }
if ((alloc->result = find_reserved_free_area( start, end, alloc->size, alloc->top_down )))
return 1; return 0;
}
+static void *alloc_free_area_in_range( struct alloc_area *area, char *base, char *end ) +{
- char *start;
- TRACE("range %p-%p.\n", base, end);
- if (base >= end)
return NULL;
- area->map_area_start = base;
- area->map_area_end = end;
- if (area->top_down)
- {
start = ROUND_ADDR( end - area->size, granularity_mask );
if (start >= end || start < base)
return NULL;
- }
- else
- {
start = ROUND_ADDR( base + granularity_mask, granularity_mask );
if (!start || start >= end || (char *)end - (char *)start < area->size)
return NULL;
- }
- mmap_enum_reserved_areas( alloc_area_in_reserved_or_between_callback, area, area->top_down );
- if (area->result)
return area->result;
- if (area->top_down)
- {
start = ROUND_ADDR( area->map_area_end - area->size, granularity_mask );
if (start >= area->map_area_end || start < area->map_area_start)
return NULL;
return try_map_free_area( area->map_area_start, start + area->size, area->step,
start, area->size, area->unix_prot );
- }
- else
- {
start = ROUND_ADDR( area->map_area_start + granularity_mask, granularity_mask );
if (!start || start >= area->map_area_end
|| area->map_area_end - start < area->size)
return NULL;
return try_map_free_area( start, area->map_area_end, area->step,
start, area->size, area->unix_prot );
- }
+}
+static void *alloc_free_area( void *limit, size_t size, BOOL top_down, int unix_prot ) +{
- struct range_entry *range, *ranges_start, *ranges_end;
- char *reserve_start, *reserve_end;
- struct alloc_area area;
- char *base, *end;
- int ranges_inc;
- TRACE("limit %p, size %p, top_down %#x.\n", limit, (void *)size, top_down);
- if (top_down)
- {
ranges_start = free_ranges_end - 1;
ranges_end = free_ranges - 1;
ranges_inc = -1;
- }
- else
- {
ranges_start = free_ranges;
ranges_end = free_ranges_end;
ranges_inc = 1;
- }
- memset( &area, 0, sizeof(area) );
- area.step = top_down ? -(granularity_mask + 1) : (granularity_mask + 1);
- area.size = size;
- area.top_down = top_down;
- area.unix_prot = unix_prot;
- reserve_start = ROUND_ADDR( (char *)preload_reserve_start, granularity_mask );
- reserve_end = ROUND_ADDR( (char *)preload_reserve_end + granularity_mask, granularity_mask );
- for (range = ranges_start; range != ranges_end; range += ranges_inc)
- {
base = range->base;
end = range->end;
TRACE("range %p-%p.\n", base, end);
if (base < (char *)address_space_start)
base = (char *)address_space_start;
if (end > (char *)ROUND_ADDR( limit, granularity_mask ))
end = ROUND_ADDR( limit, granularity_mask );
if (reserve_end >= base)
{
if (reserve_end >= end)
{
if (reserve_start <= base)
continue; /* no space in that area */
if (reserve_start < end)
end = reserve_start;
}
else if (reserve_start <= base)
{
base = reserve_end;
}
else
{
/* range is split in two by the preloader reservation, try first part. */
if ((area.result = alloc_free_area_in_range( &area, base, reserve_start )))
return area.result;
/* then fall through to try second part. */
base = reserve_end;
}
}
if ((area.result = alloc_free_area_in_range( &area, base, end )))
return area.result;
- }
- return NULL;
+}
- /***********************************************************************
map_fixed_area
@@ -1715,48 +1736,11 @@ static NTSTATUS map_view( struct file_view **view_ret, void *base, size_t size, if (status != STATUS_SUCCESS) return status; ptr = base; }
- else
- else if (!(ptr = alloc_free_area( (void*)(get_zero_bits_64_mask( zero_bits_64 )
& (UINT_PTR)user_space_limit), size, top_down, get_unix_prot( vprot ) ))) {
size_t view_size = size + granularity_mask + 1;
struct alloc_area alloc;
alloc.size = size;
alloc.top_down = top_down;
alloc.limit = (void*)(get_zero_bits_64_mask( zero_bits_64 ) & (UINT_PTR)user_space_limit);
if (mmap_enum_reserved_areas( alloc_reserved_area_callback, &alloc, top_down ))
{
ptr = alloc.result;
TRACE( "got mem in reserved area %p-%p\n", ptr, (char *)ptr + size );
if (wine_anon_mmap( ptr, size, get_unix_prot(vprot), MAP_FIXED ) != ptr)
return STATUS_INVALID_PARAMETER;
goto done;
}
if (zero_bits_64)
{
if (!(ptr = map_free_area( address_space_start, alloc.limit, size,
top_down, get_unix_prot(vprot) )))
return STATUS_NO_MEMORY;
TRACE( "got mem with map_free_area %p-%p\n", ptr, (char *)ptr + size );
goto done;
}
for (;;)
{
if ((ptr = wine_anon_mmap( NULL, view_size, get_unix_prot(vprot), 0 )) == (void *)-1)
{
if (errno == ENOMEM) return STATUS_NO_MEMORY;
return STATUS_INVALID_PARAMETER;
}
TRACE( "got mem with anon mmap %p-%p\n", ptr, (char *)ptr + size );
/* if we got something beyond the user limit, unmap it and retry */
if (is_beyond_limit( ptr, view_size, user_space_limit )) add_reserved_area( ptr, view_size );
else break;
}
ptr = unmap_extra_space( ptr, view_size, size );
return STATUS_NO_MEMORY; }
-done: status = create_view( view_ret, ptr, size, vprot ); if (status != STATUS_SUCCESS) unmap_area( ptr, size ); return status; @@ -2392,6 +2376,7 @@ void virtual_init(void) if (preload_reserve_start) address_space_start = min( address_space_start, preload_reserve_start ); }
TRACE("preload reserve %p-%p.\n", preload_reserve_start, preload_reserve_end); } size = teb_size + max( MINSIGSTKSZ, 8192 );
I can understand what this is doing (extend the free ranges tracking over the whole address space, and merge all the code paths together), but it's a big change all at once.
The free ranges were only tracked within the reserved areas mostly because it was only useful there, but also because the mmap smaller alignment was causing a lot of fragmentation in an initial implementation. Now that we align all views and ranges to 64k I don't think it would fragment that much anymore so it could probably be done separately. And I think it would still be interesting to gather some statistics to see how many ranges and holes we usually have, just to check that it's not going crazy. FWIW, there a patch to print that information on warn+virtual in Proton:
198045357c31d0846bab6c8ac388aa1b914c9cf0
About the rest, wouldn't it be possible to just reserve some more pages with reserve_area where we expect some free memory to be, and if we are out of reserved space, then use the existing code to allocate the views within the newly reserved space. Of course it would possibly cause more memory to be reserved for Wine and less to be available to system, I'm not sure if we can mitigate that somehow.
On 8/6/20 18:42, Rémi Bernon wrote:
I can understand what this is doing (extend the free ranges tracking over the whole address space, and merge all the code paths together), but it's a big change all at once.
Yes, this is the case, in a tiny bit more details the logic is:
1. iterate over free areas (skip too small right away);
2. within free area, enumerate reserved areas and allocate the memory during enumeration or right after if there is a space at the edges of enumeration;
In the majority of cases step 2 should succeed from the first time.
I thought of doing that in parts (by leaving reserved areas as is), but it was getting more complicated and ugly as it is now in this version. We would need to maintain a separate free area list for the free areas outside of reserved areas, which was getting a bit tricky for some corner cases, and looked very weird overall. If to keep one list, we would need to prevent the free list logic from joining free areas between reserved and "normal" space. That would move the complications there and result in the longer overall code, while probably not making free list managing nicer and is not needed long term.
The free ranges were only tracked within the reserved areas mostly because it was only useful there, but also because the mmap smaller alignment was causing a lot of fragmentation in an initial implementation. Now that we align all views and ranges to 64k I don't think it would fragment that much anymore so it could probably be done separately. And I think it would still be interesting to gather some statistics to see how many ranges and holes we usually have, just to check that it's not going crazy.
I did observe that statistics over some games and I think even still have some log recorded which I used as the data source for the test case I made to reproduce some real life allocation cases (keeping that from separate threads) when testing the performance. I will need some time to gather that once again and come up with some verified figures, but from what I can tell at once:
- The number of views varies greatly between the games, from a few thousands to hitting the default Linux mmap limit, with values roughly about ~10000-2000 seen often;
- The number of free ranges is not great, I doubt I ever saw more than a hundred. With forced 64k alignment a lot of allocations do not produce a lot of free ranges. To make this number great the application should use a really weird pattern of allocation by doing explicit VM allocs of a small size and then freeing a lot in between, and then allocating bigger chunks so the existing free blocks do not fit.
About the rest, wouldn't it be possible to just reserve some more pages with reserve_area where we expect some free memory to be, and if we are out of reserved space, then use the existing code to allocate the views within the newly reserved space. Of course it would possibly cause more memory to be reserved for Wine and less to be available to system, I'm not sure if we can mitigate that somehow.
As a game specific hack, sure, this can work, but it looks a bit problematic for me as a general solution. First of all, I am unsure how to sensibly choose this parameter in a universal way. We need to reserve as much memory as the application is going to ever allocate. E. g., when I was testing this with AION 64 bit and it was OK with addresses withing ~16GB (apparently it was not planning to ever use more RAM), returning bigger pointers were crashing it. I guess this relates to every application affected, it just expects the pointer in the memory pointers to be in the certain range. The range may differ greatly (e. g., MK11 is fine with Windows 7 64 bit address space limit), but to guarantee that range with reserved areas we will have to always reserve as much RAM as application is using. Besides, there are pointers given to application which are obtained from native libraries, like Open GL / Vulkan mappings, audio buffers etc. Those pointers on Windows also fall into expected ranges. If we reserve low memory for Wine allocations native pointers will always fall outside. I am not aware if that will break any existing application, but it could.
Overall, do you think that maintaining that allocation "duality", within reserved areas and without, is making anything more straightforward, given we can have solution which avoids that and that is acceptable performance wise? I had some preliminary solution like that before your free lists for reserved areas were upstreamed, IMO (apart from my free lists having a very skeleton implementation) it looked much more cumbersome and was introducing a lot of code which should supposingly go away long term.
BTW do we need those reserved areas at all for anything besides ensuring core system libraries get to the right place and we do get some low memory for things like zero_mask allocations (while for the latter case this space can be taken away)? I suspect that not. Maybe once we switch to ordered allocations we can just remove reserved areas once those DLLs are loaded and thus simplify the alocations?
Sorry for the late reply, but I was a bit interested in this patch (which doesn't seem to have gone anywhere yet?).
I think this should also fix bug 38668.
On 8/19/20 17:56, Gabriel Ivăncescu wrote:
Sorry for the late reply, but I was a bit interested in this patch (which doesn't seem to have gone anywhere yet?).
I think this should also fix bug 38668.
I am frankly not quite sure how to move forward with this. I've got a comment from Rémi who rightfully considered the change to big for a one time change. While I do agree with that in principle, I don't see the way how to simplify the change without introducing a more complicated and ugly state in between (I have wrote some more details in the previous e-mail).
I also received a comment from Alexandre in the chat that he is sceptical about the idea because it is getting too complicated.
For now I don't see any other substantially different and good way to solve the bunch of related bugs. Besides the referenced bugs, it is also known to affect Mortal Kombat 11 and Star Wars: Battlefron II for now.
So I am not sure how to proceed from here. Should we just leave these bugs, or is there some direction of how to do it simpler which I am missing, or should I maybe try harder to structure the code in some more clear way?
On Aug 19, 2020, at 10:06 AM, Paul Gofman [email protected] wrote:
On 8/19/20 17:56, Gabriel Ivăncescu wrote:
Sorry for the late reply, but I was a bit interested in this patch (which doesn't seem to have gone anywhere yet?).
I think this should also fix bug 38668.
I am frankly not quite sure how to move forward with this. I've got a comment from Rémi who rightfully considered the change to big for a one time change. While I do agree with that in principle, I don't see the way how to simplify the change without introducing a more complicated and ugly state in between (I have wrote some more details in the previous e-mail).
I also received a comment from Alexandre in the chat that he is sceptical about the idea because it is getting too complicated.
For now I don't see any other substantially different and good way to solve the bunch of related bugs. Besides the referenced bugs, it is also known to affect Mortal Kombat 11 and Star Wars: Battlefron II for now.
So I am not sure how to proceed from here. Should we just leave these bugs, or is there some direction of how to do it simpler which I am missing, or should I maybe try harder to structure the code in some more clear way?
I haven't paid really close attention. Is this just about doing bottom-up allocation? For the non-MAP_FIXED case, if you supply a non-zero address to mmap(), don't most systems take that as a hint and try to allocate there? If they can't, do they then search from there for the closest available free region?
macOS behaves like that. Not sure about other platforms. (Then again, macOS has the Mach APIs which allow enumerating the free/allocated regions pretty directly. See the Mac-specific implementation of reserve_area() in libs/wine/mmap.c.)
-Ken
On 8/19/20 18:59, Ken Thomases wrote:
On Aug 19, 2020, at 10:06 AM, Paul Gofman [email protected] wrote:
On 8/19/20 17:56, Gabriel Ivăncescu wrote:
Sorry for the late reply, but I was a bit interested in this patch (which doesn't seem to have gone anywhere yet?).
I think this should also fix bug 38668.
I am frankly not quite sure how to move forward with this. I've got a comment from Rémi who rightfully considered the change to big for a one time change. While I do agree with that in principle, I don't see the way how to simplify the change without introducing a more complicated and ugly state in between (I have wrote some more details in the previous e-mail).
I also received a comment from Alexandre in the chat that he is sceptical about the idea because it is getting too complicated.
For now I don't see any other substantially different and good way to solve the bunch of related bugs. Besides the referenced bugs, it is also known to affect Mortal Kombat 11 and Star Wars: Battlefron II for now.
So I am not sure how to proceed from here. Should we just leave these bugs, or is there some direction of how to do it simpler which I am missing, or should I maybe try harder to structure the code in some more clear way?
I haven't paid really close attention. Is this just about doing bottom-up allocation?
This is the motivation, yeah, to force allocation order (whether TOP_DOWN is requested or bottom up if not), as a bunch of games assumes the memory pointers they get from VirtualAlloc() (without _TOP_DOWN flag of course) to be in some limited range. Some (like MK11) actually want the pointer to be within pre-Win8 user space area limit (44 bits), some other (like AION) do assume they get the pointers from lower space, e. g., AION was fine with pointers in ~16GB range. But it is not clear how to force even 44 bit limit in a simple way (which won't completely solve the issue anyway), as simply reducing the user_space_limit leads to allocating of TBs of virtual memory in reserved space before mmap() starts returning pointers from the range we need.
For the non-MAP_FIXED case, if you supply a non-zero address to mmap(), don't most systems take that as a hint and try to allocate there?
Yes, they do. We never try to allocate the space where Wine has allocated it previously, but mmap's called from host libraries are out of our control.
If they can't, do they then search from there for the closest available free region?
Linux doesn't, just allocates the same way as no hint was given (bottom up).
There is /proc/sys/vm/legacy_va_layout which, when turned on, will trigger the use of legacy VM allocator in kernel which was bottom up. That could potentially solve the problem with the games (while _TOP_DOWN allocations won't be de-facto supported that is probably less of an issue). But I would beware switching the system to some many years old unused allocator, and also that requires root privileges.
macOS behaves like that. Not sure about other platforms. (Then again, macOS has the Mach APIs which allow enumerating the free/allocated regions pretty directly. See the Mac-specific implementation of reserve_area() in libs/wine/mmap.c.)
-Ken
There is already the code in Wine which allocates the memory within non-reserved areas when zero_bits parameter is given to NtAllocateVirtualMemory() (and some apps are known to depend on this parameter to work correctly). This code gets an idea which areas are supposed to be free by surfing through the views, and then does search the found free area with the same function used in my patches. That has a performance impact as there is typically much more views than gaps between then (now when allocations are 64k aligned) and no quick way to find the free areas (FWIW, Linux kernel uses just the view tree when searches for a free VM block, but it maintains the info in view which holds the max block size to the side of the view which allows for binary search). But this has limited performance impact because zero_bits is used rarely, switching all the allocations to work this way has prohibitive performance impact.
There is also the code in Wine which allocates space within reserved areas by using free block list (maintained for reserved areas only).
My patch combines these two type of allocations by extending free block tracking for all views allocation, and handling whether the space should be allocated in reserved area or checked for conflicts with native mappings in alloc_area_in_reserved_or_between_callback(). The last patch also "lazily" marks natively mapped area as free which eliminates performance impact previously noticed without this.
On 2020-08-19 18:21, Paul Gofman wrote:
On 8/19/20 18:59, Ken Thomases wrote:
On Aug 19, 2020, at 10:06 AM, Paul Gofman [email protected] wrote:
On 8/19/20 17:56, Gabriel Ivăncescu wrote:
Sorry for the late reply, but I was a bit interested in this patch (which doesn't seem to have gone anywhere yet?).
I think this should also fix bug 38668.
I am frankly not quite sure how to move forward with this. I've got a comment from Rémi who rightfully considered the change to big for a one time change. While I do agree with that in principle, I don't see the way how to simplify the change without introducing a more complicated and ugly state in between (I have wrote some more details in the previous e-mail).
I also received a comment from Alexandre in the chat that he is sceptical about the idea because it is getting too complicated.
For now I don't see any other substantially different and good way to solve the bunch of related bugs. Besides the referenced bugs, it is also known to affect Mortal Kombat 11 and Star Wars: Battlefron II for now.
So I am not sure how to proceed from here. Should we just leave these bugs, or is there some direction of how to do it simpler which I am missing, or should I maybe try harder to structure the code in some more clear way?
I haven't paid really close attention. Is this just about doing bottom-up allocation?
This is the motivation, yeah, to force allocation order (whether TOP_DOWN is requested or bottom up if not), as a bunch of games assumes the memory pointers they get from VirtualAlloc() (without _TOP_DOWN flag of course) to be in some limited range. Some (like MK11) actually want the pointer to be within pre-Win8 user space area limit (44 bits), some other (like AION) do assume they get the pointers from lower space, e. g., AION was fine with pointers in ~16GB range. But it is not clear how to force even 44 bit limit in a simple way (which won't completely solve the issue anyway), as simply reducing the user_space_limit leads to allocating of TBs of virtual memory in reserved space before mmap() starts returning pointers from the range we need.
For the non-MAP_FIXED case, if you supply a non-zero address to mmap(), don't most systems take that as a hint and try to allocate there?
Yes, they do. We never try to allocate the space where Wine has allocated it previously, but mmap's called from host libraries are out of our control.
If they can't, do they then search from there for the closest available free region?
Linux doesn't, just allocates the same way as no hint was given (bottom up).
There is /proc/sys/vm/legacy_va_layout which, when turned on, will trigger the use of legacy VM allocator in kernel which was bottom up. That could potentially solve the problem with the games (while _TOP_DOWN allocations won't be de-facto supported that is probably less of an issue). But I would beware switching the system to some many years old unused allocator, and also that requires root privileges.
macOS behaves like that. Not sure about other platforms. (Then again, macOS has the Mach APIs which allow enumerating the free/allocated regions pretty directly. See the Mac-specific implementation of reserve_area() in libs/wine/mmap.c.)
-Ken
There is already the code in Wine which allocates the memory within non-reserved areas when zero_bits parameter is given to NtAllocateVirtualMemory() (and some apps are known to depend on this parameter to work correctly). This code gets an idea which areas are supposed to be free by surfing through the views, and then does search the found free area with the same function used in my patches. That has a performance impact as there is typically much more views than gaps between then (now when allocations are 64k aligned) and no quick way to find the free areas (FWIW, Linux kernel uses just the view tree when searches for a free VM block, but it maintains the info in view which holds the max block size to the side of the view which allows for binary search). But this has limited performance impact because zero_bits is used rarely, switching all the allocations to work this way has prohibitive performance impact.
There is also the code in Wine which allocates space within reserved areas by using free block list (maintained for reserved areas only).
My patch combines these two type of allocations by extending free block tracking for all views allocation, and handling whether the space should be allocated in reserved area or checked for conflicts with native mappings in alloc_area_in_reserved_or_between_callback(). The last patch also "lazily" marks natively mapped area as free which eliminates performance impact previously noticed without this.
I was thinking that we could do something like in the attached patches, where if the mapping in reserved areas failed in bottom up alloc, we reserve more memory and try again until it succeeds.
I think it's simpler and the existing code already handles the reservation with bisecting steps nicely. Wouldn't that work?
Of course this will cause some more memory to be definitively reserved for Windows, but that will then also be used as needed by following allocations.
Alternatively we could release the over-reserved memory every time, but that also means we will have to reserve again on next allocations.
On 8/21/20 00:47, Rémi Bernon wrote:
I was thinking that we could do something like in the attached patches, where if the mapping in reserved areas failed in bottom up alloc, we reserve more memory and try again until it succeeds.
I think it's simpler and the existing code already handles the reservation with bisecting steps nicely. Wouldn't that work?
Of course this will cause some more memory to be definitively reserved for Windows, but that will then also be used as needed by following allocations.
Alternatively we could release the over-reserved memory every time, but that also means we will have to reserve again on next allocations.
I guess something like that can work, but I don't understand how playing harder with reserve areas is simpler or less hacky. But maybe its only me.
What will happen if there is a native mapping close to the previous reserve area end and reserve_area() will shrink its size below the size we need? Anyway, that can be solved some way by introducing a smarter search probably between the lines of existing search in try_map_free_area. And yeah, not immediately clear to me also if the reserved areas are needed to given back or not.
What I can't understand yet, how it is simpler or better to increase those free areas each time we run out of space with them (top down allocs are rare, and there is no "don't care" flag). It IMO further twists the "free area" - "reserved area" allocation duality. Does saving a few lines in the patch (compared to the one which does a unified search algorithm) worth all of it (even if it is still so after bringing in some missing bits)?
On 2020-08-21 00:03, Paul Gofman wrote:
On 8/21/20 00:47, Rémi Bernon wrote:
I was thinking that we could do something like in the attached patches, where if the mapping in reserved areas failed in bottom up alloc, we reserve more memory and try again until it succeeds.
I think it's simpler and the existing code already handles the reservation with bisecting steps nicely. Wouldn't that work?
Of course this will cause some more memory to be definitively reserved for Windows, but that will then also be used as needed by following allocations.
Alternatively we could release the over-reserved memory every time, but that also means we will have to reserve again on next allocations.
I guess something like that can work, but I don't understand how playing harder with reserve areas is simpler or less hacky. But maybe its only me.
What will happen if there is a native mapping close to the previous reserve area end and reserve_area() will shrink its size below the size we need? Anyway, that can be solved some way by introducing a smarter search probably between the lines of existing search in try_map_free_area. And yeah, not immediately clear to me also if the reserved areas are needed to given back or not.
What I can't understand yet, how it is simpler or better to increase those free areas each time we run out of space with them (top down allocs are rare, and there is no "don't care" flag). It IMO further twists the "free area" - "reserved area" allocation duality.
Well the free ranges were mostly meant as an optimization over iterating the view tree, and they only track free addresses from the Windows point of view. The reserved area, are the addresses that Wine has reserved for its Windows usage, and that are so known not to be used by the system.
For me that works in the same way an allocator reserves larger blocks from the system virtual memory allocator, and then splits them in small blocks depending on the allocation requests that it will give to its clients.
Does saving a few lines in the patch (compared to the one which does a unified search algorithm) worth all of it (even if it is still so after bringing in some missing bits)? --
Rémi Bernon [email protected]
On 8/21/20 01:17, Rémi Bernon wrote:
On 2020-08-21 00:03, Paul Gofman wrote:
On 8/21/20 00:47, Rémi Bernon wrote:
I was thinking that we could do something like in the attached patches, where if the mapping in reserved areas failed in bottom up alloc, we reserve more memory and try again until it succeeds.
I think it's simpler and the existing code already handles the reservation with bisecting steps nicely. Wouldn't that work?
Of course this will cause some more memory to be definitively reserved for Windows, but that will then also be used as needed by following allocations.
Alternatively we could release the over-reserved memory every time, but that also means we will have to reserve again on next allocations.
I guess something like that can work, but I don't understand how playing harder with reserve areas is simpler or less hacky. But maybe its only me.
What will happen if there is a native mapping close to the previous reserve area end and reserve_area() will shrink its size below the size we need? Anyway, that can be solved some way by introducing a smarter search probably between the lines of existing search in try_map_free_area. And yeah, not immediately clear to me also if the reserved areas are needed to given back or not.
What I can't understand yet, how it is simpler or better to increase those free areas each time we run out of space with them (top down allocs are rare, and there is no "don't care" flag). It IMO further twists the "free area" - "reserved area" allocation duality.
Well the free ranges were mostly meant as an optimization over iterating the view tree, and they only track free addresses from the Windows point of view. The reserved area, are the addresses that Wine has reserved for its Windows usage, and that are so known not to be used by the system.
Yes, but it was previously implied that they can be reserved once at start. They apparently were not meant to reserve all space for Wine VM allocations, probably they are needed to make address conflicts for that non-relocatable modules less likely.
For me that works in the same way an allocator reserves larger blocks from the system virtual memory allocator, and then splits them in small blocks depending on the allocation requests that it will give to its clients.
In a way, yes. But it is not clear to me which exactly benefit do you see in reserving an extra space in advance? And doing it in a way that now we will have distinct top down and bottom up areas?
Do you have concerns about the performance? Will it help if I send a test program with some test data extracted from a +virtual log with a game run which was previously hitting a problematic case? I also found DayZ Server very good for testing as it allocates an insane amount of small virtual memory chunks on startup.
BTW I doubt anything like that with hard distinction between top-down and bottom up areas can work for x86 where we already face VM exhaustion in many cases. It, again, can probably be solved by adding some heuristics and better handling "first chance" out of memory cases or going from top down / bottom up distinction to something neater, but it won't make the thing simpler. Or it should be a separate path for x64 only, which is also not simplifying things.
On 2020-08-21 00:50, Paul Gofman wrote:
On 8/21/20 01:17, Rémi Bernon wrote:
On 2020-08-21 00:03, Paul Gofman wrote:
On 8/21/20 00:47, Rémi Bernon wrote:
I was thinking that we could do something like in the attached patches, where if the mapping in reserved areas failed in bottom up alloc, we reserve more memory and try again until it succeeds.
I think it's simpler and the existing code already handles the reservation with bisecting steps nicely. Wouldn't that work?
Of course this will cause some more memory to be definitively reserved for Windows, but that will then also be used as needed by following allocations.
Alternatively we could release the over-reserved memory every time, but that also means we will have to reserve again on next allocations.
I guess something like that can work, but I don't understand how playing harder with reserve areas is simpler or less hacky. But maybe its only me.
What will happen if there is a native mapping close to the previous reserve area end and reserve_area() will shrink its size below the size we need? Anyway, that can be solved some way by introducing a smarter search probably between the lines of existing search in try_map_free_area. And yeah, not immediately clear to me also if the reserved areas are needed to given back or not.
What I can't understand yet, how it is simpler or better to increase those free areas each time we run out of space with them (top down allocs are rare, and there is no "don't care" flag). It IMO further twists the "free area" - "reserved area" allocation duality.
Well the free ranges were mostly meant as an optimization over iterating the view tree, and they only track free addresses from the Windows point of view. The reserved area, are the addresses that Wine has reserved for its Windows usage, and that are so known not to be used by the system.
Yes, but it was previously implied that they can be reserved once at start. They apparently were not meant to reserve all space for Wine VM allocations, probably they are needed to make address conflicts for that non-relocatable modules less likely.
For me that works in the same way an allocator reserves larger blocks from the system virtual memory allocator, and then splits them in small blocks depending on the allocation requests that it will give to its clients.
In a way, yes. But it is not clear to me which exactly benefit do you see in reserving an extra space in advance? And doing it in a way that now we will have distinct top down and bottom up areas?
FWIW, the logic could be extended to the top down allocations in a similar way, and completely replace the non-reserved allocation case. Maybe that could even simplify the code?
Do you have concerns about the performance? Will it help if I send a test program with some test data extracted from a +virtual log with a game run which was previously hitting a problematic case? I also found DayZ Server very good for testing as it allocates an insane amount of small virtual memory chunks on startup.
BTW I doubt anything like that with hard distinction between top-down and bottom up areas can work for x86 where we already face VM exhaustion in many cases. It, again, can probably be solved by adding some heuristics and better handling "first chance" out of memory cases or going from top down / bottom up distinction to something neater, but it won't make the thing simpler. Or it should be a separate path for x64 only, which is also not simplifying things.
Yes, I added that as a quick way to prevent allocations from succeeding in the top reserved areas if it was supposed to be bottom up, but the idea isn't to prevent it if it is the only free memory region.
That could even be an issue as the code will try forever until it succeeds, or exhaust memory, which isn't going to end well in that case.
I thought of limiting the number of retries, or it could be x64 specific indeed -- IIRC the problem with high pointers doesn't happen on 32bit anyway.
On 8/21/20 09:06, Rémi Bernon wrote:
On 2020-08-21 00:50, Paul Gofman wrote:
On 8/21/20 01:17, Rémi Bernon wrote:
On 2020-08-21 00:03, Paul Gofman wrote:
On 8/21/20 00:47, Rémi Bernon wrote:
I was thinking that we could do something like in the attached patches, where if the mapping in reserved areas failed in bottom up alloc, we reserve more memory and try again until it succeeds.
I think it's simpler and the existing code already handles the reservation with bisecting steps nicely. Wouldn't that work?
Of course this will cause some more memory to be definitively reserved for Windows, but that will then also be used as needed by following allocations.
Alternatively we could release the over-reserved memory every time, but that also means we will have to reserve again on next allocations.
I guess something like that can work, but I don't understand how playing harder with reserve areas is simpler or less hacky. But maybe its only me.
What will happen if there is a native mapping close to the previous reserve area end and reserve_area() will shrink its size below the size we need? Anyway, that can be solved some way by introducing a smarter search probably between the lines of existing search in try_map_free_area. And yeah, not immediately clear to me also if the reserved areas are needed to given back or not.
What I can't understand yet, how it is simpler or better to increase those free areas each time we run out of space with them (top down allocs are rare, and there is no "don't care" flag). It IMO further twists the "free area" - "reserved area" allocation duality.
Well the free ranges were mostly meant as an optimization over iterating the view tree, and they only track free addresses from the Windows point of view. The reserved area, are the addresses that Wine has reserved for its Windows usage, and that are so known not to be used by the system.
Yes, but it was previously implied that they can be reserved once at start. They apparently were not meant to reserve all space for Wine VM allocations, probably they are needed to make address conflicts for that non-relocatable modules less likely.
For me that works in the same way an allocator reserves larger blocks from the system virtual memory allocator, and then splits them in small blocks depending on the allocation requests that it will give to its clients.
In a way, yes. But it is not clear to me which exactly benefit do you see in reserving an extra space in advance? And doing it in a way that now we will have distinct top down and bottom up areas?
FWIW, the logic could be extended to the top down allocations in a similar way, and completely replace the non-reserved allocation case. Maybe that could even simplify the code?
Do you have concerns about the performance? Will it help if I send a test program with some test data extracted from a +virtual log with a game run which was previously hitting a problematic case? I also found DayZ Server very good for testing as it allocates an insane amount of small virtual memory chunks on startup.
BTW I doubt anything like that with hard distinction between top-down and bottom up areas can work for x86 where we already face VM exhaustion in many cases. It, again, can probably be solved by adding some heuristics and better handling "first chance" out of memory cases or going from top down / bottom up distinction to something neater, but it won't make the thing simpler. Or it should be a separate path for x64 only, which is also not simplifying things.
Yes, I added that as a quick way to prevent allocations from succeeding in the top reserved areas if it was supposed to be bottom up, but the idea isn't to prevent it if it is the only free memory region.
That could even be an issue as the code will try forever until it succeeds, or exhaust memory, which isn't going to end well in that case.
I thought of limiting the number of retries, or it could be x64 specific indeed -- IIRC the problem with high pointers doesn't happen on 32bit anyway.
If ordered allocations are preferred through reserved areas, why then the allocations for nonzero zero_bits parameter are done the other way? Should not we start then from getting rid of existing code searching the areas between the views with try_mmap_free_area() in favor of dynamically extending reserved areas? I still think it is easier and more straightforward to rather optimize the implemented zero_bits path and extend that for all the allocations which I was essentially trying to do in my patches.
Signed-off-by: Paul Gofman [email protected] --- dlls/ntdll/unix/virtual.c | 134 ++++++++++++++++++++++++++++++-------- 1 file changed, 106 insertions(+), 28 deletions(-)
diff --git a/dlls/ntdll/unix/virtual.c b/dlls/ntdll/unix/virtual.c index 5d626ee85c4..43c149094d7 100644 --- a/dlls/ntdll/unix/virtual.c +++ b/dlls/ntdll/unix/virtual.c @@ -99,6 +99,7 @@ struct file_view #define VPROT_WRITEWATCH 0x40 /* per-mapping protection flags */ #define VPROT_SYSTEM 0x0200 /* system view (underlying mmap not under our control) */ +#define VPROT_NATIVE 0x0400
/* Conversion from VPROT_* to Win32 flags */ static const BYTE VIRTUAL_Win32Flags[16] = @@ -824,7 +825,9 @@ static void dump_view( struct file_view *view ) BYTE prot = get_page_vprot( addr );
TRACE( "View: %p - %p", addr, addr + view->size - 1 ); - if (view->protect & VPROT_SYSTEM) + if (view->protect & VPROT_NATIVE) + TRACE(" (native)\n"); + else if (view->protect & VPROT_SYSTEM) TRACE( " (builtin image)\n" ); else if (view->protect & SEC_IMAGE) TRACE( " (image)\n" ); @@ -966,6 +969,16 @@ static struct file_view *find_view_range( const void *addr, size_t size ) return NULL; }
+struct alloc_area +{ + char *map_area_start, *map_area_end, *result; + size_t size; + ptrdiff_t step; + int unix_prot; + BOOL top_down; + char *native_mapped; + size_t native_mapped_size; +};
/*********************************************************************** * try_map_free_area @@ -973,26 +986,34 @@ static struct file_view *find_view_range( const void *addr, size_t size ) * Try mmaping some expected free memory region, eventually stepping and * retrying inside it, and return where it actually succeeded, or NULL. */ -static void* try_map_free_area( void *base, void *end, ptrdiff_t step, - void *start, size_t size, int unix_prot ) +static void* try_map_free_area( struct alloc_area *area, void *base, void *end, void *start ) { + ptrdiff_t step = area->step; void *ptr;
- while (start && base <= start && (char*)start + size <= (char*)end) + while (start && base <= start && (char*)start + area->size <= (char*)end) { - if ((ptr = wine_anon_mmap( start, size, unix_prot, MAP_FIXED_NOREPLACE )) == start) + if ((ptr = wine_anon_mmap( start, area->size, area->unix_prot, + MAP_FIXED_NOREPLACE )) == start) return start; TRACE( "Found free area is already mapped, start %p.\n", start );
if (ptr == (void *)-1 && errno != EEXIST) { ERR( "wine_anon_mmap() error %s, range %p-%p, unix_prot %#x.\n", - strerror(errno), start, (char *)start + size, unix_prot ); + strerror(errno), start, (char *)start + area->size, area->unix_prot ); return NULL; }
if (ptr != (void *)-1) - munmap( ptr, size ); + munmap( ptr, area->size ); + + if (!area->native_mapped && step) + { + area->native_mapped = start; + area->native_mapped_size = step > 0 ? step : -step; + area->native_mapped_size = min(area->native_mapped_size, (char *)end - (char *)start); + }
if ((step > 0 && (char *)end - (char *)start < step) || (step < 0 && (char *)start - (char *)base < -step) || @@ -1446,15 +1467,6 @@ static inline void *unmap_extra_space( void *ptr, size_t total_size, size_t want return ptr; }
-struct alloc_area -{ - char *map_area_start, *map_area_end, *result; - size_t size; - ptrdiff_t step; - int unix_prot; - BOOL top_down; -}; - static int CDECL alloc_area_in_reserved_or_between_callback( void *start, SIZE_T size, void *arg ) { char *intersect_start, *intersect_end; @@ -1480,8 +1492,8 @@ static int CDECL alloc_area_in_reserved_or_between_callback( void *start, SIZE_T
if (alloc_start >= intersect_end) { - if ((area->result = try_map_free_area( area->map_area_start, alloc_start + size, area->step, - alloc_start, area->size, area->unix_prot ))) + if ((area->result = try_map_free_area( area, area->map_area_start, + alloc_start + size, alloc_start ))) return 1; }
@@ -1515,8 +1527,8 @@ static int CDECL alloc_area_in_reserved_or_between_callback( void *start, SIZE_T
if (intersect_start - area->map_area_start >= area->size) { - if ((area->result = try_map_free_area( area->map_area_start, intersect_start, area->step, - area->map_area_start, area->size, area->unix_prot ))) + if ((area->result = try_map_free_area( area, area->map_area_start, + intersect_start, area->map_area_start ))) return 1; }
@@ -1571,8 +1583,7 @@ static void *alloc_free_area_in_range( struct alloc_area *area, char *base, char if (start >= area->map_area_end || start < area->map_area_start) return NULL;
- return try_map_free_area( area->map_area_start, start + area->size, area->step, - start, area->size, area->unix_prot ); + return try_map_free_area( area, area->map_area_start, start + area->size, start ); } else { @@ -1581,8 +1592,7 @@ static void *alloc_free_area_in_range( struct alloc_area *area, char *base, char || area->map_area_end - start < area->size) return NULL;
- return try_map_free_area( start, area->map_area_end, area->step, - start, area->size, area->unix_prot ); + return try_map_free_area( area, start, area->map_area_end, start ); } }
@@ -1592,6 +1602,7 @@ static void *alloc_free_area( void *limit, size_t size, BOOL top_down, int unix_ char *reserve_start, *reserve_end; struct alloc_area area; char *base, *end; + NTSTATUS status; int ranges_inc;
TRACE("limit %p, size %p, top_down %#x.\n", limit, (void *)size, top_down); @@ -1647,16 +1658,67 @@ static void *alloc_free_area( void *limit, size_t size, BOOL top_down, int unix_ { /* range is split in two by the preloader reservation, try first part. */ if ((area.result = alloc_free_area_in_range( &area, base, reserve_start ))) - return area.result; + break; /* then fall through to try second part. */ base = reserve_end; } }
if ((area.result = alloc_free_area_in_range( &area, base, end ))) - return area.result; + break; } - return NULL; + + if (area.native_mapped) + { + char *native_mapped_start, *native_mapped_end; + + TRACE("Excluding %p - %p from free list.\n", + area.native_mapped, (char *)area.native_mapped + area.native_mapped_size ); + + native_mapped_start = ROUND_ADDR(area.native_mapped, granularity_mask); + native_mapped_end = ROUND_ADDR((char *)area.native_mapped + area.native_mapped_size + granularity_mask, + granularity_mask); + + if (area.result >= native_mapped_end || area.result + size < native_mapped_start) + /* In case of top down allocation try_map_free_area() result area can overlap the + * area previously marked as native if the latter was unmapped behind our back. */ + { + struct file_view *prev, *next; + + prev = find_view_range( native_mapped_start - 1, native_mapped_end - native_mapped_start + 2 ); + if (prev && (char *)prev->base >= native_mapped_end) + { + next = prev; + prev = WINE_RB_ENTRY_VALUE( wine_rb_prev( &next->entry ), struct file_view, entry ); + } + else if (prev) + { + next = WINE_RB_ENTRY_VALUE( wine_rb_next( &prev->entry ), struct file_view, entry ); + } + else + { + next = NULL; + } + + if (prev && prev->protect & VPROT_NATIVE && (char *)prev->base + prev->size >= native_mapped_start) + { + assert( (char *)prev->base + prev->size == native_mapped_start ); + native_mapped_start = prev->base; + delete_view( prev ); + } + if (next && next->protect & VPROT_NATIVE && native_mapped_end >= (char *)next->base) + { + assert( native_mapped_end == (char *)next->base ); + native_mapped_end = (char *)next->base + next->size; + delete_view( next ); + } + if ((status = create_view( &next, native_mapped_start, native_mapped_end - native_mapped_start, + VPROT_SYSTEM | VPROT_NATIVE ))) + ERR("Could not cretae view for natively mapped area, status %#x.\n", status); + } + } + + return area.result; }
/*********************************************************************** @@ -1716,6 +1778,17 @@ static NTSTATUS map_fixed_area( void *base, size_t size, unsigned int vprot ) return STATUS_SUCCESS; }
+static void clear_native_views(void) +{ + struct file_view *view, *next_view; + + WINE_RB_FOR_EACH_ENTRY_DESTRUCTOR( view, next_view, &views_tree, struct file_view, entry ) + { + if (view->protect & VPROT_NATIVE) + delete_view( view ); + } +} + /*********************************************************************** * map_view * @@ -1739,7 +1812,12 @@ static NTSTATUS map_view( struct file_view **view_ret, void *base, size_t size, else if (!(ptr = alloc_free_area( (void*)(get_zero_bits_64_mask( zero_bits_64 ) & (UINT_PTR)user_space_limit), size, top_down, get_unix_prot( vprot ) ))) { - return STATUS_NO_MEMORY; + WARN("Allocation failed, clearing native views.\n"); + + clear_native_views(); + if (!(ptr = alloc_free_area( (void*)(get_zero_bits_64_mask( zero_bits_64 ) + & (UINT_PTR)user_space_limit), size, top_down, get_unix_prot( vprot ) ))) + return STATUS_NO_MEMORY; } status = create_view( view_ret, ptr, size, vprot ); if (status != STATUS_SUCCESS) unmap_area( ptr, size );
On 2020-07-23 18:32, Paul Gofman wrote:
Signed-off-by: Paul Gofman [email protected]
dlls/ntdll/unix/virtual.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/dlls/ntdll/unix/virtual.c b/dlls/ntdll/unix/virtual.c index 883e5cff927..7b33b3e82ac 100644 --- a/dlls/ntdll/unix/virtual.c +++ b/dlls/ntdll/unix/virtual.c @@ -1033,6 +1033,7 @@ static void* try_map_free_area( void *base, void *end, ptrdiff_t step, step == 0) break; start = (char *)start + step;
step *= 2; } return NULL;
Could we use mincore as a better search heuristic, like what was suggested before, instead of this which doesn't feel very right?
On 8/6/20 18:22, Rémi Bernon wrote:
On 2020-07-23 18:32, Paul Gofman wrote:
Signed-off-by: Paul Gofman [email protected]
dlls/ntdll/unix/virtual.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/dlls/ntdll/unix/virtual.c b/dlls/ntdll/unix/virtual.c index 883e5cff927..7b33b3e82ac 100644 --- a/dlls/ntdll/unix/virtual.c +++ b/dlls/ntdll/unix/virtual.c @@ -1033,6 +1033,7 @@ static void* try_map_free_area( void *base, void *end, ptrdiff_t step, step == 0) break; start = (char *)start + step; + step *= 2; } return NULL;
Could we use mincore as a better search heuristic, like what was suggested before, instead of this which doesn't feel very right?
I though of that and even experimented with mincore() a bit at some moment.
First of all, the information that we care for from mincore() is its return status, which tells us if there is any page mapped in the requested range or not. So we can't use it like, e. g., query the range and understand which is the last page mapped to jump accordingly. We can only know if there is some page mapped in the range. Which of course still makes possible to query the memory range which is quicker than performing mmap() with OS chosen address, but that is the case on non-Linux archs only where we don't have MMAP_FIXED_NOREPLACE flag. We could potentially introduce some algorithm for probing parts of the (already failed) mmap range to set the step in a more accurate way, but that will cost, as mincore() is also taking some time in kernel and involves VMA read locking. So it looks like whatever we do here, it won't work as a sole measure to solve the performance issues with native mapping conflicts. The major performance impact circumvention is the last patch in the series. Yet some measurable slowdown was observed while crawling through long native mapped area (before they all will end up marked). Increasing the step is effective and very simple solution.
Do you feel like I am missing some sensible way to use mincore() to this part better?