[PATCH 3/4] ntdll: Force virtual memory allocation order.

Rémi Bernon rbernon at codeweavers.com
Thu Aug 6 10:42:39 CDT 2020


On 2020-07-23 18:32, Paul Gofman wrote:
> Windows allocates virtual memory strictly bottom up or
> top down depending on the requested flags. Modern Linux
> VM allocator always allocates memory top down. Some
> applications break if the allocated memory addresses
> are from higher memory than they expect.
> 
> Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=48175
> Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=46568
> Signed-off-by: Paul Gofman <pgofman at codeweavers.com>
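
For anyone following along, the Windows behaviour described in the
commit message is easy to observe directly; a quick untested sketch,
using only VirtualAlloc and the MEM_TOP_DOWN flag:

    #include <windows.h>
    #include <stdio.h>

    int main(void)
    {
        /* default: the VM manager scans bottom-up, so successive
         * reservations typically get increasing addresses */
        void *a = VirtualAlloc( NULL, 0x10000, MEM_RESERVE, PAGE_NOACCESS );
        void *b = VirtualAlloc( NULL, 0x10000, MEM_RESERVE, PAGE_NOACCESS );

        /* MEM_TOP_DOWN: scan from the top instead, so addresses
         * typically decrease from one allocation to the next */
        void *c = VirtualAlloc( NULL, 0x10000, MEM_RESERVE | MEM_TOP_DOWN, PAGE_NOACCESS );
        void *d = VirtualAlloc( NULL, 0x10000, MEM_RESERVE | MEM_TOP_DOWN, PAGE_NOACCESS );

        printf( "bottom-up: %p then %p\n", a, b );
        printf( "top-down:  %p then %p\n", c, d );
        return 0;
    }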
> ---
>   dlls/ntdll/unix/virtual.c | 417 ++++++++++++++++++--------------------
>   1 file changed, 201 insertions(+), 216 deletions(-)
> 
> diff --git a/dlls/ntdll/unix/virtual.c b/dlls/ntdll/unix/virtual.c
> index 13775d654bd..5d626ee85c4 100644
> --- a/dlls/ntdll/unix/virtual.c
> +++ b/dlls/ntdll/unix/virtual.c
> @@ -967,44 +967,6 @@ static struct file_view *find_view_range( const void *addr, size_t size )
>   }
>   
>   
> -/***********************************************************************
> - *           find_view_inside_range
> - *
> - * Find first (resp. last, if top_down) view inside a range.
> - * virtual_mutex must be held by caller.
> - */
> -static struct wine_rb_entry *find_view_inside_range( void **base_ptr, void **end_ptr, int top_down )
> -{
> -    struct wine_rb_entry *first = NULL, *ptr = views_tree.root;
> -    void *base = *base_ptr, *end = *end_ptr;
> -
> -    /* find the first (resp. last) view inside the range */
> -    while (ptr)
> -    {
> -        struct file_view *view = WINE_RB_ENTRY_VALUE( ptr, struct file_view, entry );
> -        if ((char *)view->base + view->size >= (char *)end)
> -        {
> -            end = min( end, view->base );
> -            ptr = ptr->left;
> -        }
> -        else if (view->base <= base)
> -        {
> -            base = max( (char *)base, (char *)view->base + view->size );
> -            ptr = ptr->right;
> -        }
> -        else
> -        {
> -            first = ptr;
> -            ptr = top_down ? ptr->right : ptr->left;
> -        }
> -    }
> -
> -    *base_ptr = base;
> -    *end_ptr = end;
> -    return first;
> -}
> -
> -
>   /***********************************************************************
>    *           try_map_free_area
>    *
> @@ -1043,110 +1005,6 @@ static void* try_map_free_area( void *base, void *end, ptrdiff_t step,
>       return NULL;
>   }
>   
> -
> -/***********************************************************************
> - *           map_free_area
> - *
> - * Find a free area between views inside the specified range and map it.
> - * virtual_mutex must be held by caller.
> - */
> -static void *map_free_area( void *base, void *end, size_t size, int top_down, int unix_prot )
> -{
> -    struct wine_rb_entry *first = find_view_inside_range( &base, &end, top_down );
> -    ptrdiff_t step = top_down ? -(granularity_mask + 1) : (granularity_mask + 1);
> -    void *start;
> -
> -    if (top_down)
> -    {
> -        start = ROUND_ADDR( (char *)end - size, granularity_mask );
> -        if (start >= end || start < base) return NULL;
> -
> -        while (first)
> -        {
> -            struct file_view *view = WINE_RB_ENTRY_VALUE( first, struct file_view, entry );
> -            if ((start = try_map_free_area( (char *)view->base + view->size, (char *)start + size, step,
> -                                            start, size, unix_prot ))) break;
> -            start = ROUND_ADDR( (char *)view->base - size, granularity_mask );
> -            /* stop if remaining space is not large enough */
> -            if (!start || start >= end || start < base) return NULL;
> -            first = wine_rb_prev( first );
> -        }
> -    }
> -    else
> -    {
> -        start = ROUND_ADDR( (char *)base + granularity_mask, granularity_mask );
> -        if (!start || start >= end || (char *)end - (char *)start < size) return NULL;
> -
> -        while (first)
> -        {
> -            struct file_view *view = WINE_RB_ENTRY_VALUE( first, struct file_view, entry );
> -            if ((start = try_map_free_area( start, view->base, step,
> -                                            start, size, unix_prot ))) break;
> -            start = ROUND_ADDR( (char *)view->base + view->size + granularity_mask, granularity_mask );
> -            /* stop if remaining space is not large enough */
> -            if (!start || start >= end || (char *)end - (char *)start < size) return NULL;
> -            first = wine_rb_next( first );
> -        }
> -    }
> -
> -    if (!first)
> -        return try_map_free_area( base, end, step, start, size, unix_prot );
> -
> -    return start;
> -}
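
A side note for readers less steeped in virtual.c: all the ROUND_ADDR
arithmetic here rounds an address *down* to the allocation granularity
(granularity_mask should be 0xffff, matching the 64k Windows
granularity). If I remember the macro right, it is essentially:

    /* round addr down to the granularity; rounding up is done by first
     * adding granularity_mask, as in the code above */
    #define ROUND_ADDR(addr, mask) \
        ((void *)((UINT_PTR)(addr) & ~(UINT_PTR)(mask)))

So ROUND_ADDR( base + granularity_mask, granularity_mask ) rounds up,
and ROUND_ADDR( end - size, granularity_mask ) is the highest aligned
start that still fits size bytes below end.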
> -
> -
> -/***********************************************************************
> - *           find_reserved_free_area
> - *
> - * Find a free area between views inside the specified range.
> - * virtual_mutex must be held by caller.
> - * The range must be inside the preloader reserved range.
> - */
> -static void *find_reserved_free_area( void *base, void *end, size_t size, int top_down )
> -{
> -    struct range_entry *range;
> -    void *start;
> -
> -    base = ROUND_ADDR( (char *)base + granularity_mask, granularity_mask );
> -    end = (char *)ROUND_ADDR( (char *)end - size, granularity_mask ) + size;
> -
> -    if (top_down)
> -    {
> -        start = (char *)end - size;
> -        range = free_ranges_lower_bound( start );
> -        assert(range != free_ranges_end && range->end >= start);
> -
> -        if ((char *)range->end - (char *)start < size) start = ROUND_ADDR( (char *)range->end - size, granularity_mask );
> -        do
> -        {
> -            if (start >= end || start < base || (char *)end - (char *)start < size) return NULL;
> -            if (start < range->end && start >= range->base && (char *)range->end - (char *)start >= size) break;
> -            if (--range < free_ranges) return NULL;
> -            start = ROUND_ADDR( (char *)range->end - size, granularity_mask );
> -        }
> -        while (1);
> -    }
> -    else
> -    {
> -        start = base;
> -        range = free_ranges_lower_bound( start );
> -        assert(range != free_ranges_end && range->end >= start);
> -
> -        if (start < range->base) start = ROUND_ADDR( (char *)range->base + granularity_mask, granularity_mask );
> -        do
> -        {
> -            if (start >= end || start < base || (char *)end - (char *)start < size) return NULL;
> -            if (start < range->end && start >= range->base && (char *)range->end - (char *)start >= size) break;
> -            if (++range == free_ranges_end) return NULL;
> -            start = ROUND_ADDR( (char *)range->base + granularity_mask, granularity_mask );
> -        }
> -        while (1);
> -    }
> -    return start;
> -}
> -
> -
>   /***********************************************************************
>    *           add_reserved_area
>    *
> @@ -1315,8 +1173,7 @@ static void delete_view( struct file_view *view ) /* [in] View */
>   {
>       if (!(view->protect & VPROT_SYSTEM)) unmap_area( view->base, view->size );
>       set_page_vprot( view->base, view->size, 0 );
> -    if (mmap_is_in_reserved_area( view->base, view->size ))
> -        free_ranges_remove_view( view );
> +    free_ranges_remove_view( view );
>       wine_rb_remove( &views_tree, &view->entry );
>       *(struct file_view **)view = next_free_view;
>       next_free_view = view;
> @@ -1364,8 +1221,7 @@ static NTSTATUS create_view( struct file_view **view_ret, void *base, size_t siz
>       set_page_vprot( base, size, vprot );
>   
>       wine_rb_put( &views_tree, view->base, &view->entry );
> -    if (mmap_is_in_reserved_area( view->base, view->size ))
> -        free_ranges_insert_view( view );
> +    free_ranges_insert_view( view );
>   
>       *view_ret = view;
>   
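
To make the extended bookkeeping concrete: the free ranges are a sorted
array of [base, end) entries covering the free parts of the address
space, and free_ranges_lower_bound is a binary search for the first
entry whose end is at or above a given address, which is what the range
walk in alloc_free_area further down relies on. A simplified sketch of
the idea, not the exact Wine code:

    struct range_entry
    {
        void *base;  /* start of the free range */
        void *end;   /* end of the free range (exclusive) */
    };

    /* first entry in the sorted array whose end is >= addr; may return
     * one past the last entry, i.e. free_ranges_end */
    static struct range_entry *lower_bound( struct range_entry *ranges,
                                            size_t count, void *addr )
    {
        size_t lo = 0, hi = count;

        while (lo < hi)
        {
            size_t mid = lo + (hi - lo) / 2;
            if ((char *)ranges[mid].end < (char *)addr) lo = mid + 1;
            else hi = mid;
        }
        return ranges + lo;
    }

free_ranges_insert_view then carves the view out of whichever range
contains it (splitting it in two when the view lands in the middle),
and free_ranges_remove_view merges the hole back when the view goes
away.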
> @@ -1590,54 +1446,219 @@ static inline void *unmap_extra_space( void *ptr, size_t total_size, size_t want
>       return ptr;
>   }
>   
> -
>   struct alloc_area
>   {
> +    char *map_area_start, *map_area_end, *result;
>       size_t size;
> -    int    top_down;
> -    void  *limit;
> -    void  *result;
> +    ptrdiff_t step;
> +    int unix_prot;
> +    BOOL top_down;
>   };
>   
> -/***********************************************************************
> - *           alloc_reserved_area_callback
> - *
> - * Try to map some space inside a reserved area. Callback for mmap_enum_reserved_areas.
> - */
> -static int CDECL alloc_reserved_area_callback( void *start, SIZE_T size, void *arg )
> +static int CDECL alloc_area_in_reserved_or_between_callback( void *start, SIZE_T size, void *arg )
>   {
> -    struct alloc_area *alloc = arg;
> -    void *end = (char *)start + size;
> -
> -    if (start < address_space_start) start = address_space_start;
> -    if (is_beyond_limit( start, size, alloc->limit )) end = alloc->limit;
> -    if (start >= end) return 0;
> +    char *intersect_start, *intersect_end;
> +    char *end = (char *)start + size;
> +    struct alloc_area *area = arg;
> +    char *alloc_start;
>   
> -    /* make sure we don't touch the preloader reserved range */
> -    if (preload_reserve_end >= start)
> +    if (area->top_down)
>       {
> -        if (preload_reserve_end >= end)
> +        if (area->map_area_start >= end)
> +            return 1;
> +
> +        if (area->map_area_end <= (char *)start)
> +            return 0;
> +
> +        intersect_start = max((char *)start, area->map_area_start);
> +        intersect_end = min((char *)end, area->map_area_end);
> +
> +        assert(ROUND_ADDR(intersect_start, granularity_mask) == intersect_start);
> +        assert(ROUND_ADDR(intersect_end + granularity_mask - 1, granularity_mask) == intersect_end);
> +
> +        alloc_start = ROUND_ADDR( (char *)area->map_area_end - size, granularity_mask );
> +
> +        if (alloc_start >= intersect_end)
>           {
> -            if (preload_reserve_start <= start) return 0;  /* no space in that area */
> -            if (preload_reserve_start < end) end = preload_reserve_start;
> +            if ((area->result = try_map_free_area( area->map_area_start, alloc_start + size, area->step,
> +                    alloc_start, area->size, area->unix_prot )))
> +                return 1;
>           }
> -        else if (preload_reserve_start <= start) start = preload_reserve_end;
> -        else
> +
> +        alloc_start = ROUND_ADDR( intersect_end - area->size, granularity_mask );
> +        if (alloc_start >= intersect_start)
>           {
> -            /* range is split in two by the preloader reservation, try first part */
> -            if ((alloc->result = find_reserved_free_area( start, preload_reserve_start, alloc->size,
> -                                                          alloc->top_down )))
> +            if ((area->result = wine_anon_mmap( alloc_start, area->size,
> +                    area->unix_prot, MAP_FIXED )) != alloc_start)
> +                ERR("Could not map in reserved area, alloc_start %p, size %p.\n",
> +                        alloc_start, (void *)area->size);
> +            return 1;
> +        }
> +
> +        area->map_area_end = intersect_start;
> +        if (area->map_area_end - area->map_area_start < area->size)
> +            return 1;
> +    }
> +    else
> +    {
> +        if (area->map_area_end <= (char *)start)
> +            return 1;
> +
> +        if (area->map_area_start >= (char *)end)
> +            return 0;
> +
> +        intersect_start = max((char *)start, area->map_area_start);
> +        intersect_end = min((char *)end, area->map_area_end);
> +
> +        assert(ROUND_ADDR(intersect_start, granularity_mask) == intersect_start);
> +        assert(ROUND_ADDR(intersect_end + granularity_mask - 1, granularity_mask) == intersect_end);
> +
> +        if (intersect_start - area->map_area_start >= area->size)
> +        {
> +            if ((area->result = try_map_free_area( area->map_area_start, intersect_start, area->step,
> +                    area->map_area_start, area->size, area->unix_prot )))
>                   return 1;
> -            /* then fall through to try second part */
> -            start = preload_reserve_end;
>           }
> +
> +        if (intersect_end - intersect_start >= area->size)
> +        {
> +            if ((area->result = wine_anon_mmap( intersect_start, area->size, area->unix_prot, MAP_FIXED ))
> +                    != intersect_start)
> +                ERR("Could not map in reserved area.\n");
> +            return 1;
> +        }
> +        area->map_area_start = intersect_end;
> +        if (area->map_area_end - area->map_area_start < area->size)
> +            return 1;
>       }
> -    if ((alloc->result = find_reserved_free_area( start, end, alloc->size, alloc->top_down )))
> -        return 1;
>   
>       return 0;
>   }
>   
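
One thing worth spelling out, since both branches of the callback
depend on it: mmap_enum_reserved_areas stops as soon as the callback
returns non-zero, and walks the areas in reverse when top_down is set.
Schematically, and this is a hypothetical simplification rather than
the real code:

    struct reserved_area { void *base; size_t size; };

    /* walk the areas (in reverse for top_down) and stop as soon as the
     * callback returns non-zero; the return value tells the caller
     * whether a callback stopped the enumeration */
    static int enum_reserved_areas( int (CDECL *func)(void *, SIZE_T, void *),
                                    void *arg, int top_down,
                                    const struct reserved_area *areas,
                                    unsigned int count )
    {
        unsigned int i;
        int ret = 0;

        for (i = 0; i < count && !ret; i++)
        {
            const struct reserved_area *a = top_down ? areas + count - 1 - i
                                                     : areas + i;
            ret = func( a->base, a->size, arg );
        }
        return ret;
    }

So returning 1 from the callback means either "allocated, stop" or
"past the usable range, stop"; area->result disambiguates the two.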
> +static void *alloc_free_area_in_range( struct alloc_area *area, char *base, char *end )
> +{
> +    char *start;
> +
> +    TRACE("range %p-%p.\n", base, end);
> +
> +    if (base >= end)
> +        return NULL;
> +
> +    area->map_area_start = base;
> +    area->map_area_end = end;
> +
> +    if (area->top_down)
> +    {
> +        start = ROUND_ADDR( end - area->size, granularity_mask );
> +        if (start >= end || start < base)
> +            return NULL;
> +    }
> +    else
> +    {
> +        start = ROUND_ADDR( base + granularity_mask, granularity_mask );
> +        if (!start || start >= end || (char *)end - (char *)start < area->size)
> +            return NULL;
> +    }
> +
> +    mmap_enum_reserved_areas( alloc_area_in_reserved_or_between_callback, area, area->top_down );
> +
> +    if (area->result)
> +        return area->result;
> +
> +    if (area->top_down)
> +    {
> +        start = ROUND_ADDR( area->map_area_end - area->size, granularity_mask );
> +        if (start >= area->map_area_end || start < area->map_area_start)
> +            return NULL;
> +
> +        return try_map_free_area( area->map_area_start, start + area->size, area->step,
> +                start, area->size, area->unix_prot );
> +    }
> +    else
> +    {
> +        start = ROUND_ADDR( area->map_area_start + granularity_mask, granularity_mask );
> +        if (!start || start >= area->map_area_end
> +                || area->map_area_end - start < area->size)
> +            return NULL;
> +
> +        return try_map_free_area( start, area->map_area_end, area->step,
> +                start, area->size, area->unix_prot );
> +    }
> +}
> +
> +static void *alloc_free_area( void *limit, size_t size, BOOL top_down, int unix_prot )
> +{
> +    struct range_entry *range, *ranges_start, *ranges_end;
> +    char *reserve_start, *reserve_end;
> +    struct alloc_area area;
> +    char *base, *end;
> +    int ranges_inc;
> +
> +    TRACE("limit %p, size %p, top_down %#x.\n", limit, (void *)size, top_down);
> +
> +    if (top_down)
> +    {
> +        ranges_start = free_ranges_end - 1;
> +        ranges_end = free_ranges - 1;
> +        ranges_inc = -1;
> +    }
> +    else
> +    {
> +        ranges_start = free_ranges;
> +        ranges_end = free_ranges_end;
> +        ranges_inc = 1;
> +    }
> +
> +    memset( &area, 0, sizeof(area) );
> +    area.step = top_down ? -(granularity_mask + 1) : (granularity_mask + 1);
> +    area.size = size;
> +    area.top_down = top_down;
> +    area.unix_prot = unix_prot;
> +
> +    reserve_start = ROUND_ADDR( (char *)preload_reserve_start, granularity_mask );
> +    reserve_end = ROUND_ADDR( (char *)preload_reserve_end + granularity_mask, granularity_mask );
> +
> +    for (range = ranges_start; range != ranges_end; range += ranges_inc)
> +    {
> +        base = range->base;
> +        end = range->end;
> +
> +        TRACE("range %p-%p.\n", base, end);
> +
> +        if (base < (char *)address_space_start)
> +            base = (char *)address_space_start;
> +        if (end > (char *)ROUND_ADDR( limit, granularity_mask ))
> +            end = ROUND_ADDR( limit, granularity_mask );
> +
> +        if (reserve_end >= base)
> +        {
> +            if (reserve_end >= end)
> +            {
> +                if (reserve_start <= base)
> +                    continue;  /* no space in that area */
> +                if (reserve_start < end)
> +                    end = reserve_start;
> +            }
> +            else if (reserve_start <= base)
> +            {
> +                base = reserve_end;
> +            }
> +            else
> +            {
> +                /* range is split in two by the preloader reservation, try first part. */
> +                if ((area.result = alloc_free_area_in_range( &area, base, reserve_start )))
> +                    return area.result;
> +                /* then fall through to try second part. */
> +                base = reserve_end;
> +            }
> +        }
> +
> +        if ((area.result = alloc_free_area_in_range( &area, base, end )))
> +            return area.result;
> +    }
> +    return NULL;
> +}
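
The four preload_reserve cases in the loop above took me a moment to
untangle; unless I misread them, they amount to intersecting each
candidate range with [reserve_start, reserve_end) and keeping whatever
is left over, i.e. something like this restatement (ignoring which of
the two pieces gets tried first):

    /* intersect [base, end) with the preloader reservation and return
     * the 0, 1 or 2 leftover pieces; 0 means the reservation swallows
     * the whole range */
    static int clip_preload_reserve( char *base, char *end,
                                     char *reserve_start, char *reserve_end,
                                     char *piece_base[2], char *piece_end[2] )
    {
        int count = 0;

        if (reserve_end <= base || reserve_start >= end)
        {
            piece_base[0] = base; piece_end[0] = end;  /* no overlap */
            return 1;
        }
        if (reserve_start > base)  /* piece below the reservation */
        {
            piece_base[count] = base; piece_end[count++] = reserve_start;
        }
        if (reserve_end < end)     /* piece above the reservation */
        {
            piece_base[count] = reserve_end; piece_end[count++] = end;
        }
        return count;
    }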
> +
>   /***********************************************************************
>    *           map_fixed_area
>    *
> @@ -1715,48 +1736,11 @@ static NTSTATUS map_view( struct file_view **view_ret, void *base, size_t size,
>           if (status != STATUS_SUCCESS) return status;
>           ptr = base;
>       }
> -    else
> +    else if (!(ptr = alloc_free_area( (void*)(get_zero_bits_64_mask( zero_bits_64 )
> +            & (UINT_PTR)user_space_limit), size, top_down, get_unix_prot( vprot ) )))
>       {
> -        size_t view_size = size + granularity_mask + 1;
> -        struct alloc_area alloc;
> -
> -        alloc.size = size;
> -        alloc.top_down = top_down;
> -        alloc.limit = (void*)(get_zero_bits_64_mask( zero_bits_64 ) & (UINT_PTR)user_space_limit);
> -
> -        if (mmap_enum_reserved_areas( alloc_reserved_area_callback, &alloc, top_down ))
> -        {
> -            ptr = alloc.result;
> -            TRACE( "got mem in reserved area %p-%p\n", ptr, (char *)ptr + size );
> -            if (wine_anon_mmap( ptr, size, get_unix_prot(vprot), MAP_FIXED ) != ptr)
> -                return STATUS_INVALID_PARAMETER;
> -            goto done;
> -        }
> -
> -        if (zero_bits_64)
> -        {
> -            if (!(ptr = map_free_area( address_space_start, alloc.limit, size,
> -                                       top_down, get_unix_prot(vprot) )))
> -                return STATUS_NO_MEMORY;
> -            TRACE( "got mem with map_free_area %p-%p\n", ptr, (char *)ptr + size );
> -            goto done;
> -        }
> -
> -        for (;;)
> -        {
> -            if ((ptr = wine_anon_mmap( NULL, view_size, get_unix_prot(vprot), 0 )) == (void *)-1)
> -            {
> -                if (errno == ENOMEM) return STATUS_NO_MEMORY;
> -                return STATUS_INVALID_PARAMETER;
> -            }
> -            TRACE( "got mem with anon mmap %p-%p\n", ptr, (char *)ptr + size );
> -            /* if we got something beyond the user limit, unmap it and retry */
> -            if (is_beyond_limit( ptr, view_size, user_space_limit )) add_reserved_area( ptr, view_size );
> -            else break;
> -        }
> -        ptr = unmap_extra_space( ptr, view_size, size );
> +        return STATUS_NO_MEMORY;
>       }
> -done:
>       status = create_view( view_ret, ptr, size, vprot );
>       if (status != STATUS_SUCCESS) unmap_area( ptr, size );
>       return status;
> @@ -2392,6 +2376,7 @@ void virtual_init(void)
>               if (preload_reserve_start)
>                   address_space_start = min( address_space_start, preload_reserve_start );
>           }
> +        TRACE("preload reserve %p-%p.\n", preload_reserve_start, preload_reserve_end);
>       }
>   
>       size = teb_size + max( MINSIGSTKSZ, 8192 );
> 

I can understand what this is doing (extending the free ranges tracking 
over the whole address space, and merging all the code paths together), 
but it's a big change to land all at once.

The free ranges were only tracked within the reserved areas, mostly 
because that is where they were useful, but also because the smaller 
mmap alignment was causing a lot of fragmentation in an initial 
implementation. Now that we align all views and ranges to 64k, I don't 
think it would fragment that much anymore, so that part could probably 
be done separately. And I think it would still be interesting to gather 
some statistics to see how many ranges and holes we usually have, just 
to check that it's not going crazy. FWIW, there is a patch to print 
that information on warn+virtual in Proton:

   198045357c31d0846bab6c8ac388aa1b914c9cf0

About the rest, wouldn't it be possible to just reserve some more pages 
with reserve_area where we expect some free memory to be, and, if we 
run out of reserved space, use the existing code to allocate the views 
within the newly reserved space? Of course it would possibly cause more 
memory to be reserved for Wine and less to be available to the system; 
I'm not sure whether we can mitigate that somehow.
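
Very roughly, what I have in mind, with grow_reserved_areas() and
alloc_in_reserved_areas() as made-up names (the latter standing for the
existing reserved-area allocation path):

    static void *alloc_view_memory( size_t size, int top_down, int unix_prot )
    {
        void *ptr;

        /* first try the space we have already reserved */
        if ((ptr = alloc_in_reserved_areas( size, top_down, unix_prot )))
            return ptr;

        /* out of reserved space: reserve more pages where we expect
         * free memory to be (made-up helper), then retry the same path */
        if (!grow_reserved_areas( size, top_down )) return NULL;
        return alloc_in_reserved_areas( size, top_down, unix_prot );
    }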
-- 
Rémi Bernon <rbernon at codeweavers.com>


