[PATCH v4 05/10] loader: Don't clobber existing memory mappings when reserving addresses.

Jinoh Kang jinoh.kang.kr at gmail.com
Fri Jan 28 12:40:38 CST 2022


The main role of the preloader is to reserve specific virtual memory
address ranges used for special purposes on Windows, before the actual
Wine loader could be loaded.

It achieves this goal via the following process:

1. It eliminates future allocations of addresses in the reserved ranges.
   Specifically, it issues a series of mmap() calls with PROT_NONE
   protection to reserve those ranges, so that the OS won't allocate any
   of the reserved addresses for other users (i.e. Unix system
   libraries).

2. It eliminates current references of addresses in the reserved ranges.
   Specifically, if the vDSO had occupied one of the reserved ranges,
   the preloader removes it from the auxiliary vector (AT_SYSINFO*).

3. If (2) is not possible because the address is in use (e.g. current
   call stack), it gives up reservation and removes the reserved range
   from preload_info.

Each virtual memory area (VMA) is treated as follows when it overlaps
with Wine's reserved address ranges.

a. Preloader code/data: (1)
b. vDSO: (1), then (2)
c. Stack: (3)

There are a few issues with this approach:

i.   VMAs are forcibly overwritten even if they are already allocated.
     This is unnecessary for already mapped VMAs as long as (2) is
     performed, since the OS won't reuse addresses from existing VMAs
     for future allocations anyway.  The only step required for such
     VMAs are either (2) or (3), not (1).

ii.  (1) irrevocably overwrites some useful preexisting useful VMAs
     such as vDSO and vvar.  The vDSO can be relocated in newer versions
     of Linux kernel, so it's more useful to keep it.  To do so,
     however, we have to first allocate a _new_ address for such VMAs.
     This is possible only after filling all the other reserved ranges;
     otherwise, the OS might allocate a to-be-reserved address for us.

     Therefore, there needs to be a way to mmap()-fill all unallocated
     regions inside the reserved ranges, while still keeping existing
     VMAs intact.

iii. Only (c) receives the special treatment of not being overwritten by
     PROT_NONE allocation from (1).  Theoretically other VMAs that are
     in active use such as (a) shall receive the equal treatment anyway.

Fix this by reading /proc/self/maps for existing VMAs, and splitting
mmap() calls to avoid erasing existing memory mappings.

Note that MAP_FIXED_NOREPLACE is not suitable for this kind of job:
it fails entirely if there exist *any* overlapping memory mappings.

Signed-off-by: Jinoh Kang <jinoh.kang.kr at gmail.com>
---

Notes:
    v1 -> v2:
    - linebuffer_init()
      - add comment on subtracting 1 from ->limit
    - linebuffer_getline()
      - fix typo in memmove size
    - parse_maps_line()
      - use -page_size for max page address instad of ULONG_MAX
    - lookup_vma_entry()
      - skip forward if mid->end == address in binary search
    - free_vma_list()
      - use NULL instead of 0
    - alloc_scan_vma()
      - use page_size instead of hard-coded 4096
      - use -1 instead of MAP_FAILED macro for consistency
    - map_reserve_preload_ranges()
      - compute exclude region from stackarg_info instead of directly receiving it
      - make agnostic to pointer comparison signedness
      - make explicit one-byte padding before and after padding region
      - handle potential off-by-one overflow
    
    v3 -> v4:
    - edit commit message
    - add comments and documentation
    - rename linebuffer "overflow" field to "truncated"
    - remove redundant "delim" parameter from linebuffer_getline
    - make const the "line" argument of parse_maps_line
    - slightly rewrite insert_vma_entry

 loader/preloader.c | 392 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 369 insertions(+), 23 deletions(-)

diff --git a/loader/preloader.c b/loader/preloader.c
index 14ab42c2ffc..50d97a3984e 100644
--- a/loader/preloader.c
+++ b/loader/preloader.c
@@ -184,6 +184,31 @@ struct preloader_state
     struct stackarg_info s;
 };
 
+struct linebuffer
+{
+    char *base;
+    char *limit;
+    char *head;
+    char *tail;
+    int truncated;
+};
+
+struct vma_area
+{
+    unsigned long start;
+    unsigned long end;
+};
+
+struct vma_area_list
+{
+    struct vma_area *base;
+    struct vma_area *list_end;
+    struct vma_area *alloc_end;
+};
+
+#define FOREACH_VMA(list, item) \
+    for ((item) = (list)->base; (item) != (list)->list_end; (item)++)
+
 /*
  * The __bb_init_func is an empty function only called when file is
  * compiled with gcc flags "-fprofile-arcs -ftest-coverage".  This
@@ -742,6 +767,17 @@ static inline void *wld_memmove( void *dest, const void *src, size_t len )
     return dest;
 }
 
+static inline void *wld_memchr( const void *mem, int val, size_t len )
+{
+    const unsigned char *ptr = mem, *end = (const unsigned char *)ptr + len;
+
+    for (ptr = mem; ptr != end; ptr++)
+        if (*ptr == (unsigned char)val)
+            return (void *)ptr;
+
+    return NULL;
+}
+
 /*
  * parse_ul - parse an unsigned long number with given radix
  *
@@ -1562,6 +1598,333 @@ static void set_process_name( int argc, char *argv[] )
     for (i = 1; i < argc; i++) argv[i] -= off;
 }
 
+/*
+ * linebuffer_init
+ *
+ * Initialise a linebuffer with the given buffer.
+ */
+static void linebuffer_init( struct linebuffer *lbuf, char *base, size_t len )
+{
+    lbuf->base = base;
+    lbuf->limit = base + (len - 1);  /* extra NULL byte */
+    lbuf->head = base;
+    lbuf->tail = base;
+    lbuf->truncated = 0;
+}
+
+/*
+ * linebuffer_getline
+ *
+ * Retrieve a line from the linebuffer.
+ * If a line is longer than the allocated buffer, then the line is truncated.
+ * In this case, the truncated flag is set to indicate this condition.
+ */
+static char *linebuffer_getline( struct linebuffer *lbuf )
+{
+    char *lnp, *line;
+
+    while ((lnp = wld_memchr( lbuf->tail, '\n', lbuf->head - lbuf->tail )))
+    {
+        line = lbuf->tail;
+        lbuf->tail = lnp + 1;
+        if (!lbuf->truncated)
+        {
+            *lnp = '\0';
+            return line;
+        }
+        lbuf->truncated = 0;
+    }
+
+    if (lbuf->base == lbuf->tail)
+    {
+        if (lbuf->head == lbuf->limit)
+        {
+            line = lbuf->tail;
+            lbuf->tail = lbuf->head;
+            lbuf->truncated = 1;
+            *lbuf->head = '\0';
+            return line;
+        }
+    }
+    else wld_memmove( lbuf->base, lbuf->tail, lbuf->head - lbuf->tail);
+    lbuf->head -= lbuf->tail - lbuf->base;
+    lbuf->tail = lbuf->base;
+
+    return NULL;
+}
+
+/*
+ * parse_maps_line
+ *
+ * Parse an entry from /proc/self/maps file into a vma_area structure.
+ */
+static int parse_maps_line( struct vma_area *entry, const char *line )
+{
+    struct vma_area item = { 0 };
+    char *ptr = (char *)line;
+    int overflow;
+
+    item.start = parse_ul( ptr, &ptr, 16, &overflow );
+    if (overflow) return -1;
+    if (*ptr != '-') fatal_error( "parse error in /proc/self/maps\n" );
+    ptr++;
+
+    item.end = parse_ul( ptr, &ptr, 16, &overflow );
+    if (overflow) item.end = -page_size;
+    if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" );
+    ptr++;
+
+    if (item.start >= item.end) return -1;
+
+    if (*ptr != 'r' && *ptr != '-') fatal_error( "parse error in /proc/self/maps\n" );
+    ptr++;
+    if (*ptr != 'w' && *ptr != '-') fatal_error( "parse error in /proc/self/maps\n" );
+    ptr++;
+    if (*ptr != 'x' && *ptr != '-') fatal_error( "parse error in /proc/self/maps\n" );
+    ptr++;
+    if (*ptr != 's' && *ptr != 'p') fatal_error( "parse error in /proc/self/maps\n" );
+    ptr++;
+    if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" );
+    ptr++;
+
+    parse_ul( ptr, &ptr, 16, NULL );
+    if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" );
+    ptr++;
+
+    parse_ul( ptr, &ptr, 16, NULL );
+    if (*ptr != ':') fatal_error( "parse error in /proc/self/maps\n" );
+    ptr++;
+
+    parse_ul( ptr, &ptr, 16, NULL );
+    if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" );
+    ptr++;
+
+    parse_ul( ptr, &ptr, 16, NULL );
+    if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" );
+    ptr++;
+
+    *entry = item;
+    return 0;
+}
+
+/*
+ * lookup_vma_entry
+ *
+ * Find the first VMA of which end address is greater than the given address.
+ */
+static struct vma_area *lookup_vma_entry( const struct vma_area_list *list, unsigned long address )
+{
+    const struct vma_area *left = list->base, *right = list->list_end, *mid;
+    while (left < right)
+    {
+        mid = left + (right - left) / 2;
+        if (mid->end <= address) left = mid + 1;
+        else right = mid;
+    }
+    return (struct vma_area *)left;
+}
+
+/*
+ * map_reserve_range
+ *
+ * Reserve the specified address range.
+ * If there are any existing VMAs in the range, they are replaced.
+ */
+static int map_reserve_range( void *addr, size_t size )
+{
+    if (addr == (void *)-1 ||
+        wld_mmap( addr, size, PROT_NONE,
+                  MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0) != addr)
+        return -1;
+    return 0;
+}
+
+/*
+ * map_reserve_unmapped_range
+ *
+ * Reserve the specified address range excluding already mapped areas.
+ */
+static int map_reserve_unmapped_range( const struct vma_area_list *list, void *addr, size_t size )
+{
+    unsigned long range_start = (unsigned long)addr,
+                  range_end = (unsigned long)addr + size;
+    const struct vma_area *start, *item;
+    unsigned long last_addr = range_start;
+
+    start = lookup_vma_entry( list, range_start );
+    for (item = start; item != list->list_end && item->start < range_end; item++)
+    {
+        if (item->start > last_addr &&
+            map_reserve_range( (void *)last_addr, item->start - last_addr ) < 0)
+            goto fail;
+        last_addr = item->end;
+    }
+
+    if (range_end > last_addr &&
+        map_reserve_range( (void *)last_addr, range_end - last_addr ) < 0)
+        goto fail;
+    return 0;
+
+fail:
+    while (item != start)
+    {
+        item--;
+        last_addr = item == start ? range_start : item[-1].end;
+        if (item->start > last_addr)
+            wld_munmap( (void *)last_addr, item->start - last_addr );
+    }
+    return -1;
+}
+
+/*
+ * insert_vma_entry
+ *
+ * Insert the given VMA into the list.
+ */
+static void insert_vma_entry( struct vma_area_list *list, const struct vma_area *item )
+{
+    struct vma_area *left = list->base, *right = list->list_end, *mid;
+
+    if (left < right)
+    {
+        mid = right - 1;  /* optimisation: start search from end */
+        for (;;)
+        {
+            if (mid->start < item->start) left = mid + 1;
+            else right = mid;
+            if (left >= right) break;
+            mid = left + (right - left) / 2;
+        }
+    }
+    wld_memmove(left + 1, left, list->list_end - left);
+    wld_memmove(left, item, sizeof(*item));
+    list->list_end++;
+    return;
+}
+
+/*
+ * scan_vma
+ *
+ * Parse /proc/self/maps into the given VMA area list.
+ */
+static void scan_vma( struct vma_area_list *list, size_t *act_count )
+{
+    int fd;
+    size_t n = 0;
+    ssize_t nread;
+    struct linebuffer lbuf;
+    char buffer[80 + PATH_MAX], *line;
+    struct vma_area item;
+
+    fd = wld_open( "/proc/self/maps", O_RDONLY );
+    if (fd == -1) fatal_error( "could not open /proc/self/maps\n" );
+
+    linebuffer_init(&lbuf, buffer, sizeof(buffer));
+    for (;;)
+    {
+        nread = wld_read( fd, lbuf.head, lbuf.limit - lbuf.head );
+        if (nread < 0) fatal_error( "could not read /proc/self/maps\n" );
+        if (nread == 0) break;
+        lbuf.head += nread;
+
+        while ((line = linebuffer_getline( &lbuf )))
+        {
+            if (parse_maps_line( &item, line ) >= 0)
+            {
+                if (list->list_end < list->alloc_end) insert_vma_entry( list, &item );
+                n++;
+            }
+        }
+    }
+
+    wld_close(fd);
+    *act_count = n;
+}
+
+/*
+ * free_vma_list
+ *
+ * Free the buffer in the given VMA list.
+ */
+static void free_vma_list( struct vma_area_list *list )
+{
+    if (list->base)
+        wld_munmap( list->base,
+                    (unsigned char *)list->alloc_end - (unsigned char *)list->base );
+    list->base = NULL;
+    list->list_end = NULL;
+    list->alloc_end = NULL;
+}
+
+/*
+ * alloc_scan_vma
+ *
+ * Parse /proc/self/maps into a newly allocated VMA area list.
+ */
+static void alloc_scan_vma( struct vma_area_list *listp )
+{
+    size_t max_count = page_size / sizeof(struct vma_area);
+    struct vma_area_list vma_list;
+
+    for (;;)
+    {
+        vma_list.base = wld_mmap( NULL, sizeof(struct vma_area) * max_count,
+                                  PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS,
+                                  -1, 0 );
+        if (vma_list.base == (struct vma_area *)-1)
+            fatal_error( "could not allocate memory for VMA list\n");
+        vma_list.list_end = vma_list.base;
+        vma_list.alloc_end = vma_list.base + max_count;
+
+        scan_vma( &vma_list, &max_count );
+        if (vma_list.list_end - vma_list.base == max_count)
+        {
+            wld_memmove(listp, &vma_list, sizeof(*listp));
+            break;
+        }
+
+        free_vma_list( &vma_list );
+    }
+}
+
+/*
+ * map_reserve_preload_ranges
+ *
+ * Attempt to reserve memory ranges into preload_info.
+ * If any preload_info entry overlaps with stack, remove the entry instead of
+ * reserving.
+ */
+static void map_reserve_preload_ranges( const struct vma_area_list *vma_list,
+                                        const struct stackarg_info *stackinfo )
+{
+    size_t i;
+    unsigned long exclude_start = (unsigned long)stackinfo->stack - 1;
+    unsigned long exclude_end = (unsigned long)stackinfo->auxv + 1;
+
+    for (i = 0; preload_info[i].size; i++)
+    {
+        if (exclude_end   >  (unsigned long)preload_info[i].addr &&
+            exclude_start <= (unsigned long)preload_info[i].addr + preload_info[i].size - 1)
+        {
+            remove_preload_range( i );
+            i--;
+        }
+        else if (map_reserve_unmapped_range( vma_list, preload_info[i].addr, preload_info[i].size ) < 0)
+        {
+            /* don't warn for low 64k */
+            if (preload_info[i].addr >= (void *)0x10000
+#ifdef __aarch64__
+                && preload_info[i].addr < (void *)0x7fffffffff /* ARM64 address space might end here*/
+#endif
+            )
+                wld_printf( "preloader: Warning: failed to reserve range %p-%p\n",
+                            preload_info[i].addr, (char *)preload_info[i].addr + preload_info[i].size );
+            remove_preload_range( i );
+            i--;
+        }
+    }
+}
+
 
 /*
  *  wld_start
@@ -1578,6 +1941,7 @@ void* wld_start( void **stack )
     struct wld_link_map main_binary_map, ld_so_map;
     struct wine_preload_info **wine_main_preload_info;
     struct preloader_state state = { 0 };
+    struct vma_area_list vma_list = { NULL };
 
     parse_stackargs( &state.s, *stack );
 
@@ -1606,29 +1970,9 @@ void* wld_start( void **stack )
     /* reserve memory that Wine needs */
     reserve = stackargs_getenv( &state.s, "WINEPRELOADRESERVE" );
     if (reserve) preload_reserve( reserve );
-    for (i = 0; preload_info[i].size; i++)
-    {
-        if ((char *)state.s.auxv  >= (char *)preload_info[i].addr &&
-            (char *)state.s.stack <= (char *)preload_info[i].addr + preload_info[i].size)
-        {
-            remove_preload_range( i );
-            i--;
-        }
-        else if (wld_mmap( preload_info[i].addr, preload_info[i].size, PROT_NONE,
-                           MAP_FIXED | MAP_PRIVATE | MAP_ANON | MAP_NORESERVE, -1, 0 ) == (void *)-1)
-        {
-            /* don't warn for low 64k */
-            if (preload_info[i].addr >= (void *)0x10000
-#ifdef __aarch64__
-                && preload_info[i].addr < (void *)0x7fffffffff /* ARM64 address space might end here*/
-#endif
-            )
-                wld_printf( "preloader: Warning: failed to reserve range %p-%p\n",
-                            preload_info[i].addr, (char *)preload_info[i].addr + preload_info[i].size );
-            remove_preload_range( i );
-            i--;
-        }
-    }
+
+    alloc_scan_vma( &vma_list );
+    map_reserve_preload_ranges( &vma_list, &state.s );
 
     /* add an executable page at the top of the address space to defeat
      * broken no-exec protections that play with the code selector limit */
@@ -1689,6 +2033,8 @@ void* wld_start( void **stack )
     }
 #endif
 
+    free_vma_list( &vma_list );
+
     *stack = state.s.stack;
     return (void *)ld_so_map.l_entry;
 }
-- 
2.34.1




More information about the wine-devel mailing list