[PATCH v2 05/10] loader: Don't clobber existing memory mappings when reserving addresses.
Jinoh Kang
wine at gitlab.winehq.org
Thu Apr 28 13:02:55 CDT 2022
From: Jinoh Kang <jinoh.kang.kr at gmail.com>
The main role of the preloader is to reserve specific virtual memory
address ranges used for special purposes on Windows, before Wine could
be loaded.
It achieves this goal via the following process:
(1) It eliminates future allocations of addresses in the reserved
ranges. Specifically, it issues a series of mmap() calls with
PROT_NONE protection to reserve those ranges, so that the OS won't
allocate any of the reserved addresses for other users (i.e. Unix
system libraries).
(2) It eliminates current references of addresses in the reserved
ranges. Specifically, if the vDSO had occupied one of the reserved
ranges, the preloader removes it from the auxiliary vector
(AT_SYSINFO*).
(3) If (2) is not possible because the address is in use (e.g. current
call stack), it gives up reservation and removes the reserved range
from preload_info.
Today, each virtual memory area (VMA) is treated as follows when it
overlaps with Wine's reserved address ranges.
- Preloader code/data. Preloader should leave no trace of itself after
Wine has been loaded. Thus, no current references to preloader code
or data remain. (2) is skipped.
Meanwhile, if any part of the preloader overlaps with a reserved
range, it is unmapped. In other words, we forcibly perform (1) here.
This could lead to a crash if it ever touched code or data that are
still being used.
- vDSO/vvar. This is overwritten and ignored completely when it
overlaps with any reserved range. In other words, both (1) and (2)
are performed.
- Stack. Since the stack is always in use, (2) is not possible.
Therefore, (3) is performed.
There are a few issues with this approach:
1. Existing VMAs that overlap with any reserved ranges are forcibly
overwritten during (1). There is actually no need to overwrite them,
since existing VMAs themselves automatically act as reservations by
nature (i.e. no future allocations would overlap any existing VMAs).
Furthermore, arbitrarily overwriting any memory in use would cause
the preloader to crash. The only treatments required for existing
VMAs are either (2) or (3), not (1).
2. (1) irrevocably overwrites some useful preexisting VMAs such
as vDSO if they overlap with any reserved ranges. It's more useful
to relocate vDSO to a safe address if possible, which is supported by
newer versions of Linux kernel. To do so, however, we first have to
allocate a _new_ address for such VMAs before the overlapping address
range could be reserved. Notice a chicken-egg problem here:
- If we perform (1) before allocating a new address for vDSO, the
vDSO goes away even before we get a chance to relocate it.
- If we allocate a new address for vDSO before performing (1),
the new address allocated by the OS might end up overlapping with
one of the reserved ranges.
What we need here is a way to mmap()-fill all unallocated regions
inside the reserved ranges, *while* still keeping existing VMAs
intact. In this way, we can perform (1) foremost to avoid allocating
a reserved address for relocated vDSO. After the vDSO is relocated
to a safe address, we can perform (1) once more to finalise the
reservation.
3. Only the stack receives the special treatment of not being
overwritten by PROT_NONE allocation from (1). Theoretically other
VMAs that are in use such as the preloader code and data shall
receive the equal treatment anyway.
Fix this by reading /proc/self/maps for existing VMAs, and splitting
mmap() calls to avoid erasing existing memory mappings.
Note that MAP_FIXED_NOREPLACE is not suitable for this kind of job:
it fails entirely if there exist *any* overlapping memory mappings.
Signed-off-by: Jinoh Kang <jinoh.kang.kr at gmail.com>
---
loader/preloader.c | 392 ++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 369 insertions(+), 23 deletions(-)
diff --git a/loader/preloader.c b/loader/preloader.c
index b46187dbf8e..6c3c21f5978 100644
--- a/loader/preloader.c
+++ b/loader/preloader.c
@@ -184,6 +184,31 @@ struct preloader_state
struct stackarg_info s;
};
+/* incremental line reader state for parsing /proc/self/maps */
+struct linebuffer
+{
+ char *base; /* start of the caller-supplied buffer */
+ char *limit; /* last usable byte; one byte is reserved for a NUL terminator */
+ char *head; /* end of the data read so far */
+ char *tail; /* start of the first unconsumed line */
+ int truncated; /* set while discarding the rest of an over-long line */
+};
+
+/* one existing virtual memory area, as parsed from /proc/self/maps */
+struct vma_area
+{
+ unsigned long start; /* start address of the mapping (inclusive) */
+ unsigned long end; /* end address of the mapping (exclusive) */
+};
+
+/* growable, sorted (by start address) array of vma_area entries */
+struct vma_area_list
+{
+ struct vma_area *base; /* start of the entry array */
+ struct vma_area *list_end; /* one past the last entry in use */
+ struct vma_area *alloc_end; /* one past the end of the allocated storage */
+};
+
+/* iterate over every VMA entry currently stored in the list */
+#define FOREACH_VMA(list, item) \
+ for ((item) = (list)->base; (item) != (list)->list_end; (item)++)
+
/*
* The __bb_init_func is an empty function only called when file is
* compiled with gcc flags "-fprofile-arcs -ftest-coverage". This
@@ -742,6 +767,17 @@ static inline void *wld_memmove( void *dest, const void *src, size_t len )
return dest;
}
+/*
+ * wld_memchr
+ *
+ * Minimal memchr() replacement: return a pointer to the first byte in the
+ * len bytes at mem that equals (unsigned char)val, or NULL if none does.
+ */
+static inline void *wld_memchr( const void *mem, int val, size_t len )
+{
+    const unsigned char *p = mem;
+    const unsigned char *stop = p + len;
+
+    while (p != stop)
+    {
+        if (*p == (unsigned char)val) return (void *)p;
+        p++;
+    }
+    return NULL;
+}
+
/*
* parse_ul - parse an unsigned long number with given radix
*
@@ -1596,6 +1632,333 @@ static void set_process_name( int argc, char *argv[] )
for (i = 1; i < argc; i++) argv[i] -= off;
}
+/*
+ * linebuffer_init
+ *
+ * Initialise a linebuffer with the given buffer.
+ */
+/*
+ * linebuffer_init
+ *
+ * Set up lbuf to read lines through the given buffer of len bytes.
+ * One byte is reserved so a NUL terminator can always be stored.
+ */
+static void linebuffer_init( struct linebuffer *lbuf, char *base, size_t len )
+{
+    lbuf->base = lbuf->head = lbuf->tail = base;
+    lbuf->limit = base + len - 1; /* reserve the extra NUL byte */
+    lbuf->truncated = 0;
+}
+
+/*
+ * linebuffer_getline
+ *
+ * Retrieve a line from the linebuffer.
+ * If a line is longer than the allocated buffer, then the line is truncated.
+ * In this case, the truncated flag is set to indicate this condition.
+ */
+static char *linebuffer_getline( struct linebuffer *lbuf )
+{
+ char *lnp, *line;
+
+ /* consume every complete line currently buffered */
+ while ((lnp = wld_memchr( lbuf->tail, '\n', lbuf->head - lbuf->tail )))
+ {
+ line = lbuf->tail;
+ lbuf->tail = lnp + 1; /* advance past the newline */
+ if (!lbuf->truncated)
+ {
+ *lnp = '\0'; /* NUL-terminate the line in place */
+ return line;
+ }
+ /* this newline ends a line whose head was already returned truncated;
+ * drop the remainder and clear the flag */
+ lbuf->truncated = 0;
+ }
+
+ if (lbuf->base == lbuf->tail)
+ {
+ /* no newline found and the line already starts at the buffer base */
+ if (lbuf->head == lbuf->limit)
+ {
+ /* buffer completely full: return the partial line as truncated */
+ line = lbuf->tail;
+ lbuf->tail = lbuf->head;
+ lbuf->truncated = 1;
+ *lbuf->head = '\0'; /* limit always leaves room for this NUL */
+ return line;
+ }
+ }
+ /* slide the partial line down to the buffer base to make room */
+ else wld_memmove( lbuf->base, lbuf->tail, lbuf->head - lbuf->tail);
+ lbuf->head -= lbuf->tail - lbuf->base;
+ lbuf->tail = lbuf->base;
+
+ return NULL; /* caller must read more data into head */
+}
+
+/*
+ * parse_maps_line
+ *
+ * Parse an entry from /proc/self/maps file into a vma_area structure.
+ */
+static int parse_maps_line( struct vma_area *entry, const char *line )
+{
+ struct vma_area item = { 0 };
+ char *ptr = (char *)line;
+ int overflow;
+
+ /* start address field */
+ item.start = parse_ul( ptr, &ptr, 16, &overflow );
+ if (overflow) return -1;
+ if (*ptr != '-') fatal_error( "parse error in /proc/self/maps\n" );
+ ptr++;
+
+ /* end address field; clamp to the highest page boundary on overflow */
+ item.end = parse_ul( ptr, &ptr, 16, &overflow );
+ if (overflow) item.end = -page_size;
+ if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" );
+ ptr++;
+
+ /* ignore degenerate/empty ranges */
+ if (item.start >= item.end) return -1;
+
+ /* permission flags: rwx then shared/private; values are not used,
+ * only validated to keep the parser in sync with the format */
+ if (*ptr != 'r' && *ptr != '-') fatal_error( "parse error in /proc/self/maps\n" );
+ ptr++;
+ if (*ptr != 'w' && *ptr != '-') fatal_error( "parse error in /proc/self/maps\n" );
+ ptr++;
+ if (*ptr != 'x' && *ptr != '-') fatal_error( "parse error in /proc/self/maps\n" );
+ ptr++;
+ if (*ptr != 's' && *ptr != 'p') fatal_error( "parse error in /proc/self/maps\n" );
+ ptr++;
+ if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" );
+ ptr++;
+
+ /* file offset (skipped) */
+ parse_ul( ptr, &ptr, 16, NULL );
+ if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" );
+ ptr++;
+
+ /* device major:minor (skipped) */
+ parse_ul( ptr, &ptr, 16, NULL );
+ if (*ptr != ':') fatal_error( "parse error in /proc/self/maps\n" );
+ ptr++;
+
+ parse_ul( ptr, &ptr, 16, NULL );
+ if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" );
+ ptr++;
+
+ /* inode number (skipped); pathname after it is ignored entirely */
+ parse_ul( ptr, &ptr, 16, NULL );
+ if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" );
+ ptr++;
+
+ *entry = item;
+ return 0;
+}
+
+/*
+ * lookup_vma_entry
+ *
+ * Find the first VMA of which end address is greater than the given address.
+ */
+/*
+ * lookup_vma_entry
+ *
+ * Binary-search the sorted VMA list for the first entry whose end address
+ * is greater than the given address; returns list_end if there is none.
+ */
+static struct vma_area *lookup_vma_entry( const struct vma_area_list *list, unsigned long address )
+{
+    const struct vma_area *lo = list->base, *hi = list->list_end;
+
+    while (lo < hi)
+    {
+        const struct vma_area *probe = lo + (hi - lo) / 2;
+
+        if (probe->end <= address) lo = probe + 1;
+        else hi = probe;
+    }
+    return (struct vma_area *)lo;
+}
+
+/*
+ * map_reserve_range
+ *
+ * Reserve the specified address range.
+ * If there are any existing VMAs in the range, they are replaced.
+ */
+/*
+ * map_reserve_range
+ *
+ * Reserve the given address range with a PROT_NONE anonymous mapping.
+ * Any existing mapping in the range is replaced (MAP_FIXED).
+ * Returns 0 on success, -1 on failure or if addr is the invalid marker.
+ */
+static int map_reserve_range( void *addr, size_t size )
+{
+    void *result;
+
+    if (addr == (void *)-1) return -1;
+    result = wld_mmap( addr, size, PROT_NONE,
+                       MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0 );
+    return result == addr ? 0 : -1;
+}
+
+/*
+ * map_reserve_unmapped_range
+ *
+ * Reserve the specified address range excluding already mapped areas.
+ */
+static int map_reserve_unmapped_range( const struct vma_area_list *list, void *addr, size_t size )
+{
+ unsigned long range_start = (unsigned long)addr,
+ range_end = (unsigned long)addr + size;
+ const struct vma_area *start, *item;
+ unsigned long last_addr = range_start;
+
+ /* walk every existing VMA that overlaps the requested range and
+ * reserve only the gaps between them, leaving the VMAs untouched */
+ start = lookup_vma_entry( list, range_start );
+ for (item = start; item != list->list_end && item->start < range_end; item++)
+ {
+ if (item->start > last_addr &&
+ map_reserve_range( (void *)last_addr, item->start - last_addr ) < 0)
+ goto fail;
+ last_addr = item->end; /* resume after this existing mapping */
+ }
+
+ /* reserve the tail gap after the last overlapping VMA, if any */
+ if (range_end > last_addr &&
+ map_reserve_range( (void *)last_addr, range_end - last_addr ) < 0)
+ goto fail;
+ return 0;
+
+fail:
+ /* unwind: unmap each gap reserved so far, in reverse order */
+ while (item != start)
+ {
+ item--;
+ /* recompute where the gap before this VMA began */
+ last_addr = item == start ? range_start : item[-1].end;
+ if (item->start > last_addr)
+ wld_munmap( (void *)last_addr, item->start - last_addr );
+ }
+ return -1;
+}
+
+/*
+ * insert_vma_entry
+ *
+ * Insert the given VMA into the list.
+ */
+/*
+ * insert_vma_entry
+ *
+ * Insert the given VMA into the list, keeping it sorted by start address.
+ * The caller must ensure there is room (list_end < alloc_end).
+ */
+static void insert_vma_entry( struct vma_area_list *list, const struct vma_area *item )
+{
+    struct vma_area *left = list->base, *right = list->list_end, *mid;
+
+    if (left < right)
+    {
+        mid = right - 1; /* optimisation: start search from end */
+        for (;;)
+        {
+            if (mid->start < item->start) left = mid + 1;
+            else right = mid;
+            if (left >= right) break;
+            mid = left + (right - left) / 2;
+        }
+    }
+    /* wld_memmove takes a byte count; the element count alone would move
+     * only part of the tail entries and corrupt the sorted list */
+    wld_memmove(left + 1, left, (list->list_end - left) * sizeof(*left));
+    wld_memmove(left, item, sizeof(*item));
+    list->list_end++;
+}
+
+/*
+ * scan_vma
+ *
+ * Parse /proc/self/maps into the given VMA area list.
+ */
+static void scan_vma( struct vma_area_list *list, size_t *act_count )
+{
+ int fd;
+ size_t n = 0; /* number of parseable entries seen, even if list is full */
+ ssize_t nread;
+ struct linebuffer lbuf;
+ char buffer[80 + PATH_MAX], *line; /* fields plus the mapped file path */
+ struct vma_area item;
+
+ fd = wld_open( "/proc/self/maps", O_RDONLY );
+ if (fd == -1) fatal_error( "could not open /proc/self/maps\n" );
+
+ linebuffer_init(&lbuf, buffer, sizeof(buffer));
+ for (;;)
+ {
+ /* refill the line buffer from wherever the previous read stopped */
+ nread = wld_read( fd, lbuf.head, lbuf.limit - lbuf.head );
+ if (nread < 0) fatal_error( "could not read /proc/self/maps\n" );
+ if (nread == 0) break; /* EOF */
+ lbuf.head += nread;
+
+ while ((line = linebuffer_getline( &lbuf )))
+ {
+ if (parse_maps_line( &item, line ) >= 0)
+ {
+ /* store only while there is room, but keep counting so the
+ * caller can detect that the list was too small */
+ if (list->list_end < list->alloc_end) insert_vma_entry( list, &item );
+ n++;
+ }
+ }
+ }
+
+ wld_close(fd);
+ *act_count = n; /* total entries found, may exceed stored entries */
+}
+
+/*
+ * free_vma_list
+ *
+ * Free the buffer in the given VMA list.
+ */
+/*
+ * free_vma_list
+ *
+ * Release the storage owned by the given VMA list and reset it to empty.
+ * Safe to call on a list that was never allocated (base == NULL).
+ */
+static void free_vma_list( struct vma_area_list *list )
+{
+    if (list->base)
+    {
+        size_t alloc_size = (unsigned char *)list->alloc_end - (unsigned char *)list->base;
+        wld_munmap( list->base, alloc_size );
+    }
+    list->base = list->list_end = list->alloc_end = NULL;
+}
+
+/*
+ * alloc_scan_vma
+ *
+ * Parse /proc/self/maps into a newly allocated VMA area list.
+ */
+static void alloc_scan_vma( struct vma_area_list *listp )
+{
+ /* initial capacity: one page worth of entries */
+ size_t max_count = page_size / sizeof(struct vma_area);
+ struct vma_area_list vma_list;
+
+ for (;;)
+ {
+ vma_list.base = wld_mmap( NULL, sizeof(struct vma_area) * max_count,
+ PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS,
+ -1, 0 );
+ if (vma_list.base == (struct vma_area *)-1)
+ fatal_error( "could not allocate memory for VMA list\n");
+ vma_list.list_end = vma_list.base;
+ vma_list.alloc_end = vma_list.base + max_count;
+
+ /* scan_vma overwrites max_count with the number of entries actually
+ * present; if everything fit, the stored count matches it */
+ scan_vma( &vma_list, &max_count );
+ if (vma_list.list_end - vma_list.base == max_count)
+ {
+ wld_memmove(listp, &vma_list, sizeof(*listp));
+ break;
+ }
+
+ /* list was too small: free it and retry with the larger count
+ * (the new mapping itself may add an entry, hence the loop) */
+ free_vma_list( &vma_list );
+ }
+}
+
+/*
+ * map_reserve_preload_ranges
+ *
+ * Attempt to reserve memory ranges into preload_info.
+ * If any preload_info entry overlaps with stack, remove the entry instead of
+ * reserving.
+ */
+static void map_reserve_preload_ranges( const struct vma_area_list *vma_list,
+ const struct stackarg_info *stackinfo )
+{
+ size_t i;
+ /* inclusive-exclusive fencepost adjustment so the overlap test below
+ * also rejects ranges that merely touch the stack/auxv region */
+ unsigned long exclude_start = (unsigned long)stackinfo->stack - 1;
+ unsigned long exclude_end = (unsigned long)stackinfo->auxv + 1;
+
+ for (i = 0; preload_info[i].size; i++)
+ {
+ /* the stack is always in use and cannot be vacated: drop any
+ * reserved range that overlaps [stack, auxv] instead */
+ if (exclude_end > (unsigned long)preload_info[i].addr &&
+ exclude_start <= (unsigned long)preload_info[i].addr + preload_info[i].size - 1)
+ {
+ remove_preload_range( i );
+ i--; /* entries shift down; revisit this index */
+ }
+ else if (map_reserve_unmapped_range( vma_list, preload_info[i].addr, preload_info[i].size ) < 0)
+ {
+ /* don't warn for low 64k */
+ if (preload_info[i].addr >= (void *)0x10000
+#ifdef __aarch64__
+ && preload_info[i].addr < (void *)0x7fffffffff /* ARM64 address space might end here*/
+#endif
+ )
+ wld_printf( "preloader: Warning: failed to reserve range %p-%p\n",
+ preload_info[i].addr, (char *)preload_info[i].addr + preload_info[i].size );
+ remove_preload_range( i );
+ i--; /* entries shift down; revisit this index */
+ }
+ }
+}
+
/*
* wld_start
@@ -1612,6 +1975,7 @@ void* wld_start( void **stack )
struct wld_link_map main_binary_map, ld_so_map;
struct wine_preload_info **wine_main_preload_info;
struct preloader_state state = { 0 };
+ struct vma_area_list vma_list = { NULL };
parse_stackargs( &state.s, *stack );
@@ -1640,29 +2004,9 @@ void* wld_start( void **stack )
/* reserve memory that Wine needs */
reserve = stackargs_getenv( &state.s, "WINEPRELOADRESERVE" );
if (reserve) preload_reserve( reserve );
- for (i = 0; preload_info[i].size; i++)
- {
- if ((char *)state.s.auxv >= (char *)preload_info[i].addr &&
- (char *)state.s.stack <= (char *)preload_info[i].addr + preload_info[i].size)
- {
- remove_preload_range( i );
- i--;
- }
- else if (wld_mmap( preload_info[i].addr, preload_info[i].size, PROT_NONE,
- MAP_FIXED | MAP_PRIVATE | MAP_ANON | MAP_NORESERVE, -1, 0 ) == (void *)-1)
- {
- /* don't warn for low 64k */
- if (preload_info[i].addr >= (void *)0x10000
-#ifdef __aarch64__
- && preload_info[i].addr < (void *)0x7fffffffff /* ARM64 address space might end here*/
-#endif
- )
- wld_printf( "preloader: Warning: failed to reserve range %p-%p\n",
- preload_info[i].addr, (char *)preload_info[i].addr + preload_info[i].size );
- remove_preload_range( i );
- i--;
- }
- }
+
+ alloc_scan_vma( &vma_list );
+ map_reserve_preload_ranges( &vma_list, &state.s );
/* add an executable page at the top of the address space to defeat
* broken no-exec protections that play with the code selector limit */
@@ -1723,6 +2067,8 @@ void* wld_start( void **stack )
}
#endif
+ free_vma_list( &vma_list );
+
*stack = state.s.stack;
return (void *)ld_so_map.l_entry;
}
--
GitLab
https://gitlab.winehq.org/wine/wine/-/merge_requests/6
More information about the wine-devel
mailing list