[PATCH 08/10] loader: Relocate vDSO on conflict with reserved ranges.

Jinoh Kang jinoh.kang.kr at gmail.com
Wed Dec 29 12:08:33 CST 2021


Today, the preloader removes the vDSO entries (AT_SYSINFO*) from the
auxiliary vector when the vDSO conflicts with one of the predefined
reserved ranges.

The vDSO is a shared object provided by the kernel.  Among other things,
it provides a mechanism to issue certain system calls without the
overhead of switching to kernel mode.

Without vDSO, libc still works; however, it is expected that some system
call functions (e.g.  gettimeofday, clock_gettime) will show degraded
performance.

Fix this by relocating vDSO to another address (if supported by the
kernel) instead of erasing it from auxv entirely.

This behavior is enabled only when the "WINEPRELOADREMAPVDSO"
environment variable is set to "on-conflict".  In the future, it could
become the default behavior.

Signed-off-by: Jinoh Kang <jinoh.kang.kr at gmail.com>
---
 loader/preloader.c | 177 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 174 insertions(+), 3 deletions(-)

diff --git a/loader/preloader.c b/loader/preloader.c
index 8e6b5768e10..b13ba1dac57 100644
--- a/loader/preloader.c
+++ b/loader/preloader.c
@@ -102,6 +102,9 @@
 #ifndef MAP_NORESERVE
 #define MAP_NORESERVE 0
 #endif
+#ifndef MREMAP_FIXED  /* fallback for system headers that do not provide the flag */
+#define MREMAP_FIXED 2
+#endif  /* NOTE(review): MREMAP_MAYMOVE is used together with it but gets no such fallback */
 
 static struct wine_preload_info preload_info[] =
 {
@@ -189,10 +192,19 @@ struct linebuffer
     int overflow;
 };
 
+enum vma_type_flags
+{
+    VMA_NORMAL = 1 << 0,  /* presumably any other mapping; never assigned in this patch -- confirm */
+    VMA_VDSO   = 1 << 1,  /* area named "[vdso]" in /proc/self/maps */
+    VMA_VVAR   = 1 << 2,  /* area named "[vvar]" in /proc/self/maps */
+};
+
 struct vma_area
 {
     unsigned long start;
     unsigned long end;
+    unsigned char type_flags;  /* bitmask of enum vma_type_flags */
+    unsigned char moved;  /* nonzero while the area is displaced by remap_multiple_vmas() */
 };
 
 struct vma_area_list
@@ -205,6 +217,16 @@ struct vma_area_list
 #define FOREACH_VMA(list, item) \
     for ((item) = (list)->base; (item) != (list)->list_end; (item)++)
 
+enum remap_policy
+{
+    REMAP_POLICY_ON_CONFLICT = 0,  /* relocate only if the area overlaps a reserved range */
+    REMAP_POLICY_FORCE = 1,  /* relocate without checking for a conflict */
+    REMAP_POLICY_SKIP = 2,  /* never relocate */
+    LAST_REMAP_POLICY,  /* number of policies; exclusive upper bound for numeric settings */
+
+    REMAP_POLICY_DEFAULT_VDSO = REMAP_POLICY_SKIP,  /* vDSO policy when the environment selects none */
+};
+
 /*
  * The __bb_init_func is an empty function only called when file is
  * compiled with gcc flags "-fprofile-arcs -ftest-coverage".  This
@@ -376,6 +398,16 @@ static inline int wld_munmap( void *addr, size_t len )
     return SYSCALL_RET(ret);
 }
 
+static inline void *wld_mremap( void *old_addr, size_t old_len, size_t new_size, int flags, void *new_addr )  /* direct mremap system call */
+{
+    long ret;  /* value left in eax by the system call */
+    __asm__ __volatile__( "int $0x80"
+                          : "=a" (ret) : "0" (163 /* SYS_mremap */), "b" (old_addr), "c" (old_len),  /* eax = number, ebx/ecx = args 1-2 */
+                            "d" (new_size), "S" (flags), "D" (new_addr)  /* edx/esi/edi = args 3-5 */
+                          : "memory" );  /* NOTE(review): ebx is bound directly; verify this builds as PIC */
+    return (void *)SYSCALL_RET(ret);  /* SYSCALL_RET is defined elsewhere; presumably maps errors to -1 -- confirm */
+}
+
 static inline int wld_prctl( int code, long arg )
 {
     long ret;
@@ -462,6 +494,9 @@ SYSCALL_FUNC( wld_mprotect, 10 /* SYS_mprotect */ );
 int wld_munmap( void *addr, size_t len );
 SYSCALL_FUNC( wld_munmap, 11 /* SYS_munmap */ );
 
+void *wld_mremap( void *old_addr, size_t old_len, size_t new_size, int flags, void *new_addr );
+SYSCALL_FUNC( wld_mremap, 25 /* SYS_mremap */ );
+
 int wld_prctl( int code, long arg );
 SYSCALL_FUNC( wld_prctl, 157 /* SYS_prctl */ );
 
@@ -572,6 +607,9 @@ SYSCALL_FUNC( wld_mprotect, 226 /* SYS_mprotect */ );
 int wld_munmap( void *addr, size_t len );
 SYSCALL_FUNC( wld_munmap, 215 /* SYS_munmap */ );
 
+void *wld_mremap( void *old_addr, size_t old_len, size_t new_size, int flags, void *new_addr );
+SYSCALL_FUNC( wld_mremap, 216 /* SYS_mremap */ );
+
 int wld_prctl( int code, long arg );
 SYSCALL_FUNC( wld_prctl, 167 /* SYS_prctl */ );
 
@@ -674,6 +712,9 @@ SYSCALL_FUNC( wld_mprotect, 125 /* SYS_mprotect */ );
 int wld_munmap( void *addr, size_t len );
 SYSCALL_FUNC( wld_munmap, 91 /* SYS_munmap */ );
 
+void *wld_mremap( void *old_addr, size_t old_len, size_t new_size, int flags, void *new_addr );
+SYSCALL_FUNC( wld_mremap, 163 /* SYS_mremap */ );
+
 int wld_prctl( int code, long arg );
 SYSCALL_FUNC( wld_prctl, 172 /* SYS_prctl */ );
 
@@ -1593,6 +1634,7 @@ static char *linebuffer_getline( struct linebuffer *lbuf, char delim )
 static int parse_maps_line( struct vma_area *entry, char *line )
 {
     struct vma_area item = { 0 };
+    unsigned long dev_maj, dev_min;
     char *ptr = line;
     int overflow;
 
@@ -1623,11 +1665,11 @@ static int parse_maps_line( struct vma_area *entry, char *line )
     if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" );
     ptr++;
 
-    parse_ul( ptr, &ptr, 16, NULL );
+    dev_maj = parse_ul( ptr, &ptr, 16, NULL );
     if (*ptr != ':') fatal_error( "parse error in /proc/self/maps\n" );
     ptr++;
 
-    parse_ul( ptr, &ptr, 16, NULL );
+    dev_min = parse_ul( ptr, &ptr, 16, NULL );
     if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" );
     ptr++;
 
@@ -1635,6 +1677,17 @@ static int parse_maps_line( struct vma_area *entry, char *line )
     if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" );
     ptr++;
 
+    while (*ptr == ' ')
+        ptr++;
+
+    if (dev_maj == 0 && dev_min == 0)
+    {
+        if (wld_strcmp(ptr, "[vdso]") == 0)
+            item.type_flags |= VMA_VDSO;
+        else if (wld_strcmp(ptr, "[vvar]") == 0)
+            item.type_flags |= VMA_VVAR;
+    }
+
     *entry = item;
     return 0;
 }
@@ -1713,6 +1766,51 @@ static void insert_vma_entry( struct vma_area_list *list, const struct vma_area
     return;
 }
 
+/* Compute the smallest address range covering every VMA whose type_flags intersect type_mask.
+ * Returns 0 and stores the range in *startp / *sizep, or -1 (outputs untouched) if none match. */
+static int find_vma_envelope_range( const struct vma_area_list *list, int type_mask, unsigned long *startp, unsigned long *sizep )
+{
+    unsigned long lowest = ULONG_MAX, highest = 0;
+    const struct vma_area *vma;
+
+    FOREACH_VMA(list, vma)
+    {
+        if (!(vma->type_flags & type_mask)) continue;
+        if (vma->start < lowest) lowest = vma->start;
+        if (vma->end > highest) highest = vma->end;
+    }
+
+    /* no match leaves lowest at ULONG_MAX and highest at 0 */
+    if (lowest >= highest) return -1;
+
+    *startp = lowest;
+    *sizep = highest - lowest;
+    return 0;
+}
+
+/* Move every VMA matching type_mask whose 'moved' flag equals revert by offset bytes and keep its
+ * entry in sync, so that a later call with -offset undoes the move.  Returns 0, or -1 on failure. */
+static int remap_multiple_vmas( struct vma_area_list *list, unsigned long offset, int type_mask, unsigned char revert )
+{
+    struct vma_area *item;
+    void *expect_addr, *mapped_addr;
+    size_t size;
+
+    FOREACH_VMA(list, item)
+    {
+        if (!(item->type_flags & type_mask) || item->moved != revert) continue;
+        expect_addr = (void *)(item->start + offset);
+        size = item->end - item->start;
+        mapped_addr = wld_mremap( (void *)item->start, size, size, MREMAP_FIXED | MREMAP_MAYMOVE, expect_addr );
+        if (mapped_addr == (void *)-1) return -1;
+        if (mapped_addr != (void *)item->start) item->moved = !revert;
+        item->start = (unsigned long)mapped_addr;  /* the old range is no longer mapped */
+        item->end = item->start + size;
+        if (mapped_addr != expect_addr) return -1;
+    }
+    return 0;
+}
+
 static void scan_vma( struct vma_area_list *list, size_t *act_count )
 {
     int fd;
@@ -1783,6 +1881,77 @@ static void alloc_scan_vma( struct vma_area_list *listp )
     }
 }
 
+/* Read a remap policy from environment variable 'name': one of the keywords below or a decimal
+ * policy number.  A missing, empty or unrecognised value yields default_policy. */
+static enum remap_policy stackargs_get_remap_policy( const struct stackarg_info *info, const char *name,
+                                                     enum remap_policy default_policy )
+{
+    char *valstr = stackargs_getenv( info, name ), *endptr;
+    unsigned long valnum;
+
+    if (!valstr) return default_policy;
+    if (!wld_strcmp( valstr, "auto" ) || !wld_strcmp( valstr, "on-conflict" )) return REMAP_POLICY_ON_CONFLICT;
+    if (!wld_strcmp( valstr, "always" ) || !wld_strcmp( valstr, "force" )) return REMAP_POLICY_FORCE;
+    if (!wld_strcmp( valstr, "never" ) || !wld_strcmp( valstr, "skip" )) return REMAP_POLICY_SKIP;
+
+    /* Otherwise accept a number.  Assuming parse_ul() leaves endptr at valstr when it consumes no
+     * digits (as strtoul does), that case must not be mistaken for policy 0. */
+    valnum = parse_ul( valstr, &endptr, 10, NULL );
+    if (endptr != valstr && !*endptr && valnum < LAST_REMAP_POLICY) return valnum;
+    return default_policy;
+}
+
+/* Relocate the [vdso]/[vvar] areas away from the reserved ranges and fix up the auxiliary vector.
+ * Returns 1 if the areas were moved (vma_list is rescanned), 0 if there was nothing to do, and
+ * -1 if relocation is disabled or failed; on failure the areas are put back where they were. */
+static int remap_vdso( struct vma_area_list *vma_list, struct preloader_state *state )
+{
+    enum remap_policy policy;
+    unsigned long vdso_start, vdso_size, offset, new_vdso_start, new_vdso_size;
+    unsigned char rescanned = 0;
+    void *new_vdso;
+    struct wld_auxv *auxv;
+
+    if (find_vma_envelope_range( vma_list, VMA_VDSO | VMA_VVAR,
+                                 &vdso_start, &vdso_size ) < 0) return 0;
+
+    policy = stackargs_get_remap_policy( &state->s, "WINEPRELOADREMAPVDSO", REMAP_POLICY_DEFAULT_VDSO );
+    if (policy == REMAP_POLICY_SKIP) return -1;
+    if (policy != REMAP_POLICY_FORCE &&
+        find_preload_reserved_area( (void *)vdso_start, vdso_size ) < 0) return 0;
+
+    /* placeholder that picks a free range; MREMAP_FIXED then replaces it piece by piece */
+    new_vdso = wld_mmap( NULL, vdso_size, PROT_NONE,
+                         MAP_GROWSDOWN | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0 );
+    if (new_vdso == (void *)-1) return -1;
+
+    offset = (unsigned long)new_vdso - vdso_start;
+    if (remap_multiple_vmas( vma_list, offset, VMA_VVAR, 0 ) < 0 ||
+        remap_multiple_vmas( vma_list, offset, VMA_VDSO, 0 ) < 0) goto remap_restore;
+
+    free_vma_list( vma_list );
+    alloc_scan_vma( vma_list );
+    rescanned = 1;
+
+    if (find_vma_envelope_range( vma_list, VMA_VDSO | VMA_VVAR, &new_vdso_start, &new_vdso_size ) < 0 ||
+        vdso_start + offset != new_vdso_start ||
+        vdso_size != new_vdso_size) goto remap_restore;
+
+    for (auxv = state->s.auxv; auxv->a_type != AT_NULL; auxv++)
+        if (auxv->a_type == AT_SYSINFO || auxv->a_type == AT_SYSINFO_EHDR) auxv->a_un.a_val += offset;
+
+    return 1;
+
+remap_restore:
+    /* A rescanned list holds the areas at their new addresses with 'moved' cleared, so they are
+     * selected as unmoved entries; before the rescan only entries flagged as moved are put back. */
+    if (remap_multiple_vmas( vma_list, -offset, VMA_VDSO | VMA_VVAR, !rescanned ) < 0)
+        fatal_error( "Cannot restore VDSO VMAs\n" );
+    wld_munmap( new_vdso, vdso_size );  /* release what is left of the placeholder */
+    if (rescanned) { free_vma_list( vma_list ); alloc_scan_vma( vma_list ); }
+    return -1;
+}
+
 static void map_reserve_preload_ranges( struct vma_area_list *vma_list, void *exclude_start, void *exclude_end )
 {
     size_t i;
@@ -1858,6 +2027,8 @@ void* wld_start( void **stack )
     alloc_scan_vma( &vma_list );
     map_reserve_preload_ranges( &vma_list, state.s.stack, state.s.auxv );
 
+    if (remap_vdso( &vma_list, &state ) > 0) map_reserve_preload_ranges( &vma_list, state.s.stack, state.s.auxv );
+
     /* add an executable page at the top of the address space to defeat
      * broken no-exec protections that play with the code selector limit */
     if (find_preload_reserved_area( (char *)0x80000000 - page_size, page_size ) >= 0)
@@ -1887,7 +2058,7 @@ void* wld_start( void **stack )
 #undef SET_NEW_AV
 
     i = 0;
-    /* delete sysinfo values if addresses conflict */
+    /* delete sysinfo values if addresses conflict and remap failed */
     if (is_in_preload_range( state.s.auxv, AT_SYSINFO ) || is_in_preload_range( state.s.auxv, AT_SYSINFO_EHDR ))
     {
         delete_av[i++].a_type = AT_SYSINFO;
-- 
2.31.1




More information about the wine-devel mailing list