[PATCH v4 07/10] loader: Relocate vDSO on conflict with reserved ranges.

Jinoh Kang jinoh.kang.kr at gmail.com
Fri Jan 28 12:40:47 CST 2022


Today, the preloader removes the vDSO entries (AT_SYSINFO*) from the
auxiliary vector when it conflicts with one of the predefined reserved
ranges.

vDSO is a shared object provided by the kernel.  Among other things, it
provides a mechanism to issue certain system calls without the overhead
of switching to the kernel mode.

Without vDSO, libc still works; however, it is expected that some system
call functions (e.g.  gettimeofday, clock_gettime) will show degraded
performance.

Fix this by relocating vDSO to another address (if supported by the
kernel) instead of erasing it from auxv entirely.

Since this is a potentially risky change, this behaviour is hidden
behind the "WINEPRELOADREMAPVDSO" environment variable.  To activate the
behaviour, the user needs to set "WINEPRELOADREMAPVDSO=on-conflict".
After sufficient testing has been done via staging process, the new
behaviour could be the default and the environment variables removed.

Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=52313
Signed-off-by: Jinoh Kang <jinoh.kang.kr at gmail.com>
---

Notes:
    v1 -> v2:
    - s/offset/delta/g
    - remap_vdso()
      - significantly improve kernel vdso_mremap() support detection logic
      - add comments
      - only modify AT_SYSINFO* if it's in vDSO range
      - guard against vdso_start + vdso_size overflow
      - remove erroneous MAP_GROWSDOWN
    - fix remap_multiple_vmas() when revert = 1
    - some refactoring
    
    v3 -> v4:
    - add/edit some comments and documentation
    - explain why reading /proc/self/maps is necessary for identifying vDSO
      in comments
    - change code style to match the rest
    - decompose find_remap_area into find_vma_envelope_range and
      check_remap_policy

 loader/preloader.c | 580 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 577 insertions(+), 3 deletions(-)

diff --git a/loader/preloader.c b/loader/preloader.c
index 7d17136d3bc..52036dee554 100644
--- a/loader/preloader.c
+++ b/loader/preloader.c
@@ -72,6 +72,7 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
+#include <signal.h>
 #include <sys/mman.h>
 #ifdef HAVE_SYS_SYSCALL_H
 # include <sys/syscall.h>
@@ -86,6 +87,9 @@
 #ifdef HAVE_SYS_LINK_H
 # include <sys/link.h>
 #endif
+#ifdef HAVE_SYS_UCONTEXT_H
+# include <sys/ucontext.h>
+#endif
 
 #include "wine/asm.h"
 #include "main.h"
@@ -102,6 +106,11 @@
 #ifndef MAP_NORESERVE
 #define MAP_NORESERVE 0
 #endif
+#ifndef MREMAP_FIXED
+#define MREMAP_FIXED 2
+#endif
+
+#define REMAP_TEST_SIG SIGIO  /* Any signal GDB doesn't stop on */
 
 static struct wine_preload_info preload_info[] =
 {
@@ -165,6 +174,18 @@ struct wld_auxv
     } a_un;
 };
 
+typedef unsigned long wld_sigset_t[8 / sizeof(unsigned long)];
+
+struct wld_sigaction {
+    /* Prefix all fields since they may collide with macros from libc headers */
+    void (*wld_sa_sigaction)(int, siginfo_t *, void *);
+    unsigned long wld_sa_flags;
+    void (*wld_sa_restorer)(void);
+    wld_sigset_t wld_sa_mask;
+};
+
+#define WLD_SA_SIGINFO 4
+
 /* Aggregates information about initial program stack and variables
  * (e.g. argv and envp) that reside in it.
  */
@@ -193,10 +214,55 @@ struct linebuffer
     int truncated;
 };
 
+/*
+ * Flags that specify the kind of each VMA entry read from /proc/self/maps.
+ *
+ * On Linux, Reading /proc/self/maps is the only reliable way to identify the
+ * exact range of vDSO/vvar mapping.  The reason is twofold:
+ *
+ * 1. vDSO usually hard-codes vvar's offset relative to vDSO. Therefore,
+ *    remapping vDSO requires vvar to be also remapped as well.  However, vvar's
+ *    size and its location relative to vDSO is *not* guaranteed by ABI, and has
+ *    changed all the time.
+ *
+ *    - x86: [vvar] orginally resided at a fixed address 0xffffffffff5ff000
+ *      (64-bit) [1], but was later changed so that it precedes [vdso] [2].
+ *      There, sym_vvar_start is a negative value [3]. text_start is the base
+ *      address of vDSO, and addr becomes the address of vvar.
+ *
+ *    - AArch32: [vvar] is a single page and precedes [vdso] [4].
+ *
+ *    - AArch64: [vvar] is two pages long and precedes [vdso] [5].
+ *      Before v5.9, [vvar] was a single page [6].
+ *
+ * 2. It's very difficult to deduce vDSO and vvar's size and offset relative to
+ *    each other.  Since vvar's symbol does not exist in vDSO's symtab,
+ *    determining the layout would require parsing vDSO's code.
+ *
+ * Also note that CRIU (Checkpoint Restore In Userspace) has maps parsing code
+ * just for relocating vDSO [7].
+ *
+ * [1] https://lwn.net/Articles/615809/
+ * [2] https://elixir.bootlin.com/linux/v5.16.3/source/arch/x86/entry/vdso/vma.c#L246
+ * [3] https://elixir.bootlin.com/linux/v5.16.3/source/arch/x86/include/asm/vdso.h#L21
+ * [4] https://elixir.bootlin.com/linux/v5.16.3/source/arch/arm/kernel/vdso.c#L236
+ * [5] https://elixir.bootlin.com/linux/v5.16.3/source/arch/arm64/kernel/vdso.c#L214
+ * [6] https://elixir.bootlin.com/linux/v5.8/source/arch/arm64/kernel/vdso.c#L161
+ * [7] https://github.com/checkpoint-restore/criu/blob/a315774e11b4da1eb36446ae996eac1695a129a6/criu/vdso.c
+ */
+enum vma_type_flags
+{
+    VMA_NORMAL = 0x01,
+    VMA_VDSO   = 0x02,
+    VMA_VVAR   = 0x04,
+};
+
 struct vma_area
 {
     unsigned long start;
     unsigned long end;
+    unsigned char type_flags;  /* enum vma_type_flags */
+    unsigned char moved;       /* has been mremap()'d? */
 };
 
 struct vma_area_list
@@ -209,6 +275,57 @@ struct vma_area_list
 #define FOREACH_VMA(list, item) \
     for ((item) = (list)->base; (item) != (list)->list_end; (item)++)
 
+/*
+ * Allow the user to configure the remapping behaviour if it causes trouble.
+ * The "force" (REMAP_POLICY_FORCE) value can be used to test the remapping
+ * code path unconditionally.
+ */
+enum remap_policy
+{
+    REMAP_POLICY_ON_CONFLICT = 0,
+    REMAP_POLICY_FORCE = 1,
+    REMAP_POLICY_SKIP = 2,
+    LAST_REMAP_POLICY,
+
+    REMAP_POLICY_DEFAULT_VDSO = REMAP_POLICY_SKIP,
+};
+
+/*
+ * Used in a signal handler that tests if mremap() on vDSO works on the current
+ * kernel.
+ */
+struct remap_test_block {
+    /* The old address range of vDSO or sigpage. Used to test if pages are remapped properly. */
+    unsigned long old_mapping_start;
+    unsigned long old_mapping_size;
+
+    struct vma_area_list *vma_list;
+
+    /*
+     * Difference between the base address of the new mapping and the old mapping.
+     *
+     * Set to zero if the handler reverted mappings to old state before returning
+     * in order to safely return when it detects failed remapping.
+     */
+    unsigned long delta;
+
+    /*
+     * Whether remapping was successfully recognised by the kernel.
+     *
+     * If the signal handler is never called (due to e.g. being blocked), it is counted
+     * as being unsuccessful.
+     */
+    unsigned char is_successful;
+
+    /*
+     * Whether remapping could not be recognised by the kernel.
+     *
+     * If both is_successful and is_failed are set, is_failed takes precedence.
+     * The flags are intentionally made redundant to detect multiple successive
+     * invocation of the signal handler due to external signal delivery. */
+    unsigned char is_failed;
+} remap_test;
+
 /*
  * The __bb_init_func is an empty function only called when file is
  * compiled with gcc flags "-fprofile-arcs -ftest-coverage".  This
@@ -244,6 +361,15 @@ struct
     unsigned int  garbage : 25;
 } thread_ldt = { -1, (unsigned long)thread_data, 0xfffff, 1, 0, 0, 1, 0, 1, 0 };
 
+typedef unsigned long wld_old_sigset_t;
+
+struct wld_old_sigaction {
+    /* Prefix all fields since they may collide with macros from libc headers */
+    void (*wld_sa_sigaction)(int, siginfo_t *, void *);
+    wld_old_sigset_t wld_sa_mask;
+    unsigned long wld_sa_flags;
+    void (*wld_sa_restorer)(void);
+};
 
 /*
  * The _start function is the entry and exit point of this program
@@ -381,6 +507,16 @@ static inline int wld_munmap( void *addr, size_t len )
     return SYSCALL_RET(ret);
 }
 
+static inline void *wld_mremap( void *old_addr, size_t old_len, size_t new_size, int flags, void *new_addr )
+{
+    int ret;
+    __asm__ __volatile__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx"
+                          : "=a" (ret) : "0" (163 /* SYS_mremap */), "r" (old_addr), "c" (old_len),
+                            "d" (new_size), "S" (flags), "D" (new_addr)
+                          : "memory" );
+    return (void *)SYSCALL_RET(ret);
+}
+
 static inline int wld_prctl( int code, long arg )
 {
     int ret;
@@ -389,6 +525,64 @@ static inline int wld_prctl( int code, long arg )
     return SYSCALL_RET(ret);
 }
 
+static void copy_old_sigset( void *dest, const void *src )
+{
+    /* Avoid aliasing */
+    size_t i;
+    for (i = 0; i < sizeof(wld_old_sigset_t); i++)
+        *((unsigned char *)dest + i) = *((const unsigned char *)src + i);
+}
+
+static inline int wld_sigaction( int signum, const struct wld_sigaction *act, struct wld_sigaction *old_act )
+{
+    int ret;
+    __asm__ __volatile__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx"
+                          : "=a" (ret) : "0" (174 /* SYS_rt_sigaction */), "r" (signum), "c" (act), "d" (old_act), "S" (sizeof(act->wld_sa_mask))
+                          : "memory" );
+    if (ret == -38 /* ENOSYS */) {
+        struct wld_old_sigaction act_buf, old_act_buf, *act_real, *old_act_real;
+
+        if (act) {
+            act_real = &act_buf;
+            act_buf.wld_sa_sigaction = act->wld_sa_sigaction;
+            copy_old_sigset(&act_buf.wld_sa_mask, &act->wld_sa_mask);
+            act_buf.wld_sa_flags = act->wld_sa_flags;
+            act_buf.wld_sa_restorer = act->wld_sa_restorer;
+        }
+
+        if (old_act) old_act_real = &old_act_buf;
+
+        __asm__ __volatile__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx"
+                              : "=a" (ret) : "0" (67 /* SYS_sigaction */), "r" (signum), "c" (act_real), "d" (old_act_real)
+                              : "memory" );
+
+        if (old_act && ret >= 0) {
+            old_act->wld_sa_sigaction = old_act_buf.wld_sa_sigaction;
+            old_act->wld_sa_flags = old_act_buf.wld_sa_flags;
+            old_act->wld_sa_restorer = old_act_buf.wld_sa_restorer;
+            copy_old_sigset(&old_act->wld_sa_mask, &old_act_buf.wld_sa_mask);
+        }
+    }
+    return SYSCALL_RET(ret);
+}
+
+static inline int wld_kill( pid_t pid, int sig )
+{
+    int ret;
+    __asm__ __volatile__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx"
+                          : "=a" (ret) : "0" (37 /* SYS_kill */), "r" (pid), "c" (sig)
+                          : "memory" /* clobber: signal handler side effects on raise() */ );
+    return SYSCALL_RET(ret);
+}
+
+static inline pid_t wld_getpid( void )
+{
+    int ret;
+    __asm__ __volatile__( "int $0x80"
+                          : "=a" (ret) : "0" (20 /* SYS_getpid */) );
+    return ret;
+}
+
 #elif defined(__x86_64__)
 
 void *thread_data[256];
@@ -467,9 +661,15 @@ SYSCALL_FUNC( wld_mprotect, 10 /* SYS_mprotect */ );
 int wld_munmap( void *addr, size_t len );
 SYSCALL_FUNC( wld_munmap, 11 /* SYS_munmap */ );
 
+void *wld_mremap( void *old_addr, size_t old_len, size_t new_size, int flags, void *new_addr );
+SYSCALL_FUNC( wld_mremap, 25 /* SYS_mremap */ );
+
 int wld_prctl( int code, long arg );
 SYSCALL_FUNC( wld_prctl, 157 /* SYS_prctl */ );
 
+pid_t wld_getpid(void);
+SYSCALL_NOERR( wld_getpid, 39 /* SYS_getpid */ );
+
 uid_t wld_getuid(void);
 SYSCALL_NOERR( wld_getuid, 102 /* SYS_getuid */ );
 
@@ -577,9 +777,26 @@ SYSCALL_FUNC( wld_mprotect, 226 /* SYS_mprotect */ );
 int wld_munmap( void *addr, size_t len );
 SYSCALL_FUNC( wld_munmap, 215 /* SYS_munmap */ );
 
+void *wld_mremap( void *old_addr, size_t old_len, size_t new_size, int flags, void *new_addr );
+SYSCALL_FUNC( wld_mremap, 216 /* SYS_mremap */ );
+
 int wld_prctl( int code, long arg );
 SYSCALL_FUNC( wld_prctl, 167 /* SYS_prctl */ );
 
+int wld_rt_sigaction( int signum, const struct wld_sigaction *act, struct wld_sigaction *old_act, size_t sigsetsize );
+SYSCALL_FUNC( wld_rt_sigaction, 134 /* SYS_rt_sigaction */ );
+
+static inline int wld_sigaction( int signum, const struct wld_sigaction *act, struct wld_sigaction *old_act )
+{
+    return wld_rt_sigaction( signum, act, old_act, sizeof(act->wld_sa_mask) );
+}
+
+int wld_kill( pid_t pid, int sig );
+SYSCALL_FUNC( wld_kill, 129 /* SYS_kill */ );
+
+pid_t wld_getpid(void);
+SYSCALL_NOERR( wld_getpid, 172 /* SYS_getpid */ );
+
 uid_t wld_getuid(void);
 SYSCALL_NOERR( wld_getuid, 174 /* SYS_getuid */ );
 
@@ -679,9 +896,26 @@ SYSCALL_FUNC( wld_mprotect, 125 /* SYS_mprotect */ );
 int wld_munmap( void *addr, size_t len );
 SYSCALL_FUNC( wld_munmap, 91 /* SYS_munmap */ );
 
+void *wld_mremap( void *old_addr, size_t old_len, size_t new_size, int flags, void *new_addr );
+SYSCALL_FUNC( wld_mremap, 163 /* SYS_mremap */ );
+
 int wld_prctl( int code, long arg );
 SYSCALL_FUNC( wld_prctl, 172 /* SYS_prctl */ );
 
+int wld_rt_sigaction( int signum, const struct wld_sigaction *act, struct wld_sigaction *old_act, size_t sigsetsize );
+SYSCALL_FUNC( wld_rt_sigaction, 174 /* SYS_rt_sigaction */ );
+
+static inline int wld_sigaction( int signum, const struct wld_sigaction *act, struct wld_sigaction *old_act )
+{
+    return wld_rt_sigaction( signum, act, old_act, sizeof(act->wld_sa_mask) );
+}
+
+int wld_kill( pid_t pid, int sig );
+SYSCALL_FUNC( wld_kill, 37 /* SYS_kill */ );
+
+pid_t wld_getpid(void);
+SYSCALL_NOERR( wld_getpid, 20 /* SYS_getpid */ );
+
 uid_t wld_getuid(void);
 SYSCALL_NOERR( wld_getuid, 24 /* SYS_getuid */ );
 
@@ -1661,6 +1895,7 @@ static char *linebuffer_getline( struct linebuffer *lbuf )
 static int parse_maps_line( struct vma_area *entry, const char *line )
 {
     struct vma_area item = { 0 };
+    unsigned long dev_maj, dev_min;
     char *ptr = (char *)line;
     int overflow;
 
@@ -1691,11 +1926,11 @@ static int parse_maps_line( struct vma_area *entry, const char *line )
     if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" );
     ptr++;
 
-    parse_ul( ptr, &ptr, 16, NULL );
+    dev_maj = parse_ul( ptr, &ptr, 16, NULL );
     if (*ptr != ':') fatal_error( "parse error in /proc/self/maps\n" );
     ptr++;
 
-    parse_ul( ptr, &ptr, 16, NULL );
+    dev_min = parse_ul( ptr, &ptr, 16, NULL );
     if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" );
     ptr++;
 
@@ -1703,6 +1938,17 @@ static int parse_maps_line( struct vma_area *entry, const char *line )
     if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" );
     ptr++;
 
+    while (*ptr == ' ')
+        ptr++;
+
+    if (dev_maj == 0 && dev_min == 0)
+    {
+        if (wld_strcmp(ptr, "[vdso]") == 0)
+            item.type_flags |= VMA_VDSO;
+        else if (wld_strcmp(ptr, "[vvar]") == 0)
+            item.type_flags |= VMA_VVAR;
+    }
+
     *entry = item;
     return 0;
 }
@@ -1802,6 +2048,76 @@ static void insert_vma_entry( struct vma_area_list *list, const struct vma_area
     return;
 }
 
+/*
+ * find_vma_envelope_range
+ *
+ * Compute the smallest range that contains all VMAs with any of the given
+ * type flags.
+ */
+static int find_vma_envelope_range( const struct vma_area_list *list, int type_mask, unsigned long *startp, unsigned long *sizep )
+{
+    const struct vma_area *item;
+    unsigned long start = ULONG_MAX;
+    unsigned long end = 0;
+
+    FOREACH_VMA(list, item)
+    {
+        if (item->type_flags & type_mask)
+        {
+            if (start > item->start) start = item->start;
+            if (end < item->end) end = item->end;
+        }
+    }
+
+    if (start >= end) return -1;
+
+    *startp = start;
+    *sizep = end - start;
+    return 0;
+}
+
+/*
+ * remap_multiple_vmas
+ *
+ * Relocate all VMAs with the given type flags.
+ * This function can also be used to reverse the effects of previous
+ * remap_multiple_vmas().
+ */
+static int remap_multiple_vmas( struct vma_area_list *list, unsigned long delta, int type_mask, unsigned char revert )
+{
+    struct vma_area *item;
+    void *old_addr, *desired_addr, *mapped_addr;
+    size_t size;
+
+    FOREACH_VMA(list, item)
+    {
+        if ((item->type_flags & type_mask) && item->moved == revert)
+        {
+            if (revert)
+            {
+                old_addr = (void *)(item->start + delta);
+                desired_addr = (void *)item->start;
+            }
+            else
+            {
+                old_addr = (void *)item->start;
+                desired_addr = (void *)(item->start + delta);
+            }
+            size = item->end - item->start;
+            mapped_addr = wld_mremap( old_addr, size, size, MREMAP_FIXED | MREMAP_MAYMOVE, desired_addr );
+            if (mapped_addr == (void *)-1) return -1;
+            if (mapped_addr != desired_addr)
+            {
+                if (mapped_addr == old_addr) return -1;  /* kernel deoesn't support MREMAP_FIXED */
+                fatal_error( "mremap() returned different address\n" );
+            }
+            item->moved = !revert;
+        }
+    }
+
+    return 0;
+}
+
 /*
  * scan_vma
  *
@@ -1887,6 +2203,262 @@ static void alloc_scan_vma( struct vma_area_list *listp )
     }
 }
 
+/*
+ * stackargs_get_remap_policy
+ *
+ * Parse the remap policy value from the given environment variable.
+ */
+static enum remap_policy stackargs_get_remap_policy( const struct stackarg_info *info, const char *name,
+                                                     enum remap_policy default_policy )
+{
+    char *valstr = stackargs_getenv( info, name ), *endptr;
+    unsigned long valnum;
+
+    if (valstr)
+    {
+        if (wld_strcmp(valstr, "auto") == 0 || wld_strcmp(valstr, "on-conflict") == 0)
+            return REMAP_POLICY_ON_CONFLICT;
+        if (wld_strcmp(valstr, "always") == 0 || wld_strcmp(valstr, "force") == 0)
+            return REMAP_POLICY_FORCE;
+        if (wld_strcmp(valstr, "never") == 0 || wld_strcmp(valstr, "skip") == 0)
+            return REMAP_POLICY_SKIP;
+        valnum = parse_ul( valstr, &endptr, 10, NULL );
+        if (!*endptr && valnum < LAST_REMAP_POLICY) return valnum;
+    }
+
+    return default_policy;
+}
+
+/*
+ * check_remap_policy
+ *
+ * Check remap policy against the given range and determine the action to take.
+ *
+ *  -1: fail
+ *   0: do nothing
+ *   1: proceed with remapping
+ */
+static int check_remap_policy( struct preloader_state *state,
+                               const char *policy_envname, enum remap_policy default_policy,
+                               unsigned long start, unsigned long size )
+{
+    switch (stackargs_get_remap_policy( &state->s, policy_envname, default_policy ))
+    {
+    case REMAP_POLICY_SKIP:
+        return -1;
+    case REMAP_POLICY_ON_CONFLICT:
+        if (find_preload_reserved_area( (void *)start, size ) < 0)
+            return 0;
+        /* fallthrough */
+    case REMAP_POLICY_FORCE:
+    default:
+        return 1;
+    }
+}
+
+#ifndef __x86_64__
+/*
+ * remap_test_in_old_address_range
+ *
+ * Determine whether the address falls in the old mapping address range
+ * (i.e. before mremap).
+ */
+static int remap_test_in_old_address_range( unsigned long address )
+{
+    return address - remap_test.old_mapping_start < remap_test.old_mapping_size;
+}
+
+/*
+ * remap_test_signal_handler
+ *
+ * A signal handler that detects whether the kernel has acknowledged the new
+ * addresss for the remapped vDSO.
+ */
+static void remap_test_signal_handler( int signum, siginfo_t *sinfo, void *context )
+{
+    (void)signum;
+    (void)sinfo;
+    (void)context;
+
+    if (remap_test_in_old_address_range((unsigned long)__builtin_return_address(0))) goto fail;
+
+#ifdef __i386__
+    /* test for SYSENTER/SYSEXIT return address (int80_landing_pad) */
+    if (remap_test_in_old_address_range(((ucontext_t *)context)->uc_mcontext.gregs[REG_EIP])) goto fail;
+#endif
+
+    remap_test.is_successful = 1;
+    return;
+
+fail:
+    /* Kernel too old to support remapping. Restore vDSO/sigpage to return safely. */
+    if (remap_test.delta) {
+        if (remap_multiple_vmas( remap_test.vma_list, remap_test.delta, -1, 1 ) < 0)
+            fatal_error( "Cannot restore remapped VMAs\n" );
+        remap_test.delta = 0;
+    }
+
+    /* Signal handler might be called several times externally,
+     * so overwrite with the latest status just to be safe. */
+    remap_test.is_failed = 1;
+}
+#endif
+
+/*
+ * test_remap_successful
+ *
+ * Test if the kernel has acknowledged the remapped vDSO.
+ *
+ * Remapping vDSO requires explicit kernel support for most architectures, but
+ * the support is missing in old Linux kernels (pre-4.8).  Among other things,
+ * vDSO contains the default signal restorer (sigreturn trampoline) and on i386
+ * the fast syscall gate (which uses SYSENTER on Intel CPUs).  The kernel keeps
+ * track of the addresses of both of these things per process, and those
+ * addresses need to be updated accordingly if the vDSO address changes.
+ * Without proper support, mremap() on vDSO still succeeds, but the kernel still
+ * uses old addresses for the vDSO components, resulting in crashes or other
+ * unpredictable behaviour if any of those addresses are used.
+ *
+ * We attempt to detect this condition by installing a signal handler and
+ * sending a signal to ourselves.  The signal handler will test if the restorer
+ * address falls in the old address range; if this is the case, we remap the
+ * vDSO to its old address and report failure (i.e. no support from kernel).  On
+ * i386, we additionally check for the syscall gate.  If the addresses do not
+ * overlap with the old address range, the kernel is new enough to support vDSO
+ * remapping and we can proceed as normal.
+ */
+static int test_remap_successful( struct vma_area_list *vma_list, struct preloader_state *state,
+                                  unsigned long old_mapping_start, unsigned long old_mapping_size,
+                                  unsigned long delta )
+{
+#ifdef __x86_64__
+    (void)vma_list;
+    (void)state;
+    (void)old_mapping_start;
+    (void)old_mapping_size;
+    (void)delta;
+
+    /* x86-64 doesn't use SYSENTER for syscalls, and requires sa_restorer for
+     * signal handlers.  We can safely relocate vDSO without kernel support
+     * (vdso_mremap). */
+    return 0;
+#else
+    struct wld_sigaction sigact;
+    pid_t pid;
+    int result = -1;
+    unsigned long syscall_addr = 0;
+
+    pid = wld_getpid();
+    if (pid < 0) fatal_error( "failed to get PID\n" );
+
+#ifdef __i386__
+    syscall_addr = get_auxiliary( state->s.auxv, AT_SYSINFO, 0 );
+    if (syscall_addr - old_mapping_start < old_mapping_size) syscall_addr += delta;
+#endif
+
+    remap_test.old_mapping_start = old_mapping_start;
+    remap_test.old_mapping_size = old_mapping_size;
+    remap_test.vma_list = vma_list;
+    remap_test.delta = delta;
+    remap_test.is_successful = 0;
+    remap_test.is_failed = 0;
+
+    wld_memset( &sigact, 0, sizeof(sigact) );
+    sigact.wld_sa_sigaction = remap_test_signal_handler;
+    sigact.wld_sa_flags = WLD_SA_SIGINFO;
+    /* We deliberately skip sa_restorer, since we're trying to get the address
+     * of the kernel's built-in restorer function. */
+
+    if (wld_sigaction( REMAP_TEST_SIG, &sigact, &sigact ) < 0) fatal_error( "cannot register test signal handler\n" );
+
+    /* Unsafe region below - may race with signal handler */
+#ifdef __i386__
+    if (syscall_addr) {
+        /* Also test __kernel_vsyscall return as well */
+        __asm__ __volatile__( "call *%1"
+                              : "=a" (result) : "r" (syscall_addr), "0" (37 /* SYS_kill */), "b" (pid), "c" (REMAP_TEST_SIG) );
+        result = SYSCALL_RET(result);
+    }
+#else
+    syscall_addr = 0;
+#endif
+    if (!syscall_addr) result = wld_kill( pid, REMAP_TEST_SIG );
+    /* Unsafe region above - may race with signal handler */
+
+    if (wld_sigaction( REMAP_TEST_SIG, &sigact, &sigact ) < 0) fatal_error( "cannot unregister test signal handler\n" );
+    if (result == -1) fatal_error( "cannot raise test signal\n" );
+
+    /* Now that the signal handler can no longer be called,
+     * we can safely access the result data. */
+    if (remap_test.is_failed || !remap_test.is_successful) {
+        if (remap_test.delta && remap_multiple_vmas( remap_test.vma_list, remap_test.delta, -1, 1 ) < 0)
+            fatal_error( "Cannot restore remapped VMAs\n" );
+        return -1;
+    }
+
+    return 0;
+#endif
+}
+
+/*
+ * remap_vdso
+ *
+ * Perform vDSO remapping if it conflicts with one of the reserved address ranges.
+ */
+static int remap_vdso( struct vma_area_list *vma_list, struct preloader_state *state )
+{
+    int result;
+    unsigned long vdso_start, vdso_size, delta;
+    void *new_vdso;
+    struct wld_auxv *auxv;
+
+    if (find_vma_envelope_range( vma_list, VMA_VDSO | VMA_VVAR, &vdso_start, &vdso_size ) < 0) return 0;
+
+    result = check_remap_policy( state, "WINEPRELOADREMAPVDSO",
+                                 REMAP_POLICY_DEFAULT_VDSO,
+                                 vdso_start, vdso_size );
+    if (result <= 0) return result;
+
+    new_vdso = wld_mmap( NULL, vdso_size, PROT_NONE,
+                         MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0 );
+    if (new_vdso == (void *)-1) return -1;
+
+    delta = (unsigned long)new_vdso - vdso_start;
+    /* It's easier to undo vvar remapping, so we remap it first. */
+    if (remap_multiple_vmas( vma_list, delta, VMA_VVAR, 0 ) < 0 ||
+        remap_multiple_vmas( vma_list, delta, VMA_VDSO, 0 ) < 0) goto remap_restore;
+
+    /* NOTE: AArch32 may have restorer in vDSO if we're running on an old ARM64 kernel. */
+    if (test_remap_successful( vma_list, state, vdso_start, vdso_size, delta ) < 0)
+    {
+        /* mapping restore done by test_remap_successful */
+        return -1;
+    }
+
+    for (auxv = state->s.auxv; auxv->a_type != AT_NULL; auxv++)
+    {
+        switch (auxv->a_type)
+        {
+        case AT_SYSINFO:
+        case AT_SYSINFO_EHDR:
+            if ((unsigned long)auxv->a_un.a_val - vdso_start < vdso_size)
+                auxv->a_un.a_val += delta;
+            break;
+        }
+    }
+
+    /* Refresh VMA list */
+    free_vma_list( vma_list );
+    alloc_scan_vma( vma_list );
+    return 1;
+
+remap_restore:
+    if (remap_multiple_vmas( vma_list, delta, -1, 1 ) < 0)
+        fatal_error( "Cannot restore remapped VMAs\n" );
+
+    return -1;
+}
+
 /*
  * map_reserve_preload_ranges
  *
@@ -1974,6 +2546,8 @@ void* wld_start( void **stack )
     alloc_scan_vma( &vma_list );
     map_reserve_preload_ranges( &vma_list, &state.s );
 
+    if (remap_vdso( &vma_list, &state ) > 0) map_reserve_preload_ranges( &vma_list, &state.s );
+
     /* add an executable page at the top of the address space to defeat
      * broken no-exec protections that play with the code selector limit */
     if (find_preload_reserved_area( (char *)0x80000000 - page_size, page_size ) >= 0)
@@ -2003,7 +2577,7 @@ void* wld_start( void **stack )
 #undef SET_NEW_AV
 
     i = 0;
-    /* delete sysinfo values if addresses conflict */
+    /* delete sysinfo values if addresses conflict and remap failed */
     if (is_in_preload_range( state.s.auxv, AT_SYSINFO ) || is_in_preload_range( state.s.auxv, AT_SYSINFO_EHDR ))
     {
         delete_av[i++].a_type = AT_SYSINFO;
-- 
2.34.1




More information about the wine-devel mailing list