[PATCH v4 07/10] loader: Relocate vDSO on conflict with reserved ranges.
Jinoh Kang
jinoh.kang.kr at gmail.com
Fri Jan 28 12:40:47 CST 2022
Today, the preloader removes the vDSO entries (AT_SYSINFO*) from the
auxiliary vector when it conflicts with one of the predefined reserved
ranges.
vDSO is a shared object provided by the kernel. Among other things, it
provides a mechanism to issue certain system calls without the overhead
of switching to the kernel mode.
Without vDSO, libc still works; however, it is expected that some system
call functions (e.g. gettimeofday, clock_gettime) will show degraded
performance.
Fix this by relocating vDSO to another address (if supported by the
kernel) instead of erasing it from auxv entirely.
Since this is a potentially risky change, this behaviour is hidden
behind the "WINEPRELOADREMAPVDSO" environment variable. To activate the
behaviour, the user needs to set "WINEPRELOADREMAPVDSO=on-conflict".
After sufficient testing has been done via staging process, the new
behaviour could become the default and the environment variable removed.
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=52313
Signed-off-by: Jinoh Kang <jinoh.kang.kr at gmail.com>
---
Notes:
v1 -> v2:
- s/offset/delta/g
- remap_vdso()
- significantly improve kernel vdso_mremap() support detection logic
- add comments
- only modify AT_SYSINFO* if it's in vDSO range
- guard against vdso_start + vdso_size overflow
- remove erroneous MAP_GROWSDOWN
- fix remap_multiple_vmas() when revert = 1
- some refactoring
v3 -> v4:
- add/edit some comments and documentation
- explain why reading /proc/self/maps is necessary for identifying vDSO
in comments
- change code style to match the rest
- decompose find_remap_area into find_vma_envelope_range and
check_remap_policy
loader/preloader.c | 580 ++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 577 insertions(+), 3 deletions(-)
diff --git a/loader/preloader.c b/loader/preloader.c
index 7d17136d3bc..52036dee554 100644
--- a/loader/preloader.c
+++ b/loader/preloader.c
@@ -72,6 +72,7 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
+#include <signal.h>
#include <sys/mman.h>
#ifdef HAVE_SYS_SYSCALL_H
# include <sys/syscall.h>
@@ -86,6 +87,9 @@
#ifdef HAVE_SYS_LINK_H
# include <sys/link.h>
#endif
+#ifdef HAVE_SYS_UCONTEXT_H
+# include <sys/ucontext.h>
+#endif
#include "wine/asm.h"
#include "main.h"
@@ -102,6 +106,11 @@
#ifndef MAP_NORESERVE
#define MAP_NORESERVE 0
#endif
+#ifndef MREMAP_FIXED
+#define MREMAP_FIXED 2
+#endif
+
+#define REMAP_TEST_SIG SIGIO /* Any signal GDB doesn't stop on */
static struct wine_preload_info preload_info[] =
{
@@ -165,6 +174,18 @@ struct wld_auxv
} a_un;
};
+typedef unsigned long wld_sigset_t[8 / sizeof(unsigned long)];
+
+struct wld_sigaction {
+ /* Prefix all fields since they may collide with macros from libc headers */
+ void (*wld_sa_sigaction)(int, siginfo_t *, void *);
+ unsigned long wld_sa_flags;
+ void (*wld_sa_restorer)(void);
+ wld_sigset_t wld_sa_mask;
+};
+
+#define WLD_SA_SIGINFO 4
+
/* Aggregates information about initial program stack and variables
* (e.g. argv and envp) that reside in it.
*/
@@ -193,10 +214,55 @@ struct linebuffer
int truncated;
};
+/*
+ * Flags that specify the kind of each VMA entry read from /proc/self/maps.
+ *
+ * On Linux, reading /proc/self/maps is the only reliable way to identify the
+ * exact range of vDSO/vvar mapping. The reason is twofold:
+ *
+ * 1. vDSO usually hard-codes vvar's offset relative to vDSO. Therefore,
+ * remapping vDSO requires vvar to be remapped as well. However, vvar's
+ * size and its location relative to vDSO are *not* guaranteed by the ABI, and
+ * have changed over time.
+ *
+ * - x86: [vvar] originally resided at a fixed address 0xffffffffff5ff000
+ * (64-bit) [1], but was later changed so that it precedes [vdso] [2].
+ * There, sym_vvar_start is a negative value [3]. text_start is the base
+ * address of vDSO, and addr becomes the address of vvar.
+ *
+ * - AArch32: [vvar] is a single page and precedes [vdso] [4].
+ *
+ * - AArch64: [vvar] is two pages long and precedes [vdso] [5].
+ * Before v5.9, [vvar] was a single page [6].
+ *
+ * 2. It's very difficult to deduce vDSO and vvar's size and offset relative to
+ * each other. Since vvar's symbol does not exist in vDSO's symtab,
+ * determining the layout would require parsing vDSO's code.
+ *
+ * Also note that CRIU (Checkpoint Restore In Userspace) has maps parsing code
+ * just for relocating vDSO [7].
+ *
+ * [1] https://lwn.net/Articles/615809/
+ * [2] https://elixir.bootlin.com/linux/v5.16.3/source/arch/x86/entry/vdso/vma.c#L246
+ * [3] https://elixir.bootlin.com/linux/v5.16.3/source/arch/x86/include/asm/vdso.h#L21
+ * [4] https://elixir.bootlin.com/linux/v5.16.3/source/arch/arm/kernel/vdso.c#L236
+ * [5] https://elixir.bootlin.com/linux/v5.16.3/source/arch/arm64/kernel/vdso.c#L214
+ * [6] https://elixir.bootlin.com/linux/v5.8/source/arch/arm64/kernel/vdso.c#L161
+ * [7] https://github.com/checkpoint-restore/criu/blob/a315774e11b4da1eb36446ae996eac1695a129a6/criu/vdso.c
+ */
+enum vma_type_flags
+{
+ VMA_NORMAL = 0x01,
+ VMA_VDSO = 0x02,
+ VMA_VVAR = 0x04,
+};
+
struct vma_area
{
unsigned long start;
unsigned long end;
+ unsigned char type_flags; /* enum vma_type_flags */
+ unsigned char moved; /* has been mremap()'d? */
};
struct vma_area_list
@@ -209,6 +275,57 @@ struct vma_area_list
#define FOREACH_VMA(list, item) \
for ((item) = (list)->base; (item) != (list)->list_end; (item)++)
+/*
+ * Allow the user to configure the remapping behaviour if it causes trouble.
+ * The "force" (REMAP_POLICY_FORCE) value can be used to test the remapping
+ * code path unconditionally.
+ */
+enum remap_policy
+{
+ REMAP_POLICY_ON_CONFLICT = 0,
+ REMAP_POLICY_FORCE = 1,
+ REMAP_POLICY_SKIP = 2,
+ LAST_REMAP_POLICY,
+
+ REMAP_POLICY_DEFAULT_VDSO = REMAP_POLICY_SKIP,
+};
+
+/*
+ * Used in a signal handler that tests if mremap() on vDSO works on the current
+ * kernel.
+ */
+struct remap_test_block {
+ /* The old address range of vDSO or sigpage. Used to test if pages are remapped properly. */
+ unsigned long old_mapping_start;
+ unsigned long old_mapping_size;
+
+ struct vma_area_list *vma_list;
+
+ /*
+ * Difference between the base address of the new mapping and the old mapping.
+ *
+ * Set to zero if the handler reverted mappings to old state before returning
+ * in order to safely return when it detects failed remapping.
+ */
+ unsigned long delta;
+
+ /*
+ * Whether remapping was successfully recognised by the kernel.
+ *
+ * If the signal handler is never called (due to e.g. being blocked), it is counted
+ * as being unsuccessful.
+ */
+ unsigned char is_successful;
+
+ /*
+ * Whether remapping could not be recognised by the kernel.
+ *
+ * If both is_successful and is_failed are set, is_failed takes precedence.
+ * The flags are intentionally made redundant to detect multiple successive
+ * invocations of the signal handler due to external signal delivery. */
+ unsigned char is_failed;
+} remap_test;
+
/*
* The __bb_init_func is an empty function only called when file is
* compiled with gcc flags "-fprofile-arcs -ftest-coverage". This
@@ -244,6 +361,15 @@ struct
unsigned int garbage : 25;
} thread_ldt = { -1, (unsigned long)thread_data, 0xfffff, 1, 0, 0, 1, 0, 1, 0 };
+typedef unsigned long wld_old_sigset_t;
+
+struct wld_old_sigaction {
+ /* Prefix all fields since they may collide with macros from libc headers */
+ void (*wld_sa_sigaction)(int, siginfo_t *, void *);
+ wld_old_sigset_t wld_sa_mask;
+ unsigned long wld_sa_flags;
+ void (*wld_sa_restorer)(void);
+};
/*
* The _start function is the entry and exit point of this program
@@ -381,6 +507,16 @@ static inline int wld_munmap( void *addr, size_t len )
return SYSCALL_RET(ret);
}
+static inline void *wld_mremap( void *old_addr, size_t old_len, size_t new_size, int flags, void *new_addr )
+{
+ int ret;
+ __asm__ __volatile__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx"
+ : "=a" (ret) : "0" (163 /* SYS_mremap */), "r" (old_addr), "c" (old_len),
+ "d" (new_size), "S" (flags), "D" (new_addr)
+ : "memory" );
+ return (void *)SYSCALL_RET(ret);
+}
+
static inline int wld_prctl( int code, long arg )
{
int ret;
@@ -389,6 +525,64 @@ static inline int wld_prctl( int code, long arg )
return SYSCALL_RET(ret);
}
+static void copy_old_sigset( void *dest, const void *src )
+{
+ /* Avoid aliasing */
+ size_t i;
+ for (i = 0; i < sizeof(wld_old_sigset_t); i++)
+ *((unsigned char *)dest + i) = *((const unsigned char *)src + i);
+}
+
+static inline int wld_sigaction( int signum, const struct wld_sigaction *act, struct wld_sigaction *old_act )
+{
+ int ret;
+ __asm__ __volatile__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx"
+ : "=a" (ret) : "0" (174 /* SYS_rt_sigaction */), "r" (signum), "c" (act), "d" (old_act), "S" (sizeof(act->wld_sa_mask))
+ : "memory" );
+ if (ret == -38 /* ENOSYS */) {
+ struct wld_old_sigaction act_buf, old_act_buf, *act_real, *old_act_real;
+
+ if (act) {
+ act_real = &act_buf;
+ act_buf.wld_sa_sigaction = act->wld_sa_sigaction;
+ copy_old_sigset(&act_buf.wld_sa_mask, &act->wld_sa_mask);
+ act_buf.wld_sa_flags = act->wld_sa_flags;
+ act_buf.wld_sa_restorer = act->wld_sa_restorer;
+ }
+
+ if (old_act) old_act_real = &old_act_buf;
+
+ __asm__ __volatile__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx"
+ : "=a" (ret) : "0" (67 /* SYS_sigaction */), "r" (signum), "c" (act_real), "d" (old_act_real)
+ : "memory" );
+
+ if (old_act && ret >= 0) {
+ old_act->wld_sa_sigaction = old_act_buf.wld_sa_sigaction;
+ old_act->wld_sa_flags = old_act_buf.wld_sa_flags;
+ old_act->wld_sa_restorer = old_act_buf.wld_sa_restorer;
+ copy_old_sigset(&old_act->wld_sa_mask, &old_act_buf.wld_sa_mask);
+ }
+ }
+ return SYSCALL_RET(ret);
+}
+
+static inline int wld_kill( pid_t pid, int sig )
+{
+ int ret;
+ __asm__ __volatile__( "pushl %%ebx; movl %2,%%ebx; int $0x80; popl %%ebx"
+ : "=a" (ret) : "0" (37 /* SYS_kill */), "r" (pid), "c" (sig)
+ : "memory" /* clobber: signal handler side effects on raise() */ );
+ return SYSCALL_RET(ret);
+}
+
+static inline pid_t wld_getpid( void )
+{
+ int ret;
+ __asm__ __volatile__( "int $0x80"
+ : "=a" (ret) : "0" (20 /* SYS_getpid */) );
+ return ret;
+}
+
#elif defined(__x86_64__)
void *thread_data[256];
@@ -467,9 +661,15 @@ SYSCALL_FUNC( wld_mprotect, 10 /* SYS_mprotect */ );
int wld_munmap( void *addr, size_t len );
SYSCALL_FUNC( wld_munmap, 11 /* SYS_munmap */ );
+void *wld_mremap( void *old_addr, size_t old_len, size_t new_size, int flags, void *new_addr );
+SYSCALL_FUNC( wld_mremap, 25 /* SYS_mremap */ );
+
int wld_prctl( int code, long arg );
SYSCALL_FUNC( wld_prctl, 157 /* SYS_prctl */ );
+pid_t wld_getpid(void);
+SYSCALL_NOERR( wld_getpid, 39 /* SYS_getpid */ );
+
uid_t wld_getuid(void);
SYSCALL_NOERR( wld_getuid, 102 /* SYS_getuid */ );
@@ -577,9 +777,26 @@ SYSCALL_FUNC( wld_mprotect, 226 /* SYS_mprotect */ );
int wld_munmap( void *addr, size_t len );
SYSCALL_FUNC( wld_munmap, 215 /* SYS_munmap */ );
+void *wld_mremap( void *old_addr, size_t old_len, size_t new_size, int flags, void *new_addr );
+SYSCALL_FUNC( wld_mremap, 216 /* SYS_mremap */ );
+
int wld_prctl( int code, long arg );
SYSCALL_FUNC( wld_prctl, 167 /* SYS_prctl */ );
+int wld_rt_sigaction( int signum, const struct wld_sigaction *act, struct wld_sigaction *old_act, size_t sigsetsize );
+SYSCALL_FUNC( wld_rt_sigaction, 134 /* SYS_rt_sigaction */ );
+
+static inline int wld_sigaction( int signum, const struct wld_sigaction *act, struct wld_sigaction *old_act )
+{
+ return wld_rt_sigaction( signum, act, old_act, sizeof(act->wld_sa_mask) );
+}
+
+int wld_kill( pid_t pid, int sig );
+SYSCALL_FUNC( wld_kill, 129 /* SYS_kill */ );
+
+pid_t wld_getpid(void);
+SYSCALL_NOERR( wld_getpid, 172 /* SYS_getpid */ );
+
uid_t wld_getuid(void);
SYSCALL_NOERR( wld_getuid, 174 /* SYS_getuid */ );
@@ -679,9 +896,26 @@ SYSCALL_FUNC( wld_mprotect, 125 /* SYS_mprotect */ );
int wld_munmap( void *addr, size_t len );
SYSCALL_FUNC( wld_munmap, 91 /* SYS_munmap */ );
+void *wld_mremap( void *old_addr, size_t old_len, size_t new_size, int flags, void *new_addr );
+SYSCALL_FUNC( wld_mremap, 163 /* SYS_mremap */ );
+
int wld_prctl( int code, long arg );
SYSCALL_FUNC( wld_prctl, 172 /* SYS_prctl */ );
+int wld_rt_sigaction( int signum, const struct wld_sigaction *act, struct wld_sigaction *old_act, size_t sigsetsize );
+SYSCALL_FUNC( wld_rt_sigaction, 174 /* SYS_rt_sigaction */ );
+
+static inline int wld_sigaction( int signum, const struct wld_sigaction *act, struct wld_sigaction *old_act )
+{
+ return wld_rt_sigaction( signum, act, old_act, sizeof(act->wld_sa_mask) );
+}
+
+int wld_kill( pid_t pid, int sig );
+SYSCALL_FUNC( wld_kill, 37 /* SYS_kill */ );
+
+pid_t wld_getpid(void);
+SYSCALL_NOERR( wld_getpid, 20 /* SYS_getpid */ );
+
uid_t wld_getuid(void);
SYSCALL_NOERR( wld_getuid, 24 /* SYS_getuid */ );
@@ -1661,6 +1895,7 @@ static char *linebuffer_getline( struct linebuffer *lbuf )
static int parse_maps_line( struct vma_area *entry, const char *line )
{
struct vma_area item = { 0 };
+ unsigned long dev_maj, dev_min;
char *ptr = (char *)line;
int overflow;
@@ -1691,11 +1926,11 @@ static int parse_maps_line( struct vma_area *entry, const char *line )
if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" );
ptr++;
- parse_ul( ptr, &ptr, 16, NULL );
+ dev_maj = parse_ul( ptr, &ptr, 16, NULL );
if (*ptr != ':') fatal_error( "parse error in /proc/self/maps\n" );
ptr++;
- parse_ul( ptr, &ptr, 16, NULL );
+ dev_min = parse_ul( ptr, &ptr, 16, NULL );
if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" );
ptr++;
@@ -1703,6 +1938,17 @@ static int parse_maps_line( struct vma_area *entry, const char *line )
if (*ptr != ' ') fatal_error( "parse error in /proc/self/maps\n" );
ptr++;
+ while (*ptr == ' ')
+ ptr++;
+
+ if (dev_maj == 0 && dev_min == 0)
+ {
+ if (wld_strcmp(ptr, "[vdso]") == 0)
+ item.type_flags |= VMA_VDSO;
+ else if (wld_strcmp(ptr, "[vvar]") == 0)
+ item.type_flags |= VMA_VVAR;
+ }
+
*entry = item;
return 0;
}
@@ -1802,6 +2048,76 @@ static void insert_vma_entry( struct vma_area_list *list, const struct vma_area
return;
}
+/*
+ * find_vma_envelope_range
+ *
+ * Compute the smallest range that contains all VMAs with any of the given
+ * type flags.
+ */
+static int find_vma_envelope_range( const struct vma_area_list *list, int type_mask, unsigned long *startp, unsigned long *sizep )
+{
+ const struct vma_area *item;
+ unsigned long start = ULONG_MAX;
+ unsigned long end = 0;
+
+ FOREACH_VMA(list, item)
+ {
+ if (item->type_flags & type_mask)
+ {
+ if (start > item->start) start = item->start;
+ if (end < item->end) end = item->end;
+ }
+ }
+
+ if (start >= end) return -1;
+
+ *startp = start;
+ *sizep = end - start;
+ return 0;
+}
+
+/*
+ * remap_multiple_vmas
+ *
+ * Relocate all VMAs with the given type flags.
+ * This function can also be used to reverse the effects of previous
+ * remap_multiple_vmas().
+ */
+static int remap_multiple_vmas( struct vma_area_list *list, unsigned long delta, int type_mask, unsigned char revert )
+{
+ struct vma_area *item;
+ void *old_addr, *desired_addr, *mapped_addr;
+ size_t size;
+
+ FOREACH_VMA(list, item)
+ {
+ if ((item->type_flags & type_mask) && item->moved == revert)
+ {
+ if (revert)
+ {
+ old_addr = (void *)(item->start + delta);
+ desired_addr = (void *)item->start;
+ }
+ else
+ {
+ old_addr = (void *)item->start;
+ desired_addr = (void *)(item->start + delta);
+ }
+ size = item->end - item->start;
+ mapped_addr = wld_mremap( old_addr, size, size, MREMAP_FIXED | MREMAP_MAYMOVE, desired_addr );
+ if (mapped_addr == (void *)-1) return -1;
+ if (mapped_addr != desired_addr)
+ {
+ if (mapped_addr == old_addr) return -1; /* kernel doesn't support MREMAP_FIXED */
+ fatal_error( "mremap() returned different address\n" );
+ }
+ item->moved = !revert;
+ }
+ }
+
+ return 0;
+}
+
/*
* scan_vma
*
@@ -1887,6 +2203,262 @@ static void alloc_scan_vma( struct vma_area_list *listp )
}
}
+/*
+ * stackargs_get_remap_policy
+ *
+ * Parse the remap policy value from the given environment variable.
+ */
+static enum remap_policy stackargs_get_remap_policy( const struct stackarg_info *info, const char *name,
+ enum remap_policy default_policy )
+{
+ char *valstr = stackargs_getenv( info, name ), *endptr;
+ unsigned long valnum;
+
+ if (valstr)
+ {
+ if (wld_strcmp(valstr, "auto") == 0 || wld_strcmp(valstr, "on-conflict") == 0)
+ return REMAP_POLICY_ON_CONFLICT;
+ if (wld_strcmp(valstr, "always") == 0 || wld_strcmp(valstr, "force") == 0)
+ return REMAP_POLICY_FORCE;
+ if (wld_strcmp(valstr, "never") == 0 || wld_strcmp(valstr, "skip") == 0)
+ return REMAP_POLICY_SKIP;
+ valnum = parse_ul( valstr, &endptr, 10, NULL );
+ if (!*endptr && valnum < LAST_REMAP_POLICY) return valnum;
+ }
+
+ return default_policy;
+}
+
+/*
+ * check_remap_policy
+ *
+ * Check remap policy against the given range and determine the action to take.
+ *
+ * -1: fail
+ * 0: do nothing
+ * 1: proceed with remapping
+ */
+static int check_remap_policy( struct preloader_state *state,
+ const char *policy_envname, enum remap_policy default_policy,
+ unsigned long start, unsigned long size )
+{
+ switch (stackargs_get_remap_policy( &state->s, policy_envname, default_policy ))
+ {
+ case REMAP_POLICY_SKIP:
+ return -1;
+ case REMAP_POLICY_ON_CONFLICT:
+ if (find_preload_reserved_area( (void *)start, size ) < 0)
+ return 0;
+ /* fallthrough */
+ case REMAP_POLICY_FORCE:
+ default:
+ return 1;
+ }
+}
+
+#ifndef __x86_64__
+/*
+ * remap_test_in_old_address_range
+ *
+ * Determine whether the address falls in the old mapping address range
+ * (i.e. before mremap).
+ */
+static int remap_test_in_old_address_range( unsigned long address )
+{
+ return address - remap_test.old_mapping_start < remap_test.old_mapping_size;
+}
+
+/*
+ * remap_test_signal_handler
+ *
+ * A signal handler that detects whether the kernel has acknowledged the new
+ * address for the remapped vDSO.
+ */
+static void remap_test_signal_handler( int signum, siginfo_t *sinfo, void *context )
+{
+ (void)signum;
+ (void)sinfo;
+ (void)context;
+
+ if (remap_test_in_old_address_range((unsigned long)__builtin_return_address(0))) goto fail;
+
+#ifdef __i386__
+ /* test for SYSENTER/SYSEXIT return address (int80_landing_pad) */
+ if (remap_test_in_old_address_range(((ucontext_t *)context)->uc_mcontext.gregs[REG_EIP])) goto fail;
+#endif
+
+ remap_test.is_successful = 1;
+ return;
+
+fail:
+ /* Kernel too old to support remapping. Restore vDSO/sigpage to return safely. */
+ if (remap_test.delta) {
+ if (remap_multiple_vmas( remap_test.vma_list, remap_test.delta, -1, 1 ) < 0)
+ fatal_error( "Cannot restore remapped VMAs\n" );
+ remap_test.delta = 0;
+ }
+
+ /* Signal handler might be called several times externally,
+ * so overwrite with the latest status just to be safe. */
+ remap_test.is_failed = 1;
+}
+#endif
+
+/*
+ * test_remap_successful
+ *
+ * Test if the kernel has acknowledged the remapped vDSO.
+ *
+ * Remapping vDSO requires explicit kernel support for most architectures, but
+ * the support is missing in old Linux kernels (pre-4.8). Among other things,
+ * vDSO contains the default signal restorer (sigreturn trampoline) and on i386
+ * the fast syscall gate (which uses SYSENTER on Intel CPUs). The kernel keeps
+ * track of the addresses of both of these things per process, and those
+ * addresses need to be updated accordingly if the vDSO address changes.
+ * Without proper support, mremap() on vDSO still succeeds, but the kernel still
+ * uses old addresses for the vDSO components, resulting in crashes or other
+ * unpredictable behaviour if any of those addresses are used.
+ *
+ * We attempt to detect this condition by installing a signal handler and
+ * sending a signal to ourselves. The signal handler will test if the restorer
+ * address falls in the old address range; if this is the case, we remap the
+ * vDSO to its old address and report failure (i.e. no support from kernel). On
+ * i386, we additionally check for the syscall gate. If the addresses do not
+ * overlap with the old address range, the kernel is new enough to support vDSO
+ * remapping and we can proceed as normal.
+ */
+static int test_remap_successful( struct vma_area_list *vma_list, struct preloader_state *state,
+ unsigned long old_mapping_start, unsigned long old_mapping_size,
+ unsigned long delta )
+{
+#ifdef __x86_64__
+ (void)vma_list;
+ (void)state;
+ (void)old_mapping_start;
+ (void)old_mapping_size;
+ (void)delta;
+
+ /* x86-64 doesn't use SYSENTER for syscalls, and requires sa_restorer for
+ * signal handlers. We can safely relocate vDSO without kernel support
+ * (vdso_mremap). */
+ return 0;
+#else
+ struct wld_sigaction sigact;
+ pid_t pid;
+ int result = -1;
+ unsigned long syscall_addr = 0;
+
+ pid = wld_getpid();
+ if (pid < 0) fatal_error( "failed to get PID\n" );
+
+#ifdef __i386__
+ syscall_addr = get_auxiliary( state->s.auxv, AT_SYSINFO, 0 );
+ if (syscall_addr - old_mapping_start < old_mapping_size) syscall_addr += delta;
+#endif
+
+ remap_test.old_mapping_start = old_mapping_start;
+ remap_test.old_mapping_size = old_mapping_size;
+ remap_test.vma_list = vma_list;
+ remap_test.delta = delta;
+ remap_test.is_successful = 0;
+ remap_test.is_failed = 0;
+
+ wld_memset( &sigact, 0, sizeof(sigact) );
+ sigact.wld_sa_sigaction = remap_test_signal_handler;
+ sigact.wld_sa_flags = WLD_SA_SIGINFO;
+ /* We deliberately skip sa_restorer, since we're trying to get the address
+ * of the kernel's built-in restorer function. */
+
+ if (wld_sigaction( REMAP_TEST_SIG, &sigact, &sigact ) < 0) fatal_error( "cannot register test signal handler\n" );
+
+ /* Unsafe region below - may race with signal handler */
+#ifdef __i386__
+ if (syscall_addr) {
+ /* Also test __kernel_vsyscall return as well */
+ __asm__ __volatile__( "call *%1"
+ : "=a" (result) : "r" (syscall_addr), "0" (37 /* SYS_kill */), "b" (pid), "c" (REMAP_TEST_SIG) );
+ result = SYSCALL_RET(result);
+ }
+#else
+ syscall_addr = 0;
+#endif
+ if (!syscall_addr) result = wld_kill( pid, REMAP_TEST_SIG );
+ /* Unsafe region above - may race with signal handler */
+
+ if (wld_sigaction( REMAP_TEST_SIG, &sigact, &sigact ) < 0) fatal_error( "cannot unregister test signal handler\n" );
+ if (result == -1) fatal_error( "cannot raise test signal\n" );
+
+ /* Now that the signal handler can no longer be called,
+ * we can safely access the result data. */
+ if (remap_test.is_failed || !remap_test.is_successful) {
+ if (remap_test.delta && remap_multiple_vmas( remap_test.vma_list, remap_test.delta, -1, 1 ) < 0)
+ fatal_error( "Cannot restore remapped VMAs\n" );
+ return -1;
+ }
+
+ return 0;
+#endif
+}
+
+/*
+ * remap_vdso
+ *
+ * Perform vDSO remapping if it conflicts with one of the reserved address ranges.
+ */
+static int remap_vdso( struct vma_area_list *vma_list, struct preloader_state *state )
+{
+ int result;
+ unsigned long vdso_start, vdso_size, delta;
+ void *new_vdso;
+ struct wld_auxv *auxv;
+
+ if (find_vma_envelope_range( vma_list, VMA_VDSO | VMA_VVAR, &vdso_start, &vdso_size ) < 0) return 0;
+
+ result = check_remap_policy( state, "WINEPRELOADREMAPVDSO",
+ REMAP_POLICY_DEFAULT_VDSO,
+ vdso_start, vdso_size );
+ if (result <= 0) return result;
+
+ new_vdso = wld_mmap( NULL, vdso_size, PROT_NONE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0 );
+ if (new_vdso == (void *)-1) return -1;
+
+ delta = (unsigned long)new_vdso - vdso_start;
+ /* It's easier to undo vvar remapping, so we remap it first. */
+ if (remap_multiple_vmas( vma_list, delta, VMA_VVAR, 0 ) < 0 ||
+ remap_multiple_vmas( vma_list, delta, VMA_VDSO, 0 ) < 0) goto remap_restore;
+
+ /* NOTE: AArch32 may have restorer in vDSO if we're running on an old ARM64 kernel. */
+ if (test_remap_successful( vma_list, state, vdso_start, vdso_size, delta ) < 0)
+ {
+ /* mapping restore done by test_remap_successful */
+ return -1;
+ }
+
+ for (auxv = state->s.auxv; auxv->a_type != AT_NULL; auxv++)
+ {
+ switch (auxv->a_type)
+ {
+ case AT_SYSINFO:
+ case AT_SYSINFO_EHDR:
+ if ((unsigned long)auxv->a_un.a_val - vdso_start < vdso_size)
+ auxv->a_un.a_val += delta;
+ break;
+ }
+ }
+
+ /* Refresh VMA list */
+ free_vma_list( vma_list );
+ alloc_scan_vma( vma_list );
+ return 1;
+
+remap_restore:
+ if (remap_multiple_vmas( vma_list, delta, -1, 1 ) < 0)
+ fatal_error( "Cannot restore remapped VMAs\n" );
+
+ return -1;
+}
+
/*
* map_reserve_preload_ranges
*
@@ -1974,6 +2546,8 @@ void* wld_start( void **stack )
alloc_scan_vma( &vma_list );
map_reserve_preload_ranges( &vma_list, &state.s );
+ if (remap_vdso( &vma_list, &state ) > 0) map_reserve_preload_ranges( &vma_list, &state.s );
+
/* add an executable page at the top of the address space to defeat
* broken no-exec protections that play with the code selector limit */
if (find_preload_reserved_area( (char *)0x80000000 - page_size, page_size ) >= 0)
@@ -2003,7 +2577,7 @@ void* wld_start( void **stack )
#undef SET_NEW_AV
i = 0;
- /* delete sysinfo values if addresses conflict */
+ /* delete sysinfo values if addresses conflict and remap failed */
if (is_in_preload_range( state.s.auxv, AT_SYSINFO ) || is_in_preload_range( state.s.auxv, AT_SYSINFO_EHDR ))
{
delete_av[i++].a_type = AT_SYSINFO;
--
2.34.1
More information about the wine-devel
mailing list