[PATCH 2/2] ntdll: Make syscall dispatcher properly restore X16 and X17 in ARM64.

Jinoh Kang jinoh.kang.kr at gmail.com
Mon Nov 8 09:41:18 CST 2021


Today, __wine_syscall_dispatcher clobbers X16 and X17 registers even if
`syscall_frame->restore_flags` has CONTEXT_INTEGER set.
This is because they are used as scratch registers for restoring SP and
PC, which cannot be loaded directly from memory or other architectural
state.

signal_set_full_context bypasses this clobbering behaviour by relying on
kernel instead to fully restore the context.  Specifically, it raises
SIGUSR2, of which handler moves everything to the sigcontext and returns
implicitly via the sigreturn() Unix system call.

However, this approach is not generic and also error-prone: the
signal_set_full_context implementation now differs from other
architectures by abruptly switching to the specified context instead of
taking the proper path of returning via the syscall dispatcher.

Fix this by splitting the ARM64 syscall dispatcher's returning behaviour
into a fast path and a slow path.

- If CONTEXT_INTEGER is not set, the dispatcher takes the fast path:
  the X16 and X17 registers are clobbered as usual.

- If X16 == PC and X17 == SP, the dispatcher also takes the fast path:
  it can safely use X16 and X17 without corrupting the thread state,
  since those two registers already have the desired values.

- Otherwise, the dispatcher takes the slow path: it raises SIGUSR2 and
  does full context restore in the signal handler.

Also, modify ARM64 signal_set_full_context to match that of other
architectures.  The function will let the dispatcher choose either path
as appropriate.

Performance impact is expected to be negligible (if any) when
CONTEXT_INTEGER is unset (the usual case), since the number of
instructions executed is left unchanged for this case.

Signed-off-by: Jinoh Kang <jinoh.kang.kr at gmail.com>
---
 dlls/ntdll/unix/signal_arm64.c | 57 ++++++++++++++++++++++++----------
 1 file changed, 40 insertions(+), 17 deletions(-)

diff --git a/dlls/ntdll/unix/signal_arm64.c b/dlls/ntdll/unix/signal_arm64.c
index da5739b6f1c..c5db2e7de64 100644
--- a/dlls/ntdll/unix/signal_arm64.c
+++ b/dlls/ntdll/unix/signal_arm64.c
@@ -327,6 +327,10 @@ NTSTATUS CDECL unwind_builtin_dll( ULONG type, DISPATCHER_CONTEXT *dispatch, CON
 /***********************************************************************
  *           syscall_frame_fixup_for_fastpath
  *
+ * Fixes up the given syscall frame such that the syscall dispatcher
+ * can return via the fast path if CONTEXT_INTEGER is set in
+ * restore_flags.
+ *
  * Clobbers the frame's X16 and X17 register values.
  */
 static void syscall_frame_fixup_for_fastpath( struct syscall_frame *frame )
@@ -426,7 +430,8 @@ NTSTATUS signal_set_full_context( CONTEXT *context )
 {
     NTSTATUS status = NtSetContextThread( GetCurrentThread(), context );
 
-    if (!status && (context->ContextFlags & CONTEXT_INTEGER) == CONTEXT_INTEGER) raise( SIGUSR2 );
+    if (!status && (context->ContextFlags & CONTEXT_INTEGER) == CONTEXT_INTEGER)
+        arm64_thread_data()->syscall_frame->restore_flags |= CONTEXT_INTEGER;
     return status;
 }
 
@@ -1172,6 +1177,17 @@ void signal_init_process(void)
 }
 
 
+/***********************************************************************
+ *           syscall_dispatcher_return_slowpath
+ */
+void DECLSPEC_HIDDEN syscall_dispatcher_return_slowpath( void )
+{
+    raise( SIGUSR2 );
+
+    /* should not be reached */
+    abort_thread( 0 );
+}
+
 /***********************************************************************
  *           call_init_thunk
  */
@@ -1312,13 +1328,28 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher,
                    "ldr x16, [x16, x20, lsl 3]\n\t"
                    "blr x16\n\t"
                    "mov sp, x22\n"
-                   "3:\tldp x18, x19, [sp, #0x90]\n\t"
+                   "3:\tldr w16, [sp, #0x10c]\n\t"  /* frame->restore_flags */
+                   "tbz x16, #1, 1f\n\t"        /* CONTEXT_INTEGER */
+                   "ldp x6, x7, [sp, #0x80]\n\t"    /* frame->x[16..17] */
+                   "ldp x8, x9, [sp, #0xf8]\n\t"    /* frame->sp, frame->pc */
+                   "eor x6, x6, x9\n\t"         /* frame->x16 == frame->pc? */
+                   "eor x7, x7, x8\n\t"         /* frame->x17 == frame->sp? */
+                   "orr x6, x6, x7\n\t"
+                   "cbnz x6, 5f\n\t"            /* take slowpath if unequal */
+                   "ldp x0, x1, [sp, #0x00]\n\t"
+                   "ldp x2, x3, [sp, #0x10]\n\t"
+                   "ldp x4, x5, [sp, #0x20]\n\t"
+                   "ldp x6, x7, [sp, #0x30]\n\t"
+                   "ldp x8, x9, [sp, #0x40]\n\t"
+                   "ldp x10, x11, [sp, #0x50]\n\t"
+                   "ldp x12, x13, [sp, #0x60]\n\t"
+                   "ldp x14, x15, [sp, #0x70]\n"
+                   "1:\tldp x18, x19, [sp, #0x90]\n\t"
                    "ldp x20, x21, [sp, #0xa0]\n\t"
                    "ldp x22, x23, [sp, #0xb0]\n\t"
                    "ldp x24, x25, [sp, #0xc0]\n\t"
                    "ldp x26, x27, [sp, #0xd0]\n\t"
                    "ldp x28, x29, [sp, #0xe0]\n\t"
-                   "ldr w16, [sp, #0x10c]\n\t"  /* frame->restore_flags */
                    "tbz x16, #2, 1f\n\t"        /* CONTEXT_FLOATING_POINT */
                    "ldp q0,  q1,  [sp, #0x130]\n\t"
                    "ldp q2,  q3,  [sp, #0x150]\n\t"
@@ -1336,19 +1367,10 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher,
                    "ldp q26, q27, [sp, #0x2d0]\n\t"
                    "ldp q28, q29, [sp, #0x2f0]\n\t"
                    "ldp q30, q31, [sp, #0x310]\n\t"
-                   "ldr w9, [sp, #0x128]\n\t"
-                   "msr FPCR, x9\n\t"
-                   "ldr w9, [sp, #0x12c]\n\t"
-                   "msr FPSR, x9\n"
-                   "1:\ttbz x16, #1, 1f\n\t"    /* CONTEXT_INTEGER */
-                   "ldp x0, x1, [sp, #0x00]\n\t"
-                   "ldp x2, x3, [sp, #0x10]\n\t"
-                   "ldp x4, x5, [sp, #0x20]\n\t"
-                   "ldp x6, x7, [sp, #0x30]\n\t"
-                   "ldp x8, x9, [sp, #0x40]\n\t"
-                   "ldp x10, x11, [sp, #0x50]\n\t"
-                   "ldp x12, x13, [sp, #0x60]\n\t"
-                   "ldp x14, x15, [sp, #0x70]\n"
+                   "ldr w17, [sp, #0x128]\n\t"
+                   "msr FPCR, x17\n\t"
+                   "ldr w17, [sp, #0x12c]\n\t"
+                   "msr FPSR, x17\n"
                    "1:\tldp x16, x17, [sp, #0x100]\n\t"
                    "msr NZCV, x17\n\t"
                    "ldp x30, x17, [sp, #0xf0]\n\t"
@@ -1360,7 +1382,8 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher,
                    __ASM_NAME("__wine_syscall_dispatcher_return") ":\n\t"
                    "mov sp, x0\n\t"
                    "mov x0, x1\n\t"
-                   "b 3b" )
+                   "b 3b\n"
+                   "5:\tbl " __ASM_NAME("syscall_dispatcher_return_slowpath") )
 
 
 /***********************************************************************
-- 
2.33.1




More information about the wine-devel mailing list