[PATCH 1/2] ntdll: Implement saving AVX registers in NtGetContextThread().

Paul Gofman pgofman at codeweavers.com
Wed Sep 2 06:30:31 CDT 2020


Signed-off-by: Paul Gofman <pgofman at codeweavers.com>
---
 dlls/ntdll/unix/signal_i386.c   | 58 ++++++++++++++++++++++++++++++--
 dlls/ntdll/unix/signal_x86_64.c | 59 +++++++++++++++++++++++++++++++--
 2 files changed, 113 insertions(+), 4 deletions(-)

diff --git a/dlls/ntdll/unix/signal_i386.c b/dlls/ntdll/unix/signal_i386.c
index 5d4d1469472..45482b819cc 100644
--- a/dlls/ntdll/unix/signal_i386.c
+++ b/dlls/ntdll/unix/signal_i386.c
@@ -683,6 +683,56 @@ static inline void save_fpux( CONTEXT *context )
 }
 
 
+/***********************************************************************
+ *           save_xstate
+ *
+ * Save the AVX part of the executing thread's XState into the context's XSTATE area.
+ */
+static inline NTSTATUS save_xstate( CONTEXT *context )
+{
+    CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); /* read from right behind the CONTEXT */
+    DECLSPEC_ALIGN(64) struct /* 64-byte aligned scratch buffer handed to xsave / xsavec */
+    {
+        XSAVE_FORMAT xsave;
+        XSTATE xstate;
+    }
+    xsave_area;
+    XSTATE *xs;
+
+    if (!(user_shared_data->XState.EnabledFeatures && (xs = xstate_from_context( context ))))
+        return STATUS_SUCCESS; /* no enabled features or no xstate in the context: nothing to do */
+
+    if (context_ex->XState.Length < offsetof(XSTATE, YmmContext)
+            || context_ex->XState.Length > sizeof(XSTATE))
+        return STATUS_INVALID_PARAMETER; /* must hold at least the header, at most a full XSTATE */
+
+    if (user_shared_data->XState.CompactionEnabled)
+    {
+        /* xsavec doesn't use anything from the save area. */
+        __asm__ volatile( "xsavec %0" : "=m"(xsave_area)
+                : "a" ((unsigned int)(xs->CompactionMask & (1 << XSTATE_AVX))), "d" (0) );
+    }
+    else
+    {
+        /* xsave keeps mask bits not in EDX:EAX, so zero it; "+m" keeps that store from being dropped. */
+        xsave_area.xstate.Mask = xsave_area.xstate.CompactionMask = 0;
+        __asm__ volatile( "xsave %0" : "+m"(xsave_area)
+                : "a" ((unsigned int)(xs->Mask & (1 << XSTATE_AVX))), "d" (0) );
+    }
+
+    memcpy(xs, &xsave_area.xstate, offsetof(XSTATE, YmmContext)); /* header is always copied back */
+    if (xs->Mask & (1 << XSTATE_AVX))
+    {
+        if (context_ex->XState.Length < sizeof(XSTATE))
+            return STATUS_BUFFER_OVERFLOW; /* AVX state was saved but the caller's area can't hold it */
+
+        memcpy(&xs->YmmContext, &xsave_area.xstate.YmmContext, sizeof(xs->YmmContext));
+    }
+
+    return STATUS_SUCCESS;
+}
+
+
 /***********************************************************************
  *           restore_fpu
  *
@@ -1189,11 +1239,15 @@ NTSTATUS WINAPI NtSetContextThread( HANDLE handle, const CONTEXT *context )
  */
 NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context )
 {
-    NTSTATUS ret;
+    NTSTATUS ret, xsave_status;
     struct syscall_frame *frame = x86_thread_data()->syscall_frame;
     DWORD needed_flags = context->ContextFlags & ~CONTEXT_i386;
     BOOL self = (handle == GetCurrentThread());
 
+    /* Save xstate before any calls which can potentially change volatile ymm registers.
+     * E. g., debug output will clobber ymm registers. */
+    xsave_status = self ? save_xstate( context ) : STATUS_SUCCESS; /* FIXME: other thread. */
+
     /* debug registers require a server call */
     if (needed_flags & CONTEXT_DEBUG_REGISTERS) self = FALSE;
 
@@ -1265,7 +1319,7 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context )
         TRACE( "%p: dr0=%08x dr1=%08x dr2=%08x dr3=%08x dr6=%08x dr7=%08x\n", handle,
                context->Dr0, context->Dr1, context->Dr2, context->Dr3, context->Dr6, context->Dr7 );
 
-    return STATUS_SUCCESS;
+    return xsave_status;
 }
 
 
diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c
index c77d65f8678..a0ad49815a4 100644
--- a/dlls/ntdll/unix/signal_x86_64.c
+++ b/dlls/ntdll/unix/signal_x86_64.c
@@ -1481,6 +1481,56 @@ static void save_context( struct xcontext *xcontext, const ucontext_t *sigcontex
 }
 
 
+/***********************************************************************
+ *           save_xstate
+ *
+ * Save the XState context
+ */
+static inline NTSTATUS save_xstate( CONTEXT *context )
+{
+    CONTEXT_EX *context_ex = (CONTEXT_EX *)(context + 1); /* read from right behind the CONTEXT */
+    DECLSPEC_ALIGN(64) struct /* 64-byte aligned scratch buffer handed to xsave / xsavec */
+    {
+        XSAVE_FORMAT xsave;
+        XSTATE xstate;
+    }
+    xsave_area;
+    XSTATE *xs;
+
+    if (!(user_shared_data->XState.EnabledFeatures && (xs = xstate_from_context( context ))))
+        return STATUS_SUCCESS; /* no enabled features or no xstate in the context: nothing to do */
+
+    if (context_ex->XState.Length < offsetof(XSTATE, YmmContext)
+            || context_ex->XState.Length > sizeof(XSTATE))
+        return STATUS_INVALID_PARAMETER; /* must hold at least the header, at most a full XSTATE */
+
+    if (user_shared_data->XState.CompactionEnabled)
+    {
+        /* xsavec doesn't use anything from the save area. */
+        __asm__ volatile( "xsavec %0" : "=m"(xsave_area)
+                : "a" ((unsigned int)(xs->CompactionMask & (1 << XSTATE_AVX))), "d" (0) );
+    }
+    else
+    {
+        /* xsave keeps mask bits not in EDX:EAX, so zero it; "+m" keeps that store from being dropped. */
+        xsave_area.xstate.Mask = xsave_area.xstate.CompactionMask = 0;
+        __asm__ volatile( "xsave %0" : "+m"(xsave_area)
+                : "a" ((unsigned int)(xs->Mask & (1 << XSTATE_AVX))), "d" (0) );
+    }
+
+    memcpy(xs, &xsave_area.xstate, offsetof(XSTATE, YmmContext)); /* header is always copied back */
+    if (xs->Mask & (1 << XSTATE_AVX))
+    {
+        if (context_ex->XState.Length < sizeof(XSTATE))
+            return STATUS_BUFFER_OVERFLOW; /* AVX state was saved but the caller's area can't hold it */
+
+        memcpy(&xs->YmmContext, &xsave_area.xstate.YmmContext, sizeof(xs->YmmContext));
+    }
+
+    return STATUS_SUCCESS;
+}
+
+
 /***********************************************************************
  *           restore_context
  *
@@ -1780,13 +1830,17 @@ NTSTATUS WINAPI NtSetContextThread( HANDLE handle, const CONTEXT *context )
  */
 NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context )
 {
-    NTSTATUS ret;
+    NTSTATUS ret, xsave_status;
     DWORD needed_flags;
     struct syscall_frame *frame = amd64_thread_data()->syscall_frame;
     BOOL self = (handle == GetCurrentThread());
 
     if (!context) return STATUS_INVALID_PARAMETER;
 
+    /* Save xstate before any calls which can potentially change volatile ymm registers.
+     * E. g., debug output will clobber ymm registers. */
+    xsave_status = self ? save_xstate( context ) : STATUS_SUCCESS; /* FIXME: other thread. */
+
     needed_flags = context->ContextFlags & ~CONTEXT_AMD64;
 
     /* debug registers require a server call */
@@ -1859,7 +1913,8 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context )
             amd64_thread_data()->dr7 = context->Dr7;
         }
     }
-    return STATUS_SUCCESS;
+
+    return xsave_status;
 }
 
 extern void CDECL raise_func_trampoline( void *dispatcher );
-- 
2.26.2




More information about the wine-devel mailing list