[PATCH 1/3] winebuild: Move the CALL32_CBClient[Ex] implementation to krnl386.

Zebediah Figura zfigura at codeweavers.com
Sun Oct 3 19:00:47 CDT 2021


Except for the return thunk.

Signed-off-by: Zebediah Figura <zfigura at codeweavers.com>
---
 dlls/krnl386.exe16/thunk.c | 164 ++++++++++++++++++++++++++++++++++++-
 tools/winebuild/relay.c    | 155 -----------------------------------
 2 files changed, 162 insertions(+), 157 deletions(-)

diff --git a/dlls/krnl386.exe16/thunk.c b/dlls/krnl386.exe16/thunk.c
index 98dbf31a770..4a373b6c7cb 100644
--- a/dlls/krnl386.exe16/thunk.c
+++ b/dlls/krnl386.exe16/thunk.c
@@ -1951,10 +1951,134 @@ void WINAPI CBClientGlueSL( CONTEXT *context )
     context->Eip   = OFFSETOF  ( glue );
 }
 
+/*******************************************************************
+ *         CALL32_CBClient
+ *
+ * Call a CBClient relay stub from 32-bit code (KERNEL.620).
+ *
+ * Since the relay stub is itself 32-bit, this should not be a problem;
+ * unfortunately, the relay stubs are expected to switch back to a
+ * 16-bit stack (and 16-bit code) after completion :-(
+ *
+ * This would conflict with our 16- vs. 32-bit stack handling, so
+ * we simply switch *back* to our 32-bit stack before returning to
+ * the caller ...
+ *
+ * The CBClient relay stub expects to be called with the following
+ * 16-bit stack layout, and with ebp and ebx pointing into the 16-bit
+ * stack at the designated places:
+ *
+ *    ...
+ *  (ebp+14) original arguments to the callback routine
+ *  (ebp+10) far return address to original caller
+ *  (ebp+6)  Thunklet target address
+ *  (ebp+2)  Thunklet relay ID code
+ *  (ebp)    BP (saved by CBClientGlueSL)
+ *  (ebp-2)  SI (saved by CBClientGlueSL)
+ *  (ebp-4)  DI (saved by CBClientGlueSL)
+ *  (ebp-6)  DS (saved by CBClientGlueSL)
+ *
+ *   ...     buffer space used by the 16-bit side glue for temp copies
+ *
+ *  (ebx+4)  far return address to 16-bit side glue code
+ *  (ebx)    saved 16-bit ss:sp (pointing to ebx+4)
+ *
+ * The 32-bit side glue code accesses both the original arguments (via ebp)
+ * and the temporary copies prepared by the 16-bit side glue (via ebx).
+ * After completion, the stub will load ss:sp from the buffer at ebx
+ * and perform a far return to 16-bit code.
+ *
+ * To trick the relay stub into returning to us, we replace the 16-bit
+ * return address to the glue code by a cs:ip pair pointing to our
+ * return entry point (the original return address is saved first).
+ * Our return stub thus called will then reload the 32-bit ss:esp and
+ * return to 32-bit code (by using an ss:esp value that we have also
+ * pushed onto the 16-bit stack before and a cs:eip value found at
+ * that position on the 32-bit stack).  The ss:esp to be restored is
+ * found relative to the 16-bit stack pointer at:
+ *
+ *  (ebx-4)   ss  (flat)
+ *  (ebx-8)   sp  (32-bit stack pointer)
+ *
+ * The second variant of this routine, CALL32_CBClientEx, which is used
+ * to implement KERNEL.621, has to cope with yet another problem: Here,
+ * the 32-bit side directly returns to the caller of the CBClient thunklet,
+ * restoring registers saved by CBClientGlueSL and cleaning up the stack.
+ * As we have to return to our 32-bit code first, we have to adapt the
+ * layout of our temporary area so as to include values for the registers
+ * that are to be restored, and later (in the implementation of KERNEL.621)
+ * we *really* restore them. The return stub restores DS, DI, SI, and BP
+ * from the stack, skips the next 8 bytes (CBClient relay code / target),
+ * and then performs a lret NN, where NN is the number of arguments to be
+ * removed. Thus, we prepare our temporary area as follows:
+ *
+ *     (ebx+22) 16-bit cs  (this segment)
+ *     (ebx+20) 16-bit ip  ('16-bit' return entry point)
+ *     (ebx+16) 32-bit ss  (flat)
+ *     (ebx+12) 32-bit sp  (32-bit stack pointer)
+ *     (ebx+10) 16-bit bp  (points to ebx+24)
+ *     (ebx+8)  16-bit si  (ignored)
+ *     (ebx+6)  16-bit di  (ignored)
+ *     (ebx+4)  16-bit ds  (we actually use the flat DS here)
+ *     (ebx+2)  16-bit ss  (16-bit stack segment)
+ *     (ebx+0)  16-bit sp  (points to ebx+4)
+ *
+ * Note that we ensure that DS is not changed and remains the flat segment,
+ * and the 32-bit stack pointer our own return stub needs fits just
+ * perfectly into the 8 bytes that are skipped by the Windows stub.
+ * One problem is that we have to determine the number of removed arguments,
+ * as these have to be really removed in KERNEL.621. Thus, the BP value
+ * that we place in the temporary area to be restored, contains the value
+ * that SP would have if no arguments were removed. By comparing the actual
+ * value of SP with this value in our return stub we can compute the number
+ * of removed arguments. This is then returned to KERNEL.621.
+ *
+ * The stack layout of this function:
+ * (ebp+24)  nArgs     pointer to variable receiving nr. of args (Ex only)
+ * (ebp+20)  esi       pointer to caller's esi value
+ * (ebp+16)  stackLin  pointer to temporary area on the 16-bit stack (ebx above)
+ * (ebp+12)  arg       ebp value to be set for relay stub
+ * (ebp+8)   func      CBClient relay stub address
+ * (ebp+4)   ret addr, (ebp) saved ebp
+ */
+extern DWORD CALL32_CBClient( FARPROC proc, LPWORD args, WORD *stackLin, DWORD *esi );
+__ASM_GLOBAL_FUNC( CALL32_CBClient,
+                   "pushl %ebp\n\t"          /* entry code: set up the frame pointer */
+                   __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t")
+                   __ASM_CFI(".cfi_rel_offset %ebp,0\n\t")
+                   "movl %esp,%ebp\n\t"
+                   __ASM_CFI(".cfi_def_cfa_register %ebp\n\t")
+                   "pushl %edi\n\t"          /* preserve edi, esi and ebx; all three are overwritten below */
+                   __ASM_CFI(".cfi_rel_offset %edi,-4\n\t")
+                   "pushl %esi\n\t"
+                   __ASM_CFI(".cfi_rel_offset %esi,-8\n\t")
+                   "pushl %ebx\n\t"
+                   __ASM_CFI(".cfi_rel_offset %ebx,-12\n\t")
+                   "movl 16(%ebp),%ebx\n\t"  /* ebx = stackLin, the pointer to the temporary area */
+                   "leal -8(%esp),%eax\n\t"  /* eax = esp after the 8 bytes pushed by the far call below */
+                   "movl %eax,-8(%ebx)\n\t"  /* save the 32-bit stack pointer at (ebx-8) */
+                   "movl 20(%ebp),%esi\n\t"  /* esi = pointer to the caller's esi value ... */
+                   "movl (%esi),%esi\n\t"    /* ... then load that value */
+                   "movl 8(%ebp),%eax\n\t"   /* eax = proc, the CBClient relay stub address */
+                   "movl 12(%ebp),%ebp\n\t"  /* ebp = args; our own frame pointer is gone from here on */
+                   "pushl %cs\n\t"           /* simulate a far call: cs pushed here, eip by the call */
+                   "call *%eax\n\t"
+                   "movl 32(%esp),%edi\n\t"  /* edi = esi pointer argument; assumes the return stub (not shown) restored esp */
+                   "movl %esi,(%edi)\n\t"    /* return new esi value to caller */
+                   "popl %ebx\n\t"           /* restore registers and return */
+                   __ASM_CFI(".cfi_same_value %ebx\n\t")
+                   "popl %esi\n\t"
+                   __ASM_CFI(".cfi_same_value %esi\n\t")
+                   "popl %edi\n\t"
+                   __ASM_CFI(".cfi_same_value %edi\n\t")
+                   "popl %ebp\n\t"
+                   __ASM_CFI(".cfi_def_cfa %esp,4\n\t")
+                   __ASM_CFI(".cfi_same_value %ebp\n\t")
+                   "ret\n\t" )
+
 /***********************************************************************
  *     CBClientThunkSL                      (KERNEL.620)
  */
-extern DWORD CALL32_CBClient( FARPROC proc, LPWORD args, WORD *stackLin, DWORD *esi );
 void WINAPI CBClientThunkSL( CONTEXT *context )
 {
     /* Call 32-bit relay code */
@@ -1976,10 +2100,46 @@ void WINAPI CBClientThunkSL( CONTEXT *context )
     stack16_pop( 12 );
 }
 
+extern DWORD CALL32_CBClientEx( FARPROC proc, LPWORD args, WORD *stackLin, DWORD *esi, INT *nArgs );
+__ASM_GLOBAL_FUNC( CALL32_CBClientEx,
+                   "pushl %ebp\n\t"          /* entry code: set up the frame pointer */
+                   __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t")
+                   __ASM_CFI(".cfi_rel_offset %ebp,0\n\t")
+                   "movl %esp,%ebp\n\t"
+                   __ASM_CFI(".cfi_def_cfa_register %ebp\n\t")
+                   "pushl %edi\n\t"          /* preserve edi, esi and ebx; all three are overwritten below */
+                   __ASM_CFI(".cfi_rel_offset %edi,-4\n\t")
+                   "pushl %esi\n\t"
+                   __ASM_CFI(".cfi_rel_offset %esi,-8\n\t")
+                   "pushl %ebx\n\t"
+                   __ASM_CFI(".cfi_rel_offset %ebx,-12\n\t")
+                   "movl 16(%ebp),%ebx\n\t"  /* ebx = stackLin, the pointer to the temporary area */
+                   "leal -8(%esp),%eax\n\t"  /* eax = esp after the 8 bytes pushed by the far call below */
+                   "movl %eax,12(%ebx)\n\t"  /* save the 32-bit stack pointer at (ebx+12) */
+                   "movl 20(%ebp),%esi\n\t"  /* esi = pointer to the caller's esi value ... */
+                   "movl (%esi),%esi\n\t"    /* ... then load that value */
+                   "movl 8(%ebp),%eax\n\t"   /* eax = proc, the CBClient relay stub address */
+                   "movl 12(%ebp),%ebp\n\t"  /* ebp = args; our own frame pointer is gone from here on */
+                   "pushl %cs\n\t"           /* simulate a far call: cs pushed here, eip by the call */
+                   "call *%eax\n\t"
+                   "movl 32(%esp),%edi\n\t"  /* edi = esi pointer argument; assumes the return stub (not shown) restored esp */
+                   "movl %esi,(%edi)\n\t"    /* return new esi value to caller */
+                   "movl 36(%esp),%ebx\n\t"  /* ebx = nArgs pointer argument (fifth parameter) */
+                   "movl %ebp,(%ebx)\n\t"    /* return argument size to caller; ebp presumably set by the return stub */
+                   "popl %ebx\n\t"           /* restore registers and return */
+                   __ASM_CFI(".cfi_same_value %ebx\n\t")
+                   "popl %esi\n\t"
+                   __ASM_CFI(".cfi_same_value %esi\n\t")
+                   "popl %edi\n\t"
+                   __ASM_CFI(".cfi_same_value %edi\n\t")
+                   "popl %ebp\n\t"
+                   __ASM_CFI(".cfi_def_cfa %esp,4\n\t")
+                   __ASM_CFI(".cfi_same_value %ebp\n\t")
+                   "ret\n\t" )
+
 /***********************************************************************
  *     CBClientThunkSLEx                    (KERNEL.621)
  */
-extern DWORD CALL32_CBClientEx( FARPROC proc, LPWORD args, WORD *stackLin, DWORD *esi, INT *nArgs );
 void WINAPI CBClientThunkSLEx( CONTEXT *context )
 {
     /* Call 32-bit relay code */
diff --git a/tools/winebuild/relay.c b/tools/winebuild/relay.c
index abe2bdae6ed..eaa70888986 100644
--- a/tools/winebuild/relay.c
+++ b/tools/winebuild/relay.c
@@ -564,163 +564,8 @@ static void BuildRet16Func(void)
 }
 
 
-/*******************************************************************
- *         BuildCallTo32CBClient
- *
- * Call a CBClient relay stub from 32-bit code (KERNEL.620).
- *
- * Since the relay stub is itself 32-bit, this should not be a problem;
- * unfortunately, the relay stubs are expected to switch back to a
- * 16-bit stack (and 16-bit code) after completion :-(
- *
- * This would conflict with our 16- vs. 32-bit stack handling, so
- * we simply switch *back* to our 32-bit stack before returning to
- * the caller ...
- *
- * The CBClient relay stub expects to be called with the following
- * 16-bit stack layout, and with ebp and ebx pointing into the 16-bit
- * stack at the designated places:
- *
- *    ...
- *  (ebp+14) original arguments to the callback routine
- *  (ebp+10) far return address to original caller
- *  (ebp+6)  Thunklet target address
- *  (ebp+2)  Thunklet relay ID code
- *  (ebp)    BP (saved by CBClientGlueSL)
- *  (ebp-2)  SI (saved by CBClientGlueSL)
- *  (ebp-4)  DI (saved by CBClientGlueSL)
- *  (ebp-6)  DS (saved by CBClientGlueSL)
- *
- *   ...     buffer space used by the 16-bit side glue for temp copies
- *
- *  (ebx+4)  far return address to 16-bit side glue code
- *  (ebx)    saved 16-bit ss:sp (pointing to ebx+4)
- *
- * The 32-bit side glue code accesses both the original arguments (via ebp)
- * and the temporary copies prepared by the 16-bit side glue (via ebx).
- * After completion, the stub will load ss:sp from the buffer at ebx
- * and perform a far return to 16-bit code.
- *
- * To trick the relay stub into returning to us, we replace the 16-bit
- * return address to the glue code by a cs:ip pair pointing to our
- * return entry point (the original return address is saved first).
- * Our return stub thus called will then reload the 32-bit ss:esp and
- * return to 32-bit code (by using and ss:esp value that we have also
- * pushed onto the 16-bit stack before and a cs:eip values found at
- * that position on the 32-bit stack).  The ss:esp to be restored is
- * found relative to the 16-bit stack pointer at:
- *
- *  (ebx-4)   ss  (flat)
- *  (ebx-8)   sp  (32-bit stack pointer)
- *
- * The second variant of this routine, CALL32_CBClientEx, which is used
- * to implement KERNEL.621, has to cope with yet another problem: Here,
- * the 32-bit side directly returns to the caller of the CBClient thunklet,
- * restoring registers saved by CBClientGlueSL and cleaning up the stack.
- * As we have to return to our 32-bit code first, we have to adapt the
- * layout of our temporary area so as to include values for the registers
- * that are to be restored, and later (in the implementation of KERNEL.621)
- * we *really* restore them. The return stub restores DS, DI, SI, and BP
- * from the stack, skips the next 8 bytes (CBClient relay code / target),
- * and then performs a lret NN, where NN is the number of arguments to be
- * removed. Thus, we prepare our temporary area as follows:
- *
- *     (ebx+22) 16-bit cs  (this segment)
- *     (ebx+20) 16-bit ip  ('16-bit' return entry point)
- *     (ebx+16) 32-bit ss  (flat)
- *     (ebx+12) 32-bit sp  (32-bit stack pointer)
- *     (ebx+10) 16-bit bp  (points to ebx+24)
- *     (ebx+8)  16-bit si  (ignored)
- *     (ebx+6)  16-bit di  (ignored)
- *     (ebx+4)  16-bit ds  (we actually use the flat DS here)
- *     (ebx+2)  16-bit ss  (16-bit stack segment)
- *     (ebx+0)  16-bit sp  (points to ebx+4)
- *
- * Note that we ensure that DS is not changed and remains the flat segment,
- * and the 32-bit stack pointer our own return stub needs fits just
- * perfectly into the 8 bytes that are skipped by the Windows stub.
- * One problem is that we have to determine the number of removed arguments,
- * as these have to be really removed in KERNEL.621. Thus, the BP value
- * that we place in the temporary area to be restored, contains the value
- * that SP would have if no arguments were removed. By comparing the actual
- * value of SP with this value in our return stub we can compute the number
- * of removed arguments. This is then returned to KERNEL.621.
- *
- * The stack layout of this function:
- * (ebp+20)  nArgs     pointer to variable receiving nr. of args (Ex only)
- * (ebp+16)  esi       pointer to caller's esi value
- * (ebp+12)  arg       ebp value to be set for relay stub
- * (ebp+8)   func      CBClient relay stub address
- * (ebp+4)   ret addr
- * (ebp)     ebp
- */
 static void BuildCallTo32CBClient( int isEx )
 {
-    function_header( isEx ? "CALL32_CBClientEx" : "CALL32_CBClient" );
-
-    /* Entry code */
-
-    output_cfi( ".cfi_startproc" );
-    output( "\tpushl %%ebp\n" );
-    output_cfi( ".cfi_adjust_cfa_offset 4" );
-    output_cfi( ".cfi_rel_offset %%ebp,0" );
-    output( "\tmovl %%esp,%%ebp\n" );
-    output_cfi( ".cfi_def_cfa_register %%ebp" );
-    output( "\tpushl %%edi\n" );
-    output_cfi( ".cfi_rel_offset %%edi,-4" );
-    output( "\tpushl %%esi\n" );
-    output_cfi( ".cfi_rel_offset %%esi,-8" );
-    output( "\tpushl %%ebx\n" );
-    output_cfi( ".cfi_rel_offset %%ebx,-12" );
-
-    /* Get pointer to temporary area and save the 32-bit stack pointer */
-
-    output( "\tmovl 16(%%ebp), %%ebx\n" );
-    output( "\tleal -8(%%esp), %%eax\n" );
-
-    if ( !isEx )
-        output( "\tmovl %%eax, -8(%%ebx)\n" );
-    else
-        output( "\tmovl %%eax, 12(%%ebx)\n" );
-
-    /* Set up registers and call CBClient relay stub (simulating a far call) */
-
-    output( "\tmovl 20(%%ebp), %%esi\n" );
-    output( "\tmovl (%%esi), %%esi\n" );
-
-    output( "\tmovl 8(%%ebp), %%eax\n" );
-    output( "\tmovl 12(%%ebp), %%ebp\n" );
-
-    output( "\tpushl %%cs\n" );
-    output( "\tcall *%%eax\n" );
-
-    /* Return new esi value to caller */
-
-    output( "\tmovl 32(%%esp), %%edi\n" );
-    output( "\tmovl %%esi, (%%edi)\n" );
-
-    /* Return argument size to caller */
-    if ( isEx )
-    {
-        output( "\tmovl 36(%%esp), %%ebx\n" );
-        output( "\tmovl %%ebp, (%%ebx)\n" );
-    }
-
-    /* Restore registers and return */
-
-    output( "\tpopl %%ebx\n" );
-    output_cfi( ".cfi_same_value %%ebx" );
-    output( "\tpopl %%esi\n" );
-    output_cfi( ".cfi_same_value %%esi" );
-    output( "\tpopl %%edi\n" );
-    output_cfi( ".cfi_same_value %%edi" );
-    output( "\tpopl %%ebp\n" );
-    output_cfi( ".cfi_def_cfa %%esp,4" );
-    output_cfi( ".cfi_same_value %%ebp" );
-    output( "\tret\n" );
-    output_cfi( ".cfi_endproc" );
-    output_function_size( isEx ? "CALL32_CBClientEx" : "CALL32_CBClient" );
-
     /* '16-bit' return stub */
 
     function_header( isEx ? "CALL32_CBClientEx_Ret" : "CALL32_CBClient_Ret" );
-- 
2.33.0




More information about the wine-devel mailing list