From 0471b9eb4fd66d4b51b53fe86ddcb6f390e37a05 Mon Sep 17 00:00:00 2001 From: Dan Kegel Date: Fri, 12 Oct 2012 22:31:39 -0700 Subject: [PATCH] vcomp: single-threaded implementation of _vcomp_fork --- configure.ac | 3 +- dlls/vcomp/Makefile.in | 1 + dlls/vcomp/fork.c | 162 +++++++++++++++++++++++++++++++++++++++ dlls/vcomp/tests/Makefile.in | 10 +++ dlls/vcomp/tests/fork.c | 157 +++++++++++++++++++++++++++++++++++++ dlls/vcomp/tests/vcomp.manifest | 21 +++++ dlls/vcomp/tests/vcomp.rc | 22 ++++++ dlls/vcomp/vcomp.spec | 2 +- 8 files changed, 376 insertions(+), 2 deletions(-) create mode 100644 dlls/vcomp/fork.c create mode 100644 dlls/vcomp/tests/Makefile.in create mode 100644 dlls/vcomp/tests/fork.c create mode 100644 dlls/vcomp/tests/vcomp.manifest create mode 100644 dlls/vcomp/tests/vcomp.rc diff --git a/configure.ac b/configure.ac index fc1d93c..bc3c8b7 100644 --- a/configure.ac +++ b/configure.ac @@ -3016,7 +3016,8 @@ WINE_CONFIG_DLL(uxtheme,,[implib]) WINE_CONFIG_TEST(dlls/uxtheme/tests) WINE_CONFIG_DLL(vbscript) WINE_CONFIG_TEST(dlls/vbscript/tests) -WINE_CONFIG_DLL(vcomp) +WINE_CONFIG_DLL(vcomp,,[implib]) +WINE_CONFIG_TEST(dlls/vcomp/tests) WINE_CONFIG_DLL(vcomp100) WINE_CONFIG_DLL(vcomp90) WINE_CONFIG_DLL(vdhcp.vxd,enable_win16) diff --git a/dlls/vcomp/Makefile.in b/dlls/vcomp/Makefile.in index d256526..971cf0f 100644 --- a/dlls/vcomp/Makefile.in +++ b/dlls/vcomp/Makefile.in @@ -1,6 +1,7 @@ MODULE = vcomp.dll C_SRCS = \ + fork.c \ main.c @MAKE_DLL_RULES@ diff --git a/dlls/vcomp/fork.c b/dlls/vcomp/fork.c new file mode 100644 index 0000000..1a41f73 --- /dev/null +++ b/dlls/vcomp/fork.c @@ -0,0 +1,162 @@ +/* + * vcomp fork/join implementation + * + * Copyright 2012 Dan Kegel + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "config.h" + +#include <stdarg.h> + +#include "windef.h" +#include "winbase.h" +#include "wine/debug.h" + +WINE_DEFAULT_DEBUG_CHANNEL(vcomp); + +void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, UINT_PTR args[]); + +/* When Visual C encounters a '#pragma omp parallel' directive, + * it wraps the next statement in a function, and passes the address + * of the wrapper function to _vcomp_fork, which calls that function-- + * possibly after spawning extra copies on new threads. + * + * If the directive has an if() clause, the value passed to the if clause + * is passed as the first argument to _vcomp_fork; if it is false, + * or if OMP_NUM_THREADS is 1, or omp_set_num_threads(1) has been called, + * or if too many threads are already in use, native _vcomp_fork doesn't spawn + * any extra threads, it just calls the wrapper function. + * + * The OpenMP standard allows implementations to fall back to executing + * everything on a single thread, so that's what we'll do for now; + * our _vcomp_fork will simply call the wrapper function. + * That's enough to make many, but not all, apps run correctly. 
+ * + * If the statement being wrapped refers to variables from an outer scope, + * Visual C passes them to _vcomp_fork and thence the wrapper as follows: + * - Unchanging ints are always passed by value + * - Unchanging floats are passed by value on i386, but by reference on amd64 + * - Everything else is passed by reference + * + * The call to _vcomp_fork is synthesized by the compiler; + * user code isn't even aware that a call is being made. + * _vcomp_fork takes a variable number of arguments, so one might think + * it should use the normal varargs techniques. + * But both _vcomp_fork and its caller are under Visual C's control, + * so the compiler is free to use a nonstandard ABI for this call. + * And it does, in that float arguments are not promoted to double. + * (Some apps that use floats would probably be very annoyed if they were + * silently promoted to doubles by "#pragma omp parallel".) + * To properly call _vcomp_fork with floats, one must avoid using varargs; + * to emphasize that, we'll avoid them here in the function's definition, too. + * + * The call from _vcomp_fork to the wrapper function also doesn't quite + * follow the normal win32/win64 calling conventions: + * 1) Since Visual C never passes floats or doubles by value to the + * wrapper on amd64, native vcomp.dll does not copy floating point parameters + * to registers, contrary to the win64 ABI. Manual tests confirm this. + * 2) Since the wrapper itself doesn't use varargs at all, _vcomp_fork can't + * just pass an __ms_va_list; it has to push the arguments onto the stack again. + * This can't be done in C, so we use assembly in _vcomp_fork_call_wrapper. + * (That function is a close copy of call_method in oleaut32/typelib.c, + * with unneeded instructions removed.) 
+ */ + +void WINAPIV _vcomp_fork(BOOL ifval, int nargs, void *wrapper, UINT_PTR arg1) +{ + TRACE("(%d, %d, %p, ...)\n", ifval, nargs, wrapper); + _vcomp_fork_call_wrapper(wrapper, nargs, &arg1); +} + +#if defined(__i386__) +__ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper, + "pushl %ebp\n\t" + __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t") + __ASM_CFI(".cfi_rel_offset %ebp,0\n\t") + "movl %esp,%ebp\n\t" + __ASM_CFI(".cfi_def_cfa_register %ebp\n\t") + "pushl %esi\n\t" + __ASM_CFI(".cfi_rel_offset %esi,-4\n\t") + "pushl %edi\n\t" + __ASM_CFI(".cfi_rel_offset %edi,-8\n\t") + "movl 12(%ebp),%edx\n\t" + "movl %esp,%edi\n\t" + "shll $2,%edx\n\t" + "jz 1f\n\t" + "subl %edx,%edi\n\t" + "andl $~15,%edi\n\t" + "movl %edi,%esp\n\t" + "movl 12(%ebp),%ecx\n\t" + "movl 16(%ebp),%esi\n\t" + "cld\n\t" + "rep; movsl\n" + "1:\tcall *8(%ebp)\n\t" + "leal -8(%ebp),%esp\n\t" + "popl %edi\n\t" + __ASM_CFI(".cfi_same_value %edi\n\t") + "popl %esi\n\t" + __ASM_CFI(".cfi_same_value %esi\n\t") + "popl %ebp\n\t" + __ASM_CFI(".cfi_def_cfa %esp,4\n\t") + __ASM_CFI(".cfi_same_value %ebp\n\t") + "ret" ) + +#elif defined(__x86_64__) + +__ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper, + "pushq %rbp\n\t" + __ASM_CFI(".cfi_adjust_cfa_offset 8\n\t") + __ASM_CFI(".cfi_rel_offset %rbp,0\n\t") + "movq %rsp,%rbp\n\t" + __ASM_CFI(".cfi_def_cfa_register %rbp\n\t") + "pushq %rsi\n\t" + __ASM_CFI(".cfi_rel_offset %rsi,-8\n\t") + "pushq %rdi\n\t" + __ASM_CFI(".cfi_rel_offset %rdi,-16\n\t") + "movq %rcx,%rax\n\t" + "movq $4,%rcx\n\t" + "cmp %rcx,%rdx\n\t" + "cmovgq %rdx,%rcx\n\t" + "leaq 0(,%rcx,8),%rdx\n\t" + "subq %rdx,%rsp\n\t" + "andq $~15,%rsp\n\t" + "movq %rsp,%rdi\n\t" + "movq %r8,%rsi\n\t" + "rep; movsq\n\t" + "movq 0(%rsp),%rcx\n\t" + "movq 8(%rsp),%rdx\n\t" + "movq 16(%rsp),%r8\n\t" + "movq 24(%rsp),%r9\n\t" + "callq *%rax\n\t" + "leaq -16(%rbp),%rsp\n\t" + "popq %rdi\n\t" + __ASM_CFI(".cfi_same_value %rdi\n\t") + "popq %rsi\n\t" + __ASM_CFI(".cfi_same_value %rsi\n\t") + __ASM_CFI(".cfi_def_cfa_register %rsp\n\t") 
+ "popq %rbp\n\t" + __ASM_CFI(".cfi_adjust_cfa_offset -8\n\t") + __ASM_CFI(".cfi_same_value %rbp\n\t") + "ret") +#else + +void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, UINT_PTR args[]) +{ + ERR("Not implemented for this architecture\n"); +} + +#endif diff --git a/dlls/vcomp/tests/Makefile.in b/dlls/vcomp/tests/Makefile.in new file mode 100644 index 0000000..7a2e103 --- /dev/null +++ b/dlls/vcomp/tests/Makefile.in @@ -0,0 +1,10 @@ +TESTDLL = vcomp.dll +IMPORTS = vcomp + +C_SRCS = \ + fork.c + +RC_SRCS = \ + vcomp.rc + +@MAKE_TEST_RULES@ diff --git a/dlls/vcomp/tests/fork.c b/dlls/vcomp/tests/fork.c new file mode 100644 index 0000000..6a76df0 --- /dev/null +++ b/dlls/vcomp/tests/fork.c @@ -0,0 +1,157 @@ +/* + * Unit test suite for vcomp fork/join implementation + * + * Copyright 2012 Dan Kegel + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "wine/test.h" + +static void *p_vcomp_fork; +static int CDECL (*pomp_get_max_threads)(void); + +#define GETFUNC(x) do { p##x = (void*)GetProcAddress(vcomp, #x); ok(p##x != NULL, "Export '%s' not found\n", #x); } while(0) + +static BOOL init(void) +{ + HMODULE vcomp = LoadLibraryA("vcomp.dll"); + if(!vcomp) { + win_skip("vcomp.dll not installed\n"); + return FALSE; + } + + GETFUNC(_vcomp_fork); + GETFUNC(omp_get_max_threads); + + return TRUE; +} + +/* Test whether a variety of types are passed correctly. + * Pass five of each because the first four parameters are + * handled differently on amd64, and we want to test both + * ways. + */ + +static void CDECL _test_vcomp_fork_ptr_worker(volatile LONG *a, volatile LONG *b, volatile LONG *c, volatile LONG *d, volatile LONG *e) +{ + InterlockedIncrement(a); + InterlockedIncrement(b); + InterlockedIncrement(c); + InterlockedIncrement(d); + InterlockedIncrement(e); +} + +static void test_vcomp_fork_ptr(void) +{ + volatile LONG a, b, c, d, e; + int n; + + void CDECL (*p_vcomp_fork_p5)(BOOL, int, void *, volatile LONG *, volatile LONG *, volatile LONG *, volatile LONG *, volatile LONG *); + p_vcomp_fork_p5 = p_vcomp_fork; + + /* #pragma omp parallel if(FALSE) shared(a, b, c, d, e) + * { InterlockedIncrement(&a); ... InterlockedIncrement(&e); } + */ + a=0; b=1; c=2; d=3; e=4; + p_vcomp_fork_p5(FALSE, 5, _test_vcomp_fork_ptr_worker, &a, &b, &c, &d, &e); + ok(a == 1, "a == 1\n"); + ok(b == 2, "b == 2\n"); + ok(c == 3, "c == 3\n"); + ok(d == 4, "d == 4\n"); + ok(e == 5, "e == 5\n"); + + /* #pragma omp parallel if(TRUE) shared(a, b, c, d, e) + * { InterlockedIncrement(&a); ... 
InterlockedIncrement(&e); } + */ + a=0; b=1; c=2; d=3; e=4; + n = pomp_get_max_threads(); + p_vcomp_fork_p5(TRUE, 5, _test_vcomp_fork_ptr_worker, &a, &b, &c, &d, &e); + ok(a > 0 && a <= (n+0), "a > 0 && a <= (n+0)\n"); + ok(b > 1 && b <= (n+1), "b > 1 && b <= (n+1)\n"); + ok(c > 2 && c <= (n+2), "c > 2 && c <= (n+2)\n"); + ok(d > 3 && d <= (n+3), "d > 3 && d <= (n+3)\n"); + ok(e > 4 && e <= (n+4), "e > 4 && e <= (n+4)\n"); +} + +static void CDECL _test_vcomp_fork_uintptr_worker(UINT_PTR a, UINT_PTR b, UINT_PTR c, UINT_PTR d, UINT_PTR e) +{ + ok(a == 1, "expected a == 1\n"); + ok(b == MAXUINT_PTR-2, "expected b == MAXUINT_PTR-2\n"); + ok(c == 3, "expected c == 3\n"); + ok(d == MAXUINT_PTR-4, "expected d == MAXUINT_PTR-4\n"); + ok(e == 5, "expected e == 5\n"); +} + +static void test_vcomp_fork_uintptr(void) +{ + void CDECL (*p_vcomp_fork_u5)(BOOL, int, void *, UINT_PTR, UINT_PTR, UINT_PTR, UINT_PTR, UINT_PTR); + p_vcomp_fork_u5 = p_vcomp_fork; + + /* test_vcomp_fork_ptr ought to have been enough, but probably + * didn't vary all the bits of the high word, so do that here. 
+ */ + p_vcomp_fork_u5(TRUE, 5, _test_vcomp_fork_uintptr_worker, \ + (UINT_PTR)1, (UINT_PTR)(MAXUINT_PTR-2), \ + (UINT_PTR)3, (UINT_PTR)(MAXUINT_PTR)-4, (UINT_PTR) 5); +} + +static void CDECL _test_vcomp_fork_float_worker(float a, float b, float c, float d, float e) +{ + ok(1.4999 < a && a < 1.5001, "expected a == 1.5, got %f\n", a); + ok(2.4999 < b && b < 2.5001, "expected b == 2.5, got %f\n", b); + ok(3.4999 < c && c < 3.5001, "expected c == 3.5, got %f\n", c); + ok(4.4999 < d && d < 4.5001, "expected d == 4.5, got %f\n", d); + ok(5.4999 < e && e < 5.5001, "expected e == 5.5, got %f\n", e); +} + +static void test_vcomp_fork_float(void) +{ + static const int is_win64 = (sizeof(void *) > sizeof(int)); + + /* Avoid float promotion by using a prototype tailored for this call */ + void CDECL (*p_vcomp_fork_f5)(BOOL, int, void *, float, float, float, float, float); + p_vcomp_fork_f5 = p_vcomp_fork; + + if (is_win64) { + /* This test should never be run on amd64. */ + return; + } + + /* + * 32 bit Visual C sometimes passes 32 bit floats by value to + * the wrapper, so verify that here. + * + * x86-64 Visual C has not yet been observed passing 32 bit floats by + * value to the wrapper, and indeed _vcomp_fork does not even copy the + * first four args to floating point registers, so this test fails + * on x86-64 for the first four arguments even on native. + * Therefore don't run it. (It's hard to write a reliable test to show + * this, since the floating point registers might just happen + * to have the right values once in a blue moon.) 
+ */ + + p_vcomp_fork_f5(TRUE, 5, _test_vcomp_fork_float_worker, 1.5f, 2.5f, 3.5f, 4.5f, 5.5f); +} + +START_TEST(fork) +{ + if (!init()) + return; + + test_vcomp_fork_ptr(); + test_vcomp_fork_uintptr(); + test_vcomp_fork_float(); +} diff --git a/dlls/vcomp/tests/vcomp.manifest b/dlls/vcomp/tests/vcomp.manifest new file mode 100644 index 0000000..6c8bd91 --- /dev/null +++ b/dlls/vcomp/tests/vcomp.manifest @@ -0,0 +1,21 @@ + + + +Wine vcomp test suite + + + + + + diff --git a/dlls/vcomp/tests/vcomp.rc b/dlls/vcomp/tests/vcomp.rc new file mode 100644 index 0000000..c5f1d25 --- /dev/null +++ b/dlls/vcomp/tests/vcomp.rc @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2012 Dan Kegel + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "winuser.h" + +/* @makedep: vcomp.manifest */ +1 RT_MANIFEST vcomp.manifest diff --git a/dlls/vcomp/vcomp.spec b/dlls/vcomp/vcomp.spec index 306dd15..d446574 100644 --- a/dlls/vcomp/vcomp.spec +++ b/dlls/vcomp/vcomp.spec @@ -64,7 +64,7 @@ @ stub _vcomp_for_static_init_i8 @ stub _vcomp_for_static_simple_init @ stub _vcomp_for_static_simple_init_i8 -@ stub _vcomp_fork +@ varargs _vcomp_fork(long long ptr) @ stub _vcomp_get_thread_num @ stub _vcomp_leave_critsect @ stub _vcomp_master_barrier -- 1.7.9.5