[PATCH v2 2/3] vcomp: Implement C2VectParallel().
Paul Gofman
pgofman at codeweavers.com
Wed Feb 10 06:01:51 CST 2021
Used by Ancient Cities.
Signed-off-by: Paul Gofman <pgofman at codeweavers.com>
---
v3:
- use a helper function to copy the arguments through va_arg();
- fix argument copy count;
- add missing __ms_va_end();
- add an overflow-safe check for an empty range (the corresponding test is added in the next patch).
dlls/vcomp/main.c | 158 ++++++++++++++++++++++++++++++++++++
dlls/vcomp110/vcomp110.spec | 2 +-
dlls/vcomp120/vcomp120.spec | 2 +-
dlls/vcomp140/vcomp140.spec | 2 +-
4 files changed, 161 insertions(+), 3 deletions(-)
diff --git a/dlls/vcomp/main.c b/dlls/vcomp/main.c
index e5d946dca12..666a5fc9781 100644
--- a/dlls/vcomp/main.c
+++ b/dlls/vcomp/main.c
@@ -33,6 +33,8 @@
WINE_DEFAULT_DEBUG_CHANNEL(vcomp);
+#define MAX_VECT_PARALLEL_CALLBACK_ARGS 128 /* capacity of the callback argument arrays, range bounds included */
+
typedef CRITICAL_SECTION *omp_lock_t;
typedef CRITICAL_SECTION *omp_nest_lock_t;
@@ -122,6 +124,14 @@ static void **ptr_from_va_list(__ms_va_list valist)
return (void **)valist;
}
+/* Read args_count arguments of type void * from valist and store them in
+ * args[0] .. args[args_count - 1]. A zero or negative args_count copies
+ * nothing. The caller must provide room for args_count entries. */
+static void copy_va_list_data(void **args, __ms_va_list valist, int args_count)
+{
+    int i;
+
+    /* The index is signed like args_count: compared against an unsigned
+     * index, a negative count (callers pass nargs - 2) would be converted
+     * to a huge value and the loop would run far past the end of args. */
+    for (i = 0; i < args_count; ++i)
+        args[i] = va_arg(valist, void *);
+}
+
#if defined(__i386__)
extern void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, void **args);
@@ -1665,6 +1675,154 @@ void CDECL _vcomp_leave_critsect(CRITICAL_SECTION *critsect)
LeaveCriticalSection(critsect);
}
+/* Return |end - start + step - range_offset| / |step|, evaluated as an
+ * unsigned division.
+ *
+ * The callers pass range_offset as the sign of step when the end bound is
+ * excluded and as 0 when it is included. step must not be zero. */
+static unsigned int get_step_count(int start, int end, int range_offset, int step)
+{
+    const int span = end - start + step - range_offset;
+
+    /* For a negative step both operands are negated first, so the division
+     * is always carried out on magnitudes. */
+    return step < 0 ? (unsigned)-span / -step : (unsigned)span / step;
+}
+
+/* Per-thread entry point that C2VectParallel() runs through _vcomp_fork().
+ *
+ * Builds the callback argument array (slots 0 and 1 receive the bounds of a
+ * sub-range, the remaining nargs - 2 slots are read from valist) and calls
+ * function for the part of the iteration space handled by this thread.
+ *
+ * With dynamic_distribution set, threads repeatedly claim chunks by advancing
+ * the shared *dynamic_start with a compare-exchange. Otherwise each thread
+ * derives one fixed sub-range from its thread number. */
+static void CDECL c2vectparallel_wrapper(int start, int end, int step, int end_included, BOOL dynamic_distribution,
+        int volatile *dynamic_start, void *function, int nargs, __ms_va_list valist)
+{
+    void *call_args[MAX_VECT_PARALLEL_CALLBACK_ARGS];
+    unsigned int total_steps, base_steps, extra_steps;
+    int team_size = omp_get_num_threads();
+    int first, last, end_adjust;
+    int self = _vcomp_get_thread_num();
+    int direction;
+
+    copy_va_list_data(&call_args[2], valist, nargs - 2);
+
+    direction = step > 0 ? 1 : -1;
+    /* One unit in the direction of travel when the end bound is excluded. */
+    end_adjust = end_included ? 0 : direction;
+
+    if (dynamic_distribution)
+    {
+        /* Value *dynamic_start holds once the whole range has been claimed. */
+        int finished = end_included ? end + step : end;
+        int claim_next, observed;
+
+        start = *dynamic_start;
+        while (start != finished)
+        {
+            total_steps = get_step_count(start, end, end_adjust, step);
+
+            /* Chunk size is the remaining step count divided by the thread
+             * count, rounded up. */
+            last = start + (total_steps + team_size - 1) / team_size * step
+                    + end_adjust;
+
+            if ((last - end) * direction > 0)
+            {
+                /* The chunk would run past the end: take everything left. */
+                claim_next = finished;
+                last = end;
+            }
+            else
+            {
+                claim_next = last - end_adjust;
+                last -= step;
+            }
+
+            observed = InterlockedCompareExchange(dynamic_start, claim_next, start);
+            if (observed == start)
+            {
+                /* The chunk is ours: run it, then look at what is left. */
+                call_args[0] = (void *)(ULONG_PTR)start;
+                call_args[1] = (void *)(ULONG_PTR)last;
+                _vcomp_fork_call_wrapper(function, nargs, call_args);
+                start = *dynamic_start;
+            }
+            else
+            {
+                /* Another thread moved the shared start first; retry from
+                 * the value it left behind. */
+                start = observed;
+            }
+        }
+        return;
+    }
+
+    total_steps = get_step_count(start, end, end_adjust, step);
+
+    /* According to the tests, native vcomp still makes extra calls with an
+     * empty range from excessive threads under certain conditions, for an
+     * unclear reason. Surplus threads therefore only bail out here when the
+     * end is included or the step is neither 1 nor -1. */
+    if (self >= total_steps && (end_included || (step != 1 && step != -1)))
+        return;
+
+    base_steps = total_steps / team_size;
+    extra_steps = total_steps % team_size;
+
+    if (self < extra_steps)
+    {
+        /* The first extra_steps threads each take one additional step. */
+        first = self * (base_steps + 1);
+        last = first + base_steps + 1;
+    }
+    else if (self < total_steps)
+    {
+        first = extra_steps + base_steps * self;
+        last = first + base_steps;
+    }
+    else
+    {
+        /* Surplus thread that still has to make a call: give it no steps. */
+        first = last = 0;
+    }
+
+    /* Translate step indices into loop variable values. */
+    first = start + first * step;
+    last = start + (last - 1) * step + end_adjust;
+
+    call_args[0] = (void *)(ULONG_PTR)first;
+    call_args[1] = (void *)(ULONG_PTR)last;
+    _vcomp_fork_call_wrapper(function, nargs, call_args);
+}
+
+/* Run a loop callback over the range described by start, end and step,
+ * either directly on the calling thread or split between forked threads.
+ *
+ * start, end, step       loop bounds and increment
+ * end_included           passed on to the per-thread wrapper, which treats
+ *                        the end bound as part of the range when it is set
+ * thread_count           stored in fork_threads for the duration of the
+ *                        fork; a negative value forces the single call path
+ * dynamic_distribution   passed on to the per-thread wrapper to select
+ *                        dynamic instead of fixed work distribution
+ * function               callback to invoke
+ * nargs                  total number of callback arguments; the first two
+ *                        are the range bounds, the other nargs - 2 are read
+ *                        from the variadic list
+ *
+ * When nargs exceeds MAX_VECT_PARALLEL_CALLBACK_ARGS the callback is not
+ * called at all. */
+void CDECL C2VectParallel(int start, int end, int step, BOOL end_included, int thread_count,
+        BOOL dynamic_distribution, void *function, int nargs, ...)
+{
+    struct vcomp_thread_data *thread_data;
+    int volatile dynamic_start;
+    int prev_thread_count;
+    __ms_va_list valist;
+
+    TRACE("start %d, end %d, step %d, end_included %d, thread_count %d, dynamic_distribution %#x,"
+            " function %p, nargs %d.\n", start, end, step, end_included, thread_count,
+            dynamic_distribution, function, nargs);
+
+    if (nargs > MAX_VECT_PARALLEL_CALLBACK_ARGS)
+    {
+        FIXME("Number of arguments %u exceeds supported maximum %u"
+                " (not calling the loop code, expect problems).\n",
+                nargs, MAX_VECT_PARALLEL_CALLBACK_ARGS);
+        return;
+    }
+
+    __ms_va_start(valist, nargs);
+
+    /* A zero step is checked first so that the division below is never
+     * evaluated with a zero divisor; it also keeps a zero step out of
+     * get_step_count() in the forked path. Such a call is handed to the
+     * callback unchanged, as a single call.
+     *
+     * The (end - start) expression can result in integer overflow. According
+     * to the tests, native vcomp runs the function as a single thread both
+     * for an empty range and for (end - start) not fitting the integer
+     * range. */
+    if (!step || (step > 0 && end < start) || (step < 0 && end > start)
+            || (end - start) / step < 2 || thread_count < 0)
+    {
+        void *wrapper_args[MAX_VECT_PARALLEL_CALLBACK_ARGS];
+
+        wrapper_args[0] = (void *)(ULONG_PTR)start;
+        wrapper_args[1] = (void *)(ULONG_PTR)end;
+        copy_va_list_data(&wrapper_args[2], valist, nargs - 2);
+        _vcomp_fork_call_wrapper(function, nargs, wrapper_args);
+        __ms_va_end(valist);
+        return;
+    }
+
+    /* Temporarily override the thread count used by the fork and restore
+     * the previous value afterwards. */
+    thread_data = vcomp_init_thread_data();
+    prev_thread_count = thread_data->fork_threads;
+    thread_data->fork_threads = thread_count;
+
+    /* Shared cursor that the forked threads advance when work is
+     * distributed dynamically. */
+    dynamic_start = start;
+
+    _vcomp_fork(TRUE, 9, c2vectparallel_wrapper, start, end, step, end_included, dynamic_distribution,
+            &dynamic_start, function, nargs, valist);
+
+    thread_data->fork_threads = prev_thread_count;
+    __ms_va_end(valist);
+}
+
BOOL WINAPI DllMain(HINSTANCE instance, DWORD reason, LPVOID reserved)
{
TRACE("(%p, %d, %p)\n", instance, reason, reserved);
diff --git a/dlls/vcomp110/vcomp110.spec b/dlls/vcomp110/vcomp110.spec
index 22a94823c5f..fb4d626c07c 100644
--- a/dlls/vcomp110/vcomp110.spec
+++ b/dlls/vcomp110/vcomp110.spec
@@ -1,4 +1,4 @@
-@ stub C2VectParallel
+@ varargs C2VectParallel(long long long long long long ptr long)
@ cdecl _vcomp_atomic_add_i1(ptr long)
@ cdecl _vcomp_atomic_add_i2(ptr long)
@ cdecl _vcomp_atomic_add_i4(ptr long)
diff --git a/dlls/vcomp120/vcomp120.spec b/dlls/vcomp120/vcomp120.spec
index 22a94823c5f..fb4d626c07c 100644
--- a/dlls/vcomp120/vcomp120.spec
+++ b/dlls/vcomp120/vcomp120.spec
@@ -1,4 +1,4 @@
-@ stub C2VectParallel
+@ varargs C2VectParallel(long long long long long long ptr long)
@ cdecl _vcomp_atomic_add_i1(ptr long)
@ cdecl _vcomp_atomic_add_i2(ptr long)
@ cdecl _vcomp_atomic_add_i4(ptr long)
diff --git a/dlls/vcomp140/vcomp140.spec b/dlls/vcomp140/vcomp140.spec
index 22a94823c5f..fb4d626c07c 100644
--- a/dlls/vcomp140/vcomp140.spec
+++ b/dlls/vcomp140/vcomp140.spec
@@ -1,4 +1,4 @@
-@ stub C2VectParallel
+@ varargs C2VectParallel(long long long long long long ptr long)
@ cdecl _vcomp_atomic_add_i1(ptr long)
@ cdecl _vcomp_atomic_add_i2(ptr long)
@ cdecl _vcomp_atomic_add_i4(ptr long)
--
2.29.2
More information about the wine-devel
mailing list