[PATCH 3/4] msvcrt: Check for ERMS support and use __stosb for large memset calls.
Rémi Bernon
rbernon at codeweavers.com
Mon Sep 13 07:23:40 CDT 2021
Signed-off-by: Rémi Bernon <rbernon at codeweavers.com>
---
dlls/msvcrt/math.c | 16 ++++++++++++++++
dlls/msvcrt/msvcrt.h | 1 +
dlls/msvcrt/string.c | 5 +++++
3 files changed, 22 insertions(+)
diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c
index 7f59a4d20d4..6639bb5ee23 100644
--- a/dlls/msvcrt/math.c
+++ b/dlls/msvcrt/math.c
@@ -43,6 +43,7 @@
#include <limits.h>
#include <locale.h>
#include <math.h>
+#include <intrin.h>
#include "msvcrt.h"
#include "winternl.h"
@@ -64,11 +65,26 @@ typedef int (CDECL *MSVCRT_matherr_func)(struct _exception *);
static MSVCRT_matherr_func MSVCRT_default_matherr_func = NULL;
+BOOL erms_supported;
BOOL sse2_supported;
static BOOL sse2_enabled;
void msvcrt_init_math( void *module )
{
+#if defined(__i386__) || defined(__x86_64__)
+ int regs[4];
+
+ __cpuid(regs, 0);
+ if (regs[0] < 7) erms_supported = FALSE;
+ else
+ {
+ __cpuidex(regs, 7, 0);
+ erms_supported = ((regs[1] >> 9) & 1);
+ }
+#else
+ erms_supported = FALSE;
+#endif
+
sse2_supported = IsProcessorFeaturePresent( PF_XMMI64_INSTRUCTIONS_AVAILABLE );
#if _MSVCR_VER <=71
sse2_enabled = FALSE;
diff --git a/dlls/msvcrt/msvcrt.h b/dlls/msvcrt/msvcrt.h
index 60f8c2f5ef2..022eced35d9 100644
--- a/dlls/msvcrt/msvcrt.h
+++ b/dlls/msvcrt/msvcrt.h
@@ -33,6 +33,7 @@
#undef strncpy
#undef wcsncpy
+extern BOOL erms_supported DECLSPEC_HIDDEN;
extern BOOL sse2_supported DECLSPEC_HIDDEN;
#define DBL80_MAX_10_EXP 4932
diff --git a/dlls/msvcrt/string.c b/dlls/msvcrt/string.c
index 3a7312572ab..d09b44fbcd6 100644
--- a/dlls/msvcrt/string.c
+++ b/dlls/msvcrt/string.c
@@ -27,6 +27,7 @@
#include <math.h>
#include <limits.h>
#include <locale.h>
+#include <intrin.h>
#include <float.h>
#include "msvcrt.h"
#include "bnum.h"
@@ -2857,6 +2858,10 @@ void * __cdecl memcpy(void *dst, const void *src, size_t n)
static void memset_aligned_32(unsigned char *d, uint64_t v, size_t n)
{
+#if defined(__i386__) || defined(__x86_64__)
+ if (n >= 2048 && erms_supported) __stosb(d, v, n);
+ else
+#endif
while (n >= 32)
{
*(uint64_t*)(d + n - 32) = v;
--
2.33.0
More information about the wine-devel
mailing list