[PATCH v4 2/3] msvcrt: Check for ERMS support and use rep stosb for large memset calls.

Rémi Bernon rbernon at codeweavers.com
Tue Sep 14 09:28:15 CDT 2021


Signed-off-by: Rémi Bernon <rbernon at codeweavers.com>
---
 dlls/msvcrt/math.c   | 13 +++++++++
 dlls/msvcrt/msvcrt.h |  1 +
 dlls/msvcrt/string.c | 64 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 78 insertions(+)

diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c
index 7f59a4d20d4..9974e72d78f 100644
--- a/dlls/msvcrt/math.c
+++ b/dlls/msvcrt/math.c
@@ -43,6 +43,7 @@
 #include <limits.h>
 #include <locale.h>
 #include <math.h>
+#include <intrin.h>
 
 #include "msvcrt.h"
 #include "winternl.h"
@@ -64,11 +65,23 @@ typedef int (CDECL *MSVCRT_matherr_func)(struct _exception *);
 
 static MSVCRT_matherr_func MSVCRT_default_matherr_func = NULL;
 
+BOOL erms_supported;
 BOOL sse2_supported;
 static BOOL sse2_enabled;
 
 void msvcrt_init_math( void *module )
 {
+#if defined(__i386__) || defined(__x86_64__)
+    int regs[4];
+
+    __cpuid(regs, 0);
+    if (regs[0] >= 7)
+    {
+        __cpuidex(regs, 7, 0);
+        erms_supported = ((regs[1] >> 9) & 1);
+    }
+#endif
+
     sse2_supported = IsProcessorFeaturePresent( PF_XMMI64_INSTRUCTIONS_AVAILABLE );
 #if _MSVCR_VER <=71
     sse2_enabled = FALSE;
diff --git a/dlls/msvcrt/msvcrt.h b/dlls/msvcrt/msvcrt.h
index 60f8c2f5ef2..022eced35d9 100644
--- a/dlls/msvcrt/msvcrt.h
+++ b/dlls/msvcrt/msvcrt.h
@@ -33,6 +33,7 @@
 #undef strncpy
 #undef wcsncpy
 
+extern BOOL erms_supported DECLSPEC_HIDDEN;
 extern BOOL sse2_supported DECLSPEC_HIDDEN;
 
 #define DBL80_MAX_10_EXP 4932
diff --git a/dlls/msvcrt/string.c b/dlls/msvcrt/string.c
index f2b1b4a5b11..32291f06001 100644
--- a/dlls/msvcrt/string.c
+++ b/dlls/msvcrt/string.c
@@ -2732,6 +2732,13 @@ __ASM_GLOBAL_FUNC( sse2_memmove,
         MEMMOVE_CLEANUP
         "ret" )
 
+#undef MEMMOVE_INIT
+#undef MEMMOVE_CLEANUP
+#undef DEST_REG
+#undef SRC_REG
+#undef LEN_REG
+#undef TMP_REG
+
 #endif
 
 /*********************************************************************
@@ -2855,6 +2862,56 @@ void * __cdecl memcpy(void *dst, const void *src, size_t n)
     return memmove(dst, src, n);
 }
 
+#if defined(__i386__) || defined(__x86_64__)
+/* "rep stosb" fill helper; only built for x86 targets. DEST/LEN/VAL name the registers stosb uses (destination, count, byte in %al). */
+#ifdef __i386__
+#define DEST_REG "%edi"
+#define LEN_REG "%ecx"
+#define VAL_REG "%eax"
+/* 32-bit prologue: stash %edi in %edx (presumably because %edi is callee-saved), then load d, c and n from the stack. */
+#define MEMSET_INIT \
+    "movl " DEST_REG ", %edx\n\t" \
+    "movl 4(%esp), " DEST_REG "\n\t" \
+    "movl 8(%esp), " VAL_REG "\n\t" \
+    "movl 12(%esp), " LEN_REG "\n\t"
+/* 32-bit epilogue: put the stashed value back into %edi and return. */
+#define MEMSET_RET \
+    "movl %edx, " DEST_REG "\n\t" \
+    "ret"
+
+#else
+/* 64-bit variant: the three arguments are read from %rcx, %edx and %r8. */
+#define DEST_REG "%rdi"
+#define LEN_REG "%rcx"
+#define VAL_REG "%eax"
+/* 64-bit prologue: stash %rdi in %r9 (presumably because %rdi is callee-saved), then move d, c and n into place. */
+#define MEMSET_INIT \
+    "movq " DEST_REG ", %r9\n\t" \
+    "movq %rcx, " DEST_REG "\n\t" \
+    "movl %edx, " VAL_REG "\n\t" \
+    "movq %r8, " LEN_REG "\n\t"
+/* 64-bit epilogue: put the stashed value back into %rdi and return. */
+#define MEMSET_RET \
+    "movq %r9, " DEST_REG "\n\t" \
+    "ret"
+
+#endif
+/* Stores the low byte of c into n bytes starting at d with "rep stosb". NOTE(review): no cld here, so this relies on DF being clear on entry. */
+void __cdecl erms_memset_aligned_32(unsigned char *d, unsigned int c, size_t n);
+__ASM_GLOBAL_FUNC( erms_memset_aligned_32,
+        MEMSET_INIT
+        "rep\n\t"
+        "stosb\n\t"
+        MEMSET_RET )
+/* Drop the helper macros so they are not left defined for the code that follows. */
+#undef MEMSET_INIT
+#undef MEMSET_RET
+#undef DEST_REG
+#undef LEN_REG
+#undef VAL_REG
+
+#endif
+
 static inline void memset_aligned_32(unsigned char *d, uint64_t v, size_t n)
 {
     while (n >= 32)
@@ -2890,6 +2947,13 @@ void *__cdecl memset(void *dst, int c, size_t n)
         if (n <= 64) return dst;
 
         n = (n - a) & ~0x1f;
+#if defined(__i386__) || defined(__x86_64__)
+        if (n >= 2048 && erms_supported)
+        {
+            erms_memset_aligned_32(d + a, v, n);
+            return dst;
+        }
+#endif
         memset_aligned_32(d + a, v, n);
         return dst;
     }
-- 
2.33.0




More information about the wine-devel mailing list