[PATCH v3 3/3] msvcrt: Add an SSE2 memset_aligned_32 implementation.

Rémi Bernon rbernon at codeweavers.com
Tue Sep 14 07:16:48 CDT 2021


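Use it for intermediate sizes: when the ERMS path is not taken (fewer than 2048 bytes, or no ERMS support) and SSE2 is available.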

Signed-off-by: Rémi Bernon <rbernon at codeweavers.com>
---
 dlls/msvcrt/string.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/dlls/msvcrt/string.c b/dlls/msvcrt/string.c
index b8a5cc89663..6fb269e4185 100644
--- a/dlls/msvcrt/string.c
+++ b/dlls/msvcrt/string.c
@@ -2907,6 +2907,27 @@ __ASM_GLOBAL_FUNC( erms_memset_aligned_32,
         "stosb\n\t"
         MEMSET_RET )
 
+void *__cdecl sse2_memset_aligned_32(unsigned char *d, unsigned int c, size_t n, void *ret); /* NOTE(review): ret is presumably the value handed back by MEMSET_RET -- confirm */
+__ASM_GLOBAL_FUNC( sse2_memset_aligned_32, /* SSE2 fill: 16-byte aligned stores, written from the end of the buffer downwards */
+        MEMSET_INIT /* defined outside this hunk; presumably loads DEST_REG/VAL_REG/LEN_REG from the arguments -- confirm */
+        "movd " VAL_REG ", %xmm0\n\t" /* low dword of xmm0 = fill value */
+        "pshufd $0, %xmm0, %xmm0\n\t" /* replicate that dword into all four lanes */
+        "test $0x20, " LEN_REG "\n\t" /* is bit 5 of the length set? */
+        "je 1f\n\t" /* no: go straight to the 64-byte loop */
+        "sub $0x20, " LEN_REG "\n\t" /* yes: peel one 32-byte chunk off the end */
+        "movdqa %xmm0, 0x00(" DEST_REG ", " LEN_REG ")\n\t" /* movdqa needs a 16-byte aligned address */
+        "movdqa %xmm0, 0x10(" DEST_REG ", " LEN_REG ")\n\t"
+        "je 2f\n\t" /* flags are still those of the sub above (movdqa leaves them alone): done if nothing remains */
+        "1:\n\t" /* NOTE(review): a length of 0 (or not a multiple of 32) would wrap at the sub below -- assumes callers never pass one; confirm */
+        "sub $0x40, " LEN_REG "\n\t" /* step down by 64 bytes; LEN_REG is now the offset of the chunk to fill */
+        "movdqa %xmm0, 0x00(" DEST_REG ", " LEN_REG ")\n\t"
+        "movdqa %xmm0, 0x10(" DEST_REG ", " LEN_REG ")\n\t"
+        "movdqa %xmm0, 0x20(" DEST_REG ", " LEN_REG ")\n\t"
+        "movdqa %xmm0, 0x30(" DEST_REG ", " LEN_REG ")\n\t"
+        "ja 1b\n\t" /* repeat while the sub left a nonzero remainder without borrowing */
+        "2:\n\t"
+        MEMSET_RET ) /* defined outside this hunk; presumably the matching epilogue and return -- confirm */
+
 #undef MEMSET_INIT
 #undef MEMSET_RET
 #undef DEST_REG
@@ -2952,6 +2973,11 @@ void *__cdecl memset(void *dst, int c, size_t n)
         n = (n - a) & ~0x1f;
 #if defined(__i386__) || defined(__x86_64__)
         if (n >= 2048 && erms_supported) return erms_memset_aligned_32(d + a, v, n, dst);
+#ifdef __i386__
+        if (sse2_supported) return sse2_memset_aligned_32(d + a, v, n, dst);
+#else
+        return sse2_memset_aligned_32(d + a, v, n, dst);
+#endif
 #endif
         return memset_aligned_32(d + a, v, n, dst);
     }
-- 
2.33.0




More information about the wine-devel mailing list