[PATCH v4 3/3] msvcrt: Add an SSE2 memset_aligned_32 implementation.

Rémi Bernon rbernon at codeweavers.com
Tue Sep 14 09:28:16 CDT 2021


Signed-off-by: Rémi Bernon <rbernon at codeweavers.com>
---
 dlls/msvcrt/string.c | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/dlls/msvcrt/string.c b/dlls/msvcrt/string.c
index 32291f06001..4a0e778e77a 100644
--- a/dlls/msvcrt/string.c
+++ b/dlls/msvcrt/string.c
@@ -2904,6 +2904,27 @@ __ASM_GLOBAL_FUNC( erms_memset_aligned_32,
         "stosb\n\t"
         MEMSET_RET )
 
+void __cdecl sse2_memset_aligned_32(unsigned char *d, unsigned int c, size_t n);
+__ASM_GLOBAL_FUNC( sse2_memset_aligned_32,
+        MEMSET_INIT
+        "movd " VAL_REG ", %xmm0\n\t"
+        "pshufd $0, %xmm0, %xmm0\n\t"
+        "test $0x20, " LEN_REG "\n\t"
+        "je 1f\n\t"
+        "sub $0x20, " LEN_REG "\n\t"
+        "movdqa %xmm0, 0x00(" DEST_REG ", " LEN_REG ")\n\t"
+        "movdqa %xmm0, 0x10(" DEST_REG ", " LEN_REG ")\n\t"
+        "je 2f\n\t"
+        "1:\n\t"
+        "sub $0x40, " LEN_REG "\n\t"
+        "movdqa %xmm0, 0x00(" DEST_REG ", " LEN_REG ")\n\t"
+        "movdqa %xmm0, 0x10(" DEST_REG ", " LEN_REG ")\n\t"
+        "movdqa %xmm0, 0x20(" DEST_REG ", " LEN_REG ")\n\t"
+        "movdqa %xmm0, 0x30(" DEST_REG ", " LEN_REG ")\n\t"
+        "ja 1b\n\t"
+        "2:\n\t"
+        MEMSET_RET )
+
 #undef MEMSET_INIT
 #undef MEMSET_RET
 #undef DEST_REG
@@ -2953,9 +2974,21 @@ void *__cdecl memset(void *dst, int c, size_t n)
             erms_memset_aligned_32(d + a, v, n);
             return dst;
         }
+#ifdef __x86_64__
+        sse2_memset_aligned_32(d + a, v, n);
+        return dst;
+#else
+        if (sse2_supported)
+        {
+            sse2_memset_aligned_32(d + a, v, n);
+            return dst;
+        }
+#endif
 #endif
+#ifndef __x86_64__
         memset_aligned_32(d + a, v, n);
         return dst;
+#endif
     }
     if (n >= 8)
     {
-- 
2.33.0




More information about the wine-devel mailing list