[PATCH v2 3/3] msvcrt: Add an SSE2 memset_aligned_32 implementation.

Rémi Bernon rbernon at codeweavers.com
Tue Sep 14 04:05:09 CDT 2021


Use SSE2 stores for intermediate sizes, below the point where the ERMS rep stosb path is used.

Signed-off-by: Rémi Bernon <rbernon at codeweavers.com>
---
 dlls/msvcrt/string.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/dlls/msvcrt/string.c b/dlls/msvcrt/string.c
index 26bb9cd8ba4..8a6095dda57 100644
--- a/dlls/msvcrt/string.c
+++ b/dlls/msvcrt/string.c
@@ -2864,6 +2864,32 @@ static void memset_aligned_32(unsigned char *d, uint64_t v, size_t n)
         __asm__ __volatile__ ("cld; rep; stosb" : "+D"(d), "+c"(n) : "a"(c) : "memory", "cc");
         return;
     }
+#ifdef __i386__
+    if (sse2_supported)
+#endif
+    {
+        unsigned int c = v;
+        __asm__ __volatile__ (
+            "movd %2, %%xmm0\n\t"
+            "pshufd $0, %%xmm0, %%xmm0\n\t"
+            "test $0x20, %0\n\t"
+            "je 1f\n\t"
+            "sub $0x20, %0\n\t"
+            "movdqa %%xmm0, 0x00(%1,%0)\n\t"
+            "movdqa %%xmm0, 0x10(%1,%0)\n\t"
+            "je 2f\n\t"
+            "1:\n\t"
+            "sub $0x40, %0\n\t"
+            "movdqa %%xmm0, 0x00(%1,%0)\n\t"
+            "movdqa %%xmm0, 0x10(%1,%0)\n\t"
+            "movdqa %%xmm0, 0x20(%1,%0)\n\t"
+            "movdqa %%xmm0, 0x30(%1,%0)\n\t"
+            "ja 1b\n\t"
+            "2:\n\t"
+            : "+r"(n) : "r"(d), "r"(c) : "xmm0", "memory", "cc"
+        );
+        return;
+    }
 #endif
     while (n >= 32)
     {
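
For readers who prefer C over GNU inline assembly, the sketch below shows
roughly what the new SSE2 path does, written with SSE2 intrinsics instead of
hand-written asm. It is illustrative only and not part of the patch: the
helper name is invented, and it assumes d is at least 16-byte aligned and n is
a non-zero multiple of 32, which the surrounding memset_aligned_32 code
appears to guarantee.

#include <emmintrin.h>
#include <stddef.h>

/* Hypothetical C equivalent of the inline assembly above (not in the patch).
 * Broadcasts the 32-bit pattern c, then fills the buffer backwards: one
 * optional 32-byte step if n is not a multiple of 64, followed by 64 bytes
 * per loop iteration. */
static void memset_aligned_32_sse2_sketch(unsigned char *d, unsigned int c, size_t n)
{
    __m128i x = _mm_set1_epi32(c);                 /* movd + pshufd $0 */

    if (n & 0x20)                                  /* test $0x20, n */
    {
        n -= 0x20;
        _mm_store_si128((__m128i *)(d + n + 0x00), x);
        _mm_store_si128((__m128i *)(d + n + 0x10), x);
        if (!n) return;                            /* je 2f */
    }
    do                                             /* 1: */
    {
        n -= 0x40;
        _mm_store_si128((__m128i *)(d + n + 0x00), x);
        _mm_store_si128((__m128i *)(d + n + 0x10), x);
        _mm_store_si128((__m128i *)(d + n + 0x20), x);
        _mm_store_si128((__m128i *)(d + n + 0x30), x);
    } while (n);                                   /* ja 1b */
}

Filling from the end toward the start lets the loop use n itself as the
decreasing offset, so only a single register has to be updated per iteration.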
-- 
2.33.0



