[PATCH 4/4] msvcrt: Add an SSE2 memset_aligned_32 implementation.

Rémi Bernon rbernon at codeweavers.com
Mon Sep 13 07:23:41 CDT 2021


For intermediate sizes: buffers smaller than 2048 bytes now use an SSE2
store loop, while larger buffers keep using rep stosb when ERMS is
supported.

Signed-off-by: Rémi Bernon <rbernon at codeweavers.com>
---
 dlls/msvcrt/string.c | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/dlls/msvcrt/string.c b/dlls/msvcrt/string.c
index d09b44fbcd6..6e9fb8d119d 100644
--- a/dlls/msvcrt/string.c
+++ b/dlls/msvcrt/string.c
@@ -2859,7 +2859,35 @@ void * __cdecl memcpy(void *dst, const void *src, size_t n)
 static void memset_aligned_32(unsigned char *d, uint64_t v, size_t n)
 {
 #if defined(__i386__) || defined(__x86_64__)
-    if (n >= 2048 && erms_supported) __stosb(d, v, n);
+#ifdef __i386__
+    if (n < 2048 && sse2_supported)
+#else
+    if (n < 2048)
+#endif
+    {
+        __asm__ __volatile__ (
+            "movd %1, %%xmm0\n\t"
+            "pshufd $0, %%xmm0, %%xmm0\n\t"
+            "test $0x20, %2\n\t"
+            "je 1f\n\t"
+            "sub $0x20, %2\n\t"
+            "movdqa %%xmm0, 0x00(%0,%2)\n\t"
+            "movdqa %%xmm0, 0x10(%0,%2)\n\t"
+            "je 2f\n\t"
+            "1:\n\t"
+            "sub $0x40, %2\n\t"
+            "movdqa %%xmm0, 0x00(%0,%2)\n\t"
+            "movdqa %%xmm0, 0x10(%0,%2)\n\t"
+            "movdqa %%xmm0, 0x20(%0,%2)\n\t"
+            "movdqa %%xmm0, 0x30(%0,%2)\n\t"
+            "ja 1b\n\t"
+            "2:\n\t"
+            :
+            : "r"(d), "r"((uint32_t)v), "c"(n)
+            : "memory"
+        );
+    }
+    else if (erms_supported) __stosb(d, v, n);
     else
 #endif
     while (n >= 32)
-- 
2.33.0




More information about the wine-devel mailing list