[PATCH v2 3/3] msvcrt: Add an SSE2 memset_aligned_32 implementation.
Rémi Bernon
rbernon at codeweavers.com
Tue Sep 14 04:05:09 CDT 2021
Use the SSE2 store loop for intermediate sizes, which are too large for the existing small-size paths but do not reach the rep-stosb path.
Signed-off-by: Rémi Bernon <rbernon at codeweavers.com>
---
dlls/msvcrt/string.c | 26 ++++++++++++++++++++++++++
1 file changed, 26 insertions(+)
diff --git a/dlls/msvcrt/string.c b/dlls/msvcrt/string.c
index 26bb9cd8ba4..8a6095dda57 100644
--- a/dlls/msvcrt/string.c
+++ b/dlls/msvcrt/string.c
@@ -2864,6 +2864,32 @@ static void memset_aligned_32(unsigned char *d, uint64_t v, size_t n)
__asm__ __volatile__ ("cld; rep; stosb" : "+D"(d), "+c"(n) : "a"(c) : "memory", "cc");
return;
}
+#ifdef __i386__
+ if (sse2_supported)
+#endif
+ {
+ unsigned int c = v;
+ __asm__ __volatile__ (
+ "movd %2, %%xmm0\n\t"
+ "pshufd $0, %%xmm0, %%xmm0\n\t"
+ "test $0x20, %0\n\t"
+ "je 1f\n\t"
+ "sub $0x20, %0\n\t"
+ "movdqa %%xmm0, 0x00(%1,%0)\n\t"
+ "movdqa %%xmm0, 0x10(%1,%0)\n\t"
+ "je 2f\n\t"
+ "1:\n\t"
+ "sub $0x40, %0\n\t"
+ "movdqa %%xmm0, 0x00(%1,%0)\n\t"
+ "movdqa %%xmm0, 0x10(%1,%0)\n\t"
+ "movdqa %%xmm0, 0x20(%1,%0)\n\t"
+ "movdqa %%xmm0, 0x30(%1,%0)\n\t"
+ "ja 1b\n\t"
+ "2:\n\t"
+ : "+r"(n) : "r"(d), "r"(c) : "xmm0", "memory", "cc"
+ );
+ return;
+ }
#endif
while (n >= 32)
{
--
2.33.0
More information about the wine-devel
mailing list