[PATCH 4/4] msvcrt: Add an SSE2 memset_aligned_32 implementation.
Rémi Bernon
rbernon at codeweavers.com
Mon Sep 13 07:23:41 CDT 2021
Use an SSE2 store loop for intermediate sizes (below the 2048-byte threshold where ERMS rep-stosb becomes profitable).
Signed-off-by: Rémi Bernon <rbernon at codeweavers.com>
---
dlls/msvcrt/string.c | 30 +++++++++++++++++++++++++++++-
1 file changed, 29 insertions(+), 1 deletion(-)
diff --git a/dlls/msvcrt/string.c b/dlls/msvcrt/string.c
index d09b44fbcd6..6e9fb8d119d 100644
--- a/dlls/msvcrt/string.c
+++ b/dlls/msvcrt/string.c
@@ -2859,7 +2859,35 @@ void * __cdecl memcpy(void *dst, const void *src, size_t n)
static void memset_aligned_32(unsigned char *d, uint64_t v, size_t n)
{
#if defined(__i386__) || defined(__x86_64__)
- if (n >= 2048 && erms_supported) __stosb(d, v, n);
+#ifdef __i386__
+ if (n < 2048 && sse2_supported)
+#else
+ if (n < 2048)
+#endif
+ {
+ __asm__ __volatile__ (
+ "movd %1, %%xmm0\n\t"
+ "pshufd $0, %%xmm0, %%xmm0\n\t"
+ "test $0x20, %2\n\t"
+ "je 1f\n\t"
+ "sub $0x20, %2\n\t"
+ "movdqa %%xmm0, 0x00(%0,%2)\n\t"
+ "movdqa %%xmm0, 0x10(%0,%2)\n\t"
+ "je 2f\n\t"
+ "1:\n\t"
+ "sub $0x40, %2\n\t"
+ "movdqa %%xmm0, 0x00(%0,%2)\n\t"
+ "movdqa %%xmm0, 0x10(%0,%2)\n\t"
+ "movdqa %%xmm0, 0x20(%0,%2)\n\t"
+ "movdqa %%xmm0, 0x30(%0,%2)\n\t"
+ "ja 1b\n\t"
+ "2:\n\t"
+ :
+ : "r"(d), "r"((uint32_t)v), "c"(n)
+ : "memory"
+ );
+ }
+ else if (erms_supported) __stosb(d, v, n);
else
#endif
while (n >= 32)
--
2.33.0
More information about the wine-devel mailing list