[PATCH v3 3/3] msvcrt: Add an SSE2 memset_aligned_32 implementation.
Rémi Bernon
rbernon at codeweavers.com
Tue Sep 14 07:16:48 CDT 2021
For intermediate sizes.
Signed-off-by: Rémi Bernon <rbernon at codeweavers.com>
---
dlls/msvcrt/string.c | 26 ++++++++++++++++++++++++++
1 file changed, 26 insertions(+)
diff --git a/dlls/msvcrt/string.c b/dlls/msvcrt/string.c
index b8a5cc89663..6fb269e4185 100644
--- a/dlls/msvcrt/string.c
+++ b/dlls/msvcrt/string.c
@@ -2907,6 +2907,27 @@ __ASM_GLOBAL_FUNC( erms_memset_aligned_32,
"stosb\n\t"
MEMSET_RET )
+void *__cdecl sse2_memset_aligned_32(unsigned char *d, unsigned int c, size_t n, void *ret); /* SSE2 fill of n bytes at d using 16-byte aligned stores; NOTE(review): n must be a non-zero multiple of 32 (n == 0 would underflow at the first sub in the loop) and d must be at least 16-byte aligned for movdqa -- confirm against the caller */
+__ASM_GLOBAL_FUNC( sse2_memset_aligned_32, /* the buffer is filled from its end towards its start, with LEN_REG doubling as the running offset */
+ MEMSET_INIT /* defined outside this hunk; presumably loads the arguments into DEST_REG / VAL_REG / LEN_REG -- TODO confirm */
+ "movd " VAL_REG ", %xmm0\n\t" /* put the fill value from VAL_REG into the bottom of xmm0 */
+ "pshufd $0, %xmm0, %xmm0\n\t" /* replicate dword 0 of xmm0 into all four dwords; presumably the caller already replicated the byte across that dword -- confirm */
+ "test $0x20, " LEN_REG "\n\t" /* is the length an odd multiple of 32? */
+ "je 1f\n\t" /* bit 0x20 clear: go straight to the 64-byte loop */
+ "sub $0x20, " LEN_REG "\n\t" /* peel one 32-byte chunk off the tail; this sub sets the flags tested by the je below */
+ "movdqa %xmm0, 0x00(" DEST_REG ", " LEN_REG ")\n\t" /* store bytes [len, len+16) */
+ "movdqa %xmm0, 0x10(" DEST_REG ", " LEN_REG ")\n\t" /* store bytes [len+16, len+32) */
+ "je 2f\n\t" /* movdqa leaves flags untouched, so ZF is still from the sub: nothing left, done */
+ "1:\n\t" /* main loop: 64 bytes per iteration, walking backwards */
+ "sub $0x40, " LEN_REG "\n\t" /* step the offset back by 64; sets the flags tested by the ja below */
+ "movdqa %xmm0, 0x00(" DEST_REG ", " LEN_REG ")\n\t" /* four aligned 16-byte stores covering [len, len+64) */
+ "movdqa %xmm0, 0x10(" DEST_REG ", " LEN_REG ")\n\t"
+ "movdqa %xmm0, 0x20(" DEST_REG ", " LEN_REG ")\n\t"
+ "movdqa %xmm0, 0x30(" DEST_REG ", " LEN_REG ")\n\t"
+ "ja 1b\n\t" /* repeat while the remaining length is above zero (unsigned: no borrow and not zero) */
+ "2:\n\t" /* common exit */
+ MEMSET_RET ) /* defined outside this hunk; presumably restores registers and returns the ret argument -- TODO confirm */
+
#undef MEMSET_INIT
#undef MEMSET_RET
#undef DEST_REG
@@ -2952,6 +2973,11 @@ void *__cdecl memset(void *dst, int c, size_t n)
n = (n - a) & ~0x1f;
#if defined(__i386__) || defined(__x86_64__)
if (n >= 2048 && erms_supported) return erms_memset_aligned_32(d + a, v, n, dst);
+#ifdef __i386__
+ if (sse2_supported) return sse2_memset_aligned_32(d + a, v, n, dst);
+#else
+ return sse2_memset_aligned_32(d + a, v, n, dst);
+#endif
#endif
return memset_aligned_32(d + a, v, n, dst);
}
--
2.33.0
More information about the wine-devel
mailing list