[PATCH v2] msvcrt: Faster memcmp().

Jan Sikorski jsikorski at codeweavers.com
Tue Apr 19 08:53:46 CDT 2022


Signed-off-by: Jan Sikorski <jsikorski at codeweavers.com>
---
v2: Smarter unaligned implementation for non-x86 architectures.
Use uint64_t blocks instead of size_t.
Some renaming and cleanup.
---
 dlls/msvcrt/string.c | 99 ++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 90 insertions(+), 9 deletions(-)

diff --git a/dlls/msvcrt/string.c b/dlls/msvcrt/string.c
index 3b352ac0bf2..7b42604c123 100644
--- a/dlls/msvcrt/string.c
+++ b/dlls/msvcrt/string.c
@@ -2675,10 +2675,13 @@ int CDECL I10_OUTPUT(MSVCRT__LDOUBLE ld80, int prec, int flag, struct _I10_OUTPU
 }
 #undef I10_OUTPUT_MAX_PREC
 
-/*********************************************************************
- *                  memcmp (MSVCRT.@)
- */
-int __cdecl memcmp(const void *ptr1, const void *ptr2, size_t n)
+#ifdef WORDS_BIGENDIAN /* MERGE builds one word out of two neighbouring words; WORDS_BIGENDIAN selects the shift directions */
+# define MERGE(w1, sh1, w2, sh2) ((w1 << sh1) | (w2 >> sh2)) /* NOTE(review): arguments are not parenthesized, pass plain operands only */
+#else
+# define MERGE(w1, sh1, w2, sh2) ((w1 >> sh1) | (w2 << sh2)) /* same contract as above with the shift directions swapped */
+#endif /* WORDS_BIGENDIAN */
+
+static inline int memcmp_bytes(const void *ptr1, const void *ptr2, size_t n)
 {
     const unsigned char *p1, *p2;
 
@@ -2690,6 +2693,89 @@ int __cdecl memcmp(const void *ptr1, const void *ptr2, size_t n)
     return 0;
 }
 
+static int memcmp_aligned(const uint64_t *p1, const uint64_t *p2, size_t size) /* compares size bytes, walking both buffers one uint64_t block at a time */
+{
+    const size_t block_size = sizeof(*p1);
+
+    size_t remainder = size & (block_size - 1); /* bytes left over after the last whole block */
+    size_t block_count = size / block_size;
+
+    while (block_count)
+    {
+        if (*p1 != *p2) /* whole-block inequality only says "different"; the result comes from memcmp_bytes */
+            return memcmp_bytes(p1, p2, block_size);
+
+        p1++;
+        p2++;
+        block_count--;
+    }
+
+    return memcmp_bytes(p1, p2, remainder); /* tail shorter than one block */
+}
+
+static int memcmp_unaligned(const uint64_t *aligned, const unsigned char *unaligned, int offset, size_t size) /* compares size bytes; offset is the byte distance stepped back from unaligned before loading blocks (the caller passes a nonzero p2 & 7) */
+{
+    const size_t block_size = sizeof(*aligned);
+    size_t remainder = size & (block_size - 1); /* bytes left over after the last whole block */
+    size_t block_count = size / block_size;
+
+    int shift_prev = 8 * offset; /* bits of the earlier block that lie before the wanted bytes */
+    int shift_next = 8 * (block_size - offset); /* bits by which the later block is moved to fill the rest of the word */
+
+    const uint64_t *block = (const uint64_t *)(unaligned - offset); /* step back offset bytes so the loads go through a uint64_t pointer */
+    uint64_t prev, next, merged;
+
+    prev = block[0]; /* this load starts offset bytes before unaligned */
+    while (block_count)
+    {
+        next = block[1]; /* NOTE(review): whole-block loads also touch bytes just outside [unaligned, unaligned + size) - confirm this is acceptable */
+        merged = MERGE(prev, shift_prev, next, shift_next); /* one block's worth of the unaligned side, stitched from two loads */
+        if (merged != *aligned)
+            return memcmp_bytes(aligned, &merged, block_size); /* merged stands in for the unaligned side's bytes of this block */
+
+        aligned++;
+        block++;
+        block_count--;
+
+        prev = next; /* the later block becomes the earlier one, so each block is loaded once */
+    }
+
+    return memcmp_bytes(aligned, (const char *)block + offset, remainder); /* tail is compared bytewise at its real address */
+}
+
+/*********************************************************************
+ *                  memcmp (MSVCRT.@)
+ */
+int __cdecl memcmp(const void *ptr1, const void *ptr2, size_t n) /* compares n bytes: bytewise for short inputs, in 8-byte blocks otherwise */
+{
+    const unsigned char *p1 = ptr1, *p2 = ptr2;
+    const size_t block_size = 8;
+    size_t align, offset;
+    int result;
+
+    if (n < block_size) /* less than one block: plain byte loop */
+        return memcmp_bytes(p1, p2, n);
+
+    align = -(uintptr_t)p1 & (block_size - 1); /* bytes needed to bring p1 up to a block_size boundary */
+
+    if ((result = memcmp_bytes(p1, p2, align))) /* compare the head in front of that boundary first */
+        return result;
+
+    p1 += align;
+    p2 += align;
+    n  -= align;
+
+#if defined(__i386__) || defined(__x86_64__) /* x86 builds always take the block loop, whatever p2's alignment */
+    return memcmp_aligned((const uint64_t *)p1, (const uint64_t *)p2, n); /* NOTE(review): only p1 was aligned above, p2 may be unaligned here */
+#endif /* NOTE(review): everything below is unreachable on x86 builds because of the return above */
+
+    offset = (uintptr_t)p2 & (block_size - 1); /* p2's misalignment now that p1 is aligned */
+    if (!offset) /* both pointers are aligned */
+        return memcmp_aligned((const uint64_t *)p1, (const uint64_t *)p2, n);
+
+    return memcmp_unaligned((const uint64_t *)p1, p2, offset, n); /* p1 aligned, p2 not */
+}
+
 #if defined(__i386__) || defined(__x86_64__)
 
 #ifdef __i386__
@@ -2946,11 +3032,6 @@ __ASM_GLOBAL_FUNC( sse2_memmove,
 /*********************************************************************
  *                  memmove (MSVCRT.@)
  */
-#ifdef WORDS_BIGENDIAN
-# define MERGE(w1, sh1, w2, sh2) ((w1 << sh1) | (w2 >> sh2))
-#else
-# define MERGE(w1, sh1, w2, sh2) ((w1 >> sh1) | (w2 << sh2))
-#endif
 void * __cdecl memmove(void *dst, const void *src, size_t n)
 {
 #ifdef __x86_64__
-- 
2.32.0




More information about the wine-devel mailing list