[PATCH v2] msvcrt: Faster memcmp().
Piotr Caban
piotr.caban at gmail.com
Mon Apr 25 11:48:57 CDT 2022
Hi,
Sorry it took so long to review it.
I've done some tests and I don't think the "merge" approach makes sense.
I'm attaching a version of your patch that uses, on all platforms, the
same approach you used on x86/x86_64. If the patch looks OK, please send
it to wine-devel.
Notes about performance:
- its performance is similar to that of your previous patch on x86/x86_64
- it's faster on ARM compared to what's currently in wine
- performance on ARM varies a lot depending on hardware capabilities
Thanks,
Piotr
-------------- next part --------------
From bd95150f2ef410be8be1438ad77ede7fe70a4483 Mon Sep 17 00:00:00 2001
From: Jan Sikorski <jsikorski at codeweavers.com>
Date: Tue, 19 Apr 2022 15:53:46 +0200
Subject: [PATCH] msvcrt: Faster memcmp().
To: wine-devel <wine-devel at winehq.org>
---
dlls/msvcrt/string.c | 55 +++++++++++++++++++++++++++++++++++++++-----
1 file changed, 49 insertions(+), 6 deletions(-)
diff --git a/dlls/msvcrt/string.c b/dlls/msvcrt/string.c
index 3b352ac0bf2..181a161481c 100644
--- a/dlls/msvcrt/string.c
+++ b/dlls/msvcrt/string.c
@@ -2675,21 +2675,64 @@ int CDECL I10_OUTPUT(MSVCRT__LDOUBLE ld80, int prec, int flag, struct _I10_OUTPU
}
#undef I10_OUTPUT_MAX_PREC
-/*********************************************************************
- * memcmp (MSVCRT.@)
- */
-int __cdecl memcmp(const void *ptr1, const void *ptr2, size_t n)
+static inline int memcmp_bytes(const void *ptr1, const void *ptr2, size_t n) /* byte-wise compare of n bytes, read as unsigned char */
{
const unsigned char *p1, *p2;
for (p1 = ptr1, p2 = ptr2; n; n--, p1++, p2++)
{
- if (*p1 < *p2) return -1;
- if (*p1 > *p2) return 1;
+ if (*p1 != *p2) /* first differing byte decides the result */
+ return *p1 > *p2 ? 1 : -1; /* 1 if ptr1's byte is larger, -1 otherwise */
}
return 0;
}
+static inline int memcmp_blocks(const void *ptr1, const void *ptr2, size_t size) /* compare size bytes in uint64_t-sized blocks, then the leftover bytes */
+{
+ typedef uint64_t DECLSPEC_ALIGN(1) unaligned_ui64; /* NOTE(review): presumably lets ptr2 be read at any alignment - confirm DECLSPEC_ALIGN semantics */
+
+ const uint64_t *p1 = ptr1; /* read as plain uint64_t; memcmp() advances this side to a sizeof(uint64_t) boundary before calling */
+ const unaligned_ui64 *p2 = ptr2;
+ size_t remainder = size & (sizeof(uint64_t) - 1); /* size modulo sizeof(uint64_t): bytes left after the last whole block */
+ size_t block_count = size / sizeof(uint64_t); /* number of whole blocks */
+
+ while (block_count)
+ {
+ if (*p1 != *p2) /* blocks differ somewhere */
+ return memcmp_bytes(p1, p2, sizeof(uint64_t)); /* find the first differing byte inside this block */
+
+ p1++;
+ p2++;
+ block_count--;
+ }
+
+ return memcmp_bytes(p1, p2, remainder); /* all whole blocks were equal; compare the trailing bytes */
+}
+
+/*********************************************************************
+ * memcmp (MSVCRT.@)
+ */
+int __cdecl memcmp(const void *ptr1, const void *ptr2, size_t n)
+{
+ const unsigned char *p1 = ptr1, *p2 = ptr2;
+ size_t align;
+ int result;
+
+ if (n < sizeof(uint64_t)) /* too short for a whole block */
+ return memcmp_bytes(p1, p2, n); /* plain byte-wise comparison */
+
+ align = -(size_t)p1 & (sizeof(uint64_t) - 1); /* bytes needed to advance p1 to the next sizeof(uint64_t) boundary (0 if already on one) */
+
+ if ((result = memcmp_bytes(p1, p2, align))) /* compare the leading bytes before that boundary */
+ return result;
+
+ p1 += align;
+ p2 += align; /* p2 moves by the same amount, so it may still be off-boundary */
+ n -= align; /* align < sizeof(uint64_t) <= n here, so this cannot underflow */
+
+ return memcmp_blocks(p1, p2, n); /* block-wise compare of the rest, including any tail bytes */
+}
+
#if defined(__i386__) || defined(__x86_64__)
#ifdef __i386__
--
2.35.1
More information about the wine-devel
mailing list