[libc-commits] [libc] a10300a - [libc] Allow customization of memcpy via flags.
Guillaume Chatelet via libc-commits
libc-commits at lists.llvm.org
Fri Jan 15 01:26:58 PST 2021
Author: Guillaume Chatelet
Date: 2021-01-15T09:26:45Z
New Revision: a10300a2b27c426556f9266364337d5d546a3c14
URL: https://github.com/llvm/llvm-project/commit/a10300a2b27c426556f9266364337d5d546a3c14
DIFF: https://github.com/llvm/llvm-project/commit/a10300a2b27c426556f9266364337d5d546a3c14.diff
LOG: [libc] Allow customization of memcpy via flags.
- Adds LLVM_LIBC_IS_DEFINED macro to libc/src/__support/common.h
- Adds a few knobs to memcpy to help with experimentation:
- LLVM_LIBC_MEMCPY_X86_USE_ONLY_REPMOVSB replaces the implementation with a single call to rep;movsb
- LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE customizes the size from which rep;movsb is used
Differential Revision: https://reviews.llvm.org/D94692
Added:
Modified:
libc/src/__support/common.h
libc/src/string/x86/memcpy.cpp
Removed:
################################################################################
diff --git a/libc/src/__support/common.h b/libc/src/__support/common.h
index 208c8bdfea41..53a63fc2e917 100644
--- a/libc/src/__support/common.h
+++ b/libc/src/__support/common.h
@@ -29,4 +29,27 @@
#define LLVM_LIBC_FUNCTION(type, name, arglist) type name arglist
#endif
+namespace __llvm_libc {
+namespace internal {
+constexpr bool same_string(char const *lhs, char const *rhs) {
+ for (; *lhs || *rhs; ++lhs, ++rhs)
+ if (*lhs != *rhs)
+ return false;
+ return true;
+}
+} // namespace internal
+} // namespace __llvm_libc
+
+// LLVM_LIBC_IS_DEFINED checks whether a particular macro is defined.
+// Usage: constexpr bool kUseAvx = LLVM_LIBC_IS_DEFINED(__AVX__);
+//
+// This works by comparing the stringified version of the macro with and without
+// evaluation. If FOO is not defined, both stringifications yield "FOO". If FOO
+// is defined, one stringification yields "FOO" while the other yields its
+// stringified value, e.g. "1".
+#define LLVM_LIBC_IS_DEFINED(macro) \
+ !__llvm_libc::internal::same_string( \
+ LLVM_LIBC_IS_DEFINED__EVAL_AND_STRINGIZE(macro), #macro)
+#define LLVM_LIBC_IS_DEFINED__EVAL_AND_STRINGIZE(s) #s
+
#endif // LLVM_LIBC_SUPPORT_COMMON_H
diff --git a/libc/src/string/x86/memcpy.cpp b/libc/src/string/x86/memcpy.cpp
index 7c5740ba00e8..b9163d978bef 100644
--- a/libc/src/string/x86/memcpy.cpp
+++ b/libc/src/string/x86/memcpy.cpp
@@ -12,6 +12,26 @@
namespace __llvm_libc {
+// Whether to use only rep;movsb.
+constexpr bool kUseOnlyRepMovsb =
+ LLVM_LIBC_IS_DEFINED(LLVM_LIBC_MEMCPY_X86_USE_ONLY_REPMOVSB);
+
+// kRepMovsBSize == -1 : Only CopyAligned is used.
+// kRepMovsBSize == 0 : Only RepMovsb is used.
+// else CopyAligned is used up to kRepMovsBSize and then RepMovsb.
+constexpr size_t kRepMovsBSize =
+#ifdef LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE
+ LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE;
+#else
+ -1;
+#endif // LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE
+
+// Whether target supports AVX instructions.
+constexpr bool kHasAvx = LLVM_LIBC_IS_DEFINED(__AVX__);
+
+// The chunk size used for the loop copy strategy.
+constexpr size_t kLoopCopyBlockSize = kHasAvx ? 64 : 32;
+
static void CopyRepMovsb(char *__restrict dst, const char *__restrict src,
size_t count) {
// FIXME: Add MSVC support with
@@ -21,12 +41,6 @@ static void CopyRepMovsb(char *__restrict dst, const char *__restrict src,
asm volatile("rep movsb" : "+D"(dst), "+S"(src), "+c"(count) : : "memory");
}
-#if defined(__AVX__)
-#define BEST_SIZE 64
-#else
-#define BEST_SIZE 32
-#endif
-
// Design rationale
// ================
//
@@ -47,6 +61,9 @@ static void CopyRepMovsb(char *__restrict dst, const char *__restrict src,
// with little change on the code side.
static void memcpy_x86(char *__restrict dst, const char *__restrict src,
size_t count) {
+ if (kUseOnlyRepMovsb)
+ return CopyRepMovsb(dst, src, count);
+
if (count == 0)
return;
if (count == 1)
@@ -67,16 +84,10 @@ static void memcpy_x86(char *__restrict dst, const char *__restrict src,
return CopyBlockOverlap<32>(dst, src, count);
if (count < 128)
return CopyBlockOverlap<64>(dst, src, count);
-#if defined(__AVX__)
- if (count < 256)
+ if (kHasAvx && count < 256)
return CopyBlockOverlap<128>(dst, src, count);
-#endif
- // kRepMovsBSize == -1 : Only CopyAligned is used.
- // kRepMovsBSize == 0 : Only RepMovsb is used.
- // else CopyAligned is used to to kRepMovsBSize and then RepMovsb.
- constexpr size_t kRepMovsBSize = -1;
if (count <= kRepMovsBSize)
- return CopyAlignedBlocks<BEST_SIZE>(dst, src, count);
+ return CopyAlignedBlocks<kLoopCopyBlockSize>(dst, src, count);
return CopyRepMovsb(dst, src, count);
}
More information about the libc-commits mailing list