[libc-commits] [PATCH] D152811: [libc] Dispatch memmove to memcpy when buffers are disjoint

Guillaume Chatelet via Phabricator via libc-commits libc-commits at lists.llvm.org
Tue Jun 13 05:41:27 PDT 2023


gchatelet created this revision.
gchatelet added a reviewer: courbet.
Herald added subscribers: libc-commits, luismarques, pengfei, s.egerton, PkmX, simoncook, asb, kristof.beyls, arichardson.
Herald added projects: libc-project, All.
gchatelet requested review of this revision.
Herald added a subscriber: pcwang-thead.

Most of the time `memmove` is called on buffers that are disjoint, in that case we can use `memcpy` which is faster.
The additional test is branchless on x86, aarch64 and RISCV with the zbb extension (bitmanip).
On x86 this patch adds a latency of 2 to 3 cycles.

Before
------

Benchmark                      Time             CPU   Iterations UserCounters...
--------------------------------------------------------------------------------

BM_Memmove/0/0_median       5.00 ns         5.00 ns           10 bytes_per_cycle=1.25477/s bytes_per_second=2.62933G/s items_per_second=199.87M/s __llvm_libc::memmove,memmove Google A
BM_Memmove/1/0_median       6.21 ns         6.21 ns           10 bytes_per_cycle=3.22173/s bytes_per_second=6.75106G/s items_per_second=160.955M/s __llvm_libc::memmove,memmove Google B
BM_Memmove/2/0_median       8.09 ns         8.09 ns           10 bytes_per_cycle=5.31462/s bytes_per_second=11.1366G/s items_per_second=123.603M/s __llvm_libc::memmove,memmove Google D
BM_Memmove/3/0_median       5.95 ns         5.95 ns           10 bytes_per_cycle=2.71865/s bytes_per_second=5.69687G/s items_per_second=167.967M/s __llvm_libc::memmove,memmove Google L
BM_Memmove/4/0_median       5.63 ns         5.63 ns           10 bytes_per_cycle=2.28294/s bytes_per_second=4.78383G/s items_per_second=177.615M/s __llvm_libc::memmove,memmove Google M
BM_Memmove/5/0_median       5.68 ns         5.68 ns           10 bytes_per_cycle=2.16798/s bytes_per_second=4.54295G/s items_per_second=176.015M/s __llvm_libc::memmove,memmove Google Q
BM_Memmove/6/0_median       7.46 ns         7.46 ns           10 bytes_per_cycle=3.97619/s bytes_per_second=8.332G/s items_per_second=134.044M/s __llvm_libc::memmove,memmove Google S
BM_Memmove/7/0_median       5.40 ns         5.40 ns           10 bytes_per_cycle=1.79695/s bytes_per_second=3.76546G/s items_per_second=185.211M/s __llvm_libc::memmove,memmove Google U
BM_Memmove/8/0_median       5.62 ns         5.62 ns           10 bytes_per_cycle=3.18747/s bytes_per_second=6.67927G/s items_per_second=177.983M/s __llvm_libc::memmove,memmove Google W
BM_Memmove/9/0_median        101 ns          101 ns           10 bytes_per_cycle=9.77359/s bytes_per_second=20.4803G/s items_per_second=9.9333M/s __llvm_libc::memmove,uniform 384 to 4096

After
BM_Memmove/0/0_median       3.57 ns         3.57 ns           10 bytes_per_cycle=1.71375/s bytes_per_second=3.59112G/s items_per_second=280.411M/s __llvm_libc::memmove,memmove Google A
BM_Memmove/1/0_median       4.52 ns         4.52 ns           10 bytes_per_cycle=4.47557/s bytes_per_second=9.37843G/s items_per_second=221.427M/s __llvm_libc::memmove,memmove Google B
BM_Memmove/2/0_median       5.70 ns         5.70 ns           10 bytes_per_cycle=7.37396/s bytes_per_second=15.4519G/s items_per_second=175.399M/s __llvm_libc::memmove,memmove Google D
BM_Memmove/3/0_median       4.47 ns         4.47 ns           10 bytes_per_cycle=3.4148/s bytes_per_second=7.15563G/s items_per_second=223.743M/s __llvm_libc::memmove,memmove Google L
BM_Memmove/4/0_median       4.53 ns         4.53 ns           10 bytes_per_cycle=2.86071/s bytes_per_second=5.99454G/s items_per_second=220.69M/s __llvm_libc::memmove,memmove Google M
BM_Memmove/5/0_median       4.19 ns         4.19 ns           10 bytes_per_cycle=2.5484/s bytes_per_second=5.3401G/s items_per_second=238.924M/s __llvm_libc::memmove,memmove Google Q
BM_Memmove/6/0_median       5.02 ns         5.02 ns           10 bytes_per_cycle=5.94164/s bytes_per_second=12.4505G/s items_per_second=199.14M/s __llvm_libc::memmove,memmove Google S
BM_Memmove/7/0_median       4.03 ns         4.03 ns           10 bytes_per_cycle=2.47028/s bytes_per_second=5.17641G/s items_per_second=247.906M/s __llvm_libc::memmove,memmove Google U
BM_Memmove/8/0_median       4.70 ns         4.70 ns           10 bytes_per_cycle=3.84975/s bytes_per_second=8.06706G/s items_per_second=212.72M/s __llvm_libc::memmove,memmove Google W
BM_Memmove/9/0_median       90.7 ns         90.7 ns           10 bytes_per_cycle=10.8681/s bytes_per_second=22.7739G/s items_per_second=11.02M/s __llvm_libc::memmove,uniform 384 to 4096


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D152811

Files:
  libc/src/string/memmove.cpp
  libc/src/string/memory_utils/utils.h
  libc/test/src/string/memory_utils/utils_test.cpp


Index: libc/test/src/string/memory_utils/utils_test.cpp
===================================================================
--- libc/test/src/string/memory_utils/utils_test.cpp
+++ libc/test/src/string/memory_utils/utils_test.cpp
@@ -144,6 +144,17 @@
   }
 }
 
+TEST(LlvmLibcUtilsTest, DisjointBuffers) {
+  char a, b;
+  EXPECT_TRUE(disjoint(&a, &b, 0));
+  EXPECT_TRUE(disjoint(&a, &b, 1));
+  EXPECT_FALSE(disjoint(&a, &b, 2));
+
+  EXPECT_TRUE(disjoint(&b, &a, 0));
+  EXPECT_TRUE(disjoint(&b, &a, 1));
+  EXPECT_FALSE(disjoint(&b, &a, 2));
+}
+
 TEST(LlvmLibcUtilsTest, LoadStoreAligned) {
   const uint64_t init = 0xDEAD'C0DE'BEEF'F00D;
   CPtr const src = reinterpret_cast<CPtr>(&init);
Index: libc/src/string/memory_utils/utils.h
===================================================================
--- libc/src/string/memory_utils/utils.h
+++ libc/src/string/memory_utils/utils.h
@@ -84,6 +84,21 @@
   return reinterpret_cast<T *>(__builtin_assume_aligned(ptr, alignment));
 }
 
+// Returns true iff memory regions [p1, p1 + size] and [p2, 2 + size] are
+// disjoint.
+LIBC_INLINE bool disjoint(const void *p1, const void *p2, size_t size) {
+  const char *a = static_cast<const char *>(p1);
+  const char *b = static_cast<const char *>(p2);
+  if (a > b) {
+    // Swap a and b, this is compile down to conditionnal move for aarch64 / x86
+    // and RISCV with zbb extension.
+    auto tmp = a;
+    a = b;
+    b = tmp;
+  }
+  return a + size <= b;
+}
+
 #if LIBC_HAS_BUILTIN(__builtin_memcpy_inline)
 #define LLVM_LIBC_HAS_BUILTIN_MEMCPY_INLINE
 #endif
Index: libc/src/string/memmove.cpp
===================================================================
--- libc/src/string/memmove.cpp
+++ libc/src/string/memmove.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/string/memmove.h"
+#include "src/string/memory_utils/memcpy_implementations.h"
 #include "src/string/memory_utils/memmove_implementations.h"
 #include <stddef.h> // size_t
 
@@ -14,7 +15,10 @@
 
 LLVM_LIBC_FUNCTION(void *, memmove,
                    (void *dst, const void *src, size_t count)) {
-  inline_memmove(dst, src, count);
+  if (disjoint(dst, src, count))
+    inline_memcpy(dst, src, count);
+  else
+    inline_memmove(dst, src, count);
   return dst;
 }
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D152811.530878.patch
Type: text/x-patch
Size: 2319 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/libc-commits/attachments/20230613/ff8ffa8e/attachment-0001.bin>


More information about the libc-commits mailing list