[libc-commits] [PATCH] D150663: [libc] Add optimized memcmp for RISCV

Guillaume Chatelet via Phabricator via libc-commits libc-commits at lists.llvm.org
Tue May 16 10:40:38 PDT 2023


This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG893f02c2aff9: [libc] Add optimized memcmp for RISCV (authored by gchatelet).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150663/new/

https://reviews.llvm.org/D150663

Files:
  libc/src/string/memory_utils/memcmp_implementations.h


Index: libc/src/string/memory_utils/memcmp_implementations.h
===================================================================
--- libc/src/string/memory_utils/memcmp_implementations.h
+++ libc/src/string/memory_utils/memcmp_implementations.h
@@ -26,21 +26,86 @@
 namespace __llvm_libc {
 
 [[maybe_unused]] LIBC_INLINE MemcmpReturnType
-inline_memcmp_embedded_tiny(CPtr p1, CPtr p2, size_t count) {
+inline_memcmp_byte_per_byte(CPtr p1, CPtr p2, size_t offset, size_t count) { // Compares p1 and p2 one byte at a time over indices [offset, count); returns the first non-zero block result, else ZERO().
   LIBC_LOOP_NOUNROLL
-  for (size_t offset = 0; offset < count; ++offset)
+  for (; offset < count; ++offset) // 'count' is an end index, not a length: nothing is compared when offset >= count.
     if (auto value = generic::Memcmp<1>::block(p1 + offset, p2 + offset))
       return value;
   return MemcmpReturnType::ZERO();
 }
 
+[[maybe_unused]] LIBC_INLINE MemcmpReturnType
+inline_memcmp_aligned_access_64bit(CPtr p1, CPtr p2, size_t count) { // memcmp of 'count' bytes using one 64-bit load of p1 per step; p2 is read in pieces chosen from its alignment.
+  constexpr size_t kAlign = sizeof(uint64_t);
+  if (count <= 2 * kAlign) // Short input: plain byte loop. Past this point count > 2 * kAlign, so count - kAlign below cannot wrap.
+    return inline_memcmp_byte_per_byte(p1, p2, 0, count);
+  size_t bytes_to_p1_align = distance_to_align_up<kAlign>(p1);
+  if (auto value = inline_memcmp_byte_per_byte(p1, p2, 0, bytes_to_p1_align)) // Byte-compare the head [0, bytes_to_p1_align), presumably up to the point where p1 + offset is kAlign-aligned.
+    return value;
+  size_t offset = bytes_to_p1_align;
+  size_t p2_alignment = distance_to_align_down<kAlign>(p2 + offset); // Computed once: offset only advances by kAlign, so this presumably holds for every iteration.
+  for (; offset < count - kAlign; offset += kAlign) {
+    uint64_t b;
+    if (p2_alignment == 0)
+      b = load64_aligned<uint64_t>(p2, offset);
+    else if (p2_alignment == 4)
+      b = load64_aligned<uint32_t, uint32_t>(p2, offset);
+    else if (p2_alignment == 2 || p2_alignment == 6) // 6 is 2-byte aligned as well; sending it to the odd-address sequence below would start its uint16_t pieces on odd addresses.
+      b = load64_aligned<uint16_t, uint16_t, uint16_t, uint16_t>(p2, offset);
+    else // Odd p2_alignment only: the leading uint8_t moves the following uint16_t pieces onto even addresses.
+      b = load64_aligned<uint8_t, uint16_t, uint16_t, uint16_t, uint8_t>(
+          p2, offset);
+    uint64_t a = load64_aligned<uint64_t>(p1, offset);
+    if (a != b) {
+      // TODO use cmp_neq_uint64_t from D148717 once it's submitted.
+      return Endian::to_big_endian(a) < Endian::to_big_endian(b) ? -1 : 1;
+    }
+  }
+  return inline_memcmp_byte_per_byte(p1, p2, offset, count); // Tail: the remaining indices [offset, count) one byte at a time.
+}
+
+[[maybe_unused]] LIBC_INLINE MemcmpReturnType
+inline_memcmp_aligned_access_32bit(CPtr p1, CPtr p2, size_t count) { // memcmp of 'count' bytes using one 32-bit load of p1 per step; p2 is read in pieces chosen from its alignment.
+  constexpr size_t kAlign = sizeof(uint32_t);
+  if (count <= 2 * kAlign) // Short input: plain byte loop. Past this point count > 2 * kAlign, so count - kAlign below cannot wrap.
+    return inline_memcmp_byte_per_byte(p1, p2, 0, count);
+  size_t bytes_to_p1_align = distance_to_align_up<kAlign>(p1);
+  if (auto value = inline_memcmp_byte_per_byte(p1, p2, 0, bytes_to_p1_align)) // Byte-compare the head [0, bytes_to_p1_align), presumably up to the point where p1 + offset is kAlign-aligned.
+    return value;
+  size_t offset = bytes_to_p1_align;
+  size_t p2_alignment = distance_to_align_down<kAlign>(p2 + offset); // Computed once: offset only advances by kAlign, so this presumably holds for every iteration.
+  for (; offset < count - kAlign; offset += kAlign) {
+    uint32_t b;
+    if (p2_alignment == 0)
+      b = load32_aligned<uint32_t>(p2, offset);
+    else if (p2_alignment == 2)
+      b = load32_aligned<uint16_t, uint16_t>(p2, offset);
+    else // Remaining (presumably odd) alignments: the leading uint8_t moves the uint16_t piece onto an even address.
+      b = load32_aligned<uint8_t, uint16_t, uint8_t>(p2, offset);
+    uint32_t a = load32_aligned<uint32_t>(p1, offset);
+    if (a != b) {
+      // TODO use cmp_uint32_t from D148717 once it's submitted.
+      // We perform the difference as an int64_t.
+      const int64_t diff = static_cast<int64_t>(Endian::to_big_endian(a)) -
+                           static_cast<int64_t>(Endian::to_big_endian(b));
+      // And reduce the int64_t into an int32_t; OR-ing in the low 16 bits keeps the result non-zero when diff is 1 (where diff >> 1 is 0).
+      return static_cast<int32_t>((diff >> 1) | (diff & 0xFFFF));
+    }
+  }
+  return inline_memcmp_byte_per_byte(p1, p2, offset, count); // Tail: the remaining indices [offset, count) one byte at a time.
+}
+
 LIBC_INLINE MemcmpReturnType inline_memcmp(CPtr p1, CPtr p2, size_t count) {
 #if defined(LIBC_TARGET_ARCH_IS_X86)
   return inline_memcmp_x86(p1, p2, count);
 #elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
   return inline_memcmp_aarch64(p1, p2, count);
+#elif defined(LIBC_TARGET_ARCH_IS_RISCV64)
+  return inline_memcmp_aligned_access_64bit(p1, p2, count); // RISC-V 64: compare through 64-bit loads of p1.
+#elif defined(LIBC_TARGET_ARCH_IS_RISCV32)
+  return inline_memcmp_aligned_access_32bit(p1, p2, count); // RISC-V 32: compare through 32-bit loads of p1.
 #else
-  return inline_memcmp_embedded_tiny(p1, p2, count);
+  return inline_memcmp_byte_per_byte(p1, p2, 0, count); // Every other target: byte loop starting at offset 0.
 #endif
 }
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D150663.522701.patch
Type: text/x-patch
Size: 3987 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/libc-commits/attachments/20230516/0f332f12/attachment.bin>


More information about the libc-commits mailing list