[libc-commits] [PATCH] D132128: [libc] improve {mem|b}cmp for aarch64

Thu Aug 18 05:31:43 PDT 2022

gchatelet created this revision.
Herald added subscribers: libc-commits, ecnelises, tschuett, kristof.beyls.
Herald added projects: libc-project, All.
gchatelet requested review of this revision.

Improving the 16B case for equals and three way compare on aarch64. This is beneficial for bcmp and memcmp.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D132128

Files:
  libc/src/string/memory_utils/elements_aarch64.h


Index: libc/src/string/memory_utils/elements_aarch64.h
===================================================================

--- libc/src/string/memory_utils/elements_aarch64.h
+++ libc/src/string/memory_utils/elements_aarch64.h
@@ -81,9 +81,34 @@
 using _3 = __llvm_libc::scalar::_3;
 using _4 = __llvm_libc::scalar::_4;
 using _8 = __llvm_libc::scalar::_8;
-using _16 = __llvm_libc::scalar::_16;
 
 #ifdef __ARM_NEON
+struct N16 {
+  static constexpr size_t SIZE = 16;
+  static bool equals(const char *lhs, const char *rhs) {
+    uint8x16_t l_0 = vld1q_u8((const uint8_t *)lhs);
+    uint8x16_t r_0 = vld1q_u8((const uint8_t *)rhs);
+    uint8x16_t cmp = vceqq_u8(l_0, r_0);
+    uint8x8_t narrowed = vshrn_n_u16(vreinterpretq_u16_u8(cmp), 4);
+    return (vget_lane_u64(narrowed, 0) == ~0ull);
+  }
+
+  static int three_way_compare(const char *lhs, const char *rhs) {
+    uint8x16_t l_0 = vld1q_u8((const uint8_t *)lhs);
+    uint8x16_t r_0 = vld1q_u8((const uint8_t *)rhs);
+    uint8x16_t cmp = vceqq_u8(l_0, r_0);
+    uint8x8_t narrowed = vshrn_n_u16(vreinterpretq_u16_u8(cmp), 4);
+    uint64_t result = ~vget_lane_u64(narrowed, 0);
+    if (result == 0) {
+      return 0;
+    }
+    uint32_t bit = __builtin_ctzll(result) >> 2;
+    const int ca = (unsigned char)lhs[bit];
+    const int cb = (unsigned char)rhs[bit];
+    return ca - cb;
+  }
+};
+
 struct N32 {
   static constexpr size_t SIZE = 32;
   static bool equals(const char *lhs, const char *rhs) {
@@ -113,9 +138,11 @@
   }
 };
 
+using _16 = N16;
 using _32 = N32;
 using _64 = Repeated<_32, 2>;
 #else
+using _16 = __llvm_libc::scalar::_16;
 using _32 = __llvm_libc::scalar::_32;
 using _64 = __llvm_libc::scalar::_64;
 #endif // __ARM_NEON


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D132128.453631.patch
Type: text/x-patch
Size: 1718 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/libc-commits/attachments/20220818/cae34e1f/attachment.bin>