[libc-commits] [libc] 355a5d5 - [libc][NFC] Use new approach based on types to code memmove

Guillaume Chatelet via libc-commits libc-commits at lists.llvm.org
Tue Apr 11 04:53:38 PDT 2023


Author: Guillaume Chatelet
Date: 2023-04-11T11:53:27Z
New Revision: 355a5d5e6dc00a2938772164e822967ec4796deb

URL: https://github.com/llvm/llvm-project/commit/355a5d5e6dc00a2938772164e822967ec4796deb
DIFF: https://github.com/llvm/llvm-project/commit/355a5d5e6dc00a2938772164e822967ec4796deb.diff

LOG: [libc][NFC] Use new approach based on types to code memmove

Added: 
    

Modified: 
    libc/src/string/memory_utils/memmove_implementations.h
    libc/src/string/memory_utils/op_generic.h

Removed: 
    


################################################################################
diff --git a/libc/src/string/memory_utils/memmove_implementations.h b/libc/src/string/memory_utils/memmove_implementations.h
index 1eb6d4e7e235..8a203174caef 100644
--- a/libc/src/string/memory_utils/memmove_implementations.h
+++ b/libc/src/string/memory_utils/memmove_implementations.h
@@ -34,73 +34,54 @@ inline_memmove_embedded_tiny(Ptr dst, CPtr src, size_t count) {
   }
 }
 
-template <size_t MaxSize>
-[[maybe_unused]] LIBC_INLINE void inline_memmove_generic(Ptr dst, CPtr src,
-                                                         size_t count) {
-  if (count == 0)
-    return;
-  if (count == 1)
-    return generic::Memmove<1, MaxSize>::block(dst, src);
-  if (count <= 4)
-    return generic::Memmove<2, MaxSize>::head_tail(dst, src, count);
-  if (count <= 8)
-    return generic::Memmove<4, MaxSize>::head_tail(dst, src, count);
-  if (count <= 16)
-    return generic::Memmove<8, MaxSize>::head_tail(dst, src, count);
-  if (count <= 32)
-    return generic::Memmove<16, MaxSize>::head_tail(dst, src, count);
-  if (count <= 64)
-    return generic::Memmove<32, MaxSize>::head_tail(dst, src, count);
-  if (count <= 128)
-    return generic::Memmove<64, MaxSize>::head_tail(dst, src, count);
-  if (dst < src) {
-    generic::Memmove<32, MaxSize>::template align_forward<Arg::Src>(dst, src,
-                                                                    count);
-    return generic::Memmove<64, MaxSize>::loop_and_tail_forward(dst, src,
-                                                                count);
-  } else {
-    generic::Memmove<32, MaxSize>::template align_backward<Arg::Src>(dst, src,
-                                                                     count);
-    return generic::Memmove<64, MaxSize>::loop_and_tail_backward(dst, src,
-                                                                 count);
-  }
-}
-
 LIBC_INLINE void inline_memmove(Ptr dst, CPtr src, size_t count) {
 #if defined(LIBC_TARGET_ARCH_IS_X86) || defined(LIBC_TARGET_ARCH_IS_AARCH64)
 #if defined(LIBC_TARGET_ARCH_IS_X86)
-  static constexpr size_t kMaxSize = x86::kAvx512F ? 64
-                                     : x86::kAvx   ? 32
-                                     : x86::kSse2  ? 16
-                                                   : 8;
+#if defined(__AVX512F__)
+  using uint128_t = uint8x16_t;
+  using uint256_t = uint8x32_t;
+  using uint512_t = uint8x64_t;
+#elif defined(__AVX__)
+  using uint128_t = uint8x16_t;
+  using uint256_t = uint8x32_t;
+  using uint512_t = cpp::array<uint8x32_t, 2>;
+#elif defined(__SSE2__)
+  using uint128_t = uint8x16_t;
+  using uint256_t = cpp::array<uint8x16_t, 2>;
+  using uint512_t = cpp::array<uint8x16_t, 4>;
+#else
+  using uint128_t = cpp::array<uint64_t, 2>;
+  using uint256_t = cpp::array<uint64_t, 4>;
+  using uint512_t = cpp::array<uint64_t, 8>;
+#endif
 #elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
-  static constexpr size_t kMaxSize = aarch64::kNeon ? 16 : 8;
+  static_assert(aarch64::kNeon, "aarch64 supports vector types");
+  using uint128_t = uint8x16_t;
+  using uint256_t = uint8x32_t;
+  using uint512_t = uint8x64_t;
 #endif
-  // return inline_memmove_generic<kMaxSize>(dst, src, count);
   if (count == 0)
     return;
   if (count == 1)
-    return generic::Memmove<1, kMaxSize>::block(dst, src);
+    return generic::Memmove<uint8_t>::block(dst, src);
   if (count <= 4)
-    return generic::Memmove<2, kMaxSize>::head_tail(dst, src, count);
+    return generic::Memmove<uint16_t>::head_tail(dst, src, count);
   if (count <= 8)
-    return generic::Memmove<4, kMaxSize>::head_tail(dst, src, count);
+    return generic::Memmove<uint32_t>::head_tail(dst, src, count);
   if (count <= 16)
-    return generic::Memmove<8, kMaxSize>::head_tail(dst, src, count);
+    return generic::Memmove<uint64_t>::head_tail(dst, src, count);
   if (count <= 32)
-    return generic::Memmove<16, kMaxSize>::head_tail(dst, src, count);
+    return generic::Memmove<uint128_t>::head_tail(dst, src, count);
   if (count <= 64)
-    return generic::Memmove<32, kMaxSize>::head_tail(dst, src, count);
+    return generic::Memmove<uint256_t>::head_tail(dst, src, count);
   if (count <= 128)
-    return generic::Memmove<64, kMaxSize>::head_tail(dst, src, count);
+    return generic::Memmove<uint512_t>::head_tail(dst, src, count);
   if (dst < src) {
-    generic::Memmove<32, kMaxSize>::align_forward<Arg::Src>(dst, src, count);
-    return generic::Memmove<64, kMaxSize>::loop_and_tail_forward(dst, src,
-                                                                 count);
+    generic::Memmove<uint256_t>::align_forward<Arg::Src>(dst, src, count);
+    return generic::Memmove<uint512_t>::loop_and_tail_forward(dst, src, count);
   } else {
-    generic::Memmove<32, kMaxSize>::align_backward<Arg::Src>(dst, src, count);
-    return generic::Memmove<64, kMaxSize>::loop_and_tail_backward(dst, src,
-                                                                  count);
+    generic::Memmove<uint256_t>::align_backward<Arg::Src>(dst, src, count);
+    return generic::Memmove<uint512_t>::loop_and_tail_backward(dst, src, count);
   }
 #else
   return inline_memmove_embedded_tiny(dst, src, count);

diff --git a/libc/src/string/memory_utils/op_generic.h b/libc/src/string/memory_utils/op_generic.h
index a7c5636c2d1c..1d203d626140 100644
--- a/libc/src/string/memory_utils/op_generic.h
+++ b/libc/src/string/memory_utils/op_generic.h
@@ -254,32 +254,22 @@ template <typename T, typename... TS> struct Memset {
 // Memmove
 ///////////////////////////////////////////////////////////////////////////////
 
-template <size_t Size, size_t MaxSize> struct Memmove {
-  static_assert(is_power2(MaxSize));
-  using T = details::getTypeFor<Size, MaxSize>;
-  static constexpr size_t SIZE = Size;
+template <typename T> struct Memmove {
+  static constexpr size_t SIZE = sum_sizeof<T>();
 
   LIBC_INLINE static void block(Ptr dst, CPtr src) {
-    if constexpr (details::is_void_v<T>) {
-      deferred_static_assert("Unimplemented Size");
-    } else {
-      store<T>(dst, load<T>(src));
-    }
+    store<T>(dst, load<T>(src));
   }
 
   LIBC_INLINE static void head_tail(Ptr dst, CPtr src, size_t count) {
-    const size_t offset = count - Size;
-    if constexpr (details::is_void_v<T>) {
-      deferred_static_assert("Unimplemented Size");
-    } else {
-      // The load and store operations can be performed in any order as long as
-      // they are not interleaved. More investigations are needed to determine
-      // the best order.
-      const auto head = load<T>(src);
-      const auto tail = load<T>(src + offset);
-      store<T>(dst, head);
-      store<T>(dst + offset, tail);
-    }
+    const size_t offset = count - SIZE;
+    // The load and store operations can be performed in any order as long as
+    // they are not interleaved. More investigations are needed to determine
+    // the best order.
+    const auto head = load<T>(src);
+    const auto tail = load<T>(src + offset);
+    store<T>(dst, head);
+    store<T>(dst + offset, tail);
   }
 
   // Align forward suitable when dst < src. The alignment is performed with
@@ -305,8 +295,8 @@ template <size_t Size, size_t MaxSize> struct Memmove {
     Ptr prev_dst = dst;
     CPtr prev_src = src;
     size_t prev_count = count;
-    align_to_next_boundary<Size, AlignOn>(dst, src, count);
-    adjust(Size, dst, src, count);
+    align_to_next_boundary<SIZE, AlignOn>(dst, src, count);
+    adjust(SIZE, dst, src, count);
     head_tail(prev_dst, prev_src, prev_count - count);
   }
 
@@ -333,9 +323,9 @@ template <size_t Size, size_t MaxSize> struct Memmove {
     Ptr headtail_dst = dst + count;
     CPtr headtail_src = src + count;
     size_t headtail_size = 0;
-    align_to_next_boundary<Size, AlignOn>(headtail_dst, headtail_src,
+    align_to_next_boundary<SIZE, AlignOn>(headtail_dst, headtail_src,
                                           headtail_size);
-    adjust(-2 * Size, headtail_dst, headtail_src, headtail_size);
+    adjust(-2 * SIZE, headtail_dst, headtail_src, headtail_size);
     head_tail(headtail_dst, headtail_src, headtail_size);
     count -= headtail_size;
   }
@@ -356,15 +346,15 @@ template <size_t Size, size_t MaxSize> struct Memmove {
   // [_______________________SSSSSSSS_____]
   LIBC_INLINE static void loop_and_tail_forward(Ptr dst, CPtr src,
                                                 size_t count) {
-    static_assert(Size > 1, "a loop of size 1 does not need tail");
-    const size_t tail_offset = count - Size;
+    static_assert(SIZE > 1, "a loop of size 1 does not need tail");
+    const size_t tail_offset = count - SIZE;
     const auto tail_value = load<T>(src + tail_offset);
     size_t offset = 0;
     LIBC_LOOP_NOUNROLL
     do {
       block(dst + offset, src + offset);
-      offset += Size;
-    } while (offset < count - Size);
+      offset += SIZE;
+    } while (offset < count - SIZE);
     store<T>(dst + tail_offset, tail_value);
   }
 
@@ -384,13 +374,13 @@ template <size_t Size, size_t MaxSize> struct Memmove {
   // [_____SSSSSSSS_______________________]
   LIBC_INLINE static void loop_and_tail_backward(Ptr dst, CPtr src,
                                                  size_t count) {
-    static_assert(Size > 1, "a loop of size 1 does not need tail");
+    static_assert(SIZE > 1, "a loop of size 1 does not need tail");
     const auto head_value = load<T>(src);
-    ptrdiff_t offset = count - Size;
+    ptrdiff_t offset = count - SIZE;
     LIBC_LOOP_NOUNROLL
     do {
       block(dst + offset, src + offset);
-      offset -= Size;
+      offset -= SIZE;
     } while (offset >= 0);
     store<T>(dst, head_value);
   }


        


More information about the libc-commits mailing list