[libc-commits] [libc] 1c4e4e0 - [libc][NFC] Split bcmp implementations per platform
    Guillaume Chatelet via libc-commits 
    libc-commits at lists.llvm.org
       
    Thu Jul 13 03:19:10 PDT 2023
    
    
  
Author: Guillaume Chatelet
Date: 2023-07-13T10:19:00Z
New Revision: 1c4e4e03bdf6e64670947600ec864a73eafafda0
URL: https://github.com/llvm/llvm-project/commit/1c4e4e03bdf6e64670947600ec864a73eafafda0
DIFF: https://github.com/llvm/llvm-project/commit/1c4e4e03bdf6e64670947600ec864a73eafafda0.diff
LOG: [libc][NFC] Split bcmp implementations per platform
This is a follow-up to D154800 and D154770 to make the code structure more principled and to avoid too many nested #ifdef/#endif.
Reviewed By: courbet
Differential Revision: https://reviews.llvm.org/D155076
Added: 
    libc/src/string/memory_utils/aarch64/bcmp_implementations.h
    libc/src/string/memory_utils/riscv/bcmp_implementations.h
    libc/src/string/memory_utils/x86_64/bcmp_implementations.h
Modified: 
    libc/src/string/memory_utils/CMakeLists.txt
    libc/src/string/memory_utils/bcmp_implementations.h
    utils/bazel/llvm-project-overlay/libc/BUILD.bazel
Removed: 
    
################################################################################
diff  --git a/libc/src/string/memory_utils/CMakeLists.txt b/libc/src/string/memory_utils/CMakeLists.txt
index 22e3249c1ff60a..d6fa1d9df32e8f 100644
--- a/libc/src/string/memory_utils/CMakeLists.txt
+++ b/libc/src/string/memory_utils/CMakeLists.txt
@@ -1,7 +1,8 @@
-#TODO(michaelrj): split out the implementations from memory_utils
+# TODO(michaelrj): split out the implementations from memory_utils
 add_header_library(
   memory_utils
   HDRS
+    aarch64/bcmp_implementations.h
     aarch64/memcmp_implementations.h
     aarch64/memcpy_implementations.h
     bcmp_implementations.h
@@ -16,7 +17,9 @@ add_header_library(
     op_builtin.h
     op_generic.h
     op_x86.h
+    riscv/bcmp_implementations.h
     utils.h
+    x86_64/bcmp_implementations.h
     x86_64/memcmp_implementations.h
     x86_64/memcpy_implementations.h
   DEPS
diff  --git a/libc/src/string/memory_utils/aarch64/bcmp_implementations.h b/libc/src/string/memory_utils/aarch64/bcmp_implementations.h
new file mode 100644
index 00000000000000..61c4c4c63bb3fa
--- /dev/null
+++ b/libc/src/string/memory_utils/aarch64/bcmp_implementations.h
@@ -0,0 +1,70 @@
+//===-- Bcmp implementation for aarch64 -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_BCMP_IMPLEMENTATIONS_H
+#define LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_BCMP_IMPLEMENTATIONS_H
+
+#include "src/__support/macros/attributes.h"   // LIBC_INLINE
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+#include "src/string/memory_utils/op_aarch64.h"
+#include "src/string/memory_utils/op_generic.h"
+#include "src/string/memory_utils/utils.h" // Ptr, CPtr
+
+#include <stddef.h> // size_t
+
+namespace __llvm_libc {
+
+[[maybe_unused]] LIBC_INLINE BcmpReturnType inline_bcmp_aarch64(CPtr p1,
+                                                                CPtr p2,
+                                                                size_t count) {
+  if (LIBC_LIKELY(count <= 32)) {
+    if (LIBC_UNLIKELY(count >= 16)) {
+      return aarch64::Bcmp<16>::head_tail(p1, p2, count);
+    }
+    switch (count) {
+    case 0:
+      return BcmpReturnType::ZERO();
+    case 1:
+      return generic::Bcmp<uint8_t>::block(p1, p2);
+    case 2:
+      return generic::Bcmp<uint16_t>::block(p1, p2);
+    case 3:
+      return generic::Bcmp<uint16_t>::head_tail(p1, p2, count);
+    case 4:
+      return generic::Bcmp<uint32_t>::block(p1, p2);
+    case 5:
+    case 6:
+    case 7:
+      return generic::Bcmp<uint32_t>::head_tail(p1, p2, count);
+    case 8:
+      return generic::Bcmp<uint64_t>::block(p1, p2);
+    case 9:
+    case 10:
+    case 11:
+    case 12:
+    case 13:
+    case 14:
+    case 15:
+      return generic::Bcmp<uint64_t>::head_tail(p1, p2, count);
+    }
+  }
+
+  if (count <= 64)
+    return aarch64::Bcmp<32>::head_tail(p1, p2, count);
+
+  // Aligned loop if > 256, otherwise normal loop
+  if (LIBC_UNLIKELY(count > 256)) {
+    if (auto value = aarch64::Bcmp<32>::block(p1, p2))
+      return value;
+    align_to_next_boundary<16, Arg::P1>(p1, p2, count);
+  }
+  return aarch64::Bcmp<32>::loop_and_tail(p1, p2, count);
+}
+
+} // namespace __llvm_libc
+
+#endif // LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_BCMP_IMPLEMENTATIONS_H
diff  --git a/libc/src/string/memory_utils/bcmp_implementations.h b/libc/src/string/memory_utils/bcmp_implementations.h
index 7bfc1737f6126a..bfd52014325e5c 100644
--- a/libc/src/string/memory_utils/bcmp_implementations.h
+++ b/libc/src/string/memory_utils/bcmp_implementations.h
@@ -1,4 +1,4 @@
-//===-- Implementation of bcmp --------------------------------------------===//
+//===-- Dispatch logic for bcmp -------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -10,164 +10,34 @@
 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BCMP_IMPLEMENTATIONS_H
 
 #include "src/__support/common.h"
-#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY LIBC_LOOP_NOUNROLL
-#include "src/__support/macros/properties/architectures.h"
-#include "src/string/memory_utils/generic/aligned_access.h"
-#include "src/string/memory_utils/generic/byte_per_byte.h"
-#include "src/string/memory_utils/op_aarch64.h"
-#include "src/string/memory_utils/op_builtin.h"
-#include "src/string/memory_utils/op_generic.h"
-#include "src/string/memory_utils/op_riscv.h"
-#include "src/string/memory_utils/op_x86.h"
+#include "src/__support/macros/properties/architectures.h" // LIBC_TARGET_ARCH_IS_
 
 #include <stddef.h> // size_t
 
-namespace __llvm_libc {
-
-#if defined(LIBC_TARGET_ARCH_IS_X86) || defined(LIBC_TARGET_ARCH_IS_AARCH64)
-[[maybe_unused]] LIBC_INLINE BcmpReturnType
-inline_bcmp_generic_gt16(CPtr p1, CPtr p2, size_t count) {
-  return generic::Bcmp<uint64_t>::loop_and_tail_align_above(256, p1, p2, count);
-}
-#endif // defined(LIBC_TARGET_ARCH_IS_X86) ||
-       // defined(LIBC_TARGET_ARCH_IS_AARCH64)
-
 #if defined(LIBC_TARGET_ARCH_IS_X86)
-#if defined(__SSE4_1__)
-[[maybe_unused]] LIBC_INLINE BcmpReturnType
-inline_bcmp_x86_sse41_gt16(CPtr p1, CPtr p2, size_t count) {
-  if (count <= 32)
-    return generic::Bcmp<__m128i>::head_tail(p1, p2, count);
-  return generic::Bcmp<__m128i>::loop_and_tail_align_above(256, p1, p2, count);
-}
-#endif // __SSE4_1__
-
-#if defined(__AVX__)
-[[maybe_unused]] LIBC_INLINE BcmpReturnType
-inline_bcmp_x86_avx_gt16(CPtr p1, CPtr p2, size_t count) {
-  if (count <= 32)
-    return generic::Bcmp<__m128i>::head_tail(p1, p2, count);
-  if (count <= 64)
-    return generic::Bcmp<__m256i>::head_tail(p1, p2, count);
-  return generic::Bcmp<__m256i>::loop_and_tail_align_above(256, p1, p2, count);
-}
-#endif // __AVX__
-
-#if defined(__AVX512BW__)
-[[maybe_unused]] LIBC_INLINE BcmpReturnType
-inline_bcmp_x86_avx512bw_gt16(CPtr p1, CPtr p2, size_t count) {
-  if (count <= 32)
-    return generic::Bcmp<__m128i>::head_tail(p1, p2, count);
-  if (count <= 64)
-    return generic::Bcmp<__m256i>::head_tail(p1, p2, count);
-  if (count <= 128)
-    return generic::Bcmp<__m512i>::head_tail(p1, p2, count);
-  return generic::Bcmp<__m512i>::loop_and_tail_align_above(256, p1, p2, count);
-}
-#endif // __AVX512BW__
-
-[[maybe_unused]] LIBC_INLINE BcmpReturnType inline_bcmp_x86(CPtr p1, CPtr p2,
-                                                            size_t count) {
-  if (count == 0)
-    return BcmpReturnType::ZERO();
-  if (count == 1)
-    return generic::Bcmp<uint8_t>::block(p1, p2);
-  if (count == 2)
-    return generic::Bcmp<uint16_t>::block(p1, p2);
-  if (count == 3)
-    return generic::BcmpSequence<uint16_t, uint8_t>::block(p1, p2);
-  if (count == 4)
-    return generic::Bcmp<uint32_t>::block(p1, p2);
-  if (count == 5)
-    return generic::BcmpSequence<uint32_t, uint8_t>::block(p1, p2);
-  if (count == 6)
-    return generic::BcmpSequence<uint32_t, uint16_t>::block(p1, p2);
-  if (count == 7)
-    return generic::BcmpSequence<uint32_t, uint16_t, uint8_t>::block(p1, p2);
-  if (count == 8)
-    return generic::Bcmp<uint64_t>::block(p1, p2);
-  if (count <= 16)
-    return generic::Bcmp<uint64_t>::head_tail(p1, p2, count);
-#if defined(__AVX512BW__)
-  return inline_bcmp_x86_avx512bw_gt16(p1, p2, count);
-#elif defined(__AVX__)
-  return inline_bcmp_x86_avx_gt16(p1, p2, count);
-#elif defined(__SSE4_1__)
-  return inline_bcmp_x86_sse41_gt16(p1, p2, count);
-#else
-  return inline_bcmp_generic_gt16(p1, p2, count);
-#endif
-}
-#endif // defined(LIBC_TARGET_ARCH_IS_X86)
-
-#if defined(LIBC_TARGET_ARCH_IS_AARCH64)
-[[maybe_unused]] LIBC_INLINE BcmpReturnType inline_bcmp_aarch64(CPtr p1,
-                                                                CPtr p2,
-                                                                size_t count) {
-  if (LIBC_LIKELY(count <= 32)) {
-    if (LIBC_UNLIKELY(count >= 16)) {
-      return aarch64::Bcmp<16>::head_tail(p1, p2, count);
-    }
-    switch (count) {
-    case 0:
-      return BcmpReturnType::ZERO();
-    case 1:
-      return generic::Bcmp<uint8_t>::block(p1, p2);
-    case 2:
-      return generic::Bcmp<uint16_t>::block(p1, p2);
-    case 3:
-      return generic::Bcmp<uint16_t>::head_tail(p1, p2, count);
-    case 4:
-      return generic::Bcmp<uint32_t>::block(p1, p2);
-    case 5:
-    case 6:
-    case 7:
-      return generic::Bcmp<uint32_t>::head_tail(p1, p2, count);
-    case 8:
-      return generic::Bcmp<uint64_t>::block(p1, p2);
-    case 9:
-    case 10:
-    case 11:
-    case 12:
-    case 13:
-    case 14:
-    case 15:
-      return generic::Bcmp<uint64_t>::head_tail(p1, p2, count);
-    }
-  }
-
-  if (count <= 64)
-    return aarch64::Bcmp<32>::head_tail(p1, p2, count);
-
-  // Aligned loop if > 256, otherwise normal loop
-  if (LIBC_UNLIKELY(count > 256)) {
-    if (auto value = aarch64::Bcmp<32>::block(p1, p2))
-      return value;
-    align_to_next_boundary<16, Arg::P1>(p1, p2, count);
-  }
-  return aarch64::Bcmp<32>::loop_and_tail(p1, p2, count);
-}
-#endif // defined(LIBC_TARGET_ARCH_IS_AARCH64)
-
-LIBC_INLINE BcmpReturnType inline_bcmp(CPtr p1, CPtr p2, size_t count) {
-#if defined(LIBC_TARGET_ARCH_IS_X86)
-  return inline_bcmp_x86(p1, p2, count);
+#include "src/string/memory_utils/x86_64/bcmp_implementations.h"
+#define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_x86
 #elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
-  return inline_bcmp_aarch64(p1, p2, count);
-#elif defined(LIBC_TARGET_ARCH_IS_RISCV64)
-  return inline_bcmp_aligned_access_64bit(p1, p2, count);
-#elif defined(LIBC_TARGET_ARCH_IS_RISCV32)
-  return inline_bcmp_aligned_access_32bit(p1, p2, count);
+#include "src/string/memory_utils/aarch64/bcmp_implementations.h"
+#define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_aarch64
+#elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
+#include "src/string/memory_utils/riscv/bcmp_implementations.h"
+#define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_riscv
 #else
-  return inline_bcmp_byte_per_byte(p1, p2, count);
+// We may want to error instead of defaulting to suboptimal implementation.
+#include "src/string/memory_utils/generic/byte_per_byte.h"
+#define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_byte_per_byte
 #endif
-}
+
+namespace __llvm_libc {
 
 LIBC_INLINE int inline_bcmp(const void *p1, const void *p2, size_t count) {
-  return static_cast<int>(inline_bcmp(reinterpret_cast<CPtr>(p1),
-                                      reinterpret_cast<CPtr>(p2), count));
+  return static_cast<int>(LIBC_SRC_STRING_MEMORY_UTILS_BCMP(
+      reinterpret_cast<CPtr>(p1), reinterpret_cast<CPtr>(p2), count));
 }
 
 } // namespace __llvm_libc
 
+#undef LIBC_SRC_STRING_MEMORY_UTILS_BCMP
+
 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BCMP_IMPLEMENTATIONS_H
diff  --git a/libc/src/string/memory_utils/riscv/bcmp_implementations.h b/libc/src/string/memory_utils/riscv/bcmp_implementations.h
new file mode 100644
index 00000000000000..b7a29fea2c2fbf
--- /dev/null
+++ b/libc/src/string/memory_utils/riscv/bcmp_implementations.h
@@ -0,0 +1,33 @@
+//===-- Bcmp implementation for riscv ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LIBC_SRC_STRING_MEMORY_UTILS_RISCV_BCMP_IMPLEMENTATIONS_H
+#define LIBC_SRC_STRING_MEMORY_UTILS_RISCV_BCMP_IMPLEMENTATIONS_H
+
+#include "src/__support/macros/attributes.h"               // LIBC_INLINE
+#include "src/__support/macros/properties/architectures.h" // LIBC_TARGET_ARCH_IS_RISCV64
+#include "src/string/memory_utils/generic/aligned_access.h"
+#include "src/string/memory_utils/utils.h" // Ptr, CPtr
+
+#include <stddef.h> // size_t
+
+namespace __llvm_libc {
+
+[[maybe_unused]] LIBC_INLINE BcmpReturnType inline_bcmp_riscv(CPtr p1, CPtr p2,
+                                                              size_t count) {
+#if defined(LIBC_TARGET_ARCH_IS_RISCV64)
+  return inline_bcmp_aligned_access_64bit(p1, p2, count);
+#elif defined(LIBC_TARGET_ARCH_IS_RISCV32)
+  return inline_bcmp_aligned_access_32bit(p1, p2, count);
+#else
+#error "Unimplemented"
+#endif
+}
+
+} // namespace __llvm_libc
+
+#endif // LIBC_SRC_STRING_MEMORY_UTILS_RISCV_BCMP_IMPLEMENTATIONS_H
diff  --git a/libc/src/string/memory_utils/x86_64/bcmp_implementations.h b/libc/src/string/memory_utils/x86_64/bcmp_implementations.h
new file mode 100644
index 00000000000000..4c610fcac0967c
--- /dev/null
+++ b/libc/src/string/memory_utils/x86_64/bcmp_implementations.h
@@ -0,0 +1,93 @@
+//===-- Bcmp implementation for x86_64 --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LIBC_SRC_STRING_MEMORY_UTILS_X86_64_BCMP_IMPLEMENTATIONS_H
+#define LIBC_SRC_STRING_MEMORY_UTILS_X86_64_BCMP_IMPLEMENTATIONS_H
+
+#include "src/__support/macros/attributes.h" // LIBC_INLINE
+#include "src/string/memory_utils/op_generic.h"
+#include "src/string/memory_utils/op_x86.h"
+#include "src/string/memory_utils/utils.h" // Ptr, CPtr
+
+#include <stddef.h> // size_t
+
+namespace __llvm_libc {
+
+[[maybe_unused]] LIBC_INLINE BcmpReturnType
+inline_bcmp_generic_gt16(CPtr p1, CPtr p2, size_t count) {
+  return generic::Bcmp<uint64_t>::loop_and_tail_align_above(256, p1, p2, count);
+}
+
+#if defined(__SSE4_1__)
+[[maybe_unused]] LIBC_INLINE BcmpReturnType
+inline_bcmp_x86_sse41_gt16(CPtr p1, CPtr p2, size_t count) {
+  if (count <= 32)
+    return generic::Bcmp<__m128i>::head_tail(p1, p2, count);
+  return generic::Bcmp<__m128i>::loop_and_tail_align_above(256, p1, p2, count);
+}
+#endif // __SSE4_1__
+
+#if defined(__AVX__)
+[[maybe_unused]] LIBC_INLINE BcmpReturnType
+inline_bcmp_x86_avx_gt16(CPtr p1, CPtr p2, size_t count) {
+  if (count <= 32)
+    return generic::Bcmp<__m128i>::head_tail(p1, p2, count);
+  if (count <= 64)
+    return generic::Bcmp<__m256i>::head_tail(p1, p2, count);
+  return generic::Bcmp<__m256i>::loop_and_tail_align_above(256, p1, p2, count);
+}
+#endif // __AVX__
+
+#if defined(__AVX512BW__)
+[[maybe_unused]] LIBC_INLINE BcmpReturnType
+inline_bcmp_x86_avx512bw_gt16(CPtr p1, CPtr p2, size_t count) {
+  if (count <= 32)
+    return generic::Bcmp<__m128i>::head_tail(p1, p2, count);
+  if (count <= 64)
+    return generic::Bcmp<__m256i>::head_tail(p1, p2, count);
+  if (count <= 128)
+    return generic::Bcmp<__m512i>::head_tail(p1, p2, count);
+  return generic::Bcmp<__m512i>::loop_and_tail_align_above(256, p1, p2, count);
+}
+#endif // __AVX512BW__
+
+[[maybe_unused]] LIBC_INLINE BcmpReturnType inline_bcmp_x86(CPtr p1, CPtr p2,
+                                                            size_t count) {
+  if (count == 0)
+    return BcmpReturnType::ZERO();
+  if (count == 1)
+    return generic::Bcmp<uint8_t>::block(p1, p2);
+  if (count == 2)
+    return generic::Bcmp<uint16_t>::block(p1, p2);
+  if (count == 3)
+    return generic::BcmpSequence<uint16_t, uint8_t>::block(p1, p2);
+  if (count == 4)
+    return generic::Bcmp<uint32_t>::block(p1, p2);
+  if (count == 5)
+    return generic::BcmpSequence<uint32_t, uint8_t>::block(p1, p2);
+  if (count == 6)
+    return generic::BcmpSequence<uint32_t, uint16_t>::block(p1, p2);
+  if (count == 7)
+    return generic::BcmpSequence<uint32_t, uint16_t, uint8_t>::block(p1, p2);
+  if (count == 8)
+    return generic::Bcmp<uint64_t>::block(p1, p2);
+  if (count <= 16)
+    return generic::Bcmp<uint64_t>::head_tail(p1, p2, count);
+#if defined(__AVX512BW__)
+  return inline_bcmp_x86_avx512bw_gt16(p1, p2, count);
+#elif defined(__AVX__)
+  return inline_bcmp_x86_avx_gt16(p1, p2, count);
+#elif defined(__SSE4_1__)
+  return inline_bcmp_x86_sse41_gt16(p1, p2, count);
+#else
+  return inline_bcmp_generic_gt16(p1, p2, count);
+#endif
+}
+
+} // namespace __llvm_libc
+
+#endif // LIBC_SRC_STRING_MEMORY_UTILS_X86_64_BCMP_IMPLEMENTATIONS_H
diff  --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index a67f248b1bb134..46c88527d73e2c 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -2009,6 +2009,7 @@ libc_support_library(
     ],
     defines = MEMORY_COPTS,
     textual_hdrs = [
+        "src/string/memory_utils/aarch64/bcmp_implementations.h",
         "src/string/memory_utils/aarch64/memcmp_implementations.h",
         "src/string/memory_utils/aarch64/memcpy_implementations.h",
         "src/string/memory_utils/bcmp_implementations.h",
@@ -2020,8 +2021,10 @@ libc_support_library(
         "src/string/memory_utils/memmem_implementations.h",
         "src/string/memory_utils/memmove_implementations.h",
         "src/string/memory_utils/memset_implementations.h",
+        "src/string/memory_utils/riscv/bcmp_implementations.h",
         "src/string/memory_utils/strcmp_implementations.h",
         "src/string/memory_utils/strstr_implementations.h",
+        "src/string/memory_utils/x86_64/bcmp_implementations.h",
         "src/string/memory_utils/x86_64/memcmp_implementations.h",
         "src/string/memory_utils/x86_64/memcpy_implementations.h",
     ],
        
    
    
More information about the libc-commits mailing list