[libc-commits] [libc] 1c4e4e0 - [libc][NFC] Split bcmp implementations per platform
Guillaume Chatelet via libc-commits
libc-commits at lists.llvm.org
Thu Jul 13 03:19:10 PDT 2023
Author: Guillaume Chatelet
Date: 2023-07-13T10:19:00Z
New Revision: 1c4e4e03bdf6e64670947600ec864a73eafafda0
URL: https://github.com/llvm/llvm-project/commit/1c4e4e03bdf6e64670947600ec864a73eafafda0
DIFF: https://github.com/llvm/llvm-project/commit/1c4e4e03bdf6e64670947600ec864a73eafafda0.diff
LOG: [libc][NFC] Split bcmp implementations per platform
This is a follow-up to D154800 and D154770 that makes the code structure more principled and avoids deeply nested #ifdef/#endif blocks.
Reviewed By: courbet
Differential Revision: https://reviews.llvm.org/D155076
Added:
libc/src/string/memory_utils/aarch64/bcmp_implementations.h
libc/src/string/memory_utils/riscv/bcmp_implementations.h
libc/src/string/memory_utils/x86_64/bcmp_implementations.h
Modified:
libc/src/string/memory_utils/CMakeLists.txt
libc/src/string/memory_utils/bcmp_implementations.h
utils/bazel/llvm-project-overlay/libc/BUILD.bazel
Removed:
################################################################################
diff --git a/libc/src/string/memory_utils/CMakeLists.txt b/libc/src/string/memory_utils/CMakeLists.txt
index 22e3249c1ff60a..d6fa1d9df32e8f 100644
--- a/libc/src/string/memory_utils/CMakeLists.txt
+++ b/libc/src/string/memory_utils/CMakeLists.txt
@@ -1,7 +1,8 @@
-#TODO(michaelrj): split out the implementations from memory_utils
+# TODO(michaelrj): split out the implementations from memory_utils
add_header_library(
memory_utils
HDRS
+ aarch64/bcmp_implementations.h
aarch64/memcmp_implementations.h
aarch64/memcpy_implementations.h
bcmp_implementations.h
@@ -16,7 +17,9 @@ add_header_library(
op_builtin.h
op_generic.h
op_x86.h
+ riscv/bcmp_implementations.h
utils.h
+ x86_64/bcmp_implementations.h
x86_64/memcmp_implementations.h
x86_64/memcpy_implementations.h
DEPS
diff --git a/libc/src/string/memory_utils/aarch64/bcmp_implementations.h b/libc/src/string/memory_utils/aarch64/bcmp_implementations.h
new file mode 100644
index 00000000000000..61c4c4c63bb3fa
--- /dev/null
+++ b/libc/src/string/memory_utils/aarch64/bcmp_implementations.h
@@ -0,0 +1,70 @@
+//===-- Bcmp implementation for aarch64 -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_BCMP_IMPLEMENTATIONS_H
+#define LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_BCMP_IMPLEMENTATIONS_H
+
+#include "src/__support/macros/attributes.h" // LIBC_INLINE
+#include "src/__support/macros/optimization.h" // LIBC_LIKELY, LIBC_UNLIKELY
+#include "src/string/memory_utils/op_aarch64.h"
+#include "src/string/memory_utils/op_generic.h"
+#include "src/string/memory_utils/utils.h" // Ptr, CPtr
+
+#include <stddef.h> // size_t
+
+namespace __llvm_libc {
+// bcmp for aarch64: selects a comparison strategy purely from `count`.
+[[maybe_unused]] LIBC_INLINE BcmpReturnType inline_bcmp_aarch64(CPtr p1,
+ CPtr p2,
+ size_t count) {
+ if (LIBC_LIKELY(count <= 32)) { // small sizes are annotated as the expected case
+ if (LIBC_UNLIKELY(count >= 16)) { // 16..32 bytes
+ return aarch64::Bcmp<16>::head_tail(p1, p2, count);
+ }
+ switch (count) { // 0..15 bytes: every case below returns
+ case 0:
+ return BcmpReturnType::ZERO();
+ case 1:
+ return generic::Bcmp<uint8_t>::block(p1, p2);
+ case 2:
+ return generic::Bcmp<uint16_t>::block(p1, p2);
+ case 3: // presumably two overlapping uint16_t compares — confirm in op_generic.h
+ return generic::Bcmp<uint16_t>::head_tail(p1, p2, count);
+ case 4:
+ return generic::Bcmp<uint32_t>::block(p1, p2);
+ case 5:
+ case 6:
+ case 7:
+ return generic::Bcmp<uint32_t>::head_tail(p1, p2, count);
+ case 8:
+ return generic::Bcmp<uint64_t>::block(p1, p2);
+ case 9:
+ case 10:
+ case 11:
+ case 12:
+ case 13:
+ case 14:
+ case 15:
+ return generic::Bcmp<uint64_t>::head_tail(p1, p2, count);
+ }
+ }
+ // Only count > 32 reaches this point: every path above returns.
+ if (count <= 64) // 33..64 bytes
+ return aarch64::Bcmp<32>::head_tail(p1, p2, count);
+
+ // Aligned loop if > 256, otherwise normal loop
+ if (LIBC_UNLIKELY(count > 256)) {
+ if (auto value = aarch64::Bcmp<32>::block(p1, p2)) // return early on a non-zero result
+ return value;
+ align_to_next_boundary<16, Arg::P1>(p1, p2, count); // presumably aligns p1 to 16 bytes and adjusts p2/count — confirm in utils.h
+ }
+ return aarch64::Bcmp<32>::loop_and_tail(p1, p2, count);
+}
+
+} // namespace __llvm_libc
+
+#endif // LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_BCMP_IMPLEMENTATIONS_H
diff --git a/libc/src/string/memory_utils/bcmp_implementations.h b/libc/src/string/memory_utils/bcmp_implementations.h
index 7bfc1737f6126a..bfd52014325e5c 100644
--- a/libc/src/string/memory_utils/bcmp_implementations.h
+++ b/libc/src/string/memory_utils/bcmp_implementations.h
@@ -1,4 +1,4 @@
-//===-- Implementation of bcmp --------------------------------------------===//
+//===-- Dispatch logic for bcmp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -10,164 +10,34 @@
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BCMP_IMPLEMENTATIONS_H
#include "src/__support/common.h"
-#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY LIBC_LOOP_NOUNROLL
-#include "src/__support/macros/properties/architectures.h"
-#include "src/string/memory_utils/generic/aligned_access.h"
-#include "src/string/memory_utils/generic/byte_per_byte.h"
-#include "src/string/memory_utils/op_aarch64.h"
-#include "src/string/memory_utils/op_builtin.h"
-#include "src/string/memory_utils/op_generic.h"
-#include "src/string/memory_utils/op_riscv.h"
-#include "src/string/memory_utils/op_x86.h"
+#include "src/__support/macros/properties/architectures.h" // LIBC_TARGET_ARCH_IS_
#include <stddef.h> // size_t
-namespace __llvm_libc {
-
-#if defined(LIBC_TARGET_ARCH_IS_X86) || defined(LIBC_TARGET_ARCH_IS_AARCH64)
-[[maybe_unused]] LIBC_INLINE BcmpReturnType
-inline_bcmp_generic_gt16(CPtr p1, CPtr p2, size_t count) {
- return generic::Bcmp<uint64_t>::loop_and_tail_align_above(256, p1, p2, count);
-}
-#endif // defined(LIBC_TARGET_ARCH_IS_X86) ||
- // defined(LIBC_TARGET_ARCH_IS_AARCH64)
-
#if defined(LIBC_TARGET_ARCH_IS_X86)
-#if defined(__SSE4_1__)
-[[maybe_unused]] LIBC_INLINE BcmpReturnType
-inline_bcmp_x86_sse41_gt16(CPtr p1, CPtr p2, size_t count) {
- if (count <= 32)
- return generic::Bcmp<__m128i>::head_tail(p1, p2, count);
- return generic::Bcmp<__m128i>::loop_and_tail_align_above(256, p1, p2, count);
-}
-#endif // __SSE4_1__
-
-#if defined(__AVX__)
-[[maybe_unused]] LIBC_INLINE BcmpReturnType
-inline_bcmp_x86_avx_gt16(CPtr p1, CPtr p2, size_t count) {
- if (count <= 32)
- return generic::Bcmp<__m128i>::head_tail(p1, p2, count);
- if (count <= 64)
- return generic::Bcmp<__m256i>::head_tail(p1, p2, count);
- return generic::Bcmp<__m256i>::loop_and_tail_align_above(256, p1, p2, count);
-}
-#endif // __AVX__
-
-#if defined(__AVX512BW__)
-[[maybe_unused]] LIBC_INLINE BcmpReturnType
-inline_bcmp_x86_avx512bw_gt16(CPtr p1, CPtr p2, size_t count) {
- if (count <= 32)
- return generic::Bcmp<__m128i>::head_tail(p1, p2, count);
- if (count <= 64)
- return generic::Bcmp<__m256i>::head_tail(p1, p2, count);
- if (count <= 128)
- return generic::Bcmp<__m512i>::head_tail(p1, p2, count);
- return generic::Bcmp<__m512i>::loop_and_tail_align_above(256, p1, p2, count);
-}
-#endif // __AVX512BW__
-
-[[maybe_unused]] LIBC_INLINE BcmpReturnType inline_bcmp_x86(CPtr p1, CPtr p2,
- size_t count) {
- if (count == 0)
- return BcmpReturnType::ZERO();
- if (count == 1)
- return generic::Bcmp<uint8_t>::block(p1, p2);
- if (count == 2)
- return generic::Bcmp<uint16_t>::block(p1, p2);
- if (count == 3)
- return generic::BcmpSequence<uint16_t, uint8_t>::block(p1, p2);
- if (count == 4)
- return generic::Bcmp<uint32_t>::block(p1, p2);
- if (count == 5)
- return generic::BcmpSequence<uint32_t, uint8_t>::block(p1, p2);
- if (count == 6)
- return generic::BcmpSequence<uint32_t, uint16_t>::block(p1, p2);
- if (count == 7)
- return generic::BcmpSequence<uint32_t, uint16_t, uint8_t>::block(p1, p2);
- if (count == 8)
- return generic::Bcmp<uint64_t>::block(p1, p2);
- if (count <= 16)
- return generic::Bcmp<uint64_t>::head_tail(p1, p2, count);
-#if defined(__AVX512BW__)
- return inline_bcmp_x86_avx512bw_gt16(p1, p2, count);
-#elif defined(__AVX__)
- return inline_bcmp_x86_avx_gt16(p1, p2, count);
-#elif defined(__SSE4_1__)
- return inline_bcmp_x86_sse41_gt16(p1, p2, count);
-#else
- return inline_bcmp_generic_gt16(p1, p2, count);
-#endif
-}
-#endif // defined(LIBC_TARGET_ARCH_IS_X86)
-
-#if defined(LIBC_TARGET_ARCH_IS_AARCH64)
-[[maybe_unused]] LIBC_INLINE BcmpReturnType inline_bcmp_aarch64(CPtr p1,
- CPtr p2,
- size_t count) {
- if (LIBC_LIKELY(count <= 32)) {
- if (LIBC_UNLIKELY(count >= 16)) {
- return aarch64::Bcmp<16>::head_tail(p1, p2, count);
- }
- switch (count) {
- case 0:
- return BcmpReturnType::ZERO();
- case 1:
- return generic::Bcmp<uint8_t>::block(p1, p2);
- case 2:
- return generic::Bcmp<uint16_t>::block(p1, p2);
- case 3:
- return generic::Bcmp<uint16_t>::head_tail(p1, p2, count);
- case 4:
- return generic::Bcmp<uint32_t>::block(p1, p2);
- case 5:
- case 6:
- case 7:
- return generic::Bcmp<uint32_t>::head_tail(p1, p2, count);
- case 8:
- return generic::Bcmp<uint64_t>::block(p1, p2);
- case 9:
- case 10:
- case 11:
- case 12:
- case 13:
- case 14:
- case 15:
- return generic::Bcmp<uint64_t>::head_tail(p1, p2, count);
- }
- }
-
- if (count <= 64)
- return aarch64::Bcmp<32>::head_tail(p1, p2, count);
-
- // Aligned loop if > 256, otherwise normal loop
- if (LIBC_UNLIKELY(count > 256)) {
- if (auto value = aarch64::Bcmp<32>::block(p1, p2))
- return value;
- align_to_next_boundary<16, Arg::P1>(p1, p2, count);
- }
- return aarch64::Bcmp<32>::loop_and_tail(p1, p2, count);
-}
-#endif // defined(LIBC_TARGET_ARCH_IS_AARCH64)
-
-LIBC_INLINE BcmpReturnType inline_bcmp(CPtr p1, CPtr p2, size_t count) {
-#if defined(LIBC_TARGET_ARCH_IS_X86)
- return inline_bcmp_x86(p1, p2, count);
+#include "src/string/memory_utils/x86_64/bcmp_implementations.h"
+#define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_x86
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
- return inline_bcmp_aarch64(p1, p2, count);
-#elif defined(LIBC_TARGET_ARCH_IS_RISCV64)
- return inline_bcmp_aligned_access_64bit(p1, p2, count);
-#elif defined(LIBC_TARGET_ARCH_IS_RISCV32)
- return inline_bcmp_aligned_access_32bit(p1, p2, count);
+#include "src/string/memory_utils/aarch64/bcmp_implementations.h"
+#define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_aarch64
+#elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
+#include "src/string/memory_utils/riscv/bcmp_implementations.h"
+#define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_riscv
#else
- return inline_bcmp_byte_per_byte(p1, p2, count);
+// We may want to error out instead of defaulting to a suboptimal implementation.
+#include "src/string/memory_utils/generic/byte_per_byte.h"
+#define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_byte_per_byte
#endif
-}
+
+namespace __llvm_libc {
LIBC_INLINE int inline_bcmp(const void *p1, const void *p2, size_t count) {
- return static_cast<int>(inline_bcmp(reinterpret_cast<CPtr>(p1),
- reinterpret_cast<CPtr>(p2), count));
+ return static_cast<int>(LIBC_SRC_STRING_MEMORY_UTILS_BCMP(
+ reinterpret_cast<CPtr>(p1), reinterpret_cast<CPtr>(p2), count));
}
} // namespace __llvm_libc
+#undef LIBC_SRC_STRING_MEMORY_UTILS_BCMP
+
#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BCMP_IMPLEMENTATIONS_H
diff --git a/libc/src/string/memory_utils/riscv/bcmp_implementations.h b/libc/src/string/memory_utils/riscv/bcmp_implementations.h
new file mode 100644
index 00000000000000..b7a29fea2c2fbf
--- /dev/null
+++ b/libc/src/string/memory_utils/riscv/bcmp_implementations.h
@@ -0,0 +1,33 @@
+//===-- Bcmp implementation for riscv ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LIBC_SRC_STRING_MEMORY_UTILS_RISCV_BCMP_IMPLEMENTATIONS_H
+#define LIBC_SRC_STRING_MEMORY_UTILS_RISCV_BCMP_IMPLEMENTATIONS_H
+
+#include "src/__support/macros/attributes.h" // LIBC_INLINE
+#include "src/__support/macros/properties/architectures.h" // LIBC_TARGET_ARCH_IS_RISCV64
+#include "src/string/memory_utils/generic/aligned_access.h"
+#include "src/string/memory_utils/utils.h" // Ptr, CPtr
+
+#include <stddef.h> // size_t
+
+namespace __llvm_libc {
+// bcmp for RISC-V: forwards to the 64-bit or 32-bit aligned-access helper chosen by the target macro.
+[[maybe_unused]] LIBC_INLINE BcmpReturnType inline_bcmp_riscv(CPtr p1, CPtr p2,
+ size_t count) {
+#if defined(LIBC_TARGET_ARCH_IS_RISCV64)
+ return inline_bcmp_aligned_access_64bit(p1, p2, count);
+#elif defined(LIBC_TARGET_ARCH_IS_RISCV32)
+ return inline_bcmp_aligned_access_32bit(p1, p2, count);
+#else // neither RISCV64 nor RISCV32 is defined: fail the build
+#error "Unimplemented"
+#endif
+}
+
+} // namespace __llvm_libc
+
+#endif // LIBC_SRC_STRING_MEMORY_UTILS_RISCV_BCMP_IMPLEMENTATIONS_H
diff --git a/libc/src/string/memory_utils/x86_64/bcmp_implementations.h b/libc/src/string/memory_utils/x86_64/bcmp_implementations.h
new file mode 100644
index 00000000000000..4c610fcac0967c
--- /dev/null
+++ b/libc/src/string/memory_utils/x86_64/bcmp_implementations.h
@@ -0,0 +1,93 @@
+//===-- Bcmp implementation for x86_64 --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LIBC_SRC_STRING_MEMORY_UTILS_X86_64_BCMP_IMPLEMENTATIONS_H
+#define LIBC_SRC_STRING_MEMORY_UTILS_X86_64_BCMP_IMPLEMENTATIONS_H
+
+#include "src/__support/macros/attributes.h" // LIBC_INLINE
+#include "src/string/memory_utils/op_generic.h"
+#include "src/string/memory_utils/op_x86.h"
+#include "src/string/memory_utils/utils.h" // Ptr, CPtr
+
+#include <stddef.h> // size_t
+
+namespace __llvm_libc {
+// Path for count > 16 that inline_bcmp_x86 uses when none of __AVX512BW__, __AVX__, __SSE4_1__ is defined.
+[[maybe_unused]] LIBC_INLINE BcmpReturnType
+inline_bcmp_generic_gt16(CPtr p1, CPtr p2, size_t count) {
+ return generic::Bcmp<uint64_t>::loop_and_tail_align_above(256, p1, p2, count); // 256: presumably the size above which the loop aligns first — confirm
+}
+// SSE4.1 path for count > 16, built on __m128i compares; only compiled when __SSE4_1__ is defined.
+#if defined(__SSE4_1__)
+[[maybe_unused]] LIBC_INLINE BcmpReturnType
+inline_bcmp_x86_sse41_gt16(CPtr p1, CPtr p2, size_t count) {
+ if (count <= 32) // 17..32 bytes when called from inline_bcmp_x86
+ return generic::Bcmp<__m128i>::head_tail(p1, p2, count);
+ return generic::Bcmp<__m128i>::loop_and_tail_align_above(256, p1, p2, count);
+}
+#endif // __SSE4_1__
+// AVX path for count > 16: __m128i up to 32 bytes, __m256i beyond; only compiled when __AVX__ is defined.
+#if defined(__AVX__)
+[[maybe_unused]] LIBC_INLINE BcmpReturnType
+inline_bcmp_x86_avx_gt16(CPtr p1, CPtr p2, size_t count) {
+ if (count <= 32) // 17..32 bytes when called from inline_bcmp_x86
+ return generic::Bcmp<__m128i>::head_tail(p1, p2, count);
+ if (count <= 64) // 33..64 bytes
+ return generic::Bcmp<__m256i>::head_tail(p1, p2, count);
+ return generic::Bcmp<__m256i>::loop_and_tail_align_above(256, p1, p2, count);
+}
+#endif // __AVX__
+// AVX512BW path for count > 16: steps through __m128i, __m256i and __m512i as count grows.
+#if defined(__AVX512BW__)
+[[maybe_unused]] LIBC_INLINE BcmpReturnType
+inline_bcmp_x86_avx512bw_gt16(CPtr p1, CPtr p2, size_t count) {
+ if (count <= 32) // 17..32 bytes when called from inline_bcmp_x86
+ return generic::Bcmp<__m128i>::head_tail(p1, p2, count);
+ if (count <= 64) // 33..64 bytes
+ return generic::Bcmp<__m256i>::head_tail(p1, p2, count);
+ if (count <= 128) // 65..128 bytes
+ return generic::Bcmp<__m512i>::head_tail(p1, p2, count);
+ return generic::Bcmp<__m512i>::loop_and_tail_align_above(256, p1, p2, count);
+}
+#endif // __AVX512BW__
+// x86 bcmp entry point: handles count <= 16 here, then defers to an ISA-specific helper for larger sizes.
+[[maybe_unused]] LIBC_INLINE BcmpReturnType inline_bcmp_x86(CPtr p1, CPtr p2,
+ size_t count) {
+ if (count == 0)
+ return BcmpReturnType::ZERO();
+ if (count == 1)
+ return generic::Bcmp<uint8_t>::block(p1, p2);
+ if (count == 2)
+ return generic::Bcmp<uint16_t>::block(p1, p2);
+ if (count == 3) // presumably a 2-byte compare followed by a 1-byte compare — confirm in op_generic.h
+ return generic::BcmpSequence<uint16_t, uint8_t>::block(p1, p2);
+ if (count == 4)
+ return generic::Bcmp<uint32_t>::block(p1, p2);
+ if (count == 5)
+ return generic::BcmpSequence<uint32_t, uint8_t>::block(p1, p2);
+ if (count == 6)
+ return generic::BcmpSequence<uint32_t, uint16_t>::block(p1, p2);
+ if (count == 7)
+ return generic::BcmpSequence<uint32_t, uint16_t, uint8_t>::block(p1, p2);
+ if (count == 8)
+ return generic::Bcmp<uint64_t>::block(p1, p2);
+ if (count <= 16) // 9..16 bytes
+ return generic::Bcmp<uint64_t>::head_tail(p1, p2, count);
+#if defined(__AVX512BW__) // count > 16 from here on; checked in order AVX512BW, AVX, SSE4.1, then generic
+ return inline_bcmp_x86_avx512bw_gt16(p1, p2, count);
+#elif defined(__AVX__)
+ return inline_bcmp_x86_avx_gt16(p1, p2, count);
+#elif defined(__SSE4_1__)
+ return inline_bcmp_x86_sse41_gt16(p1, p2, count);
+#else
+ return inline_bcmp_generic_gt16(p1, p2, count);
+#endif
+}
+
+} // namespace __llvm_libc
+
+#endif // LIBC_SRC_STRING_MEMORY_UTILS_X86_64_BCMP_IMPLEMENTATIONS_H
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index a67f248b1bb134..46c88527d73e2c 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -2009,6 +2009,7 @@ libc_support_library(
],
defines = MEMORY_COPTS,
textual_hdrs = [
+ "src/string/memory_utils/aarch64/bcmp_implementations.h",
"src/string/memory_utils/aarch64/memcmp_implementations.h",
"src/string/memory_utils/aarch64/memcpy_implementations.h",
"src/string/memory_utils/bcmp_implementations.h",
@@ -2020,8 +2021,10 @@ libc_support_library(
"src/string/memory_utils/memmem_implementations.h",
"src/string/memory_utils/memmove_implementations.h",
"src/string/memory_utils/memset_implementations.h",
+ "src/string/memory_utils/riscv/bcmp_implementations.h",
"src/string/memory_utils/strcmp_implementations.h",
"src/string/memory_utils/strstr_implementations.h",
+ "src/string/memory_utils/x86_64/bcmp_implementations.h",
"src/string/memory_utils/x86_64/memcmp_implementations.h",
"src/string/memory_utils/x86_64/memcpy_implementations.h",
],
More information about the libc-commits
mailing list