[libc-commits] [libc] da30225 - [libc] Add support for string/memory_utils functions for AArch64 without HW FP/SIMD (#137592)
via libc-commits
libc-commits at lists.llvm.org
Fri May 2 04:36:03 PDT 2025
Author: William
Date: 2025-05-02T13:36:00+02:00
New Revision: da3022577e1f277999922acaef9be169c20dfd48
URL: https://github.com/llvm/llvm-project/commit/da3022577e1f277999922acaef9be169c20dfd48
DIFF: https://github.com/llvm/llvm-project/commit/da3022577e1f277999922acaef9be169c20dfd48.diff
LOG: [libc] Add support for string/memory_utils functions for AArch64 without HW FP/SIMD (#137592)
Use conditional compilation to support AArch64 targets that lack vector
registers and/or a hardware FPU by falling back to the generic implementations.
**Context:**
A few functions were hard-coded to use vector registers or the hardware FPU.
As a result, libc failed to compile for AArch64 targets that do not provide
these features. This change guards the affected code with the `__ARM_FP` and
`__ARM_NEON` feature macros and falls back to the generic implementation
whenever a feature is not available.
Added:
Modified:
libc/src/__support/FPUtil/FEnvImpl.h
libc/src/__support/FPUtil/nearest_integer.h
libc/src/__support/FPUtil/sqrt.h
libc/src/string/memory_utils/aarch64/inline_bcmp.h
libc/src/string/memory_utils/aarch64/inline_memcmp.h
libc/src/string/memory_utils/aarch64/inline_memmove.h
libc/src/string/memory_utils/aarch64/inline_memset.h
libc/src/string/memory_utils/inline_bcmp.h
libc/src/string/memory_utils/inline_memcmp.h
libc/src/string/memory_utils/inline_memset.h
libc/src/string/memory_utils/op_aarch64.h
Removed:
################################################################################
diff --git a/libc/src/__support/FPUtil/FEnvImpl.h b/libc/src/__support/FPUtil/FEnvImpl.h
index 1c5a1108ff9e0..4c8f34a435bdf 100644
--- a/libc/src/__support/FPUtil/FEnvImpl.h
+++ b/libc/src/__support/FPUtil/FEnvImpl.h
@@ -17,7 +17,7 @@
#include "src/__support/macros/properties/architectures.h"
#include "src/errno/libc_errno.h"
-#if defined(LIBC_TARGET_ARCH_IS_AARCH64)
+#if defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_FP)
#if defined(__APPLE__)
#include "aarch64/fenv_darwin_impl.h"
#else
diff --git a/libc/src/__support/FPUtil/nearest_integer.h b/libc/src/__support/FPUtil/nearest_integer.h
index 5d0deddd751f5..768f13414bd95 100644
--- a/libc/src/__support/FPUtil/nearest_integer.h
+++ b/libc/src/__support/FPUtil/nearest_integer.h
@@ -16,7 +16,7 @@
#if (defined(LIBC_TARGET_ARCH_IS_X86_64) && defined(LIBC_TARGET_CPU_HAS_SSE4_2))
#include "x86_64/nearest_integer.h"
-#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
+#elif (defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_FP))
#include "aarch64/nearest_integer.h"
#elif defined(LIBC_TARGET_ARCH_IS_GPU)
diff --git a/libc/src/__support/FPUtil/sqrt.h b/libc/src/__support/FPUtil/sqrt.h
index 89da44ff2970f..1d377ab9a4e2d 100644
--- a/libc/src/__support/FPUtil/sqrt.h
+++ b/libc/src/__support/FPUtil/sqrt.h
@@ -42,7 +42,7 @@ template <> LIBC_INLINE double sqrt<double>(double x) {
// Use inline assembly when __builtin_elementwise_sqrt is not available.
#if defined(LIBC_TARGET_CPU_HAS_SSE2)
#include "x86_64/sqrt.h"
-#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
+#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_FP)
#include "aarch64/sqrt.h"
#elif defined(LIBC_TARGET_ARCH_IS_ARM)
#include "arm/sqrt.h"
diff --git a/libc/src/string/memory_utils/aarch64/inline_bcmp.h b/libc/src/string/memory_utils/aarch64/inline_bcmp.h
index e41ac202dbaac..66d24378095b9 100644
--- a/libc/src/string/memory_utils/aarch64/inline_bcmp.h
+++ b/libc/src/string/memory_utils/aarch64/inline_bcmp.h
@@ -19,9 +19,43 @@
namespace LIBC_NAMESPACE_DECL {
-[[maybe_unused]] LIBC_INLINE BcmpReturnType inline_bcmp_aarch64(CPtr p1,
- CPtr p2,
- size_t count) {
+[[maybe_unused]] LIBC_INLINE BcmpReturnType
+inline_bcmp_aarch64_no_fp(CPtr p1, CPtr p2, size_t count) {
+ if (LIBC_LIKELY(count < 16)) {
+ switch (count) {
+ case 0:
+ return BcmpReturnType::zero();
+ case 1:
+ return generic::Bcmp<uint8_t>::block(p1, p2);
+ case 2:
+ return generic::Bcmp<uint16_t>::block(p1, p2);
+ case 3:
+ return generic::Bcmp<uint16_t>::head_tail(p1, p2, count);
+ case 4:
+ return generic::Bcmp<uint32_t>::block(p1, p2);
+ case 5:
+ case 6:
+ case 7:
+ return generic::Bcmp<uint32_t>::head_tail(p1, p2, count);
+ case 8:
+ return generic::Bcmp<uint64_t>::block(p1, p2);
+ case 9:
+ case 10:
+ case 11:
+ case 12:
+ case 13:
+ case 14:
+ case 15:
+ return generic::Bcmp<uint64_t>::head_tail(p1, p2, count);
+ }
+ }
+
+ return generic::Bcmp<uint64_t>::loop_and_tail_align_above(256, p1, p2, count);
+}
+
+#ifdef __ARM_NEON
+[[maybe_unused]] LIBC_INLINE BcmpReturnType
+inline_bcmp_aarch64_with_fp(CPtr p1, CPtr p2, size_t count) {
if (LIBC_LIKELY(count <= 32)) {
if (LIBC_UNLIKELY(count >= 16)) {
return aarch64::Bcmp<16>::head_tail(p1, p2, count);
@@ -65,6 +99,16 @@ namespace LIBC_NAMESPACE_DECL {
}
return aarch64::Bcmp<32>::loop_and_tail(p1, p2, count);
}
+#endif
+
+[[gnu::flatten]] LIBC_INLINE BcmpReturnType
+inline_bcmp_aarch64_dispatch(CPtr p1, CPtr p2, size_t count) {
+#if defined(__ARM_NEON)
+ return inline_bcmp_aarch64_with_fp(p1, p2, count);
+#else
+ return inline_bcmp_aarch64_no_fp(p1, p2, count);
+#endif
+}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/string/memory_utils/aarch64/inline_memcmp.h b/libc/src/string/memory_utils/aarch64/inline_memcmp.h
index 35ca077dab526..380ebb410efb5 100644
--- a/libc/src/string/memory_utils/aarch64/inline_memcmp.h
+++ b/libc/src/string/memory_utils/aarch64/inline_memcmp.h
@@ -17,17 +17,40 @@
namespace LIBC_NAMESPACE_DECL {
[[maybe_unused]] LIBC_INLINE MemcmpReturnType
-inline_memcmp_generic_gt16(CPtr p1, CPtr p2, size_t count) {
- if (LIBC_UNLIKELY(count >= 384)) {
- if (auto value = generic::Memcmp<uint8x16_t>::block(p1, p2))
- return value;
- align_to_next_boundary<16, Arg::P1>(p1, p2, count);
- }
- return generic::Memcmp<uint8x16_t>::loop_and_tail(p1, p2, count);
+inline_memcmp_aarch64_no_fp(CPtr p1, CPtr p2, size_t count) {
+ if (count == 0)
+ return MemcmpReturnType::zero();
+ if (count == 1)
+ return generic::Memcmp<uint8_t>::block(p1, p2);
+ if (count == 2)
+ return generic::Memcmp<uint16_t>::block(p1, p2);
+ if (count == 3)
+ return generic::MemcmpSequence<uint16_t, uint8_t>::block(p1, p2);
+ if (count <= 8)
+ return generic::Memcmp<uint32_t>::head_tail(p1, p2, count);
+ if (count <= 16)
+ return generic::Memcmp<uint64_t>::head_tail(p1, p2, count);
+
+ return generic::Memcmp<uint64_t>::loop_and_tail_align_above(384, p1, p2,
+ count);
}
+#if defined(__ARM_NEON)
[[maybe_unused]] LIBC_INLINE MemcmpReturnType
-inline_memcmp_aarch64_neon_gt16(CPtr p1, CPtr p2, size_t count) {
+inline_memcmp_aarch64_with_fp(CPtr p1, CPtr p2, size_t count) {
+ if (count == 0)
+ return MemcmpReturnType::zero();
+ if (count == 1)
+ return generic::Memcmp<uint8_t>::block(p1, p2);
+ if (count == 2)
+ return generic::Memcmp<uint16_t>::block(p1, p2);
+ if (count == 3)
+ return generic::MemcmpSequence<uint16_t, uint8_t>::block(p1, p2);
+ if (count <= 8)
+ return generic::Memcmp<uint32_t>::head_tail(p1, p2, count);
+ if (count <= 16)
+ return generic::Memcmp<uint64_t>::head_tail(p1, p2, count);
+
if (LIBC_UNLIKELY(count >= 128)) { // [128, ∞]
if (auto value = generic::Memcmp<uint8x16_t>::block(p1, p2))
return value;
@@ -46,25 +69,15 @@ inline_memcmp_aarch64_neon_gt16(CPtr p1, CPtr p2, size_t count) {
return generic::Memcmp<uint8x16_t>::loop_and_tail(p1 + 32, p2 + 32,
count - 32);
}
+#endif
-LIBC_INLINE MemcmpReturnType inline_memcmp_aarch64(CPtr p1, CPtr p2,
- size_t count) {
- if (count == 0)
- return MemcmpReturnType::zero();
- if (count == 1)
- return generic::Memcmp<uint8_t>::block(p1, p2);
- if (count == 2)
- return generic::Memcmp<uint16_t>::block(p1, p2);
- if (count == 3)
- return generic::MemcmpSequence<uint16_t, uint8_t>::block(p1, p2);
- if (count <= 8)
- return generic::Memcmp<uint32_t>::head_tail(p1, p2, count);
- if (count <= 16)
- return generic::Memcmp<uint64_t>::head_tail(p1, p2, count);
- if constexpr (aarch64::kNeon)
- return inline_memcmp_aarch64_neon_gt16(p1, p2, count);
- else
- return inline_memcmp_generic_gt16(p1, p2, count);
+[[gnu::flatten]] LIBC_INLINE MemcmpReturnType
+inline_memcmp_aarch64_dispatch(CPtr p1, CPtr p2, size_t count) {
+#if defined(__ARM_NEON)
+ return inline_memcmp_aarch64_with_fp(p1, p2, count);
+#else
+ return inline_memcmp_aarch64_no_fp(p1, p2, count);
+#endif
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/string/memory_utils/aarch64/inline_memmove.h b/libc/src/string/memory_utils/aarch64/inline_memmove.h
index 2b238031af49d..d8d276966fd27 100644
--- a/libc/src/string/memory_utils/aarch64/inline_memmove.h
+++ b/libc/src/string/memory_utils/aarch64/inline_memmove.h
@@ -8,8 +8,7 @@
#ifndef LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_MEMMOVE_H
#define LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_MEMMOVE_H
-#include "src/__support/macros/attributes.h" // LIBC_INLINE
-#include "src/string/memory_utils/op_aarch64.h" // aarch64::kNeon
+#include "src/__support/macros/attributes.h" // LIBC_INLINE
#include "src/string/memory_utils/op_builtin.h"
#include "src/string/memory_utils/op_generic.h"
#include "src/string/memory_utils/utils.h"
@@ -19,7 +18,6 @@
namespace LIBC_NAMESPACE_DECL {
LIBC_INLINE void inline_memmove_aarch64(Ptr dst, CPtr src, size_t count) {
- static_assert(aarch64::kNeon, "aarch64 supports vector types");
using uint128_t = generic_v128;
using uint256_t = generic_v256;
using uint512_t = generic_v512;
diff --git a/libc/src/string/memory_utils/aarch64/inline_memset.h b/libc/src/string/memory_utils/aarch64/inline_memset.h
index efcbfd0705983..1b4b871792c69 100644
--- a/libc/src/string/memory_utils/aarch64/inline_memset.h
+++ b/libc/src/string/memory_utils/aarch64/inline_memset.h
@@ -18,12 +18,12 @@
namespace LIBC_NAMESPACE_DECL {
+using uint128_t = generic_v128;
+using uint256_t = generic_v256;
+using uint512_t = generic_v512;
+
[[maybe_unused]] LIBC_INLINE static void
-inline_memset_aarch64(Ptr dst, uint8_t value, size_t count) {
- static_assert(aarch64::kNeon, "aarch64 supports vector types");
- using uint128_t = generic_v128;
- using uint256_t = generic_v256;
- using uint512_t = generic_v512;
+inline_memset_aarch64_no_fp(Ptr dst, uint8_t value, size_t count) {
if (count == 0)
return;
if (count <= 3) {
@@ -46,15 +46,57 @@ inline_memset_aarch64(Ptr dst, uint8_t value, size_t count) {
generic::Memset<uint256_t>::tail(dst, value, count);
return;
}
+
+ generic::Memset<uint128_t>::block(dst, value);
+ align_to_next_boundary<16>(dst, count);
+ return generic::Memset<uint512_t>::loop_and_tail(dst, value, count);
+}
+
+#if defined(__ARM_NEON)
+[[maybe_unused]] LIBC_INLINE static void
+inline_memset_aarch64_with_fp(Ptr dst, uint8_t value, size_t count) {
+ if (count == 0)
+ return;
+ if (count <= 3) {
+ generic::Memset<uint8_t>::block(dst, value);
+ if (count > 1)
+ generic::Memset<uint16_t>::tail(dst, value, count);
+ return;
+ }
+ if (count <= 8)
+ return generic::Memset<uint32_t>::head_tail(dst, value, count);
+ if (count <= 16)
+ return generic::Memset<uint64_t>::head_tail(dst, value, count);
+ if (count <= 32)
+ return generic::Memset<uint128_t>::head_tail(dst, value, count);
+ if (count <= (32 + 64)) {
+ generic::Memset<uint256_t>::block(dst, value);
+ if (count <= 64)
+ return generic::Memset<uint256_t>::tail(dst, value, count);
+ generic::Memset<uint256_t>::block(dst + 32, value);
+ generic::Memset<uint256_t>::tail(dst, value, count);
+ return;
+ }
+
if (count >= 448 && value == 0 && aarch64::neon::hasZva()) {
generic::Memset<uint512_t>::block(dst, 0);
align_to_next_boundary<64>(dst, count);
return aarch64::neon::BzeroCacheLine::loop_and_tail(dst, 0, count);
- } else {
- generic::Memset<uint128_t>::block(dst, value);
- align_to_next_boundary<16>(dst, count);
- return generic::Memset<uint512_t>::loop_and_tail(dst, value, count);
}
+
+ generic::Memset<uint128_t>::block(dst, value);
+ align_to_next_boundary<16>(dst, count);
+ return generic::Memset<uint512_t>::loop_and_tail(dst, value, count);
+}
+#endif
+
+[[gnu::flatten]] [[maybe_unused]] LIBC_INLINE static void
+inline_memset_aarch64_dispatch(Ptr dst, uint8_t value, size_t count) {
+#if defined(__ARM_NEON)
+ return inline_memset_aarch64_with_fp(dst, value, count);
+#else
+ return inline_memset_aarch64_no_fp(dst, value, count);
+#endif
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/string/memory_utils/inline_bcmp.h b/libc/src/string/memory_utils/inline_bcmp.h
index 3c1dc808cc5ce..955d764aade2b 100644
--- a/libc/src/string/memory_utils/inline_bcmp.h
+++ b/libc/src/string/memory_utils/inline_bcmp.h
@@ -21,7 +21,7 @@
#define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_x86
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
#include "src/string/memory_utils/aarch64/inline_bcmp.h"
-#define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_aarch64
+#define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_aarch64_dispatch
#elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
#include "src/string/memory_utils/riscv/inline_bcmp.h"
#define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_riscv
diff --git a/libc/src/string/memory_utils/inline_memcmp.h b/libc/src/string/memory_utils/inline_memcmp.h
index a2ca9afd7f79d..85a614b2fb95e 100644
--- a/libc/src/string/memory_utils/inline_memcmp.h
+++ b/libc/src/string/memory_utils/inline_memcmp.h
@@ -20,7 +20,7 @@
#define LIBC_SRC_STRING_MEMORY_UTILS_MEMCMP inline_memcmp_x86
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
#include "src/string/memory_utils/aarch64/inline_memcmp.h"
-#define LIBC_SRC_STRING_MEMORY_UTILS_MEMCMP inline_memcmp_aarch64
+#define LIBC_SRC_STRING_MEMORY_UTILS_MEMCMP inline_memcmp_aarch64_dispatch
#elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
#include "src/string/memory_utils/riscv/inline_memcmp.h"
#define LIBC_SRC_STRING_MEMORY_UTILS_MEMCMP inline_memcmp_riscv
diff --git a/libc/src/string/memory_utils/inline_memset.h b/libc/src/string/memory_utils/inline_memset.h
index aed37071265aa..fd9c29ea4410a 100644
--- a/libc/src/string/memory_utils/inline_memset.h
+++ b/libc/src/string/memory_utils/inline_memset.h
@@ -20,7 +20,7 @@
#define LIBC_SRC_STRING_MEMORY_UTILS_MEMSET inline_memset_x86
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
#include "src/string/memory_utils/aarch64/inline_memset.h"
-#define LIBC_SRC_STRING_MEMORY_UTILS_MEMSET inline_memset_aarch64
+#define LIBC_SRC_STRING_MEMORY_UTILS_MEMSET inline_memset_aarch64_dispatch
#elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
#include "src/string/memory_utils/riscv/inline_memset.h"
#define LIBC_SRC_STRING_MEMORY_UTILS_MEMSET inline_memset_riscv
diff --git a/libc/src/string/memory_utils/op_aarch64.h b/libc/src/string/memory_utils/op_aarch64.h
index 868c64474c0b4..e552601fbb708 100644
--- a/libc/src/string/memory_utils/op_aarch64.h
+++ b/libc/src/string/memory_utils/op_aarch64.h
@@ -25,7 +25,6 @@
#ifdef __ARM_NEON
#include <arm_neon.h>
-#endif //__ARM_NEON
namespace LIBC_NAMESPACE_DECL {
namespace aarch64 {
@@ -176,6 +175,8 @@ template <size_t Size> struct Bcmp {
} // namespace aarch64
} // namespace LIBC_NAMESPACE_DECL
+#endif //__ARM_NEON
+
namespace LIBC_NAMESPACE_DECL {
namespace generic {
@@ -225,6 +226,8 @@ LIBC_INLINE MemcmpReturnType cmp<uint64_t>(CPtr p1, CPtr p2, size_t offset) {
return MemcmpReturnType::zero();
}
+#if defined(__ARM_NEON)
+
///////////////////////////////////////////////////////////////////////////////
// Specializations for uint8x16_t
template <> struct is_vector<uint8x16_t> : cpp::true_type {};
@@ -269,6 +272,9 @@ LIBC_INLINE MemcmpReturnType cmp<uint8x16x2_t>(CPtr p1, CPtr p2,
}
return MemcmpReturnType::zero();
}
+
+#endif // __ARM_NEON
+
} // namespace generic
} // namespace LIBC_NAMESPACE_DECL
More information about the libc-commits
mailing list