[libc-commits] [libc] [libc] Add support for string/memory_utils functions for AArch64 without HW FP/SIMD (PR #137592)
via libc-commits
libc-commits at lists.llvm.org
Mon Apr 28 00:49:33 PDT 2025
https://github.com/saturn691 created https://github.com/llvm/llvm-project/pull/137592
Use conditional compilation to support AArch64 targets without vector registers and/or a hardware FPU by falling back to the generic implementation.
**Context:**
A few functions were hard-coded to use vector registers or the hardware FPU, so libc failed to compile for AArch64 targets that lack these features. This fix falls back to the generic implementation whenever a feature is unavailable (i.e. when `__ARM_FP` or `__ARM_NEON` is not defined).
From f447844c7112c4f625a3166f42c73144fa26622b Mon Sep 17 00:00:00 2001
From: William Huynh <William.Huynh at arm.com>
Date: Mon, 28 Apr 2025 08:46:16 +0100
Subject: [PATCH] [libc] Add support for string/memory_utils functions for
AArch64 without HW FP/SIMD
Use conditional compilation to support AArch64 targets without vector
registers and/or a hardware FPU by falling back to the generic implementation.
---
libc/src/__support/FPUtil/FEnvImpl.h | 2 +-
libc/src/__support/FPUtil/nearest_integer.h | 2 +-
libc/src/__support/FPUtil/sqrt.h | 2 +-
.../string/memory_utils/aarch64/inline_bcmp.h | 46 +++++++++++++------
.../memory_utils/aarch64/inline_memcmp.h | 28 +++++------
.../memory_utils/aarch64/inline_memmove.h | 4 +-
.../memory_utils/aarch64/inline_memset.h | 45 ++++++++++++------
libc/src/string/memory_utils/op_aarch64.h | 8 +++-
8 files changed, 89 insertions(+), 48 deletions(-)
diff --git a/libc/src/__support/FPUtil/FEnvImpl.h b/libc/src/__support/FPUtil/FEnvImpl.h
index 1c5a1108ff9e0..4c8f34a435bdf 100644
--- a/libc/src/__support/FPUtil/FEnvImpl.h
+++ b/libc/src/__support/FPUtil/FEnvImpl.h
@@ -17,7 +17,7 @@
#include "src/__support/macros/properties/architectures.h"
#include "src/errno/libc_errno.h"
-#if defined(LIBC_TARGET_ARCH_IS_AARCH64)
+#if defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_FP)
#if defined(__APPLE__)
#include "aarch64/fenv_darwin_impl.h"
#else
diff --git a/libc/src/__support/FPUtil/nearest_integer.h b/libc/src/__support/FPUtil/nearest_integer.h
index 5d0deddd751f5..768f13414bd95 100644
--- a/libc/src/__support/FPUtil/nearest_integer.h
+++ b/libc/src/__support/FPUtil/nearest_integer.h
@@ -16,7 +16,7 @@
#if (defined(LIBC_TARGET_ARCH_IS_X86_64) && defined(LIBC_TARGET_CPU_HAS_SSE4_2))
#include "x86_64/nearest_integer.h"
-#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
+#elif (defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_FP))
#include "aarch64/nearest_integer.h"
#elif defined(LIBC_TARGET_ARCH_IS_GPU)
diff --git a/libc/src/__support/FPUtil/sqrt.h b/libc/src/__support/FPUtil/sqrt.h
index 89da44ff2970f..1d377ab9a4e2d 100644
--- a/libc/src/__support/FPUtil/sqrt.h
+++ b/libc/src/__support/FPUtil/sqrt.h
@@ -42,7 +42,7 @@ template <> LIBC_INLINE double sqrt<double>(double x) {
// Use inline assembly when __builtin_elementwise_sqrt is not available.
#if defined(LIBC_TARGET_CPU_HAS_SSE2)
#include "x86_64/sqrt.h"
-#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
+#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_FP)
#include "aarch64/sqrt.h"
#elif defined(LIBC_TARGET_ARCH_IS_ARM)
#include "arm/sqrt.h"
diff --git a/libc/src/string/memory_utils/aarch64/inline_bcmp.h b/libc/src/string/memory_utils/aarch64/inline_bcmp.h
index e41ac202dbaac..2a64ceee10a6d 100644
--- a/libc/src/string/memory_utils/aarch64/inline_bcmp.h
+++ b/libc/src/string/memory_utils/aarch64/inline_bcmp.h
@@ -19,13 +19,36 @@
namespace LIBC_NAMESPACE_DECL {
+#if defined(__ARM_NEON)
+[[maybe_unused]] LIBC_INLINE BcmpReturnType
+inline_bcmp_aarch64_neon(CPtr p1, CPtr p2, size_t count) {
+ if (count <= 32) {
+ return aarch64::Bcmp<16>::head_tail(p1, p2, count);
+ }
+
+ if (count <= 64) {
+ return aarch64::Bcmp<32>::head_tail(p1, p2, count);
+ }
+
+ if (LIBC_UNLIKELY(count > 256)) {
+ if (auto value = aarch64::Bcmp<32>::block(p1, p2))
+ return value;
+ align_to_next_boundary<16, Arg::P1>(p1, p2, count);
+ }
+
+ return aarch64::Bcmp<32>::loop_and_tail(p1, p2, count);
+}
+#else
+[[maybe_unused]] LIBC_INLINE BcmpReturnType
+inline_bcmp_aarch64_no_neon(CPtr p1, CPtr p2, size_t count) {
+ return generic::Bcmp<uint64_t>::loop_and_tail_align_above(256, p1, p2, count);
+}
+#endif // __ARM_NEON
+
[[maybe_unused]] LIBC_INLINE BcmpReturnType inline_bcmp_aarch64(CPtr p1,
CPtr p2,
size_t count) {
- if (LIBC_LIKELY(count <= 32)) {
- if (LIBC_UNLIKELY(count >= 16)) {
- return aarch64::Bcmp<16>::head_tail(p1, p2, count);
- }
+ if (LIBC_LIKELY(count <= 16)) {
switch (count) {
case 0:
return BcmpReturnType::zero();
@@ -54,16 +77,11 @@ namespace LIBC_NAMESPACE_DECL {
}
}
- if (count <= 64)
- return aarch64::Bcmp<32>::head_tail(p1, p2, count);
-
- // Aligned loop if > 256, otherwise normal loop
- if (LIBC_UNLIKELY(count > 256)) {
- if (auto value = aarch64::Bcmp<32>::block(p1, p2))
- return value;
- align_to_next_boundary<16, Arg::P1>(p1, p2, count);
- }
- return aarch64::Bcmp<32>::loop_and_tail(p1, p2, count);
+#if defined(__ARM_NEON)
+ return inline_bcmp_aarch64_neon(p1, p2, count);
+#else
+ return inline_bcmp_aarch64_no_neon(p1, p2, count);
+#endif
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/string/memory_utils/aarch64/inline_memcmp.h b/libc/src/string/memory_utils/aarch64/inline_memcmp.h
index 35ca077dab526..f017cc950d2e4 100644
--- a/libc/src/string/memory_utils/aarch64/inline_memcmp.h
+++ b/libc/src/string/memory_utils/aarch64/inline_memcmp.h
@@ -16,16 +16,7 @@
namespace LIBC_NAMESPACE_DECL {
-[[maybe_unused]] LIBC_INLINE MemcmpReturnType
-inline_memcmp_generic_gt16(CPtr p1, CPtr p2, size_t count) {
- if (LIBC_UNLIKELY(count >= 384)) {
- if (auto value = generic::Memcmp<uint8x16_t>::block(p1, p2))
- return value;
- align_to_next_boundary<16, Arg::P1>(p1, p2, count);
- }
- return generic::Memcmp<uint8x16_t>::loop_and_tail(p1, p2, count);
-}
-
+#if defined(__ARM_NEON)
[[maybe_unused]] LIBC_INLINE MemcmpReturnType
inline_memcmp_aarch64_neon_gt16(CPtr p1, CPtr p2, size_t count) {
if (LIBC_UNLIKELY(count >= 128)) { // [128, ∞]
@@ -46,6 +37,13 @@ inline_memcmp_aarch64_neon_gt16(CPtr p1, CPtr p2, size_t count) {
return generic::Memcmp<uint8x16_t>::loop_and_tail(p1 + 32, p2 + 32,
count - 32);
}
+#else
+[[maybe_unused]] LIBC_INLINE MemcmpReturnType
+inline_memcmp_generic_gt16(CPtr p1, CPtr p2, size_t count) {
+ return generic::Memcmp<uint64_t>::loop_and_tail_align_above(384, p1, p2,
+ count);
+}
+#endif
LIBC_INLINE MemcmpReturnType inline_memcmp_aarch64(CPtr p1, CPtr p2,
size_t count) {
@@ -61,10 +59,12 @@ LIBC_INLINE MemcmpReturnType inline_memcmp_aarch64(CPtr p1, CPtr p2,
return generic::Memcmp<uint32_t>::head_tail(p1, p2, count);
if (count <= 16)
return generic::Memcmp<uint64_t>::head_tail(p1, p2, count);
- if constexpr (aarch64::kNeon)
- return inline_memcmp_aarch64_neon_gt16(p1, p2, count);
- else
- return inline_memcmp_generic_gt16(p1, p2, count);
+
+#if defined(__ARM_NEON)
+ return inline_memcmp_aarch64_neon_gt16(p1, p2, count);
+#else
+ return inline_memcmp_generic_gt16(p1, p2, count);
+#endif
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/string/memory_utils/aarch64/inline_memmove.h b/libc/src/string/memory_utils/aarch64/inline_memmove.h
index 2b238031af49d..d8d276966fd27 100644
--- a/libc/src/string/memory_utils/aarch64/inline_memmove.h
+++ b/libc/src/string/memory_utils/aarch64/inline_memmove.h
@@ -8,8 +8,7 @@
#ifndef LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_MEMMOVE_H
#define LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_MEMMOVE_H
-#include "src/__support/macros/attributes.h" // LIBC_INLINE
-#include "src/string/memory_utils/op_aarch64.h" // aarch64::kNeon
+#include "src/__support/macros/attributes.h" // LIBC_INLINE
#include "src/string/memory_utils/op_builtin.h"
#include "src/string/memory_utils/op_generic.h"
#include "src/string/memory_utils/utils.h"
@@ -19,7 +18,6 @@
namespace LIBC_NAMESPACE_DECL {
LIBC_INLINE void inline_memmove_aarch64(Ptr dst, CPtr src, size_t count) {
- static_assert(aarch64::kNeon, "aarch64 supports vector types");
using uint128_t = generic_v128;
using uint256_t = generic_v256;
using uint512_t = generic_v512;
diff --git a/libc/src/string/memory_utils/aarch64/inline_memset.h b/libc/src/string/memory_utils/aarch64/inline_memset.h
index efcbfd0705983..71b686d92670b 100644
--- a/libc/src/string/memory_utils/aarch64/inline_memset.h
+++ b/libc/src/string/memory_utils/aarch64/inline_memset.h
@@ -18,12 +18,34 @@
namespace LIBC_NAMESPACE_DECL {
+using uint128_t = generic_v128;
+using uint256_t = generic_v256;
+using uint512_t = generic_v512;
+
+#if defined(__ARM_NEON)
+[[maybe_unused]] LIBC_INLINE static void
+inline_memset_aarch64_neon(Ptr dst, uint8_t value, size_t count) {
+ if (count >= 448 && value == 0 && aarch64::neon::hasZva()) {
+ generic::Memset<uint512_t>::block(dst, 0);
+ align_to_next_boundary<64>(dst, count);
+ return aarch64::neon::BzeroCacheLine::loop_and_tail(dst, 0, count);
+ }
+
+ generic::Memset<uint128_t>::block(dst, value);
+ align_to_next_boundary<16>(dst, count);
+ return generic::Memset<uint512_t>::loop_and_tail(dst, value, count);
+}
+#else
+[[maybe_unused]] LIBC_INLINE static void
+inline_memset_aarch64_no_neon(Ptr dst, uint8_t value, size_t count) {
+ generic::Memset<uint128_t>::block(dst, value);
+ align_to_next_boundary<16>(dst, count);
+ return generic::Memset<uint512_t>::loop_and_tail(dst, value, count);
+}
+#endif // __ARM_NEON
+
[[maybe_unused]] LIBC_INLINE static void
inline_memset_aarch64(Ptr dst, uint8_t value, size_t count) {
- static_assert(aarch64::kNeon, "aarch64 supports vector types");
- using uint128_t = generic_v128;
- using uint256_t = generic_v256;
- using uint512_t = generic_v512;
if (count == 0)
return;
if (count <= 3) {
@@ -46,15 +68,12 @@ inline_memset_aarch64(Ptr dst, uint8_t value, size_t count) {
generic::Memset<uint256_t>::tail(dst, value, count);
return;
}
- if (count >= 448 && value == 0 && aarch64::neon::hasZva()) {
- generic::Memset<uint512_t>::block(dst, 0);
- align_to_next_boundary<64>(dst, count);
- return aarch64::neon::BzeroCacheLine::loop_and_tail(dst, 0, count);
- } else {
- generic::Memset<uint128_t>::block(dst, value);
- align_to_next_boundary<16>(dst, count);
- return generic::Memset<uint512_t>::loop_and_tail(dst, value, count);
- }
+
+#if defined(__ARM_NEON)
+ return inline_memset_aarch64_neon(dst, value, count);
+#else
+ return inline_memset_aarch64_no_neon(dst, value, count);
+#endif
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/string/memory_utils/op_aarch64.h b/libc/src/string/memory_utils/op_aarch64.h
index 868c64474c0b4..e552601fbb708 100644
--- a/libc/src/string/memory_utils/op_aarch64.h
+++ b/libc/src/string/memory_utils/op_aarch64.h
@@ -25,7 +25,6 @@
#ifdef __ARM_NEON
#include <arm_neon.h>
-#endif //__ARM_NEON
namespace LIBC_NAMESPACE_DECL {
namespace aarch64 {
@@ -176,6 +175,8 @@ template <size_t Size> struct Bcmp {
} // namespace aarch64
} // namespace LIBC_NAMESPACE_DECL
+#endif //__ARM_NEON
+
namespace LIBC_NAMESPACE_DECL {
namespace generic {
@@ -225,6 +226,8 @@ LIBC_INLINE MemcmpReturnType cmp<uint64_t>(CPtr p1, CPtr p2, size_t offset) {
return MemcmpReturnType::zero();
}
+#if defined(__ARM_NEON)
+
///////////////////////////////////////////////////////////////////////////////
// Specializations for uint8x16_t
template <> struct is_vector<uint8x16_t> : cpp::true_type {};
@@ -269,6 +272,9 @@ LIBC_INLINE MemcmpReturnType cmp<uint8x16x2_t>(CPtr p1, CPtr p2,
}
return MemcmpReturnType::zero();
}
+
+#endif // __ARM_NEON
+
} // namespace generic
} // namespace LIBC_NAMESPACE_DECL
More information about the libc-commits mailing list