[libc-commits] [libc] [libc] Add support for string/memory_utils functions for AArch64 without HW FP/SIMD (PR #137592)

via libc-commits libc-commits at lists.llvm.org
Mon Apr 28 00:49:33 PDT 2025


https://github.com/saturn691 created https://github.com/llvm/llvm-project/pull/137592

Add conditional compilation to support AArch64 targets without vector registers and/or a hardware FPU by falling back to the generic implementation.

**Context:**

A few functions were hard-coded to use vector registers or the hardware FPU. As a result, libc would not compile for AArch64 targets that lack these features. This fix falls back to the generic implementation when a feature is not supported.
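
For illustration, here is a minimal standalone sketch of the dispatch pattern this patch relies on: compile a NEON-specialized path only when the compiler advertises `__ARM_NEON`, and otherwise use a plain scalar path that needs no SIMD registers. The names (`bcmp_generic`, `bcmp_neon16`, `bcmp_dispatch`) are hypothetical stand-ins, not the actual libc entry points.

```cpp
// Hypothetical sketch of the __ARM_NEON fallback pattern (not libc code).
#include <stddef.h>
#include <stdint.h>

#if defined(__ARM_NEON)
#include <arm_neon.h>
#endif

// Scalar fallback: works on any AArch64 target, including builds
// without SIMD/FP registers (e.g. -mgeneral-regs-only).
static int bcmp_generic(const uint8_t *p1, const uint8_t *p2, size_t count) {
  for (size_t i = 0; i < count; ++i)
    if (p1[i] != p2[i])
      return 1;
  return 0;
}

#if defined(__ARM_NEON)
// NEON path: vceqq_u8 yields 0xFF in every lane that matches, so the
// minimum across lanes is 0xFF only when all 16 bytes are equal.
static int bcmp_neon16(const uint8_t *p1, const uint8_t *p2) {
  uint8x16_t eq = vceqq_u8(vld1q_u8(p1), vld1q_u8(p2));
  return vminvq_u8(eq) != 0xFF;
}
#endif

static int bcmp_dispatch(const uint8_t *p1, const uint8_t *p2, size_t count) {
#if defined(__ARM_NEON)
  if (count == 16)
    return bcmp_neon16(p1, p2);
#endif
  return bcmp_generic(p1, p2, count);
}
```

The FPUtil header changes in the patch follow the same idea, keying the AArch64-specific headers on `__ARM_FP` so that soft-float builds pick up the generic versions instead.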

From f447844c7112c4f625a3166f42c73144fa26622b Mon Sep 17 00:00:00 2001
From: William Huynh <William.Huynh at arm.com>
Date: Mon, 28 Apr 2025 08:46:16 +0100
Subject: [PATCH] [libc] Add support for string/memory_utils functions for
 AArch64 without HW FP/SIMD

Add conditional compilation to add support for AArch64 without vector registers
and/or hardware FPUs by using the generic implementation
---
 libc/src/__support/FPUtil/FEnvImpl.h          |  2 +-
 libc/src/__support/FPUtil/nearest_integer.h   |  2 +-
 libc/src/__support/FPUtil/sqrt.h              |  2 +-
 .../string/memory_utils/aarch64/inline_bcmp.h | 46 +++++++++++++------
 .../memory_utils/aarch64/inline_memcmp.h      | 28 +++++------
 .../memory_utils/aarch64/inline_memmove.h     |  4 +-
 .../memory_utils/aarch64/inline_memset.h      | 45 ++++++++++++------
 libc/src/string/memory_utils/op_aarch64.h     |  8 +++-
 8 files changed, 89 insertions(+), 48 deletions(-)

diff --git a/libc/src/__support/FPUtil/FEnvImpl.h b/libc/src/__support/FPUtil/FEnvImpl.h
index 1c5a1108ff9e0..4c8f34a435bdf 100644
--- a/libc/src/__support/FPUtil/FEnvImpl.h
+++ b/libc/src/__support/FPUtil/FEnvImpl.h
@@ -17,7 +17,7 @@
 #include "src/__support/macros/properties/architectures.h"
 #include "src/errno/libc_errno.h"
 
-#if defined(LIBC_TARGET_ARCH_IS_AARCH64)
+#if defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_FP)
 #if defined(__APPLE__)
 #include "aarch64/fenv_darwin_impl.h"
 #else
diff --git a/libc/src/__support/FPUtil/nearest_integer.h b/libc/src/__support/FPUtil/nearest_integer.h
index 5d0deddd751f5..768f13414bd95 100644
--- a/libc/src/__support/FPUtil/nearest_integer.h
+++ b/libc/src/__support/FPUtil/nearest_integer.h
@@ -16,7 +16,7 @@
 
 #if (defined(LIBC_TARGET_ARCH_IS_X86_64) && defined(LIBC_TARGET_CPU_HAS_SSE4_2))
 #include "x86_64/nearest_integer.h"
-#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
+#elif (defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_FP))
 #include "aarch64/nearest_integer.h"
 #elif defined(LIBC_TARGET_ARCH_IS_GPU)
 
diff --git a/libc/src/__support/FPUtil/sqrt.h b/libc/src/__support/FPUtil/sqrt.h
index 89da44ff2970f..1d377ab9a4e2d 100644
--- a/libc/src/__support/FPUtil/sqrt.h
+++ b/libc/src/__support/FPUtil/sqrt.h
@@ -42,7 +42,7 @@ template <> LIBC_INLINE double sqrt<double>(double x) {
 // Use inline assembly when __builtin_elementwise_sqrt is not available.
 #if defined(LIBC_TARGET_CPU_HAS_SSE2)
 #include "x86_64/sqrt.h"
-#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
+#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_FP)
 #include "aarch64/sqrt.h"
 #elif defined(LIBC_TARGET_ARCH_IS_ARM)
 #include "arm/sqrt.h"
diff --git a/libc/src/string/memory_utils/aarch64/inline_bcmp.h b/libc/src/string/memory_utils/aarch64/inline_bcmp.h
index e41ac202dbaac..2a64ceee10a6d 100644
--- a/libc/src/string/memory_utils/aarch64/inline_bcmp.h
+++ b/libc/src/string/memory_utils/aarch64/inline_bcmp.h
@@ -19,13 +19,36 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
+#if defined(__ARM_NEON)
+[[maybe_unused]] LIBC_INLINE BcmpReturnType
+inline_bcmp_aarch64_neon(CPtr p1, CPtr p2, size_t count) {
+  if (count <= 32) {
+    return aarch64::Bcmp<16>::head_tail(p1, p2, count);
+  }
+
+  if (count <= 64) {
+    return aarch64::Bcmp<32>::head_tail(p1, p2, count);
+  }
+
+  if (LIBC_UNLIKELY(count > 256)) {
+    if (auto value = aarch64::Bcmp<32>::block(p1, p2))
+      return value;
+    align_to_next_boundary<16, Arg::P1>(p1, p2, count);
+  }
+
+  return aarch64::Bcmp<32>::loop_and_tail(p1, p2, count);
+}
+#else
+[[maybe_unused]] LIBC_INLINE BcmpReturnType
+inline_bcmp_aarch64_no_neon(CPtr p1, CPtr p2, size_t count) {
+  return generic::Bcmp<uint64_t>::loop_and_tail_align_above(256, p1, p2, count);
+}
+#endif // __ARM_NEON
+
 [[maybe_unused]] LIBC_INLINE BcmpReturnType inline_bcmp_aarch64(CPtr p1,
                                                                 CPtr p2,
                                                                 size_t count) {
-  if (LIBC_LIKELY(count <= 32)) {
-    if (LIBC_UNLIKELY(count >= 16)) {
-      return aarch64::Bcmp<16>::head_tail(p1, p2, count);
-    }
+  if (LIBC_LIKELY(count <= 16)) {
     switch (count) {
     case 0:
       return BcmpReturnType::zero();
@@ -54,16 +77,11 @@ namespace LIBC_NAMESPACE_DECL {
     }
   }
 
-  if (count <= 64)
-    return aarch64::Bcmp<32>::head_tail(p1, p2, count);
-
-  // Aligned loop if > 256, otherwise normal loop
-  if (LIBC_UNLIKELY(count > 256)) {
-    if (auto value = aarch64::Bcmp<32>::block(p1, p2))
-      return value;
-    align_to_next_boundary<16, Arg::P1>(p1, p2, count);
-  }
-  return aarch64::Bcmp<32>::loop_and_tail(p1, p2, count);
+#if defined(__ARM_NEON)
+  return inline_bcmp_aarch64_neon(p1, p2, count);
+#else
+  return inline_bcmp_aarch64_no_neon(p1, p2, count);
+#endif
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/string/memory_utils/aarch64/inline_memcmp.h b/libc/src/string/memory_utils/aarch64/inline_memcmp.h
index 35ca077dab526..f017cc950d2e4 100644
--- a/libc/src/string/memory_utils/aarch64/inline_memcmp.h
+++ b/libc/src/string/memory_utils/aarch64/inline_memcmp.h
@@ -16,16 +16,7 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
-[[maybe_unused]] LIBC_INLINE MemcmpReturnType
-inline_memcmp_generic_gt16(CPtr p1, CPtr p2, size_t count) {
-  if (LIBC_UNLIKELY(count >= 384)) {
-    if (auto value = generic::Memcmp<uint8x16_t>::block(p1, p2))
-      return value;
-    align_to_next_boundary<16, Arg::P1>(p1, p2, count);
-  }
-  return generic::Memcmp<uint8x16_t>::loop_and_tail(p1, p2, count);
-}
-
+#if defined(__ARM_NEON)
 [[maybe_unused]] LIBC_INLINE MemcmpReturnType
 inline_memcmp_aarch64_neon_gt16(CPtr p1, CPtr p2, size_t count) {
   if (LIBC_UNLIKELY(count >= 128)) { // [128, ∞]
@@ -46,6 +37,13 @@ inline_memcmp_aarch64_neon_gt16(CPtr p1, CPtr p2, size_t count) {
   return generic::Memcmp<uint8x16_t>::loop_and_tail(p1 + 32, p2 + 32,
                                                     count - 32);
 }
+#else
+[[maybe_unused]] LIBC_INLINE MemcmpReturnType
+inline_memcmp_generic_gt16(CPtr p1, CPtr p2, size_t count) {
+  return generic::Memcmp<uint64_t>::loop_and_tail_align_above(384, p1, p2,
+                                                              count);
+}
+#endif
 
 LIBC_INLINE MemcmpReturnType inline_memcmp_aarch64(CPtr p1, CPtr p2,
                                                    size_t count) {
@@ -61,10 +59,12 @@ LIBC_INLINE MemcmpReturnType inline_memcmp_aarch64(CPtr p1, CPtr p2,
     return generic::Memcmp<uint32_t>::head_tail(p1, p2, count);
   if (count <= 16)
     return generic::Memcmp<uint64_t>::head_tail(p1, p2, count);
-  if constexpr (aarch64::kNeon)
-    return inline_memcmp_aarch64_neon_gt16(p1, p2, count);
-  else
-    return inline_memcmp_generic_gt16(p1, p2, count);
+
+#if defined(__ARM_NEON)
+  return inline_memcmp_aarch64_neon_gt16(p1, p2, count);
+#else
+  return inline_memcmp_generic_gt16(p1, p2, count);
+#endif
 }
 } // namespace LIBC_NAMESPACE_DECL
 
diff --git a/libc/src/string/memory_utils/aarch64/inline_memmove.h b/libc/src/string/memory_utils/aarch64/inline_memmove.h
index 2b238031af49d..d8d276966fd27 100644
--- a/libc/src/string/memory_utils/aarch64/inline_memmove.h
+++ b/libc/src/string/memory_utils/aarch64/inline_memmove.h
@@ -8,8 +8,7 @@
 #ifndef LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_MEMMOVE_H
 #define LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_MEMMOVE_H
 
-#include "src/__support/macros/attributes.h"    // LIBC_INLINE
-#include "src/string/memory_utils/op_aarch64.h" // aarch64::kNeon
+#include "src/__support/macros/attributes.h" // LIBC_INLINE
 #include "src/string/memory_utils/op_builtin.h"
 #include "src/string/memory_utils/op_generic.h"
 #include "src/string/memory_utils/utils.h"
@@ -19,7 +18,6 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LIBC_INLINE void inline_memmove_aarch64(Ptr dst, CPtr src, size_t count) {
-  static_assert(aarch64::kNeon, "aarch64 supports vector types");
   using uint128_t = generic_v128;
   using uint256_t = generic_v256;
   using uint512_t = generic_v512;
diff --git a/libc/src/string/memory_utils/aarch64/inline_memset.h b/libc/src/string/memory_utils/aarch64/inline_memset.h
index efcbfd0705983..71b686d92670b 100644
--- a/libc/src/string/memory_utils/aarch64/inline_memset.h
+++ b/libc/src/string/memory_utils/aarch64/inline_memset.h
@@ -18,12 +18,34 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
+using uint128_t = generic_v128;
+using uint256_t = generic_v256;
+using uint512_t = generic_v512;
+
+#if defined(__ARM_NEON)
+[[maybe_unused]] LIBC_INLINE static void
+inline_memset_aarch64_neon(Ptr dst, uint8_t value, size_t count) {
+  if (count >= 448 && value == 0 && aarch64::neon::hasZva()) {
+    generic::Memset<uint512_t>::block(dst, 0);
+    align_to_next_boundary<64>(dst, count);
+    return aarch64::neon::BzeroCacheLine::loop_and_tail(dst, 0, count);
+  }
+
+  generic::Memset<uint128_t>::block(dst, value);
+  align_to_next_boundary<16>(dst, count);
+  return generic::Memset<uint512_t>::loop_and_tail(dst, value, count);
+}
+#else
+[[maybe_unused]] LIBC_INLINE static void
+inline_memset_aarch64_no_neon(Ptr dst, uint8_t value, size_t count) {
+  generic::Memset<uint128_t>::block(dst, value);
+  align_to_next_boundary<16>(dst, count);
+  return generic::Memset<uint512_t>::loop_and_tail(dst, value, count);
+}
+#endif // __ARM_NEON
+
 [[maybe_unused]] LIBC_INLINE static void
 inline_memset_aarch64(Ptr dst, uint8_t value, size_t count) {
-  static_assert(aarch64::kNeon, "aarch64 supports vector types");
-  using uint128_t = generic_v128;
-  using uint256_t = generic_v256;
-  using uint512_t = generic_v512;
   if (count == 0)
     return;
   if (count <= 3) {
@@ -46,15 +68,12 @@ inline_memset_aarch64(Ptr dst, uint8_t value, size_t count) {
     generic::Memset<uint256_t>::tail(dst, value, count);
     return;
   }
-  if (count >= 448 && value == 0 && aarch64::neon::hasZva()) {
-    generic::Memset<uint512_t>::block(dst, 0);
-    align_to_next_boundary<64>(dst, count);
-    return aarch64::neon::BzeroCacheLine::loop_and_tail(dst, 0, count);
-  } else {
-    generic::Memset<uint128_t>::block(dst, value);
-    align_to_next_boundary<16>(dst, count);
-    return generic::Memset<uint512_t>::loop_and_tail(dst, value, count);
-  }
+
+#if defined(__ARM_NEON)
+  return inline_memset_aarch64_neon(dst, value, count);
+#else
+  return inline_memset_aarch64_no_neon(dst, value, count);
+#endif
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/string/memory_utils/op_aarch64.h b/libc/src/string/memory_utils/op_aarch64.h
index 868c64474c0b4..e552601fbb708 100644
--- a/libc/src/string/memory_utils/op_aarch64.h
+++ b/libc/src/string/memory_utils/op_aarch64.h
@@ -25,7 +25,6 @@
 
 #ifdef __ARM_NEON
 #include <arm_neon.h>
-#endif //__ARM_NEON
 
 namespace LIBC_NAMESPACE_DECL {
 namespace aarch64 {
@@ -176,6 +175,8 @@ template <size_t Size> struct Bcmp {
 } // namespace aarch64
 } // namespace LIBC_NAMESPACE_DECL
 
+#endif //__ARM_NEON
+
 namespace LIBC_NAMESPACE_DECL {
 namespace generic {
 
@@ -225,6 +226,8 @@ LIBC_INLINE MemcmpReturnType cmp<uint64_t>(CPtr p1, CPtr p2, size_t offset) {
   return MemcmpReturnType::zero();
 }
 
+#if defined(__ARM_NEON)
+
 ///////////////////////////////////////////////////////////////////////////////
 // Specializations for uint8x16_t
 template <> struct is_vector<uint8x16_t> : cpp::true_type {};
@@ -269,6 +272,9 @@ LIBC_INLINE MemcmpReturnType cmp<uint8x16x2_t>(CPtr p1, CPtr p2,
   }
   return MemcmpReturnType::zero();
 }
+
+#endif // __ARM_NEON
+
 } // namespace generic
 } // namespace LIBC_NAMESPACE_DECL
 


