[libc-commits] [libc] [libc] Add support for string/memory_utils functions for AArch64 without HW FP/SIMD (PR #137592)

via libc-commits libc-commits at lists.llvm.org
Mon Apr 28 03:26:53 PDT 2025


https://github.com/saturn691 updated https://github.com/llvm/llvm-project/pull/137592

>From 2e5f7d2d48d1bfbfec587b711b3d98e570d00859 Mon Sep 17 00:00:00 2001
From: William Huynh <William.Huynh at arm.com>
Date: Mon, 28 Apr 2025 11:26:15 +0100
Subject: [PATCH] [libc] Add support for string/memory_utils functions for
 AArch64 without HW FP/SIMD

Add conditional compilation to add support for AArch64 without vector registers
and/or hardware FPUs by using the generic implementation
---
 libc/src/__support/FPUtil/FEnvImpl.h          |  2 +-
 libc/src/__support/FPUtil/nearest_integer.h   |  2 +-
 libc/src/__support/FPUtil/sqrt.h              |  2 +-
 .../string/memory_utils/aarch64/inline_bcmp.h | 51 ++++++++++++-------
 .../memory_utils/aarch64/inline_memcmp.h      | 24 ++++-----
 .../memory_utils/aarch64/inline_memmove.h     |  4 +-
 .../memory_utils/aarch64/inline_memset.h      | 47 ++++++++++++-----
 libc/src/string/memory_utils/inline_bcmp.h    |  2 +-
 libc/src/string/memory_utils/inline_memcmp.h  |  2 +-
 libc/src/string/memory_utils/inline_memset.h  |  2 +-
 libc/src/string/memory_utils/op_aarch64.h     |  8 ++-
 11 files changed, 93 insertions(+), 53 deletions(-)

diff --git a/libc/src/__support/FPUtil/FEnvImpl.h b/libc/src/__support/FPUtil/FEnvImpl.h
index 1c5a1108ff9e0..4c8f34a435bdf 100644
--- a/libc/src/__support/FPUtil/FEnvImpl.h
+++ b/libc/src/__support/FPUtil/FEnvImpl.h
@@ -17,7 +17,7 @@
 #include "src/__support/macros/properties/architectures.h"
 #include "src/errno/libc_errno.h"
 
-#if defined(LIBC_TARGET_ARCH_IS_AARCH64)
+#if defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_FP)
 #if defined(__APPLE__)
 #include "aarch64/fenv_darwin_impl.h"
 #else
diff --git a/libc/src/__support/FPUtil/nearest_integer.h b/libc/src/__support/FPUtil/nearest_integer.h
index 5d0deddd751f5..768f13414bd95 100644
--- a/libc/src/__support/FPUtil/nearest_integer.h
+++ b/libc/src/__support/FPUtil/nearest_integer.h
@@ -16,7 +16,7 @@
 
 #if (defined(LIBC_TARGET_ARCH_IS_X86_64) && defined(LIBC_TARGET_CPU_HAS_SSE4_2))
 #include "x86_64/nearest_integer.h"
-#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
+#elif (defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_FP))
 #include "aarch64/nearest_integer.h"
 #elif defined(LIBC_TARGET_ARCH_IS_GPU)
 
diff --git a/libc/src/__support/FPUtil/sqrt.h b/libc/src/__support/FPUtil/sqrt.h
index 89da44ff2970f..1d377ab9a4e2d 100644
--- a/libc/src/__support/FPUtil/sqrt.h
+++ b/libc/src/__support/FPUtil/sqrt.h
@@ -42,7 +42,7 @@ template <> LIBC_INLINE double sqrt<double>(double x) {
 // Use inline assembly when __builtin_elementwise_sqrt is not available.
 #if defined(LIBC_TARGET_CPU_HAS_SSE2)
 #include "x86_64/sqrt.h"
-#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
+#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_FP)
 #include "aarch64/sqrt.h"
 #elif defined(LIBC_TARGET_ARCH_IS_ARM)
 #include "arm/sqrt.h"
diff --git a/libc/src/string/memory_utils/aarch64/inline_bcmp.h b/libc/src/string/memory_utils/aarch64/inline_bcmp.h
index e41ac202dbaac..8bdebff4becec 100644
--- a/libc/src/string/memory_utils/aarch64/inline_bcmp.h
+++ b/libc/src/string/memory_utils/aarch64/inline_bcmp.h
@@ -19,13 +19,35 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
-[[maybe_unused]] LIBC_INLINE BcmpReturnType inline_bcmp_aarch64(CPtr p1,
-                                                                CPtr p2,
-                                                                size_t count) {
-  if (LIBC_LIKELY(count <= 32)) {
-    if (LIBC_UNLIKELY(count >= 16)) {
-      return aarch64::Bcmp<16>::head_tail(p1, p2, count);
-    }
+[[maybe_unused]] LIBC_INLINE BcmpReturnType
+inline_bcmp_aarch64_no_fp(CPtr p1, CPtr p2, size_t count) {
+  return generic::Bcmp<uint64_t>::loop_and_tail_align_above(256, p1, p2, count);
+}
+
+#ifdef __ARM_NEON
+[[maybe_unused]] LIBC_INLINE BcmpReturnType
+inline_bcmp_aarch64_with_fp(CPtr p1, CPtr p2, size_t count) {
+  if (count <= 32) {
+    return aarch64::Bcmp<16>::head_tail(p1, p2, count);
+  }
+
+  if (count <= 64) {
+    return aarch64::Bcmp<32>::head_tail(p1, p2, count);
+  }
+
+  if (LIBC_UNLIKELY(count > 256)) {
+    if (auto value = aarch64::Bcmp<32>::block(p1, p2))
+      return value;
+    align_to_next_boundary<16, Arg::P1>(p1, p2, count);
+  }
+
+  return aarch64::Bcmp<32>::loop_and_tail(p1, p2, count);
+}
+#endif
+
+[[gnu::flatten]] LIBC_INLINE BcmpReturnType
+inline_bcmp_aarch64_dispatch(CPtr p1, CPtr p2, size_t count) {
+  if (LIBC_LIKELY(count <= 16)) {
     switch (count) {
     case 0:
       return BcmpReturnType::zero();
@@ -54,16 +76,11 @@ namespace LIBC_NAMESPACE_DECL {
     }
   }
 
-  if (count <= 64)
-    return aarch64::Bcmp<32>::head_tail(p1, p2, count);
-
-  // Aligned loop if > 256, otherwise normal loop
-  if (LIBC_UNLIKELY(count > 256)) {
-    if (auto value = aarch64::Bcmp<32>::block(p1, p2))
-      return value;
-    align_to_next_boundary<16, Arg::P1>(p1, p2, count);
-  }
-  return aarch64::Bcmp<32>::loop_and_tail(p1, p2, count);
+#if defined(__ARM_NEON)
+  return inline_bcmp_aarch64_with_fp(p1, p2, count);
+#else
+  return inline_bcmp_aarch64_no_fp(p1, p2, count);
+#endif
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/string/memory_utils/aarch64/inline_memcmp.h b/libc/src/string/memory_utils/aarch64/inline_memcmp.h
index 35ca077dab526..6b0bfe7dd9e6f 100644
--- a/libc/src/string/memory_utils/aarch64/inline_memcmp.h
+++ b/libc/src/string/memory_utils/aarch64/inline_memcmp.h
@@ -18,14 +18,11 @@ namespace LIBC_NAMESPACE_DECL {
 
 [[maybe_unused]] LIBC_INLINE MemcmpReturnType
 inline_memcmp_generic_gt16(CPtr p1, CPtr p2, size_t count) {
-  if (LIBC_UNLIKELY(count >= 384)) {
-    if (auto value = generic::Memcmp<uint8x16_t>::block(p1, p2))
-      return value;
-    align_to_next_boundary<16, Arg::P1>(p1, p2, count);
-  }
-  return generic::Memcmp<uint8x16_t>::loop_and_tail(p1, p2, count);
+  return generic::Memcmp<uint64_t>::loop_and_tail_align_above(384, p1, p2,
+                                                              count);
 }
 
+#if defined(__ARM_NEON)
 [[maybe_unused]] LIBC_INLINE MemcmpReturnType
 inline_memcmp_aarch64_neon_gt16(CPtr p1, CPtr p2, size_t count) {
   if (LIBC_UNLIKELY(count >= 128)) { // [128, ∞]
@@ -46,9 +43,10 @@ inline_memcmp_aarch64_neon_gt16(CPtr p1, CPtr p2, size_t count) {
   return generic::Memcmp<uint8x16_t>::loop_and_tail(p1 + 32, p2 + 32,
                                                     count - 32);
 }
+#endif
 
-LIBC_INLINE MemcmpReturnType inline_memcmp_aarch64(CPtr p1, CPtr p2,
-                                                   size_t count) {
+[[gnu::flatten]] LIBC_INLINE MemcmpReturnType
+inline_memcmp_aarch64_dispatch(CPtr p1, CPtr p2, size_t count) {
   if (count == 0)
     return MemcmpReturnType::zero();
   if (count == 1)
@@ -61,10 +59,12 @@ LIBC_INLINE MemcmpReturnType inline_memcmp_aarch64(CPtr p1, CPtr p2,
     return generic::Memcmp<uint32_t>::head_tail(p1, p2, count);
   if (count <= 16)
     return generic::Memcmp<uint64_t>::head_tail(p1, p2, count);
-  if constexpr (aarch64::kNeon)
-    return inline_memcmp_aarch64_neon_gt16(p1, p2, count);
-  else
-    return inline_memcmp_generic_gt16(p1, p2, count);
+
+#if defined(__ARM_NEON)
+  return inline_memcmp_aarch64_neon_gt16(p1, p2, count);
+#else
+  return inline_memcmp_generic_gt16(p1, p2, count);
+#endif
 }
 } // namespace LIBC_NAMESPACE_DECL
 
diff --git a/libc/src/string/memory_utils/aarch64/inline_memmove.h b/libc/src/string/memory_utils/aarch64/inline_memmove.h
index 2b238031af49d..d8d276966fd27 100644
--- a/libc/src/string/memory_utils/aarch64/inline_memmove.h
+++ b/libc/src/string/memory_utils/aarch64/inline_memmove.h
@@ -8,8 +8,7 @@
 #ifndef LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_MEMMOVE_H
 #define LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_MEMMOVE_H
 
-#include "src/__support/macros/attributes.h"    // LIBC_INLINE
-#include "src/string/memory_utils/op_aarch64.h" // aarch64::kNeon
+#include "src/__support/macros/attributes.h" // LIBC_INLINE
 #include "src/string/memory_utils/op_builtin.h"
 #include "src/string/memory_utils/op_generic.h"
 #include "src/string/memory_utils/utils.h"
@@ -19,7 +18,6 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LIBC_INLINE void inline_memmove_aarch64(Ptr dst, CPtr src, size_t count) {
-  static_assert(aarch64::kNeon, "aarch64 supports vector types");
   using uint128_t = generic_v128;
   using uint256_t = generic_v256;
   using uint512_t = generic_v512;
diff --git a/libc/src/string/memory_utils/aarch64/inline_memset.h b/libc/src/string/memory_utils/aarch64/inline_memset.h
index efcbfd0705983..d43d331fee7af 100644
--- a/libc/src/string/memory_utils/aarch64/inline_memset.h
+++ b/libc/src/string/memory_utils/aarch64/inline_memset.h
@@ -18,12 +18,34 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
+using uint128_t = generic_v128;
+using uint256_t = generic_v256;
+using uint512_t = generic_v512;
+
 [[maybe_unused]] LIBC_INLINE static void
-inline_memset_aarch64(Ptr dst, uint8_t value, size_t count) {
-  static_assert(aarch64::kNeon, "aarch64 supports vector types");
-  using uint128_t = generic_v128;
-  using uint256_t = generic_v256;
-  using uint512_t = generic_v512;
+inline_memset_aarch64_no_fp(Ptr dst, uint8_t value, size_t count) {
+  generic::Memset<uint128_t>::block(dst, value);
+  align_to_next_boundary<16>(dst, count);
+  return generic::Memset<uint512_t>::loop_and_tail(dst, value, count);
+}
+
+#if defined(__ARM_NEON)
+[[maybe_unused]] LIBC_INLINE static void
+inline_memset_aarch64_with_fp(Ptr dst, uint8_t value, size_t count) {
+  if (count >= 448 && value == 0 && aarch64::neon::hasZva()) {
+    generic::Memset<uint512_t>::block(dst, 0);
+    align_to_next_boundary<64>(dst, count);
+    return aarch64::neon::BzeroCacheLine::loop_and_tail(dst, 0, count);
+  }
+
+  generic::Memset<uint128_t>::block(dst, value);
+  align_to_next_boundary<16>(dst, count);
+  return generic::Memset<uint512_t>::loop_and_tail(dst, value, count);
+}
+#endif
+
+[[gnu::flatten]] [[maybe_unused]] LIBC_INLINE static void
+inline_memset_aarch64_dispatch(Ptr dst, uint8_t value, size_t count) {
   if (count == 0)
     return;
   if (count <= 3) {
@@ -46,15 +68,12 @@ inline_memset_aarch64(Ptr dst, uint8_t value, size_t count) {
     generic::Memset<uint256_t>::tail(dst, value, count);
     return;
   }
-  if (count >= 448 && value == 0 && aarch64::neon::hasZva()) {
-    generic::Memset<uint512_t>::block(dst, 0);
-    align_to_next_boundary<64>(dst, count);
-    return aarch64::neon::BzeroCacheLine::loop_and_tail(dst, 0, count);
-  } else {
-    generic::Memset<uint128_t>::block(dst, value);
-    align_to_next_boundary<16>(dst, count);
-    return generic::Memset<uint512_t>::loop_and_tail(dst, value, count);
-  }
+
+#if defined(__ARM_NEON)
+  return inline_memset_aarch64_with_fp(dst, value, count);
+#else
+  return inline_memset_aarch64_no_fp(dst, value, count);
+#endif
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/string/memory_utils/inline_bcmp.h b/libc/src/string/memory_utils/inline_bcmp.h
index 3c1dc808cc5ce..955d764aade2b 100644
--- a/libc/src/string/memory_utils/inline_bcmp.h
+++ b/libc/src/string/memory_utils/inline_bcmp.h
@@ -21,7 +21,7 @@
 #define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_x86
 #elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
 #include "src/string/memory_utils/aarch64/inline_bcmp.h"
-#define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_aarch64
+#define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_aarch64_dispatch
 #elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
 #include "src/string/memory_utils/riscv/inline_bcmp.h"
 #define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_riscv
diff --git a/libc/src/string/memory_utils/inline_memcmp.h b/libc/src/string/memory_utils/inline_memcmp.h
index a2ca9afd7f79d..85a614b2fb95e 100644
--- a/libc/src/string/memory_utils/inline_memcmp.h
+++ b/libc/src/string/memory_utils/inline_memcmp.h
@@ -20,7 +20,7 @@
 #define LIBC_SRC_STRING_MEMORY_UTILS_MEMCMP inline_memcmp_x86
 #elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
 #include "src/string/memory_utils/aarch64/inline_memcmp.h"
-#define LIBC_SRC_STRING_MEMORY_UTILS_MEMCMP inline_memcmp_aarch64
+#define LIBC_SRC_STRING_MEMORY_UTILS_MEMCMP inline_memcmp_aarch64_dispatch
 #elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
 #include "src/string/memory_utils/riscv/inline_memcmp.h"
 #define LIBC_SRC_STRING_MEMORY_UTILS_MEMCMP inline_memcmp_riscv
diff --git a/libc/src/string/memory_utils/inline_memset.h b/libc/src/string/memory_utils/inline_memset.h
index aed37071265aa..fd9c29ea4410a 100644
--- a/libc/src/string/memory_utils/inline_memset.h
+++ b/libc/src/string/memory_utils/inline_memset.h
@@ -20,7 +20,7 @@
 #define LIBC_SRC_STRING_MEMORY_UTILS_MEMSET inline_memset_x86
 #elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
 #include "src/string/memory_utils/aarch64/inline_memset.h"
-#define LIBC_SRC_STRING_MEMORY_UTILS_MEMSET inline_memset_aarch64
+#define LIBC_SRC_STRING_MEMORY_UTILS_MEMSET inline_memset_aarch64_dispatch
 #elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
 #include "src/string/memory_utils/riscv/inline_memset.h"
 #define LIBC_SRC_STRING_MEMORY_UTILS_MEMSET inline_memset_riscv
diff --git a/libc/src/string/memory_utils/op_aarch64.h b/libc/src/string/memory_utils/op_aarch64.h
index 868c64474c0b4..e552601fbb708 100644
--- a/libc/src/string/memory_utils/op_aarch64.h
+++ b/libc/src/string/memory_utils/op_aarch64.h
@@ -25,7 +25,6 @@
 
 #ifdef __ARM_NEON
 #include <arm_neon.h>
-#endif //__ARM_NEON
 
 namespace LIBC_NAMESPACE_DECL {
 namespace aarch64 {
@@ -176,6 +175,8 @@ template <size_t Size> struct Bcmp {
 } // namespace aarch64
 } // namespace LIBC_NAMESPACE_DECL
 
+#endif //__ARM_NEON
+
 namespace LIBC_NAMESPACE_DECL {
 namespace generic {
 
@@ -225,6 +226,8 @@ LIBC_INLINE MemcmpReturnType cmp<uint64_t>(CPtr p1, CPtr p2, size_t offset) {
   return MemcmpReturnType::zero();
 }
 
+#if defined(__ARM_NEON)
+
 ///////////////////////////////////////////////////////////////////////////////
 // Specializations for uint8x16_t
 template <> struct is_vector<uint8x16_t> : cpp::true_type {};
@@ -269,6 +272,9 @@ LIBC_INLINE MemcmpReturnType cmp<uint8x16x2_t>(CPtr p1, CPtr p2,
   }
   return MemcmpReturnType::zero();
 }
+
+#endif // __ARM_NEON
+
 } // namespace generic
 } // namespace LIBC_NAMESPACE_DECL
 



More information about the libc-commits mailing list