[libc-commits] [libc] da30225 - [libc] Add support for string/memory_utils functions for AArch64 without HW FP/SIMD (#137592)

via libc-commits libc-commits at lists.llvm.org
Fri May 2 04:36:03 PDT 2025


Author: William
Date: 2025-05-02T13:36:00+02:00
New Revision: da3022577e1f277999922acaef9be169c20dfd48

URL: https://github.com/llvm/llvm-project/commit/da3022577e1f277999922acaef9be169c20dfd48
DIFF: https://github.com/llvm/llvm-project/commit/da3022577e1f277999922acaef9be169c20dfd48.diff

LOG: [libc] Add support for string/memory_utils functions for AArch64 without HW FP/SIMD (#137592)

Add conditional compilation to add support for AArch64 without vector
registers and/or hardware FPUs by using the generic implementation.

**Context:**

A few functions were hard-coded to use vector registers/hardware FPUs.
This meant that libc would not compile on architectures that did not
support these features. This fix falls back on the generic
implementation if a feature is not supported.

Added: 
    

Modified: 
    libc/src/__support/FPUtil/FEnvImpl.h
    libc/src/__support/FPUtil/nearest_integer.h
    libc/src/__support/FPUtil/sqrt.h
    libc/src/string/memory_utils/aarch64/inline_bcmp.h
    libc/src/string/memory_utils/aarch64/inline_memcmp.h
    libc/src/string/memory_utils/aarch64/inline_memmove.h
    libc/src/string/memory_utils/aarch64/inline_memset.h
    libc/src/string/memory_utils/inline_bcmp.h
    libc/src/string/memory_utils/inline_memcmp.h
    libc/src/string/memory_utils/inline_memset.h
    libc/src/string/memory_utils/op_aarch64.h

Removed: 
    


################################################################################
diff  --git a/libc/src/__support/FPUtil/FEnvImpl.h b/libc/src/__support/FPUtil/FEnvImpl.h
index 1c5a1108ff9e0..4c8f34a435bdf 100644
--- a/libc/src/__support/FPUtil/FEnvImpl.h
+++ b/libc/src/__support/FPUtil/FEnvImpl.h
@@ -17,7 +17,7 @@
 #include "src/__support/macros/properties/architectures.h"
 #include "src/errno/libc_errno.h"
 
-#if defined(LIBC_TARGET_ARCH_IS_AARCH64)
+#if defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_FP)
 #if defined(__APPLE__)
 #include "aarch64/fenv_darwin_impl.h"
 #else

diff  --git a/libc/src/__support/FPUtil/nearest_integer.h b/libc/src/__support/FPUtil/nearest_integer.h
index 5d0deddd751f5..768f13414bd95 100644
--- a/libc/src/__support/FPUtil/nearest_integer.h
+++ b/libc/src/__support/FPUtil/nearest_integer.h
@@ -16,7 +16,7 @@
 
 #if (defined(LIBC_TARGET_ARCH_IS_X86_64) && defined(LIBC_TARGET_CPU_HAS_SSE4_2))
 #include "x86_64/nearest_integer.h"
-#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
+#elif (defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_FP))
 #include "aarch64/nearest_integer.h"
 #elif defined(LIBC_TARGET_ARCH_IS_GPU)
 

diff  --git a/libc/src/__support/FPUtil/sqrt.h b/libc/src/__support/FPUtil/sqrt.h
index 89da44ff2970f..1d377ab9a4e2d 100644
--- a/libc/src/__support/FPUtil/sqrt.h
+++ b/libc/src/__support/FPUtil/sqrt.h
@@ -42,7 +42,7 @@ template <> LIBC_INLINE double sqrt<double>(double x) {
 // Use inline assembly when __builtin_elementwise_sqrt is not available.
 #if defined(LIBC_TARGET_CPU_HAS_SSE2)
 #include "x86_64/sqrt.h"
-#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
+#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_FP)
 #include "aarch64/sqrt.h"
 #elif defined(LIBC_TARGET_ARCH_IS_ARM)
 #include "arm/sqrt.h"

diff  --git a/libc/src/string/memory_utils/aarch64/inline_bcmp.h b/libc/src/string/memory_utils/aarch64/inline_bcmp.h
index e41ac202dbaac..66d24378095b9 100644
--- a/libc/src/string/memory_utils/aarch64/inline_bcmp.h
+++ b/libc/src/string/memory_utils/aarch64/inline_bcmp.h
@@ -19,9 +19,43 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
-[[maybe_unused]] LIBC_INLINE BcmpReturnType inline_bcmp_aarch64(CPtr p1,
-                                                                CPtr p2,
-                                                                size_t count) {
+[[maybe_unused]] LIBC_INLINE BcmpReturnType
+inline_bcmp_aarch64_no_fp(CPtr p1, CPtr p2, size_t count) {
+  if (LIBC_LIKELY(count < 16)) {
+    switch (count) {
+    case 0:
+      return BcmpReturnType::zero();
+    case 1:
+      return generic::Bcmp<uint8_t>::block(p1, p2);
+    case 2:
+      return generic::Bcmp<uint16_t>::block(p1, p2);
+    case 3:
+      return generic::Bcmp<uint16_t>::head_tail(p1, p2, count);
+    case 4:
+      return generic::Bcmp<uint32_t>::block(p1, p2);
+    case 5:
+    case 6:
+    case 7:
+      return generic::Bcmp<uint32_t>::head_tail(p1, p2, count);
+    case 8:
+      return generic::Bcmp<uint64_t>::block(p1, p2);
+    case 9:
+    case 10:
+    case 11:
+    case 12:
+    case 13:
+    case 14:
+    case 15:
+      return generic::Bcmp<uint64_t>::head_tail(p1, p2, count);
+    }
+  }
+
+  return generic::Bcmp<uint64_t>::loop_and_tail_align_above(256, p1, p2, count);
+}
+
+#ifdef __ARM_NEON
+[[maybe_unused]] LIBC_INLINE BcmpReturnType
+inline_bcmp_aarch64_with_fp(CPtr p1, CPtr p2, size_t count) {
   if (LIBC_LIKELY(count <= 32)) {
     if (LIBC_UNLIKELY(count >= 16)) {
       return aarch64::Bcmp<16>::head_tail(p1, p2, count);
@@ -65,6 +99,16 @@ namespace LIBC_NAMESPACE_DECL {
   }
   return aarch64::Bcmp<32>::loop_and_tail(p1, p2, count);
 }
+#endif
+
+[[gnu::flatten]] LIBC_INLINE BcmpReturnType
+inline_bcmp_aarch64_dispatch(CPtr p1, CPtr p2, size_t count) {
+#if defined(__ARM_NEON)
+  return inline_bcmp_aarch64_with_fp(p1, p2, count);
+#else
+  return inline_bcmp_aarch64_no_fp(p1, p2, count);
+#endif
+}
 
 } // namespace LIBC_NAMESPACE_DECL
 

diff  --git a/libc/src/string/memory_utils/aarch64/inline_memcmp.h b/libc/src/string/memory_utils/aarch64/inline_memcmp.h
index 35ca077dab526..380ebb410efb5 100644
--- a/libc/src/string/memory_utils/aarch64/inline_memcmp.h
+++ b/libc/src/string/memory_utils/aarch64/inline_memcmp.h
@@ -17,17 +17,40 @@
 namespace LIBC_NAMESPACE_DECL {
 
 [[maybe_unused]] LIBC_INLINE MemcmpReturnType
-inline_memcmp_generic_gt16(CPtr p1, CPtr p2, size_t count) {
-  if (LIBC_UNLIKELY(count >= 384)) {
-    if (auto value = generic::Memcmp<uint8x16_t>::block(p1, p2))
-      return value;
-    align_to_next_boundary<16, Arg::P1>(p1, p2, count);
-  }
-  return generic::Memcmp<uint8x16_t>::loop_and_tail(p1, p2, count);
+inline_memcmp_aarch64_no_fp(CPtr p1, CPtr p2, size_t count) {
+  if (count == 0)
+    return MemcmpReturnType::zero();
+  if (count == 1)
+    return generic::Memcmp<uint8_t>::block(p1, p2);
+  if (count == 2)
+    return generic::Memcmp<uint16_t>::block(p1, p2);
+  if (count == 3)
+    return generic::MemcmpSequence<uint16_t, uint8_t>::block(p1, p2);
+  if (count <= 8)
+    return generic::Memcmp<uint32_t>::head_tail(p1, p2, count);
+  if (count <= 16)
+    return generic::Memcmp<uint64_t>::head_tail(p1, p2, count);
+
+  return generic::Memcmp<uint64_t>::loop_and_tail_align_above(384, p1, p2,
+                                                              count);
 }
 
+#if defined(__ARM_NEON)
 [[maybe_unused]] LIBC_INLINE MemcmpReturnType
-inline_memcmp_aarch64_neon_gt16(CPtr p1, CPtr p2, size_t count) {
+inline_memcmp_aarch64_with_fp(CPtr p1, CPtr p2, size_t count) {
+  if (count == 0)
+    return MemcmpReturnType::zero();
+  if (count == 1)
+    return generic::Memcmp<uint8_t>::block(p1, p2);
+  if (count == 2)
+    return generic::Memcmp<uint16_t>::block(p1, p2);
+  if (count == 3)
+    return generic::MemcmpSequence<uint16_t, uint8_t>::block(p1, p2);
+  if (count <= 8)
+    return generic::Memcmp<uint32_t>::head_tail(p1, p2, count);
+  if (count <= 16)
+    return generic::Memcmp<uint64_t>::head_tail(p1, p2, count);
+
   if (LIBC_UNLIKELY(count >= 128)) { // [128, ∞]
     if (auto value = generic::Memcmp<uint8x16_t>::block(p1, p2))
       return value;
@@ -46,25 +69,15 @@ inline_memcmp_aarch64_neon_gt16(CPtr p1, CPtr p2, size_t count) {
   return generic::Memcmp<uint8x16_t>::loop_and_tail(p1 + 32, p2 + 32,
                                                     count - 32);
 }
+#endif
 
-LIBC_INLINE MemcmpReturnType inline_memcmp_aarch64(CPtr p1, CPtr p2,
-                                                   size_t count) {
-  if (count == 0)
-    return MemcmpReturnType::zero();
-  if (count == 1)
-    return generic::Memcmp<uint8_t>::block(p1, p2);
-  if (count == 2)
-    return generic::Memcmp<uint16_t>::block(p1, p2);
-  if (count == 3)
-    return generic::MemcmpSequence<uint16_t, uint8_t>::block(p1, p2);
-  if (count <= 8)
-    return generic::Memcmp<uint32_t>::head_tail(p1, p2, count);
-  if (count <= 16)
-    return generic::Memcmp<uint64_t>::head_tail(p1, p2, count);
-  if constexpr (aarch64::kNeon)
-    return inline_memcmp_aarch64_neon_gt16(p1, p2, count);
-  else
-    return inline_memcmp_generic_gt16(p1, p2, count);
+[[gnu::flatten]] LIBC_INLINE MemcmpReturnType
+inline_memcmp_aarch64_dispatch(CPtr p1, CPtr p2, size_t count) {
+#if defined(__ARM_NEON)
+  return inline_memcmp_aarch64_with_fp(p1, p2, count);
+#else
+  return inline_memcmp_aarch64_no_fp(p1, p2, count);
+#endif
 }
 } // namespace LIBC_NAMESPACE_DECL
 

diff  --git a/libc/src/string/memory_utils/aarch64/inline_memmove.h b/libc/src/string/memory_utils/aarch64/inline_memmove.h
index 2b238031af49d..d8d276966fd27 100644
--- a/libc/src/string/memory_utils/aarch64/inline_memmove.h
+++ b/libc/src/string/memory_utils/aarch64/inline_memmove.h
@@ -8,8 +8,7 @@
 #ifndef LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_MEMMOVE_H
 #define LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_MEMMOVE_H
 
-#include "src/__support/macros/attributes.h"    // LIBC_INLINE
-#include "src/string/memory_utils/op_aarch64.h" // aarch64::kNeon
+#include "src/__support/macros/attributes.h" // LIBC_INLINE
 #include "src/string/memory_utils/op_builtin.h"
 #include "src/string/memory_utils/op_generic.h"
 #include "src/string/memory_utils/utils.h"
@@ -19,7 +18,6 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LIBC_INLINE void inline_memmove_aarch64(Ptr dst, CPtr src, size_t count) {
-  static_assert(aarch64::kNeon, "aarch64 supports vector types");
   using uint128_t = generic_v128;
   using uint256_t = generic_v256;
   using uint512_t = generic_v512;

diff  --git a/libc/src/string/memory_utils/aarch64/inline_memset.h b/libc/src/string/memory_utils/aarch64/inline_memset.h
index efcbfd0705983..1b4b871792c69 100644
--- a/libc/src/string/memory_utils/aarch64/inline_memset.h
+++ b/libc/src/string/memory_utils/aarch64/inline_memset.h
@@ -18,12 +18,12 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
+using uint128_t = generic_v128;
+using uint256_t = generic_v256;
+using uint512_t = generic_v512;
+
 [[maybe_unused]] LIBC_INLINE static void
-inline_memset_aarch64(Ptr dst, uint8_t value, size_t count) {
-  static_assert(aarch64::kNeon, "aarch64 supports vector types");
-  using uint128_t = generic_v128;
-  using uint256_t = generic_v256;
-  using uint512_t = generic_v512;
+inline_memset_aarch64_no_fp(Ptr dst, uint8_t value, size_t count) {
   if (count == 0)
     return;
   if (count <= 3) {
@@ -46,15 +46,57 @@ inline_memset_aarch64(Ptr dst, uint8_t value, size_t count) {
     generic::Memset<uint256_t>::tail(dst, value, count);
     return;
   }
+
+  generic::Memset<uint128_t>::block(dst, value);
+  align_to_next_boundary<16>(dst, count);
+  return generic::Memset<uint512_t>::loop_and_tail(dst, value, count);
+}
+
+#if defined(__ARM_NEON)
+[[maybe_unused]] LIBC_INLINE static void
+inline_memset_aarch64_with_fp(Ptr dst, uint8_t value, size_t count) {
+  if (count == 0)
+    return;
+  if (count <= 3) {
+    generic::Memset<uint8_t>::block(dst, value);
+    if (count > 1)
+      generic::Memset<uint16_t>::tail(dst, value, count);
+    return;
+  }
+  if (count <= 8)
+    return generic::Memset<uint32_t>::head_tail(dst, value, count);
+  if (count <= 16)
+    return generic::Memset<uint64_t>::head_tail(dst, value, count);
+  if (count <= 32)
+    return generic::Memset<uint128_t>::head_tail(dst, value, count);
+  if (count <= (32 + 64)) {
+    generic::Memset<uint256_t>::block(dst, value);
+    if (count <= 64)
+      return generic::Memset<uint256_t>::tail(dst, value, count);
+    generic::Memset<uint256_t>::block(dst + 32, value);
+    generic::Memset<uint256_t>::tail(dst, value, count);
+    return;
+  }
+
   if (count >= 448 && value == 0 && aarch64::neon::hasZva()) {
     generic::Memset<uint512_t>::block(dst, 0);
     align_to_next_boundary<64>(dst, count);
     return aarch64::neon::BzeroCacheLine::loop_and_tail(dst, 0, count);
-  } else {
-    generic::Memset<uint128_t>::block(dst, value);
-    align_to_next_boundary<16>(dst, count);
-    return generic::Memset<uint512_t>::loop_and_tail(dst, value, count);
   }
+
+  generic::Memset<uint128_t>::block(dst, value);
+  align_to_next_boundary<16>(dst, count);
+  return generic::Memset<uint512_t>::loop_and_tail(dst, value, count);
+}
+#endif
+
+[[gnu::flatten]] [[maybe_unused]] LIBC_INLINE static void
+inline_memset_aarch64_dispatch(Ptr dst, uint8_t value, size_t count) {
+#if defined(__ARM_NEON)
+  return inline_memset_aarch64_with_fp(dst, value, count);
+#else
+  return inline_memset_aarch64_no_fp(dst, value, count);
+#endif
 }
 
 } // namespace LIBC_NAMESPACE_DECL

diff  --git a/libc/src/string/memory_utils/inline_bcmp.h b/libc/src/string/memory_utils/inline_bcmp.h
index 3c1dc808cc5ce..955d764aade2b 100644
--- a/libc/src/string/memory_utils/inline_bcmp.h
+++ b/libc/src/string/memory_utils/inline_bcmp.h
@@ -21,7 +21,7 @@
 #define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_x86
 #elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
 #include "src/string/memory_utils/aarch64/inline_bcmp.h"
-#define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_aarch64
+#define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_aarch64_dispatch
 #elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
 #include "src/string/memory_utils/riscv/inline_bcmp.h"
 #define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_riscv

diff  --git a/libc/src/string/memory_utils/inline_memcmp.h b/libc/src/string/memory_utils/inline_memcmp.h
index a2ca9afd7f79d..85a614b2fb95e 100644
--- a/libc/src/string/memory_utils/inline_memcmp.h
+++ b/libc/src/string/memory_utils/inline_memcmp.h
@@ -20,7 +20,7 @@
 #define LIBC_SRC_STRING_MEMORY_UTILS_MEMCMP inline_memcmp_x86
 #elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
 #include "src/string/memory_utils/aarch64/inline_memcmp.h"
-#define LIBC_SRC_STRING_MEMORY_UTILS_MEMCMP inline_memcmp_aarch64
+#define LIBC_SRC_STRING_MEMORY_UTILS_MEMCMP inline_memcmp_aarch64_dispatch
 #elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
 #include "src/string/memory_utils/riscv/inline_memcmp.h"
 #define LIBC_SRC_STRING_MEMORY_UTILS_MEMCMP inline_memcmp_riscv

diff  --git a/libc/src/string/memory_utils/inline_memset.h b/libc/src/string/memory_utils/inline_memset.h
index aed37071265aa..fd9c29ea4410a 100644
--- a/libc/src/string/memory_utils/inline_memset.h
+++ b/libc/src/string/memory_utils/inline_memset.h
@@ -20,7 +20,7 @@
 #define LIBC_SRC_STRING_MEMORY_UTILS_MEMSET inline_memset_x86
 #elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
 #include "src/string/memory_utils/aarch64/inline_memset.h"
-#define LIBC_SRC_STRING_MEMORY_UTILS_MEMSET inline_memset_aarch64
+#define LIBC_SRC_STRING_MEMORY_UTILS_MEMSET inline_memset_aarch64_dispatch
 #elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
 #include "src/string/memory_utils/riscv/inline_memset.h"
 #define LIBC_SRC_STRING_MEMORY_UTILS_MEMSET inline_memset_riscv

diff  --git a/libc/src/string/memory_utils/op_aarch64.h b/libc/src/string/memory_utils/op_aarch64.h
index 868c64474c0b4..e552601fbb708 100644
--- a/libc/src/string/memory_utils/op_aarch64.h
+++ b/libc/src/string/memory_utils/op_aarch64.h
@@ -25,7 +25,6 @@
 
 #ifdef __ARM_NEON
 #include <arm_neon.h>
-#endif //__ARM_NEON
 
 namespace LIBC_NAMESPACE_DECL {
 namespace aarch64 {
@@ -176,6 +175,8 @@ template <size_t Size> struct Bcmp {
 } // namespace aarch64
 } // namespace LIBC_NAMESPACE_DECL
 
+#endif //__ARM_NEON
+
 namespace LIBC_NAMESPACE_DECL {
 namespace generic {
 
@@ -225,6 +226,8 @@ LIBC_INLINE MemcmpReturnType cmp<uint64_t>(CPtr p1, CPtr p2, size_t offset) {
   return MemcmpReturnType::zero();
 }
 
+#if defined(__ARM_NEON)
+
 ///////////////////////////////////////////////////////////////////////////////
 // Specializations for uint8x16_t
 template <> struct is_vector<uint8x16_t> : cpp::true_type {};
@@ -269,6 +272,9 @@ LIBC_INLINE MemcmpReturnType cmp<uint8x16x2_t>(CPtr p1, CPtr p2,
   }
   return MemcmpReturnType::zero();
 }
+
+#endif // __ARM_NEON
+
 } // namespace generic
 } // namespace LIBC_NAMESPACE_DECL
 


        


More information about the libc-commits mailing list