[libc-commits] [libc] [libc][SVE] add sve handling for memcpy with count less than 32b (PR #167446)
via libc-commits
libc-commits at lists.llvm.org
Mon Nov 10 19:51:43 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-libc
Author: Schrodinger ZHU Yifan (SchrodingerZhu)
<details>
<summary>Changes</summary>
---
Full diff: https://github.com/llvm/llvm-project/pull/167446.diff
1 Files Affected:
- (modified) libc/src/string/memory_utils/aarch64/inline_memcpy.h (+27-1)
``````````diff
diff --git a/libc/src/string/memory_utils/aarch64/inline_memcpy.h b/libc/src/string/memory_utils/aarch64/inline_memcpy.h
index 11cf022e12b1f..7af0963c7e11e 100644
--- a/libc/src/string/memory_utils/aarch64/inline_memcpy.h
+++ b/libc/src/string/memory_utils/aarch64/inline_memcpy.h
@@ -9,15 +9,40 @@
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_MEMCPY_H
#include "src/__support/macros/attributes.h" // LIBC_INLINE
+#include "src/__support/macros/properties/cpu_features.h"
#include "src/string/memory_utils/op_builtin.h"
#include "src/string/memory_utils/utils.h"
#include <stddef.h> // size_t
+#if defined(LIBC_TARGET_CPU_HAS_SVE)
+#include <arm_sve.h>
+#endif
namespace LIBC_NAMESPACE_DECL {
-
+#if defined(LIBC_TARGET_CPU_HAS_SVE)
+[[maybe_unused, gnu::always_inline]] LIBC_INLINE void
+inline_memcpy_aarch64_sve_32_bytes(Ptr __restrict dst, CPtr __restrict src,
+ size_t count) {
+ auto src_ptr = reinterpret_cast<const uint8_t *>(src);
+ auto dst_ptr = reinterpret_cast<uint8_t *>(dst);
+ const size_t vlen = svcntb();
+ svbool_t less_than_count_fst = svwhilelt_b8_u64(0, count);
+ svbool_t less_than_count_snd = svwhilelt_b8_u64(vlen, count);
+ svuint8_t fst = svld1_u8(less_than_count_fst, &src_ptr[0]);
+ svuint8_t snd = svld1_u8(less_than_count_snd, &src_ptr[vlen]);
+ svst1_u8(less_than_count_fst, &dst_ptr[0], fst);
+ svst1_u8(less_than_count_snd, &dst_ptr[vlen], snd);
+}
+#endif
[[maybe_unused]] LIBC_INLINE void
inline_memcpy_aarch64(Ptr __restrict dst, CPtr __restrict src, size_t count) {
+#if defined(LIBC_TARGET_CPU_HAS_SVE)
+ // An SVE register is at least 16 bytes wide, so two predicated registers
+ // always cover any count <= 32. Handling these small sizes with SVE avoids
+ // the chain of size-dispatch branches used otherwise.
+ if (count <= 32)
+ return inline_memcpy_aarch64_sve_32_bytes(dst, src, count);
+#else
if (count == 0)
return;
if (count == 1)
@@ -34,6 +59,7 @@ inline_memcpy_aarch64(Ptr __restrict dst, CPtr __restrict src, size_t count) {
return builtin::Memcpy<8>::head_tail(dst, src, count);
if (count < 32)
return builtin::Memcpy<16>::head_tail(dst, src, count);
+#endif
if (count < 64)
return builtin::Memcpy<32>::head_tail(dst, src, count);
if (count < 128)
``````````
</details>
https://github.com/llvm/llvm-project/pull/167446
More information about the libc-commits mailing list