[compiler-rt] [compiler-rt][AArch64] Provide basic implementations of SME memcpy/memmove in case of strictly aligned memory access (PR #138250)
Victor Campos via llvm-commits
llvm-commits at lists.llvm.org
Fri May 2 03:57:07 PDT 2025
https://github.com/vhscampos created https://github.com/llvm/llvm-project/pull/138250
The existing implementations, written in assembly, make use of unaligned accesses for performance reasons. They are not compatible with strictly aligned configurations, i.e. with `-mno-unaligned-access`.
If the functions are used in this scenario, an exception is raised due to unaligned memory accesses.
This patch reintroduces vanilla implementations for these functions to be used in strictly aligned configurations. The actual code is largely based on the code from https://github.com/llvm/llvm-project/pull/77496
>From cd24ec57a84ed837e3e22d589de15e3ed3691ddb Mon Sep 17 00:00:00 2001
From: Victor Campos <victor.campos at arm.com>
Date: Thu, 1 May 2025 13:31:36 +0100
Subject: [PATCH] [compiler-rt][AArch64] Provide basic implementations of SME
memcpy/memmove in case of strictly aligned memory access
The existing implementations, written in assembly, make use of unaligned
accesses for performance reasons. They are not compatible with strictly
aligned configurations, i.e. with `-mno-unaligned-access`.
If the functions are used in this scenario, an exception is raised due
to unaligned memory accesses.
This patch reintroduces vanilla implementations for these functions to
be used in strictly aligned configurations. The actual code is largely
based on the code from https://github.com/llvm/llvm-project/pull/77496
---
.../builtins/aarch64/sme-libc-mem-routines.S | 6 ++-
.../lib/builtins/aarch64/sme-libc-routines.c | 48 ++++++++++++++++++-
2 files changed, 51 insertions(+), 3 deletions(-)
diff --git a/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S b/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S
index e736829967c0c..cba4a2cbc4fa9 100644
--- a/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S
+++ b/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S
@@ -6,6 +6,8 @@
#include "../assembly.h"
+#ifdef __ARM_FEATURE_UNALIGNED
+
//
// __arm_sc_memcpy / __arm_sc_memmove
//
@@ -346,4 +348,6 @@ DEFINE_COMPILERRT_FUNCTION(__arm_sc_memset)
ret
END_COMPILERRT_FUNCTION(__arm_sc_memset)
-#endif // __aarch64__
+#endif /* defined(__aarch64__) && __ARM_FP != 0 */
+
+#endif /* __ARM_FEATURE_UNALIGNED */
diff --git a/compiler-rt/lib/builtins/aarch64/sme-libc-routines.c b/compiler-rt/lib/builtins/aarch64/sme-libc-routines.c
index 07d6681485556..24b9b3bcb49da 100644
--- a/compiler-rt/lib/builtins/aarch64/sme-libc-routines.c
+++ b/compiler-rt/lib/builtins/aarch64/sme-libc-routines.c
@@ -1,7 +1,8 @@
#include <stddef.h>
-/* The asm version uses FP registers. Use this on targets without them */
-#if __ARM_FP == 0
+// The asm version uses FP registers and unaligned memory accesses. Use this on
+// targets without them.
+#if __ARM_FP == 0 || !defined(__ARM_FEATURE_UNALIGNED)
void *__arm_sc_memset(void *dest, int c, size_t n) __arm_streaming_compatible {
unsigned char *destp = (unsigned char *)dest;
unsigned char c8 = (unsigned char)c;
@@ -22,3 +23,46 @@ const void *__arm_sc_memchr(const void *src, int c,
return NULL;
}
+
+#ifndef __ARM_FEATURE_UNALIGNED
+
+static void *memcpy_fwd(void *dest, const void *src, // Copies n bytes from src to dest in ascending index order; returns dest.
+ size_t n) __arm_streaming_compatible {
+ unsigned char *destp = (unsigned char *)dest;
+ const unsigned char *srcp = (const unsigned char *)src;
+ // Move one unsigned char per iteration, so no access is wider than a byte.
+ for (size_t i = 0; i < n; ++i)
+ destp[i] = srcp[i];
+ return dest;
+}
+
+static void *memcpy_rev(void *dest, const void *src, // Copies n bytes from src to dest in descending index order; returns dest.
+ size_t n) __arm_streaming_compatible {
+ unsigned char *destp = (unsigned char *)dest;
+ const unsigned char *srcp = (const unsigned char *)src;
+ // Decrement before indexing: the first byte written is n - 1, the last is 0.
+ while (n > 0) {
+ --n;
+ destp[n] = srcp[n];
+ }
+ return dest;
+}
+
+void *__arm_sc_memcpy(void *__restrict dest, const void *__restrict src, // Byte-wise memcpy, compiled only when __ARM_FEATURE_UNALIGNED is not defined; returns dest.
+ size_t n) __arm_streaming_compatible {
+ return memcpy_fwd(dest, src, n); // Both pointers are restrict-qualified; always copies in ascending order.
+}
+
+void *__arm_sc_memmove(void *dest, const void *src, // Copies n bytes from src to dest, picking a copy direction that tolerates overlap; returns dest.
+ size_t n) __arm_streaming_compatible {
+ unsigned char *destp = (unsigned char *)dest;
+ const unsigned char *srcp = (const unsigned char *)src;
+ // One region starts beyond the other's end: no overlap, so defer to memcpy.
+ if ((srcp > (destp + n)) || (destp > (srcp + n)))
+ return __arm_sc_memcpy(dest, src, n);
+ if (srcp > destp) // Source is above dest: an ascending copy reads each byte before it is overwritten.
+ return memcpy_fwd(dest, src, n);
+ return memcpy_rev(dest, src, n); // Otherwise copy in descending order, for the same reason.
+}
+
+#endif /* !defined(__ARM_FEATURE_UNALIGNED) */
More information about the llvm-commits
mailing list