[compiler-rt] [AArch64][SME] Rewrite __arm_sc_memset to remove invalid instruction (PR #101522)
Kerry McLaughlin via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 2 03:28:01 PDT 2024
https://github.com/kmclaughlin-arm updated https://github.com/llvm/llvm-project/pull/101522
>From e62090d0ebfaf6bc67236f7232a27fc6cc98f545 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Thu, 1 Aug 2024 12:47:42 +0000
Subject: [PATCH 1/2] [AArch64][SME] Rewrite __arm_sc_memset to remove invalid
instruction
The implementation of __arm_sc_memset in compiler-rt contains a Neon
dup instruction, which is not valid in streaming mode.
This patch replaces that instruction: if SVE is available it uses an
SVE mov instruction; otherwise it spills the value to the stack and fills q0 from it.
---
.../builtins/aarch64/sme-libc-mem-routines.S | 24 ++++++++++++++++++-
1 file changed, 23 insertions(+), 1 deletion(-)
diff --git a/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S b/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S
index 926ad3b1b6331..a3fa59ff19003 100644
--- a/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S
+++ b/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S
@@ -252,7 +252,29 @@ DEFINE_COMPILERRT_FUNCTION_ALIAS(__arm_sc_memmove, __arm_sc_memcpy)
#define zva_val x5
DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sc_memset)
- dup v0.16B, valw
+# ifdef __ARM_FEATURE_SVE
+ mov z0.b, valw
+# else
+ sub sp, sp, #16
+ .cfi_def_cfa_offset 16
+ strb valw, [sp, #15]
+ strb valw, [sp, #14]
+ strb valw, [sp, #13]
+ strb valw, [sp, #12]
+ strb valw, [sp, #11]
+ strb valw, [sp, #10]
+ strb valw, [sp, #9]
+ strb valw, [sp, #8]
+ strb valw, [sp, #7]
+ strb valw, [sp, #6]
+ strb valw, [sp, #5]
+ strb valw, [sp, #4]
+ strb valw, [sp, #3]
+ strb valw, [sp, #2]
+ strb valw, [sp, #1]
+ strb valw, [sp]
+ ldr q0, [sp], #16
+# endif
add dstend2, dstin, count
cmp count, 96
>From 21566dcc792233cff8cd57a8abbaaa7d8ebc243d Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Fri, 2 Aug 2024 10:04:16 +0000
Subject: [PATCH 2/2] - Rewrite copy of valw when SVE is not available
---
.../builtins/aarch64/sme-libc-mem-routines.S | 24 ++++---------------
1 file changed, 5 insertions(+), 19 deletions(-)
diff --git a/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S b/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S
index a3fa59ff19003..b623be043d7a3 100644
--- a/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S
+++ b/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S
@@ -255,25 +255,11 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sc_memset)
# ifdef __ARM_FEATURE_SVE
mov z0.b, valw
# else
- sub sp, sp, #16
- .cfi_def_cfa_offset 16
- strb valw, [sp, #15]
- strb valw, [sp, #14]
- strb valw, [sp, #13]
- strb valw, [sp, #12]
- strb valw, [sp, #11]
- strb valw, [sp, #10]
- strb valw, [sp, #9]
- strb valw, [sp, #8]
- strb valw, [sp, #7]
- strb valw, [sp, #6]
- strb valw, [sp, #5]
- strb valw, [sp, #4]
- strb valw, [sp, #3]
- strb valw, [sp, #2]
- strb valw, [sp, #1]
- strb valw, [sp]
- ldr q0, [sp], #16
+ bfi valw, valw, #8, #8
+ bfi valw, valw, #16, #16
+ bfi val, val, #32, #32
+ fmov d0, val
+ fmov v0.d[1], val
# endif
add dstend2, dstin, count
More information about the llvm-commits
mailing list