[compiler-rt] [AArch64][SME] Rewrite __arm_sc_memset to remove invalid instruction (PR #101522)

Kerry McLaughlin via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 1 10:11:24 PDT 2024


https://github.com/kmclaughlin-arm created https://github.com/llvm/llvm-project/pull/101522

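The implementation of __arm_sc_memset in compiler-rt contains
a Neon dup instruction which is not valid in streaming mode.
This patch replaces that instruction: when compiled with SVE
(__ARM_FEATURE_SVE defined) it uses an SVE mov to broadcast the byte
into z0; otherwise it spills the byte 16 times to a 16-byte stack slot
and fills q0 from that slot.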

>From e62090d0ebfaf6bc67236f7232a27fc6cc98f545 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Thu, 1 Aug 2024 12:47:42 +0000
Subject: [PATCH] [AArch64][SME] Rewrite __arm_sc_memset to remove invalid
 instruction

The implementation of __arm_sc_memset in compiler-rt contains a Neon
dup instruction which is not valid in streaming mode.
This patch rewrites the function to use spills & fills, or to use
an SVE mov instruction if available.
---
 .../builtins/aarch64/sme-libc-mem-routines.S  | 24 ++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S b/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S
index 926ad3b1b6331..a3fa59ff19003 100644
--- a/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S
+++ b/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S
@@ -252,7 +252,29 @@ DEFINE_COMPILERRT_FUNCTION_ALIAS(__arm_sc_memmove, __arm_sc_memcpy)
 #define zva_val  x5
 
 DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sc_memset)
-        dup     v0.16B, valw
+#  ifdef __ARM_FEATURE_SVE
+        mov     z0.b, valw
+#  else
+        sub     sp, sp, #16
+        .cfi_def_cfa_offset 16
+        strb    valw, [sp, #15]
+        strb    valw, [sp, #14]
+        strb    valw, [sp, #13]
+        strb    valw, [sp, #12]
+        strb    valw, [sp, #11]
+        strb    valw, [sp, #10]
+        strb    valw, [sp, #9]
+        strb    valw, [sp, #8]
+        strb    valw, [sp, #7]
+        strb    valw, [sp, #6]
+        strb    valw, [sp, #5]
+        strb    valw, [sp, #4]
+        strb    valw, [sp, #3]
+        strb    valw, [sp, #2]
+        strb    valw, [sp, #1]
+        strb    valw, [sp]
+        ldr     q0, [sp], #16
+#  endif
         add     dstend2, dstin, count
 
         cmp     count, 96



More information about the llvm-commits mailing list