[llvm-branch-commits] [compiler-rt] release/19.x: [AArch64][SME] Rewrite __arm_get_current_vg to preserve required registers (#100143) (PR #100546)
Tobias Hieta via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Jul 26 00:28:38 PDT 2024
https://github.com/tru updated https://github.com/llvm/llvm-project/pull/100546
>From 3e4abe985a584f3067448b5169c05509d3b571d2 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Wed, 24 Jul 2024 14:30:25 +0100
Subject: [PATCH] [AArch64][SME] Rewrite __arm_get_current_vg to preserve
required registers (#100143)
The documentation for the __arm_get_current_vg support routine specifies
that the following registers are call-preserved:
- X1-X15, X19-X29 and SP
- Z0-Z31
- P0-P15
This patch rewrites the implementation of this routine in compiler-rt,
as the current version does not guarantee that these registers will be
preserved.
(cherry picked from commit 6da6772bf0a33131aa8540c9d4f60d5db75c32b5)
---
compiler-rt/lib/builtins/aarch64/sme-abi-vg.c | 28 ------------
compiler-rt/lib/builtins/aarch64/sme-abi.S | 44 +++++++++++++++++++
2 files changed, 44 insertions(+), 28 deletions(-)
diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi-vg.c b/compiler-rt/lib/builtins/aarch64/sme-abi-vg.c
index 062cf80fc6848..20061012e16c6 100644
--- a/compiler-rt/lib/builtins/aarch64/sme-abi-vg.c
+++ b/compiler-rt/lib/builtins/aarch64/sme-abi-vg.c
@@ -10,15 +10,6 @@ struct FEATURES {
extern struct FEATURES __aarch64_cpu_features;
-struct SME_STATE {
- long PSTATE;
- long TPIDR2_EL0;
-};
-
-extern struct SME_STATE __arm_sme_state(void) __arm_streaming_compatible;
-
-extern bool __aarch64_has_sme_and_tpidr2_el0;
-
#if __GNUC__ >= 9
#pragma GCC diagnostic ignored "-Wprio-ctor-dtor"
#endif
@@ -28,22 +19,3 @@ __attribute__((constructor(90))) static void get_aarch64_cpu_features(void) {
__init_cpu_features();
}
-
-__attribute__((target("sve"))) long
-__arm_get_current_vg(void) __arm_streaming_compatible {
- struct SME_STATE State = __arm_sme_state();
- unsigned long long features =
- __atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED);
- bool HasSVE = features & (1ULL << FEAT_SVE);
-
- if (!HasSVE && !__aarch64_has_sme_and_tpidr2_el0)
- return 0;
-
- if (HasSVE || (State.PSTATE & 1)) {
- long vl;
- __asm__ __volatile__("cntd %0" : "=r"(vl));
- return vl;
- }
-
- return 0;
-}
diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi.S b/compiler-rt/lib/builtins/aarch64/sme-abi.S
index 4c0ff66931db7..cd8153f60670f 100644
--- a/compiler-rt/lib/builtins/aarch64/sme-abi.S
+++ b/compiler-rt/lib/builtins/aarch64/sme-abi.S
@@ -12,11 +12,15 @@
#if !defined(__APPLE__)
#define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)
#define TPIDR2_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)
+#define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features)
+#define CPU_FEATS_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_cpu_features)
#else
// MachO requires @page/@pageoff directives because the global is defined
// in a different file. Otherwise this file may fail to build.
#define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@page
#define TPIDR2_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@pageoff
+#define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features)@page
+#define CPU_FEATS_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_cpu_features)@pageoff
#endif
.arch armv9-a+sme
@@ -180,6 +184,46 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_za_disable)
ret
END_COMPILERRT_OUTLINE_FUNCTION(__arm_za_disable)
+DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_get_current_vg)
+ .variant_pcs __arm_get_current_vg
+ BTI_C
+
+ stp x29, x30, [sp, #-16]!
+ .cfi_def_cfa_offset 16
+ mov x29, sp
+ .cfi_def_cfa w29, 16
+ .cfi_offset w30, -8
+ .cfi_offset w29, -16
+ adrp x17, CPU_FEATS_SYMBOL
+ ldr w17, [x17, CPU_FEATS_SYMBOL_OFFSET]
+ tbnz w17, #30, 0f
+ adrp x16, TPIDR2_SYMBOL
+ ldrb w16, [x16, TPIDR2_SYMBOL_OFFSET]
+ cbz w16, 1f
+0:
+ mov x18, x1
+ bl __arm_sme_state
+ mov x1, x18
+ and x17, x17, #0x40000000
+ bfxil x17, x0, #0, #1
+ cbz x17, 1f
+ cntd x0
+ .cfi_def_cfa wsp, 16
+ ldp x29, x30, [sp], #16
+ .cfi_def_cfa_offset 0
+ .cfi_restore w30
+ .cfi_restore w29
+ ret
+1:
+ mov x0, xzr
+ .cfi_def_cfa wsp, 16
+ ldp x29, x30, [sp], #16
+ .cfi_def_cfa_offset 0
+ .cfi_restore w30
+ .cfi_restore w29
+ ret
+END_COMPILERRT_OUTLINE_FUNCTION(__arm_get_current_vg)
+
NO_EXEC_STACK_DIRECTIVE
// GNU property note for BTI and PAC
More information about the llvm-branch-commits
mailing list