[compiler-rt] [AArch64][SME] Rewrite __arm_get_current_vg to preserve required registers (PR #100143)

Kerry McLaughlin via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 24 02:57:39 PDT 2024


https://github.com/kmclaughlin-arm updated https://github.com/llvm/llvm-project/pull/100143

>From a648df5f13527380c442d866eb303ecb42d37133 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Mon, 22 Jul 2024 16:28:25 +0000
Subject: [PATCH 1/3] [AArch64][SME] Rewrite __arm_get_current_vg to preserve
 required registers

The documentation for the __arm_get_current_vg support routine specifies
that the following registers are call-preserved:
 - X1-X15, X19-X29 and SP
 - Z0-Z31
 - P0-P15

This patch rewrites the implementation of this routine in compiler-rt, as
the current version does not guarantee that these registers will be preserved.

See https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#arm-get-current-vg
---
 compiler-rt/lib/builtins/aarch64/sme-abi-vg.c | 19 --------
 compiler-rt/lib/builtins/aarch64/sme-abi.S    | 46 +++++++++++++++++++
 2 files changed, 46 insertions(+), 19 deletions(-)

diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi-vg.c b/compiler-rt/lib/builtins/aarch64/sme-abi-vg.c
index 062cf80fc6848..7b10d5fc826cf 100644
--- a/compiler-rt/lib/builtins/aarch64/sme-abi-vg.c
+++ b/compiler-rt/lib/builtins/aarch64/sme-abi-vg.c
@@ -28,22 +28,3 @@ __attribute__((constructor(90))) static void get_aarch64_cpu_features(void) {
 
   __init_cpu_features();
 }
-
-__attribute__((target("sve"))) long
-__arm_get_current_vg(void) __arm_streaming_compatible {
-  struct SME_STATE State = __arm_sme_state();
-  unsigned long long features =
-      __atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED);
-  bool HasSVE = features & (1ULL << FEAT_SVE);
-
-  if (!HasSVE && !__aarch64_has_sme_and_tpidr2_el0)
-    return 0;
-
-  if (HasSVE || (State.PSTATE & 1)) {
-    long vl;
-    __asm__ __volatile__("cntd %0" : "=r"(vl));
-    return vl;
-  }
-
-  return 0;
-}
diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi.S b/compiler-rt/lib/builtins/aarch64/sme-abi.S
index 4c0ff66931db7..48232cf81f778 100644
--- a/compiler-rt/lib/builtins/aarch64/sme-abi.S
+++ b/compiler-rt/lib/builtins/aarch64/sme-abi.S
@@ -12,11 +12,15 @@
 #if !defined(__APPLE__)
 #define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)
 #define TPIDR2_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)
+#define CPU_FEATS_SYMBOL :got:SYMBOL_NAME(__aarch64_cpu_features)
+#define CPU_FEATS_SYMBOL_OFFSET :got_lo12:SYMBOL_NAME(__aarch64_cpu_features)
 #else
 // MachO requires @page/@pageoff directives because the global is defined
 // in a different file. Otherwise this file may fail to build.
 #define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@page
 #define TPIDR2_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@pageoff
+#define CPU_FEATS_SYMBOL :got:SYMBOL_NAME(__aarch64_cpu_features)@page
+#define CPU_FEATS_SYMBOL_OFFSET :got_lo12:SYMBOL_NAME(__aarch64_cpu_features)@pageoff
 #endif
 
 .arch armv9-a+sme
@@ -180,6 +184,48 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_za_disable)
   ret
 END_COMPILERRT_OUTLINE_FUNCTION(__arm_za_disable)
 
+DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_get_current_vg)
+  .variant_pcs __arm_get_current_vg
+  BTI_C
+  mov x0, xzr
+
+  stp     x29, x30, [sp, #-16]!
+  .cfi_def_cfa_offset 16
+  mov     x29, sp
+  .cfi_def_cfa w29, 16
+  .cfi_offset w30, -8
+  .cfi_offset w29, -16
+  mov     x17, x1
+  bl      __arm_sme_state
+  mov     x1, x17
+  adrp    x16, CPU_FEATS_SYMBOL
+  ldr     x16, [x16, CPU_FEATS_SYMBOL_OFFSET]
+  ldr     x16, [x16]
+  tbnz    w16, #30, 0f
+  adrp    x17, TPIDR2_SYMBOL
+  ldrb    w17, [x17, TPIDR2_SYMBOL_OFFSET]
+  cbz     w17, 1f
+0:
+  and     x16, x16, #0x40000000
+  bfxil   x16, x0, #0, #1
+  cbz     x16, 1f
+  cntd    x0
+  .cfi_def_cfa wsp, 16
+  ldp     x29, x30, [sp], #16
+  .cfi_def_cfa_offset 0
+  .cfi_restore w30
+  .cfi_restore w29
+  ret
+1:
+  mov     x0, xzr
+  .cfi_def_cfa wsp, 16
+  ldp     x29, x30, [sp], #16
+  .cfi_def_cfa_offset 0
+  .cfi_restore w30
+  .cfi_restore w29
+  ret
+END_COMPILERRT_OUTLINE_FUNCTION(__arm_get_current_vg)
+
 NO_EXEC_STACK_DIRECTIVE
 
 // GNU property note for BTI and PAC

>From 9240fd2ddc3c262bf50f82c6875a29ddb82106cf Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Tue, 23 Jul 2024 15:58:32 +0000
Subject: [PATCH 2/3] - Removed extra mov at beginning of __arm_get_current_vg
 & moved __arm_sme_state call to the '0' branch. - Removed SME_STATE struct
 from sme-abi-vg.c

---
 compiler-rt/lib/builtins/aarch64/sme-abi-vg.c |  9 -------
 compiler-rt/lib/builtins/aarch64/sme-abi.S    | 27 +++++++++----------
 2 files changed, 13 insertions(+), 23 deletions(-)

diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi-vg.c b/compiler-rt/lib/builtins/aarch64/sme-abi-vg.c
index 7b10d5fc826cf..20061012e16c6 100644
--- a/compiler-rt/lib/builtins/aarch64/sme-abi-vg.c
+++ b/compiler-rt/lib/builtins/aarch64/sme-abi-vg.c
@@ -10,15 +10,6 @@ struct FEATURES {
 
 extern struct FEATURES __aarch64_cpu_features;
 
-struct SME_STATE {
-  long PSTATE;
-  long TPIDR2_EL0;
-};
-
-extern struct SME_STATE __arm_sme_state(void) __arm_streaming_compatible;
-
-extern bool __aarch64_has_sme_and_tpidr2_el0;
-
 #if __GNUC__ >= 9
 #pragma GCC diagnostic ignored "-Wprio-ctor-dtor"
 #endif
diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi.S b/compiler-rt/lib/builtins/aarch64/sme-abi.S
index 48232cf81f778..f4eb1040f4738 100644
--- a/compiler-rt/lib/builtins/aarch64/sme-abi.S
+++ b/compiler-rt/lib/builtins/aarch64/sme-abi.S
@@ -187,7 +187,6 @@ END_COMPILERRT_OUTLINE_FUNCTION(__arm_za_disable)
 DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_get_current_vg)
   .variant_pcs __arm_get_current_vg
   BTI_C
-  mov x0, xzr
 
   stp     x29, x30, [sp, #-16]!
   .cfi_def_cfa_offset 16
@@ -195,20 +194,20 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_get_current_vg)
   .cfi_def_cfa w29, 16
   .cfi_offset w30, -8
   .cfi_offset w29, -16
-  mov     x17, x1
-  bl      __arm_sme_state
-  mov     x1, x17
-  adrp    x16, CPU_FEATS_SYMBOL
-  ldr     x16, [x16, CPU_FEATS_SYMBOL_OFFSET]
-  ldr     x16, [x16]
-  tbnz    w16, #30, 0f
-  adrp    x17, TPIDR2_SYMBOL
-  ldrb    w17, [x17, TPIDR2_SYMBOL_OFFSET]
-  cbz     w17, 1f
+  adrp    x17, CPU_FEATS_SYMBOL
+  ldr     x17, [x17, CPU_FEATS_SYMBOL_OFFSET]
+  ldr     x17, [x17]
+  tbnz    w17, #30, 0f
+  adrp    x16, TPIDR2_SYMBOL
+  ldrb    w16, [x16, TPIDR2_SYMBOL_OFFSET]
+  cbz     w16, 1f
 0:
-  and     x16, x16, #0x40000000
-  bfxil   x16, x0, #0, #1
-  cbz     x16, 1f
+  mov     x18, x1
+  bl      __arm_sme_state
+  mov     x1, x18
+  and     x17, x17, #0x40000000
+  bfxil   x17, x0, #0, #1
+  cbz     x17, 1f
   cntd    x0
   .cfi_def_cfa wsp, 16
   ldp     x29, x30, [sp], #16

>From aaf13bc71249db9b6054af510c83ee0f019f8bc3 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Wed, 24 Jul 2024 09:47:25 +0000
Subject: [PATCH 3/3] - Removed relocation specifiers from CPU_FEATS_SYMBOL to
 be consistent with TPIDR2_SYMBOL

---
 compiler-rt/lib/builtins/aarch64/sme-abi.S | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi.S b/compiler-rt/lib/builtins/aarch64/sme-abi.S
index f4eb1040f4738..cd8153f60670f 100644
--- a/compiler-rt/lib/builtins/aarch64/sme-abi.S
+++ b/compiler-rt/lib/builtins/aarch64/sme-abi.S
@@ -12,15 +12,15 @@
 #if !defined(__APPLE__)
 #define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)
 #define TPIDR2_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)
-#define CPU_FEATS_SYMBOL :got:SYMBOL_NAME(__aarch64_cpu_features)
-#define CPU_FEATS_SYMBOL_OFFSET :got_lo12:SYMBOL_NAME(__aarch64_cpu_features)
+#define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features)
+#define CPU_FEATS_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_cpu_features)
 #else
 // MachO requires @page/@pageoff directives because the global is defined
 // in a different file. Otherwise this file may fail to build.
 #define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@page
 #define TPIDR2_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@pageoff
-#define CPU_FEATS_SYMBOL :got:SYMBOL_NAME(__aarch64_cpu_features)@page
-#define CPU_FEATS_SYMBOL_OFFSET :got_lo12:SYMBOL_NAME(__aarch64_cpu_features)@pageoff
+#define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features)@page
+#define CPU_FEATS_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_cpu_features)@pageoff
 #endif
 
 .arch armv9-a+sme
@@ -195,8 +195,7 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_get_current_vg)
   .cfi_offset w30, -8
   .cfi_offset w29, -16
   adrp    x17, CPU_FEATS_SYMBOL
-  ldr     x17, [x17, CPU_FEATS_SYMBOL_OFFSET]
-  ldr     x17, [x17]
+  ldr     w17, [x17, CPU_FEATS_SYMBOL_OFFSET]
   tbnz    w17, #30, 0f
   adrp    x16, TPIDR2_SYMBOL
   ldrb    w16, [x16, TPIDR2_SYMBOL_OFFSET]



More information about the llvm-commits mailing list