[compiler-rt] [compiler-rt][AArch64] Rewrite SME routines to all use __aarch64_cpu_features. (PR #119414)

Sander de Smalen via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 10 09:07:58 PST 2024


https://github.com/sdesmalen-arm updated https://github.com/llvm/llvm-project/pull/119414

>From 450ed8aba32437e971b9e329029703f3d4664669 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Fri, 6 Dec 2024 17:22:33 +0000
Subject: [PATCH] [compiler-rt][AArch64] Rewrite SME routines to all use FMV
 feature bits.

When #92921 added the `__arm_get_current_vg` functionality, it used
the FMV feature bits mechanism rather than the existing mechanism that was
previously added for SME that called `getauxval` (on Linux platforms)
or `__aarch64_sme_accessible` (required for baremetal libraries).

It seems simpler to always use the FMV feature bits mechanism, but
for baremetal targets we still need to rely on `__aarch64_sme_accessible`.
---
 compiler-rt/lib/builtins/CMakeLists.txt       |  9 +++-
 .../lib/builtins/aarch64/sme-abi-assert.c     | 10 ++++
 .../lib/builtins/aarch64/sme-abi-init.c       | 50 -------------------
 compiler-rt/lib/builtins/aarch64/sme-abi.S    | 29 +++++------
 compiler-rt/lib/builtins/cpu_model/aarch64.c  |  2 +
 .../cpu_model/aarch64/fmv/baremetal_sme.inc   | 31 ++++++++++++
 6 files changed, 62 insertions(+), 69 deletions(-)
 create mode 100644 compiler-rt/lib/builtins/aarch64/sme-abi-assert.c
 delete mode 100644 compiler-rt/lib/builtins/aarch64/sme-abi-init.c
 create mode 100644 compiler-rt/lib/builtins/cpu_model/aarch64/fmv/baremetal_sme.inc

diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index 70dc7d860d8f6a..b0266c00d4efbc 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -573,10 +573,13 @@ set(aarch64_SOURCES
 )
 
 if (COMPILER_RT_HAS_AARCH64_SME)
-  if (NOT COMPILER_RT_DISABLE_AARCH64_FMV AND COMPILER_RT_HAS_FNO_BUILTIN_FLAG AND (COMPILER_RT_HAS_AUXV OR COMPILER_RT_BAREMETAL_BUILD))
-    list(APPEND aarch64_SOURCES aarch64/sme-abi.S aarch64/sme-libc-mem-routines.S aarch64/sme-abi-init.c aarch64/sme-libc-routines.c)
+  if (NOT COMPILER_RT_DISABLE_AARCH64_FMV AND COMPILER_RT_HAS_FNO_BUILTIN_FLAG)
+    list(APPEND aarch64_SOURCES aarch64/sme-abi.S aarch64/sme-libc-mem-routines.S aarch64/sme-abi-assert.c aarch64/sme-libc-routines.c)
     message(STATUS "AArch64 SME ABI routines enabled")
     set_source_files_properties(aarch64/sme-libc-routines.c PROPERTIES COMPILE_FLAGS "-fno-builtin")
+    if(COMPILER_RT_BAREMETAL_BUILD)
+      set(COMPILER_RT_BAREMETAL_AARCH64_SME TRUE)
+    endif()
   else()
     if(COMPILER_RT_DISABLE_AARCH64_FMV)
       message(WARNING "AArch64 SME ABI routines require function multiversioning support.")
@@ -844,6 +847,8 @@ else ()
     list(APPEND BUILTIN_DEFS DISABLE_AARCH64_FMV)
   endif()
 
+  append_list_if(COMPILER_RT_BAREMETAL_AARCH64_SME -DENABLE_BAREMETAL_AARCH64_SME_FMV_FEATURES BUILTIN_CFLAGS)
+
   append_list_if(COMPILER_RT_HAS_ASM_LSE HAS_ASM_LSE BUILTIN_DEFS)
 
   foreach (arch ${BUILTIN_SUPPORTED_ARCH})
diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi-assert.c b/compiler-rt/lib/builtins/aarch64/sme-abi-assert.c
new file mode 100644
index 00000000000000..4333353f8d2d1b
--- /dev/null
+++ b/compiler-rt/lib/builtins/aarch64/sme-abi-assert.c
@@ -0,0 +1,10 @@
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// We rely on the FMV __aarch64_cpu_features mechanism to determine
+// which features are set at runtime.
+
+#include "../cpu_model/AArch64CPUFeatures.inc"
+_Static_assert(FEAT_SVE == 30, "sme-abi.S assumes FEAT_SVE = 30");
+_Static_assert(FEAT_SME == 42, "sme-abi.S assumes FEAT_SME = 42");
diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi-init.c b/compiler-rt/lib/builtins/aarch64/sme-abi-init.c
deleted file mode 100644
index d3cd8278a5d214..00000000000000
--- a/compiler-rt/lib/builtins/aarch64/sme-abi-init.c
+++ /dev/null
@@ -1,50 +0,0 @@
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-__attribute__((visibility("hidden"), nocommon))
-_Bool __aarch64_has_sme_and_tpidr2_el0;
-
-// We have multiple ways to check that the function has SME, depending on our
-// target.
-// * For Linux/Glibc we can use getauxval().
-// * For Android we can use getauxval().
-// * For newlib we can use __aarch64_sme_accessible().
-
-#if defined(__linux__)
-
-#if defined(__ANDROID__)
-#include <sys/auxv.h>
-#elif __has_include(<sys/auxv.h>)
-#include <sys/auxv.h>
-#else
-#define getauxval(x) 0
-#endif
-#include "../cpu_model/aarch64/hwcap.inc"
-
-static _Bool has_sme(void) { return getauxval(AT_HWCAP2) & HWCAP2_SME; }
-
-#else  // defined(__linux__)
-
-#if defined(COMPILER_RT_SHARED_LIB)
-__attribute__((weak))
-#endif
-extern _Bool __aarch64_sme_accessible(void);
-
-static _Bool has_sme(void)  {
-#if defined(COMPILER_RT_SHARED_LIB)
-  if (!__aarch64_sme_accessible)
-    return 0;
-#endif
-  return __aarch64_sme_accessible();
-}
-
-#endif // defined(__linux__)
-
-#if __GNUC__ >= 9
-#pragma GCC diagnostic ignored "-Wprio-ctor-dtor"
-#endif
-__attribute__((constructor(90)))
-static void init_aarch64_has_sme(void) {
-  __aarch64_has_sme_and_tpidr2_el0 = has_sme();
-}
diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi.S b/compiler-rt/lib/builtins/aarch64/sme-abi.S
index 623a95dd4dae5f..a6bb921bd9e6b9 100644
--- a/compiler-rt/lib/builtins/aarch64/sme-abi.S
+++ b/compiler-rt/lib/builtins/aarch64/sme-abi.S
@@ -9,18 +9,15 @@
 #include "../assembly.h"
 
 .set FEAT_SVE_BIT, 30
+.set FEAT_SME_BIT, 42
 .set SVCR_PSTATE_SM_BIT, 0
 
 #if !defined(__APPLE__)
-#define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)
-#define TPIDR2_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)
 #define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features)
 #define CPU_FEATS_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_cpu_features)
 #else
 // MachO requires @page/@pageoff directives because the global is defined
 // in a different file. Otherwise this file may fail to build.
-#define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@page
-#define TPIDR2_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@pageoff
 #define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features)@page
 #define CPU_FEATS_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_cpu_features)@pageoff
 #endif
@@ -63,9 +60,9 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sme_state)
   mov x0, xzr
   mov x1, xzr
 
-  adrp  x16, TPIDR2_SYMBOL
-  ldrb w16, [x16, TPIDR2_SYMBOL_OFFSET]
-  cbz w16, 1f
+  adrp x16, CPU_FEATS_SYMBOL
+  ldr x16, [x16, CPU_FEATS_SYMBOL_OFFSET]
+  tbz x16, #FEAT_SME_BIT, 1f
 0:
   orr x0, x0, #0xC000000000000000
   mrs x16, SVCR
@@ -116,9 +113,9 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_save)
   BTI_C
   // If the current thread does not have access to TPIDR2_EL0, the subroutine
   // does nothing.
-  adrp  x14, TPIDR2_SYMBOL
-  ldrb w14, [x14, TPIDR2_SYMBOL_OFFSET]
-  cbz w14, 1f
+  adrp x14, CPU_FEATS_SYMBOL
+  ldr x14, [x14, CPU_FEATS_SYMBOL_OFFSET]
+  tbz x14, #FEAT_SME_BIT, 1f
 
   // If TPIDR2_EL0 is null, the subroutine does nothing.
   mrs x16, TPIDR2_EL0
@@ -157,9 +154,9 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_za_disable)
   BTI_C
   // If the current thread does not have access to SME, the subroutine does
   // nothing.
-  adrp  x14, TPIDR2_SYMBOL
-  ldrb w14, [x14, TPIDR2_SYMBOL_OFFSET]
-  cbz w14, 0f
+  adrp x14, CPU_FEATS_SYMBOL
+  ldr x14, [x14, CPU_FEATS_SYMBOL_OFFSET]
+  tbz x14, #FEAT_SME_BIT, 0f
 
   // Otherwise, the subroutine behaves as if it did the following:
   // * Call __arm_tpidr2_save.
@@ -191,11 +188,9 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_get_current_vg)
   BTI_C
 
   adrp    x17, CPU_FEATS_SYMBOL
-  ldr     w17, [x17, CPU_FEATS_SYMBOL_OFFSET]
+  ldr     x17, [x17, CPU_FEATS_SYMBOL_OFFSET]
   tbnz    w17, #FEAT_SVE_BIT, 1f
-  adrp    x17, TPIDR2_SYMBOL
-  ldrb    w17, [x17, TPIDR2_SYMBOL_OFFSET]
-  cbz     x17, 2f
+  tbz     x17, #FEAT_SME_BIT, 2f
 0:
   mrs     x17, SVCR
   tbz     x17, #SVCR_PSTATE_SM_BIT, 2f
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64.c b/compiler-rt/lib/builtins/cpu_model/aarch64.c
index 74e5e01b66c547..ef15518ad5f754 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64.c
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64.c
@@ -80,6 +80,8 @@ struct {
 #include "aarch64/fmv/getauxval.inc"
 #elif defined(_WIN32)
 #include "aarch64/fmv/windows.inc"
+#elif defined(ENABLE_BAREMETAL_AARCH64_SME_FMV_FEATURES)
+#include "aarch64/fmv/baremetal_sme.inc"
 #else
 #include "aarch64/fmv/unimplemented.inc"
 #endif
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/baremetal_sme.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/baremetal_sme.inc
new file mode 100644
index 00000000000000..f188e84808e019
--- /dev/null
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/baremetal_sme.inc
@@ -0,0 +1,31 @@
+// For baremetal platforms, we don't really initialise '__aarch64_cpu_features',
+// with the exception of FEAT_SME, which we get from '__aarch64_sme_accessible'.
+
+#if defined(COMPILER_RT_SHARED_LIB)
+__attribute__((weak))
+#endif
+extern _Bool
+__aarch64_sme_accessible(void);
+
+static _Bool has_sme(void) {
+#if defined(COMPILER_RT_SHARED_LIB)
+  if (!__aarch64_sme_accessible)
+    return 0;
+#endif
+  return __aarch64_sme_accessible();
+}
+
+void __init_cpu_features_resolver(unsigned long hwcap,
+                                  const __ifunc_arg_t *arg) {}
+
+void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
+  // CPU features already initialized.
+  if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
+    return;
+
+  unsigned long long feat = 0;
+  if (has_sme())
+    feat |= 1ULL << FEAT_SME;
+
+  __atomic_store_n(&__aarch64_cpu_features.features, feat, __ATOMIC_RELAXED);
+}



More information about the llvm-commits mailing list