[Mlir-commits] [compiler-rt] [llvm] [mlir] [compiler-rt] Don't provide `__arm_sme_state` for baremetal targets (PR #191434)
Benjamin Maxwell
llvmlistbot at llvm.org
Thu Apr 16 10:43:38 PDT 2026
https://github.com/MacDue updated https://github.com/llvm/llvm-project/pull/191434
>From 41d131e4388edabfd0fc773281669dad06988b70 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Fri, 10 Apr 2026 14:23:05 +0000
Subject: [PATCH 1/3] [compiler-rt] Don't provide `__arm_sme_state` for
baremetal targets
Previously, we required baremetal runtimes to implement an undocumented
`__aarch64_sme_accessible` hook to check if SME is available (as
checking CPU features may vary across targets).
This allowed us to provide a generic `__arm_sme_state` implementation
but caused some friction for toolchains that depend on compiler-rt.
This patch instead removes the implementation of `__arm_sme_state` for
baremetal. This makes it the responsibility of the runtime (e.g. libc)
to provide this function for baremetal targets.
The requirements of this function are documented in the AAPCS64:
https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#811__arm_sme_state
All other SME ABI routines are still provided by compiler-rt.
---
compiler-rt/lib/builtins/CMakeLists.txt | 2 +-
compiler-rt/lib/builtins/aarch64/sme-abi.S | 4 ++
compiler-rt/lib/builtins/cpu_model/aarch64.c | 6 +--
.../cpu_model/aarch64/fmv/baremetal.inc | 38 ++++++++++---------
4 files changed, 29 insertions(+), 21 deletions(-)
diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index 503a9aa3ff4ec..6444119abd340 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -992,7 +992,7 @@ else ()
elseif(COMPILER_RT_BAREMETAL_BUILD)
foreach (arch ${BUILTIN_SUPPORTED_ARCH})
if("${arch}" MATCHES "arm64|aarch64")
- list(APPEND BUILTIN_DEFS ENABLE_BAREMETAL_AARCH64_FMV)
+ list(APPEND BUILTIN_DEFS TARGET_BAREMETAL_AARCH64)
endif()
endforeach ()
endif()
diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi.S b/compiler-rt/lib/builtins/aarch64/sme-abi.S
index 465f1c763c0d3..45a31cd21d0ab 100644
--- a/compiler-rt/lib/builtins/aarch64/sme-abi.S
+++ b/compiler-rt/lib/builtins/aarch64/sme-abi.S
@@ -53,6 +53,9 @@ DEFINE_COMPILERRT_PRIVATE_FUNCTION(do_abort)
.cfi_endproc
END_COMPILERRT_FUNCTION(do_abort)
+// Note: It is the responsibility of the baremetal runtime/libc to implement
+// __arm_sme_state.
+#if !defined(TARGET_BAREMETAL_AARCH64)
// __arm_sme_state fills the result registers based on a local
// that is set as part of the compiler-rt startup code.
// __aarch64_has_sme_and_tpidr2_el0
@@ -73,6 +76,7 @@ DEFINE_COMPILERRT_FUNCTION(__arm_sme_state)
1:
ret
END_COMPILERRT_FUNCTION(__arm_sme_state)
+#endif // !defined(TARGET_BAREMETAL_AARCH64)
DEFINE_COMPILERRT_FUNCTION(__arm_tpidr2_restore)
.variant_pcs __arm_tpidr2_restore
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64.c b/compiler-rt/lib/builtins/cpu_model/aarch64.c
index 119d18b8df8d7..347bb4e121c97 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64.c
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64.c
@@ -69,7 +69,9 @@ struct {
// The formatter wants to re-order these includes, but doing so is incorrect:
// clang-format off
-#if defined(__APPLE__)
+#if defined(TARGET_BAREMETAL_AARCH64)
+#include "aarch64/fmv/baremetal.inc"
+#elif defined(__APPLE__)
#include "aarch64/fmv/apple.inc"
#elif defined(__FreeBSD__) || defined(__OpenBSD__)
#include "aarch64/fmv/hwcap.inc"
@@ -84,8 +86,6 @@ struct {
#include "aarch64/fmv/getauxval.inc"
#elif defined(_WIN32)
#include "aarch64/fmv/windows.inc"
-#elif defined(ENABLE_BAREMETAL_AARCH64_FMV)
-#include "aarch64/fmv/baremetal.inc"
#else
#include "aarch64/fmv/unimplemented.inc"
#endif
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/baremetal.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/baremetal.inc
index f188e84808e01..e6229300bce91 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/baremetal.inc
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/baremetal.inc
@@ -1,19 +1,21 @@
// For baremetal platforms, we don't really initialise '__aarch64_cpu_features',
-// with exception of FEAT_SME that we can get from '__aarch64_sme_accessible'.
-
-#if defined(COMPILER_RT_SHARED_LIB)
-__attribute__((weak))
-#endif
-extern _Bool
-__aarch64_sme_accessible(void);
-
-static _Bool has_sme(void) {
-#if defined(COMPILER_RT_SHARED_LIB)
- if (!__aarch64_sme_accessible)
- return 0;
-#endif
- return __aarch64_sme_accessible();
-}
+// with exception of FEAT_SME that we can get from '__arm_sme_state'.
+
+#define SME_STATE_X0_HAS_SME (1ULL << 63)
+
+typedef unsigned long long u64;
+_Static_assert(sizeof(u64) == 8, "u64 should be 64-bits");
+
+// The libc/runtime for baremetal targets should provide an implementation of
+// __arm_sme_state. LLVM's compiler-rt does not provide this function for
+// baremetal as checking CPU features is implementation defined.
+//
+// See:
+// https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#811__arm_sme_state
+extern struct {
+ u64 x0;
+ u64 x1;
+} __arm_sme_state(void);
void __init_cpu_features_resolver(unsigned long hwcap,
const __ifunc_arg_t *arg) {}
@@ -23,8 +25,10 @@ void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
return;
- unsigned long long feat = 0;
- if (has_sme())
+ _Bool has_sme = (__arm_sme_state().x0 & SME_STATE_X0_HAS_SME) != 0;
+
+ u64 feat = 0;
+ if (has_sme)
feat |= 1ULL << FEAT_SME;
__atomic_store_n(&__aarch64_cpu_features.features, feat, __ATOMIC_RELAXED);
>From be64dd062847035bdd33d9025a440a7f7252909f Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Fri, 10 Apr 2026 16:03:22 +0000
Subject: [PATCH 2/3] Fixups
---
.../cpu_model/aarch64/fmv/baremetal.inc | 23 ++++++++++---------
1 file changed, 12 insertions(+), 11 deletions(-)
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/baremetal.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/baremetal.inc
index e6229300bce91..e95ac9fac8709 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/baremetal.inc
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/baremetal.inc
@@ -1,10 +1,4 @@
-// For baremetal platforms, we don't really initialise '__aarch64_cpu_features',
-// with exception of FEAT_SME that we can get from '__arm_sme_state'.
-
-#define SME_STATE_X0_HAS_SME (1ULL << 63)
-
-typedef unsigned long long u64;
-_Static_assert(sizeof(u64) == 8, "u64 should be 64-bits");
+#include <stdint.h>
// The libc/runtime for baremetal targets should provide an implementation of
// __arm_sme_state. LLVM's compiler-rt does not provide this function for
@@ -13,10 +7,14 @@ _Static_assert(sizeof(u64) == 8, "u64 should be 64-bits");
// See:
// https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#811__arm_sme_state
extern struct {
- u64 x0;
- u64 x1;
+ uint64_t x0;
+ uint64_t x1;
} __arm_sme_state(void);
+// __arm_sme_state:
+// Bit 63 of X0 is set to one iff the current thread has access to SME.
+#define SME_STATE_HAS_SME_BIT_X0 (1ULL << 63)
+
void __init_cpu_features_resolver(unsigned long hwcap,
const __ifunc_arg_t *arg) {}
@@ -25,9 +23,12 @@ void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
return;
- _Bool has_sme = (__arm_sme_state().x0 & SME_STATE_X0_HAS_SME) != 0;
+ // For baremetal platforms, we don't really initialise
+ // '__aarch64_cpu_features', with exception of FEAT_SME that we can get from
+ // '__arm_sme_state'.
+ _Bool has_sme = (__arm_sme_state().x0 & SME_STATE_HAS_SME_BIT_X0) != 0;
- u64 feat = 0;
+ uint64_t feat = 0;
if (has_sme)
feat |= 1ULL << FEAT_SME;
>From 09ded1e7e3c63bbf4bee1ca3eadf7f9692357f0a Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Mon, 13 Apr 2026 13:31:26 +0000
Subject: [PATCH 3/3] Fixups
---
compiler-rt/lib/builtins/aarch64/sme-abi.S | 1 -
llvm/test/CodeGen/AArch64/aarch64-sme-stubs.ll | 9 ---------
mlir/lib/ExecutionEngine/ArmSMEStubs.cpp | 5 -----
3 files changed, 15 deletions(-)
diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi.S b/compiler-rt/lib/builtins/aarch64/sme-abi.S
index 45a31cd21d0ab..9a9187622b1ae 100644
--- a/compiler-rt/lib/builtins/aarch64/sme-abi.S
+++ b/compiler-rt/lib/builtins/aarch64/sme-abi.S
@@ -58,7 +58,6 @@ END_COMPILERRT_FUNCTION(do_abort)
#if !defined(TARGET_BAREMETAL_AARCH64)
// __arm_sme_state fills the result registers based on a local
// that is set as part of the compiler-rt startup code.
-// __aarch64_has_sme_and_tpidr2_el0
DEFINE_COMPILERRT_FUNCTION(__arm_sme_state)
.variant_pcs __arm_sme_state
BTI_C
diff --git a/llvm/test/CodeGen/AArch64/aarch64-sme-stubs.ll b/llvm/test/CodeGen/AArch64/aarch64-sme-stubs.ll
index f7182e2a166a5..9369cb99d88ab 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-sme-stubs.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-sme-stubs.ll
@@ -3,15 +3,6 @@
; Checks SME ABI routines can be implemented as stubs without +sme.
-define i1 @__aarch64_sme_accessible() {
-; CHECK-LABEL: __aarch64_sme_accessible:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w0, #1 // =0x1
-; CHECK-NEXT: ret
-entry:
- ret i1 true
-}
-
define [2 x i64] @__arm_sme_state() {
; CHECK-LABEL: __arm_sme_state:
; CHECK: // %bb.0: // %entry
diff --git a/mlir/lib/ExecutionEngine/ArmSMEStubs.cpp b/mlir/lib/ExecutionEngine/ArmSMEStubs.cpp
index e01384ba91a0d..8abd2073494c3 100644
--- a/mlir/lib/ExecutionEngine/ArmSMEStubs.cpp
+++ b/mlir/lib/ExecutionEngine/ArmSMEStubs.cpp
@@ -34,11 +34,6 @@
extern "C" {
-bool MLIR_ARMSMEABISTUBS_EXPORTED __aarch64_sme_accessible() {
- // The ArmSME tests are run within an emulator so we assume SME is available.
- return true;
-}
-
struct sme_state {
int64_t x0;
int64_t x1;
More information about the Mlir-commits
mailing list