[Mlir-commits] [compiler-rt] [llvm] [mlir] [compiler-rt] Don't provide `__arm_sme_state` for baremetal targets (PR #191434)

Benjamin Maxwell llvmlistbot at llvm.org
Thu Apr 16 10:43:38 PDT 2026


https://github.com/MacDue updated https://github.com/llvm/llvm-project/pull/191434

>From 41d131e4388edabfd0fc773281669dad06988b70 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Fri, 10 Apr 2026 14:23:05 +0000
Subject: [PATCH 1/3] [compiler-rt] Don't provide `__arm_sme_state` for
 baremetal targets

Previously, we required baremetal runtimes to implement an undocumented
`__aarch64_sme_accessible` hook to check if SME is available (as
checking CPU features may vary across targets).

This allowed us to provide a generic `__arm_sme_state` implementation
but caused some friction for toolchains that depend on compiler-rt.

This patch instead removes the implementation of `__arm_sme_state` for
baremetal. This makes it the responsibility of the runtime (e.g. libc)
to provide this function for baremetal targets.

The requirements of this function are documented in the AAPCS64:
https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#811__arm_sme_state

All other SME ABI routines are still provided by compiler-rt.
---
 compiler-rt/lib/builtins/CMakeLists.txt       |  2 +-
 compiler-rt/lib/builtins/aarch64/sme-abi.S    |  4 ++
 compiler-rt/lib/builtins/cpu_model/aarch64.c  |  6 +--
 .../cpu_model/aarch64/fmv/baremetal.inc       | 38 ++++++++++---------
 4 files changed, 29 insertions(+), 21 deletions(-)

diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index 503a9aa3ff4ec..6444119abd340 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -992,7 +992,7 @@ else ()
   elseif(COMPILER_RT_BAREMETAL_BUILD)
     foreach (arch ${BUILTIN_SUPPORTED_ARCH})
       if("${arch}" MATCHES "arm64|aarch64")
-        list(APPEND BUILTIN_DEFS ENABLE_BAREMETAL_AARCH64_FMV)
+        list(APPEND BUILTIN_DEFS TARGET_BAREMETAL_AARCH64)
       endif()
     endforeach ()
   endif()
diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi.S b/compiler-rt/lib/builtins/aarch64/sme-abi.S
index 465f1c763c0d3..45a31cd21d0ab 100644
--- a/compiler-rt/lib/builtins/aarch64/sme-abi.S
+++ b/compiler-rt/lib/builtins/aarch64/sme-abi.S
@@ -53,6 +53,9 @@ DEFINE_COMPILERRT_PRIVATE_FUNCTION(do_abort)
   .cfi_endproc
 END_COMPILERRT_FUNCTION(do_abort)
 
+// Note: It is the responsibility of the baremetal runtime/libc to implement
+// __arm_sme_state.
+#if !defined(TARGET_BAREMETAL_AARCH64)
 // __arm_sme_state fills the result registers based on a local
 // that is set as part of the compiler-rt startup code.
 //   __aarch64_has_sme_and_tpidr2_el0
@@ -73,6 +76,7 @@ DEFINE_COMPILERRT_FUNCTION(__arm_sme_state)
 1:
   ret
 END_COMPILERRT_FUNCTION(__arm_sme_state)
+#endif // !defined(TARGET_BAREMETAL_AARCH64)
 
 DEFINE_COMPILERRT_FUNCTION(__arm_tpidr2_restore)
   .variant_pcs __arm_tpidr2_restore
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64.c b/compiler-rt/lib/builtins/cpu_model/aarch64.c
index 119d18b8df8d7..347bb4e121c97 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64.c
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64.c
@@ -69,7 +69,9 @@ struct {
 
 // The formatter wants to re-order these includes, but doing so is incorrect:
 // clang-format off
-#if defined(__APPLE__)
+#if defined(TARGET_BAREMETAL_AARCH64)
+#include "aarch64/fmv/baremetal.inc"
+#elif defined(__APPLE__)
 #include "aarch64/fmv/apple.inc"
 #elif defined(__FreeBSD__) || defined(__OpenBSD__)
 #include "aarch64/fmv/hwcap.inc"
@@ -84,8 +86,6 @@ struct {
 #include "aarch64/fmv/getauxval.inc"
 #elif defined(_WIN32)
 #include "aarch64/fmv/windows.inc"
-#elif defined(ENABLE_BAREMETAL_AARCH64_FMV)
-#include "aarch64/fmv/baremetal.inc"
 #else
 #include "aarch64/fmv/unimplemented.inc"
 #endif
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/baremetal.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/baremetal.inc
index f188e84808e01..e6229300bce91 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/baremetal.inc
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/baremetal.inc
@@ -1,19 +1,21 @@
 // For baremetal platforms, we don't really initialise '__aarch64_cpu_features',
-// with exception of FEAT_SME that we can get from '__aarch64_sme_accessible'.
-
-#if defined(COMPILER_RT_SHARED_LIB)
-__attribute__((weak))
-#endif
-extern _Bool
-__aarch64_sme_accessible(void);
-
-static _Bool has_sme(void) {
-#if defined(COMPILER_RT_SHARED_LIB)
-  if (!__aarch64_sme_accessible)
-    return 0;
-#endif
-  return __aarch64_sme_accessible();
-}
+// with exception of FEAT_SME that we can get from '__arm_sme_state'.
+
+#define SME_STATE_X0_HAS_SME (1ULL << 63)
+
+typedef unsigned long long u64;
+_Static_assert(sizeof(u64) == 8, "u64 should be 64-bits");
+
+// The libc/runtime for baremetal targets should provide an implementation of
+// __arm_sme_state. LLVM's compiler-rt does not provide this function for
+// baremetal as checking CPU features is implementation defined.
+//
+// See:
+// https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#811__arm_sme_state
+extern struct {
+  u64 x0;
+  u64 x1;
+} __arm_sme_state(void);
 
 void __init_cpu_features_resolver(unsigned long hwcap,
                                   const __ifunc_arg_t *arg) {}
@@ -23,8 +25,10 @@ void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
   if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
     return;
 
-  unsigned long long feat = 0;
-  if (has_sme())
+  _Bool has_sme = (__arm_sme_state().x0 & SME_STATE_X0_HAS_SME) != 0;
+
+  u64 feat = 0;
+  if (has_sme)
     feat |= 1ULL << FEAT_SME;
 
   __atomic_store_n(&__aarch64_cpu_features.features, feat, __ATOMIC_RELAXED);

>From be64dd062847035bdd33d9025a440a7f7252909f Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Fri, 10 Apr 2026 16:03:22 +0000
Subject: [PATCH 2/3] Fixups

---
 .../cpu_model/aarch64/fmv/baremetal.inc       | 23 ++++++++++---------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/baremetal.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/baremetal.inc
index e6229300bce91..e95ac9fac8709 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/baremetal.inc
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/baremetal.inc
@@ -1,10 +1,4 @@
-// For baremetal platforms, we don't really initialise '__aarch64_cpu_features',
-// with exception of FEAT_SME that we can get from '__arm_sme_state'.
-
-#define SME_STATE_X0_HAS_SME (1ULL << 63)
-
-typedef unsigned long long u64;
-_Static_assert(sizeof(u64) == 8, "u64 should be 64-bits");
+#include <stdint.h>
 
 // The libc/runtime for baremetal targets should provide an implementation of
 // __arm_sme_state. LLVM's compiler-rt does not provide this function for
@@ -13,10 +7,14 @@ _Static_assert(sizeof(u64) == 8, "u64 should be 64-bits");
 // See:
 // https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#811__arm_sme_state
 extern struct {
-  u64 x0;
-  u64 x1;
+  uint64_t x0;
+  uint64_t x1;
 } __arm_sme_state(void);
 
+// __arm_sme_state:
+// Bit 63 of X0 is set to one iff the current thread has access to SME.
+#define SME_STATE_HAS_SME_BIT_X0 (1ULL << 63)
+
 void __init_cpu_features_resolver(unsigned long hwcap,
                                   const __ifunc_arg_t *arg) {}
 
@@ -25,9 +23,12 @@ void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
   if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
     return;
 
-  _Bool has_sme = (__arm_sme_state().x0 & SME_STATE_X0_HAS_SME) != 0;
+  // For baremetal platforms, we don't really initialise
+  // '__aarch64_cpu_features', with exception of FEAT_SME that we can get from
+  // '__arm_sme_state'.
+  _Bool has_sme = (__arm_sme_state().x0 & SME_STATE_HAS_SME_BIT_X0) != 0;
 
-  u64 feat = 0;
+  uint64_t feat = 0;
   if (has_sme)
     feat |= 1ULL << FEAT_SME;
 

>From 09ded1e7e3c63bbf4bee1ca3eadf7f9692357f0a Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Mon, 13 Apr 2026 13:31:26 +0000
Subject: [PATCH 3/3] Fixups

---
 compiler-rt/lib/builtins/aarch64/sme-abi.S     | 1 -
 llvm/test/CodeGen/AArch64/aarch64-sme-stubs.ll | 9 ---------
 mlir/lib/ExecutionEngine/ArmSMEStubs.cpp       | 5 -----
 3 files changed, 15 deletions(-)

diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi.S b/compiler-rt/lib/builtins/aarch64/sme-abi.S
index 45a31cd21d0ab..9a9187622b1ae 100644
--- a/compiler-rt/lib/builtins/aarch64/sme-abi.S
+++ b/compiler-rt/lib/builtins/aarch64/sme-abi.S
@@ -58,7 +58,6 @@ END_COMPILERRT_FUNCTION(do_abort)
 #if !defined(TARGET_BAREMETAL_AARCH64)
 // __arm_sme_state fills the result registers based on a local
 // that is set as part of the compiler-rt startup code.
-//   __aarch64_has_sme_and_tpidr2_el0
 DEFINE_COMPILERRT_FUNCTION(__arm_sme_state)
   .variant_pcs __arm_sme_state
   BTI_C
diff --git a/llvm/test/CodeGen/AArch64/aarch64-sme-stubs.ll b/llvm/test/CodeGen/AArch64/aarch64-sme-stubs.ll
index f7182e2a166a5..9369cb99d88ab 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-sme-stubs.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-sme-stubs.ll
@@ -3,15 +3,6 @@
 
 ; Checks SME ABI routines can be implemented as stubs without +sme.
 
-define i1 @__aarch64_sme_accessible() {
-; CHECK-LABEL: __aarch64_sme_accessible:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w0, #1 // =0x1
-; CHECK-NEXT:    ret
-entry:
-  ret i1 true
-}
-
 define [2 x i64] @__arm_sme_state() {
 ; CHECK-LABEL: __arm_sme_state:
 ; CHECK:       // %bb.0: // %entry
diff --git a/mlir/lib/ExecutionEngine/ArmSMEStubs.cpp b/mlir/lib/ExecutionEngine/ArmSMEStubs.cpp
index e01384ba91a0d..8abd2073494c3 100644
--- a/mlir/lib/ExecutionEngine/ArmSMEStubs.cpp
+++ b/mlir/lib/ExecutionEngine/ArmSMEStubs.cpp
@@ -34,11 +34,6 @@
 
 extern "C" {
 
-bool MLIR_ARMSMEABISTUBS_EXPORTED __aarch64_sme_accessible() {
-  // The ArmSME tests are run within an emulator so we assume SME is available.
-  return true;
-}
-
 struct sme_state {
   int64_t x0;
   int64_t x1;



More information about the Mlir-commits mailing list