[compiler-rt] cb4f4a8 - [compiler-rt][AArch64] Rewrite SME routines to all use __aarch64_cpu_features. (#119414)

via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 11 07:53:22 PST 2024


Author: Sander de Smalen
Date: 2024-12-11T15:53:17Z
New Revision: cb4f4a8a4dd18bf00604b49faadd7b0ee4394d3d

URL: https://github.com/llvm/llvm-project/commit/cb4f4a8a4dd18bf00604b49faadd7b0ee4394d3d
DIFF: https://github.com/llvm/llvm-project/commit/cb4f4a8a4dd18bf00604b49faadd7b0ee4394d3d.diff

LOG: [compiler-rt][AArch64] Rewrite SME routines to all use __aarch64_cpu_features. (#119414)

When #92921 added the `__arm_get_current_vg` functionality, it used the
FMV feature bits mechanism rather than the mechanism that was previously
added for SME, which called `getauxval` on Linux platforms or relied on
`__aarch64_sme_accessible` for baremetal libraries. It is better to
always use `__aarch64_cpu_features`.

For baremetal we still need to rely on `__aarch64_sme_accessible` to
initialise the struct.

Added: 
    compiler-rt/lib/builtins/aarch64/sme-abi-assert.c
    compiler-rt/lib/builtins/cpu_model/aarch64/fmv/baremetal.inc

Modified: 
    compiler-rt/lib/builtins/CMakeLists.txt
    compiler-rt/lib/builtins/aarch64/sme-abi.S
    compiler-rt/lib/builtins/cpu_model/aarch64.c

Removed: 
    compiler-rt/lib/builtins/aarch64/sme-abi-init.c


################################################################################
diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index 70dc7d860d8f6a..b32b42423f6a90 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -573,8 +573,8 @@ set(aarch64_SOURCES
 )
 
 if (COMPILER_RT_HAS_AARCH64_SME)
-  if (NOT COMPILER_RT_DISABLE_AARCH64_FMV AND COMPILER_RT_HAS_FNO_BUILTIN_FLAG AND (COMPILER_RT_HAS_AUXV OR COMPILER_RT_BAREMETAL_BUILD))
-    list(APPEND aarch64_SOURCES aarch64/sme-abi.S aarch64/sme-libc-mem-routines.S aarch64/sme-abi-init.c aarch64/sme-libc-routines.c)
+  if (NOT COMPILER_RT_DISABLE_AARCH64_FMV AND COMPILER_RT_HAS_FNO_BUILTIN_FLAG)
+    list(APPEND aarch64_SOURCES aarch64/sme-abi.S aarch64/sme-libc-mem-routines.S aarch64/sme-abi-assert.c aarch64/sme-libc-routines.c)
     message(STATUS "AArch64 SME ABI routines enabled")
     set_source_files_properties(aarch64/sme-libc-routines.c PROPERTIES COMPILE_FLAGS "-fno-builtin")
   else()
@@ -842,6 +842,8 @@ else ()
 
   if(COMPILER_RT_DISABLE_AARCH64_FMV)
     list(APPEND BUILTIN_DEFS DISABLE_AARCH64_FMV)
+  elseif(COMPILER_RT_BAREMETAL_BUILD)
+    list(APPEND BUILTIN_DEFS ENABLE_BAREMETAL_AARCH64_FMV)
   endif()
 
   append_list_if(COMPILER_RT_HAS_ASM_LSE HAS_ASM_LSE BUILTIN_DEFS)

diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi-assert.c b/compiler-rt/lib/builtins/aarch64/sme-abi-assert.c
new file mode 100644
index 00000000000000..4333353f8d2d1b
--- /dev/null
+++ b/compiler-rt/lib/builtins/aarch64/sme-abi-assert.c
@@ -0,0 +1,10 @@
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// We rely on the FMV __aarch64_cpu_features mechanism to determine
+// which features are set at runtime.
+
+#include "../cpu_model/AArch64CPUFeatures.inc"
+_Static_assert(FEAT_SVE == 30, "sme-abi.S assumes FEAT_SVE = 30");
+_Static_assert(FEAT_SME == 42, "sme-abi.S assumes FEAT_SME = 42");

diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi-init.c b/compiler-rt/lib/builtins/aarch64/sme-abi-init.c
deleted file mode 100644
index d3cd8278a5d214..00000000000000
--- a/compiler-rt/lib/builtins/aarch64/sme-abi-init.c
+++ /dev/null
@@ -1,50 +0,0 @@
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-__attribute__((visibility("hidden"), nocommon))
-_Bool __aarch64_has_sme_and_tpidr2_el0;
-
-// We have multiple ways to check that the function has SME, depending on our
-// target.
-// * For Linux/Glibc we can use getauxval().
-// * For Android we can use getauxval().
-// * For newlib we can use __aarch64_sme_accessible().
-
-#if defined(__linux__)
-
-#if defined(__ANDROID__)
-#include <sys/auxv.h>
-#elif __has_include(<sys/auxv.h>)
-#include <sys/auxv.h>
-#else
-#define getauxval(x) 0
-#endif
-#include "../cpu_model/aarch64/hwcap.inc"
-
-static _Bool has_sme(void) { return getauxval(AT_HWCAP2) & HWCAP2_SME; }
-
-#else  // defined(__linux__)
-
-#if defined(COMPILER_RT_SHARED_LIB)
-__attribute__((weak))
-#endif
-extern _Bool __aarch64_sme_accessible(void);
-
-static _Bool has_sme(void)  {
-#if defined(COMPILER_RT_SHARED_LIB)
-  if (!__aarch64_sme_accessible)
-    return 0;
-#endif
-  return __aarch64_sme_accessible();
-}
-
-#endif // defined(__linux__)
-
-#if __GNUC__ >= 9
-#pragma GCC diagnostic ignored "-Wprio-ctor-dtor"
-#endif
-__attribute__((constructor(90)))
-static void init_aarch64_has_sme(void) {
-  __aarch64_has_sme_and_tpidr2_el0 = has_sme();
-}

diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi.S b/compiler-rt/lib/builtins/aarch64/sme-abi.S
index 623a95dd4dae5f..a6bb921bd9e6b9 100644
--- a/compiler-rt/lib/builtins/aarch64/sme-abi.S
+++ b/compiler-rt/lib/builtins/aarch64/sme-abi.S
@@ -9,18 +9,15 @@
 #include "../assembly.h"
 
 .set FEAT_SVE_BIT, 30
+.set FEAT_SME_BIT, 42
 .set SVCR_PSTATE_SM_BIT, 0
 
 #if !defined(__APPLE__)
-#define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)
-#define TPIDR2_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)
 #define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features)
 #define CPU_FEATS_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_cpu_features)
 #else
 // MachO requires @page/@pageoff directives because the global is defined
 // in a different file. Otherwise this file may fail to build.
-#define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@page
-#define TPIDR2_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@pageoff
 #define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features)@page
 #define CPU_FEATS_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_cpu_features)@pageoff
 #endif
@@ -63,9 +60,9 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sme_state)
   mov x0, xzr
   mov x1, xzr
 
-  adrp  x16, TPIDR2_SYMBOL
-  ldrb w16, [x16, TPIDR2_SYMBOL_OFFSET]
-  cbz w16, 1f
+  adrp x16, CPU_FEATS_SYMBOL
+  ldr x16, [x16, CPU_FEATS_SYMBOL_OFFSET]
+  tbz x16, #FEAT_SME_BIT, 1f
 0:
   orr x0, x0, #0xC000000000000000
   mrs x16, SVCR
@@ -116,9 +113,9 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_save)
   BTI_C
   // If the current thread does not have access to TPIDR2_EL0, the subroutine
   // does nothing.
-  adrp  x14, TPIDR2_SYMBOL
-  ldrb w14, [x14, TPIDR2_SYMBOL_OFFSET]
-  cbz w14, 1f
+  adrp x14, CPU_FEATS_SYMBOL
+  ldr x14, [x14, CPU_FEATS_SYMBOL_OFFSET]
+  tbz x14, #FEAT_SME_BIT, 1f
 
   // If TPIDR2_EL0 is null, the subroutine does nothing.
   mrs x16, TPIDR2_EL0
@@ -157,9 +154,9 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_za_disable)
   BTI_C
   // If the current thread does not have access to SME, the subroutine does
   // nothing.
-  adrp  x14, TPIDR2_SYMBOL
-  ldrb w14, [x14, TPIDR2_SYMBOL_OFFSET]
-  cbz w14, 0f
+  adrp x14, CPU_FEATS_SYMBOL
+  ldr x14, [x14, CPU_FEATS_SYMBOL_OFFSET]
+  tbz x14, #FEAT_SME_BIT, 0f
 
   // Otherwise, the subroutine behaves as if it did the following:
   // * Call __arm_tpidr2_save.
@@ -191,11 +188,9 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_get_current_vg)
   BTI_C
 
   adrp    x17, CPU_FEATS_SYMBOL
-  ldr     w17, [x17, CPU_FEATS_SYMBOL_OFFSET]
+  ldr     x17, [x17, CPU_FEATS_SYMBOL_OFFSET]
   tbnz    w17, #FEAT_SVE_BIT, 1f
-  adrp    x17, TPIDR2_SYMBOL
-  ldrb    w17, [x17, TPIDR2_SYMBOL_OFFSET]
-  cbz     x17, 2f
+  tbz     x17, #FEAT_SME_BIT, 2f
 0:
   mrs     x17, SVCR
   tbz     x17, #SVCR_PSTATE_SM_BIT, 2f

diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64.c b/compiler-rt/lib/builtins/cpu_model/aarch64.c
index 74e5e01b66c547..4082fd62ea11a2 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64.c
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64.c
@@ -80,6 +80,8 @@ struct {
 #include "aarch64/fmv/getauxval.inc"
 #elif defined(_WIN32)
 #include "aarch64/fmv/windows.inc"
+#elif defined(ENABLE_BAREMETAL_AARCH64_FMV)
+#include "aarch64/fmv/baremetal.inc"
 #else
 #include "aarch64/fmv/unimplemented.inc"
 #endif

diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/baremetal.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/baremetal.inc
new file mode 100644
index 00000000000000..f188e84808e019
--- /dev/null
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/baremetal.inc
@@ -0,0 +1,31 @@
+// For baremetal platforms, we don't really initialise '__aarch64_cpu_features',
+// with exception of FEAT_SME that we can get from '__aarch64_sme_accessible'.
+
+#if defined(COMPILER_RT_SHARED_LIB)
+__attribute__((weak))
+#endif
+extern _Bool
+__aarch64_sme_accessible(void);
+
+static _Bool has_sme(void) {
+#if defined(COMPILER_RT_SHARED_LIB)
+  if (!__aarch64_sme_accessible)
+    return 0;
+#endif
+  return __aarch64_sme_accessible();
+}
+
+void __init_cpu_features_resolver(unsigned long hwcap,
+                                  const __ifunc_arg_t *arg) {}
+
+void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
+  // CPU features already initialized.
+  if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
+    return;
+
+  unsigned long long feat = 0;
+  if (has_sme())
+    feat |= 1ULL << FEAT_SME;
+
+  __atomic_store_n(&__aarch64_cpu_features.features, feat, __ATOMIC_RELAXED);
+}


        


More information about the llvm-commits mailing list