[compiler-rt] f445be9 - Reland "[compiler-rt][aarch64] Add SME ABI support routines." (#68875)

Sander de Smalen via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 12 06:58:41 PDT 2023


Author: Sander de Smalen
Date: 2023-10-12T13:56:58Z
New Revision: f445be9790f9998e9048fc2a12402f3e213c079f

URL: https://github.com/llvm/llvm-project/commit/f445be9790f9998e9048fc2a12402f3e213c079f
DIFF: https://github.com/llvm/llvm-project/commit/f445be9790f9998e9048fc2a12402f3e213c079f.diff

LOG: Reland "[compiler-rt][aarch64] Add SME ABI support routines." (#68875)

Resolved an issue with the Green Dragon build by fixing relocations for
MachO/Darwin, which does not compile without @page/@pageoff directives.

Also silenced a warning about the constructor(90) priority being < 101,
a range that is reserved for the implementation. In this case we are
compiling the implementation, so we should be able to use 90.

This reverts commit 072713add4408199d4bce7b3b02cc74a4a382ee0.
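
For context, the Linux detection path added in sme-abi-init.c boils down to a
hardware-capability query. A minimal standalone sketch using glibc's public
getauxval() from <sys/auxv.h> (the patch itself calls the internal
__getauxval; main() and the printout are illustrative only):

  #include <stdio.h>
  #include <sys/auxv.h>          // getauxval(), AT_HWCAP2

  #ifndef HWCAP2_SME
  #define HWCAP2_SME (1 << 23)   // same bit the patch tests
  #endif

  int main(void) {
    // Nonzero iff the kernel reports the SME hwcap for this process.
    unsigned long hwcap2 = getauxval(AT_HWCAP2);
    printf("SME available: %d\n", (hwcap2 & HWCAP2_SME) ? 1 : 0);
    return 0;
  }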

Added: 
    compiler-rt/lib/builtins/aarch64/sme-abi-init.c
    compiler-rt/lib/builtins/aarch64/sme-abi.S

Modified: 
    compiler-rt/cmake/Modules/AddCompilerRT.cmake
    compiler-rt/cmake/builtin-config-ix.cmake
    compiler-rt/lib/builtins/CMakeLists.txt

Removed: 
    


################################################################################
diff --git a/compiler-rt/cmake/Modules/AddCompilerRT.cmake b/compiler-rt/cmake/Modules/AddCompilerRT.cmake
index a72e279dd75e8fe..5ed49f0f5588144 100644
--- a/compiler-rt/cmake/Modules/AddCompilerRT.cmake
+++ b/compiler-rt/cmake/Modules/AddCompilerRT.cmake
@@ -312,6 +312,10 @@ function(add_compiler_rt_runtime name type)
       set(COMPONENT_OPTION COMPONENT ${libname})
     endif()
 
+    if(type STREQUAL "SHARED")
+      list(APPEND LIB_DEFS COMPILER_RT_SHARED_LIB)
+    endif()
+
     if(type STREQUAL "OBJECT")
       if(CMAKE_C_COMPILER_ID MATCHES Clang AND CMAKE_C_COMPILER_TARGET)
         list(APPEND extra_cflags_${libname} "--target=${CMAKE_C_COMPILER_TARGET}")

diff --git a/compiler-rt/cmake/builtin-config-ix.cmake b/compiler-rt/cmake/builtin-config-ix.cmake
index 9cf4877baf48953..e91e3923a756c53 100644
--- a/compiler-rt/cmake/builtin-config-ix.cmake
+++ b/compiler-rt/cmake/builtin-config-ix.cmake
@@ -33,6 +33,12 @@ asm(\".arch armv8-a+lse\");
 asm(\"cas w0, w1, [x2]\");
 ")
 
+builtin_check_c_compiler_source(COMPILER_RT_HAS_ASM_SME
+"
+asm(\".arch armv9-a+sme\");
+asm(\"smstart\");
+")
+
 if(ANDROID)
   set(OS_NAME "Android")
 else()

diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index 1afceddc62d846f..b1863746a57e7ee 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -551,6 +551,8 @@ set(aarch64_SOURCES
   ${GENERIC_SOURCES}
   cpu_model.c
   aarch64/fp_mode.c
+  aarch64/sme-abi.S
+  aarch64/sme-abi-init.c
 )
 
 # Generate outline atomics helpers from lse.S base
@@ -780,6 +782,7 @@ else ()
   endif()
 
   append_list_if(COMPILER_RT_HAS_ASM_LSE HAS_ASM_LSE BUILTIN_DEFS)
+  append_list_if(COMPILER_RT_HAS_ASM_SME HAS_ASM_SME BUILTIN_DEFS)
 
   foreach (arch ${BUILTIN_SUPPORTED_ARCH})
     if (CAN_TARGET_${arch})

diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi-init.c b/compiler-rt/lib/builtins/aarch64/sme-abi-init.c
new file mode 100644
index 000000000000000..b6ee12170d56dbd
--- /dev/null
+++ b/compiler-rt/lib/builtins/aarch64/sme-abi-init.c
@@ -0,0 +1,52 @@
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+__attribute__((visibility("hidden"), nocommon))
+_Bool __aarch64_has_sme_and_tpidr2_el0;
+
+// We have multiple ways to check whether SME is available, depending on the
+// target.
+// * For Linux we can use __getauxval().
+// * For newlib we can use __aarch64_sme_accessible().
+
+#if defined(__linux__)
+
+#ifndef AT_HWCAP2
+#define AT_HWCAP2 26
+#endif
+
+#ifndef HWCAP2_SME
+#define HWCAP2_SME (1 << 23)
+#endif
+
+extern unsigned long int __getauxval (unsigned long int);
+
+static _Bool has_sme(void) {
+  return __getauxval(AT_HWCAP2) & HWCAP2_SME;
+}
+
+#else  // defined(__linux__)
+
+#if defined(COMPILER_RT_SHARED_LIB)
+__attribute__((weak))
+#endif
+extern _Bool __aarch64_sme_accessible(void);
+
+static _Bool has_sme(void)  {
+#if defined(COMPILER_RT_SHARED_LIB)
+  if (!__aarch64_sme_accessible)
+    return 0;
+#endif
+  return __aarch64_sme_accessible();
+}
+
+#endif // defined(__linux__)
+
+#if __GNUC__ >= 9
+#pragma GCC diagnostic ignored "-Wprio-ctor-dtor"
+#endif
+__attribute__((constructor(90)))
+static void init_aarch64_has_sme(void) {
+  __aarch64_has_sme_and_tpidr2_el0 = has_sme();
+}

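For the non-Linux path in the file above, the platform (for example a
newlib-based environment) is expected to provide __aarch64_sme_accessible().
A hypothetical sketch of such a definition, purely to illustrate the contract
(not part of the patch):

  // Hypothetical platform-side definition of the hook declared above.
  // A real implementation would ask the platform (firmware, OS, or an ID
  // register probe) whether SME and TPIDR2_EL0 may be used by this thread.
  _Bool __aarch64_sme_accessible(void) {
    return 0;  // this illustrative stub conservatively reports "no SME"
  }
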
diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi.S b/compiler-rt/lib/builtins/aarch64/sme-abi.S
new file mode 100644
index 000000000000000..207810b2e2521f8
--- /dev/null
+++ b/compiler-rt/lib/builtins/aarch64/sme-abi.S
@@ -0,0 +1,197 @@
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// This file implements the support routines for the SME ABI,
+// described here:
+//  https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#sme-support-routines
+
+#include "../assembly.h"
+
+#ifdef HAS_ASM_SME
+#define ARCH armv9-a+sme
+#define SMSTOP_SM smstop sm
+#define SMSTOP_ZA smstop za
+#define REG_TPIDR2_EL0 TPIDR2_EL0
+#define REG_SVCR SVCR
+#define ADDSVL_X16_X16_1 addsvl x16, x16, #1
+#define LDR_ZA_W15_0_X16 ldr za[w15,0], [x16]
+#define STR_ZA_W15_0_X16 str za[w15,0], [x16]
+#define CNTD_X0 cntd x0
+#define CFI_OFFSET_VG_MINUS_16 .cfi_offset vg, -16
+#else
+#define ARCH armv8-a
+#define SMSTOP_SM .inst 0xd503427f
+#define SMSTOP_ZA .inst 0xd503447f
+#define REG_TPIDR2_EL0 S3_3_C13_C0_5
+#define REG_SVCR S3_3_C4_C2_2
+#define ADDSVL_X16_X16_1 .inst 0x04305830
+#define LDR_ZA_W15_0_X16 .inst 0xe1006200
+#define STR_ZA_W15_0_X16 .inst 0xe1206200
+#define CNTD_X0 .inst 0x04e0e3e0
+#define CFI_OFFSET_VG_MINUS_16 .cfi_escape 0x10, 0x2e, 0x03, 0x11, 0x70, 0x22 // $vg  @ cfa - 16
+#endif
+
+#if !defined(__APPLE__)
+#define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)
+#define TPIDR2_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)
+#else
+// MachO requires @page/@pageoff directives because the global is defined
+// in a different file. Otherwise this file may fail to build.
+#define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@page
+#define TPIDR2_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@pageoff
+#endif
+
+.arch ARCH
+
+// Utility function which calls a system's abort() routine. Because the function
+// is streaming-compatible it should disable streaming-SVE mode before calling
+// abort(). Note that there is no need to preserve any state before the call,
+// because the function does not return.
+DEFINE_COMPILERRT_PRIVATE_FUNCTION(do_abort)
+.cfi_startproc
+	.variant_pcs	SYMBOL_NAME(do_abort)
+	stp	x29, x30, [sp, #-32]!
+  CNTD_X0
+  // Store VG to a stack location that we describe with .cfi_offset
+  str x0, [sp, #16]
+  .cfi_def_cfa_offset 32
+  .cfi_offset w30, -24
+  .cfi_offset w29, -32
+  CFI_OFFSET_VG_MINUS_16
+	bl	__arm_sme_state
+	tbz	x0, #0, 2f
+1:
+	SMSTOP_SM
+2:
+  // We can't make this into a tail-call because the unwinder would
+  // need to restore the value of VG.
+	bl	SYMBOL_NAME(abort)
+.cfi_endproc
+END_COMPILERRT_FUNCTION(do_abort)
+
+// __arm_sme_state fills the result registers based on a local
+// that is set as part of the compiler-rt startup code.
+//   __aarch64_has_sme_and_tpidr2_el0
+DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sme_state)
+	.variant_pcs	__arm_sme_state
+  mov x0, xzr
+  mov x1, xzr
+
+  adrp  x16, TPIDR2_SYMBOL
+  ldrb w16, [x16, TPIDR2_SYMBOL_OFFSET]
+  cbz w16, 1f
+0:
+  orr x0, x0, #0xC000000000000000
+  mrs x16, REG_SVCR
+  bfxil x0, x16, #0, #2
+  mrs x1, REG_TPIDR2_EL0
+1:
+  ret
+END_COMPILERRT_OUTLINE_FUNCTION(__arm_sme_state)
+
+DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_restore)
+	.variant_pcs	__arm_tpidr2_restore
+  // If TPIDR2_EL0 is nonnull, the subroutine aborts in some platform-specific
+  // manner.
+  mrs x14, REG_TPIDR2_EL0
+  cbnz  x14, 2f
+
+  // If any of the reserved bytes in the first 16 bytes of BLK are nonzero,
+  // the subroutine [..] aborts in some platform-defined manner.
+  ldrh  w14, [x0, #10]
+  cbnz  w14, 2f
+  ldr w14, [x0, #12]
+  cbnz  w14, 2f
+
+  // If BLK.za_save_buffer is NULL, the subroutine does nothing.
+  ldr x16, [x0]
+  cbz x16, 1f
+
+  // If BLK.num_za_save_slices is zero, the subroutine does nothing.
+  ldrh  w14, [x0, #8]
+  cbz x14, 1f
+
+  mov x15, xzr
+0:
+  LDR_ZA_W15_0_X16
+  ADDSVL_X16_X16_1
+  add x15, x15, #1
+  cmp x14, x15
+  b.ne  0b
+1:
+  ret
+2:
+  b  SYMBOL_NAME(do_abort)
+END_COMPILERRT_OUTLINE_FUNCTION(__arm_tpidr2_restore)
+
+DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_save)
+  // If the current thread does not have access to TPIDR2_EL0, the subroutine
+  // does nothing.
+  adrp  x14, TPIDR2_SYMBOL
+  ldrb w14, [x14, TPIDR2_SYMBOL_OFFSET]
+  cbz w14, 1f
+
+  // If TPIDR2_EL0 is null, the subroutine does nothing.
+  mrs x16, REG_TPIDR2_EL0
+  cbz x16, 1f
+
+  // If any of the reserved bytes in the first 16 bytes of the TPIDR2 block are
+  // nonzero, the subroutine [..] aborts in some platform-defined manner.
+  ldrh  w14, [x16, #10]
+  cbnz  w14, 2f
+  ldr w14, [x16, #12]
+  cbnz  w14, 2f
+
+  // If num_za_save_slices is zero, the subroutine does nothing.
+  ldrh  w14, [x16, #8]
+  cbz x14, 1f
+
+  // If za_save_buffer is NULL, the subroutine does nothing.
+  ldr x16, [x16]
+  cbz x16, 1f
+
+  mov x15, xzr
+0:
+  STR_ZA_W15_0_X16
+  ADDSVL_X16_X16_1
+  add x15, x15, #1
+  cmp x14, x15
+  b.ne  0b
+1:
+  ret
+2:
+  b  SYMBOL_NAME(do_abort)
+END_COMPILERRT_OUTLINE_FUNCTION(__arm_tpidr2_save)
+
+DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_za_disable)
+  // If the current thread does not have access to SME, the subroutine does
+  // nothing.
+  adrp  x14, TPIDR2_SYMBOL
+  ldrb w14, [x14, TPIDR2_SYMBOL_OFFSET]
+  cbz w14, 0f
+
+  // Otherwise, the subroutine behaves as if it did the following:
+  // * Call __arm_tpidr2_save.
+  stp x29, x30, [sp, #-16]!
+  .cfi_def_cfa_offset 16
+  mov x29, sp
+  .cfi_def_cfa w29, 16
+  .cfi_offset w30, -8
+  .cfi_offset w29, -16
+  bl  __arm_tpidr2_save
+
+  // * Set TPIDR2_EL0 to null.
+  msr REG_TPIDR2_EL0, xzr
+
+  // * Set PSTATE.ZA to 0.
+  SMSTOP_ZA
+
+  .cfi_def_cfa wsp, 16
+  ldp x29, x30, [sp], #16
+  .cfi_def_cfa_offset 0
+  .cfi_restore w30
+  .cfi_restore w29
+0:
+  ret
+END_COMPILERRT_OUTLINE_FUNCTION(__arm_za_disable)




More information about the llvm-commits mailing list