[compiler-rt] [compiler-rt][AArch64] Allow platform-specific mangling of SME routines. (PR #119864)

Sander de Smalen via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 13 03:52:36 PST 2024


https://github.com/sdesmalen-arm created https://github.com/llvm/llvm-project/pull/119864

Support platform-specific symbol mangling so that the compiler does not emit a call to a function whose name is mangled differently from its definition in the runtime library.
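For context: in compiler-rt's builtins, this mangling is driven by the SYMBOL_NAME macro from compiler-rt/lib/builtins/assembly.h, which prepends the platform's user-label prefix (a leading underscore on Apple platforms, empty on most ELF targets). DEFINE_COMPILERRT_FUNCTION defines its symbol through SYMBOL_NAME, whereas the OUTLINE_FUNCTION_UNMANGLED variant used the raw name, so on a platform with a non-empty prefix the compiler-emitted call (e.g. ___arm_sme_state on Darwin) would not resolve to the library's unmangled definition (__arm_sme_state). A simplified sketch of the relevant macros (paraphrased, not the verbatim header):

    /* Paraphrased sketch of compiler-rt/lib/builtins/assembly.h;
       helper names are illustrative, not the exact upstream spellings. */
    #define GLUE2_(a, b) a##b
    #define GLUE(a, b) GLUE2_(a, b)

    #ifdef __USER_LABEL_PREFIX__
    /* e.g. on Apple platforms __USER_LABEL_PREFIX__ is `_`, so
       SYMBOL_NAME(__arm_sme_state) expands to ___arm_sme_state. */
    #define SYMBOL_NAME(name) GLUE(__USER_LABEL_PREFIX__, name)
    #else
    #define SYMBOL_NAME(name) name
    #endif

With this patch, both the definitions (via DEFINE_COMPILERRT_FUNCTION) and the intra-library calls (via bl SYMBOL_NAME(...)) go through the same macro, so caller and callee agree on every platform.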

From 83152b309f61f243ed79431be0cd02d49fbf5034 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Mon, 9 Dec 2024 11:58:51 +0000
Subject: [PATCH] [compiler-rt][AArch64] Allow platform-specific mangling of
 SME routines.

Support platform-specific symbol mangling so that the compiler does not
emit a call to a function whose name is mangled differently from its
definition in the runtime library.
---
 compiler-rt/lib/builtins/aarch64/sme-abi.S    | 26 ++++++++++---------
 .../builtins/aarch64/sme-libc-mem-routines.S  |  8 +++---
 2 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi.S b/compiler-rt/lib/builtins/aarch64/sme-abi.S
index a6bb921bd9e6b9..45bd221655fd66 100644
--- a/compiler-rt/lib/builtins/aarch64/sme-abi.S
+++ b/compiler-rt/lib/builtins/aarch64/sme-abi.S
@@ -40,7 +40,7 @@ DEFINE_COMPILERRT_PRIVATE_FUNCTION(do_abort)
   .cfi_offset w30, -24
   .cfi_offset w29, -32
   .cfi_offset 46, -16
-  bl  __arm_sme_state
+  bl  SYMBOL_NAME(__arm_sme_state)
   tbz  x0, #0, 2f
 1:
   smstop sm
@@ -54,7 +54,7 @@ END_COMPILERRT_FUNCTION(do_abort)
 // __arm_sme_state fills the result registers based on a local
 // that is set as part of the compiler-rt startup code.
 //   __aarch64_has_sme_and_tpidr2_el0
-DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sme_state)
+DEFINE_COMPILERRT_FUNCTION(__arm_sme_state)
   .variant_pcs __arm_sme_state
   BTI_C
   mov x0, xzr
@@ -70,9 +70,9 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sme_state)
   mrs x1, TPIDR2_EL0
 1:
   ret
-END_COMPILERRT_OUTLINE_FUNCTION(__arm_sme_state)
+END_COMPILERRT_FUNCTION(__arm_sme_state)
 
-DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_restore)
+DEFINE_COMPILERRT_FUNCTION(__arm_tpidr2_restore)
   .variant_pcs __arm_tpidr2_restore
   BTI_C
   // If TPIDR2_EL0 is nonnull, the subroutine aborts in some platform-specific
@@ -106,9 +106,9 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_restore)
   ret
 2:
   b  SYMBOL_NAME(do_abort)
-END_COMPILERRT_OUTLINE_FUNCTION(__arm_tpidr2_restore)
+END_COMPILERRT_FUNCTION(__arm_tpidr2_restore)
 
-DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_save)
+DEFINE_COMPILERRT_FUNCTION(__arm_tpidr2_save)
   .variant_pcs __arm_tpidr2_save
   BTI_C
   // If the current thread does not have access to TPIDR2_EL0, the subroutine
@@ -147,9 +147,10 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_save)
   ret
 2:
   b  SYMBOL_NAME(do_abort)
-END_COMPILERRT_OUTLINE_FUNCTION(__arm_tpidr2_save)
+END_COMPILERRT_FUNCTION(__arm_tpidr2_save)
 
-DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_za_disable)
+DEFINE_COMPILERRT_FUNCTION(__arm_za_disable)
+  .cfi_startproc
   .variant_pcs __arm_za_disable
   BTI_C
   // If the current thread does not have access to SME, the subroutine does
@@ -166,7 +167,7 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_za_disable)
   .cfi_def_cfa w29, 16
   .cfi_offset w30, -8
   .cfi_offset w29, -16
-  bl  __arm_tpidr2_save
+  bl  SYMBOL_NAME(__arm_tpidr2_save)
 
   // * Set TPIDR2_EL0 to null.
   msr TPIDR2_EL0, xzr
@@ -181,9 +182,10 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_za_disable)
   .cfi_restore w29
 0:
   ret
-END_COMPILERRT_OUTLINE_FUNCTION(__arm_za_disable)
+  .cfi_endproc
+END_COMPILERRT_FUNCTION(__arm_za_disable)
 
-DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_get_current_vg)
+DEFINE_COMPILERRT_FUNCTION(__arm_get_current_vg)
   .variant_pcs __arm_get_current_vg
   BTI_C
 
@@ -200,7 +202,7 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_get_current_vg)
 2:
   mov     x0, xzr
   ret
-END_COMPILERRT_OUTLINE_FUNCTION(__arm_get_current_vg)
+END_COMPILERRT_FUNCTION(__arm_get_current_vg)
 
 NO_EXEC_STACK_DIRECTIVE
 
diff --git a/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S b/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S
index 6e13a03691cfd6..092b499b3d3bfc 100644
--- a/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S
+++ b/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S
@@ -52,7 +52,7 @@
    The loop tail is handled by always copying 64 bytes from the end.
 */
 
-DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sc_memcpy)
+DEFINE_COMPILERRT_FUNCTION(__arm_sc_memcpy)
         add     srcend1, src, count
         add     dstend1, dstin, count
         cmp     count, 128
@@ -232,7 +232,7 @@ L(copy64_from_start):
         stp     B_l, B_h, [dstin, 16]
         stp     C_l, C_h, [dstin]
         ret
-END_COMPILERRT_OUTLINE_FUNCTION(__arm_sc_memcpy)
+END_COMPILERRT_FUNCTION(__arm_sc_memcpy)
 
 DEFINE_COMPILERRT_FUNCTION_ALIAS(__arm_sc_memmove, __arm_sc_memcpy)
 
@@ -250,7 +250,7 @@ DEFINE_COMPILERRT_FUNCTION_ALIAS(__arm_sc_memmove, __arm_sc_memcpy)
 #define dstend2  x4
 #define zva_val  x5
 
-DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sc_memset)
+DEFINE_COMPILERRT_FUNCTION(__arm_sc_memset)
 #ifdef __ARM_FEATURE_SVE
         mov     z0.b, valw
 #else
@@ -346,6 +346,6 @@ L(no_zva_loop):
         stp     q0, q0, [dstend2, -64]
         stp     q0, q0, [dstend2, -32]
         ret
-END_COMPILERRT_OUTLINE_FUNCTION(__arm_sc_memset)
+END_COMPILERRT_FUNCTION(__arm_sc_memset)
 
 #endif // __aarch64__


