[compiler-rt] [llvm] [AArch64] Implement INIT/ADJUST_TRAMPOLINE (PR #70267)

Carlos Seo via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 16 10:59:35 PDT 2024


https://github.com/ceseo updated https://github.com/llvm/llvm-project/pull/70267

>From 8512cfcad445c4d9a83d47ca04fa9131efbf84b8 Mon Sep 17 00:00:00 2001
From: Carlos Eduardo Seo <carlos.seo at linaro.org>
Date: Mon, 15 Jul 2024 18:00:37 +0000
Subject: [PATCH] [AArch64] Implement INIT/ADJUST_TRAMPOLINE

Add support for llvm.init.trampoline and llvm.adjust.trampoline intrinsics for
AArch64.

Fixes #65573
Fixes #76927
Fixes #83555
Updates #66157
---
 compiler-rt/lib/builtins/README.txt           |  5 ++
 compiler-rt/lib/builtins/trampoline_setup.c   | 42 ++++++++++++++
 .../builtins/Unit/trampoline_setup_test.c     |  2 +-
 .../Target/AArch64/AArch64ISelLowering.cpp    | 58 +++++++++++++++++++
 llvm/lib/Target/AArch64/AArch64ISelLowering.h |  2 +
 llvm/test/CodeGen/AArch64/trampoline.ll       | 31 ++++++++++
 6 files changed, 139 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AArch64/trampoline.ll

diff --git a/compiler-rt/lib/builtins/README.txt b/compiler-rt/lib/builtins/README.txt
index 2d213d95f333a..19f26c92a0f94 100644
--- a/compiler-rt/lib/builtins/README.txt
+++ b/compiler-rt/lib/builtins/README.txt
@@ -272,6 +272,11 @@ switch32
 switch8
 switchu8
 
+// This function generates a custom trampoline function with the specific
+// realFunc and localsPtr values.
+void __trampoline_setup(uint32_t* trampOnStack, int trampSizeAllocated,
+                        const void* realFunc, void* localsPtr);
+
 // There is no C interface to the *_vfp_d8_d15_regs functions.  There are
 // called in the prolog and epilog of Thumb1 functions.  When the C++ ABI use
 // SJLJ for exceptions, each function with a catch clause or destructors needs
diff --git a/compiler-rt/lib/builtins/trampoline_setup.c b/compiler-rt/lib/builtins/trampoline_setup.c
index 844eb27944142..c57f46307f0a2 100644
--- a/compiler-rt/lib/builtins/trampoline_setup.c
+++ b/compiler-rt/lib/builtins/trampoline_setup.c
@@ -41,3 +41,45 @@ COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack,
   __clear_cache(trampOnStack, &trampOnStack[10]);
 }
 #endif // __powerpc__ && !defined(__powerpc64__)
+
+// The AArch64 compiler generates calls to __trampoline_setup() when creating
+// trampoline functions on the stack for use with nested functions.
+// This function creates a custom 36-byte trampoline function on the stack
+// which loads x18 with a pointer to the outer function's locals
+// and then jumps to the target nested function.
+// Note: x18 is a reserved platform register on Windows and macOS.
+
+#if defined(__aarch64__) && !defined(__APPLE__) && !defined(_WIN64)
+COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack,
+                                        int trampSizeAllocated,
+                                        const void *realFunc, void *localsPtr) {
+  // This should never happen, but if compiler did not allocate
+  // enough space on stack for the trampoline, abort.
+  if (trampSizeAllocated < 36)
+    compilerrt_abort();
+
+  // create trampoline
+  // Load realFunc into x17. mov/movk 16 bits at a time.
+  trampOnStack[0] =
+      0xd2800000u | ((((uint64_t)realFunc >> 0) & 0xffffu) << 5) | 0x11;
+  trampOnStack[1] =
+      0xf2a00000u | ((((uint64_t)realFunc >> 16) & 0xffffu) << 5) | 0x11;
+  trampOnStack[2] =
+      0xf2c00000u | ((((uint64_t)realFunc >> 32) & 0xffffu) << 5) | 0x11;
+  trampOnStack[3] =
+      0xf2e00000u | ((((uint64_t)realFunc >> 48) & 0xffffu) << 5) | 0x11;
+  // Load localsPtr into x18
+  trampOnStack[4] =
+      0xd2800000u | ((((uint64_t)localsPtr >> 0) & 0xffffu) << 5) | 0x12;
+  trampOnStack[5] =
+      0xf2a00000u | ((((uint64_t)localsPtr >> 16) & 0xffffu) << 5) | 0x12;
+  trampOnStack[6] =
+      0xf2c00000u | ((((uint64_t)localsPtr >> 32) & 0xffffu) << 5) | 0x12;
+  trampOnStack[7] =
+      0xf2e00000u | ((((uint64_t)localsPtr >> 48) & 0xffffu) << 5) | 0x12;
+  trampOnStack[8] = 0xd61f0220; // br x17
+
+  // Clear instruction cache.
+  __clear_cache(trampOnStack, &trampOnStack[9]);
+}
+#endif // defined(__aarch64__) && !defined(__APPLE__) && !defined(_WIN64)
diff --git a/compiler-rt/test/builtins/Unit/trampoline_setup_test.c b/compiler-rt/test/builtins/Unit/trampoline_setup_test.c
index da115fe764271..d51d35acaa02f 100644
--- a/compiler-rt/test/builtins/Unit/trampoline_setup_test.c
+++ b/compiler-rt/test/builtins/Unit/trampoline_setup_test.c
@@ -7,7 +7,7 @@
 
 /*
  * Tests nested functions
- * The ppc compiler generates a call to __trampoline_setup
+ * The ppc and aarch64 compilers generates a call to __trampoline_setup
  * The i386 and x86_64 compilers generate a call to ___enable_execute_stack
  */
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7294da474c4bc..6014a97ca4e4d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1074,6 +1074,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   // Try to create BICs for vector ANDs.
   setTargetDAGCombine(ISD::AND);
 
+  // llvm.init.trampoline and llvm.adjust.trampoline
+  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
+  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
+
   // Vector add and sub nodes may conceal a high-half opportunity.
   // Also, try to fold ADD into CSINC/CSINV..
   setTargetDAGCombine({ISD::ADD, ISD::ABS, ISD::SUB, ISD::XOR, ISD::SINT_TO_FP,
@@ -6653,6 +6657,56 @@ static SDValue LowerFLDEXP(SDValue Op, SelectionDAG &DAG) {
   return Final;
 }
 
+SDValue AArch64TargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
+                                                      SelectionDAG &DAG) const {
+  // Note: x18 cannot be used for the Nest parameter on Windows and macOS.
+  if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
+    report_fatal_error(
+        "ADJUST_TRAMPOLINE operation is only supported on Linux.");
+
+  return Op.getOperand(0);
+}
+
+SDValue AArch64TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
+                                                    SelectionDAG &DAG) const {
+
+  // Note: x18 cannot be used for the Nest parameter on Windows and macOS.
+  if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
+    report_fatal_error("INIT_TRAMPOLINE operation is only supported on Linux.");
+
+  SDValue Chain = Op.getOperand(0);
+  SDValue Trmp = Op.getOperand(1); // trampoline
+  SDValue FPtr = Op.getOperand(2); // nested function
+  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
+  SDLoc dl(Op);
+
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
+  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
+
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+
+  Entry.Ty = IntPtrTy;
+  Entry.Node = Trmp;
+  Args.push_back(Entry);
+  Entry.Node = DAG.getConstant(20, dl, MVT::i64);
+  Args.push_back(Entry);
+
+  Entry.Node = FPtr;
+  Args.push_back(Entry);
+  Entry.Node = Nest;
+  Args.push_back(Entry);
+
+  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
+  TargetLowering::CallLoweringInfo CLI(DAG);
+  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
+      CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+      DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
+
+  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+  return CallResult.second;
+}
+
 SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
                                               SelectionDAG &DAG) const {
   LLVM_DEBUG(dbgs() << "Custom lowering: ");
@@ -6670,6 +6724,10 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
     return LowerGlobalTLSAddress(Op, DAG);
   case ISD::PtrAuthGlobalAddress:
     return LowerPtrAuthGlobalAddress(Op, DAG);
+  case ISD::ADJUST_TRAMPOLINE:
+    return LowerADJUST_TRAMPOLINE(Op, DAG);
+  case ISD::INIT_TRAMPOLINE:
+    return LowerINIT_TRAMPOLINE(Op, DAG);
   case ISD::SETCC:
   case ISD::STRICT_FSETCC:
   case ISD::STRICT_FSETCCS:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 047c852bb01d2..7f0217328537b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1141,6 +1141,8 @@ class AArch64TargetLowering : public TargetLowering {
   SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
                          SDValue TVal, SDValue FVal, const SDLoc &dl,
                          SelectionDAG &DAG) const;
+  SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/AArch64/trampoline.ll b/llvm/test/CodeGen/AArch64/trampoline.ll
new file mode 100644
index 0000000000000..48b5c8746fc47
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/trampoline.ll
@@ -0,0 +1,31 @@
+; RUN: llc -mtriple=aarch64-- < %s | FileCheck %s
+; UNSUPPORTED: darwin, system-windows
+
+declare void @llvm.init.trampoline(i8*, i8*, i8*);
+declare i8* @llvm.adjust.trampoline(i8*);
+
+define i64 @func(i64* nest %ptr, i64 %val)
+{
+    %x = load i64, i64* %ptr
+    %sum = add i64 %x, %val
+    ret i64 %sum
+}
+
+; CHECK-LABEL: main
+define i64 @main(i64, i8**)
+{
+    %val = alloca i64
+    store i64 13, i64* %val
+    %nval = bitcast i64* %val to i8*
+    %tramp_buf = alloca [36 x i8], align 4
+    %tramp = getelementptr [36 x i8], [36 x i8]* %tramp_buf, i64 0, i64 0
+; CHECK:	bl	__trampoline_setup
+    call void @llvm.init.trampoline(
+            i8* %tramp,
+            i8* bitcast (i64 (i64*, i64)* @func to i8*),
+            i8* %nval)
+    %ptr = call i8* @llvm.adjust.trampoline(i8* %tramp)
+    %fptr = bitcast i8* %ptr to i64(i64)*
+    %retval = call i64 %fptr (i64 42)
+    ret i64 %retval
+}



More information about the llvm-commits mailing list