[compiler-rt] [llvm] [AArch64] Implement INIT/ADJUST_TRAMPOLINE (PR #70267)
Carlos Seo via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 16 10:59:35 PDT 2024
https://github.com/ceseo updated https://github.com/llvm/llvm-project/pull/70267
>From 8512cfcad445c4d9a83d47ca04fa9131efbf84b8 Mon Sep 17 00:00:00 2001
From: Carlos Eduardo Seo <carlos.seo at linaro.org>
Date: Mon, 15 Jul 2024 18:00:37 +0000
Subject: [PATCH] [AArch64] Implement INIT/ADJUST_TRAMPOLINE
Add support for llvm.init.trampoline and llvm.adjust.trampoline intrinsics for
AArch64.
Fixes #65573
Fixes #76927
Fixes #83555
Updates #66157
---
compiler-rt/lib/builtins/README.txt | 5 ++
compiler-rt/lib/builtins/trampoline_setup.c | 42 ++++++++++++++
.../builtins/Unit/trampoline_setup_test.c | 2 +-
.../Target/AArch64/AArch64ISelLowering.cpp | 58 +++++++++++++++++++
llvm/lib/Target/AArch64/AArch64ISelLowering.h | 2 +
llvm/test/CodeGen/AArch64/trampoline.ll | 31 ++++++++++
6 files changed, 139 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AArch64/trampoline.ll
diff --git a/compiler-rt/lib/builtins/README.txt b/compiler-rt/lib/builtins/README.txt
index 2d213d95f333a..19f26c92a0f94 100644
--- a/compiler-rt/lib/builtins/README.txt
+++ b/compiler-rt/lib/builtins/README.txt
@@ -272,6 +272,11 @@ switch32
switch8
switchu8
+// This function generates a custom trampoline function with the specific
+// realFunc and localsPtr values.
+void __trampoline_setup(uint32_t* trampOnStack, int trampSizeAllocated,
+ const void* realFunc, void* localsPtr);
+
// There is no C interface to the *_vfp_d8_d15_regs functions. There are
// called in the prolog and epilog of Thumb1 functions. When the C++ ABI use
// SJLJ for exceptions, each function with a catch clause or destructors needs
diff --git a/compiler-rt/lib/builtins/trampoline_setup.c b/compiler-rt/lib/builtins/trampoline_setup.c
index 844eb27944142..c57f46307f0a2 100644
--- a/compiler-rt/lib/builtins/trampoline_setup.c
+++ b/compiler-rt/lib/builtins/trampoline_setup.c
@@ -41,3 +41,45 @@ COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack,
__clear_cache(trampOnStack, &trampOnStack[10]);
}
#endif // __powerpc__ && !defined(__powerpc64__)
+
+// The AArch64 compiler generates calls to __trampoline_setup() when creating
+// trampoline functions on the stack for use with nested functions.
+// This function creates a custom 36-byte trampoline function on the stack
+// which loads x18 with a pointer to the outer function's locals
+// and then jumps to the target nested function.
+// Note: x18 is a reserved platform register on Windows and macOS.
+
+#if defined(__aarch64__) && !defined(__APPLE__) && !defined(_WIN64)
+COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack,
+ int trampSizeAllocated,
+ const void *realFunc, void *localsPtr) {
+ // This should never happen, but if the compiler did not allocate the
+ // 36 bytes (nine 4-byte words) written below for the trampoline, abort.
+ if (trampSizeAllocated < 36)
+ compilerrt_abort();
+
+ // Create the trampoline, one 32-bit instruction word per array slot.
+ // Load realFunc into x17 (0x11). mov/movk 16 bits at a time.
+ trampOnStack[0] =
+ 0xd2800000u | ((((uint64_t)realFunc >> 0) & 0xffffu) << 5) | 0x11;
+ trampOnStack[1] =
+ 0xf2a00000u | ((((uint64_t)realFunc >> 16) & 0xffffu) << 5) | 0x11;
+ trampOnStack[2] =
+ 0xf2c00000u | ((((uint64_t)realFunc >> 32) & 0xffffu) << 5) | 0x11;
+ trampOnStack[3] =
+ 0xf2e00000u | ((((uint64_t)realFunc >> 48) & 0xffffu) << 5) | 0x11;
+ // Load localsPtr into x18 (0x12), again 16 bits at a time.
+ trampOnStack[4] =
+ 0xd2800000u | ((((uint64_t)localsPtr >> 0) & 0xffffu) << 5) | 0x12;
+ trampOnStack[5] =
+ 0xf2a00000u | ((((uint64_t)localsPtr >> 16) & 0xffffu) << 5) | 0x12;
+ trampOnStack[6] =
+ 0xf2c00000u | ((((uint64_t)localsPtr >> 32) & 0xffffu) << 5) | 0x12;
+ trampOnStack[7] =
+ 0xf2e00000u | ((((uint64_t)localsPtr >> 48) & 0xffffu) << 5) | 0x12;
+ trampOnStack[8] = 0xd61f0220; // br x17
+
+ // Clear the instruction cache over the nine words just written.
+ __clear_cache(trampOnStack, &trampOnStack[9]);
+}
+#endif // defined(__aarch64__) && !defined(__APPLE__) && !defined(_WIN64)
diff --git a/compiler-rt/test/builtins/Unit/trampoline_setup_test.c b/compiler-rt/test/builtins/Unit/trampoline_setup_test.c
index da115fe764271..d51d35acaa02f 100644
--- a/compiler-rt/test/builtins/Unit/trampoline_setup_test.c
+++ b/compiler-rt/test/builtins/Unit/trampoline_setup_test.c
@@ -7,7 +7,7 @@
/*
* Tests nested functions
- * The ppc compiler generates a call to __trampoline_setup
+ * The ppc and aarch64 compilers generate a call to __trampoline_setup
* The i386 and x86_64 compilers generate a call to ___enable_execute_stack
*/
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7294da474c4bc..6014a97ca4e4d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1074,6 +1074,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// Try to create BICs for vector ANDs.
setTargetDAGCombine(ISD::AND);
+ // llvm.init.trampoline and llvm.adjust.trampoline
+ setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
+ setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
+
// Vector add and sub nodes may conceal a high-half opportunity.
// Also, try to fold ADD into CSINC/CSINV..
setTargetDAGCombine({ISD::ADD, ISD::ABS, ISD::SUB, ISD::XOR, ISD::SINT_TO_FP,
@@ -6653,6 +6657,56 @@ static SDValue LowerFLDEXP(SDValue Op, SelectionDAG &DAG) {
return Final;
}
+SDValue AArch64TargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
+ // Reject Darwin and Windows: x18 cannot carry the Nest parameter there.
+ if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
+ report_fatal_error(
+ "ADJUST_TRAMPOLINE operation is only supported on Linux.");
+ // No adjustment is applied: the node's first operand is returned as-is.
+ return Op.getOperand(0);
+}
+
+SDValue AArch64TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
+ // Lowers INIT_TRAMPOLINE to a call to the __trampoline_setup builtin.
+ // Note: x18 cannot be used for the Nest parameter on Windows and macOS.
+ if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
+ report_fatal_error("INIT_TRAMPOLINE operation is only supported on Linux.");
+
+ SDValue Chain = Op.getOperand(0);
+ SDValue Trmp = Op.getOperand(1); // trampoline
+ SDValue FPtr = Op.getOperand(2); // nested function
+ SDValue Nest = Op.getOperand(3); // 'nest' parameter value
+ SDLoc dl(Op);
+
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+
+ Entry.Ty = IntPtrTy;
+ Entry.Node = Trmp;
+ Args.push_back(Entry);
+ // __trampoline_setup aborts if told the buffer is smaller than 36 bytes.
+ Entry.Node = DAG.getConstant(36, dl, MVT::i64);
+ Args.push_back(Entry);
+ Entry.Node = FPtr;
+ Args.push_back(Entry);
+ Entry.Node = Nest;
+ Args.push_back(Entry);
+
+ // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
+ CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
+
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+ return CallResult.second;
+}
+
SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
LLVM_DEBUG(dbgs() << "Custom lowering: ");
@@ -6670,6 +6724,10 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerGlobalTLSAddress(Op, DAG);
case ISD::PtrAuthGlobalAddress:
return LowerPtrAuthGlobalAddress(Op, DAG);
+ case ISD::ADJUST_TRAMPOLINE:
+ return LowerADJUST_TRAMPOLINE(Op, DAG);
+ case ISD::INIT_TRAMPOLINE:
+ return LowerINIT_TRAMPOLINE(Op, DAG);
case ISD::SETCC:
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 047c852bb01d2..7f0217328537b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1141,6 +1141,8 @@ class AArch64TargetLowering : public TargetLowering {
SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
SDValue TVal, SDValue FVal, const SDLoc &dl,
SelectionDAG &DAG) const;
+ SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/AArch64/trampoline.ll b/llvm/test/CodeGen/AArch64/trampoline.ll
new file mode 100644
index 0000000000000..48b5c8746fc47
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/trampoline.ll
@@ -0,0 +1,31 @@
+; RUN: llc -mtriple=aarch64-- < %s | FileCheck %s
+; UNSUPPORTED: darwin, system-windows
+
+declare void @llvm.init.trampoline(i8*, i8*, i8*);
+declare i8* @llvm.adjust.trampoline(i8*);
+
+define i64 @func(i64* nest %ptr, i64 %val)
+{
+ %x = load i64, i64* %ptr
+ %sum = add i64 %x, %val
+ ret i64 %sum
+}
+
+; CHECK-LABEL: main
+define i64 @main(i64, i8**)
+{
+ %val = alloca i64
+ store i64 13, i64* %val
+ %nval = bitcast i64* %val to i8*
+ %tramp_buf = alloca [36 x i8], align 4
+ %tramp = getelementptr [36 x i8], [36 x i8]* %tramp_buf, i64 0, i64 0
+; CHECK: bl __trampoline_setup
+ call void @llvm.init.trampoline(
+ i8* %tramp,
+ i8* bitcast (i64 (i64*, i64)* @func to i8*),
+ i8* %nval)
+ %ptr = call i8* @llvm.adjust.trampoline(i8* %tramp)
+ %fptr = bitcast i8* %ptr to i64(i64)*
+ %retval = call i64 %fptr (i64 42)
+ ret i64 %retval
+}
More information about the llvm-commits
mailing list