[llvm-branch-commits] [compiler-rt] 01bd039 - [AArch64] Implement INIT/ADJUST_TRAMPOLINE (#70267)
Tobias Hieta via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Jul 26 00:27:01 PDT 2024
Author: Carlos Seo
Date: 2024-07-26T09:26:46+02:00
New Revision: 01bd0394c9d5809be2d125ae1dfd3faef8bf0942
URL: https://github.com/llvm/llvm-project/commit/01bd0394c9d5809be2d125ae1dfd3faef8bf0942
DIFF: https://github.com/llvm/llvm-project/commit/01bd0394c9d5809be2d125ae1dfd3faef8bf0942.diff
LOG: [AArch64] Implement INIT/ADJUST_TRAMPOLINE (#70267)
Add support for llvm.init.trampoline and llvm.adjust.trampoline
intrinsics for AArch64.
Fixes https://github.com/llvm/llvm-project/issues/65573
Fixes https://github.com/llvm/llvm-project/issues/76927
Fixes https://github.com/llvm/llvm-project/issues/83555
Updates https://github.com/llvm/llvm-project/pull/66157
(cherry picked from commit c4b66bf4d065d3bbc2e2fac8512a6df8e013c704)
Added:
llvm/test/CodeGen/AArch64/trampoline.ll
Modified:
compiler-rt/lib/builtins/README.txt
compiler-rt/lib/builtins/trampoline_setup.c
compiler-rt/test/builtins/Unit/trampoline_setup_test.c
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
Removed:
################################################################################
diff --git a/compiler-rt/lib/builtins/README.txt b/compiler-rt/lib/builtins/README.txt
index 2d213d95f333a..19f26c92a0f94 100644
--- a/compiler-rt/lib/builtins/README.txt
+++ b/compiler-rt/lib/builtins/README.txt
@@ -272,6 +272,11 @@ switch32
switch8
switchu8
+// This function generates a custom trampoline function with the specific
+// realFunc and localsPtr values.
+void __trampoline_setup(uint32_t* trampOnStack, int trampSizeAllocated,
+ const void* realFunc, void* localsPtr);
+
// There is no C interface to the *_vfp_d8_d15_regs functions. There are
// called in the prolog and epilog of Thumb1 functions. When the C++ ABI use
// SJLJ for exceptions, each function with a catch clause or destructors needs
diff --git a/compiler-rt/lib/builtins/trampoline_setup.c b/compiler-rt/lib/builtins/trampoline_setup.c
index 844eb27944142..830e25e4c0303 100644
--- a/compiler-rt/lib/builtins/trampoline_setup.c
+++ b/compiler-rt/lib/builtins/trampoline_setup.c
@@ -41,3 +41,45 @@ COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack,
__clear_cache(trampOnStack, &trampOnStack[10]);
}
#endif // __powerpc__ && !defined(__powerpc64__)
+
+// The AArch64 compiler generates calls to __trampoline_setup() when creating
+// trampoline functions on the stack for use with nested functions.
+// This function creates a custom 36-byte trampoline function on the stack
+// which loads x18 with a pointer to the outer function's locals
+// and then jumps to the target nested function.
+// Note: x18 is a reserved platform register on Windows and macOS.
+
+#if defined(__aarch64__) && defined(__ELF__)
+COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack,
+ int trampSizeAllocated,
+ const void *realFunc, void *localsPtr) {
+ // This should never happen, but if compiler did not allocate
+ // enough space on stack for the trampoline, abort.
+ if (trampSizeAllocated < 36)
+ compilerrt_abort();
+
+ // create trampoline
+ // Load realFunc into x17. mov/movk 16 bits at a time.
+ trampOnStack[0] =
+ 0xd2800000u | ((((uint64_t)realFunc >> 0) & 0xffffu) << 5) | 0x11;
+ trampOnStack[1] =
+ 0xf2a00000u | ((((uint64_t)realFunc >> 16) & 0xffffu) << 5) | 0x11;
+ trampOnStack[2] =
+ 0xf2c00000u | ((((uint64_t)realFunc >> 32) & 0xffffu) << 5) | 0x11;
+ trampOnStack[3] =
+ 0xf2e00000u | ((((uint64_t)realFunc >> 48) & 0xffffu) << 5) | 0x11;
+ // Load localsPtr into x18
+ trampOnStack[4] =
+ 0xd2800000u | ((((uint64_t)localsPtr >> 0) & 0xffffu) << 5) | 0x12;
+ trampOnStack[5] =
+ 0xf2a00000u | ((((uint64_t)localsPtr >> 16) & 0xffffu) << 5) | 0x12;
+ trampOnStack[6] =
+ 0xf2c00000u | ((((uint64_t)localsPtr >> 32) & 0xffffu) << 5) | 0x12;
+ trampOnStack[7] =
+ 0xf2e00000u | ((((uint64_t)localsPtr >> 48) & 0xffffu) << 5) | 0x12;
+ trampOnStack[8] = 0xd61f0220; // br x17
+
+ // Clear instruction cache.
+ __clear_cache(trampOnStack, &trampOnStack[9]);
+}
+#endif // defined(__aarch64__) && !defined(__APPLE__) && !defined(_WIN64)
diff --git a/compiler-rt/test/builtins/Unit/trampoline_setup_test.c b/compiler-rt/test/builtins/Unit/trampoline_setup_test.c
index da115fe764271..d51d35acaa02f 100644
--- a/compiler-rt/test/builtins/Unit/trampoline_setup_test.c
+++ b/compiler-rt/test/builtins/Unit/trampoline_setup_test.c
@@ -7,7 +7,7 @@
/*
* Tests nested functions
- * The ppc compiler generates a call to __trampoline_setup
+ * The ppc and aarch64 compilers generates a call to __trampoline_setup
* The i386 and x86_64 compilers generate a call to ___enable_execute_stack
*/
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 87e7750768d2d..6d413a09407a9 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1080,6 +1080,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// Try to create BICs for vector ANDs.
setTargetDAGCombine(ISD::AND);
+ // llvm.init.trampoline and llvm.adjust.trampoline
+ setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
+ setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
+
// Vector add and sub nodes may conceal a high-half opportunity.
// Also, try to fold ADD into CSINC/CSINV..
setTargetDAGCombine({ISD::ADD, ISD::ABS, ISD::SUB, ISD::XOR, ISD::SINT_TO_FP,
@@ -6688,6 +6692,56 @@ static SDValue LowerFLDEXP(SDValue Op, SelectionDAG &DAG) {
return Final;
}
+SDValue AArch64TargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
+ // Note: x18 cannot be used for the Nest parameter on Windows and macOS.
+ if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
+ report_fatal_error(
+ "ADJUST_TRAMPOLINE operation is only supported on Linux.");
+
+ return Op.getOperand(0);
+}
+
+SDValue AArch64TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
+
+ // Note: x18 cannot be used for the Nest parameter on Windows and macOS.
+ if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
+ report_fatal_error("INIT_TRAMPOLINE operation is only supported on Linux.");
+
+ SDValue Chain = Op.getOperand(0);
+ SDValue Trmp = Op.getOperand(1); // trampoline
+ SDValue FPtr = Op.getOperand(2); // nested function
+ SDValue Nest = Op.getOperand(3); // 'nest' parameter value
+ SDLoc dl(Op);
+
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+
+ Entry.Ty = IntPtrTy;
+ Entry.Node = Trmp;
+ Args.push_back(Entry);
+ Entry.Node = DAG.getConstant(20, dl, MVT::i64);
+ Args.push_back(Entry);
+
+ Entry.Node = FPtr;
+ Args.push_back(Entry);
+ Entry.Node = Nest;
+ Args.push_back(Entry);
+
+ // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
+ CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
+
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+ return CallResult.second;
+}
+
SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
LLVM_DEBUG(dbgs() << "Custom lowering: ");
@@ -6705,6 +6759,10 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerGlobalTLSAddress(Op, DAG);
case ISD::PtrAuthGlobalAddress:
return LowerPtrAuthGlobalAddress(Op, DAG);
+ case ISD::ADJUST_TRAMPOLINE:
+ return LowerADJUST_TRAMPOLINE(Op, DAG);
+ case ISD::INIT_TRAMPOLINE:
+ return LowerINIT_TRAMPOLINE(Op, DAG);
case ISD::SETCC:
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index ef45e4f01ecd3..81e15185f985d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1143,6 +1143,8 @@ class AArch64TargetLowering : public TargetLowering {
SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
SDValue TVal, SDValue FVal, const SDLoc &dl,
SelectionDAG &DAG) const;
+ SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRIND(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/AArch64/trampoline.ll b/llvm/test/CodeGen/AArch64/trampoline.ll
new file mode 100644
index 0000000000000..293e538a7459d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/trampoline.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mtriple=aarch64-- < %s | FileCheck %s
+
+declare void @llvm.init.trampoline(ptr, ptr, ptr);
+declare ptr @llvm.adjust.trampoline(ptr);
+
+define i64 @f(ptr nest %c, i64 %x, i64 %y) {
+ %sum = add i64 %x, %y
+ ret i64 %sum
+}
+
+define i64 @main() {
+ %val = alloca i64
+ %nval = bitcast ptr %val to ptr
+ %tramp = alloca [36 x i8], align 8
+ ; CHECK: bl __trampoline_setup
+ call void @llvm.init.trampoline(ptr %tramp, ptr @f, ptr %nval)
+ %fp = call ptr @llvm.adjust.trampoline(ptr %tramp)
+ ret i64 0
+}
More information about the llvm-branch-commits
mailing list