[llvm-branch-commits] [compiler-rt] [flang] [llvm] release/19.x: [Flang][Docs] Update information about AArch64 trampolines (#100391) (PR #100471)
Tobias Hieta via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Jul 26 00:26:54 PDT 2024
https://github.com/tru updated https://github.com/llvm/llvm-project/pull/100471
>From 01bd0394c9d5809be2d125ae1dfd3faef8bf0942 Mon Sep 17 00:00:00 2001
From: Carlos Seo <carlos.seo at linaro.org>
Date: Wed, 24 Jul 2024 11:18:08 -0300
Subject: [PATCH 1/2] [AArch64] Implement INIT/ADJUST_TRAMPOLINE (#70267)
Add support for llvm.init.trampoline and llvm.adjust.trampoline
intrinsics for AArch64.
Fixes https://github.com/llvm/llvm-project/issues/65573
Fixes https://github.com/llvm/llvm-project/issues/76927
Fixes https://github.com/llvm/llvm-project/issues/83555
Updates https://github.com/llvm/llvm-project/pull/66157
(cherry picked from commit c4b66bf4d065d3bbc2e2fac8512a6df8e013c704)
---
compiler-rt/lib/builtins/README.txt | 5 ++
compiler-rt/lib/builtins/trampoline_setup.c | 42 ++++++++++++++
.../builtins/Unit/trampoline_setup_test.c | 2 +-
.../Target/AArch64/AArch64ISelLowering.cpp | 58 +++++++++++++++++++
llvm/lib/Target/AArch64/AArch64ISelLowering.h | 2 +
llvm/test/CodeGen/AArch64/trampoline.ll | 19 ++++++
6 files changed, 127 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AArch64/trampoline.ll
diff --git a/compiler-rt/lib/builtins/README.txt b/compiler-rt/lib/builtins/README.txt
index 2d213d95f333a..19f26c92a0f94 100644
--- a/compiler-rt/lib/builtins/README.txt
+++ b/compiler-rt/lib/builtins/README.txt
@@ -272,6 +272,11 @@ switch32
switch8
switchu8
+// This function generates a custom trampoline function with the specific
+// realFunc and localsPtr values.
+void __trampoline_setup(uint32_t* trampOnStack, int trampSizeAllocated,
+ const void* realFunc, void* localsPtr);
+
// There is no C interface to the *_vfp_d8_d15_regs functions. There are
// called in the prolog and epilog of Thumb1 functions. When the C++ ABI use
// SJLJ for exceptions, each function with a catch clause or destructors needs
diff --git a/compiler-rt/lib/builtins/trampoline_setup.c b/compiler-rt/lib/builtins/trampoline_setup.c
index 844eb27944142..830e25e4c0303 100644
--- a/compiler-rt/lib/builtins/trampoline_setup.c
+++ b/compiler-rt/lib/builtins/trampoline_setup.c
@@ -41,3 +41,45 @@ COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack,
__clear_cache(trampOnStack, &trampOnStack[10]);
}
#endif // __powerpc__ && !defined(__powerpc64__)
+
+// The AArch64 compiler generates calls to __trampoline_setup() when creating
+// trampoline functions on the stack for use with nested functions.
+// This function creates a custom 36-byte trampoline function on the stack
+// which loads x18 with a pointer to the outer function's locals
+// and then jumps to the target nested function.
+// Note: x18 is a reserved platform register on Windows and macOS.
+
+#if defined(__aarch64__) && defined(__ELF__)
+COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack,
+ int trampSizeAllocated,
+ const void *realFunc, void *localsPtr) {
+ // The trampoline is nine instruction words (36 bytes). If the compiler did
+ // not allocate that much space on the stack for it, abort.
+ if (trampSizeAllocated < 36)
+ compilerrt_abort();
+
+ // Create the trampoline; the low bits of each word select the register.
+ // Load realFunc into x17 (0x11). mov/movk 16 bits at a time.
+ trampOnStack[0] =
+ 0xd2800000u | ((((uint64_t)realFunc >> 0) & 0xffffu) << 5) | 0x11;
+ trampOnStack[1] =
+ 0xf2a00000u | ((((uint64_t)realFunc >> 16) & 0xffffu) << 5) | 0x11;
+ trampOnStack[2] =
+ 0xf2c00000u | ((((uint64_t)realFunc >> 32) & 0xffffu) << 5) | 0x11;
+ trampOnStack[3] =
+ 0xf2e00000u | ((((uint64_t)realFunc >> 48) & 0xffffu) << 5) | 0x11;
+ // Load localsPtr into x18 (0x12) the same way.
+ trampOnStack[4] =
+ 0xd2800000u | ((((uint64_t)localsPtr >> 0) & 0xffffu) << 5) | 0x12;
+ trampOnStack[5] =
+ 0xf2a00000u | ((((uint64_t)localsPtr >> 16) & 0xffffu) << 5) | 0x12;
+ trampOnStack[6] =
+ 0xf2c00000u | ((((uint64_t)localsPtr >> 32) & 0xffffu) << 5) | 0x12;
+ trampOnStack[7] =
+ 0xf2e00000u | ((((uint64_t)localsPtr >> 48) & 0xffffu) << 5) | 0x12;
+ trampOnStack[8] = 0xd61f0220; // br x17
+
+ // Clear the instruction cache for the nine words just written.
+ __clear_cache(trampOnStack, &trampOnStack[9]);
+}
+#endif // defined(__aarch64__) && defined(__ELF__)
diff --git a/compiler-rt/test/builtins/Unit/trampoline_setup_test.c b/compiler-rt/test/builtins/Unit/trampoline_setup_test.c
index da115fe764271..d51d35acaa02f 100644
--- a/compiler-rt/test/builtins/Unit/trampoline_setup_test.c
+++ b/compiler-rt/test/builtins/Unit/trampoline_setup_test.c
@@ -7,7 +7,7 @@
/*
* Tests nested functions
- * The ppc compiler generates a call to __trampoline_setup
+ * The ppc and aarch64 compilers generate a call to __trampoline_setup
* The i386 and x86_64 compilers generate a call to ___enable_execute_stack
*/
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 87e7750768d2d..6d413a09407a9 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1080,6 +1080,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// Try to create BICs for vector ANDs.
setTargetDAGCombine(ISD::AND);
+ // llvm.init.trampoline and llvm.adjust.trampoline
+ setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
+ setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
+
// Vector add and sub nodes may conceal a high-half opportunity.
// Also, try to fold ADD into CSINC/CSINV..
setTargetDAGCombine({ISD::ADD, ISD::ABS, ISD::SUB, ISD::XOR, ISD::SINT_TO_FP,
@@ -6688,6 +6692,56 @@ static SDValue LowerFLDEXP(SDValue Op, SelectionDAG &DAG) {
return Final;
}
+SDValue AArch64TargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
+ // Rejected on Windows and macOS, where x18 cannot hold the Nest parameter.
+ if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
+ report_fatal_error(
+ "ADJUST_TRAMPOLINE operation is only supported on Linux.");
+
+ return Op.getOperand(0); // no adjustment: the first operand is returned as-is
+}
+
+SDValue AArch64TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
+ // Lower llvm.init.trampoline to a call to the __trampoline_setup routine.
+ // Note: x18 cannot be used for the Nest parameter on Windows and macOS.
+ if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
+ report_fatal_error("INIT_TRAMPOLINE operation is only supported on Linux.");
+
+ SDValue Chain = Op.getOperand(0);
+ SDValue Trmp = Op.getOperand(1); // trampoline
+ SDValue FPtr = Op.getOperand(2); // nested function
+ SDValue Nest = Op.getOperand(3); // 'nest' parameter value
+ SDLoc dl(Op);
+
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ // Args: trampoline, its size (the runtime aborts below 36 bytes), callee, nest.
+ Entry.Ty = IntPtrTy;
+ Entry.Node = Trmp;
+ Args.push_back(Entry);
+ Entry.Node = DAG.getConstant(36, dl, MVT::i64);
+ Args.push_back(Entry);
+
+ Entry.Node = FPtr;
+ Args.push_back(Entry);
+ Entry.Node = Nest;
+ Args.push_back(Entry);
+
+ // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
+ CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
+
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+ return CallResult.second;
+}
+
SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
LLVM_DEBUG(dbgs() << "Custom lowering: ");
@@ -6705,6 +6759,10 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerGlobalTLSAddress(Op, DAG);
case ISD::PtrAuthGlobalAddress:
return LowerPtrAuthGlobalAddress(Op, DAG);
+ case ISD::ADJUST_TRAMPOLINE:
+ return LowerADJUST_TRAMPOLINE(Op, DAG);
+ case ISD::INIT_TRAMPOLINE:
+ return LowerINIT_TRAMPOLINE(Op, DAG);
case ISD::SETCC:
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index ef45e4f01ecd3..81e15185f985d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1143,6 +1143,8 @@ class AArch64TargetLowering : public TargetLowering {
SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
SDValue TVal, SDValue FVal, const SDLoc &dl,
SelectionDAG &DAG) const;
+ SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRIND(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/AArch64/trampoline.ll b/llvm/test/CodeGen/AArch64/trampoline.ll
new file mode 100644
index 0000000000000..293e538a7459d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/trampoline.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mtriple=aarch64-- < %s | FileCheck %s
+
+declare void @llvm.init.trampoline(ptr, ptr, ptr);
+declare ptr @llvm.adjust.trampoline(ptr);
+
+define i64 @f(ptr nest %c, i64 %x, i64 %y) { ; target function; %c is the 'nest' argument
+ %sum = add i64 %x, %y
+ ret i64 %sum
+}
+
+define i64 @main() {
+ %val = alloca i64
+ %nval = bitcast ptr %val to ptr ; passed below as the nest value
+ %tramp = alloca [36 x i8], align 8 ; 36 bytes, the size __trampoline_setup requires
+ ; CHECK: bl __trampoline_setup
+ call void @llvm.init.trampoline(ptr %tramp, ptr @f, ptr %nval)
+ %fp = call ptr @llvm.adjust.trampoline(ptr %tramp)
+ ret i64 0
+}
>From 98b2bc5b08802ab0ee79b28e10ed3ea531588d67 Mon Sep 17 00:00:00 2001
From: Carlos Seo <carlos.seo at linaro.org>
Date: Wed, 24 Jul 2024 18:14:05 -0300
Subject: [PATCH 2/2] [Flang][Docs] Update information about AArch64
trampolines (#100391)
Commits c4b66bf and 7647174 add support for AArch64 trampolines. Updated
documentation to reflect the changes.
(cherry picked from commit c6e69b041a7e6d18463f6cf684b10fd46a62c496)
---
flang/docs/InternalProcedureTrampolines.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/flang/docs/InternalProcedureTrampolines.md b/flang/docs/InternalProcedureTrampolines.md
index ef02f1d737c82..41f6155332a47 100644
--- a/flang/docs/InternalProcedureTrampolines.md
+++ b/flang/docs/InternalProcedureTrampolines.md
@@ -239,7 +239,7 @@ automatically deallocated at the end of `host()` invocation.
Unfortunately, this requires the program stack to be writeable and executable
at the same time, which might be a security concern.
-> NOTE: LLVM's AArch64 backend supports `nest` attribute, but it does not seem to support trampoline intrinsics.
+> NOTE: LLVM's AArch64 backend supports the `nest` attribute and the trampoline intrinsics, but the intrinsics require the compiler-rt runtime, selected via the `-rtlib=compiler-rt` flag.
## Alternative implementation(s)
More information about the llvm-branch-commits
mailing list