[llvm] [PowerPC] Initial support of tail call optimization on AIXDigger/tail call (PR #161690)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 2 08:40:40 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-powerpc
Author: zhijian lin (diggerlin)
<details>
<summary>Changes</summary>
AIX currently lacks support for tail call optimization (TCO). This PR adds TCO for the fastcc calling convention on AIX.
The patch is based on https://github.com/llvm/llvm-project/pull/70016; thanks to Kai Luo for that work.
---
Full diff: https://github.com/llvm/llvm-project/pull/161690.diff
6 Files Affected:
- (modified) llvm/lib/Target/PowerPC/PPCFrameLowering.cpp (+6)
- (modified) llvm/lib/Target/PowerPC/PPCISelLowering.cpp (+38-9)
- (modified) llvm/lib/Target/PowerPC/PPCInstr64Bit.td (+2)
- (modified) llvm/lib/Target/PowerPC/PPCInstrInfo.td (+4)
- (added) llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll (+111)
- (modified) llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll (+1-1)
``````````diff
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 910bc9d281259..efa75fd5681b3 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -1935,6 +1935,9 @@ void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
else if (JumpTarget.isSymbol())
BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
addExternalSymbol(JumpTarget.getSymbolName());
+ else if (JumpTarget.isMCSymbol())
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB))
+ .addSym(JumpTarget.getMCSymbol());
else
llvm_unreachable("Expecting Global or External Symbol");
} else if (RetOpcode == PPC::TCRETURNri) {
@@ -1954,6 +1957,9 @@ void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
else if (JumpTarget.isSymbol())
BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
addExternalSymbol(JumpTarget.getSymbolName());
+ else if (JumpTarget.isMCSymbol())
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8))
+ .addSym(JumpTarget.getMCSymbol());
else
llvm_unreachable("Expecting Global or External Symbol");
} else if (RetOpcode == PPC::TCRETURNri8) {
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index f69218056fc44..a35cdf4e2c282 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -5190,7 +5190,13 @@ bool PPCTargetLowering::IsEligibleForTailCallOptimization(
const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
CallingConv::ID CallerCC, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins) const {
- if (!getTargetMachine().Options.GuaranteedTailCallOpt)
+ bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
+
+ // Enable SCO on AIX.
+ if (!TailCallOpt && !Subtarget.isAIXABI())
+ return false;
+
+ if (DisableSCO)
return false;
// Variable argument functions are not supported.
@@ -5869,6 +5875,7 @@ SDValue PPCTargetLowering::FinishCall(
Callee.getOpcode() == ISD::TargetExternalSymbol ||
Callee.getOpcode() == ISD::TargetGlobalAddress ||
isa<ConstantSDNode>(Callee) ||
+ (Subtarget.isAIXABI() && Callee.getOpcode() == ISD::MCSymbol) ||
(CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
"Expecting a global address, external symbol, absolute value, "
"register or an indirect tail call when PC Relative calls are "
@@ -7244,8 +7251,10 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
CallConv == CallingConv::Fast) &&
"Unexpected calling convention!");
- if (getTargetMachine().Options.GuaranteedTailCallOpt)
- report_fatal_error("Tail call support is unimplemented on AIX.");
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv != CallingConv::Fast)
+ report_fatal_error("Tail call support for non-fastcc calling convention is "
+ "unimplemented on AIX.");
if (useSoftFloat())
report_fatal_error("Soft float support is unimplemented on AIX.");
@@ -7254,6 +7263,9 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
const bool IsPPC64 = Subtarget.isPPC64();
const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
+ // Potential tail calls could cause overwriting of argument stack slots.
+ const bool IsImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
+ (CallConv == CallingConv::Fast));
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
@@ -7319,10 +7331,6 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
// Objects are right-justified because AIX is big-endian.
if (LocSize > ValSize)
CurArgOffset += LocSize - ValSize;
- // Potential tail calls could cause overwriting of argument stack slots.
- const bool IsImmutable =
- !(getTargetMachine().Options.GuaranteedTailCallOpt &&
- (CallConv == CallingConv::Fast));
int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue ArgValue =
@@ -7616,6 +7624,8 @@ SDValue PPCTargetLowering::LowerCall_AIX(
// The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
const bool IsPPC64 = Subtarget.isPPC64();
+ bool IsSibCall =
+ CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt;
const EVT PtrVT = getPointerTy(DAG.getDataLayout());
const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
@@ -7631,13 +7641,25 @@ SDValue PPCTargetLowering::LowerCall_AIX(
const unsigned NumBytes = std::max<unsigned>(
LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());
+ int SPDiff =
+ IsSibCall ? 0 : CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
+
+ // To protect arguments on the stack from being clobbered in a tail call,
+ // force all the loads to happen before doing any other lowering.
+ if (CFlags.IsTailCall)
+ Chain = DAG.getStackArgumentTokenFactor(Chain);
+
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass.
- Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
+ if (!IsSibCall)
+ Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
SDValue CallSeqStart = Chain;
+ SDValue LROp, FPOp;
+ Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
+ SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
// Set up a copy of the stack pointer for loading and storing any
// arguments that may not fit in the registers available for argument
@@ -7814,6 +7836,7 @@ SDValue PPCTargetLowering::LowerCall_AIX(
}
if (VA.isMemLoc()) {
+ if (!CFlags.IsTailCall) {
SDValue PtrOff =
DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
@@ -7821,6 +7844,9 @@ SDValue PPCTargetLowering::LowerCall_AIX(
DAG.getStore(Chain, dl, Arg, PtrOff,
MachinePointerInfo::getStack(MF, VA.getLocMemOffset()),
Subtarget.getFrameLowering()->getStackAlign()));
+ } else
+ CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff,
+ VA.getLocMemOffset(), TailCallArguments);
continue;
}
@@ -7903,7 +7929,10 @@ SDValue PPCTargetLowering::LowerCall_AIX(
InGlue = Chain.getValue(1);
}
- const int SPDiff = 0;
+ if (CFlags.IsTailCall && !IsSibCall)
+ PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
+ TailCallArguments);
+
return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 269d30318bca8..aa913ae82484e 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -474,6 +474,8 @@ def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm),
def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
(TCRETURNri8 CTRRC8:$dst, imm:$imm)>;
+def : Pat<(PPCtc_return (i64 mcsym:$dst), imm:$imm),
+ (TCRETURNdi8 mcsym:$dst, imm:$imm)>;
// 64-bit CR instructions
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 1c45050cdf9ca..f2ce9db9ab1c9 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -3155,6 +3155,10 @@ def : Pat<(PPCtc_return CTRRC:$dst, imm:$imm),
(TCRETURNri CTRRC:$dst, imm:$imm)>;
def : Pat<(int_ppc_fence), (FENCE)>;
+
+def : Pat<(PPCtc_return (i32 mcsym:$dst), imm:$imm),
+ (TCRETURNdi mcsym:$dst, imm:$imm)>;
+
def : Pat<(int_ppc_readflm), (MFFS)>;
def : Pat<(int_ppc_mffsl), (MFFSL)>;
diff --git a/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll b/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll
new file mode 100644
index 0000000000000..a23fd2a8ae2c0
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll
@@ -0,0 +1,111 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix < %s | FileCheck --check-prefix=AIX-32 %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix < %s | FileCheck --check-prefix=AIX-64 %s
+
+define hidden fastcc i32 @k(i32 %a, i32 %b) {
+; AIX-32-LABEL: k:
+; AIX-32: # %bb.0: # %entry
+; AIX-32-NEXT: add 3, 3, 4
+; AIX-32-NEXT: blr
+;
+; AIX-64-LABEL: k:
+; AIX-64: # %bb.0: # %entry
+; AIX-64-NEXT: add 3, 3, 4
+; AIX-64-NEXT: blr
+entry:
+ %c = add i32 %a, %b
+ ret i32 %c
+}
+
+define hidden fastcc i32 @ff(i32 %a) {
+; AIX-32-LABEL: ff:
+; AIX-32: # %bb.0: # %entry
+; AIX-32-NEXT: blr
+;
+; AIX-64-LABEL: ff:
+; AIX-64: # %bb.0: # %entry
+; AIX-64-NEXT: blr
+entry:
+ ret i32 %a
+}
+
+define fastcc i32 @f(i32 %a, i32 %b) {
+; AIX-32-LABEL: f:
+; AIX-32: # %bb.0: # %entry
+; AIX-32-NEXT: b .ff
+; AIX-32-NEXT: #TC_RETURNd .ff 0
+;
+; AIX-64-LABEL: f:
+; AIX-64: # %bb.0: # %entry
+; AIX-64-NEXT: clrldi 3, 3, 32
+; AIX-64-NEXT: b .ff
+; AIX-64-NEXT: #TC_RETURNd8 .ff 0
+entry:
+ %r = tail call fastcc i32 @ff(i32 %a)
+ ret i32 %r
+}
+
+define fastcc i32 @kk(i32 %a) {
+; AIX-32-LABEL: kk:
+; AIX-32: # %bb.0: # %entry
+; AIX-32-NEXT: li 4, 1024
+; AIX-32-NEXT: b .k
+; AIX-32-NEXT: #TC_RETURNd .k 0
+;
+; AIX-64-LABEL: kk:
+; AIX-64: # %bb.0: # %entry
+; AIX-64-NEXT: clrldi 3, 3, 32
+; AIX-64-NEXT: li 4, 1024
+; AIX-64-NEXT: b .k
+; AIX-64-NEXT: #TC_RETURNd8 .k 0
+entry:
+ %r = tail call fastcc i32 @k(i32 %a, i32 1024)
+ ret i32 %r
+}
+
+define fastcc i32 @g(i32 %a) {
+; AIX-32-LABEL: g:
+; AIX-32: # %bb.0: # %entry
+; AIX-32-NEXT: b .ff
+; AIX-32-NEXT: #TC_RETURNd .ff 0
+;
+; AIX-64-LABEL: g:
+; AIX-64: # %bb.0: # %entry
+; AIX-64-NEXT: clrldi 3, 3, 32
+; AIX-64-NEXT: b .ff
+; AIX-64-NEXT: #TC_RETURNd8 .ff 0
+entry:
+ %r = tail call fastcc i32 @ff(i32 %a)
+ ret i32 %r
+}
+
+define fastcc i32 @gg(i32 %a) {
+; AIX-32-LABEL: gg:
+; AIX-32: # %bb.0: # %entry
+; AIX-32-NEXT: mflr 0
+; AIX-32-NEXT: stwu 1, -64(1)
+; AIX-32-NEXT: stw 0, 72(1)
+; AIX-32-NEXT: bl .ff
+; AIX-32-NEXT: addi 3, 3, 1
+; AIX-32-NEXT: addi 1, 1, 64
+; AIX-32-NEXT: lwz 0, 8(1)
+; AIX-32-NEXT: mtlr 0
+; AIX-32-NEXT: blr
+;
+; AIX-64-LABEL: gg:
+; AIX-64: # %bb.0: # %entry
+; AIX-64-NEXT: mflr 0
+; AIX-64-NEXT: stdu 1, -112(1)
+; AIX-64-NEXT: clrldi 3, 3, 32
+; AIX-64-NEXT: std 0, 128(1)
+; AIX-64-NEXT: bl .ff
+; AIX-64-NEXT: addi 3, 3, 1
+; AIX-64-NEXT: addi 1, 1, 112
+; AIX-64-NEXT: ld 0, 16(1)
+; AIX-64-NEXT: mtlr 0
+; AIX-64-NEXT: blr
+entry:
+ %r = tail call fastcc i32 @ff(i32 %a)
+ %r.0 = add i32 %r, 1
+ ret i32 %r.0
+}
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll b/llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll
index 1a3aaaec037ab..d0a7444e64458 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll
@@ -36,7 +36,7 @@ exit:
; CHECK-SCO-SR: stdu 1, -{{[0-9]+}}(1)
; CHECK-SCO-SR: bl __assert_fail
-; CHECK-AIX: LLVM ERROR: Tail call support is unimplemented on AIX.
+; CHECK-AIX: LLVM ERROR: Tail call support for non-fastcc calling convention is unimplemented on AIX.
}
define dso_local fastcc i8 @LVComputationKind(
``````````
</details>
https://github.com/llvm/llvm-project/pull/161690
More information about the llvm-commits
mailing list