[llvm] [PowerPC] Support tail call optimization on AIX (PR #70016)
Kai Luo via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 24 01:48:46 PDT 2023
https://github.com/bzEq updated https://github.com/llvm/llvm-project/pull/70016
From f7165771da7ec3e401cefb3ca1b869b1f27ff7f5 Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail at cn.ibm.com>
Date: Tue, 24 Oct 2023 08:37:25 +0000
Subject: [PATCH 1/2] Initial attempt
---
llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 6 +++
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 31 +++++++++++----
llvm/lib/Target/PowerPC/PPCInstr64Bit.td | 2 +
llvm/lib/Target/PowerPC/PPCInstrInfo.td | 3 ++
llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll | 39 +++++++++++++++++++
5 files changed, 74 insertions(+), 7 deletions(-)
create mode 100644 llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index d41861ddcc8c6eb..29d07e4ed3add2b 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -1938,6 +1938,9 @@ void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
else if (JumpTarget.isSymbol())
BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
addExternalSymbol(JumpTarget.getSymbolName());
+ else if (JumpTarget.isMCSymbol())
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB))
+ .addSym(JumpTarget.getMCSymbol());
else
llvm_unreachable("Expecting Global or External Symbol");
} else if (RetOpcode == PPC::TCRETURNri) {
@@ -1957,6 +1960,9 @@ void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
else if (JumpTarget.isSymbol())
BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
addExternalSymbol(JumpTarget.getSymbolName());
+ else if (JumpTarget.isMCSymbol())
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8))
+ .addSym(JumpTarget.getMCSymbol());
else
llvm_unreachable("Expecting Global or External Symbol");
} else if (RetOpcode == PPC::TCRETURNri8) {
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 5e0c2d62f5a9cb5..72a9e86458711de 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -5704,6 +5704,7 @@ SDValue PPCTargetLowering::FinishCall(
Callee.getOpcode() == ISD::TargetExternalSymbol ||
Callee.getOpcode() == ISD::TargetGlobalAddress ||
isa<ConstantSDNode>(Callee) ||
+ (Subtarget.isAIXABI() && Callee.getOpcode() == ISD::MCSymbol) ||
(CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
"Expecting a global address, external symbol, absolute value, "
"register or an indirect tail call when PC Relative calls are "
@@ -7065,7 +7066,8 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
CallConv == CallingConv::Fast) &&
"Unexpected calling convention!");
- if (getTargetMachine().Options.GuaranteedTailCallOpt)
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv != CallingConv::Fast)
report_fatal_error("Tail call support is unimplemented on AIX.");
if (useSoftFloat())
@@ -7389,6 +7391,8 @@ SDValue PPCTargetLowering::LowerCall_AIX(
// The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
const bool IsPPC64 = Subtarget.isPPC64();
+ bool IsSibCall =
+ CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt;
const EVT PtrVT = getPointerTy(DAG.getDataLayout());
const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
@@ -7404,13 +7408,19 @@ SDValue PPCTargetLowering::LowerCall_AIX(
const unsigned NumBytes = std::max<unsigned>(
LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());
+ int SPDiff =
+ IsSibCall ? 0 : CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
+
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass.
Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
SDValue CallSeqStart = Chain;
+ SDValue LROp, FPOp;
+ Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
+ SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
// Set up a copy of the stack pointer for loading and storing any
// arguments that may not fit in the registers available for argument
@@ -7587,11 +7597,15 @@ SDValue PPCTargetLowering::LowerCall_AIX(
}
if (VA.isMemLoc()) {
- SDValue PtrOff =
- DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
- PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
- MemOpChains.push_back(
- DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
+ if (!CFlags.IsTailCall) {
+ SDValue PtrOff =
+ DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+ MemOpChains.push_back(
+ DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
+ } else
+ CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, VA.getLocMemOffset(),
+ TailCallArguments);
continue;
}
@@ -7674,7 +7688,10 @@ SDValue PPCTargetLowering::LowerCall_AIX(
InGlue = Chain.getValue(1);
}
- const int SPDiff = 0;
+ if (CFlags.IsTailCall && !IsSibCall)
+ PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
+ TailCallArguments);
+
return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 0322bb37b1fdf8f..a99a347e13824f1 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -474,6 +474,8 @@ def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm),
def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
(TCRETURNri8 CTRRC8:$dst, imm:$imm)>;
+def : Pat<(PPCtc_return (i64 mcsym:$dst), imm:$imm),
+ (TCRETURNdi8 mcsym:$dst, imm:$imm)>;
// 64-bit CR instructions
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index a97062e0c643fb2..002851a1dc91b92 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -3187,6 +3187,9 @@ def : Pat<(PPCtc_return (i32 texternalsym:$dst), imm:$imm),
def : Pat<(PPCtc_return CTRRC:$dst, imm:$imm),
(TCRETURNri CTRRC:$dst, imm:$imm)>;
+def : Pat<(PPCtc_return (i32 mcsym:$dst), imm:$imm),
+ (TCRETURNdi mcsym:$dst, imm:$imm)>;
+
def : Pat<(int_ppc_readflm), (MFFS)>;
def : Pat<(int_ppc_mffsl), (MFFSL)>;
diff --git a/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll b/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll
new file mode 100644
index 000000000000000..14b38b931dd0280
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -tailcallopt -verify-machineinstrs -mtriple=powerpc64-ibm-aix < %s | FileCheck --check-prefix=AIX-64
+; RUN: llc -tailcallopt -verify-machineinstrs -mtriple=powerpc-ibm-aix < %s | FileCheck --check-prefix=AIX-32
+
+define hidden fastcc i32 @ff(i32 %a) {
+; AIX-64-LABEL: ff:
+; AIX-64: # %bb.0: # %entry
+; AIX-64-NEXT: addi 1, 1, 112
+; AIX-64-NEXT: blr
+;
+; AIX-32-LABEL: ff:
+; AIX-32: # %bb.0: # %entry
+; AIX-32-NEXT: addi 1, 1, 64
+; AIX-32-NEXT: blr
+entry:
+ ret i32 %a
+}
+
+define fastcc i32 @f(i32 %a, i32 %b) {
+; AIX-64-LABEL: f:
+; AIX-64: # %bb.0: # %entry
+; AIX-64-NEXT: stdu 1, -112(1)
+; AIX-64-NEXT: clrldi 3, 3, 32
+; AIX-64-NEXT: addi 1, 1, 112
+; AIX-64-NEXT: b .ff
+; AIX-64-NEXT: #TC_RETURNd8 .ff 0
+;
+; AIX-32-LABEL: f:
+; AIX-32: # %bb.0: # %entry
+; AIX-32-NEXT: stwu 1, -64(1)
+; AIX-32-NEXT: lwz 4, 72(1)
+; AIX-32-NEXT: stw 4, 80(1)
+; AIX-32-NEXT: addi 1, 1, 72
+; AIX-32-NEXT: b .ff
+; AIX-32-NEXT: #TC_RETURNd .ff 8
+entry:
+ %r = tail call fastcc i32 @ff(i32 %a)
+ ret i32 %r
+}
From 385dc75abc20b91a5173e7d39c342af235a58197 Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail at cn.ibm.com>
Date: Tue, 24 Oct 2023 08:48:33 +0000
Subject: [PATCH 2/2] Compare w/o TCO
---
llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll | 67 ++++++++++++++-----
1 file changed, 50 insertions(+), 17 deletions(-)
diff --git a/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll b/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll
index 14b38b931dd0280..1274b0d329c0676 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll
@@ -1,38 +1,71 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc -tailcallopt -verify-machineinstrs -mtriple=powerpc64-ibm-aix < %s | FileCheck --check-prefix=AIX-64
-; RUN: llc -tailcallopt -verify-machineinstrs -mtriple=powerpc-ibm-aix < %s | FileCheck --check-prefix=AIX-32
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix < %s | FileCheck --check-prefix=AIX-32
+; RUN: llc -tailcallopt -verify-machineinstrs -mtriple=powerpc-ibm-aix < %s | FileCheck --check-prefix=AIX-32-TCO
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix < %s | FileCheck --check-prefix=AIX-64
+; RUN: llc -tailcallopt -verify-machineinstrs -mtriple=powerpc64-ibm-aix < %s | FileCheck --check-prefix=AIX-64-TCO
define hidden fastcc i32 @ff(i32 %a) {
+; AIX-32-LABEL: ff:
+; AIX-32: # %bb.0: # %entry
+; AIX-32-NEXT: blr
+;
+; AIX-32-TCO-LABEL: ff:
+; AIX-32-TCO: # %bb.0: # %entry
+; AIX-32-TCO-NEXT: addi 1, 1, 64
+; AIX-32-TCO-NEXT: blr
+;
; AIX-64-LABEL: ff:
; AIX-64: # %bb.0: # %entry
-; AIX-64-NEXT: addi 1, 1, 112
; AIX-64-NEXT: blr
;
-; AIX-32-LABEL: ff:
-; AIX-32: # %bb.0: # %entry
-; AIX-32-NEXT: addi 1, 1, 64
-; AIX-32-NEXT: blr
+; AIX-64-TCO-LABEL: ff:
+; AIX-64-TCO: # %bb.0: # %entry
+; AIX-64-TCO-NEXT: addi 1, 1, 112
+; AIX-64-TCO-NEXT: blr
entry:
ret i32 %a
}
define fastcc i32 @f(i32 %a, i32 %b) {
+; AIX-32-LABEL: f:
+; AIX-32: # %bb.0: # %entry
+; AIX-32-NEXT: mflr 0
+; AIX-32-NEXT: stwu 1, -64(1)
+; AIX-32-NEXT: stw 0, 72(1)
+; AIX-32-NEXT: bl .ff
+; AIX-32-NEXT: addi 1, 1, 64
+; AIX-32-NEXT: lwz 0, 8(1)
+; AIX-32-NEXT: mtlr 0
+; AIX-32-NEXT: blr
+;
+; AIX-32-TCO-LABEL: f:
+; AIX-32-TCO: # %bb.0: # %entry
+; AIX-32-TCO-NEXT: stwu 1, -64(1)
+; AIX-32-TCO-NEXT: lwz 4, 72(1)
+; AIX-32-TCO-NEXT: stw 4, 80(1)
+; AIX-32-TCO-NEXT: addi 1, 1, 72
+; AIX-32-TCO-NEXT: b .ff
+; AIX-32-TCO-NEXT: #TC_RETURNd .ff 8
+;
; AIX-64-LABEL: f:
; AIX-64: # %bb.0: # %entry
+; AIX-64-NEXT: mflr 0
; AIX-64-NEXT: stdu 1, -112(1)
; AIX-64-NEXT: clrldi 3, 3, 32
+; AIX-64-NEXT: std 0, 128(1)
+; AIX-64-NEXT: bl .ff
; AIX-64-NEXT: addi 1, 1, 112
-; AIX-64-NEXT: b .ff
-; AIX-64-NEXT: #TC_RETURNd8 .ff 0
+; AIX-64-NEXT: ld 0, 16(1)
+; AIX-64-NEXT: mtlr 0
+; AIX-64-NEXT: blr
;
-; AIX-32-LABEL: f:
-; AIX-32: # %bb.0: # %entry
-; AIX-32-NEXT: stwu 1, -64(1)
-; AIX-32-NEXT: lwz 4, 72(1)
-; AIX-32-NEXT: stw 4, 80(1)
-; AIX-32-NEXT: addi 1, 1, 72
-; AIX-32-NEXT: b .ff
-; AIX-32-NEXT: #TC_RETURNd .ff 8
+; AIX-64-TCO-LABEL: f:
+; AIX-64-TCO: # %bb.0: # %entry
+; AIX-64-TCO-NEXT: stdu 1, -112(1)
+; AIX-64-TCO-NEXT: clrldi 3, 3, 32
+; AIX-64-TCO-NEXT: addi 1, 1, 112
+; AIX-64-TCO-NEXT: b .ff
+; AIX-64-TCO-NEXT: #TC_RETURNd8 .ff 0
entry:
%r = tail call fastcc i32 @ff(i32 %a)
ret i32 %r
More information about the llvm-commits mailing list