[llvm] 6971c1b - [LoongArch] Add support for tail call optimization
Author: wanglei
Date: 2022-11-19T17:36:06+08:00
New Revision: 6971c1b3702aba1fea5d3497258df21fb33922a7
URL: https://github.com/llvm/llvm-project/commit/6971c1b3702aba1fea5d3497258df21fb33922a7
DIFF: https://github.com/llvm/llvm-project/commit/6971c1b3702aba1fea5d3497258df21fb33922a7.diff
LOG: [LoongArch] Add support for tail call optimization
This patch adds tail call support to the LoongArch backend. Where
appropriate, the `b` or `jr` instruction is used for tail calls (or the
`pcalau12i+jirl` instruction pair when using the medium code model).
This patch also renames the previously misnamed operand:
simm26_bl -> simm26_symbol
This is modeled after RISC-V's tail call optimization.
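To illustrate the lowering described above, here is a minimal sketch based
on the `caller_tail` test added by this patch (function names come from the
test; the exact scratch register chosen may vary):

    ;; LLVM IR: a call in tail position, eligible for tail call optimization.
    declare i32 @callee_tail(i32)
    define i32 @caller_tail(i32 %i) nounwind {
    entry:
      %r = tail call i32 @callee_tail(i32 %i)
      ret i32 %r
    }

    ;; Small code model (default): a single direct branch.
    ;;   b %plt(callee_tail)
    ;;
    ;; Medium code model: a PC-relative pair through a scratch register;
    ;; using $zero as the link register makes it a jump rather than a call.
    ;;   pcalau12i $a1, %pc_hi20(callee_tail)
    ;;   jirl      $zero, $a1, %pc_lo12(callee_tail)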
Reviewed By: SixWeining
Differential Revision: https://reviews.llvm.org/D137889
Added:
llvm/test/CodeGen/LoongArch/tail-calls.ll
Modified:
llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
llvm/lib/Target/LoongArch/LoongArchISelLowering.h
llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td
llvm/test/CodeGen/LoongArch/codemodel-medium.ll
llvm/test/CodeGen/LoongArch/nomerge.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
index d99b7757166c..bad39dc3a14f 100644
--- a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
@@ -77,7 +77,8 @@ class LoongArchPreRAExpandPseudo : public MachineFunctionPass {
MachineBasicBlock::iterator &NextMBBI);
bool expandFunctionCALL(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- MachineBasicBlock::iterator &NextMBBI);
+ MachineBasicBlock::iterator &NextMBBI,
+ bool IsTailCall);
};
char LoongArchPreRAExpandPseudo::ID = 0;
@@ -121,7 +122,9 @@ bool LoongArchPreRAExpandPseudo::expandMI(
case LoongArch::PseudoLA_TLS_GD:
return expandLoadAddressTLSGD(MBB, MBBI, NextMBBI);
case LoongArch::PseudoCALL:
- return expandFunctionCALL(MBB, MBBI, NextMBBI);
+ return expandFunctionCALL(MBB, MBBI, NextMBBI, /*IsTailCall=*/false);
+ case LoongArch::PseudoTAIL:
+ return expandFunctionCALL(MBB, MBBI, NextMBBI, /*IsTailCall=*/true);
}
return false;
}
@@ -247,27 +250,43 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSGD(
bool LoongArchPreRAExpandPseudo::expandFunctionCALL(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- MachineBasicBlock::iterator &NextMBBI) {
+ MachineBasicBlock::iterator &NextMBBI, bool IsTailCall) {
MachineFunction *MF = MBB.getParent();
MachineInstr &MI = *MBBI;
DebugLoc DL = MI.getDebugLoc();
const MachineOperand &Func = MI.getOperand(0);
MachineInstrBuilder CALL;
+ unsigned Opcode;
switch (MF->getTarget().getCodeModel()) {
default:
report_fatal_error("Unsupported code model");
break;
- case CodeModel::Small: // Default CodeModel.
- CALL = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::BL)).add(Func);
+ case CodeModel::Small: {
+ // CALL:
+ // bl func
+ // TAIL:
+ // b func
+ Opcode = IsTailCall ? LoongArch::PseudoB_TAIL : LoongArch::BL;
+ CALL = BuildMI(MBB, MBBI, DL, TII->get(Opcode)).add(Func);
break;
+ }
case CodeModel::Medium: {
+ // CALL:
// pcalau12i $ra, %pc_hi20(func)
// jirl $ra, $ra, %pc_lo12(func)
+ // TAIL:
+ // pcalau12i $scratch, %pc_hi20(func)
+ // jirl $r0, $scratch, %pc_lo12(func)
+ Opcode =
+ IsTailCall ? LoongArch::PseudoJIRL_TAIL : LoongArch::PseudoJIRL_CALL;
+ Register ScratchReg =
+ IsTailCall
+ ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass)
+ : LoongArch::R1;
MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCALAU12I), LoongArch::R1);
- CALL = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PseudoJIRL_CALL))
- .addReg(LoongArch::R1);
+ BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCALAU12I), ScratchReg);
+ CALL = BuildMI(MBB, MBBI, DL, TII->get(Opcode)).addReg(ScratchReg);
if (Func.isSymbol()) {
const char *FnName = Func.getSymbolName();
MIB.addExternalSymbol(FnName, LoongArchII::MO_PCREL_HI);
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index f0386949f611..eec32fd49054 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -31,6 +31,8 @@ using namespace llvm;
#define DEBUG_TYPE "loongarch-isel-lowering"
+STATISTIC(NumTailCalls, "Number of tail calls");
+
static cl::opt<bool> ZeroDivCheck(
"loongarch-check-zero-division", cl::Hidden,
cl::desc("Trap on integer division by zero."),
@@ -1334,6 +1336,7 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
// TODO: Add more target-dependent nodes later.
NODE_NAME_CASE(CALL)
NODE_NAME_CASE(RET)
+ NODE_NAME_CASE(TAIL)
NODE_NAME_CASE(SLL_W)
NODE_NAME_CASE(SRA_W)
NODE_NAME_CASE(SRL_W)
@@ -1808,6 +1811,48 @@ SDValue LoongArchTargetLowering::LowerFormalArguments(
return Chain;
}
+// Check whether the call is eligible for tail call optimization.
+bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
+ CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
+ const SmallVectorImpl<CCValAssign> &ArgLocs) const {
+
+ auto CalleeCC = CLI.CallConv;
+ auto &Outs = CLI.Outs;
+ auto &Caller = MF.getFunction();
+ auto CallerCC = Caller.getCallingConv();
+
+ // Do not tail call opt if the stack is used to pass parameters.
+ if (CCInfo.getNextStackOffset() != 0)
+ return false;
+
+ // Do not tail call opt if any parameters need to be passed indirectly.
+ for (auto &VA : ArgLocs)
+ if (VA.getLocInfo() == CCValAssign::Indirect)
+ return false;
+
+ // Do not tail call opt if either caller or callee uses struct return
+ // semantics.
+ auto IsCallerStructRet = Caller.hasStructRetAttr();
+ auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
+ if (IsCallerStructRet || IsCalleeStructRet)
+ return false;
+
+ // Do not tail call opt if either the callee or caller has a byval argument.
+ for (auto &Arg : Outs)
+ if (Arg.Flags.isByVal())
+ return false;
+
+ // The callee has to preserve all registers the caller needs to preserve.
+ const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
+ if (CalleeCC != CallerCC) {
+ const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
+ if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
+ return false;
+ }
+ return true;
+}
+
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
return DAG.getDataLayout().getPrefTypeAlign(
VT.getTypeForEVT(*DAG.getContext()));
@@ -1829,7 +1874,7 @@ LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
bool IsVarArg = CLI.IsVarArg;
EVT PtrVT = getPointerTy(DAG.getDataLayout());
MVT GRLenVT = Subtarget.getGRLenVT();
- CLI.IsTailCall = false;
+ bool &IsTailCall = CLI.IsTailCall;
MachineFunction &MF = DAG.getMachineFunction();
@@ -1839,6 +1884,16 @@ LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
+ // Check if it's really possible to do a tail call.
+ if (IsTailCall)
+ IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
+
+ if (IsTailCall)
+ ++NumTailCalls;
+ else if (CLI.CB && CLI.CB->isMustTailCall())
+ report_fatal_error("failed to perform tail call elimination on a call "
+ "site marked musttail");
+
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = ArgCCInfo.getNextStackOffset();
@@ -1860,12 +1915,13 @@ LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
/*IsVolatile=*/false,
- /*AlwaysInline=*/false, /*isTailCall=*/false,
+ /*AlwaysInline=*/false, /*isTailCall=*/IsTailCall,
MachinePointerInfo(), MachinePointerInfo());
ByValArgs.push_back(FIPtr);
}
- Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
+ if (!IsTailCall)
+ Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
// Copy argument values to their designated locations.
SmallVector<std::pair<Register, SDValue>> RegsToPass;
@@ -1932,6 +1988,8 @@ LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
} else {
assert(VA.isMemLoc() && "Argument not register or memory");
+ assert(!IsTailCall && "Tail call not allowed if stack is used "
+ "for passing parameters");
// Work out the address of the stack slot.
if (!StackPtr.getNode())
@@ -1986,11 +2044,13 @@ LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
for (auto &Reg : RegsToPass)
Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
- // Add a register mask operand representing the call-preserved registers.
- const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
- const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
- assert(Mask && "Missing call preserved mask for calling convention");
- Ops.push_back(DAG.getRegisterMask(Mask));
+ if (!IsTailCall) {
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+ }
// Glue the call to the argument copies, if any.
if (Glue.getNode())
@@ -1999,6 +2059,11 @@ LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Emit the call.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ if (IsTailCall) {
+ MF.getFrameInfo().setHasTailCall();
+ return DAG.getNode(LoongArchISD::TAIL, DL, NodeTys, Ops);
+ }
+
Chain = DAG.getNode(LoongArchISD::CALL, DL, NodeTys, Ops);
DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
Glue = Chain.getValue(1);
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 4b7bf9d9c699..e181c104b1e1 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -29,6 +29,8 @@ enum NodeType : unsigned {
// TODO: add more LoongArchISDs
CALL,
RET,
+ TAIL,
+
// 32-bit shifts, directly matching the semantics of the named LoongArch
// instructions.
SLL_W,
@@ -204,6 +206,10 @@ class LoongArchTargetLowering : public TargetLowering {
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
+
+ bool isEligibleForTailCallOptimization(
+ CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
+ const SmallVectorImpl<CCValAssign> &ArgLocs) const;
};
} // end namespace llvm
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index 885c4d75f0b9..84b9f2c29e5a 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -50,6 +50,9 @@ def loongarch_call : SDNode<"LoongArchISD::CALL", SDT_LoongArchCall,
SDNPVariadic]>;
def loongarch_ret : SDNode<"LoongArchISD::RET", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def loongarch_tail : SDNode<"LoongArchISD::TAIL", SDT_LoongArchCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
def loongarch_sll_w : SDNode<"LoongArchISD::SLL_W", SDT_LoongArchIntBinOpW>;
def loongarch_sra_w : SDNode<"LoongArchISD::SRA_W", SDT_LoongArchIntBinOpW>;
def loongarch_srl_w : SDNode<"LoongArchISD::SRL_W", SDT_LoongArchIntBinOpW>;
@@ -232,8 +235,8 @@ def SImm26OperandBL: AsmOperandClass {
let ParserMethod = "parseSImm26Operand";
}
-// A symbol or an imm used in BL/PseudoCALL.
-def simm26_bl : Operand<GRLenVT> {
+// A symbol or an imm used in BL/PseudoCALL/PseudoTAIL.
+def simm26_symbol : Operand<GRLenVT> {
let ParserMatchClass = SImm26OperandBL;
let EncoderMethod = "getImmOpValueAsr2";
let DecoderMethod = "decodeSImmOperand<26, 2>";
@@ -455,7 +458,7 @@ def BNEZ : BrCCZ_1RI21<0b010001, "bnez">;
def B : Br_I26<0b010100, "b">;
let isCall = 1, Defs=[R1] in
-def BL : FmtI26<0b010101, (outs), (ins simm26_bl:$imm26), "bl", "$imm26">;
+def BL : FmtI26<0b010101, (outs), (ins simm26_symbol:$imm26), "bl", "$imm26">;
def JIRL : Fmt2RI16<0b010011, (outs GPR:$rd),
(ins GPR:$rj, simm16_lsl2:$imm16), "jirl",
"$rd, $rj, $imm16">;
@@ -934,7 +937,7 @@ def : Pat<(brind (add GPR:$rj, simm16_lsl2:$imm16)),
(PseudoBRIND GPR:$rj, simm16_lsl2:$imm16)>;
let isCall = 1, Defs = [R1] in
-def PseudoCALL : Pseudo<(outs), (ins simm26_bl:$func)>;
+def PseudoCALL : Pseudo<(outs), (ins simm26_symbol:$func)>;
def : Pat<(loongarch_call tglobaladdr:$func), (PseudoCALL tglobaladdr:$func)>;
def : Pat<(loongarch_call texternalsym:$func), (PseudoCALL texternalsym:$func)>;
@@ -953,6 +956,28 @@ let isBarrier = 1, isReturn = 1, isTerminator = 1 in
def PseudoRET : Pseudo<(outs), (ins), [(loongarch_ret)]>,
PseudoInstExpansion<(JIRL R0, R1, 0)>;
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [R3] in
+def PseudoTAIL : Pseudo<(outs), (ins simm26_symbol:$dst)>;
+
+def : Pat<(loongarch_tail (iPTR tglobaladdr:$dst)),
+ (PseudoTAIL tglobaladdr:$dst)>;
+def : Pat<(loongarch_tail (iPTR texternalsym:$dst)),
+ (PseudoTAIL texternalsym:$dst)>;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [R3] in
+def PseudoTAILIndirect : Pseudo<(outs), (ins GPRT:$rj),
+ [(loongarch_tail GPRT:$rj)]>,
+ PseudoInstExpansion<(JIRL R0, GPR:$rj, 0)>;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [R3] in
+def PseudoB_TAIL : Pseudo<(outs), (ins simm26_b:$imm26)>,
+ PseudoInstExpansion<(B simm26_b:$imm26)>;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [R3] in
+def PseudoJIRL_TAIL : Pseudo<(outs), (ins GPR:$rj, simm16_lsl2:$imm16)>,
+ PseudoInstExpansion<(JIRL R0, GPR:$rj,
+ simm16_lsl2:$imm16)>;
+
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
def PseudoLA_PCREL : Pseudo<(outs GPR:$dst), (ins grlenimm:$src)>;
diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td
index 2a46c6e57a49..ff914f805e5b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td
@@ -98,6 +98,16 @@ def GPR : RegisterClass<"LoongArch", [GRLenVT], 32, (add
let RegInfos = GRLenRI;
}
+// GPR for indirect tail calls. We can't use callee-saved registers, as they are
+// restored to the saved value before the tail call, which would clobber a call
+// address.
+def GPRT : RegisterClass<"LoongArch", [GRLenVT], 32, (add
+ // a0...a7, t0...t8
+ (sequence "R%u", 4, 20)
+ )> {
+ let RegInfos = GRLenRI;
+}
+
// Floating point registers
let RegAltNameIndices = [RegAliasName] in {
diff --git a/llvm/test/CodeGen/LoongArch/codemodel-medium.ll b/llvm/test/CodeGen/LoongArch/codemodel-medium.ll
index aad38bb81952..d4d97e7df804 100644
--- a/llvm/test/CodeGen/LoongArch/codemodel-medium.ll
+++ b/llvm/test/CodeGen/LoongArch/codemodel-medium.ll
@@ -61,3 +61,19 @@ entry:
call void @llvm.memset.p0.i64(ptr %dst, i8 0, i64 1000, i1 false)
ret void
}
+
+;; Tail call with different codemodel.
+declare i32 @callee_tail(i32 %i)
+define i32 @caller_tail(i32 %i) nounwind {
+; SMALL-LABEL: caller_tail:
+; SMALL: # %bb.0: # %entry
+; SMALL-NEXT: b %plt(callee_tail)
+;
+; MEDIUM-LABEL: caller_tail:
+; MEDIUM: # %bb.0: # %entry
+; MEDIUM-NEXT: pcalau12i $a1, %pc_hi20(callee_tail)
+; MEDIUM-NEXT: jirl $zero, $a1, %pc_lo12(callee_tail)
+entry:
+ %r = tail call i32 @callee_tail(i32 %i)
+ ret i32 %r
+}
diff --git a/llvm/test/CodeGen/LoongArch/nomerge.ll b/llvm/test/CodeGen/LoongArch/nomerge.ll
index 6c69f0d15675..e4aecd79993e 100644
--- a/llvm/test/CodeGen/LoongArch/nomerge.ll
+++ b/llvm/test/CodeGen/LoongArch/nomerge.ll
@@ -32,4 +32,4 @@ attributes #0 = { nomerge }
; CHECK: .LBB0_3: # %if.then2
; CHECK-NEXT: bl %plt(bar)
; CHECK: .LBB0_4: # %if.end3
-; CHECK: bl %plt(bar)
+; CHECK: b %plt(bar)
diff --git a/llvm/test/CodeGen/LoongArch/tail-calls.ll b/llvm/test/CodeGen/LoongArch/tail-calls.ll
new file mode 100644
index 000000000000..f09b49688263
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/tail-calls.ll
@@ -0,0 +1,187 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s
+
+;; Perform tail call optimization for global address.
+declare i32 @callee_tail(i32 %i)
+define i32 @caller_tail(i32 %i) nounwind {
+; CHECK-LABEL: caller_tail:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: b %plt(callee_tail)
+entry:
+ %r = tail call i32 @callee_tail(i32 %i)
+ ret i32 %r
+}
+
+;; Perform tail call optimization for external symbol.
+@dest = global [2 x i8] zeroinitializer
+declare void @llvm.memcpy.p0i8.p0i8.i32(ptr, ptr, i32, i1)
+define void @caller_extern(ptr %src) optsize {
+; CHECK-LABEL: caller_extern:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: move $a1, $a0
+; CHECK-NEXT: pcalau12i $a0, %got_pc_hi20(dest)
+; CHECK-NEXT: ld.d $a0, $a0, %got_pc_lo12(dest)
+; CHECK-NEXT: ori $a2, $zero, 7
+; CHECK-NEXT: b %plt(memcpy)
+entry:
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(ptr getelementptr inbounds ([2 x i8], ptr @dest, i32 0, i32 0), ptr %src, i32 7, i1 false)
+ ret void
+}
+
+;; Perform indirect tail call optimization (for function pointer call).
+declare void @callee_indirect1()
+declare void @callee_indirect2()
+define void @caller_indirect_tail(i32 %a) nounwind {
+; CHECK-LABEL: caller_indirect_tail:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: bstrpick.d $a0, $a0, 31, 0
+; CHECK-NEXT: sltui $a0, $a0, 1
+; CHECK-NEXT: pcalau12i $a1, %got_pc_hi20(callee_indirect2)
+; CHECK-NEXT: ld.d $a1, $a1, %got_pc_lo12(callee_indirect2)
+; CHECK-NEXT: masknez $a1, $a1, $a0
+; CHECK-NEXT: pcalau12i $a2, %got_pc_hi20(callee_indirect1)
+; CHECK-NEXT: ld.d $a2, $a2, %got_pc_lo12(callee_indirect1)
+; CHECK-NEXT: maskeqz $a0, $a2, $a0
+; CHECK-NEXT: or $a0, $a0, $a1
+; CHECK-NEXT: jr $a0
+entry:
+ %tobool = icmp eq i32 %a, 0
+ %callee = select i1 %tobool, ptr @callee_indirect1, ptr @callee_indirect2
+ tail call void %callee()
+ ret void
+}
+
+;; Do not tail call optimize functions with varargs passed by stack.
+declare i32 @callee_varargs(i32, ...)
+define void @caller_varargs(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: caller_varargs:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -16
+; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $a0, $sp, 0
+; CHECK-NEXT: move $a2, $a1
+; CHECK-NEXT: move $a3, $a0
+; CHECK-NEXT: move $a4, $a0
+; CHECK-NEXT: move $a5, $a1
+; CHECK-NEXT: move $a6, $a1
+; CHECK-NEXT: move $a7, $a0
+; CHECK-NEXT: bl %plt(callee_varargs)
+; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 16
+; CHECK-NEXT: ret
+entry:
+ %call = tail call i32 (i32, ...) @callee_varargs(i32 %a, i32 %b, i32 %b, i32 %a, i32 %a, i32 %b, i32 %b, i32 %a, i32 %a)
+ ret void
+}
+
+;; Do not tail call optimize if stack is used to pass parameters.
+declare i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i)
+define i32 @caller_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i) nounwind {
+; CHECK-LABEL: caller_args:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -16
+; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; CHECK-NEXT: ld.d $t0, $sp, 16
+; CHECK-NEXT: st.d $t0, $sp, 0
+; CHECK-NEXT: bl %plt(callee_args)
+; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 16
+; CHECK-NEXT: ret
+entry:
+ %r = tail call i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i)
+ ret i32 %r
+}
+
+;; Do not tail call optimize if parameters need to be passed indirectly.
+declare i32 @callee_indirect_args(i256 %a)
+define void @caller_indirect_args() nounwind {
+; CHECK-LABEL: caller_indirect_args:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -48
+; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $zero, $sp, 24
+; CHECK-NEXT: st.d $zero, $sp, 16
+; CHECK-NEXT: st.d $zero, $sp, 8
+; CHECK-NEXT: ori $a0, $zero, 1
+; CHECK-NEXT: st.d $a0, $sp, 0
+; CHECK-NEXT: addi.d $a0, $sp, 0
+; CHECK-NEXT: bl %plt(callee_indirect_args)
+; CHECK-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 48
+; CHECK-NEXT: ret
+entry:
+ %call = tail call i32 @callee_indirect_args(i256 1)
+ ret void
+}
+
+;; Do not tail call optimize if byval parameters need to be passed.
+declare i32 @callee_byval(ptr byval(ptr) %a)
+define i32 @caller_byval() nounwind {
+; CHECK-LABEL: caller_byval:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -32
+; CHECK-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
+; CHECK-NEXT: ld.d $a0, $sp, 16
+; CHECK-NEXT: st.d $a0, $sp, 8
+; CHECK-NEXT: addi.d $a0, $sp, 8
+; CHECK-NEXT: bl %plt(callee_byval)
+; CHECK-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 32
+; CHECK-NEXT: ret
+entry:
+ %a = alloca ptr
+ %r = tail call i32 @callee_byval(ptr byval(ptr) %a)
+ ret i32 %r
+}
+
+;; Do not tail call optimize if callee uses structret semantics.
+%struct.A = type { i32 }
+@a = global %struct.A zeroinitializer
+
+declare void @callee_struct(ptr sret(%struct.A) %a)
+define void @caller_nostruct() nounwind {
+; CHECK-LABEL: caller_nostruct:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -16
+; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; CHECK-NEXT: pcalau12i $a0, %got_pc_hi20(a)
+; CHECK-NEXT: ld.d $a0, $a0, %got_pc_lo12(a)
+; CHECK-NEXT: bl %plt(callee_struct)
+; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 16
+; CHECK-NEXT: ret
+entry:
+ tail call void @callee_struct(ptr sret(%struct.A) @a)
+ ret void
+}
+
+;; Do not tail call optimize if caller uses structret semantics.
+declare void @callee_nostruct()
+define void @caller_struct(ptr sret(%struct.A) %a) nounwind {
+; CHECK-LABEL: caller_struct:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -16
+; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; CHECK-NEXT: bl %plt(callee_nostruct)
+; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 16
+; CHECK-NEXT: ret
+entry:
+ tail call void @callee_nostruct()
+ ret void
+}
+
+;; Do not tail call optimize if disabled.
+define i32 @disable_tail_calls(i32 %i) nounwind "disable-tail-calls"="true" {
+; CHECK-LABEL: disable_tail_calls:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -16
+; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; CHECK-NEXT: bl %plt(callee_tail)
+; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 16
+; CHECK-NEXT: ret
+entry:
+ %rv = tail call i32 @callee_tail(i32 %i)
+ ret i32 %rv
+}