[llvm] 1c235c3 - [Sparc] Add tail call support
Daniel Cederman via llvm-commits
llvm-commits@lists.llvm.org
Tue Mar 8 04:52:13 PST 2022
Author: Daniel Cederman
Date: 2022-03-08T13:50:54+01:00
New Revision: 1c235c375492180c2eecb6331f169486019fd2d2
URL: https://github.com/llvm/llvm-project/commit/1c235c375492180c2eecb6331f169486019fd2d2
DIFF: https://github.com/llvm/llvm-project/commit/1c235c375492180c2eecb6331f169486019fd2d2.diff
LOG: [Sparc] Add tail call support
This patch adds tail call support to the 32-bit Sparc backend.
Two new instructions are defined, TAIL_CALL and TAIL_CALLri. They are
encoded the same as CALL and BINDri, but are marked with isReturn so
that the epilogue gets emitted. In contrast to CALL, TAIL_CALL is not
marked with isCall. This makes it possible to use the leaf function
optimization when the only call a function makes is a tail call.
TAIL_CALL modifies the return address in %o7, so for leaf functions
the value in %o7 needs to be restored after the call. For normal
functions, which use the restore instruction, this is not necessary.
Reviewed By: koakuma
Differential Revision: https://reviews.llvm.org/D51206
Added:
llvm/test/CodeGen/SPARC/tailcall.ll
Modified:
llvm/lib/Target/Sparc/DelaySlotFiller.cpp
llvm/lib/Target/Sparc/SparcCallingConv.td
llvm/lib/Target/Sparc/SparcFrameLowering.cpp
llvm/lib/Target/Sparc/SparcISelLowering.cpp
llvm/lib/Target/Sparc/SparcISelLowering.h
llvm/lib/Target/Sparc/SparcInstrInfo.td
llvm/test/CodeGen/SPARC/2011-01-11-Call.ll
llvm/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
index 259b379541839..cc132d46de856 100644
--- a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -174,17 +174,20 @@ Filler::findDelayInstr(MachineBasicBlock &MBB,
if (slot == MBB.begin())
return MBB.end();
- if (slot->getOpcode() == SP::RET || slot->getOpcode() == SP::TLS_CALL)
+ unsigned Opc = slot->getOpcode();
+
+ if (Opc == SP::RET || Opc == SP::TLS_CALL)
return MBB.end();
- if (slot->getOpcode() == SP::RETL) {
+ if (Opc == SP::RETL || Opc == SP::TAIL_CALL || Opc == SP::TAIL_CALLri) {
MachineBasicBlock::iterator J = slot;
--J;
if (J->getOpcode() == SP::RESTORErr
|| J->getOpcode() == SP::RESTOREri) {
// change retl to ret.
- slot->setDesc(Subtarget->getInstrInfo()->get(SP::RET));
+ if (Opc == SP::RETL)
+ slot->setDesc(Subtarget->getInstrInfo()->get(SP::RET));
return J;
}
}
@@ -360,6 +363,8 @@ bool Filler::needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize)
case SP::CALLrr:
case SP::CALLri: structSizeOpNum = 2; break;
case SP::TLS_CALL: return false;
+ case SP::TAIL_CALLri:
+ case SP::TAIL_CALL: return false;
}
const MachineOperand &MO = I->getOperand(structSizeOpNum);
diff --git a/llvm/lib/Target/Sparc/SparcCallingConv.td b/llvm/lib/Target/Sparc/SparcCallingConv.td
index db540d6f0c425..e6d23f741ea5f 100644
--- a/llvm/lib/Target/Sparc/SparcCallingConv.td
+++ b/llvm/lib/Target/Sparc/SparcCallingConv.td
@@ -134,7 +134,7 @@ def RetCC_Sparc64 : CallingConv<[
// Callee-saved registers are handled by the register window mechanism.
def CSR : CalleeSavedRegs<(add)> {
let OtherPreserved = (add (sequence "I%u", 0, 7),
- (sequence "L%u", 0, 7));
+ (sequence "L%u", 0, 7), O6);
}
// Callee-saved registers for calls with ReturnsTwice attribute.
diff --git a/llvm/lib/Target/Sparc/SparcFrameLowering.cpp b/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
index a740de9123c97..3e08b03b984e7 100644
--- a/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -218,8 +218,9 @@ void SparcFrameLowering::emitEpilogue(MachineFunction &MF,
const SparcInstrInfo &TII =
*static_cast<const SparcInstrInfo *>(MF.getSubtarget().getInstrInfo());
DebugLoc dl = MBBI->getDebugLoc();
- assert(MBBI->getOpcode() == SP::RETL &&
- "Can only put epilog before 'retl' instruction!");
+ assert((MBBI->getOpcode() == SP::RETL || MBBI->getOpcode() == SP::TAIL_CALL ||
+ MBBI->getOpcode() == SP::TAIL_CALLri) &&
+ "Can only put epilog before 'retl' or 'tail_call' instruction!");
if (!FuncInfo->isLeafProc()) {
BuildMI(MBB, MBBI, dl, TII.get(SP::RESTORErr), SP::G0).addReg(SP::G0)
.addReg(SP::G0);
@@ -228,10 +229,19 @@ void SparcFrameLowering::emitEpilogue(MachineFunction &MF,
MachineFrameInfo &MFI = MF.getFrameInfo();
int NumBytes = (int) MFI.getStackSize();
- if (NumBytes == 0)
- return;
-
- emitSPAdjustment(MF, MBB, MBBI, NumBytes, SP::ADDrr, SP::ADDri);
+ if (NumBytes != 0)
+ emitSPAdjustment(MF, MBB, MBBI, NumBytes, SP::ADDrr, SP::ADDri);
+
+ // Preserve return address in %o7
+ if (MBBI->getOpcode() == SP::TAIL_CALL) {
+ MBB.addLiveIn(SP::O7);
+ BuildMI(MBB, MBBI, dl, TII.get(SP::ORrr), SP::G1)
+ .addReg(SP::G0)
+ .addReg(SP::O7);
+ BuildMI(MBB, MBBI, dl, TII.get(SP::ORrr), SP::O7)
+ .addReg(SP::G0)
+ .addReg(SP::G1);
+ }
}
bool SparcFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index 501bb4d4910f6..ef0c72dacd1cf 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -710,6 +710,36 @@ static bool hasReturnsTwiceAttr(SelectionDAG &DAG, SDValue Callee,
return CalleeFn->hasFnAttribute(Attribute::ReturnsTwice);
}
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization.
+bool SparcTargetLowering::IsEligibleForTailCallOptimization(
+ CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF) const {
+
+ auto &Outs = CLI.Outs;
+ auto &Caller = MF.getFunction();
+
+ // Do not tail call opt functions with "disable-tail-calls" attribute.
+ if (Caller.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
+ return false;
+
+ // Do not tail call opt if the stack is used to pass parameters.
+ if (CCInfo.getNextStackOffset() != 0)
+ return false;
+
+ // Do not tail call opt if either the callee or caller returns
+ // a struct and the other does not.
+ if (!Outs.empty() && Caller.hasStructRetAttr() != Outs[0].Flags.isSRet())
+ return false;
+
+ // Byval parameters hand the function a pointer directly into the stack area
+ // we want to reuse during a tail call.
+ for (auto &Arg : Outs)
+ if (Arg.Flags.isByVal())
+ return false;
+
+ return true;
+}
+
// Lower a call for the 32-bit ABI.
SDValue
SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
@@ -725,15 +755,15 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
CallingConv::ID CallConv = CLI.CallConv;
bool isVarArg = CLI.IsVarArg;
- // Sparc target does not yet support tail call optimization.
- isTailCall = false;
-
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CC_Sparc32);
+ isTailCall = isTailCall && IsEligibleForTailCallOptimization(
+ CCInfo, CLI, DAG.getMachineFunction());
+
// Get the size of the outgoing arguments stack space requirement.
unsigned ArgsSize = CCInfo.getNextStackOffset();
@@ -771,7 +801,10 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
}
}
- Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, dl);
+ assert(!isTailCall || ArgsSize == 0);
+
+ if (!isTailCall)
+ Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, dl);
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
@@ -816,6 +849,10 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
if (Flags.isSRet()) {
assert(VA.needsCustom());
+
+ if (isTailCall)
+ continue;
+
// store SRet argument in %sp+64
SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
SDValue PtrOff = DAG.getIntPtrConstant(64, dl);
@@ -928,7 +965,9 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
// stuck together.
SDValue InFlag;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Register Reg = toCallerWindow(RegsToPass[i].first);
+ Register Reg = RegsToPass[i].first;
+ if (!isTailCall)
+ Reg = toCallerWindow(Reg);
Chain = DAG.getCopyToReg(Chain, dl, Reg, RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
@@ -952,9 +991,12 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
Ops.push_back(Callee);
if (hasStructRetAttr)
Ops.push_back(DAG.getTargetConstant(SRetArgSize, dl, MVT::i32));
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
- Ops.push_back(DAG.getRegister(toCallerWindow(RegsToPass[i].first),
- RegsToPass[i].second.getValueType()));
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Register Reg = RegsToPass[i].first;
+ if (!isTailCall)
+ Reg = toCallerWindow(Reg);
+ Ops.push_back(DAG.getRegister(Reg, RegsToPass[i].second.getValueType()));
+ }
// Add a register mask operand representing the call-preserved registers.
const SparcRegisterInfo *TRI = Subtarget->getRegisterInfo();
@@ -968,6 +1010,11 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
if (InFlag.getNode())
Ops.push_back(InFlag);
+ if (isTailCall) {
+ DAG.getMachineFunction().getFrameInfo().setHasTailCall();
+ return DAG.getNode(SPISD::TAIL_CALL, dl, MVT::Other, Ops);
+ }
+
Chain = DAG.getNode(SPISD::CALL, dl, NodeTys, Ops);
InFlag = Chain.getValue(1);
@@ -1852,6 +1899,7 @@ const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const {
case SPISD::TLS_ADD: return "SPISD::TLS_ADD";
case SPISD::TLS_LD: return "SPISD::TLS_LD";
case SPISD::TLS_CALL: return "SPISD::TLS_CALL";
+ case SPISD::TAIL_CALL: return "SPISD::TAIL_CALL";
}
return nullptr;
}
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.h b/llvm/lib/Target/Sparc/SparcISelLowering.h
index 5c9703823a644..94a5141d95b22 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.h
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.h
@@ -44,6 +44,8 @@ namespace llvm {
GLOBAL_BASE_REG, // Global base reg for PIC.
FLUSHW, // FLUSH register windows to stack.
+ TAIL_CALL, // Tail call
+
TLS_ADD, // For Thread Local Storage (TLS).
TLS_LD,
TLS_CALL
@@ -182,6 +184,10 @@ namespace llvm {
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
+ bool IsEligibleForTailCallOptimization(CCState &CCInfo,
+ CallLoweringInfo &CLI,
+ MachineFunction &MF) const;
+
bool ShouldShrinkFPConstant(EVT VT) const override {
// Do not shrink FP constpool if VT == MVT::f128.
// (ldd, call _Q_fdtoq) is more expensive than two ldds.
diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.td b/llvm/lib/Target/Sparc/SparcInstrInfo.td
index 5e305fc9df71a..28a85c7118acc 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -248,6 +248,10 @@ def call : SDNode<"SPISD::CALL", SDT_SPCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
+def tailcall : SDNode<"SPISD::TAIL_CALL", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
def SDT_SPRet : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
def retflag : SDNode<"SPISD::RET_FLAG", SDT_SPRet,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
@@ -1368,6 +1372,31 @@ let Uses = [O6], isCall = 1, hasDelaySlot = 1 in
}
}
+//===----------------------------------------------------------------------===//
+// Instructions for tail calls.
+//===----------------------------------------------------------------------===//
+let isCodeGenOnly = 1, isReturn = 1, hasDelaySlot = 1,
+ isTerminator = 1, isBarrier = 1 in {
+ def TAIL_CALL : InstSP<(outs), (ins calltarget:$disp, variable_ops),
+ "call $disp",
+ [(tailcall tglobaladdr:$disp)]> {
+ bits<30> disp;
+ let op = 1;
+ let Inst{29-0} = disp;
+ }
+}
+
+def : Pat<(tailcall (iPTR texternalsym:$dst)),
+ (TAIL_CALL texternalsym:$dst)>;
+
+let isCodeGenOnly = 1, isReturn = 1, hasDelaySlot = 1, isTerminator = 1,
+ isBarrier = 1, rd = 0 in {
+ def TAIL_CALLri : F3_2<2, 0b111000,
+ (outs), (ins MEMri:$ptr, variable_ops),
+ "jmp $ptr",
+ [(tailcall ADDRri:$ptr)]>;
+}
+
//===----------------------------------------------------------------------===//
// V9 Instructions
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/SPARC/2011-01-11-Call.ll b/llvm/test/CodeGen/SPARC/2011-01-11-Call.ll
index 8097e49ad3f7b..aeafd8bba1daf 100644
--- a/llvm/test/CodeGen/SPARC/2011-01-11-Call.ll
+++ b/llvm/test/CodeGen/SPARC/2011-01-11-Call.ll
@@ -20,7 +20,7 @@
; V9: ret
; V9-NEXT: restore
-define void @test() nounwind {
+define void @test() #0 {
entry:
%0 = tail call i32 (...) @foo() nounwind
tail call void (...) @bar() nounwind
@@ -31,13 +31,10 @@ declare i32 @foo(...)
declare void @bar(...)
-
; V8-LABEL: test_tail_call_with_return
-; V8: save %sp
-; V8: call foo
-; V8-NEXT: nop
-; V8: ret
-; V8-NEXT: restore %g0, %o0, %o0
+; V8: mov %o7, %g1
+; V8-NEXT: call foo
+; V8-NEXT: mov %g1, %o7
; V9-LABEL: test_tail_call_with_return
; V9: save %sp
@@ -51,3 +48,5 @@ entry:
%0 = tail call i32 (...) @foo() nounwind
ret i32 %0
}
+
+attributes #0 = { nounwind "disable-tail-calls"="true" }
diff --git a/llvm/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll b/llvm/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll
index ff6da8288fc39..90c35ce86e1fe 100644
--- a/llvm/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll
+++ b/llvm/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll
@@ -3,7 +3,7 @@
target triple = "sparc-unknown-linux-gnu"
-define i32 @test(i32 %a) nounwind {
+define i32 @test(i32 %a) #0 {
entry:
; CHECK: test
; CHECK: call bar
@@ -14,7 +14,7 @@ entry:
ret i32 %0
}
-define i32 @test_jmpl(i32 (i32, i32)* nocapture %f, i32 %a, i32 %b) nounwind {
+define i32 @test_jmpl(i32 (i32, i32)* nocapture %f, i32 %a, i32 %b) #0 {
entry:
; CHECK: test_jmpl
; CHECK: call
@@ -53,7 +53,7 @@ bb5: ; preds = %bb, %entry
ret i32 %a_addr.1.lcssa
}
-define i32 @test_inlineasm(i32 %a) nounwind {
+define i32 @test_inlineasm(i32 %a) #0 {
entry:
;CHECK-LABEL: test_inlineasm:
;CHECK: cmp
@@ -79,7 +79,7 @@ declare i32 @foo(...)
declare i32 @bar(i32)
-define i32 @test_implicit_def() nounwind {
+define i32 @test_implicit_def() #0 {
entry:
;UNOPT-LABEL: test_implicit_def:
;UNOPT: call func
@@ -88,7 +88,7 @@ entry:
ret i32 0
}
-define i32 @prevent_o7_in_call_delay_slot(i32 %i0) {
+define i32 @prevent_o7_in_call_delay_slot(i32 %i0) #0 {
entry:
;CHECK-LABEL: prevent_o7_in_call_delay_slot:
;CHECK: add %i0, 2, %o5
@@ -128,7 +128,7 @@ entry:
ret i32 %1
}
-define i32 @restore_or(i32 %a) {
+define i32 @restore_or(i32 %a) #0 {
entry:
;CHECK-LABEL: restore_or:
;CHECK: ret
@@ -184,3 +184,4 @@ entry:
ret i32 %2
}
+attributes #0 = { nounwind "disable-tail-calls"="true" }
diff --git a/llvm/test/CodeGen/SPARC/tailcall.ll b/llvm/test/CodeGen/SPARC/tailcall.ll
new file mode 100644
index 0000000000000..6c95e3d72920c
--- /dev/null
+++ b/llvm/test/CodeGen/SPARC/tailcall.ll
@@ -0,0 +1,207 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=sparc -verify-machineinstrs | FileCheck %s
+
+define i32 @simple_leaf(i32 %i) #0 {
+; CHECK-LABEL: simple_leaf:
+; CHECK: ! %bb.0: ! %entry
+; CHECK-NEXT: mov %o7, %g1
+; CHECK-NEXT: call foo
+; CHECK-NEXT: mov %g1, %o7
+entry:
+ %call = tail call i32 @foo(i32 %i)
+ ret i32 %call
+}
+
+define i32 @simple_standard(i32 %i) #1 {
+; CHECK-LABEL: simple_standard:
+; CHECK: ! %bb.0: ! %entry
+; CHECK-NEXT: save %sp, -96, %sp
+; CHECK-NEXT: call foo
+; CHECK-NEXT: restore
+entry:
+ %call = tail call i32 @foo(i32 %i)
+ ret i32 %call
+}
+
+define i32 @extra_arg_leaf(i32 %i) #0 {
+; CHECK-LABEL: extra_arg_leaf:
+; CHECK: ! %bb.0: ! %entry
+; CHECK-NEXT: mov 12, %o1
+; CHECK-NEXT: mov %o7, %g1
+; CHECK-NEXT: call foo2
+; CHECK-NEXT: mov %g1, %o7
+entry:
+ %call = tail call i32 @foo2(i32 %i, i32 12)
+ ret i32 %call
+}
+
+define i32 @extra_arg_standard(i32 %i) #1 {
+; CHECK-LABEL: extra_arg_standard:
+; CHECK: ! %bb.0: ! %entry
+; CHECK-NEXT: save %sp, -96, %sp
+; CHECK-NEXT: call foo2
+; CHECK-NEXT: restore %g0, 12, %o1
+entry:
+ %call = tail call i32 @foo2(i32 %i, i32 12)
+ ret i32 %call
+}
+
+; Perform tail call optimization for external symbol.
+
+define void @caller_extern(i8* %src) optsize #0 {
+; CHECK-LABEL: caller_extern:
+; CHECK: ! %bb.0: ! %entry
+; CHECK-NEXT: sethi %hi(dest), %o1
+; CHECK-NEXT: add %o1, %lo(dest), %o1
+; CHECK-NEXT: mov 7, %o2
+; CHECK-NEXT: mov %o0, %o3
+; CHECK-NEXT: mov %o1, %o0
+; CHECK-NEXT: mov %o3, %o1
+; CHECK-NEXT: mov %o7, %g1
+; CHECK-NEXT: call memcpy
+; CHECK-NEXT: mov %g1, %o7
+entry:
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(
+ i8* getelementptr inbounds ([2 x i8],
+ [2 x i8]* @dest, i32 0, i32 0),
+ i8* %src, i32 7, i1 false)
+ ret void
+}
+
+; Perform tail call optimization for function pointer.
+
+define i32 @func_ptr_test(i32 ()* nocapture %func_ptr) #0 {
+; CHECK-LABEL: func_ptr_test:
+; CHECK: ! %bb.0: ! %entry
+; CHECK-NEXT: jmp %o0
+; CHECK-NEXT: nop
+entry:
+ %call = tail call i32 %func_ptr() #1
+ ret i32 %call
+}
+
+define i32 @func_ptr_test2(i32 (i32, i32, i32)* nocapture %func_ptr,
+; CHECK-LABEL: func_ptr_test2:
+; CHECK: ! %bb.0: ! %entry
+; CHECK-NEXT: save %sp, -96, %sp
+; CHECK-NEXT: mov 10, %i3
+; CHECK-NEXT: mov %i0, %i4
+; CHECK-NEXT: mov %i1, %i0
+; CHECK-NEXT: jmp %i4
+; CHECK-NEXT: restore %g0, %i3, %o1
+ i32 %r, i32 %q) #1 {
+entry:
+ %call = tail call i32 %func_ptr(i32 %r, i32 10, i32 %q) #1
+ ret i32 %call
+}
+
+
+; Do not tail call optimize if stack is used to pass parameters.
+
+define i32 @caller_args() #0 {
+; CHECK-LABEL: caller_args:
+; CHECK: ! %bb.0: ! %entry
+; CHECK-NEXT: save %sp, -104, %sp
+; CHECK-NEXT: mov 6, %i0
+; CHECK-NEXT: mov %g0, %o0
+; CHECK-NEXT: mov 1, %o1
+; CHECK-NEXT: mov 2, %o2
+; CHECK-NEXT: mov 3, %o3
+; CHECK-NEXT: mov 4, %o4
+; CHECK-NEXT: mov 5, %o5
+; CHECK-NEXT: call foo7
+; CHECK-NEXT: st %i0, [%sp+92]
+; CHECK-NEXT: ret
+; CHECK-NEXT: restore %g0, %o0, %o0
+entry:
+ %r = tail call i32 @foo7(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6)
+ ret i32 %r
+}
+
+; Byval parameters hand the function a pointer directly into the stack area
+; we want to reuse during a tail call. Do not tail call optimize functions with
+; byval parameters.
+
+define i32 @caller_byval() #0 {
+; CHECK-LABEL: caller_byval:
+; CHECK: ! %bb.0: ! %entry
+; CHECK-NEXT: save %sp, -104, %sp
+; CHECK-NEXT: ld [%fp+-4], %i0
+; CHECK-NEXT: st %i0, [%fp+-8]
+; CHECK-NEXT: call callee_byval
+; CHECK-NEXT: add %fp, -8, %o0
+; CHECK-NEXT: ret
+; CHECK-NEXT: restore %g0, %o0, %o0
+entry:
+ %a = alloca i32*
+ %r = tail call i32 @callee_byval(i32** byval(i32*) %a)
+ ret i32 %r
+}
+
+; Perform tail call optimization for sret function.
+
+define void @sret_test(%struct.a* noalias sret(%struct.a) %agg.result) #0 {
+; CHECK-LABEL: sret_test:
+; CHECK: ! %bb.0: ! %entry
+; CHECK-NEXT: mov %o7, %g1
+; CHECK-NEXT: call sret_func
+; CHECK-NEXT: mov %g1, %o7
+entry:
+ tail call void bitcast (void (%struct.a*)* @sret_func to
+ void (%struct.a*)*)(%struct.a* sret(%struct.a) %agg.result)
+ ret void
+}
+
+; Do not tail call if either caller or callee returns
+; a struct and the other does not. Returning a large
+; struct will generate a memcpy as the tail function.
+
+define void @ret_large_struct(%struct.big* noalias sret(%struct.big) %agg.result) #0 {
+; CHECK-LABEL: ret_large_struct:
+; CHECK: ! %bb.0: ! %entry
+; CHECK-NEXT: save %sp, -96, %sp
+; CHECK-NEXT: ld [%fp+64], %i0
+; CHECK-NEXT: sethi %hi(bigstruct), %i1
+; CHECK-NEXT: add %i1, %lo(bigstruct), %o1
+; CHECK-NEXT: mov 400, %o2
+; CHECK-NEXT: call memcpy
+; CHECK-NEXT: mov %i0, %o0
+; CHECK-NEXT: jmp %i7+12
+; CHECK-NEXT: restore
+entry:
+ %0 = bitcast %struct.big* %agg.result to i8*
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %0, i8* align 4 bitcast (%struct.big* @bigstruct to i8*), i32 400, i1 false)
+ ret void
+}
+
+; Test register + immediate pattern.
+
+define void @addri_test(i32 %ptr) #0 {
+; CHECK-LABEL: addri_test:
+; CHECK: ! %bb.0: ! %entry
+; CHECK-NEXT: jmp %o0+4
+; CHECK-NEXT: nop
+entry:
+ %add = add nsw i32 %ptr, 4
+ %0 = inttoptr i32 %add to void ()*
+ tail call void %0() #1
+ ret void
+}
+
+%struct.a = type { i32, i32 }
+@dest = global [2 x i8] zeroinitializer
+
+%struct.big = type { [100 x i32] }
+@bigstruct = global %struct.big zeroinitializer
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)
+declare void @sret_func(%struct.a* sret(%struct.a))
+declare i32 @callee_byval(i32** byval(i32*) %a)
+declare i32 @foo(i32)
+declare i32 @foo2(i32, i32)
+declare i32 @foo7(i32, i32, i32, i32, i32, i32, i32)
+
+attributes #0 = { nounwind "disable-tail-calls"="false"
+ "frame-pointer"="none" }
+attributes #1 = { nounwind "disable-tail-calls"="false"
+ "frame-pointer"="all" }
More information about the llvm-commits
mailing list