[llvm] 1c235c3 - [Sparc] Add tail call support

Daniel Cederman via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 8 04:52:13 PST 2022


Author: Daniel Cederman
Date: 2022-03-08T13:50:54+01:00
New Revision: 1c235c375492180c2eecb6331f169486019fd2d2

URL: https://github.com/llvm/llvm-project/commit/1c235c375492180c2eecb6331f169486019fd2d2
DIFF: https://github.com/llvm/llvm-project/commit/1c235c375492180c2eecb6331f169486019fd2d2.diff

LOG: [Sparc] Add tail call support

This patch adds tail call support to the 32-bit Sparc backend.

Two new instructions are defined, TAIL_CALL and TAIL_CALLri. They are
encoded the same as CALL and BINDri, but are marked with isReturn so
that the epilogue gets emitted. In contrast to CALL, TAIL_CALL is not
marked with isCall. This makes it possible to use the leaf function
optimization when the only call a function makes is a tail call.

TAIL_CALL modifies the return address in %o7, so for leaf functions
the value in %o7 needs to be restored after the call. For normal
functions, which use the restore instruction, this is not necessary.

Reviewed By: koakuma

Differential Revision: https://reviews.llvm.org/D51206

Added: 
    llvm/test/CodeGen/SPARC/tailcall.ll

Modified: 
    llvm/lib/Target/Sparc/DelaySlotFiller.cpp
    llvm/lib/Target/Sparc/SparcCallingConv.td
    llvm/lib/Target/Sparc/SparcFrameLowering.cpp
    llvm/lib/Target/Sparc/SparcISelLowering.cpp
    llvm/lib/Target/Sparc/SparcISelLowering.h
    llvm/lib/Target/Sparc/SparcInstrInfo.td
    llvm/test/CodeGen/SPARC/2011-01-11-Call.ll
    llvm/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
index 259b379541839..cc132d46de856 100644
--- a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -174,17 +174,20 @@ Filler::findDelayInstr(MachineBasicBlock &MBB,
   if (slot == MBB.begin())
     return MBB.end();
 
-  if (slot->getOpcode() == SP::RET || slot->getOpcode() == SP::TLS_CALL)
+  unsigned Opc = slot->getOpcode();
+
+  if (Opc == SP::RET || Opc == SP::TLS_CALL)
     return MBB.end();
 
-  if (slot->getOpcode() == SP::RETL) {
+  if (Opc == SP::RETL || Opc == SP::TAIL_CALL || Opc == SP::TAIL_CALLri) {
     MachineBasicBlock::iterator J = slot;
     --J;
 
     if (J->getOpcode() == SP::RESTORErr
         || J->getOpcode() == SP::RESTOREri) {
       // change retl to ret.
-      slot->setDesc(Subtarget->getInstrInfo()->get(SP::RET));
+      if (Opc == SP::RETL)
+        slot->setDesc(Subtarget->getInstrInfo()->get(SP::RET));
       return J;
     }
   }
@@ -360,6 +363,8 @@ bool Filler::needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize)
   case SP::CALLrr:
   case SP::CALLri: structSizeOpNum = 2; break;
   case SP::TLS_CALL: return false;
+  case SP::TAIL_CALLri:
+  case SP::TAIL_CALL: return false;
   }
 
   const MachineOperand &MO = I->getOperand(structSizeOpNum);

diff  --git a/llvm/lib/Target/Sparc/SparcCallingConv.td b/llvm/lib/Target/Sparc/SparcCallingConv.td
index db540d6f0c425..e6d23f741ea5f 100644
--- a/llvm/lib/Target/Sparc/SparcCallingConv.td
+++ b/llvm/lib/Target/Sparc/SparcCallingConv.td
@@ -134,7 +134,7 @@ def RetCC_Sparc64 : CallingConv<[
 // Callee-saved registers are handled by the register window mechanism.
 def CSR : CalleeSavedRegs<(add)> {
   let OtherPreserved = (add (sequence "I%u", 0, 7),
-                            (sequence "L%u", 0, 7));
+                            (sequence "L%u", 0, 7), O6);
 }
 
 // Callee-saved registers for calls with ReturnsTwice attribute.

diff  --git a/llvm/lib/Target/Sparc/SparcFrameLowering.cpp b/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
index a740de9123c97..3e08b03b984e7 100644
--- a/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -218,8 +218,9 @@ void SparcFrameLowering::emitEpilogue(MachineFunction &MF,
   const SparcInstrInfo &TII =
       *static_cast<const SparcInstrInfo *>(MF.getSubtarget().getInstrInfo());
   DebugLoc dl = MBBI->getDebugLoc();
-  assert(MBBI->getOpcode() == SP::RETL &&
-         "Can only put epilog before 'retl' instruction!");
+  assert((MBBI->getOpcode() == SP::RETL || MBBI->getOpcode() == SP::TAIL_CALL ||
+          MBBI->getOpcode() == SP::TAIL_CALLri) &&
+         "Can only put epilog before 'retl' or 'tail_call' instruction!");
   if (!FuncInfo->isLeafProc()) {
     BuildMI(MBB, MBBI, dl, TII.get(SP::RESTORErr), SP::G0).addReg(SP::G0)
       .addReg(SP::G0);
@@ -228,10 +229,19 @@ void SparcFrameLowering::emitEpilogue(MachineFunction &MF,
   MachineFrameInfo &MFI = MF.getFrameInfo();
 
   int NumBytes = (int) MFI.getStackSize();
-  if (NumBytes == 0)
-    return;
-
-  emitSPAdjustment(MF, MBB, MBBI, NumBytes, SP::ADDrr, SP::ADDri);
+  if (NumBytes != 0)
+    emitSPAdjustment(MF, MBB, MBBI, NumBytes, SP::ADDrr, SP::ADDri);
+
+  // Preserve return address in %o7
+  if (MBBI->getOpcode() == SP::TAIL_CALL) {
+    MBB.addLiveIn(SP::O7);
+    BuildMI(MBB, MBBI, dl, TII.get(SP::ORrr), SP::G1)
+        .addReg(SP::G0)
+        .addReg(SP::O7);
+    BuildMI(MBB, MBBI, dl, TII.get(SP::ORrr), SP::O7)
+        .addReg(SP::G0)
+        .addReg(SP::G1);
+  }
 }
 
 bool SparcFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {

diff  --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index 501bb4d4910f6..ef0c72dacd1cf 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -710,6 +710,36 @@ static bool hasReturnsTwiceAttr(SelectionDAG &DAG, SDValue Callee,
   return CalleeFn->hasFnAttribute(Attribute::ReturnsTwice);
 }
 
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization.
+bool SparcTargetLowering::IsEligibleForTailCallOptimization(
+    CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF) const {
+
+  auto &Outs = CLI.Outs;
+  auto &Caller = MF.getFunction();
+
+  // Do not tail call opt functions with "disable-tail-calls" attribute.
+  if (Caller.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
+    return false;
+
+  // Do not tail call opt if the stack is used to pass parameters.
+  if (CCInfo.getNextStackOffset() != 0)
+    return false;
+
+  // Do not tail call opt if either the callee or caller returns
+  // a struct and the other does not.
+  if (!Outs.empty() && Caller.hasStructRetAttr() != Outs[0].Flags.isSRet())
+    return false;
+
+  // Byval parameters hand the function a pointer directly into the stack area
+  // we want to reuse during a tail call.
+  for (auto &Arg : Outs)
+    if (Arg.Flags.isByVal())
+      return false;
+
+  return true;
+}
+
 // Lower a call for the 32-bit ABI.
 SDValue
 SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
@@ -725,15 +755,15 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
   CallingConv::ID CallConv              = CLI.CallConv;
   bool isVarArg                         = CLI.IsVarArg;
 
-  // Sparc target does not yet support tail call optimization.
-  isTailCall = false;
-
   // Analyze operands of the call, assigning locations to each operand.
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                  *DAG.getContext());
   CCInfo.AnalyzeCallOperands(Outs, CC_Sparc32);
 
+  isTailCall = isTailCall && IsEligibleForTailCallOptimization(
+                                 CCInfo, CLI, DAG.getMachineFunction());
+
   // Get the size of the outgoing arguments stack space requirement.
   unsigned ArgsSize = CCInfo.getNextStackOffset();
 
@@ -771,7 +801,10 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
     }
   }
 
-  Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, dl);
+  assert(!isTailCall || ArgsSize == 0);
+
+  if (!isTailCall)
+    Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, dl);
 
   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
   SmallVector<SDValue, 8> MemOpChains;
@@ -816,6 +849,10 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
 
     if (Flags.isSRet()) {
       assert(VA.needsCustom());
+
+      if (isTailCall)
+        continue;
+
       // store SRet argument in %sp+64
       SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
       SDValue PtrOff = DAG.getIntPtrConstant(64, dl);
@@ -928,7 +965,9 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
   // stuck together.
   SDValue InFlag;
   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
-    Register Reg = toCallerWindow(RegsToPass[i].first);
+    Register Reg = RegsToPass[i].first;
+    if (!isTailCall)
+      Reg = toCallerWindow(Reg);
     Chain = DAG.getCopyToReg(Chain, dl, Reg, RegsToPass[i].second, InFlag);
     InFlag = Chain.getValue(1);
   }
@@ -952,9 +991,12 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
   Ops.push_back(Callee);
   if (hasStructRetAttr)
     Ops.push_back(DAG.getTargetConstant(SRetArgSize, dl, MVT::i32));
-  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
-    Ops.push_back(DAG.getRegister(toCallerWindow(RegsToPass[i].first),
-                                  RegsToPass[i].second.getValueType()));
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+    Register Reg = RegsToPass[i].first;
+    if (!isTailCall)
+      Reg = toCallerWindow(Reg);
+    Ops.push_back(DAG.getRegister(Reg, RegsToPass[i].second.getValueType()));
+  }
 
   // Add a register mask operand representing the call-preserved registers.
   const SparcRegisterInfo *TRI = Subtarget->getRegisterInfo();
@@ -968,6 +1010,11 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
   if (InFlag.getNode())
     Ops.push_back(InFlag);
 
+  if (isTailCall) {
+    DAG.getMachineFunction().getFrameInfo().setHasTailCall();
+    return DAG.getNode(SPISD::TAIL_CALL, dl, MVT::Other, Ops);
+  }
+
   Chain = DAG.getNode(SPISD::CALL, dl, NodeTys, Ops);
   InFlag = Chain.getValue(1);
 
@@ -1852,6 +1899,7 @@ const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case SPISD::TLS_ADD:         return "SPISD::TLS_ADD";
   case SPISD::TLS_LD:          return "SPISD::TLS_LD";
   case SPISD::TLS_CALL:        return "SPISD::TLS_CALL";
+  case SPISD::TAIL_CALL:       return "SPISD::TAIL_CALL";
   }
   return nullptr;
 }

diff  --git a/llvm/lib/Target/Sparc/SparcISelLowering.h b/llvm/lib/Target/Sparc/SparcISelLowering.h
index 5c9703823a644..94a5141d95b22 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.h
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.h
@@ -44,6 +44,8 @@ namespace llvm {
       GLOBAL_BASE_REG, // Global base reg for PIC.
       FLUSHW,      // FLUSH register windows to stack.
 
+      TAIL_CALL,   // Tail call
+
       TLS_ADD,     // For Thread Local Storage (TLS).
       TLS_LD,
       TLS_CALL
@@ -182,6 +184,10 @@ namespace llvm {
 
     SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
 
+    bool IsEligibleForTailCallOptimization(CCState &CCInfo,
+                                           CallLoweringInfo &CLI,
+                                           MachineFunction &MF) const;
+
     bool ShouldShrinkFPConstant(EVT VT) const override {
       // Do not shrink FP constpool if VT == MVT::f128.
       // (ldd, call _Q_fdtoq) is more expensive than two ldds.

diff  --git a/llvm/lib/Target/Sparc/SparcInstrInfo.td b/llvm/lib/Target/Sparc/SparcInstrInfo.td
index 5e305fc9df71a..28a85c7118acc 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -248,6 +248,10 @@ def call          : SDNode<"SPISD::CALL", SDT_SPCall,
                            [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                             SDNPVariadic]>;
 
+def tailcall      : SDNode<"SPISD::TAIL_CALL", SDT_SPCall,
+                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+                            SDNPVariadic]>;
+
 def SDT_SPRet     : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
 def retflag       : SDNode<"SPISD::RET_FLAG", SDT_SPRet,
                            [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
@@ -1368,6 +1372,31 @@ let Uses = [O6], isCall = 1, hasDelaySlot = 1 in
 }
 }
 
+//===----------------------------------------------------------------------===//
+// Instructions for tail calls.
+//===----------------------------------------------------------------------===//
+let isCodeGenOnly = 1, isReturn = 1,  hasDelaySlot = 1,
+    isTerminator = 1, isBarrier = 1 in {
+  def TAIL_CALL : InstSP<(outs), (ins calltarget:$disp, variable_ops),
+                         "call $disp",
+                         [(tailcall tglobaladdr:$disp)]> {
+  bits<30> disp;
+  let op = 1;
+  let Inst{29-0} = disp;
+  }
+}
+
+def : Pat<(tailcall (iPTR texternalsym:$dst)),
+          (TAIL_CALL texternalsym:$dst)>;
+
+let isCodeGenOnly = 1, isReturn = 1,  hasDelaySlot = 1,  isTerminator = 1,
+    isBarrier = 1, rd = 0 in {
+  def TAIL_CALLri : F3_2<2, 0b111000,
+                         (outs), (ins MEMri:$ptr, variable_ops),
+                         "jmp $ptr",
+                         [(tailcall ADDRri:$ptr)]>;
+}
+
 //===----------------------------------------------------------------------===//
 // V9 Instructions
 //===----------------------------------------------------------------------===//

diff  --git a/llvm/test/CodeGen/SPARC/2011-01-11-Call.ll b/llvm/test/CodeGen/SPARC/2011-01-11-Call.ll
index 8097e49ad3f7b..aeafd8bba1daf 100644
--- a/llvm/test/CodeGen/SPARC/2011-01-11-Call.ll
+++ b/llvm/test/CodeGen/SPARC/2011-01-11-Call.ll
@@ -20,7 +20,7 @@
 ; V9:       ret
 ; V9-NEXT:  restore
 
-define void @test() nounwind {
+define void @test() #0 {
 entry:
  %0 = tail call i32 (...) @foo() nounwind
  tail call void (...) @bar() nounwind
@@ -31,13 +31,10 @@ declare i32 @foo(...)
 
 declare void @bar(...)
 
-
 ; V8-LABEL: test_tail_call_with_return
-; V8:       save %sp
-; V8:       call foo
-; V8-NEXT:  nop
-; V8:       ret
-; V8-NEXT:  restore %g0, %o0, %o0
+; V8:       mov %o7, %g1
+; V8-NEXT:  call foo
+; V8-NEXT:  mov %g1, %o7
 
 ; V9-LABEL: test_tail_call_with_return
 ; V9:       save %sp
@@ -51,3 +48,5 @@ entry:
  %0 = tail call i32 (...) @foo() nounwind
  ret i32 %0
 }
+
+attributes #0 = { nounwind "disable-tail-calls"="true" }

diff  --git a/llvm/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll b/llvm/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll
index ff6da8288fc39..90c35ce86e1fe 100644
--- a/llvm/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll
+++ b/llvm/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll
@@ -3,7 +3,7 @@
 
 target triple = "sparc-unknown-linux-gnu"
 
-define i32 @test(i32 %a) nounwind {
+define i32 @test(i32 %a) #0 {
 entry:
 ; CHECK: test
 ; CHECK: call bar
@@ -14,7 +14,7 @@ entry:
   ret i32 %0
 }
 
-define i32 @test_jmpl(i32 (i32, i32)* nocapture %f, i32 %a, i32 %b) nounwind {
+define i32 @test_jmpl(i32 (i32, i32)* nocapture %f, i32 %a, i32 %b) #0 {
 entry:
 ; CHECK:      test_jmpl
 ; CHECK:      call
@@ -53,7 +53,7 @@ bb5:                                              ; preds = %bb, %entry
   ret i32 %a_addr.1.lcssa
 }
 
-define i32 @test_inlineasm(i32 %a) nounwind {
+define i32 @test_inlineasm(i32 %a) #0 {
 entry:
 ;CHECK-LABEL:      test_inlineasm:
 ;CHECK: cmp
@@ -79,7 +79,7 @@ declare i32 @foo(...)
 declare i32 @bar(i32)
 
 
-define i32 @test_implicit_def() nounwind {
+define i32 @test_implicit_def() #0 {
 entry:
 ;UNOPT-LABEL:       test_implicit_def:
 ;UNOPT:       call func
@@ -88,7 +88,7 @@ entry:
   ret i32 0
 }
 
-define i32 @prevent_o7_in_call_delay_slot(i32 %i0) {
+define i32 @prevent_o7_in_call_delay_slot(i32 %i0) #0 {
 entry:
 ;CHECK-LABEL:       prevent_o7_in_call_delay_slot:
 ;CHECK:       add %i0, 2, %o5
@@ -128,7 +128,7 @@ entry:
   ret i32 %1
 }
 
-define i32 @restore_or(i32 %a) {
+define i32 @restore_or(i32 %a) #0 {
 entry:
 ;CHECK-LABEL:  restore_or:
 ;CHECK:  ret
@@ -184,3 +184,4 @@ entry:
   ret i32 %2
 }
 
+attributes #0 = { nounwind "disable-tail-calls"="true" }

diff  --git a/llvm/test/CodeGen/SPARC/tailcall.ll b/llvm/test/CodeGen/SPARC/tailcall.ll
new file mode 100644
index 0000000000000..6c95e3d72920c
--- /dev/null
+++ b/llvm/test/CodeGen/SPARC/tailcall.ll
@@ -0,0 +1,207 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=sparc -verify-machineinstrs | FileCheck %s
+
+define i32 @simple_leaf(i32 %i) #0 {
+; CHECK-LABEL: simple_leaf:
+; CHECK:       ! %bb.0: ! %entry
+; CHECK-NEXT:    mov %o7, %g1
+; CHECK-NEXT:    call foo
+; CHECK-NEXT:    mov %g1, %o7
+entry:
+  %call = tail call i32 @foo(i32 %i)
+  ret i32 %call
+}
+
+define i32 @simple_standard(i32 %i) #1 {
+; CHECK-LABEL: simple_standard:
+; CHECK:       ! %bb.0: ! %entry
+; CHECK-NEXT:    save %sp, -96, %sp
+; CHECK-NEXT:    call foo
+; CHECK-NEXT:    restore
+entry:
+  %call = tail call i32 @foo(i32 %i)
+  ret i32 %call
+}
+
+define i32 @extra_arg_leaf(i32 %i) #0 {
+; CHECK-LABEL: extra_arg_leaf:
+; CHECK:       ! %bb.0: ! %entry
+; CHECK-NEXT:    mov 12, %o1
+; CHECK-NEXT:    mov %o7, %g1
+; CHECK-NEXT:    call foo2
+; CHECK-NEXT:    mov %g1, %o7
+entry:
+  %call = tail call i32 @foo2(i32 %i, i32 12)
+  ret i32 %call
+}
+
+define i32 @extra_arg_standard(i32 %i) #1 {
+; CHECK-LABEL: extra_arg_standard:
+; CHECK:       ! %bb.0: ! %entry
+; CHECK-NEXT:    save %sp, -96, %sp
+; CHECK-NEXT:    call foo2
+; CHECK-NEXT:    restore %g0, 12, %o1
+entry:
+  %call = tail call i32 @foo2(i32 %i, i32 12)
+  ret i32 %call
+}
+
+; Perform tail call optimization for external symbol.
+
+define void @caller_extern(i8* %src) optsize #0 {
+; CHECK-LABEL: caller_extern:
+; CHECK:       ! %bb.0: ! %entry
+; CHECK-NEXT:    sethi %hi(dest), %o1
+; CHECK-NEXT:    add %o1, %lo(dest), %o1
+; CHECK-NEXT:    mov 7, %o2
+; CHECK-NEXT:    mov %o0, %o3
+; CHECK-NEXT:    mov %o1, %o0
+; CHECK-NEXT:    mov %o3, %o1
+; CHECK-NEXT:    mov %o7, %g1
+; CHECK-NEXT:    call memcpy
+; CHECK-NEXT:    mov %g1, %o7
+entry:
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(
+    i8* getelementptr inbounds ([2 x i8],
+    [2 x i8]* @dest, i32 0, i32 0),
+    i8* %src, i32 7, i1 false)
+  ret void
+}
+
+; Perform tail call optimization for function pointer.
+
+define i32 @func_ptr_test(i32 ()* nocapture %func_ptr) #0 {
+; CHECK-LABEL: func_ptr_test:
+; CHECK:       ! %bb.0: ! %entry
+; CHECK-NEXT:    jmp %o0
+; CHECK-NEXT:    nop
+entry:
+  %call = tail call i32 %func_ptr() #1
+  ret i32 %call
+}
+
+define i32 @func_ptr_test2(i32 (i32, i32, i32)* nocapture %func_ptr,
+; CHECK-LABEL: func_ptr_test2:
+; CHECK:       ! %bb.0: ! %entry
+; CHECK-NEXT:    save %sp, -96, %sp
+; CHECK-NEXT:    mov 10, %i3
+; CHECK-NEXT:    mov %i0, %i4
+; CHECK-NEXT:    mov %i1, %i0
+; CHECK-NEXT:    jmp %i4
+; CHECK-NEXT:    restore %g0, %i3, %o1
+                           i32 %r, i32 %q) #1 {
+entry:
+  %call = tail call i32 %func_ptr(i32 %r, i32 10, i32 %q) #1
+  ret i32 %call
+}
+
+
+; Do not tail call optimize if stack is used to pass parameters.
+
+define i32 @caller_args() #0 {
+; CHECK-LABEL: caller_args:
+; CHECK:       ! %bb.0: ! %entry
+; CHECK-NEXT:    save %sp, -104, %sp
+; CHECK-NEXT:    mov 6, %i0
+; CHECK-NEXT:    mov %g0, %o0
+; CHECK-NEXT:    mov 1, %o1
+; CHECK-NEXT:    mov 2, %o2
+; CHECK-NEXT:    mov 3, %o3
+; CHECK-NEXT:    mov 4, %o4
+; CHECK-NEXT:    mov 5, %o5
+; CHECK-NEXT:    call foo7
+; CHECK-NEXT:    st %i0, [%sp+92]
+; CHECK-NEXT:    ret
+; CHECK-NEXT:    restore %g0, %o0, %o0
+entry:
+  %r = tail call i32 @foo7(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6)
+  ret i32 %r
+}
+
+; Byval parameters hand the function a pointer directly into the stack area
+; we want to reuse during a tail call. Do not tail call optimize functions with
+; byval parameters.
+
+define i32 @caller_byval() #0 {
+; CHECK-LABEL: caller_byval:
+; CHECK:       ! %bb.0: ! %entry
+; CHECK-NEXT:    save %sp, -104, %sp
+; CHECK-NEXT:    ld [%fp+-4], %i0
+; CHECK-NEXT:    st %i0, [%fp+-8]
+; CHECK-NEXT:    call callee_byval
+; CHECK-NEXT:    add %fp, -8, %o0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:    restore %g0, %o0, %o0
+entry:
+  %a = alloca i32*
+  %r = tail call i32 @callee_byval(i32** byval(i32*) %a)
+  ret i32 %r
+}
+
+; Perform tail call optimization for sret function.
+
+define void @sret_test(%struct.a* noalias sret(%struct.a) %agg.result) #0 {
+; CHECK-LABEL: sret_test:
+; CHECK:       ! %bb.0: ! %entry
+; CHECK-NEXT:    mov %o7, %g1
+; CHECK-NEXT:    call sret_func
+; CHECK-NEXT:    mov %g1, %o7
+entry:
+  tail call void bitcast (void (%struct.a*)* @sret_func to
+                          void (%struct.a*)*)(%struct.a* sret(%struct.a) %agg.result)
+  ret void
+}
+
+; Do not tail call if either caller or callee returns
+; a struct and the other does not. Returning a large
+; struct will generate a memcpy as the tail function.
+
+define void @ret_large_struct(%struct.big* noalias sret(%struct.big) %agg.result) #0 {
+; CHECK-LABEL: ret_large_struct:
+; CHECK:       ! %bb.0: ! %entry
+; CHECK-NEXT:    save %sp, -96, %sp
+; CHECK-NEXT:    ld [%fp+64], %i0
+; CHECK-NEXT:    sethi %hi(bigstruct), %i1
+; CHECK-NEXT:    add %i1, %lo(bigstruct), %o1
+; CHECK-NEXT:    mov 400, %o2
+; CHECK-NEXT:    call memcpy
+; CHECK-NEXT:    mov %i0, %o0
+; CHECK-NEXT:    jmp %i7+12
+; CHECK-NEXT:    restore
+entry:
+  %0 = bitcast %struct.big* %agg.result to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %0, i8* align 4 bitcast (%struct.big* @bigstruct to i8*), i32 400, i1 false)
+  ret void
+}
+
+; Test register + immediate pattern.
+
+define void @addri_test(i32 %ptr) #0 {
+; CHECK-LABEL: addri_test:
+; CHECK:       ! %bb.0: ! %entry
+; CHECK-NEXT:    jmp %o0+4
+; CHECK-NEXT:    nop
+entry:
+  %add = add nsw i32 %ptr, 4
+  %0 = inttoptr i32 %add to void ()*
+  tail call void %0() #1
+  ret void
+}
+
+%struct.a = type { i32, i32 }
+@dest = global [2 x i8] zeroinitializer
+
+%struct.big = type { [100 x i32] }
+@bigstruct = global %struct.big zeroinitializer
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)
+declare void @sret_func(%struct.a* sret(%struct.a))
+declare i32 @callee_byval(i32** byval(i32*) %a)
+declare i32 @foo(i32)
+declare i32 @foo2(i32, i32)
+declare i32 @foo7(i32, i32, i32, i32, i32, i32, i32)
+
+attributes #0 = { nounwind "disable-tail-calls"="false"
+                  "frame-pointer"="none" }
+attributes #1 = { nounwind "disable-tail-calls"="false"
+                  "frame-pointer"="all" }


        


More information about the llvm-commits mailing list