[llvm] r242714 - [ARM] Refactor the prologue/epilogue emission to be more robust.

Quentin Colombet qcolombet at apple.com
Mon Jul 20 14:42:15 PDT 2015


Author: qcolombet
Date: Mon Jul 20 16:42:14 2015
New Revision: 242714

URL: http://llvm.org/viewvc/llvm-project?rev=242714&view=rev
Log:
[ARM] Refactor the prologue/epilogue emission to be more robust.

This is the first step toward supporting shrink-wrapping for this target.

The changes could be summarized by these items:
- Expand the tail-call return as part of the expand pseudo pass.
- Get rid of the assumptions that the epilogue is the exit block:
  * Do not assume which registers are free in the epilogue. (This indirectly
    improves the lowering of the code for the segmented stacks; see the test
    cases.)
  * Take into account that the basic block can be empty.

Related to <rdar://problem/20821730>

Modified:
    llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp
    llvm/trunk/lib/Target/ARM/ARMFrameLowering.cpp
    llvm/trunk/lib/Target/ARM/ARMFrameLowering.h
    llvm/trunk/lib/Target/ARM/Thumb1FrameLowering.cpp
    llvm/trunk/test/CodeGen/ARM/fold-stack-adjust.ll
    llvm/trunk/test/CodeGen/ARM/thumb1_return_sequence.ll
    llvm/trunk/test/CodeGen/Thumb/pop.ll
    llvm/trunk/test/CodeGen/Thumb/vargs.ll

Modified: llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp?rev=242714&r1=242713&r2=242714&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp Mon Jul 20 16:42:14 2015
@@ -747,6 +747,55 @@ bool ARMExpandPseudo::ExpandMI(MachineBa
   switch (Opcode) {
     default:
       return false;
+
+    case ARM::TCRETURNdi:
+    case ARM::TCRETURNri: {
+      MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+      assert(MBBI->isReturn() &&
+             "Can only insert epilog into returning blocks");
+      unsigned RetOpcode = MBBI->getOpcode();
+      DebugLoc dl = MBBI->getDebugLoc();
+      const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>(
+          MBB.getParent()->getSubtarget().getInstrInfo());
+
+      // Tail call return: adjust the stack pointer and jump to callee.
+      MBBI = MBB.getLastNonDebugInstr();
+      MachineOperand &JumpTarget = MBBI->getOperand(0);
+
+      // Jump to label or value in register.
+      if (RetOpcode == ARM::TCRETURNdi) {
+        unsigned TCOpcode =
+            STI->isThumb()
+                ? (STI->isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND)
+                : ARM::TAILJMPd;
+        MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
+        if (JumpTarget.isGlobal())
+          MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+                               JumpTarget.getTargetFlags());
+        else {
+          assert(JumpTarget.isSymbol());
+          MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+                                JumpTarget.getTargetFlags());
+        }
+
+        // Add the default predicate in Thumb mode.
+        if (STI->isThumb())
+          MIB.addImm(ARMCC::AL).addReg(0);
+      } else if (RetOpcode == ARM::TCRETURNri) {
+        BuildMI(MBB, MBBI, dl,
+                TII.get(STI->isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr))
+            .addReg(JumpTarget.getReg(), RegState::Kill);
+      }
+
+      MachineInstr *NewMI = std::prev(MBBI);
+      for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
+        NewMI->addOperand(MBBI->getOperand(i));
+
+      // Delete the pseudo instruction TCRETURN.
+      MBB.erase(MBBI);
+      MBBI = NewMI;
+      return true;
+    }
     case ARM::VMOVScc:
     case ARM::VMOVDcc: {
       unsigned newOpc = Opcode == ARM::VMOVScc ? ARM::VMOVS : ARM::VMOVD;

Modified: llvm/trunk/lib/Target/ARM/ARMFrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMFrameLowering.cpp?rev=242714&r1=242713&r2=242714&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMFrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMFrameLowering.cpp Mon Jul 20 16:42:14 2015
@@ -689,60 +689,8 @@ void ARMFrameLowering::emitPrologue(Mach
     AFI->setShouldRestoreSPFromFP(true);
 }
 
-// Resolve TCReturn pseudo-instruction
-void ARMFrameLowering::fixTCReturn(MachineFunction &MF,
-                                   MachineBasicBlock &MBB) const {
-  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
-  assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
-  unsigned RetOpcode = MBBI->getOpcode();
-  DebugLoc dl = MBBI->getDebugLoc();
-  const ARMBaseInstrInfo &TII =
-      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
-
-  if (!(RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri))
-    return;
-
-  // Tail call return: adjust the stack pointer and jump to callee.
-  MBBI = MBB.getLastNonDebugInstr();
-  MachineOperand &JumpTarget = MBBI->getOperand(0);
-
-  // Jump to label or value in register.
-  if (RetOpcode == ARM::TCRETURNdi) {
-    unsigned TCOpcode = STI.isThumb() ?
-             (STI.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) :
-             ARM::TAILJMPd;
-    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
-    if (JumpTarget.isGlobal())
-      MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
-                           JumpTarget.getTargetFlags());
-    else {
-      assert(JumpTarget.isSymbol());
-      MIB.addExternalSymbol(JumpTarget.getSymbolName(),
-                            JumpTarget.getTargetFlags());
-    }
-
-    // Add the default predicate in Thumb mode.
-    if (STI.isThumb()) MIB.addImm(ARMCC::AL).addReg(0);
-  } else if (RetOpcode == ARM::TCRETURNri) {
-    BuildMI(MBB, MBBI, dl,
-            TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)).
-      addReg(JumpTarget.getReg(), RegState::Kill);
-  }
-
-  MachineInstr *NewMI = std::prev(MBBI);
-  for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
-    NewMI->addOperand(MBBI->getOperand(i));
-
-  // Delete the pseudo instruction TCRETURN.
-  MBB.erase(MBBI);
-  MBBI = NewMI;
-}
-
 void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
                                     MachineBasicBlock &MBB) const {
-  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
-  assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
-  DebugLoc dl = MBBI->getDebugLoc();
   MachineFrameInfo *MFI = MF.getFrameInfo();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
@@ -758,10 +706,12 @@ void ARMFrameLowering::emitEpilogue(Mach
 
   // All calls are tail calls in GHC calling conv, and functions have no
   // prologue/epilogue.
-  if (MF.getFunction()->getCallingConv() == CallingConv::GHC) {
-    fixTCReturn(MF, MBB);
+  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
     return;
-  }
+
+  // First put ourselves on the first (from top) terminator instructions.
+  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
 
   if (!AFI->hasStackFrame()) {
     if (NumBytes - ArgRegsSaveSize != 0)
@@ -840,8 +790,6 @@ void ARMFrameLowering::emitEpilogue(Mach
     if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
   }
 
-  fixTCReturn(MF, MBB);
-
   if (ArgRegsSaveSize)
     emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize);
 }
@@ -1008,7 +956,8 @@ void ARMFrameLowering::emitPushInst(Mach
     // Put any subsequent vpush instructions before this one: they will refer to
     // higher register numbers so need to be pushed first in order to preserve
     // monotonicity.
-    --MI;
+    if (MI != MBB.begin())
+      --MI;
   }
 }
 
@@ -1022,12 +971,16 @@ void ARMFrameLowering::emitPopInst(Machi
   MachineFunction &MF = *MBB.getParent();
   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  DebugLoc DL = MI->getDebugLoc();
-  unsigned RetOpcode = MI->getOpcode();
-  bool isTailCall = (RetOpcode == ARM::TCRETURNdi ||
-                     RetOpcode == ARM::TCRETURNri);
-  bool isInterrupt =
-      RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
+  DebugLoc DL;
+  bool isTailCall = false;
+  bool isInterrupt = false;
+  if (MBB.end() != MI) {
+    DL = MI->getDebugLoc();
+    unsigned RetOpcode = MI->getOpcode();
+    isTailCall = (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri);
+    isInterrupt =
+        RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
+  }
 
   SmallVector<unsigned, 4> Regs;
   unsigned i = CSI.size();
@@ -1044,10 +997,13 @@ void ARMFrameLowering::emitPopInst(Machi
 
       if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
           STI.hasV5TOps()) {
-        Reg = ARM::PC;
-        LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
+        if (MBB.succ_empty()) {
+          Reg = ARM::PC;
+          DeleteRet = true;
+          LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
+        } else
+          LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
         // Fold the return instruction into the LDM.
-        DeleteRet = true;
       }
 
       // If NoGap is true, pop consecutive registers and then leave the rest
@@ -1068,7 +1024,7 @@ void ARMFrameLowering::emitPopInst(Machi
                        .addReg(ARM::SP));
       for (unsigned i = 0, e = Regs.size(); i < e; ++i)
         MIB.addReg(Regs[i], getDefRegState(true));
-      if (DeleteRet) {
+      if (DeleteRet && MI != MBB.end()) {
         MIB.copyImplicitOps(&*MI);
         MI->eraseFromParent();
       }
@@ -1095,7 +1051,8 @@ void ARMFrameLowering::emitPopInst(Machi
 
     // Put any subsequent vpop instructions after this one: they will refer to
     // higher register numbers so need to be popped afterwards.
-    ++MI;
+    if (MI != MBB.end())
+      ++MI;
   }
 }
 
@@ -1913,21 +1870,51 @@ void ARMFrameLowering::adjustForSegmente
   MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
   MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();
 
+  // Grab everything that reaches PrologueMBB to update their liveness as well.
+  SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
+  SmallVector<MachineBasicBlock *, 2> WalkList;
+  WalkList.push_back(&PrologueMBB);
+
+  do {
+    MachineBasicBlock *CurMBB = WalkList.pop_back_val();
+    for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
+      if (BeforePrologueRegion.insert(PredBB).second)
+        WalkList.push_back(PredBB);
+    }
+  } while (!WalkList.empty());
+
+  // The order in that list is important.
+  // The blocks will all be inserted before PrologueMBB using that order.
+  // Therefore the block that should appear first in the CFG should appear
+  // first in the list.
+  MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
+                                      PostStackMBB};
+  const int NbAddedBlocks = sizeof(AddedBlocks) / sizeof(AddedBlocks[0]);
+
+  for (int Idx = 0; Idx < NbAddedBlocks; ++Idx)
+    BeforePrologueRegion.insert(AddedBlocks[Idx]);
+
   for (MachineBasicBlock::livein_iterator i = PrologueMBB.livein_begin(),
                                           e = PrologueMBB.livein_end();
        i != e; ++i) {
-    AllocMBB->addLiveIn(*i);
-    GetMBB->addLiveIn(*i);
-    McrMBB->addLiveIn(*i);
-    PrevStackMBB->addLiveIn(*i);
-    PostStackMBB->addLiveIn(*i);
+    for (MachineBasicBlock *PredBB : BeforePrologueRegion)
+      PredBB->addLiveIn(*i);
   }
 
-  MF.push_front(PostStackMBB);
-  MF.push_front(AllocMBB);
-  MF.push_front(GetMBB);
-  MF.push_front(McrMBB);
-  MF.push_front(PrevStackMBB);
+  // Remove the newly added blocks from the list, since we know
+  // we do not have to do the following updates for them.
+  for (int Idx = 0; Idx < NbAddedBlocks; ++Idx) {
+    BeforePrologueRegion.erase(AddedBlocks[Idx]);
+    MF.insert(&PrologueMBB, AddedBlocks[Idx]);
+  }
+
+  for (MachineBasicBlock *MBB : BeforePrologueRegion) {
+    // Make sure the LiveIns are still sorted and unique.
+    MBB->sortUniqueLiveIns();
+    // Replace the edges to PrologueMBB by edges to the sequences
+    // we are about to add.
+    MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
+  }
 
   // The required stack size that is aligned to ARM constant criterion.
   AlignedStackSize = alignToARMConstant(StackSize);

Modified: llvm/trunk/lib/Target/ARM/ARMFrameLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMFrameLowering.h?rev=242714&r1=242713&r2=242714&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMFrameLowering.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMFrameLowering.h Mon Jul 20 16:42:14 2015
@@ -31,8 +31,6 @@ public:
   void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
   void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
 
-  void fixTCReturn(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
   bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MI,
                                  const std::vector<CalleeSavedInfo> &CSI,

Modified: llvm/trunk/lib/Target/ARM/Thumb1FrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Thumb1FrameLowering.cpp?rev=242714&r1=242713&r2=242714&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/Thumb1FrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/Thumb1FrameLowering.cpp Mon Jul 20 16:42:14 2015
@@ -13,6 +13,7 @@
 
 #include "Thumb1FrameLowering.h"
 #include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -321,11 +322,8 @@ static bool isCSRestore(MachineInstr *MI
 
 void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
-  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
-  assert((MBBI->getOpcode() == ARM::tBX_RET ||
-          MBBI->getOpcode() == ARM::tPOP_RET) &&
-         "Can only insert epilog into returning blocks");
-  DebugLoc dl = MBBI->getDebugLoc();
+  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
   MachineFrameInfo *MFI = MF.getFrameInfo();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   const ThumbRegisterInfo *RegInfo =
@@ -377,9 +375,8 @@ void Thumb1FrameLowering::emitEpilogue(M
                                ARM::SP)
           .addReg(FramePtr));
     } else {
-      if (MBBI->getOpcode() == ARM::tBX_RET &&
-          &MBB.front() != MBBI &&
-          std::prev(MBBI)->getOpcode() == ARM::tPOP) {
+      if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET &&
+          &MBB.front() != MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) {
         MachineBasicBlock::iterator PMBBI = std::prev(MBBI);
         if (!tryFoldSPUpdateIntoPushPop(STI, MF, PMBBI, NumBytes))
           emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes);
@@ -409,43 +406,112 @@ void Thumb1FrameLowering::emitEpilogue(M
   //   MOV r3, ip
   //   BX lr
   if (ArgRegsSaveSize || IsV4PopReturn) {
-    // Get the last instruction, tBX_RET
-    MBBI = MBB.getLastNonDebugInstr();
-    assert (MBBI->getOpcode() == ARM::tBX_RET);
-    DebugLoc dl = MBBI->getDebugLoc();
-
-    if (AFI->getReturnRegsCount() <= 3) {
-      // Epilogue: pop saved LR to R3 and branch off it. 
-      AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
-        .addReg(ARM::R3, RegState::Define);
-
-      emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
-
+    // If MBBI is a return instruction, we may be able to directly restore
+    // LR in the PC.
+    // This is possible if we do not need to emit any SP update.
+    // Otherwise, we need a temporary register to pop the value
+    // and copy that value into LR.
+    MBBI = MBB.getFirstTerminator();
+    if (!ArgRegsSaveSize && MBBI != MBB.end() &&
+        MBBI->getOpcode() == ARM::tBX_RET) {
       MachineInstrBuilder MIB =
-        BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX))
-        .addReg(ARM::R3, RegState::Kill);
-      AddDefaultPred(MIB);
+          AddDefaultPred(
+              BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET)))
+              .addReg(ARM::PC, RegState::Define);
       MIB.copyImplicitOps(&*MBBI);
       // erase the old tBX_RET instruction
       MBB.erase(MBBI);
-    } else {
-      AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
-        .addReg(ARM::R12, RegState::Define)
-        .addReg(ARM::R3, RegState::Kill));
+      return;
+    }
+
+    // Look for a temporary register to use.
+    // First, compute the liveness information.
+    LivePhysRegs UsedRegs(STI.getRegisterInfo());
+    UsedRegs.addLiveOuts(&MBB, /*AddPristines*/ true);
+    // The semantic of pristines changed recently and now,
+    // the callee-saved registers that are touched in the function
+    // are not part of the pristines set anymore.
+    // Add those callee-saved now.
+    const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+    const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
+    for (unsigned i = 0; CSRegs[i]; ++i)
+      UsedRegs.addReg(CSRegs[i]);
 
-      AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
-        .addReg(ARM::R3, RegState::Define);
+    DebugLoc dl = DebugLoc();
+    if (MBBI != MBB.end()) {
+      dl = MBBI->getDebugLoc();
+      auto InstUpToMBBI = MBB.end();
+      // The post-decrement is on purpose here.
+      // We want to have the liveness right before MBBI.
+      while (InstUpToMBBI-- != MBBI)
+        UsedRegs.stepBackward(*InstUpToMBBI);
+    }
+
+    // Look for a register that can be directly used in the POP.
+    unsigned PopReg = 0;
+    // And some temporary register, just in case.
+    unsigned TemporaryReg = 0;
+    BitVector PopFriendly =
+        TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::tGPRRegClassID));
+    assert(PopFriendly.any() && "No allocatable pop-friendly register?!");
+    // Rebuild the GPRs from the high registers because they are removed
+    // from the GPR reg class for thumb1.
+    BitVector GPRsNoLRSP =
+        TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::hGPRRegClassID));
+    GPRsNoLRSP |= PopFriendly;
+    GPRsNoLRSP.reset(ARM::LR);
+    GPRsNoLRSP.reset(ARM::SP);
+    GPRsNoLRSP.reset(ARM::PC);
+    for (int Register = GPRsNoLRSP.find_first(); Register != -1;
+         Register = GPRsNoLRSP.find_next(Register)) {
+      if (!UsedRegs.contains(Register)) {
+        // Remember the first pop-friendly register and exit.
+        if (PopFriendly.test(Register)) {
+          PopReg = Register;
+          TemporaryReg = 0;
+          break;
+        }
+        // Otherwise, remember that the register will be available to
+        // save a pop-friendly register.
+        TemporaryReg = Register;
+      }
+    }
 
-      emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
+    assert((PopReg || TemporaryReg) && "Cannot get LR");
 
+    if (TemporaryReg) {
+      assert(!PopReg && "Unnecessary MOV is about to be inserted");
+      PopReg = PopFriendly.find_first();
       AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
-        .addReg(ARM::LR, RegState::Define)
-        .addReg(ARM::R3, RegState::Kill));
+                         .addReg(TemporaryReg, RegState::Define)
+                         .addReg(PopReg, RegState::Kill));
+    }
+
+    assert(PopReg && "Do not know how to get LR");
+    AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
+        .addReg(PopReg, RegState::Define);
+
+    emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
 
+    if (!TemporaryReg && MBBI != MBB.end() &&
+        MBBI->getOpcode() == ARM::tBX_RET) {
+      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX))
+                                    .addReg(PopReg, RegState::Kill);
+      AddDefaultPred(MIB);
+      MIB.copyImplicitOps(&*MBBI);
+      // erase the old tBX_RET instruction
+      MBB.erase(MBBI);
+      return;
+    }
+
+    AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
+                       .addReg(ARM::LR, RegState::Define)
+                       .addReg(PopReg, RegState::Kill));
+
+    if (TemporaryReg) {
       AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
-        .addReg(ARM::R3, RegState::Define)
-        .addReg(ARM::R12, RegState::Kill));
-      // Keep the tBX_RET instruction
+                         .addReg(PopReg, RegState::Define)
+                         .addReg(TemporaryReg, RegState::Kill));
     }
   }
 }
@@ -508,7 +574,7 @@ restoreCalleeSavedRegisters(MachineBasic
   bool NumRegs = false;
   for (unsigned i = CSI.size(); i != 0; --i) {
     unsigned Reg = CSI[i-1].getReg();
-    if (Reg == ARM::LR) {
+    if (Reg == ARM::LR && MBB.succ_empty()) {
       // Special epilogue for vararg functions. See emitEpilogue
       if (isVarArg)
         continue;
@@ -517,7 +583,8 @@ restoreCalleeSavedRegisters(MachineBasic
         continue;
       Reg = ARM::PC;
       (*MIB).setDesc(TII.get(ARM::tPOP_RET));
-      MIB.copyImplicitOps(&*MI);
+      if (MI != MBB.end())
+        MIB.copyImplicitOps(&*MI);
       MI = MBB.erase(MI);
     }
     MIB.addReg(Reg, getDefRegState(true));

Modified: llvm/trunk/test/CodeGen/ARM/fold-stack-adjust.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/fold-stack-adjust.ll?rev=242714&r1=242713&r2=242714&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/fold-stack-adjust.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/fold-stack-adjust.ll Mon Jul 20 16:42:14 2015
@@ -170,9 +170,9 @@ define void @test_varsize(...) minsize {
 ; CHECK-T1: push	{r5, r6, r7, lr}
 ; ...
 ; CHECK-T1: pop	{r2, r3, r7}
-; CHECK-T1: pop	{r3}
+; CHECK-T1: pop {[[POP_REG:r[0-3]]]}
 ; CHECK-T1: add	sp, #16
-; CHECK-T1: bx	r3
+; CHECK-T1: bx	[[POP_REG]]
 
 ; CHECK-LABEL: test_varsize:
 ; CHECK: sub	sp, #16

Modified: llvm/trunk/test/CodeGen/ARM/thumb1_return_sequence.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/thumb1_return_sequence.ll?rev=242714&r1=242713&r2=242714&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/thumb1_return_sequence.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/thumb1_return_sequence.ll Mon Jul 20 16:42:14 2015
@@ -23,11 +23,9 @@ entry:
 ; --------
 ; CHECK-V4T:         add sp,
 ; CHECK-V4T-NEXT:    pop {[[SAVED]]}
-; CHECK-V4T-NEXT:    mov r12, r3
-; CHECK-V4T-NEXT:    pop {r3}
-; CHECK-V4T-NEXT:    mov lr, r3
-; CHECK-V4T-NEXT:    mov r3, r12
-; CHECK-V4T:         bx  lr
+; We do not have any SP update to insert so we can just optimize
+; the pop sequence.
+; CHECK-V4T-NEXT:    pop {pc}
 ; CHECK-V5T:         pop {[[SAVED]], pc}
 }
 
@@ -53,19 +51,19 @@ entry:
 ; Epilogue
 ; --------
 ; CHECK-V4T:         pop {[[SAVED]]}
-; CHECK-V4T-NEXT:    mov r12, r3
-; CHECK-V4T-NEXT:    pop {r3}
+; CHECK-V4T-NEXT:    mov r12, [[POP_REG:r[0-7]]]
+; CHECK-V4T-NEXT:    pop {[[POP_REG]]}
 ; CHECK-V4T-NEXT:    add sp,
-; CHECK-V4T-NEXT:    mov lr, r3
-; CHECK-V4T-NEXT:    mov r3, r12
+; CHECK-V4T-NEXT:    mov lr, [[POP_REG]]
+; CHECK-V4T-NEXT:    mov [[POP_REG]], r12
 ; CHECK-V4T:         bx  lr
 ; CHECK-V5T:         add sp,
 ; CHECK-V5T-NEXT:    pop {[[SAVED]]}
-; CHECK-V5T-NEXT:    mov r12, r3
-; CHECK-V5T-NEXT:    pop {r3}
+; CHECK-V5T-NEXT:    mov r12, [[POP_REG:r[0-7]]]
+; CHECK-V5T-NEXT:    pop {[[POP_REG]]}
 ; CHECK-V5T-NEXT:    add sp,
-; CHECK-V5T-NEXT:    mov lr, r3
-; CHECK-V5T-NEXT:    mov r3, r12
+; CHECK-V5T-NEXT:    mov lr, [[POP_REG]]
+; CHECK-V5T-NEXT:    mov [[POP_REG]], r12
 ; CHECK-V5T-NEXT:    bx lr
 }
 
@@ -95,8 +93,7 @@ entry:
 ; Epilogue
 ; --------
 ; CHECK-V4T:    pop {[[SAVED]]}
-; CHECK-V4T:    pop {r3}
-; CHECK-V4T:    bx r3
+; CHECK-V4T:    pop {pc}
 ; CHECK-V5T:    pop {[[SAVED]], pc}
 }
 
@@ -148,14 +145,18 @@ entry:
 ; --------
 ; CHECK-V4T:         add sp,
 ; CHECK-V4T-NEXT:    pop {[[SAVED]]}
-; CHECK-V4T-NEXT:    pop {r3}
+; Only r1 to r3 are available to pop LR.
+; r0 is used for the return value.
+; CHECK-V4T-NEXT:    pop {[[POP_REG:r[1-3]]]}
 ; CHECK-V4T-NEXT:    add sp,
-; CHECK-V4T-NEXT:    bx r3
+; CHECK-V4T-NEXT:    bx [[POP_REG]]
 ; CHECK-V5T:         add sp,
 ; CHECK-V5T-NEXT:    pop {[[SAVED]]}
-; CHECK-V5T-NEXT:    pop {r3}
+; Only r1 to r3 are available to pop LR.
+; r0 is used for the return value.
+; CHECK-V5T-NEXT:    pop {[[POP_REG:r[1-3]]]}
 ; CHECK-V5T-NEXT:    add sp,
-; CHECK-V5T-NEXT:    bx r3
+; CHECK-V5T-NEXT:    bx [[POP_REG]]
 }
 
 ; CHECK-V4T-LABEL: noframe
@@ -191,13 +192,17 @@ entry:
 ; Epilogue
 ; --------
 ; CHECK-V4T:         pop {[[SAVED]]}
-; CHECK-V4T-NEXT:    pop {r3}
+; Only r1 to r3 are available to pop LR.
+; r0 is used for the return value.
+; CHECK-V4T-NEXT:    pop {[[POP_REG:r[1-3]]]}
 ; CHECK-V4T-NEXT:    add sp,
-; CHECK-V4T-NEXT:    bx r3
+; CHECK-V4T-NEXT:    bx [[POP_REG]]
 ; CHECK-V5T:         pop {[[SAVED]]}
-; CHECK-V5T-NEXT:    pop {r3}
+; Only r1 to r3 are available to pop LR.
+; r0 is used for the return value.
+; CHECK-V5T-NEXT:    pop {[[POP_REG:r[1-3]]]}
 ; CHECK-V5T-NEXT:    add sp,
-; CHECK-V5T-NEXT:    bx r3
+; CHECK-V5T-NEXT:    bx [[POP_REG]]
 }
 
 declare void @llvm.va_start(i8*) nounwind

Modified: llvm/trunk/test/CodeGen/Thumb/pop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb/pop.ll?rev=242714&r1=242713&r2=242714&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb/pop.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb/pop.ll Mon Jul 20 16:42:14 2015
@@ -3,9 +3,9 @@
 
 define void @t(i8* %a, ...) nounwind {
 ; CHECK-LABEL:      t:
-; CHECK:      pop {r3}
+; CHECK:      pop {[[POP_REG:r[0-3]]]}
 ; CHECK-NEXT: add sp, #12
-; CHECK-NEXT: bx r3
+; CHECK-NEXT: bx [[POP_REG]]
 entry:
   %a.addr = alloca i8, i32 4
   call void @llvm.va_start(i8* %a.addr)

Modified: llvm/trunk/test/CodeGen/Thumb/vargs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb/vargs.ll?rev=242714&r1=242713&r2=242714&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb/vargs.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb/vargs.ll Mon Jul 20 16:42:14 2015
@@ -32,12 +32,12 @@ bb7:            ; preds = %bb
         call void @llvm.va_end( i8* %va.upgrd.4 )
         ret void
 
-; The return sequence should pop the lr to r3, recover the stack space used to
+; The return sequence should pop the lr to r0-3, recover the stack space used to
 ; store variadic argument registers, then return via r3. Possibly there is a pop
 ; before this, but only if the function happened to use callee-saved registers.
-; CHECK: pop {r3}
+; CHECK: pop {[[POP_REG:r[0-3]]]}
 ; CHECK: add sp, #[[IMM]]
-; CHECK: bx r3
+; CHECK: bx [[POP_REG]]
 }
 
 declare void @llvm.va_start(i8*)





More information about the llvm-commits mailing list