[llvm] r242714 - [ARM] Refactor the prologue/epilogue emission to be more robust.
Quentin Colombet
qcolombet at apple.com
Mon Jul 20 14:42:15 PDT 2015
Author: qcolombet
Date: Mon Jul 20 16:42:14 2015
New Revision: 242714
URL: http://llvm.org/viewvc/llvm-project?rev=242714&view=rev
Log:
[ARM] Refactor the prologue/epilogue emission to be more robust.
This is the first step toward supporting shrink-wrapping for this target.
The changes could be summarized by these items:
- Expand the tail-call return as part of the expand pseudo pass.
- Get rid of the assumptions that the epilogue is the exit block:
* Do not assume which registers are free in the epilogue. (This indirectly
improve the lowering of the code for the segmented stacks, see the test
cases.)
* Take into account that the basic block can be empty.
Related to <rdar://problem/20821730>
Modified:
llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp
llvm/trunk/lib/Target/ARM/ARMFrameLowering.cpp
llvm/trunk/lib/Target/ARM/ARMFrameLowering.h
llvm/trunk/lib/Target/ARM/Thumb1FrameLowering.cpp
llvm/trunk/test/CodeGen/ARM/fold-stack-adjust.ll
llvm/trunk/test/CodeGen/ARM/thumb1_return_sequence.ll
llvm/trunk/test/CodeGen/Thumb/pop.ll
llvm/trunk/test/CodeGen/Thumb/vargs.ll
Modified: llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp?rev=242714&r1=242713&r2=242714&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp Mon Jul 20 16:42:14 2015
@@ -747,6 +747,55 @@ bool ARMExpandPseudo::ExpandMI(MachineBa
switch (Opcode) {
default:
return false;
+
+ case ARM::TCRETURNdi:
+ case ARM::TCRETURNri: {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI->isReturn() &&
+ "Can only insert epilog into returning blocks");
+ unsigned RetOpcode = MBBI->getOpcode();
+ DebugLoc dl = MBBI->getDebugLoc();
+ const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>(
+ MBB.getParent()->getSubtarget().getInstrInfo());
+
+ // Tail call return: adjust the stack pointer and jump to callee.
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+
+ // Jump to label or value in register.
+ if (RetOpcode == ARM::TCRETURNdi) {
+ unsigned TCOpcode =
+ STI->isThumb()
+ ? (STI->isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND)
+ : ARM::TAILJMPd;
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
+ if (JumpTarget.isGlobal())
+ MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ else {
+ assert(JumpTarget.isSymbol());
+ MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+ JumpTarget.getTargetFlags());
+ }
+
+ // Add the default predicate in Thumb mode.
+ if (STI->isThumb())
+ MIB.addImm(ARMCC::AL).addReg(0);
+ } else if (RetOpcode == ARM::TCRETURNri) {
+ BuildMI(MBB, MBBI, dl,
+ TII.get(STI->isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr))
+ .addReg(JumpTarget.getReg(), RegState::Kill);
+ }
+
+ MachineInstr *NewMI = std::prev(MBBI);
+ for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
+ NewMI->addOperand(MBBI->getOperand(i));
+
+ // Delete the pseudo instruction TCRETURN.
+ MBB.erase(MBBI);
+ MBBI = NewMI;
+ return true;
+ }
case ARM::VMOVScc:
case ARM::VMOVDcc: {
unsigned newOpc = Opcode == ARM::VMOVScc ? ARM::VMOVS : ARM::VMOVD;
Modified: llvm/trunk/lib/Target/ARM/ARMFrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMFrameLowering.cpp?rev=242714&r1=242713&r2=242714&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMFrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMFrameLowering.cpp Mon Jul 20 16:42:14 2015
@@ -689,60 +689,8 @@ void ARMFrameLowering::emitPrologue(Mach
AFI->setShouldRestoreSPFromFP(true);
}
-// Resolve TCReturn pseudo-instruction
-void ARMFrameLowering::fixTCReturn(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
- unsigned RetOpcode = MBBI->getOpcode();
- DebugLoc dl = MBBI->getDebugLoc();
- const ARMBaseInstrInfo &TII =
- *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
-
- if (!(RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri))
- return;
-
- // Tail call return: adjust the stack pointer and jump to callee.
- MBBI = MBB.getLastNonDebugInstr();
- MachineOperand &JumpTarget = MBBI->getOperand(0);
-
- // Jump to label or value in register.
- if (RetOpcode == ARM::TCRETURNdi) {
- unsigned TCOpcode = STI.isThumb() ?
- (STI.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) :
- ARM::TAILJMPd;
- MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
- if (JumpTarget.isGlobal())
- MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
- JumpTarget.getTargetFlags());
- else {
- assert(JumpTarget.isSymbol());
- MIB.addExternalSymbol(JumpTarget.getSymbolName(),
- JumpTarget.getTargetFlags());
- }
-
- // Add the default predicate in Thumb mode.
- if (STI.isThumb()) MIB.addImm(ARMCC::AL).addReg(0);
- } else if (RetOpcode == ARM::TCRETURNri) {
- BuildMI(MBB, MBBI, dl,
- TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)).
- addReg(JumpTarget.getReg(), RegState::Kill);
- }
-
- MachineInstr *NewMI = std::prev(MBBI);
- for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
- NewMI->addOperand(MBBI->getOperand(i));
-
- // Delete the pseudo instruction TCRETURN.
- MBB.erase(MBBI);
- MBBI = NewMI;
-}
-
void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
- DebugLoc dl = MBBI->getDebugLoc();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
@@ -758,10 +706,12 @@ void ARMFrameLowering::emitEpilogue(Mach
// All calls are tail calls in GHC calling conv, and functions have no
// prologue/epilogue.
- if (MF.getFunction()->getCallingConv() == CallingConv::GHC) {
- fixTCReturn(MF, MBB);
+ if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
return;
- }
+
+ // First put ourselves on the first (from top) terminator instructions.
+ MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
if (!AFI->hasStackFrame()) {
if (NumBytes - ArgRegsSaveSize != 0)
@@ -840,8 +790,6 @@ void ARMFrameLowering::emitEpilogue(Mach
if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
}
- fixTCReturn(MF, MBB);
-
if (ArgRegsSaveSize)
emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize);
}
@@ -1008,7 +956,8 @@ void ARMFrameLowering::emitPushInst(Mach
// Put any subsequent vpush instructions before this one: they will refer to
// higher register numbers so need to be pushed first in order to preserve
// monotonicity.
- --MI;
+ if (MI != MBB.begin())
+ --MI;
}
}
@@ -1022,12 +971,16 @@ void ARMFrameLowering::emitPopInst(Machi
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- DebugLoc DL = MI->getDebugLoc();
- unsigned RetOpcode = MI->getOpcode();
- bool isTailCall = (RetOpcode == ARM::TCRETURNdi ||
- RetOpcode == ARM::TCRETURNri);
- bool isInterrupt =
- RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
+ DebugLoc DL;
+ bool isTailCall = false;
+ bool isInterrupt = false;
+ if (MBB.end() != MI) {
+ DL = MI->getDebugLoc();
+ unsigned RetOpcode = MI->getOpcode();
+ isTailCall = (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri);
+ isInterrupt =
+ RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
+ }
SmallVector<unsigned, 4> Regs;
unsigned i = CSI.size();
@@ -1044,10 +997,13 @@ void ARMFrameLowering::emitPopInst(Machi
if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
STI.hasV5TOps()) {
- Reg = ARM::PC;
- LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
+ if (MBB.succ_empty()) {
+ Reg = ARM::PC;
+ DeleteRet = true;
+ LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
+ } else
+ LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
// Fold the return instruction into the LDM.
- DeleteRet = true;
}
// If NoGap is true, pop consecutive registers and then leave the rest
@@ -1068,7 +1024,7 @@ void ARMFrameLowering::emitPopInst(Machi
.addReg(ARM::SP));
for (unsigned i = 0, e = Regs.size(); i < e; ++i)
MIB.addReg(Regs[i], getDefRegState(true));
- if (DeleteRet) {
+ if (DeleteRet && MI != MBB.end()) {
MIB.copyImplicitOps(&*MI);
MI->eraseFromParent();
}
@@ -1095,7 +1051,8 @@ void ARMFrameLowering::emitPopInst(Machi
// Put any subsequent vpop instructions after this one: they will refer to
// higher register numbers so need to be popped afterwards.
- ++MI;
+ if (MI != MBB.end())
+ ++MI;
}
}
@@ -1913,21 +1870,51 @@ void ARMFrameLowering::adjustForSegmente
MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();
+ // Grab everything that reaches PrologueMBB to update there liveness as well.
+ SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
+ SmallVector<MachineBasicBlock *, 2> WalkList;
+ WalkList.push_back(&PrologueMBB);
+
+ do {
+ MachineBasicBlock *CurMBB = WalkList.pop_back_val();
+ for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
+ if (BeforePrologueRegion.insert(PredBB).second)
+ WalkList.push_back(PredBB);
+ }
+ } while (!WalkList.empty());
+
+ // The order in that list is important.
+ // The blocks will all be inserted before PrologueMBB using that order.
+ // Therefore the block that should appear first in the CFG should appear
+ // first in the list.
+ MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
+ PostStackMBB};
+ const int NbAddedBlocks = sizeof(AddedBlocks) / sizeof(AddedBlocks[0]);
+
+ for (int Idx = 0; Idx < NbAddedBlocks; ++Idx)
+ BeforePrologueRegion.insert(AddedBlocks[Idx]);
+
for (MachineBasicBlock::livein_iterator i = PrologueMBB.livein_begin(),
e = PrologueMBB.livein_end();
i != e; ++i) {
- AllocMBB->addLiveIn(*i);
- GetMBB->addLiveIn(*i);
- McrMBB->addLiveIn(*i);
- PrevStackMBB->addLiveIn(*i);
- PostStackMBB->addLiveIn(*i);
+ for (MachineBasicBlock *PredBB : BeforePrologueRegion)
+ PredBB->addLiveIn(*i);
}
- MF.push_front(PostStackMBB);
- MF.push_front(AllocMBB);
- MF.push_front(GetMBB);
- MF.push_front(McrMBB);
- MF.push_front(PrevStackMBB);
+ // Remove the newly added blocks from the list, since we know
+ // we do not have to do the following updates for them.
+ for (int Idx = 0; Idx < NbAddedBlocks; ++Idx) {
+ BeforePrologueRegion.erase(AddedBlocks[Idx]);
+ MF.insert(&PrologueMBB, AddedBlocks[Idx]);
+ }
+
+ for (MachineBasicBlock *MBB : BeforePrologueRegion) {
+ // Make sure the LiveIns are still sorted and unique.
+ MBB->sortUniqueLiveIns();
+ // Replace the edges to PrologueMBB by edges to the sequences
+ // we are about to add.
+ MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
+ }
// The required stack size that is aligned to ARM constant criterion.
AlignedStackSize = alignToARMConstant(StackSize);
Modified: llvm/trunk/lib/Target/ARM/ARMFrameLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMFrameLowering.h?rev=242714&r1=242713&r2=242714&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMFrameLowering.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMFrameLowering.h Mon Jul 20 16:42:14 2015
@@ -31,8 +31,6 @@ public:
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
- void fixTCReturn(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
Modified: llvm/trunk/lib/Target/ARM/Thumb1FrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Thumb1FrameLowering.cpp?rev=242714&r1=242713&r2=242714&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/Thumb1FrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/Thumb1FrameLowering.cpp Mon Jul 20 16:42:14 2015
@@ -13,6 +13,7 @@
#include "Thumb1FrameLowering.h"
#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -321,11 +322,8 @@ static bool isCSRestore(MachineInstr *MI
void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- assert((MBBI->getOpcode() == ARM::tBX_RET ||
- MBBI->getOpcode() == ARM::tPOP_RET) &&
- "Can only insert epilog into returning blocks");
- DebugLoc dl = MBBI->getDebugLoc();
+ MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
const ThumbRegisterInfo *RegInfo =
@@ -377,9 +375,8 @@ void Thumb1FrameLowering::emitEpilogue(M
ARM::SP)
.addReg(FramePtr));
} else {
- if (MBBI->getOpcode() == ARM::tBX_RET &&
- &MBB.front() != MBBI &&
- std::prev(MBBI)->getOpcode() == ARM::tPOP) {
+ if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET &&
+ &MBB.front() != MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) {
MachineBasicBlock::iterator PMBBI = std::prev(MBBI);
if (!tryFoldSPUpdateIntoPushPop(STI, MF, PMBBI, NumBytes))
emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes);
@@ -409,43 +406,112 @@ void Thumb1FrameLowering::emitEpilogue(M
// MOV r3, ip
// BX lr
if (ArgRegsSaveSize || IsV4PopReturn) {
- // Get the last instruction, tBX_RET
- MBBI = MBB.getLastNonDebugInstr();
- assert (MBBI->getOpcode() == ARM::tBX_RET);
- DebugLoc dl = MBBI->getDebugLoc();
-
- if (AFI->getReturnRegsCount() <= 3) {
- // Epilogue: pop saved LR to R3 and branch off it.
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
- .addReg(ARM::R3, RegState::Define);
-
- emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
-
+ // If MBBI is a return instruction, we may be able to directly restore
+ // LR in the PC.
+ // This is possible if we do not need to emit any SP update.
+ // Otherwise, we need a temporary register to pop the value
+ // and copy that value into LR.
+ MBBI = MBB.getFirstTerminator();
+ if (!ArgRegsSaveSize && MBBI != MBB.end() &&
+ MBBI->getOpcode() == ARM::tBX_RET) {
MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX))
- .addReg(ARM::R3, RegState::Kill);
- AddDefaultPred(MIB);
+ AddDefaultPred(
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET)))
+ .addReg(ARM::PC, RegState::Define);
MIB.copyImplicitOps(&*MBBI);
// erase the old tBX_RET instruction
MBB.erase(MBBI);
- } else {
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
- .addReg(ARM::R12, RegState::Define)
- .addReg(ARM::R3, RegState::Kill));
+ return;
+ }
+
+ // Look for a temporary register to use.
+ // First, compute the liveness information.
+ LivePhysRegs UsedRegs(STI.getRegisterInfo());
+ UsedRegs.addLiveOuts(&MBB, /*AddPristines*/ true);
+ // The semantic of pristines changed recently and now,
+ // the callee-saved registers that are touched in the function
+ // are not part of the pristines set anymore.
+ // Add those callee-saved now.
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+ const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ UsedRegs.addReg(CSRegs[i]);
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
- .addReg(ARM::R3, RegState::Define);
+ DebugLoc dl = DebugLoc();
+ if (MBBI != MBB.end()) {
+ dl = MBBI->getDebugLoc();
+ auto InstUpToMBBI = MBB.end();
+ // The post-decrement is on purpose here.
+ // We want to have the liveness right before MBBI.
+ while (InstUpToMBBI-- != MBBI)
+ UsedRegs.stepBackward(*InstUpToMBBI);
+ }
+
+ // Look for a register that can be directly use in the POP.
+ unsigned PopReg = 0;
+ // And some temporary register, just in case.
+ unsigned TemporaryReg = 0;
+ BitVector PopFriendly =
+ TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::tGPRRegClassID));
+ assert(PopFriendly.any() && "No allocatable pop-friendly register?!");
+ // Rebuild the GPRs from the high registers because they are removed
+ // form the GPR reg class for thumb1.
+ BitVector GPRsNoLRSP =
+ TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::hGPRRegClassID));
+ GPRsNoLRSP |= PopFriendly;
+ GPRsNoLRSP.reset(ARM::LR);
+ GPRsNoLRSP.reset(ARM::SP);
+ GPRsNoLRSP.reset(ARM::PC);
+ for (int Register = GPRsNoLRSP.find_first(); Register != -1;
+ Register = GPRsNoLRSP.find_next(Register)) {
+ if (!UsedRegs.contains(Register)) {
+ // Remember the first pop-friendly register and exit.
+ if (PopFriendly.test(Register)) {
+ PopReg = Register;
+ TemporaryReg = 0;
+ break;
+ }
+ // Otherwise, remember that the register will be available to
+ // save a pop-friendly register.
+ TemporaryReg = Register;
+ }
+ }
- emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
+ assert((PopReg || TemporaryReg) && "Cannot get LR");
+ if (TemporaryReg) {
+ assert(!PopReg && "Unnecessary MOV is about to be inserted");
+ PopReg = PopFriendly.find_first();
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
- .addReg(ARM::LR, RegState::Define)
- .addReg(ARM::R3, RegState::Kill));
+ .addReg(TemporaryReg, RegState::Define)
+ .addReg(PopReg, RegState::Kill));
+ }
+
+ assert(PopReg && "Do not know how to get LR");
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
+ .addReg(PopReg, RegState::Define);
+
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
+ if (!TemporaryReg && MBBI != MBB.end() &&
+ MBBI->getOpcode() == ARM::tBX_RET) {
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX))
+ .addReg(PopReg, RegState::Kill);
+ AddDefaultPred(MIB);
+ MIB.copyImplicitOps(&*MBBI);
+ // erase the old tBX_RET instruction
+ MBB.erase(MBBI);
+ return;
+ }
+
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
+ .addReg(ARM::LR, RegState::Define)
+ .addReg(PopReg, RegState::Kill));
+
+ if (TemporaryReg) {
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
- .addReg(ARM::R3, RegState::Define)
- .addReg(ARM::R12, RegState::Kill));
- // Keep the tBX_RET instruction
+ .addReg(PopReg, RegState::Define)
+ .addReg(TemporaryReg, RegState::Kill));
}
}
}
@@ -508,7 +574,7 @@ restoreCalleeSavedRegisters(MachineBasic
bool NumRegs = false;
for (unsigned i = CSI.size(); i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
- if (Reg == ARM::LR) {
+ if (Reg == ARM::LR && MBB.succ_empty()) {
// Special epilogue for vararg functions. See emitEpilogue
if (isVarArg)
continue;
@@ -517,7 +583,8 @@ restoreCalleeSavedRegisters(MachineBasic
continue;
Reg = ARM::PC;
(*MIB).setDesc(TII.get(ARM::tPOP_RET));
- MIB.copyImplicitOps(&*MI);
+ if (MI != MBB.end())
+ MIB.copyImplicitOps(&*MI);
MI = MBB.erase(MI);
}
MIB.addReg(Reg, getDefRegState(true));
Modified: llvm/trunk/test/CodeGen/ARM/fold-stack-adjust.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/fold-stack-adjust.ll?rev=242714&r1=242713&r2=242714&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/fold-stack-adjust.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/fold-stack-adjust.ll Mon Jul 20 16:42:14 2015
@@ -170,9 +170,9 @@ define void @test_varsize(...) minsize {
; CHECK-T1: push {r5, r6, r7, lr}
; ...
; CHECK-T1: pop {r2, r3, r7}
-; CHECK-T1: pop {r3}
+; CHECK-T1: pop {[[POP_REG:r[0-3]]]}
; CHECK-T1: add sp, #16
-; CHECK-T1: bx r3
+; CHECK-T1: bx [[POP_REG]]
; CHECK-LABEL: test_varsize:
; CHECK: sub sp, #16
Modified: llvm/trunk/test/CodeGen/ARM/thumb1_return_sequence.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/thumb1_return_sequence.ll?rev=242714&r1=242713&r2=242714&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/thumb1_return_sequence.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/thumb1_return_sequence.ll Mon Jul 20 16:42:14 2015
@@ -23,11 +23,9 @@ entry:
; --------
; CHECK-V4T: add sp,
; CHECK-V4T-NEXT: pop {[[SAVED]]}
-; CHECK-V4T-NEXT: mov r12, r3
-; CHECK-V4T-NEXT: pop {r3}
-; CHECK-V4T-NEXT: mov lr, r3
-; CHECK-V4T-NEXT: mov r3, r12
-; CHECK-V4T: bx lr
+; We do not have any SP update to insert so we can just optimize
+; the pop sequence.
+; CHECK-V4T-NEXT: pop {pc}
; CHECK-V5T: pop {[[SAVED]], pc}
}
@@ -53,19 +51,19 @@ entry:
; Epilogue
; --------
; CHECK-V4T: pop {[[SAVED]]}
-; CHECK-V4T-NEXT: mov r12, r3
-; CHECK-V4T-NEXT: pop {r3}
+; CHECK-V4T-NEXT: mov r12, [[POP_REG:r[0-7]]]
+; CHECK-V4T-NEXT: pop {[[POP_REG]]}
; CHECK-V4T-NEXT: add sp,
-; CHECK-V4T-NEXT: mov lr, r3
-; CHECK-V4T-NEXT: mov r3, r12
+; CHECK-V4T-NEXT: mov lr, [[POP_REG]]
+; CHECK-V4T-NEXT: mov [[POP_REG]], r12
; CHECK-V4T: bx lr
; CHECK-V5T: add sp,
; CHECK-V5T-NEXT: pop {[[SAVED]]}
-; CHECK-V5T-NEXT: mov r12, r3
-; CHECK-V5T-NEXT: pop {r3}
+; CHECK-V5T-NEXT: mov r12, [[POP_REG:r[0-7]]]
+; CHECK-V5T-NEXT: pop {[[POP_REG]]}
; CHECK-V5T-NEXT: add sp,
-; CHECK-V5T-NEXT: mov lr, r3
-; CHECK-V5T-NEXT: mov r3, r12
+; CHECK-V5T-NEXT: mov lr, [[POP_REG]]
+; CHECK-V5T-NEXT: mov [[POP_REG]], r12
; CHECK-V5T-NEXT: bx lr
}
@@ -95,8 +93,7 @@ entry:
; Epilogue
; --------
; CHECK-V4T: pop {[[SAVED]]}
-; CHECK-V4T: pop {r3}
-; CHECK-V4T: bx r3
+; CHECK-V4T: pop {pc}
; CHECK-V5T: pop {[[SAVED]], pc}
}
@@ -148,14 +145,18 @@ entry:
; --------
; CHECK-V4T: add sp,
; CHECK-V4T-NEXT: pop {[[SAVED]]}
-; CHECK-V4T-NEXT: pop {r3}
+; Only r1 to r3 are available to pop LR.
+; r0 is used for the return value.
+; CHECK-V4T-NEXT: pop {[[POP_REG:r[1-3]]]}
; CHECK-V4T-NEXT: add sp,
-; CHECK-V4T-NEXT: bx r3
+; CHECK-V4T-NEXT: bx [[POP_REG]]
; CHECK-V5T: add sp,
; CHECK-V5T-NEXT: pop {[[SAVED]]}
-; CHECK-V5T-NEXT: pop {r3}
+; Only r1 to r3 are available to pop LR.
+; r0 is used for the return value.
+; CHECK-V5T-NEXT: pop {[[POP_REG:r[1-3]]]}
; CHECK-V5T-NEXT: add sp,
-; CHECK-V5T-NEXT: bx r3
+; CHECK-V5T-NEXT: bx [[POP_REG]]
}
; CHECK-V4T-LABEL: noframe
@@ -191,13 +192,17 @@ entry:
; Epilogue
; --------
; CHECK-V4T: pop {[[SAVED]]}
-; CHECK-V4T-NEXT: pop {r3}
+; Only r1 to r3 are available to pop LR.
+; r0 is used for the return value.
+; CHECK-V4T-NEXT: pop {[[POP_REG:r[1-3]]]}
; CHECK-V4T-NEXT: add sp,
-; CHECK-V4T-NEXT: bx r3
+; CHECK-V4T-NEXT: bx [[POP_REG]]
; CHECK-V5T: pop {[[SAVED]]}
-; CHECK-V5T-NEXT: pop {r3}
+; Only r1 to r3 are available to pop LR.
+; r0 is used for the return value.
+; CHECK-V5T-NEXT: pop {[[POP_REG:r[1-3]]]}
; CHECK-V5T-NEXT: add sp,
-; CHECK-V5T-NEXT: bx r3
+; CHECK-V5T-NEXT: bx [[POP_REG]]
}
declare void @llvm.va_start(i8*) nounwind
Modified: llvm/trunk/test/CodeGen/Thumb/pop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb/pop.ll?rev=242714&r1=242713&r2=242714&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb/pop.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb/pop.ll Mon Jul 20 16:42:14 2015
@@ -3,9 +3,9 @@
define void @t(i8* %a, ...) nounwind {
; CHECK-LABEL: t:
-; CHECK: pop {r3}
+; CHECK: pop {[[POP_REG:r[0-3]]]}
; CHECK-NEXT: add sp, #12
-; CHECK-NEXT: bx r3
+; CHECK-NEXT: bx [[POP_REG]]
entry:
%a.addr = alloca i8, i32 4
call void @llvm.va_start(i8* %a.addr)
Modified: llvm/trunk/test/CodeGen/Thumb/vargs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb/vargs.ll?rev=242714&r1=242713&r2=242714&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb/vargs.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb/vargs.ll Mon Jul 20 16:42:14 2015
@@ -32,12 +32,12 @@ bb7: ; preds = %bb
call void @llvm.va_end( i8* %va.upgrd.4 )
ret void
-; The return sequence should pop the lr to r3, recover the stack space used to
+; The return sequence should pop the lr to r0-3, recover the stack space used to
; store variadic argument registers, then return via r3. Possibly there is a pop
; before this, but only if the function happened to use callee-saved registers.
-; CHECK: pop {r3}
+; CHECK: pop {[[POP_REG:r[0-3]]]}
; CHECK: add sp, #[[IMM]]
-; CHECK: bx r3
+; CHECK: bx [[POP_REG]]
}
declare void @llvm.va_start(i8*)
More information about the llvm-commits
mailing list