[llvm] a294d9e - Revert "[IPRA][ARM] Spill extra registers at -Oz"
Oliver Stannard via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 6 02:35:09 PDT 2020
Author: Oliver Stannard
Date: 2020-04-06T10:34:59+01:00
New Revision: a294d9eb2152ccd5c44b9e45ad291a199a944c56
URL: https://github.com/llvm/llvm-project/commit/a294d9eb2152ccd5c44b9e45ad291a199a944c56
DIFF: https://github.com/llvm/llvm-project/commit/a294d9eb2152ccd5c44b9e45ad291a199a944c56.diff
LOG: Revert "[IPRA][ARM] Spill extra registers at -Oz"
Reverting because this is causing failures on bots with expensive checks
enabled.
This reverts commit 73cea83a6f5ab521edf3cccfc603534776d691ec.
Added:
Modified:
llvm/include/llvm/CodeGen/MachineRegisterInfo.h
llvm/lib/CodeGen/MachineRegisterInfo.cpp
llvm/lib/CodeGen/PrologEpilogInserter.cpp
llvm/lib/Target/ARM/ARMFrameLowering.cpp
llvm/lib/Target/ARM/ARMFrameLowering.h
llvm/lib/Target/ARM/ARMISelLowering.cpp
llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
llvm/test/CodeGen/Thumb2/ifcvt-minsize.ll
Removed:
llvm/test/CodeGen/ARM/ipra-extra-spills-exceptions.ll
llvm/test/CodeGen/ARM/ipra-extra-spills.ll
################################################################################
diff --git a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
index 064b6075c095..c5a90b0c46e3 100644
--- a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -91,8 +91,6 @@ class MachineRegisterInfo {
/// all registers that were disabled are removed from the list.
SmallVector<MCPhysReg, 16> UpdatedCSRs;
- void initUpdatedCSRs();
-
/// RegAllocHints - This vector records register allocation hints for
/// virtual registers. For each virtual register, it keeps a pair of hint
/// type and hints vector making up the allocation hints. Only the first
@@ -233,17 +231,12 @@ class MachineRegisterInfo {
/// Disables the register from the list of CSRs.
/// I.e. the register will not appear as part of the CSR mask.
- /// \see UpdatedCSRs.
- void disableCalleeSavedRegister(Register Reg);
-
- /// Enables the register from the list of CSRs.
- /// I.e. the register will appear as part of the CSR mask.
- /// \see UpdatedCSRs.
- void enableCalleeSavedRegister(Register Reg);
+ /// \see UpdatedCalleeSavedRegs.
+ void disableCalleeSavedRegister(unsigned Reg);
/// Returns list of callee saved registers.
/// The function returns the updated CSR list (after taking into account
- /// registers that are enabled/disabled from the CSR list).
+ /// registers that are disabled from the CSR list).
const MCPhysReg *getCalleeSavedRegs() const;
/// Sets the updated Callee Saved Registers list.
diff --git a/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index 12f1bafe1fb2..270ba125df00 100644
--- a/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -610,54 +610,30 @@ bool MachineRegisterInfo::isPhysRegUsed(MCRegister PhysReg) const {
return false;
}
-void MachineRegisterInfo::initUpdatedCSRs() {
- if (IsUpdatedCSRsInitialized)
- return;
-
- const TargetRegisterInfo *TRI = getTargetRegisterInfo();
- const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF);
- for (const MCPhysReg *I = CSR; *I; ++I)
- UpdatedCSRs.push_back(*I);
-
- // Zero value represents the end of the register list
- // (no more registers should be pushed).
- UpdatedCSRs.push_back(0);
-
- IsUpdatedCSRsInitialized = true;
-}
+void MachineRegisterInfo::disableCalleeSavedRegister(unsigned Reg) {
-void MachineRegisterInfo::disableCalleeSavedRegister(Register Reg) {
const TargetRegisterInfo *TRI = getTargetRegisterInfo();
assert(Reg && (Reg < TRI->getNumRegs()) &&
"Trying to disable an invalid register");
- initUpdatedCSRs();
+ if (!IsUpdatedCSRsInitialized) {
+ const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF);
+ for (const MCPhysReg *I = CSR; *I; ++I)
+ UpdatedCSRs.push_back(*I);
+
+ // Zero value represents the end of the register list
+ // (no more registers should be pushed).
+ UpdatedCSRs.push_back(0);
- // Remove the register (and its aliases) from the CSR list.
+ IsUpdatedCSRsInitialized = true;
+ }
+
+ // Remove the register (and its aliases from the list).
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
UpdatedCSRs.erase(std::remove(UpdatedCSRs.begin(), UpdatedCSRs.end(), *AI),
UpdatedCSRs.end());
}
-void MachineRegisterInfo::enableCalleeSavedRegister(Register Reg) {
- const TargetRegisterInfo *TRI = getTargetRegisterInfo();
- assert(Reg && (Reg < TRI->getNumRegs()) &&
- "Trying to disable an invalid register");
-
- initUpdatedCSRs();
-
- // Remove the null terminator from the end of the list.
- assert(UpdatedCSRs.back() == 0);
- UpdatedCSRs.pop_back();
-
- // Add the register (and its sub-registers) to the CSR list.
- for (MCSubRegIterator SRI(Reg, TRI, true); SRI.isValid(); ++SRI)
- UpdatedCSRs.push_back(*SRI);
-
- // Put the null terminator back.
- UpdatedCSRs.push_back(0);
-}
-
const MCPhysReg *MachineRegisterInfo::getCalleeSavedRegs() const {
if (IsUpdatedCSRsInitialized)
return UpdatedCSRs.data();
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index b3cece0223b5..32e2deec353c 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -451,8 +451,6 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F,
FrameIdx = MFI.CreateFixedSpillStackObject(Size, FixedSlot->Offset);
}
- LLVM_DEBUG(dbgs() << "Assigned " << RegInfo->getName(Reg)
- << " to spill slot " << FrameIdx << "\n");
CS.setFrameIdx(FrameIdx);
}
}
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index f9203f0e453e..76e516279487 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -71,14 +71,6 @@ static cl::opt<bool>
SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
cl::desc("Align ARM NEON spills in prolog and epilog"));
-static cl::opt<bool> EnableExtraSpills(
- "arm-extra-spills", cl::Hidden, cl::init(false),
- cl::desc("Preserve extra registers when useful for IPRA"));
-
-// Testing option to bypass some profitability checks.
-static cl::opt<bool> ForceExtraSpills("arm-extra-spills-force", cl::Hidden,
- cl::init(false));
-
static MachineBasicBlock::iterator
skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
unsigned NumAlignedDPRCS2Regs);
@@ -1625,251 +1617,6 @@ checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
SavedRegs.set(ARM::R4);
}
-// Compute the set of registers which cannot be preserved, because they are
-// either modified outside the PUSH/POP instructions, or are live at the point
-// where the POP will be inserted. This only considers r0-r3, which are
-// currently the only registers we voluntatrily save when the PCS doesn't
-// require it.
-void ARMFrameLowering::findRegDefsOutsideSaveRestore(
- MachineFunction &MF, BitVector &UnsaveableRegs) const {
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- MachineFrameInfo &MFI = MF.getFrameInfo();
-
- SmallSet<MachineBasicBlock *, 2> SaveBlocks;
- SmallSet<MachineBasicBlock *, 2> RestoreBlocks;
-
- if (MFI.getSavePoint()) {
- SaveBlocks.insert(MFI.getSavePoint());
- RestoreBlocks.insert(MFI.getRestorePoint());
- } else {
- SaveBlocks.insert(&MF.front());
- for (MachineBasicBlock &MBB : MF)
- if (MBB.isReturnBlock())
- RestoreBlocks.insert(&MBB);
- }
-
- // Walk blocks from the function entry and exits (following control flow both
- // ways), stopping when we get to a save/restore block. Check for
- // instructions which modify any of the registers we care about.
- SmallVector<MachineBasicBlock *, 4> WorkList;
- SmallSet<MachineBasicBlock *, 4> VisitedBlocks;
- LLVM_DEBUG(dbgs() << "Entry block: " << MF.front().getName() << "\n");
- WorkList.push_back(&MF.front());
- for (MachineBasicBlock &MBB : MF) {
- if (MBB.isReturnBlock()) {
- LLVM_DEBUG(dbgs() << "Return block: " << MBB.getName() << "\n");
- WorkList.push_back(&MBB);
- }
- }
-
- auto CheckOutsideInst = [&UnsaveableRegs, TRI](MachineInstr &MI) {
- for (Register Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
- if (MI.modifiesRegister(Reg, TRI)) {
- UnsaveableRegs.set(Reg);
- LLVM_DEBUG(dbgs() << "Register " << TRI->getName(Reg)
- << " modified by instruction " << MI << "\n");
- }
- }
- };
-
- while (!WorkList.empty()) {
- MachineBasicBlock *MBB = WorkList.pop_back_val();
-
- if (VisitedBlocks.count(MBB))
- continue;
- VisitedBlocks.insert(MBB);
-
- bool IsSave = SaveBlocks.count(MBB);
- bool IsRestore = RestoreBlocks.count(MBB);
-
- LLVM_DEBUG(dbgs() << "Visiting block " << MBB->getName() << ", IsSave="
- << IsSave << ", IsRestore=" << IsRestore << "\n");
-
- // If this is a restore block, the POP instruction will be inserted just
- // before the terminator, so we need to consider any terminator
- // instructions to be outside the preserved region. We also need to check
- // for registers which are live at the POP insertion point, because these
- // can't be restored without changing their value.
- if (IsRestore) {
- LivePhysRegs LPR(*TRI);
- LPR.addLiveOuts(*MBB);
- for (auto &Term : reverse(MBB->terminators())) {
- LPR.stepBackward(Term);
- CheckOutsideInst(Term);
- }
-
- for (Register Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
- if (LPR.contains(Reg)) {
- UnsaveableRegs.set(Reg);
- LLVM_DEBUG(dbgs() << "Register " << TRI->getName(Reg)
- << " live-out of restore block " << MBB->getName()
- << "\n");
- }
- }
- }
-
- // If this block is completely outside the save/restore region, then any
- // modified registers can't be preserved. A save block counts as being
- // inside the saved region, with the possible exception of the last few
- // instructions if it's also a restore block, handled above. We don't visit
- // blocks which are completely inside the saved region and don't have any
- // save/restore instructions, so don't need to check that here.
- if (!IsSave && !IsRestore)
- for (auto &MI : *MBB)
- CheckOutsideInst(MI);
-
- // Walk the control flow graph in both directions, except for blocks which
- // are inside the PUSH/POP region.
- if (IsSave || !IsRestore)
- for (auto Pred : MBB->predecessors())
- WorkList.push_back(Pred);
- if (!IsSave || IsRestore)
- for (auto Succ : MBB->successors())
- WorkList.push_back(Succ);
- }
-}
-
-bool ARMFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
- // Shrink wrapping is detrimental to code size because it prevents merging
- // the CSR restore and function return into one POP instruction. It also
- // conflicts with saving extra registers for IPRA, because it makes more
- // registers live at the PUSH/POP.
- if (MF.getFunction().hasMinSize())
- return false;
-
- return true;
-}
-
-// When doing inter-procedural register allocation, saving extra registers in
-// [r0,r3] will allow us to keep live values in them in any callers. The extra
-// saves and restores don't cost us any code-size if we are already emitting
-// PUSH and POP instructions.
-unsigned ARMFrameLowering::spillExtraRegsForIPRA(MachineFunction &MF,
- BitVector &SavedRegs,
- bool HasFPRegSaves) const {
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- MachineRegisterInfo &MRI = MF.getRegInfo();
- MachineFrameInfo &MFI = MF.getFrameInfo();
-
- LLVM_DEBUG(dbgs() << "Extra spills for " << MF.getName() << ": ");
-
- if (!EnableExtraSpills) {
- LLVM_DEBUG(dbgs() << "optimisation not enabled\n");
- return 0;
- }
-
- // If IPRA is not enabled, nothing will be able to take advantage of the
- // extra saved registers.
- if (!MF.getTarget().Options.EnableIPRA) {
- LLVM_DEBUG(dbgs() << "IPRA disabled\n");
- return 0;
- }
-
- // These registers will take extra time to save and restore, and will often
- // go unused, so only to this at -Oz.
- if (!MF.getFunction().hasMinSize()) {
- LLVM_DEBUG(dbgs() << "not minsize\n");
- return 0;
- }
-
- // If we are not currently spilling any registers, we'd need to add an extra
- // PUSH/POP pair, so this isn't worth it.
- if (!SavedRegs.any()) {
- LLVM_DEBUG(dbgs() << "no existing push/pop\n");
- return 0;
- }
-
- // If we can't guarantee that this definition of the function is the one
- // which will be picked by the linker, then IPRA can't make use of any extra
- // saved registers.
- if (!MF.getFunction().isDefinitionExact()) {
- LLVM_DEBUG(dbgs() << "inexact definition\n");
- return 0;
- }
-
- int NumVisibleCallers = 0;
- for (const User *U : MF.getFunction().users()) {
- if (const CallBase *Call = dyn_cast<CallBase>(U)) {
- if (Call->getCalledOperand() == &MF.getFunction()) {
- ++NumVisibleCallers;
- }
- }
- }
-
- // If we don't have any direct callers in the current translation unit,
- // nothing will be able to take advantage of the extra saved registers.
- if (NumVisibleCallers == 0 && !ForceExtraSpills) {
- LLVM_DEBUG(dbgs() << "no visible callers\n");
- return 0;
- }
-
- // If we need to emit unwind tables, these will be longer if we need to
- // preserve r0-r3, so we need a lot of visible calls to make this worthwhile.
- if (MF.getFunction().needsUnwindTableEntry() && NumVisibleCallers <= 8 &&
- !ForceExtraSpills) {
- LLVM_DEBUG(dbgs() << "needs unwind table\n");
- return 0;
- }
-
- // Ok, we've decided we are going to try the optimisation.
- LLVM_DEBUG(dbgs() << "enabled\n");
-
- // Compute the registers which can't be preserved because they are either
- // modified before the PUSH or after the POP, or are live at the point where
- // the POP will be inserted.
- BitVector NonPreserveableRegisters;
- NonPreserveableRegisters.resize(TRI->getNumRegs());
- findRegDefsOutsideSaveRestore(MF, NonPreserveableRegisters);
-
- unsigned NumExtraRegs = 0;
-
- // We'd also like to leave some registers free so that we can use them to
- // fold a small SP update into the PUSH/POP. We can't know exactly what this
- // optimisation can do, because stack layout isn't finalised, but we can make
- // a good enough estimate.
- unsigned StackSize = MFI.estimateStackSize(MF);
-
- // If the stack space is large, we probably won't be able to fold the SP
- // update into the push/pop, so we should use all the registers we want. If
- // we have FP register saves, then the SP update will be folded into the
- // VPUSH/VPOP instead, and we can use the GPRs freely.
- if (StackSize > 16 || HasFPRegSaves)
- StackSize = 0;
-
- LLVM_DEBUG(dbgs() << "Estimated " << StackSize
- << " bytes of SP update being folded into push/pop\n");
-
- for (Register Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
- if (StackSize) {
- StackSize -= 4;
- LLVM_DEBUG(dbgs() << "not saving " << TRI->getName(Reg)
- << ", wanted for SP update\n");
- continue;
- }
-
- // If we don't modify the register anywhere in this function, IPRA will
- // already know that it is preserved, and there's no point in saving it.
- if (!MRI.isPhysRegModified(Reg)) {
- LLVM_DEBUG(dbgs() << "not saving " << TRI->getName(Reg)
- << ", not modified\n");
- continue;
- }
-
- if (NonPreserveableRegisters[Reg]) {
- LLVM_DEBUG(dbgs() << "not saving " << TRI->getName(Reg)
- << ", modified outide save region\n");
- continue;
- }
-
- LLVM_DEBUG(dbgs() << "also saving " << TRI->getName(Reg) << " for IPRA\n");
- SavedRegs.set(Reg);
- MRI.enableCalleeSavedRegister(Reg);
- ++NumExtraRegs;
- }
-
- return NumExtraRegs;
-}
-
void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
BitVector &SavedRegs,
RegScavenger *RS) const {
@@ -2260,14 +2007,6 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
<< "\n");
}
- // When using IPRA, we might want to preserve some of r0-r3, to reduce
- // register pressure in our callers.
- unsigned ExtraIPRASpills =
- spillExtraRegsForIPRA(MF, SavedRegs, NumFPRSpills != 0);
- NumGPRSpills += ExtraIPRASpills;
- if (ExtraIPRASpills)
- CS1Spilled = true;
-
// Avoid spilling LR in Thumb1 if there's a tail call: it's expensive to
// restore LR in that case.
bool ExpensiveLRRestore = AFI->isThumb1OnlyFunction() && MFI.hasTailCall();
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.h b/llvm/lib/Target/ARM/ARMFrameLowering.h
index e03e2d0e1cdb..f30f3895d972 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.h
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.h
@@ -56,10 +56,6 @@ class ARMFrameLowering : public TargetFrameLowering {
void getCalleeSaves(const MachineFunction &MF,
BitVector &SavedRegs) const override;
- void findRegDefsOutsideSaveRestore(MachineFunction &MF,
- BitVector &Regs) const;
- unsigned spillExtraRegsForIPRA(MachineFunction &MF, BitVector &SavedRegs,
- bool HasFPRegSaves) const;
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS) const override;
@@ -67,8 +63,9 @@ class ARMFrameLowering : public TargetFrameLowering {
MachineBasicBlock &MBB) const override;
/// Returns true if the target will correctly handle shrink wrapping.
- bool enableShrinkWrapping(const MachineFunction &MF) const override;
-
+ bool enableShrinkWrapping(const MachineFunction &MF) const override {
+ return true;
+ }
bool isProfitableForNoCSROpt(const Function &F) const override {
// The no-CSR optimisation is bad for code size on ARM, because we can save
// many registers with a single PUSH/POP pair.
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index e4d1caa0b1e0..21c486658c0f 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -2463,24 +2463,25 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RegsToPass[i].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
- const uint32_t *Mask;
- const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
- if (isThisReturn) {
- // For 'this' returns, use the R0-preserving mask if applicable
- Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
- if (!Mask) {
- // Set isThisReturn to false if the calling convention is not one that
- // allows 'returned' to be modeled in this way, so LowerCallResult does
- // not try to pass 'this' straight through
- isThisReturn = false;
+ if (!isTailCall) {
+ const uint32_t *Mask;
+ const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
+ if (isThisReturn) {
+ // For 'this' returns, use the R0-preserving mask if applicable
+ Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
+ if (!Mask) {
+ // Set isThisReturn to false if the calling convention is not one that
+ // allows 'returned' to be modeled in this way, so LowerCallResult does
+ // not try to pass 'this' straight through
+ isThisReturn = false;
+ Mask = ARI->getCallPreservedMask(MF, CallConv);
+ }
+ } else
Mask = ARI->getCallPreservedMask(MF, CallConv);
- }
- } else {
- Mask = ARI->getCallPreservedMask(MF, CallConv);
- }
- assert(Mask && "Missing call preserved mask for calling convention");
- Ops.push_back(DAG.getRegisterMask(Mask));
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+ }
if (InFlag.getNode())
Ops.push_back(InFlag);
diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index df7a3fed6acd..c5ca64b0d78a 100644
--- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -216,10 +216,6 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
break;
}
LLVM_FALLTHROUGH;
- case ARM::R0:
- case ARM::R1:
- case ARM::R2:
- case ARM::R3:
case ARM::R4:
case ARM::R5:
case ARM::R6:
@@ -852,8 +848,7 @@ bool Thumb1FrameLowering::spillCalleeSavedRegisters(
if (!LoRegsToSave.none()) {
MachineInstrBuilder MIB =
BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
- for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4, ARM::R5,
- ARM::R6, ARM::R7, ARM::LR}) {
+ for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6, ARM::R7, ARM::LR}) {
if (LoRegsToSave[Reg]) {
bool isKill = !MRI.isLiveIn(Reg);
if (isKill && !MRI.isReserved(Reg))
@@ -961,9 +956,6 @@ bool Thumb1FrameLowering::restoreCalleeSavedRegisters(
llvm_unreachable("callee-saved register of unexpected class");
}
- if (Reg == ARM::LR)
- I.setRestored(false);
-
// If this is a low register not used as the frame pointer, we may want to
// use it for restoring the high registers.
if ((ARM::tGPRRegClass.contains(Reg)) &&
@@ -988,9 +980,6 @@ bool Thumb1FrameLowering::restoreCalleeSavedRegisters(
static const unsigned AllCopyRegs[] = {ARM::R0, ARM::R1, ARM::R2, ARM::R3,
ARM::R4, ARM::R5, ARM::R6, ARM::R7};
static const unsigned AllHighRegs[] = {ARM::R8, ARM::R9, ARM::R10, ARM::R11};
- static const unsigned AllLoRegs[] = {ARM::R0, ARM::R1, ARM::R2,
- ARM::R3, ARM::R4, ARM::R5,
- ARM::R6, ARM::R7, ARM::LR};
const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs);
const unsigned *AllHighRegsEnd = std::end(AllHighRegs);
@@ -1029,10 +1018,16 @@ bool Thumb1FrameLowering::restoreCalleeSavedRegisters(
BuildMI(MF, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
bool NeedsPop = false;
- for (unsigned Reg : AllLoRegs) {
- if (!LoRegsToRestore[Reg])
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ CalleeSavedInfo &Info = CSI[i-1];
+ unsigned Reg = Info.getReg();
+
+ // High registers (excluding lr) have already been dealt with
+ if (!(ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR))
continue;
+
if (Reg == ARM::LR) {
+ Info.setRestored(false);
if (!MBB.succ_empty() ||
MI->getOpcode() == ARM::TCRETURNdi ||
MI->getOpcode() == ARM::TCRETURNri)
diff --git a/llvm/test/CodeGen/ARM/ipra-extra-spills-exceptions.ll b/llvm/test/CodeGen/ARM/ipra-extra-spills-exceptions.ll
deleted file mode 100644
index ead0278cb362..000000000000
--- a/llvm/test/CodeGen/ARM/ipra-extra-spills-exceptions.ll
+++ /dev/null
@@ -1,149 +0,0 @@
-; RUN: llc -mtriple armv7a--none-eabi -enable-ipra=true -arm-extra-spills -arm-extra-spills-force -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple thumbv7a--none-eabi -enable-ipra=true -arm-extra-spills -arm-extra-spills-force -verify-machineinstrs < %s | FileCheck %s
-
-; Test the interaction between IPRA and C++ exception handling. Currently, IPRA
-; only marks registers as preserved on the non-exceptional return path, not in
-; the landing pad.
-
-declare dso_local i8* @__cxa_allocate_exception(i32) local_unnamed_addr
-declare dso_local void @__cxa_throw(i8*, i8*, i8*) local_unnamed_addr
-declare dso_local i32 @__gxx_personality_v0(...)
-declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone
-declare dso_local i8* @__cxa_begin_catch(i8*) local_unnamed_addr
-declare dso_local void @__cxa_end_catch() local_unnamed_addr
-
-@g = dso_local local_unnamed_addr global i32 0, align 4
-@_ZTIi = external dso_local constant i8*
-
-define dso_local i32 @_Z11maybe_throwv() minsize {
-; This function might return normally, or might throw an exception. r0 is used
-; for a return value, we can preserve r1-r3 for IPRA.
-; CHECK: .save {r1, r2, r3, lr}
-; CHECK-NEXT: push {r1, r2, r3, lr}
-; CHECK: pop{{(..)?}} {r1, r2, r3, pc}
-entry:
- %0 = load i32, i32* @g, align 4
- %tobool = icmp eq i32 %0, 0
- br i1 %tobool, label %if.else, label %if.then
-
-if.then: ; preds = %entry
- %exception = tail call i8* @__cxa_allocate_exception(i32 4)
- %1 = bitcast i8* %exception to i32*
- store i32 42, i32* %1, align 8
- tail call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null)
- unreachable
-
-if.else: ; preds = %entry
- ret i32 1337
-}
-
-; Use inline assembly to force r0-r3 to be alive across a potentially throwing
-; call, using them on the non-exceptional return path. r0 is the return value,
-; so must be copied to another register. r1-r3 are voluntarily preserved by the
-; callee, so can be left in those registers.
-define dso_local i32 @_Z25test_non_exceptional_pathv() minsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
-; CHECK: @APP
-; CHECK-NEXT: @ def r0-r3
-; CHECK-NEXT: @NO_APP
-; CHECK-NEXT: mov [[SAVE_R0:r[0-9]+]], r0
-; CHECK-NEXT: .Ltmp{{.*}}
-; CHECK-NEXT: bl _Z11maybe_throwv
-; CHECK: mov r0, [[SAVE_R0]]
-; CHECK-NEXT: @APP
-; CHECK-NEXT: @ use r0-r3
-; CHECK-NEXT: @NO_APP
-entry:
- %0 = tail call { i32, i32, i32, i32 } asm sideeffect "// def r0-r3", "={r0},={r1},={r2},={r3}"()
- %call = invoke i32 @_Z11maybe_throwv()
- to label %try.cont unwind label %lpad
-
-lpad: ; preds = %entry
- %1 = landingpad { i8*, i32 }
- cleanup
- catch i8* bitcast (i8** @_ZTIi to i8*)
- %2 = extractvalue { i8*, i32 } %1, 1
- %3 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*))
- %matches = icmp eq i32 %2, %3
- br i1 %matches, label %catch, label %ehcleanup
-
-catch: ; preds = %lpad
- %4 = extractvalue { i8*, i32 } %1, 0
- %5 = tail call i8* @__cxa_begin_catch(i8* %4)
- %6 = bitcast i8* %5 to i32*
- %7 = load i32, i32* %6, align 4
- tail call void @__cxa_end_catch()
- br label %cleanup
-
-try.cont: ; preds = %entry
- %asmresult3 = extractvalue { i32, i32, i32, i32 } %0, 3
- %asmresult2 = extractvalue { i32, i32, i32, i32 } %0, 2
- %asmresult1 = extractvalue { i32, i32, i32, i32 } %0, 1
- %asmresult = extractvalue { i32, i32, i32, i32 } %0, 0
- tail call void asm sideeffect "// use r0-r3", "{r0},{r1},{r2},{r3}"(i32 %asmresult, i32 %asmresult1, i32 %asmresult2, i32 %asmresult3)
- br label %cleanup
-
-cleanup: ; preds = %try.cont, %catch
- %retval.0 = phi i32 [ 0, %try.cont ], [ %7, %catch ]
- ret i32 %retval.0
-
-ehcleanup: ; preds = %lpad
- resume { i8*, i32 } %1
-}
-
-
-; Use inline assembly to force r0-r3 to be alive across a potentially throwing
-; call, using them after catching the exception. IPRA does not currently mark
-; voluntarily preserved registers as live into the landing pad block, so all
-; four registers must be copied elsewhere.
-define dso_local i32 @_Z21test_exceptional_pathv() local_unnamed_addr minsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
-; CHECK: @APP
-; CHECK-NEXT: @ def r0-r3
-; CHECK-NEXT: @NO_APP
-; CHECK-DAG: mov [[SAVE_R0:r[0-9]+]], r0
-; CHECK-DAG: mov [[SAVE_R1:r[0-9]+]], r1
-; CHECK-DAG: mov [[SAVE_R2:r[0-9]+]], r2
-; CHECK-DAG: mov [[SAVE_R3:r[0-9]+]], r3
-; CHECK: bl _Z11maybe_throw
-
-; CHECK: bl __cxa_begin_catch
-; CHECK: mov r0, [[SAVE_R0]]
-; CHECK-NEXT: mov r1, [[SAVE_R1]]
-; CHECK-NEXT: mov r2, [[SAVE_R2]]
-; CHECK-NEXT: mov r3, [[SAVE_R3]]
-; CHECK-NEXT: @APP
-; CHECK-NEXT: @ use r0-r3
-; CHECK-NEXT: @NO_APP
-entry:
- %0 = tail call { i32, i32, i32, i32 } asm sideeffect "// def r0-r3", "={r0},={r1},={r2},={r3}"()
- %asmresult = extractvalue { i32, i32, i32, i32 } %0, 0
- %asmresult1 = extractvalue { i32, i32, i32, i32 } %0, 1
- %asmresult2 = extractvalue { i32, i32, i32, i32 } %0, 2
- %asmresult3 = extractvalue { i32, i32, i32, i32 } %0, 3
- %call = invoke i32 @_Z11maybe_throwv()
- to label %cleanup unwind label %lpad
-
-lpad: ; preds = %entry
- %1 = landingpad { i8*, i32 }
- cleanup
- catch i8* bitcast (i8** @_ZTIi to i8*)
- %2 = extractvalue { i8*, i32 } %1, 1
- %3 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*))
- %matches = icmp eq i32 %2, %3
- br i1 %matches, label %catch, label %ehcleanup
-
-catch: ; preds = %lpad
- %4 = extractvalue { i8*, i32 } %1, 0
- %5 = tail call i8* @__cxa_begin_catch(i8* %4)
- %6 = bitcast i8* %5 to i32*
- %7 = load i32, i32* %6, align 4
- tail call void asm sideeffect "// use r0-r3", "{r0},{r1},{r2},{r3}"(i32 %asmresult, i32 %asmresult1, i32 %asmresult2, i32 %asmresult3)
- tail call void @__cxa_end_catch()
- br label %cleanup
-
-cleanup: ; preds = %entry, %catch
- %retval.0 = phi i32 [ %7, %catch ], [ 0, %entry ]
- ret i32 %retval.0
-
-ehcleanup: ; preds = %lpad
- resume { i8*, i32 } %1
-}
diff --git a/llvm/test/CodeGen/ARM/ipra-extra-spills.ll b/llvm/test/CodeGen/ARM/ipra-extra-spills.ll
deleted file mode 100644
index ad9f30abdbbd..000000000000
--- a/llvm/test/CodeGen/ARM/ipra-extra-spills.ll
+++ /dev/null
@@ -1,406 +0,0 @@
-; RUN: llc -mtriple armv7a--none-eabi -enable-ipra=true -arm-extra-spills -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=ARM
-; RUN: llc -mtriple thumbv7a--none-eabi -enable-ipra=true -arm-extra-spills -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB2
-; RUN: llc -mtriple thumbv6m--none-eabi -enable-ipra=true -arm-extra-spills -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB1
-
-; This clobbers r0, and already needs a push/pop, so we also save and restore
-; r0. The push of r11 is to maintain stack alignment (though that isn't
-; technically needed in this example).
-define void @test_r0_r4() minsize nounwind {
-; CHECK-LABEL: test_r0_r4:
-; ARM: .save {r0, r4, r11, lr}
-; ARM: push {r0, r4, r11, lr}
-; ARM: pop {r0, r4, r11, pc}
-; THUMB1: .save {r0, r4, r7, lr}
-; THUMB1: push {r0, r4, r7, lr}
-; THUMB1: pop {r0, r4, r7, pc}
-; THUMB2: .save {r0, r4, r7, lr}
-; THUMB2: push {r0, r4, r7, lr}
-; THUMB2: pop {r0, r4, r7, pc}
- call void asm sideeffect "", "~{r0},~{r4}"()
- ret void
-}
-
-; This clobbers r0-r3, and already needs a push/pop, so we also save and
-; restore all of them.
-define void @test_r0_r1_r2_r3_r4() minsize nounwind {
-; CHECK-LABEL: test_r0_r1_r2_r3_r4:
-; CHECK: .save {r0, r1, r2, r3, r4, lr}
-; CHECK: push {r0, r1, r2, r3, r4, lr}
-; CHECK: pop {r0, r1, r2, r3, r4, pc}
- call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r4}"()
- ret void
-}
-
-; Check that IPRA does make use of the extra saved registers.
-define void @test_ipra() nounwind {
-; CHECK-LABEL: test_ipra:
-; CHECK: ASM1: r0, r1, r2, r3
-; CHECK-NOT: r0
-; CHECK-NOT: r1
-; CHECK-NOT: r2
-; CHECK-NOT: r3
-; CHECK: bl test_r0_r1_r2_r3_r4
-; CHECK-NOT: r0
-; CHECK-NOT: r1
-; CHECK-NOT: r2
-; CHECK-NOT: r3
-; CHECK: ASM2: r0, r1, r2, r3
- %regs = call { i32, i32, i32, i32 } asm sideeffect "// ASM1: $0, $1, $2, $3", "={r0},={r1},={r2},={r3}"()
- %r0 = extractvalue { i32, i32, i32, i32 } %regs, 0
- %r1 = extractvalue { i32, i32, i32, i32 } %regs, 1
- %r2 = extractvalue { i32, i32, i32, i32 } %regs, 2
- %r3 = extractvalue { i32, i32, i32, i32 } %regs, 3
- call void @test_r0_r1_r2_r3_r4()
- call void asm sideeffect "// ASM2: $0, $1, $2, $3", "{r0},{r1},{r2},{r3}"(i32 %r0, i32 %r1, i32 %r2, i32 %r3)
- ret void
-}
-
-; This clobbers r0-r3, but doesn't otherwise need a push/pop, so we don't add
-; them.
-define void @test_r0_r1_r2_r3() minsize nounwind {
-; CHECK-LABEL: test_r0_r1_r2_r3:
-; CHECK-NOT: push
-; CHECK-NOT: pop
- call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3}"()
- ret void
-}
-
-; This function has no callers in this file, so we don't push any extra registers.
-define void @test_r0_r4_not_called() minsize nounwind {
-; CHECK-LABEL: test_r0_r4_not_called:
-; CHECK: .save {r4, lr}
-; CHECK: push {r4, lr}
-; CHECK: pop {r4, pc}
-; CHECK-NOT: push
-; CHECK-NOT: pop
- call void asm sideeffect "", "~{r0},~{r4}"()
- ret void
-}
-
-; This function is only optsize, not minsize, so we don't add any extra saves.
-define void @test_r0_r4_not_minsize() optsize nounwind {
-; CHECK-LABEL: test_r0_r4_not_minsize:
-; CHECK: .save {r4, lr}
-; CHECK: push {r4, lr}
-; CHECK: pop {r4, pc}
-; CHECK-NOT: push
-; CHECK-NOT: pop
- call void asm sideeffect "", "~{r0},~{r4}"()
- ret void
-}
-
-; This function is not an exact definition (the linker could pick an
-; alternative version of it), so we don't add any extra saves.
-define linkonce_odr void @test_r0_r4_not_exact() minsize nounwind {
-; CHECK-LABEL: test_r0_r4_not_exact:
-; CHECK: .save {r4, lr}
-; CHECK: push {r4, lr}
-; CHECK: pop {r4, pc}
-; CHECK-NOT: push
-; CHECK-NOT: pop
- call void asm sideeffect "", "~{r0},~{r4}"()
- ret void
-}
-
-; This clobbers r0-r3, but returns a value in r0, so only r1-r3 are saved.
-define i32 @test_r0_r1_r2_r3_r4_return_1() minsize nounwind {
-; CHECK-LABEL: test_r0_r1_r2_r3_r4_return_1:
-; ARM: .save {r1, r2, r3, r4, r11, lr}
-; ARM: push {r1, r2, r3, r4, r11, lr}
-; ARM: pop {r1, r2, r3, r4, r11, pc}
-; THUMB1: .save {r1, r2, r3, r4, r7, lr}
-; THUMB1: push {r1, r2, r3, r4, r7, lr}
-; THUMB1: pop {r1, r2, r3, r4, r7, pc}
-; THUMB2: .save {r1, r2, r3, r4, r7, lr}
-; THUMB2: push {r1, r2, r3, r4, r7, lr}
-; THUMB2: pop {r1, r2, r3, r4, r7, pc}
- call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r4}"()
- ret i32 42
-}
-
-; This clobbers r0-r3, but returns a value in r0 and r1, so only r2-r3 are
-; saved.
-define i64 @test_r0_r1_r2_r3_r4_return_2() minsize nounwind {
-; CHECK-LABEL: test_r0_r1_r2_r3_r4_return_2:
-; CHECK: .save {r2, r3, r4, lr}
-; CHECK: push {r2, r3, r4, lr}
-; CHECK: pop {r2, r3, r4, pc}
- call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r4}"()
- ret i64 42
-}
-
-; This clobbers r0-r3, but returns a value in all of r0-r3, so none of them can
-; be saved.
-define i128 @test_r0_r1_r2_r3_r4_return_4() minsize nounwind {
-; CHECK-LABEL: test_r0_r1_r2_r3_r4_return_4:
-; CHECK: .save {r4, lr}
-; CHECK: push {r4, lr}
-; CHECK: pop {r4, pc}
- call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r4}"()
- ret i128 42
-}
-
-; This clobbers r0-r3, and returns a value in s0, so all of r0-r3 are saved (we
-; previously only checked the number of return registers, ignoring their
-; class).
-define arm_aapcs_vfpcc float @test_r0_r1_r2_r3_r4_return_float() minsize nounwind {
-; CHECK-LABEL: test_r0_r1_r2_r3_r4_return_float:
-; ARM: .save {r0, r1, r2, r3, r4, lr}
-; ARM: push {r0, r1, r2, r3, r4, lr}
-; ARM: pop {r0, r1, r2, r3, r4, pc}
-; THUMB1: .save {r1, r2, r3, r4, r7, lr}
-; THUMB1: push {r1, r2, r3, r4, r7, lr}
-; THUMB1: pop {r1, r2, r3, r4, r7, pc}
-; THUMB2: .save {r0, r1, r2, r3, r4, lr}
-; THUMB2: push {r0, r1, r2, r3, r4, lr}
-; THUMB2: pop {r0, r1, r2, r3, r4, pc}
- call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r4}"()
- ret float 42.0
-}
-
-; Saving of high registers in thumb1 is more complicated, because they need to
-; be copied down to low registers to use push/pop instructions. Luckily, the
-; extra registers we are preserving are low registers, which are handled by the
-; outer-most push/pop pair, so this doesn't interact badly.
-define void @test_save_high_regs() minsize nounwind {
-; CHECK-LABEL: test_save_high_regs:
-; ARM: .save {r0, r1, r2, r3, r7, r8, r9, r10, r11, lr}
-; ARM: push {r0, r1, r2, r3, r7, r8, r9, r10, r11, lr}
-; ARM: pop {r0, r1, r2, r3, r7, r8, r9, r10, r11, pc}
-; THUMB1: .save {r0, r1, r2, r3, r7, lr}
-; THUMB1-NEXT: push {r0, r1, r2, r3, r7, lr}
-; THUMB1-NEXT: mov lr, r11
-; THUMB1-NEXT: mov r7, r10
-; THUMB1-NEXT: mov r3, r9
-; THUMB1-NEXT: mov r2, r8
-; THUMB1-NEXT: .save {r8, r9, r10, r11}
-; THUMB1-NEXT: push {r2, r3, r7, lr}
-; THUMB1: pop {r0, r1, r2, r3}
-; THUMB1-NEXT: mov r8, r0
-; THUMB1-NEXT: mov r9, r1
-; THUMB1-NEXT: mov r10, r2
-; THUMB1-NEXT: mov r11, r3
-; THUMB1-NEXT: pop {r0, r1, r2, r3, r7, pc}
-; THUMB2: .save {r0, r1, r2, r3, r7, r8, r9, r10, r11, lr}
-; THUMB2: push.w {r0, r1, r2, r3, r7, r8, r9, r10, r11, lr}
-; THUMB2: pop.w {r0, r1, r2, r3, r7, r8, r9, r10, r11, pc}
- call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r8},~{r9},~{r10},~{r11}"()
- ret void
-}
-
-; We can also use extra registers in the PUSH/POP instructions to move the SP
-; to make space for local variables. These registers aren't preserved, because
-; the space they are saved in is used for the local variable. We try to back
-; off the extra-CSRs optimisation to allow this to still happen. In this case,
-; there are 8 bytes of stack space needed, so we preserve two argument
-; registers and use the other two for the SP update.
-define void @test_r0_r1_r2_r3_r4_stack8() minsize nounwind {
-; CHECK-LABEL: test_r0_r1_r2_r3_r4_stack8:
-; CHECK: .save {r2, r3, r4, lr}
-; CHECK: push {r0, r1, r2, r3, r4, lr}
-; CHECK: pop {r0, r1, r2, r3, r4, pc}
- %a = alloca [2 x i32], align 4
- call void asm sideeffect "str $1, [$0]; str $1, [$0, #4]", "{r0},{r1},~{r2},~{r3},~{r4}"([2 x i32]* %a, i32 42)
- ret void
-}
-
-; Check that, when the above function is called, r0 and r1 (used for the SP
-; updates) are considered clobbered, and r2 and r3 are preserved.
-define void @test_r0_r1_r2_r3_r4_stack8_caller() nounwind {
-; CHECK-LABEL: test_r0_r1_r2_r3_r4_stack8_caller:
-; CHECK: ASM1: r0, r1, r2, r3
-; CHECK-NEXT: @NO_APP
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r5, r1
-; CHECK-NEXT: bl test_r0_r1_r2_r3_r4
-; CHECK-NEXT: mov r0, r4
-; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: @APP
-; CHECK-NEXT: ASM2: r0, r1, r2, r3
- %regs = call { i32, i32, i32, i32 } asm sideeffect "// ASM1: $0, $1, $2, $3", "={r0},={r1},={r2},={r3}"()
- %r0 = extractvalue { i32, i32, i32, i32 } %regs, 0
- %r1 = extractvalue { i32, i32, i32, i32 } %regs, 1
- %r2 = extractvalue { i32, i32, i32, i32 } %regs, 2
- %r3 = extractvalue { i32, i32, i32, i32 } %regs, 3
- call void @test_r0_r1_r2_r3_r4_stack8()
- call void asm sideeffect "// ASM2: $0, $1, $2, $3", "{r0},{r1},{r2},{r3}"(i32 %r0, i32 %r1, i32 %r2, i32 %r3)
- ret void
-}
-
-; Like @test_r0_r1_r2_r3_r4_stack8, but 16 bytes of stack space are needed, so
-; all of r0-r3 are used for the SP update, and not preserved.
-define void @test_r0_r1_r2_r3_r4_stack16() minsize nounwind {
-; CHECK-LABEL: test_r0_r1_r2_r3_r4_stack16:
-; CHECK: .save {r4, lr}
-; CHECK: push {r0, r1, r2, r3, r4, lr}
-; CHECK: pop {r0, r1, r2, r3, r4, pc}
- %a = alloca [4 x i32], align 4
- call void asm sideeffect "str $1, [$0]; str $1, [$0, #4]", "{r0},{r1},~{r2},~{r3},~{r4}"([4 x i32]* %a, i32 42)
- ret void
-}
-
-; If more than 16 bytes of stack space are needed, it's unlikely that the
-; SP-update folding optimisation will succeed, so we revert back to preserving
-; r0-r3 for use in our callers.
-define void @test_r0_r1_r2_r3_r4_stack24() minsize nounwind {
-; CHECK-LABEL: test_r0_r1_r2_r3_r4_stack24:
-; CHECK: .save {r0, r1, r2, r3, r4, lr}
-; CHECK: push {r0, r1, r2, r3, r4, lr}
-; CHECK: pop {r0, r1, r2, r3, r4, pc}
- %a = alloca [6 x i32], align 4
- call void asm sideeffect "str $1, [$0]; str $1, [$0, #4]", "{r0},{r1},~{r2},~{r3},~{r4}"([6 x i32]* %a, i32 42)
- ret void
-}
-
-define i32 @tail_callee(i32 %a, i32 %b) minsize nounwind {
-entry:
- tail call void asm sideeffect "", "~{r2}"()
- ret i32 %a
-}
-
-; The tail call happens outside the save/restore region, so prevents us from
-; preserving some registers. r0 and r1 are outgoing arguments to the tail-call,
-; so can't be preserved. r2 is modified inside the tail-called function, so
-; can't be preserved. r3 is known to be preserved by the callee, so can be
-; preserved. For Thumb1, we can't (efficiently) use a tail-call here, so r1-r3
-; are all preserved, with r0 being the return value.
-define i32 @test_tail_call() minsize nounwind {
-entry:
-; CHECK-LABEL: test_tail_call:
-; ARM: .save {r3, lr}
-; ARM: push {r3, lr}
-; ARM: pop {r3, lr}
-; ARM: b tail_callee
-; THUMB2: .save {r3, lr}
-; THUMB2: push {r3, lr}
-; THUMB2: pop.w {r3, lr}
-; THUMB2: b tail_callee
-; THUMB1: .save {r1, r2, r3, lr}
-; THUMB1: push {r1, r2, r3, lr}
-; THUMB1: bl tail_callee
-; THUMB1: pop {r1, r2, r3, pc}
- tail call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{lr}"()
- %call = tail call i32 @tail_callee(i32 3, i32 4)
- ret i32 %call
-}
-
-declare i32 @tail_callee_external(i32 %a, i32 %b)
-
-; If we tail-call an external function, it could clobber any of r0-r3.
-define i32 @test_tail_call_external() minsize nounwind {
-entry:
-; CHECK-LABEL: test_tail_call_external:
-; ARM: .save {r11, lr}
-; ARM: push {r11, lr}
-; ARM: pop {r11, lr}
-; ARM: b tail_callee_external
-; THUMB2: .save {r7, lr}
-; THUMB2: push {r7, lr}
-; THUMB2: pop.w {r7, lr}
-; THUMB2: b tail_callee_external
-; THUMB1: .save {r1, r2, r3, lr}
-; THUMB1: push {r1, r2, r3, lr}
-; THUMB1: bl tail_callee_external
-; THUMB1: pop {r1, r2, r3, pc}
- tail call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{lr}"()
- %call = tail call i32 @tail_callee_external(i32 3, i32 4)
- ret i32 %call
-}
-
-define linkonce_odr i32 @tail_callee_linkonce_odr(i32 %a, i32 %b) minsize nounwind {
-entry:
- tail call void asm sideeffect "", "~{r2}"()
- ret i32 %a
-}
-
-; If a tail-callee has an interposable linkage type (such as linkonce_odr), we
-; can't assume the linker will pick the definition we can see, so must assume
-; it clobbers all of r0-r3.
-define i32 @test_tail_call_linkonce_odr() minsize nounwind {
-entry:
-; CHECK-LABEL: test_tail_call_linkonce_odr:
-; ARM: .save {r11, lr}
-; ARM: push {r11, lr}
-; ARM: pop {r11, lr}
-; ARM: b tail_callee_linkonce_odr
-; THUMB2: .save {r7, lr}
-; THUMB2: push {r7, lr}
-; THUMB2: pop.w {r7, lr}
-; THUMB2: b tail_callee_linkonce_odr
-; THUMB1: .save {r1, r2, r3, lr}
-; THUMB1: push {r1, r2, r3, lr}
-; THUMB1: bl tail_callee_linkonce_odr
-; THUMB1: pop {r1, r2, r3, pc}
- tail call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{lr}"()
- %call = tail call i32 @tail_callee_linkonce_odr(i32 3, i32 4)
- ret i32 %call
-}
-
-; This function doesn't have the nounwind attribute, so unwind tables will be
-; emitted. Saving r0-r3 requires a longer unwind instruction sequence, which
-; results in an increase in total code size if there are few callers to make
-; use of the extra registers.
-define void @test_unwind_tables() minsize {
-; CHECK-LABEL: test_unwind_tables:
-; ARM: .save {r4, lr}
-; ARM: push {r4, lr}
-; ARM: pop {r4, pc}
-; THUMB1: .save {r4, lr}
-; THUMB1: push {r4, lr}
-; THUMB1: pop {r4, pc}
-; THUMB2: .save {r4, lr}
-; THUMB2: push {r4, lr}
-; THUMB2: pop {r4, pc}
- call void asm sideeffect "", "~{r0},~{r4}"()
- ret void
-}
-
-; This requires an unwind table, but has many call sites, so overall we expect
-; the benefits to outweigh the size increase of the unwind table.
-define void @test_unwind_tables_many_calls() minsize {
-; CHECK-LABEL: test_unwind_tables_many_calls:
-; ARM: .save {r0, r4, r11, lr}
-; ARM: push {r0, r4, r11, lr}
-; ARM: pop {r0, r4, r11, pc}
-; THUMB1: .save {r0, r4, r7, lr}
-; THUMB1: push {r0, r4, r7, lr}
-; THUMB1: pop {r0, r4, r7, pc}
-; THUMB2: .save {r0, r4, r7, lr}
-; THUMB2: push {r0, r4, r7, lr}
-; THUMB2: pop {r0, r4, r7, pc}
- call void asm sideeffect "", "~{r0},~{r4}"()
- ret void
-}
-
-; We don't do this optimisation if there are no callers in the same translation
-; unit (otherwise IPRA wouldn't be able to take advantage of the extra saved
-; registers), so most functions in this file are called here.
-define void @caller() {
-; CHECK-LABEL: caller:
- call void @test_r0_r4()
- call void @test_r0_r1_r2_r3_r4()
- call void @test_r0_r1_r2_r3()
- call void @test_r0_r4_not_minsize()
- call void @test_r0_r4_not_exact()
- %t1 = call i32 @test_r0_r1_r2_r3_r4_return_1()
- %t2 = call i64 @test_r0_r1_r2_r3_r4_return_2()
- %t3 = call i128 @test_r0_r1_r2_r3_r4_return_4()
- %t4 = call float @test_r0_r1_r2_r3_r4_return_float()
- call void @test_save_high_regs()
- call void @test_r0_r1_r2_r3_r4_stack16()
- call void @test_r0_r1_r2_r3_r4_stack24()
- %t5 = call i32 @test_tail_call()
- %t6 = call i32 @test_tail_call_external()
- %t7 = call i32 @test_tail_call_linkonce_odr()
- call void @test_unwind_tables()
- call void @test_unwind_tables_many_calls()
- call void @test_unwind_tables_many_calls()
- call void @test_unwind_tables_many_calls()
- call void @test_unwind_tables_many_calls()
- call void @test_unwind_tables_many_calls()
- call void @test_unwind_tables_many_calls()
- call void @test_unwind_tables_many_calls()
- call void @test_unwind_tables_many_calls()
- call void @test_unwind_tables_many_calls()
- ret void
-}
diff --git a/llvm/test/CodeGen/Thumb2/ifcvt-minsize.ll b/llvm/test/CodeGen/Thumb2/ifcvt-minsize.ll
index 63726ce18df1..146a2223c357 100644
--- a/llvm/test/CodeGen/Thumb2/ifcvt-minsize.ll
+++ b/llvm/test/CodeGen/Thumb2/ifcvt-minsize.ll
@@ -66,13 +66,16 @@ return: ; preds = %entry, %if.end
define void @f3(i32 %x) #0 {
; CHECK-LABEL: f3:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: cmp r0, #1
+; CHECK-NEXT: bne .LBB2_2
+; CHECK-NEXT: @ %bb.1: @ %t
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: cmp r0, #1
-; CHECK-NEXT: itt eq
-; CHECK-NEXT: moveq r0, #0
-; CHECK-NEXT: bleq fn
-; CHECK-NEXT: pop {r7, pc}
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: bl fn
+; CHECK-NEXT: pop.w {r7, lr}
+; CHECK-NEXT: .LBB2_2: @ %f
+; CHECK-NEXT: bx lr
entry:
%p = icmp eq i32 %x, 1
br i1 %p, label %t, label %f
More information about the llvm-commits
mailing list