[llvm] bb5befe - Revert "[CodeGen][ShrinkWrap] Split restore point"

via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 12 22:23:01 PDT 2023


Author: sgokhale
Date: 2023-04-13T10:52:28+05:30
New Revision: bb5befefc6e76330120d8fe3e6f4f27f9e0989f0

URL: https://github.com/llvm/llvm-project/commit/bb5befefc6e76330120d8fe3e6f4f27f9e0989f0
DIFF: https://github.com/llvm/llvm-project/commit/bb5befefc6e76330120d8fe3e6f4f27f9e0989f0.diff

LOG: Revert "[CodeGen][ShrinkWrap] Split restore point"

This reverts commit 5f0bccc3d1a74111458c71f009817c9995f4bf83.

An issue has been reported here: https://github.com/ClangBuiltLinux/linux/issues/1833

Added: 
    

Modified: 
    llvm/lib/CodeGen/ShrinkWrap.cpp
    llvm/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir
    llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll
    llvm/test/CodeGen/ARM/ssat-unroll-loops.ll
    llvm/test/CodeGen/LoongArch/jump-table.ll
    llvm/test/CodeGen/PowerPC/common-chain-aix32.ll
    llvm/test/CodeGen/PowerPC/common-chain.ll
    llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll
    llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll
    llvm/test/CodeGen/PowerPC/shrink-wrap.ll
    llvm/test/CodeGen/PowerPC/shrink-wrap.mir
    llvm/test/CodeGen/RISCV/aext-to-sext.ll
    llvm/test/CodeGen/RISCV/fli-licm.ll
    llvm/test/CodeGen/RISCV/jumptable.ll
    llvm/test/CodeGen/Thumb/thumb-shrink-wrapping.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/inlineasm.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/sibling-loops.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll
    llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
    llvm/test/CodeGen/Thumb2/mve-gather-increment.ll
    llvm/test/CodeGen/Thumb2/mve-gather-tailpred.ll
    llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll
    llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll
    llvm/test/CodeGen/Thumb2/mve-tailpred-nonzerostart.ll
    llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll
    llvm/test/CodeGen/X86/negative-stride-fptosi-user.ll
    llvm/test/CodeGen/X86/pr44412.ll
    llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
    llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll

Removed: 
    llvm/test/CodeGen/AArch64/shrinkwrap-split-restore-point.mir


################################################################################
diff  --git a/llvm/lib/CodeGen/ShrinkWrap.cpp b/llvm/lib/CodeGen/ShrinkWrap.cpp
index d732f45248cec..2411b1ad52031 100644
--- a/llvm/lib/CodeGen/ShrinkWrap.cpp
+++ b/llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -97,9 +97,6 @@ STATISTIC(NumCandidatesDropped,
 static cl::opt<cl::boolOrDefault>
 EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden,
                     cl::desc("enable the shrink-wrapping pass"));
-static cl::opt<bool> EnablePostShrinkWrapOpt(
-    "enable-shrink-wrap-region-split", cl::init(true), cl::Hidden,
-    cl::desc("enable splitting of the restore block if possible"));
 
 namespace {
 
@@ -187,30 +184,6 @@ class ShrinkWrap : public MachineFunctionPass {
   /// this call.
   void updateSaveRestorePoints(MachineBasicBlock &MBB, RegScavenger *RS);
 
-  // Try to find safe point based on dominance and block frequency without
-  // any change in IR.
-  bool performShrinkWrapping(MachineFunction &MF, RegScavenger *RS);
-
-  /// This function tries to split the restore point if doing so can shrink the
-  /// save point further. \return True if restore point is split.
-  bool postShrinkWrapping(bool HasCandidate, MachineFunction &MF,
-                          RegScavenger *RS);
-
-  /// This function analyzes if the restore point can split to create a new
-  /// restore point. This function collects
-  /// 1. Any preds of current restore that are reachable by callee save/FI
-  /// blocks
-  /// - indicated by DirtyPreds
-  /// 2. Any preds of current restore that are not DirtyPreds - indicated by
-  /// CleanPreds
-  /// Both sets should be non-empty for considering restore point split.
-  bool checkIfRestoreSplittable(
-      const MachineBasicBlock *CurRestore,
-      const DenseSet<const MachineBasicBlock *> &ReachableByDirty,
-      SmallVectorImpl<MachineBasicBlock *> &DirtyPreds,
-      SmallVectorImpl<MachineBasicBlock *> &CleanPreds,
-      const TargetInstrInfo *TII, RegScavenger *RS);
-
   /// Initialize the pass for \p MF.
   void init(MachineFunction &MF) {
     RCI.runOnMachineFunction(MF);
@@ -347,303 +320,18 @@ bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI,
 /// Helper function to find the immediate (post) dominator.
 template <typename ListOfBBs, typename DominanceAnalysis>
 static MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs,
-                                   DominanceAnalysis &Dom, bool Strict = true) {
+                                   DominanceAnalysis &Dom) {
   MachineBasicBlock *IDom = &Block;
   for (MachineBasicBlock *BB : BBs) {
     IDom = Dom.findNearestCommonDominator(IDom, BB);
     if (!IDom)
       break;
   }
-  if (Strict && IDom == &Block)
+  if (IDom == &Block)
     return nullptr;
   return IDom;
 }
 
-static bool isAnalyzableBB(const TargetInstrInfo &TII,
-                           MachineBasicBlock &Entry) {
-  // Check if the block is analyzable.
-  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
-  SmallVector<MachineOperand, 4> Cond;
-  return !TII.analyzeBranch(Entry, TBB, FBB, Cond);
-}
-
-/// Determines if any predecessor of MBB is on the path from block that has use
-/// or def of CSRs/FI to MBB.
-/// ReachableByDirty: All blocks reachable from block that has use or def of
-/// CSR/FI.
-static bool
-hasDirtyPred(const DenseSet<const MachineBasicBlock *> &ReachableByDirty,
-             const MachineBasicBlock &MBB) {
-  for (const MachineBasicBlock *PredBB : MBB.predecessors())
-    if (ReachableByDirty.count(PredBB))
-      return true;
-  return false;
-}
-
-/// Derives the list of all the basic blocks reachable from MBB.
-static void markAllReachable(DenseSet<const MachineBasicBlock *> &Visited,
-                             const MachineBasicBlock &MBB) {
-  SmallVector<MachineBasicBlock *, 4> Worklist(MBB.succ_begin(),
-                                               MBB.succ_end());
-  Visited.insert(&MBB);
-  while (!Worklist.empty()) {
-    MachineBasicBlock *SuccMBB = Worklist.pop_back_val();
-    if (!Visited.insert(SuccMBB).second)
-      continue;
-    Worklist.append(SuccMBB->succ_begin(), SuccMBB->succ_end());
-  }
-}
-
-/// Collect blocks reachable by use or def of CSRs/FI.
-static void collectBlocksReachableByDirty(
-    const DenseSet<const MachineBasicBlock *> &DirtyBBs,
-    DenseSet<const MachineBasicBlock *> &ReachableByDirty) {
-  for (const MachineBasicBlock *MBB : DirtyBBs) {
-    if (ReachableByDirty.count(MBB))
-      continue;
-    // Mark all offsprings as reachable.
-    markAllReachable(ReachableByDirty, *MBB);
-  }
-}
-
-/// \return true if there is a clean path from SavePoint to the original
-/// Restore.
-static bool
-isSaveReachableThroughClean(const MachineBasicBlock *SavePoint,
-                            ArrayRef<MachineBasicBlock *> CleanPreds) {
-  DenseSet<const MachineBasicBlock *> Visited;
-  SmallVector<MachineBasicBlock *, 4> Worklist(CleanPreds.begin(),
-                                               CleanPreds.end());
-  while (!Worklist.empty()) {
-    MachineBasicBlock *CleanBB = Worklist.pop_back_val();
-    if (CleanBB == SavePoint)
-      return true;
-    if (!Visited.insert(CleanBB).second || !CleanBB->pred_size())
-      continue;
-    Worklist.append(CleanBB->pred_begin(), CleanBB->pred_end());
-  }
-  return false;
-}
-
-/// This function updates the branches post restore point split.
-///
-/// Restore point has been split.
-/// Old restore point: MBB
-/// New restore point: NMBB
-/// Any basic block(say BBToUpdate) which had a fallthrough to MBB
-/// previously should
-/// 1. Fallthrough to NMBB iff NMBB is inserted immediately above MBB in the
-/// block layout OR
-/// 2. Branch unconditionally to NMBB iff NMBB is inserted at any other place.
-static void updateTerminator(MachineBasicBlock *BBToUpdate,
-                             MachineBasicBlock *NMBB,
-                             const TargetInstrInfo *TII) {
-  DebugLoc DL = BBToUpdate->findBranchDebugLoc();
-  // if NMBB isn't the new layout successor for BBToUpdate, insert unconditional
-  // branch to it
-  if (!BBToUpdate->isLayoutSuccessor(NMBB))
-    TII->insertUnconditionalBranch(*BBToUpdate, NMBB, DL);
-}
-
-/// This function splits the restore point and returns new restore point/BB.
-///
-/// DirtyPreds: Predessors of \p MBB that are ReachableByDirty
-///
-/// Decision has been made to split the restore point.
-/// old restore point: \p MBB
-/// new restore point: \p NMBB
-/// This function makes the necessary block layout changes so that
-/// 1. \p NMBB points to \p MBB unconditionally
-/// 2. All dirtyPreds that previously pointed to \p MBB point to \p NMBB
-static MachineBasicBlock *
-tryToSplitRestore(MachineBasicBlock *MBB,
-                  ArrayRef<MachineBasicBlock *> DirtyPreds,
-                  const TargetInstrInfo *TII) {
-  MachineFunction *MF = MBB->getParent();
-
-  // get the list of DirtyPreds who have a fallthrough to MBB
-  // before the block layout change. This is just to ensure that if the NMBB is
-  // inserted after MBB, then we create unconditional branch from
-  // DirtyPred/CleanPred to NMBB
-  SmallPtrSet<MachineBasicBlock *, 8> MBBFallthrough;
-  for (MachineBasicBlock *BB : DirtyPreds)
-    if (BB->getFallThrough(false) == MBB)
-      MBBFallthrough.insert(BB);
-
-  MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
-  MF->insert(MachineFunction::iterator(MBB), NMBB);
-
-  for (const MachineBasicBlock::RegisterMaskPair &LI : MBB->liveins())
-    NMBB->addLiveIn(LI.PhysReg);
-
-  TII->insertUnconditionalBranch(*NMBB, MBB, DebugLoc());
-
-  // After splitting, all predecessors of the restore point should be dirty
-  // blocks.
-  for (MachineBasicBlock *SuccBB : DirtyPreds)
-    SuccBB->ReplaceUsesOfBlockWith(MBB, NMBB);
-
-  NMBB->addSuccessor(MBB);
-
-  for (MachineBasicBlock *BBToUpdate : MBBFallthrough)
-    updateTerminator(BBToUpdate, NMBB, TII);
-
-  return NMBB;
-}
-
-/// This function undoes the restore point split done earlier.
-///
-/// DirtyPreds: All predecessors of \p NMBB that are ReachableByDirty.
-///
-/// Restore point was split and the change needs to be unrolled. Make necessary
-/// changes to reset restore point from \p NMBB to \p MBB.
-static void rollbackRestoreSplit(MachineFunction &MF, MachineBasicBlock *NMBB,
-                                 MachineBasicBlock *MBB,
-                                 ArrayRef<MachineBasicBlock *> DirtyPreds,
-                                 const TargetInstrInfo *TII) {
-  // For a BB, if NMBB is fallthrough in the current layout, then in the new
-  // layout a. BB should fallthrough to MBB OR b. BB should undconditionally
-  // branch to MBB
-  SmallPtrSet<MachineBasicBlock *, 8> NMBBFallthrough;
-  for (MachineBasicBlock *BB : DirtyPreds)
-    if (BB->getFallThrough(false) == NMBB)
-      NMBBFallthrough.insert(BB);
-
-  NMBB->removeSuccessor(MBB);
-  for (MachineBasicBlock *SuccBB : DirtyPreds)
-    SuccBB->ReplaceUsesOfBlockWith(NMBB, MBB);
-
-  NMBB->erase(NMBB->begin(), NMBB->end());
-  NMBB->eraseFromParent();
-
-  for (MachineBasicBlock *BBToUpdate : NMBBFallthrough)
-    updateTerminator(BBToUpdate, MBB, TII);
-}
-
-// A block is deemed fit for restore point split iff there exist
-// 1. DirtyPreds - preds of CurRestore reachable from use or def of CSR/FI
-// 2. CleanPreds - preds of CurRestore that arent DirtyPreds
-bool ShrinkWrap::checkIfRestoreSplittable(
-    const MachineBasicBlock *CurRestore,
-    const DenseSet<const MachineBasicBlock *> &ReachableByDirty,
-    SmallVectorImpl<MachineBasicBlock *> &DirtyPreds,
-    SmallVectorImpl<MachineBasicBlock *> &CleanPreds,
-    const TargetInstrInfo *TII, RegScavenger *RS) {
-  for (const MachineInstr &MI : *CurRestore)
-    if (useOrDefCSROrFI(MI, RS))
-      return false;
-
-  for (MachineBasicBlock *PredBB : CurRestore->predecessors()) {
-    if (!isAnalyzableBB(*TII, *PredBB))
-      return false;
-
-    if (ReachableByDirty.count(PredBB))
-      DirtyPreds.push_back(PredBB);
-    else
-      CleanPreds.push_back(PredBB);
-  }
-
-  return !(CleanPreds.empty() || DirtyPreds.empty());
-}
-
-bool ShrinkWrap::postShrinkWrapping(bool HasCandidate, MachineFunction &MF,
-                                    RegScavenger *RS) {
-  if (!EnablePostShrinkWrapOpt)
-    return false;
-
-  MachineBasicBlock *InitSave = nullptr;
-  MachineBasicBlock *InitRestore = nullptr;
-
-  if (HasCandidate) {
-    InitSave = Save;
-    InitRestore = Restore;
-  } else {
-    InitRestore = nullptr;
-    InitSave = &MF.front();
-    for (MachineBasicBlock &MBB : MF) {
-      if (MBB.isEHFuncletEntry())
-        return false;
-      if (MBB.isReturnBlock()) {
-        // Do not support multiple restore points.
-        if (InitRestore)
-          return false;
-        InitRestore = &MBB;
-      }
-    }
-  }
-
-  if (!InitSave || !InitRestore || InitRestore == InitSave ||
-      !MDT->dominates(InitSave, InitRestore) ||
-      !MPDT->dominates(InitRestore, InitSave))
-    return false;
-
-  DenseSet<const MachineBasicBlock *> DirtyBBs;
-  for (MachineBasicBlock &MBB : MF) {
-    if (MBB.isEHPad()) {
-      DirtyBBs.insert(&MBB);
-      continue;
-    }
-    for (const MachineInstr &MI : MBB)
-      if (useOrDefCSROrFI(MI, RS)) {
-        DirtyBBs.insert(&MBB);
-        break;
-      }
-  }
-
-  // Find blocks reachable from the use or def of CSRs/FI.
-  DenseSet<const MachineBasicBlock *> ReachableByDirty;
-  collectBlocksReachableByDirty(DirtyBBs, ReachableByDirty);
-
-  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
-  SmallVector<MachineBasicBlock *, 2> DirtyPreds;
-  SmallVector<MachineBasicBlock *, 2> CleanPreds;
-  if (!checkIfRestoreSplittable(InitRestore, ReachableByDirty, DirtyPreds,
-                                CleanPreds, TII, RS))
-    return false;
-
-  // Trying to reach out to the new save point which dominates all dirty blocks.
-  MachineBasicBlock *NewSave =
-      FindIDom<>(**DirtyPreds.begin(), DirtyPreds, *MDT, false);
-
-  while (NewSave && (hasDirtyPred(ReachableByDirty, *NewSave) ||
-                     EntryFreq < MBFI->getBlockFreq(NewSave).getFrequency()))
-    NewSave = FindIDom<>(**NewSave->pred_begin(), NewSave->predecessors(), *MDT,
-                         false);
-
-  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
-  if (!NewSave || NewSave == InitSave ||
-      isSaveReachableThroughClean(NewSave, CleanPreds) ||
-      !TFI->canUseAsPrologue(*NewSave))
-    return false;
-
-  // Now we know that splitting a restore point can isolate the restore point
-  // from clean blocks and doing so can shrink the save point.
-  MachineBasicBlock *NewRestore =
-      tryToSplitRestore(InitRestore, DirtyPreds, TII);
-
-  // Make sure if the new restore point is valid as an epilogue, depending on
-  // targets.
-  if (!TFI->canUseAsEpilogue(*NewRestore)) {
-    rollbackRestoreSplit(MF, NewRestore, InitRestore, DirtyPreds, TII);
-    return false;
-  }
-
-  Save = NewSave;
-  Restore = NewRestore;
-
-  MDT->runOnMachineFunction(MF);
-  MPDT->runOnMachineFunction(MF);
-
-  assert((MDT->dominates(Save, Restore) && MPDT->dominates(Restore, Save)) &&
-         "Incorrect save or restore point due to dominance relations");
-  assert((!MLI->getLoopFor(Save) && !MLI->getLoopFor(Restore)) &&
-         "Unexpected save or restore point in a loop");
-  assert((EntryFreq >= MBFI->getBlockFreq(Save).getFrequency() &&
-          EntryFreq >= MBFI->getBlockFreq(Restore).getFrequency()) &&
-         "Incorrect save or restore point based on block frequency");
-  return true;
-}
-
 void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,
                                          RegScavenger *RS) {
   // Get rid of the easy cases first.
@@ -775,7 +463,31 @@ static bool giveUpWithRemarks(MachineOptimizationRemarkEmitter *ORE,
   return false;
 }
 
-bool ShrinkWrap::performShrinkWrapping(MachineFunction &MF, RegScavenger *RS) {
+bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF))
+    return false;
+
+  LLVM_DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');
+
+  init(MF);
+
+  ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
+  if (containsIrreducibleCFG<MachineBasicBlock *>(RPOT, *MLI)) {
+    // If MF is irreducible, a block may be in a loop without
+    // MachineLoopInfo reporting it. I.e., we may use the
+    // post-dominance property in loops, which lead to incorrect
+    // results. Moreover, we may miss that the prologue and
+    // epilogue are not in the same loop, leading to unbalanced
+    // construction/deconstruction of the stack frame.
+    return giveUpWithRemarks(ORE, "UnsupportedIrreducibleCFG",
+                             "Irreducible CFGs are not supported yet.",
+                             MF.getFunction().getSubprogram(), &MF.front());
+  }
+
+  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+  std::unique_ptr<RegScavenger> RS(
+      TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr);
+
   for (MachineBasicBlock &MBB : MF) {
     LLVM_DEBUG(dbgs() << "Look into: " << MBB.getNumber() << ' '
                       << MBB.getName() << '\n');
@@ -791,7 +503,7 @@ bool ShrinkWrap::performShrinkWrapping(MachineFunction &MF, RegScavenger *RS) {
       // are at least at the boundary of the save and restore points.  The
       // problem is that a basic block can jump out from the middle in these
       // cases, which we do not handle.
-      updateSaveRestorePoints(MBB, RS);
+      updateSaveRestorePoints(MBB, RS.get());
       if (!ArePointsInteresting()) {
         LLVM_DEBUG(dbgs() << "EHPad/inlineasm_br prevents shrink-wrapping\n");
         return false;
@@ -800,11 +512,11 @@ bool ShrinkWrap::performShrinkWrapping(MachineFunction &MF, RegScavenger *RS) {
     }
 
     for (const MachineInstr &MI : MBB) {
-      if (!useOrDefCSROrFI(MI, RS))
+      if (!useOrDefCSROrFI(MI, RS.get()))
         continue;
       // Save (resp. restore) point must dominate (resp. post dominate)
       // MI. Look for the proper basic block for those.
-      updateSaveRestorePoints(MBB, RS);
+      updateSaveRestorePoints(MBB, RS.get());
       // If we are at a point where we cannot improve the placement of
       // save/restore instructions, just give up.
       if (!ArePointsInteresting()) {
@@ -858,49 +570,13 @@ bool ShrinkWrap::performShrinkWrapping(MachineFunction &MF, RegScavenger *RS) {
         break;
       NewBB = Restore;
     }
-    updateSaveRestorePoints(*NewBB, RS);
+    updateSaveRestorePoints(*NewBB, RS.get());
   } while (Save && Restore);
 
   if (!ArePointsInteresting()) {
     ++NumCandidatesDropped;
     return false;
   }
-  return true;
-}
-
-bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
-  if (skipFunction(MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF))
-    return false;
-
-  LLVM_DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');
-
-  init(MF);
-
-  ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
-  if (containsIrreducibleCFG<MachineBasicBlock *>(RPOT, *MLI)) {
-    // If MF is irreducible, a block may be in a loop without
-    // MachineLoopInfo reporting it. I.e., we may use the
-    // post-dominance property in loops, which lead to incorrect
-    // results. Moreover, we may miss that the prologue and
-    // epilogue are not in the same loop, leading to unbalanced
-    // construction/deconstruction of the stack frame.
-    return giveUpWithRemarks(ORE, "UnsupportedIrreducibleCFG",
-                             "Irreducible CFGs are not supported yet.",
-                             MF.getFunction().getSubprogram(), &MF.front());
-  }
-
-  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
-  std::unique_ptr<RegScavenger> RS(
-      TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr);
-
-  bool Changed = false;
-
-  bool HasCandidate = performShrinkWrapping(MF, RS.get());
-  Changed = postShrinkWrapping(HasCandidate, MF, RS.get());
-  if (!HasCandidate && !Changed)
-    return false;
-  if (!ArePointsInteresting())
-    return Changed;
 
   LLVM_DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: "
                     << Save->getNumber() << ' ' << Save->getName()
@@ -911,7 +587,7 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
   MFI.setSavePoint(Save);
   MFI.setRestorePoint(Restore);
   ++NumCandidates;
-  return Changed;
+  return false;
 }
 
 bool ShrinkWrap::isShrinkWrapEnabled(const MachineFunction &MF) {

diff  --git a/llvm/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir b/llvm/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir
index 34fafb750083c..bc60b7b571197 100644
--- a/llvm/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir
+++ b/llvm/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir
@@ -6,8 +6,8 @@
  ; RUN: llc -x=mir -simplify-mir -run-pass=shrink-wrap -o - %s | FileCheck %s
  ; CHECK:      name:            compiler_pop_stack
  ; CHECK:      frameInfo:       
- ; CHECK:      savePoint:       '%bb.1'
- ; CHECK-NEXT: restorePoint:    '%bb.7'
+ ; CHECK-NOT:  savePoint:
+ ; CHECK-NOT:  restorePoint:
  ; CHECK:      stack:
  ; CHECK:      name:            f
  ; CHECK:      frameInfo:       

diff  --git a/llvm/test/CodeGen/AArch64/shrinkwrap-split-restore-point.mir b/llvm/test/CodeGen/AArch64/shrinkwrap-split-restore-point.mir
deleted file mode 100644
index fc44f8ed97941..0000000000000
--- a/llvm/test/CodeGen/AArch64/shrinkwrap-split-restore-point.mir
+++ /dev/null
@@ -1,686 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
-# RUN: llc -mtriple=aarch64 -run-pass=shrink-wrap -o - %s | FileCheck %s
-
---- |
-  define void @shrink_test1(i32 %a) {
-  entry:
-    %cmp5 = icmp sgt i32 %a, 0
-    br i1 %cmp5, label %BB0, label %exit
-
-  BB0:                                              ; preds = %entry
-    %call = call i32 @fun()
-    %c = icmp eq i32 %call, 0
-    br i1 %c, label %BB1, label %exit
-
-  BB1:                                              ; preds = %BB0
-    %call2 = call i32 @fun()
-    br label %exit
-
-  exit:                                             ; preds = %BB1, %BB0, %entry
-    ret void
-  }
-
-  define void @shrink_test2(i32 %a, ptr %P1, ptr %P2) {
-  BB00:
-    %cmp5 = icmp sgt i32 %a, 0
-    br i1 %cmp5, label %BB01, label %exit
-
-  BB01:                                             ; preds = %BB00
-    store i32 %a, ptr %P1, align 4
-    %c1 = icmp sgt i32 %a, 1
-    br i1 %c1, label %BB02, label %BB03
-
-  BB02:                                             ; preds = %BB01
-    store i32 %a, ptr %P2, align 4
-    br label %BB03
-
-  BB03:                                             ; preds = %BB02, %BB01
-    %call03 = call i32 @fun()
-    %c03 = icmp eq i32 %call03, 0
-    br i1 %c03, label %BB04, label %BB05
-
-  BB04:                                             ; preds = %BB03
-    %call04 = call i32 @fun()
-    br label %BB05
-
-  BB05:                                             ; preds = %BB04, %BB03
-    %call05 = call i32 @fun()
-    %c05 = icmp eq i32 %call05, 0
-    br i1 %c05, label %BB06, label %BB07
-
-  BB06:                                             ; preds = %BB05
-    %call06 = call i32 @fun()
-    br label %exit
-
-  BB07:                                             ; preds = %BB05
-    %call07 = call i32 @fun2()
-    br label %exit
-
-  exit:                                             ; preds = %BB07, %BB06, %BB00
-    ret void
-  }
-
-  define void @noshrink_test1(i32 %a, i32 %v, i32 %v2) {
-  entry:
-    %cmp5 = icmp sgt i32 %a, 0
-    br i1 %cmp5, label %BB0, label %exit
-
-  BB0:                                              ; preds = %entry
-    %c = icmp eq i32 %a, 10
-    %c1 = icmp eq i32 %v, 10
-    %or.cond = select i1 %c, i1 %c1, i1 false
-    br i1 %or.cond, label %BB3, label %BB2
-
-  BB2:                                              ; preds = %BB0
-    %c2 = icmp eq i32 %v2, 10
-    br i1 %c2, label %BB4, label %exit
-
-  BB3:                                              ; preds = %BB0
-    %call3 = call i32 @fun()
-    br label %exit
-
-  BB4:                                              ; preds = %BB2
-    %call4 = call i32 @fun2()
-    br label %exit
-
-  exit:                                             ; preds = %BB4, %BB3, %BB2, %entry
-    ret void
-  }
-
-  define void @noshrink_test2(i32 %a) {
-  BB00:
-    %cmp5 = icmp sgt i32 %a, 0
-    br i1 %cmp5, label %BB01, label %InfLoop.preheader
-
-  InfLoop.preheader:                                ; preds = %BB00
-    br label %InfLoop
-
-  BB01:                                             ; preds = %BB00
-    %call = call i32 @fun()
-    %c = icmp eq i32 %call, 0
-    br i1 %c, label %BB02, label %exit
-
-  BB02:                                             ; preds = %BB01
-    %call2 = call i32 @fun()
-    br label %exit
-
-  InfLoop:                                          ; preds = %InfLoop.preheader, %InfLoop
-    %call3 = call i32 @fun()
-    br label %InfLoop
-
-  exit:                                             ; preds = %BB02, %BB01
-    ret void
-  }
-
-  define void @noshrink_test3(i32 %a) {
-  BB00:
-    %cmp5 = icmp sgt i32 %a, 0
-    %call02 = call i32 @fun()
-    br i1 %cmp5, label %BB02, label %BB01
-
-  BB01:                                             ; preds = %BB00
-    %0 = icmp eq i32 %call02, 0
-    br i1 %0, label %BB01.1, label %exit
-
-  BB01.1:                                           ; preds = %BB01
-    call void @abort() #0
-    unreachable
-
-  BB02:                                             ; preds = %BB00
-    %1 = icmp eq i32 %call02, 0
-    br i1 %1, label %BB03, label %BB04
-
-  BB03:                                             ; preds = %BB02
-    %call03 = call i32 @fun()
-    %c03 = icmp eq i32 %call03, 0
-    br i1 %c03, label %BB04, label %exit
-
-  BB04:                                             ; preds = %BB03, %BB02
-    %call04 = call i32 @fun()
-    br label %exit
-
-  exit:                                             ; preds = %BB04, %BB03, %BB01
-    ret void
-  }
-
-  declare i32 @fun()
-  declare i32 @fun2()
-  declare void @abort()
-
-...
----
-name:            shrink_test1
-alignment:       4
-tracksRegLiveness: true
-tracksDebugUserValues: true
-liveins:
-  - { reg: '$w0' }
-frameInfo:
-  maxAlignment:    1
-  adjustsStack:    true
-  hasCalls:        true
-  maxCallFrameSize: 0
-machineFunctionInfo: {}
-body:             |
-  ; CHECK-LABEL: name: shrink_test1
-  ; CHECK: bb.0.entry:
-  ; CHECK-NEXT:   successors: %bb.1(0x50000000), %bb.3(0x30000000)
-  ; CHECK-NEXT:   liveins: $w0
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   dead $wzr = SUBSWri killed renamable $w0, 1, 0, implicit-def $nzcv
-  ; CHECK-NEXT:   Bcc 11, %bb.3, implicit killed $nzcv
-  ; CHECK-NEXT:   B %bb.1
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.1.BB0:
-  ; CHECK-NEXT:   successors: %bb.2(0x30000000), %bb.4(0x50000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   CBNZW killed renamable $w0, %bb.4
-  ; CHECK-NEXT:   B %bb.2
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.2.BB1:
-  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.4:
-  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   B %bb.3
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.3.exit:
-  ; CHECK-NEXT:   RET_ReallyLR
-  bb.0.entry:
-    successors: %bb.1(0x50000000), %bb.3(0x30000000)
-    liveins: $w0
-
-    dead $wzr = SUBSWri killed renamable $w0, 1, 0, implicit-def $nzcv
-    Bcc 11, %bb.3, implicit killed $nzcv
-    B %bb.1
-
-  bb.1.BB0:
-    successors: %bb.2(0x30000000), %bb.3(0x50000000)
-
-    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-    BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
-    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-    CBNZW killed renamable $w0, %bb.3
-    B %bb.2
-
-  bb.2.BB1:
-    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-    BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-
-  bb.3.exit:
-    RET_ReallyLR
-
-...
----
-name:            shrink_test2
-alignment:       4
-tracksRegLiveness: true
-tracksDebugUserValues: true
-liveins:
-  - { reg: '$w0' }
-  - { reg: '$x1' }
-  - { reg: '$x2' }
-frameInfo:
-  maxAlignment:    1
-  adjustsStack:    true
-  hasCalls:        true
-  maxCallFrameSize: 0
-machineFunctionInfo: {}
-body:             |
-  ; CHECK-LABEL: name: shrink_test2
-  ; CHECK: bb.0.BB00:
-  ; CHECK-NEXT:   successors: %bb.1(0x50000000), %bb.8(0x30000000)
-  ; CHECK-NEXT:   liveins: $w0, $x1, $x2
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv
-  ; CHECK-NEXT:   Bcc 11, %bb.8, implicit killed $nzcv
-  ; CHECK-NEXT:   B %bb.1
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.1.BB01:
-  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
-  ; CHECK-NEXT:   liveins: $w0, $x1, $x2
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   dead $wzr = SUBSWri renamable $w0, 2, 0, implicit-def $nzcv
-  ; CHECK-NEXT:   STRWui renamable $w0, killed renamable $x1, 0 :: (store (s32) into %ir.P1)
-  ; CHECK-NEXT:   Bcc 11, %bb.3, implicit killed $nzcv
-  ; CHECK-NEXT:   B %bb.2
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.2.BB02:
-  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
-  ; CHECK-NEXT:   liveins: $w0, $x2
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   STRWui killed renamable $w0, killed renamable $x2, 0 :: (store (s32) into %ir.P2)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.3.BB03:
-  ; CHECK-NEXT:   successors: %bb.4(0x30000000), %bb.5(0x50000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   CBNZW killed renamable $w0, %bb.5
-  ; CHECK-NEXT:   B %bb.4
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.4.BB04:
-  ; CHECK-NEXT:   successors: %bb.5(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.5.BB05:
-  ; CHECK-NEXT:   successors: %bb.6(0x30000000), %bb.7(0x50000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   CBNZW killed renamable $w0, %bb.7
-  ; CHECK-NEXT:   B %bb.6
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.6.BB06:
-  ; CHECK-NEXT:   successors: %bb.9(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   B %bb.9
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.7.BB07:
-  ; CHECK-NEXT:   successors: %bb.9(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   BL @fun2, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.9:
-  ; CHECK-NEXT:   successors: %bb.8(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   B %bb.8
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.8.exit:
-  ; CHECK-NEXT:   RET_ReallyLR
-  bb.0.BB00:
-    successors: %bb.1(0x50000000), %bb.8(0x30000000)
-    liveins: $w0, $x1, $x2
-
-    dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv
-    Bcc 11, %bb.8, implicit killed $nzcv
-    B %bb.1
-
-  bb.1.BB01:
-    successors: %bb.2, %bb.3
-    liveins: $w0, $x1, $x2
-
-    dead $wzr = SUBSWri renamable $w0, 2, 0, implicit-def $nzcv
-    STRWui renamable $w0, killed renamable $x1, 0 :: (store (s32) into %ir.P1)
-    Bcc 11, %bb.3, implicit killed $nzcv
-    B %bb.2
-
-  bb.2.BB02:
-    liveins: $w0, $x2
-
-    STRWui killed renamable $w0, killed renamable $x2, 0 :: (store (s32) into %ir.P2)
-
-  bb.3.BB03:
-    successors: %bb.4(0x30000000), %bb.5(0x50000000)
-
-    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-    BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
-    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-    CBNZW killed renamable $w0, %bb.5
-    B %bb.4
-
-  bb.4.BB04:
-    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-    BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-
-  bb.5.BB05:
-    successors: %bb.6(0x30000000), %bb.7(0x50000000)
-
-    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-    BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
-    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-    CBNZW killed renamable $w0, %bb.7
-    B %bb.6
-
-  bb.6.BB06:
-    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-    BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-    B %bb.8
-
-  bb.7.BB07:
-    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-    BL @fun2, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-
-  bb.8.exit:
-    RET_ReallyLR
-
-...
----
-name:            noshrink_test1
-alignment:       4
-tracksRegLiveness: true
-tracksDebugUserValues: true
-liveins:
-  - { reg: '$w0' }
-  - { reg: '$w1' }
-  - { reg: '$w2' }
-frameInfo:
-  maxAlignment:    1
-  adjustsStack:    true
-  hasCalls:        true
-  maxCallFrameSize: 0
-machineFunctionInfo: {}
-body:             |
-  ; CHECK-LABEL: name: noshrink_test1
-  ; CHECK: bb.0.entry:
-  ; CHECK-NEXT:   successors: %bb.1(0x50000000), %bb.6(0x30000000)
-  ; CHECK-NEXT:   liveins: $w0, $w1, $w2
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv
-  ; CHECK-NEXT:   Bcc 11, %bb.6, implicit killed $nzcv
-  ; CHECK-NEXT:   B %bb.1
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.1.BB0:
-  ; CHECK-NEXT:   successors: %bb.2(0x60000000), %bb.3(0x20000000)
-  ; CHECK-NEXT:   liveins: $w0, $w1, $w2
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   dead $wzr = SUBSWri killed renamable $w0, 10, 0, implicit-def $nzcv
-  ; CHECK-NEXT:   Bcc 1, %bb.3, implicit killed $nzcv
-  ; CHECK-NEXT:   B %bb.2
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.2.BB0:
-  ; CHECK-NEXT:   successors: %bb.4(0x55555555), %bb.3(0x2aaaaaab)
-  ; CHECK-NEXT:   liveins: $w1, $w2
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   dead $wzr = SUBSWri killed renamable $w1, 10, 0, implicit-def $nzcv
-  ; CHECK-NEXT:   Bcc 0, %bb.4, implicit killed $nzcv
-  ; CHECK-NEXT:   B %bb.3
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.3.BB2:
-  ; CHECK-NEXT:   successors: %bb.5(0x40000000), %bb.6(0x40000000)
-  ; CHECK-NEXT:   liveins: $w2
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   dead $wzr = SUBSWri killed renamable $w2, 10, 0, implicit-def $nzcv
-  ; CHECK-NEXT:   Bcc 0, %bb.5, implicit killed $nzcv
-  ; CHECK-NEXT:   B %bb.6
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.4.BB3:
-  ; CHECK-NEXT:   successors: %bb.6(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   B %bb.6
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.5.BB4:
-  ; CHECK-NEXT:   successors: %bb.6(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   BL @fun2, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.6.exit:
-  ; CHECK-NEXT:   RET_ReallyLR
-  bb.0.entry:
-    successors: %bb.1(0x50000000), %bb.6(0x30000000)
-    liveins: $w0, $w1, $w2
-
-    dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv
-    Bcc 11, %bb.6, implicit killed $nzcv
-    B %bb.1
-
-  bb.1.BB0:
-    successors: %bb.2(0x60000000), %bb.3(0x20000000)
-    liveins: $w0, $w1, $w2
-
-    dead $wzr = SUBSWri killed renamable $w0, 10, 0, implicit-def $nzcv
-    Bcc 1, %bb.3, implicit killed $nzcv
-    B %bb.2
-
-  bb.2.BB0:
-    successors: %bb.4(0x55555555), %bb.3(0x2aaaaaab)
-    liveins: $w1, $w2
-
-    dead $wzr = SUBSWri killed renamable $w1, 10, 0, implicit-def $nzcv
-    Bcc 0, %bb.4, implicit killed $nzcv
-    B %bb.3
-
-  bb.3.BB2:
-    liveins: $w2
-
-    dead $wzr = SUBSWri killed renamable $w2, 10, 0, implicit-def $nzcv
-    Bcc 0, %bb.5, implicit killed $nzcv
-    B %bb.6
-
-  bb.4.BB3:
-    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-    BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-    B %bb.6
-
-  bb.5.BB4:
-    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-    BL @fun2, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-
-  bb.6.exit:
-    RET_ReallyLR
-
-...
----
-name:            noshrink_test2
-alignment:       4
-tracksRegLiveness: true
-tracksDebugUserValues: true
-liveins:
-  - { reg: '$w0' }
-frameInfo:
-  maxAlignment:    1
-  adjustsStack:    true
-  hasCalls:        true
-  maxCallFrameSize: 0
-machineFunctionInfo: {}
-body:             |
-  ; CHECK-LABEL: name: noshrink_test2
-  ; CHECK: bb.0.BB00:
-  ; CHECK-NEXT:   successors: %bb.2(0x50000000), %bb.1(0x30000000)
-  ; CHECK-NEXT:   liveins: $w0
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   dead $wzr = SUBSWri killed renamable $w0, 0, 0, implicit-def $nzcv
-  ; CHECK-NEXT:   Bcc 12, %bb.2, implicit killed $nzcv
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.1:
-  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   B %bb.4
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.2.BB01:
-  ; CHECK-NEXT:   successors: %bb.3(0x30000000), %bb.5(0x50000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   CBNZW killed renamable $w0, %bb.5
-  ; CHECK-NEXT:   B %bb.3
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.3.BB02:
-  ; CHECK-NEXT:   successors: %bb.5(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   B %bb.5
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.4.InfLoop:
-  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   B %bb.4
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.5.exit:
-  ; CHECK-NEXT:   RET_ReallyLR
-  bb.0.BB00:
-    successors: %bb.2(0x50000000), %bb.1(0x30000000)
-    liveins: $w0
-
-    dead $wzr = SUBSWri killed renamable $w0, 0, 0, implicit-def $nzcv
-    Bcc 12, %bb.2, implicit killed $nzcv
-
-  bb.1:
-    B %bb.4
-
-  bb.2.BB01:
-    successors: %bb.3(0x30000000), %bb.5(0x50000000)
-
-    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-    BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
-    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-    CBNZW killed renamable $w0, %bb.5
-    B %bb.3
-
-  bb.3.BB02:
-    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-    BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-    B %bb.5
-
-  bb.4.InfLoop:
-    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-    BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-    B %bb.4
-
-  bb.5.exit:
-    RET_ReallyLR
-
-...
----
-name:            noshrink_test3
-alignment:       4
-tracksRegLiveness: true
-tracksDebugUserValues: true
-liveins:
-  - { reg: '$w0' }
-frameInfo:
-  maxAlignment:    1
-  adjustsStack:    true
-  hasCalls:        true
-  maxCallFrameSize: 0
-machineFunctionInfo: {}
-body:             |
-  ; CHECK-LABEL: name: noshrink_test3
-  ; CHECK: bb.0.BB00:
-  ; CHECK-NEXT:   successors: %bb.3(0x50000000), %bb.1(0x30000000)
-  ; CHECK-NEXT:   liveins: $w0
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w19 = COPY $w0
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   dead $wzr = SUBSWri killed renamable $w19, 0, 0, implicit-def $nzcv
-  ; CHECK-NEXT:   Bcc 12, %bb.3, implicit killed $nzcv
-  ; CHECK-NEXT:   B %bb.1
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.1.BB01:
-  ; CHECK-NEXT:   successors: %bb.2(0x00000800), %bb.6(0x7ffff800)
-  ; CHECK-NEXT:   liveins: $w0
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   CBNZW killed renamable $w0, %bb.6
-  ; CHECK-NEXT:   B %bb.2
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.2.BB01.1:
-  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   BL @abort, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.3.BB02:
-  ; CHECK-NEXT:   successors: %bb.4(0x30000000), %bb.5(0x50000000)
-  ; CHECK-NEXT:   liveins: $w0
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   CBNZW killed renamable $w0, %bb.5
-  ; CHECK-NEXT:   B %bb.4
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.4.BB03:
-  ; CHECK-NEXT:   successors: %bb.5(0x30000000), %bb.6(0x50000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   CBNZW killed renamable $w0, %bb.6
-  ; CHECK-NEXT:   B %bb.5
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.5.BB04:
-  ; CHECK-NEXT:   successors: %bb.6(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.6.exit:
-  ; CHECK-NEXT:   RET_ReallyLR
-  bb.0.BB00:
-    successors: %bb.3(0x50000000), %bb.1(0x30000000)
-    liveins: $w0
-
-    renamable $w19 = COPY $w0
-    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-    BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
-    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-    dead $wzr = SUBSWri killed renamable $w19, 0, 0, implicit-def $nzcv
-    Bcc 12, %bb.3, implicit killed $nzcv
-    B %bb.1
-
-  bb.1.BB01:
-    successors: %bb.2(0x00000800), %bb.6(0x7ffff800)
-    liveins: $w0
-
-    CBNZW killed renamable $w0, %bb.6
-    B %bb.2
-
-  bb.2.BB01.1:
-
-    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-    BL @abort, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
-    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-
-  bb.3.BB02:
-    successors: %bb.4(0x30000000), %bb.5(0x50000000)
-    liveins: $w0
-
-    CBNZW killed renamable $w0, %bb.5
-    B %bb.4
-
-  bb.4.BB03:
-    successors: %bb.5(0x30000000), %bb.6(0x50000000)
-
-    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-    BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
-    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-    CBNZW killed renamable $w0, %bb.6
-    B %bb.5
-
-  bb.5.BB04:
-    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-    BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-
-  bb.6.exit:
-    RET_ReallyLR
-
-...

diff  --git a/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll b/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll
index 58ab0f5250d00..3d60686d5a116 100644
--- a/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll
+++ b/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll
@@ -5,11 +5,11 @@
 define i32 @add_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) {
 ; CHECK-LE-LABEL: add_user:
 ; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    .save {r4, lr}
+; CHECK-LE-NEXT:    push {r4, lr}
 ; CHECK-LE-NEXT:    cmp r0, #1
 ; CHECK-LE-NEXT:    blt .LBB0_4
 ; CHECK-LE-NEXT:  @ %bb.1: @ %for.body.preheader
-; CHECK-LE-NEXT:    .save {r4, lr}
-; CHECK-LE-NEXT:    push {r4, lr}
 ; CHECK-LE-NEXT:    subs r2, #2
 ; CHECK-LE-NEXT:    subs r3, #2
 ; CHECK-LE-NEXT:    mov.w r12, #0
@@ -23,24 +23,23 @@ define i32 @add_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture reado
 ; CHECK-LE-NEXT:    smlad r12, r4, lr, r12
 ; CHECK-LE-NEXT:    sxtah r1, r1, lr
 ; CHECK-LE-NEXT:    bne .LBB0_2
-; CHECK-LE-NEXT:  @ %bb.3:
-; CHECK-LE-NEXT:    pop.w {r4, lr}
+; CHECK-LE-NEXT:  @ %bb.3: @ %for.cond.cleanup
 ; CHECK-LE-NEXT:    add.w r0, r12, r1
-; CHECK-LE-NEXT:    bx lr
+; CHECK-LE-NEXT:    pop {r4, pc}
 ; CHECK-LE-NEXT:    .p2align 2
 ; CHECK-LE-NEXT:  .LBB0_4:
 ; CHECK-LE-NEXT:    mov.w r12, #0
 ; CHECK-LE-NEXT:    movs r1, #0
 ; CHECK-LE-NEXT:    add.w r0, r12, r1
-; CHECK-LE-NEXT:    bx lr
+; CHECK-LE-NEXT:    pop {r4, pc}
 ;
 ; CHECK-BE-LABEL: add_user:
 ; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    .save {r4, r5, r6, lr}
+; CHECK-BE-NEXT:    push {r4, r5, r6, lr}
 ; CHECK-BE-NEXT:    cmp r0, #1
 ; CHECK-BE-NEXT:    blt .LBB0_4
 ; CHECK-BE-NEXT:  @ %bb.1: @ %for.body.preheader
-; CHECK-BE-NEXT:    .save {r4, r5, r6, lr}
-; CHECK-BE-NEXT:    push {r4, r5, r6, lr}
 ; CHECK-BE-NEXT:    subs r2, #2
 ; CHECK-BE-NEXT:    subs r3, #2
 ; CHECK-BE-NEXT:    mov.w r12, #0
@@ -57,16 +56,15 @@ define i32 @add_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture reado
 ; CHECK-BE-NEXT:    subs r0, #1
 ; CHECK-BE-NEXT:    smlabb r12, r6, r4, r5
 ; CHECK-BE-NEXT:    bne .LBB0_2
-; CHECK-BE-NEXT:  @ %bb.3:
-; CHECK-BE-NEXT:    pop.w {r4, r5, r6, lr}
+; CHECK-BE-NEXT:  @ %bb.3: @ %for.cond.cleanup
 ; CHECK-BE-NEXT:    add.w r0, r12, r1
-; CHECK-BE-NEXT:    bx lr
+; CHECK-BE-NEXT:    pop {r4, r5, r6, pc}
 ; CHECK-BE-NEXT:    .p2align 2
 ; CHECK-BE-NEXT:  .LBB0_4:
 ; CHECK-BE-NEXT:    mov.w r12, #0
 ; CHECK-BE-NEXT:    movs r1, #0
 ; CHECK-BE-NEXT:    add.w r0, r12, r1
-; CHECK-BE-NEXT:    bx lr
+; CHECK-BE-NEXT:    pop {r4, r5, r6, pc}
 entry:
   %cmp24 = icmp sgt i32 %arg, 0
   br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup
@@ -111,11 +109,11 @@ for.body:
 define i32 @mul_bottom_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) {
 ; CHECK-LE-LABEL: mul_bottom_user:
 ; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    .save {r4, r5, r7, lr}
+; CHECK-LE-NEXT:    push {r4, r5, r7, lr}
 ; CHECK-LE-NEXT:    cmp r0, #1
 ; CHECK-LE-NEXT:    blt .LBB1_4
 ; CHECK-LE-NEXT:  @ %bb.1: @ %for.body.preheader
-; CHECK-LE-NEXT:    .save {r4, r5, r7, lr}
-; CHECK-LE-NEXT:    push {r4, r5, r7, lr}
 ; CHECK-LE-NEXT:    sub.w lr, r2, #2
 ; CHECK-LE-NEXT:    subs r3, #2
 ; CHECK-LE-NEXT:    mov.w r12, #0
@@ -130,24 +128,23 @@ define i32 @mul_bottom_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocaptur
 ; CHECK-LE-NEXT:    subs r0, #1
 ; CHECK-LE-NEXT:    mul r1, r5, r1
 ; CHECK-LE-NEXT:    bne .LBB1_2
-; CHECK-LE-NEXT:  @ %bb.3:
-; CHECK-LE-NEXT:    pop.w {r4, r5, r7, lr}
+; CHECK-LE-NEXT:  @ %bb.3: @ %for.cond.cleanup
 ; CHECK-LE-NEXT:    add.w r0, r12, r1
-; CHECK-LE-NEXT:    bx lr
+; CHECK-LE-NEXT:    pop {r4, r5, r7, pc}
 ; CHECK-LE-NEXT:    .p2align 2
 ; CHECK-LE-NEXT:  .LBB1_4:
 ; CHECK-LE-NEXT:    mov.w r12, #0
 ; CHECK-LE-NEXT:    movs r1, #0
 ; CHECK-LE-NEXT:    add.w r0, r12, r1
-; CHECK-LE-NEXT:    bx lr
+; CHECK-LE-NEXT:    pop {r4, r5, r7, pc}
 ;
 ; CHECK-BE-LABEL: mul_bottom_user:
 ; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    .save {r4, r5, r6, lr}
+; CHECK-BE-NEXT:    push {r4, r5, r6, lr}
 ; CHECK-BE-NEXT:    cmp r0, #1
 ; CHECK-BE-NEXT:    blt .LBB1_4
 ; CHECK-BE-NEXT:  @ %bb.1: @ %for.body.preheader
-; CHECK-BE-NEXT:    .save {r4, r5, r6, lr}
-; CHECK-BE-NEXT:    push {r4, r5, r6, lr}
 ; CHECK-BE-NEXT:    subs r2, #2
 ; CHECK-BE-NEXT:    subs r3, #2
 ; CHECK-BE-NEXT:    mov.w r12, #0
@@ -164,16 +161,15 @@ define i32 @mul_bottom_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocaptur
 ; CHECK-BE-NEXT:    subs r0, #1
 ; CHECK-BE-NEXT:    mul r1, lr, r1
 ; CHECK-BE-NEXT:    bne .LBB1_2
-; CHECK-BE-NEXT:  @ %bb.3:
-; CHECK-BE-NEXT:    pop.w {r4, r5, r6, lr}
+; CHECK-BE-NEXT:  @ %bb.3: @ %for.cond.cleanup
 ; CHECK-BE-NEXT:    add.w r0, r12, r1
-; CHECK-BE-NEXT:    bx lr
+; CHECK-BE-NEXT:    pop {r4, r5, r6, pc}
 ; CHECK-BE-NEXT:    .p2align 2
 ; CHECK-BE-NEXT:  .LBB1_4:
 ; CHECK-BE-NEXT:    mov.w r12, #0
 ; CHECK-BE-NEXT:    movs r1, #0
 ; CHECK-BE-NEXT:    add.w r0, r12, r1
-; CHECK-BE-NEXT:    bx lr
+; CHECK-BE-NEXT:    pop {r4, r5, r6, pc}
 entry:
   %cmp24 = icmp sgt i32 %arg, 0
   br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup
@@ -218,11 +214,11 @@ for.body:
 define i32 @mul_top_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) {
 ; CHECK-LE-LABEL: mul_top_user:
 ; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    .save {r4, lr}
+; CHECK-LE-NEXT:    push {r4, lr}
 ; CHECK-LE-NEXT:    cmp r0, #1
 ; CHECK-LE-NEXT:    blt .LBB2_4
 ; CHECK-LE-NEXT:  @ %bb.1: @ %for.body.preheader
-; CHECK-LE-NEXT:    .save {r4, lr}
-; CHECK-LE-NEXT:    push {r4, lr}
 ; CHECK-LE-NEXT:    subs r2, #2
 ; CHECK-LE-NEXT:    subs r3, #2
 ; CHECK-LE-NEXT:    mov.w r12, #0
@@ -237,24 +233,23 @@ define i32 @mul_top_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture r
 ; CHECK-LE-NEXT:    asr.w r4, r4, #16
 ; CHECK-LE-NEXT:    mul r1, r4, r1
 ; CHECK-LE-NEXT:    bne .LBB2_2
-; CHECK-LE-NEXT:  @ %bb.3:
-; CHECK-LE-NEXT:    pop.w {r4, lr}
+; CHECK-LE-NEXT:  @ %bb.3: @ %for.cond.cleanup
 ; CHECK-LE-NEXT:    add.w r0, r12, r1
-; CHECK-LE-NEXT:    bx lr
+; CHECK-LE-NEXT:    pop {r4, pc}
 ; CHECK-LE-NEXT:    .p2align 2
 ; CHECK-LE-NEXT:  .LBB2_4:
 ; CHECK-LE-NEXT:    mov.w r12, #0
 ; CHECK-LE-NEXT:    movs r1, #0
 ; CHECK-LE-NEXT:    add.w r0, r12, r1
-; CHECK-LE-NEXT:    bx lr
+; CHECK-LE-NEXT:    pop {r4, pc}
 ;
 ; CHECK-BE-LABEL: mul_top_user:
 ; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    .save {r4, r5, r6, lr}
+; CHECK-BE-NEXT:    push {r4, r5, r6, lr}
 ; CHECK-BE-NEXT:    cmp r0, #1
 ; CHECK-BE-NEXT:    blt .LBB2_4
 ; CHECK-BE-NEXT:  @ %bb.1: @ %for.body.preheader
-; CHECK-BE-NEXT:    .save {r4, r5, r6, lr}
-; CHECK-BE-NEXT:    push {r4, r5, r6, lr}
 ; CHECK-BE-NEXT:    subs r2, #2
 ; CHECK-BE-NEXT:    subs r3, #2
 ; CHECK-BE-NEXT:    mov.w r12, #0
@@ -271,16 +266,15 @@ define i32 @mul_top_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture r
 ; CHECK-BE-NEXT:    subs r0, #1
 ; CHECK-BE-NEXT:    mul r1, r6, r1
 ; CHECK-BE-NEXT:    bne .LBB2_2
-; CHECK-BE-NEXT:  @ %bb.3:
-; CHECK-BE-NEXT:    pop.w {r4, r5, r6, lr}
+; CHECK-BE-NEXT:  @ %bb.3: @ %for.cond.cleanup
 ; CHECK-BE-NEXT:    add.w r0, r12, r1
-; CHECK-BE-NEXT:    bx lr
+; CHECK-BE-NEXT:    pop {r4, r5, r6, pc}
 ; CHECK-BE-NEXT:    .p2align 2
 ; CHECK-BE-NEXT:  .LBB2_4:
 ; CHECK-BE-NEXT:    mov.w r12, #0
 ; CHECK-BE-NEXT:    movs r1, #0
 ; CHECK-BE-NEXT:    add.w r0, r12, r1
-; CHECK-BE-NEXT:    bx lr
+; CHECK-BE-NEXT:    pop {r4, r5, r6, pc}
 entry:
   %cmp24 = icmp sgt i32 %arg, 0
   br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup
@@ -325,11 +319,11 @@ for.body:
 define i32 @and_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) {
 ; CHECK-LE-LABEL: and_user:
 ; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    .save {r4, lr}
+; CHECK-LE-NEXT:    push {r4, lr}
 ; CHECK-LE-NEXT:    cmp r0, #1
 ; CHECK-LE-NEXT:    blt .LBB3_4
 ; CHECK-LE-NEXT:  @ %bb.1: @ %for.body.preheader
-; CHECK-LE-NEXT:    .save {r4, lr}
-; CHECK-LE-NEXT:    push {r4, lr}
 ; CHECK-LE-NEXT:    sub.w lr, r2, #2
 ; CHECK-LE-NEXT:    subs r3, #2
 ; CHECK-LE-NEXT:    mov.w r12, #0
@@ -344,24 +338,23 @@ define i32 @and_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture reado
 ; CHECK-LE-NEXT:    uxth r2, r2
 ; CHECK-LE-NEXT:    mul r1, r2, r1
 ; CHECK-LE-NEXT:    bne .LBB3_2
-; CHECK-LE-NEXT:  @ %bb.3:
-; CHECK-LE-NEXT:    pop.w {r4, lr}
+; CHECK-LE-NEXT:  @ %bb.3: @ %for.cond.cleanup
 ; CHECK-LE-NEXT:    add.w r0, r12, r1
-; CHECK-LE-NEXT:    bx lr
+; CHECK-LE-NEXT:    pop {r4, pc}
 ; CHECK-LE-NEXT:    .p2align 2
 ; CHECK-LE-NEXT:  .LBB3_4:
 ; CHECK-LE-NEXT:    mov.w r12, #0
 ; CHECK-LE-NEXT:    movs r1, #0
 ; CHECK-LE-NEXT:    add.w r0, r12, r1
-; CHECK-LE-NEXT:    bx lr
+; CHECK-LE-NEXT:    pop {r4, pc}
 ;
 ; CHECK-BE-LABEL: and_user:
 ; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    .save {r4, r5, r6, lr}
+; CHECK-BE-NEXT:    push {r4, r5, r6, lr}
 ; CHECK-BE-NEXT:    cmp r0, #1
 ; CHECK-BE-NEXT:    blt .LBB3_4
 ; CHECK-BE-NEXT:  @ %bb.1: @ %for.body.preheader
-; CHECK-BE-NEXT:    .save {r4, r5, r6, lr}
-; CHECK-BE-NEXT:    push {r4, r5, r6, lr}
 ; CHECK-BE-NEXT:    subs r2, #2
 ; CHECK-BE-NEXT:    subs r3, #2
 ; CHECK-BE-NEXT:    mov.w r12, #0
@@ -378,16 +371,15 @@ define i32 @and_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture reado
 ; CHECK-BE-NEXT:    subs r0, #1
 ; CHECK-BE-NEXT:    mul r1, lr, r1
 ; CHECK-BE-NEXT:    bne .LBB3_2
-; CHECK-BE-NEXT:  @ %bb.3:
-; CHECK-BE-NEXT:    pop.w {r4, r5, r6, lr}
+; CHECK-BE-NEXT:  @ %bb.3: @ %for.cond.cleanup
 ; CHECK-BE-NEXT:    add.w r0, r12, r1
-; CHECK-BE-NEXT:    bx lr
+; CHECK-BE-NEXT:    pop {r4, r5, r6, pc}
 ; CHECK-BE-NEXT:    .p2align 2
 ; CHECK-BE-NEXT:  .LBB3_4:
 ; CHECK-BE-NEXT:    mov.w r12, #0
 ; CHECK-BE-NEXT:    movs r1, #0
 ; CHECK-BE-NEXT:    add.w r0, r12, r1
-; CHECK-BE-NEXT:    bx lr
+; CHECK-BE-NEXT:    pop {r4, r5, r6, pc}
 entry:
   %cmp24 = icmp sgt i32 %arg, 0
   br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup

diff  --git a/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll b/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll
index c9724674afd82..2755d354a6244 100644
--- a/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll
+++ b/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll
@@ -6,11 +6,11 @@
 define void @ssat_unroll(ptr %pSrcA, ptr %pSrcB, ptr %pDst, i32 %blockSize) {
 ; CHECK-LABEL: ssat_unroll:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    bxeq lr
-; CHECK-NEXT:  .LBB0_1: @ %while.body.preheader
 ; CHECK-NEXT:    .save {r11, lr}
 ; CHECK-NEXT:    push {r11, lr}
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    beq .LBB0_5
+; CHECK-NEXT:  @ %bb.1: @ %while.body.preheader
 ; CHECK-NEXT:    sub r12, r3, #1
 ; CHECK-NEXT:    tst r3, #1
 ; CHECK-NEXT:    beq .LBB0_3
@@ -23,7 +23,7 @@ define void @ssat_unroll(ptr %pSrcA, ptr %pSrcB, ptr %pDst, i32 %blockSize) {
 ; CHECK-NEXT:    mov r3, r12
 ; CHECK-NEXT:  .LBB0_3: @ %while.body.prol.loopexit
 ; CHECK-NEXT:    cmp r12, #0
-; CHECK-NEXT:    beq .LBB0_5
+; CHECK-NEXT:    popeq {r11, pc}
 ; CHECK-NEXT:  .LBB0_4: @ %while.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ldrsh r12, [r0]
@@ -41,9 +41,8 @@ define void @ssat_unroll(ptr %pSrcA, ptr %pSrcB, ptr %pDst, i32 %blockSize) {
 ; CHECK-NEXT:    strh r12, [r2, #2]
 ; CHECK-NEXT:    add r2, r2, #4
 ; CHECK-NEXT:    bne .LBB0_4
-; CHECK-NEXT:  .LBB0_5:
-; CHECK-NEXT:    pop {r11, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  .LBB0_5: @ %while.end
+; CHECK-NEXT:    pop {r11, pc}
 entry:
   %cmp.not7 = icmp eq i32 %blockSize, 0
   br i1 %cmp.not7, label %while.end, label %while.body.preheader
@@ -126,11 +125,11 @@ while.end:                                        ; preds = %while.body, %while.
 define void @ssat_unroll_minmax(ptr nocapture readonly %pSrcA, ptr nocapture readonly %pSrcB, ptr nocapture writeonly %pDst, i32 %blockSize) {
 ; CHECK-LABEL: ssat_unroll_minmax:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    bxeq lr
-; CHECK-NEXT:  .LBB1_1: @ %while.body.preheader
 ; CHECK-NEXT:    .save {r11, lr}
 ; CHECK-NEXT:    push {r11, lr}
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    beq .LBB1_5
+; CHECK-NEXT:  @ %bb.1: @ %while.body.preheader
 ; CHECK-NEXT:    sub r12, r3, #1
 ; CHECK-NEXT:    tst r3, #1
 ; CHECK-NEXT:    beq .LBB1_3
@@ -143,7 +142,7 @@ define void @ssat_unroll_minmax(ptr nocapture readonly %pSrcA, ptr nocapture rea
 ; CHECK-NEXT:    mov r3, r12
 ; CHECK-NEXT:  .LBB1_3: @ %while.body.prol.loopexit
 ; CHECK-NEXT:    cmp r12, #0
-; CHECK-NEXT:    beq .LBB1_5
+; CHECK-NEXT:    popeq {r11, pc}
 ; CHECK-NEXT:  .LBB1_4: @ %while.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ldrsh r12, [r0]
@@ -161,9 +160,8 @@ define void @ssat_unroll_minmax(ptr nocapture readonly %pSrcA, ptr nocapture rea
 ; CHECK-NEXT:    strh r12, [r2, #2]
 ; CHECK-NEXT:    add r2, r2, #4
 ; CHECK-NEXT:    bne .LBB1_4
-; CHECK-NEXT:  .LBB1_5:
-; CHECK-NEXT:    pop {r11, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  .LBB1_5: @ %while.end
+; CHECK-NEXT:    pop {r11, pc}
 entry:
   %cmp.not7 = icmp eq i32 %blockSize, 0
   br i1 %cmp.not7, label %while.end, label %while.body.preheader

diff  --git a/llvm/test/CodeGen/LoongArch/jump-table.ll b/llvm/test/CodeGen/LoongArch/jump-table.ll
index c3028bade3c30..8bd4c952cf1ee 100644
--- a/llvm/test/CodeGen/LoongArch/jump-table.ll
+++ b/llvm/test/CodeGen/LoongArch/jump-table.ll
@@ -98,7 +98,7 @@ define void @switch_4_arms(i32 %in, ptr %out) nounwind {
 ; LA32-JT-NEXT:    b .LBB0_5
 ; LA32-JT-NEXT:  .LBB0_4: # %bb4
 ; LA32-JT-NEXT:    ori $a0, $zero, 1
-; LA32-JT-NEXT:  .LBB0_5:
+; LA32-JT-NEXT:  .LBB0_5: # %exit
 ; LA32-JT-NEXT:    st.w $a0, $a1, 0
 ; LA32-JT-NEXT:  .LBB0_6: # %exit
 ; LA32-JT-NEXT:    ret
@@ -123,7 +123,7 @@ define void @switch_4_arms(i32 %in, ptr %out) nounwind {
 ; LA64-JT-NEXT:    b .LBB0_5
 ; LA64-JT-NEXT:  .LBB0_4: # %bb4
 ; LA64-JT-NEXT:    ori $a0, $zero, 1
-; LA64-JT-NEXT:  .LBB0_5:
+; LA64-JT-NEXT:  .LBB0_5: # %exit
 ; LA64-JT-NEXT:    st.w $a0, $a1, 0
 ; LA64-JT-NEXT:  .LBB0_6: # %exit
 ; LA64-JT-NEXT:    ret

diff  --git a/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll b/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll
index 35ddcfd9ba6d6..0cf7119eab84c 100644
--- a/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll
+++ b/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll
@@ -39,19 +39,19 @@ define i64 @two_chain_same_offset_succ_i32(ptr %p, i32 %offset, i32 %base1, i64
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    cmplwi r6, 0
 ; CHECK-NEXT:    cmpwi cr1, r6, 0
+; CHECK-NEXT:    stw r30, -8(r1) # 4-byte Folded Spill
+; CHECK-NEXT:    stw r31, -4(r1) # 4-byte Folded Spill
 ; CHECK-NEXT:    crandc 4*cr5+lt, 4*cr1+lt, eq
 ; CHECK-NEXT:    cmpwi cr1, r7, 0
-; CHECK-NEXT:    bc 12, 4*cr5+lt, L..BB0_6
+; CHECK-NEXT:    bc 12, 4*cr5+lt, L..BB0_5
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    crand 4*cr5+lt, eq, 4*cr1+eq
-; CHECK-NEXT:    bc 12, 4*cr5+lt, L..BB0_6
+; CHECK-NEXT:    bc 12, 4*cr5+lt, L..BB0_5
 ; CHECK-NEXT:  # %bb.2: # %for.body.preheader
 ; CHECK-NEXT:    slwi r8, r4, 1
 ; CHECK-NEXT:    li r10, 0
 ; CHECK-NEXT:    li r11, 0
-; CHECK-NEXT:    stw r30, -8(r1) # 4-byte Folded Spill
 ; CHECK-NEXT:    add r8, r4, r8
-; CHECK-NEXT:    stw r31, -4(r1) # 4-byte Folded Spill
 ; CHECK-NEXT:    add r9, r5, r8
 ; CHECK-NEXT:    add r5, r5, r4
 ; CHECK-NEXT:    add r8, r3, r5
@@ -83,15 +83,15 @@ define i64 @two_chain_same_offset_succ_i32(ptr %p, i32 %offset, i32 %base1, i64
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    crand 4*cr5+lt, eq, 4*cr1+lt
 ; CHECK-NEXT:    bc 12, 4*cr5+lt, L..BB0_3
-; CHECK-NEXT:  # %bb.5:
+; CHECK-NEXT:    b L..BB0_6
+; CHECK-NEXT:  L..BB0_5:
+; CHECK-NEXT:    li r3, 0
+; CHECK-NEXT:    li r5, 0
+; CHECK-NEXT:  L..BB0_6: # %for.cond.cleanup
 ; CHECK-NEXT:    lwz r31, -4(r1) # 4-byte Folded Reload
 ; CHECK-NEXT:    lwz r30, -8(r1) # 4-byte Folded Reload
 ; CHECK-NEXT:    mr r4, r5
 ; CHECK-NEXT:    blr
-; CHECK-NEXT:  L..BB0_6:
-; CHECK-NEXT:    li r3, 0
-; CHECK-NEXT:    li r4, 0
-; CHECK-NEXT:    blr
 entry:
   %add = add nsw i32 %base1, %offset
   %mul = shl nsw i32 %offset, 1

diff  --git a/llvm/test/CodeGen/PowerPC/common-chain.ll b/llvm/test/CodeGen/PowerPC/common-chain.ll
index 5f8c21e30f8fd..ea8a72e7d11e1 100644
--- a/llvm/test/CodeGen/PowerPC/common-chain.ll
+++ b/llvm/test/CodeGen/PowerPC/common-chain.ll
@@ -137,14 +137,14 @@ define i64 @not_perfect_chain_all_same_offset_fail(ptr %p, i64 %offset, i64 %bas
 ; CHECK-LABEL: not_perfect_chain_all_same_offset_fail:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    cmpdi r6, 0
+; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    ble cr0, .LBB1_4
 ; CHECK-NEXT:  # %bb.1: # %for.body.preheader
-; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    sldi r7, r4, 1
+; CHECK-NEXT:    sldi r9, r4, 2
 ; CHECK-NEXT:    add r5, r3, r5
 ; CHECK-NEXT:    li r3, 0
 ; CHECK-NEXT:    add r8, r4, r7
-; CHECK-NEXT:    sldi r9, r4, 2
 ; CHECK-NEXT:    mtctr r6
 ; CHECK-NEXT:    add r10, r4, r9
 ; CHECK-NEXT:    .p2align 4
@@ -161,11 +161,12 @@ define i64 @not_perfect_chain_all_same_offset_fail(ptr %p, i64 %offset, i64 %bas
 ; CHECK-NEXT:    mulld r6, r6, r0
 ; CHECK-NEXT:    maddld r3, r6, r30, r3
 ; CHECK-NEXT:    bdnz .LBB1_2
-; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:  # %bb.3: # %for.cond.cleanup
 ; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    blr
 ; CHECK-NEXT:  .LBB1_4:
 ; CHECK-NEXT:    li r3, 0
+; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    blr
 entry:
   %mul = shl nsw i64 %offset, 1
@@ -424,20 +425,20 @@ define i64 @not_same_offset_fail(ptr %p, i64 %offset, i64 %base1, i64 %n) {
 ; CHECK-LABEL: not_same_offset_fail:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    cmpdi r6, 0
-; CHECK-NEXT:    ble cr0, .LBB4_4
-; CHECK-NEXT:  # %bb.1: # %for.body.preheader
 ; CHECK-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    add r5, r3, r5
-; CHECK-NEXT:    li r3, 0
 ; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    mtctr r6
+; CHECK-NEXT:    ble cr0, .LBB4_3
+; CHECK-NEXT:  # %bb.1: # %for.body.preheader
 ; CHECK-NEXT:    mulli r11, r4, 10
 ; CHECK-NEXT:    sldi r8, r4, 2
+; CHECK-NEXT:    add r5, r3, r5
+; CHECK-NEXT:    li r3, 0
 ; CHECK-NEXT:    add r8, r4, r8
 ; CHECK-NEXT:    sldi r9, r4, 3
-; CHECK-NEXT:    sub r10, r9, r4
+; CHECK-NEXT:    mtctr r6
 ; CHECK-NEXT:    sldi r7, r4, 1
+; CHECK-NEXT:    sub r10, r9, r4
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB4_2: # %for.body
 ; CHECK-NEXT:    #
@@ -454,14 +455,14 @@ define i64 @not_same_offset_fail(ptr %p, i64 %offset, i64 %base1, i64 %n) {
 ; CHECK-NEXT:    mulld r6, r6, r29
 ; CHECK-NEXT:    maddld r3, r6, r28, r3
 ; CHECK-NEXT:    bdnz .LBB4_2
-; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    b .LBB4_4
+; CHECK-NEXT:  .LBB4_3:
+; CHECK-NEXT:    li r3, 0
+; CHECK-NEXT:  .LBB4_4: # %for.cond.cleanup
 ; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    blr
-; CHECK-NEXT:  .LBB4_4:
-; CHECK-NEXT:    li r3, 0
-; CHECK-NEXT:    blr
 entry:
   %mul = shl nsw i64 %offset, 1
   %mul2 = mul nsw i64 %offset, 5

diff  --git a/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll b/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll
index 37baef6043884..769b358131e9a 100644
--- a/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll
+++ b/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll
@@ -192,21 +192,21 @@ define i64 @test_max_number_reminder(ptr %arg, i32 signext %arg1) {
 ; CHECK-LABEL: test_max_number_reminder:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    cmplwi r4, 0
-; CHECK-NEXT:    beq cr0, .LBB2_4
+; CHECK-NEXT:    std r25, -56(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    beq cr0, .LBB2_3
 ; CHECK-NEXT:  # %bb.1: # %bb3.preheader
 ; CHECK-NEXT:    cmpldi r4, 1
 ; CHECK-NEXT:    li r5, 1
 ; CHECK-NEXT:    addi r9, r3, 4002
-; CHECK-NEXT:    std r25, -56(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    li r6, -1
-; CHECK-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    li r7, 3
 ; CHECK-NEXT:    li r8, 5
 ; CHECK-NEXT:    li r10, 9
-; CHECK-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    iselgt r3, r4, r5
 ; CHECK-NEXT:    mtctr r3
 ; CHECK-NEXT:    li r3, 0
@@ -232,7 +232,10 @@ define i64 @test_max_number_reminder(ptr %arg, i32 signext %arg1) {
 ; CHECK-NEXT:    mulld r11, r11, r26
 ; CHECK-NEXT:    maddld r3, r11, r25, r3
 ; CHECK-NEXT:    bdnz .LBB2_2
-; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    b .LBB2_4
+; CHECK-NEXT:  .LBB2_3:
+; CHECK-NEXT:    li r3, 0
+; CHECK-NEXT:  .LBB2_4: # %bb45
 ; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
@@ -241,9 +244,6 @@ define i64 @test_max_number_reminder(ptr %arg, i32 signext %arg1) {
 ; CHECK-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r25, -56(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    blr
-; CHECK-NEXT:  .LBB2_4:
-; CHECK-NEXT:    addi r3, r4, 0
-; CHECK-NEXT:    blr
 bb:
   %i = sext i32 %arg1 to i64
   %i2 = icmp eq i32 %arg1, 0
@@ -475,11 +475,11 @@ define dso_local i64 @test_ds_multiple_chains(ptr %arg, ptr %arg1, i32 signext %
 ; CHECK-LABEL: test_ds_multiple_chains:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    cmplwi r5, 0
-; CHECK-NEXT:    beq cr0, .LBB5_4
+; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    beq cr0, .LBB5_3
 ; CHECK-NEXT:  # %bb.1: # %bb4.preheader
 ; CHECK-NEXT:    cmpldi r5, 1
 ; CHECK-NEXT:    li r6, 1
-; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    addi r3, r3, 4001
 ; CHECK-NEXT:    addi r4, r4, 4001
 ; CHECK-NEXT:    li r7, 9
@@ -507,13 +507,13 @@ define dso_local i64 @test_ds_multiple_chains(ptr %arg, ptr %arg1, i32 signext %
 ; CHECK-NEXT:    mulld r8, r8, r30
 ; CHECK-NEXT:    maddld r6, r8, r9, r6
 ; CHECK-NEXT:    bdnz .LBB5_2
-; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    b .LBB5_4
+; CHECK-NEXT:  .LBB5_3:
+; CHECK-NEXT:    li r6, 0
+; CHECK-NEXT:  .LBB5_4: # %bb43
 ; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    add r3, r6, r5
 ; CHECK-NEXT:    blr
-; CHECK-NEXT:  .LBB5_4:
-; CHECK-NEXT:    addi r3, r5, 0
-; CHECK-NEXT:    blr
 bb:
   %i = sext i32 %arg2 to i64
   %i3 = icmp eq i32 %arg2, 0
@@ -595,17 +595,17 @@ define i64 @test_ds_cross_basic_blocks(ptr %arg, i32 signext %arg1) {
 ; CHECK-LABEL: test_ds_cross_basic_blocks:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    cmplwi r4, 0
-; CHECK-NEXT:    beq cr0, .LBB6_9
+; CHECK-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    beq cr0, .LBB6_8
 ; CHECK-NEXT:  # %bb.1: # %bb3
 ; CHECK-NEXT:    addis r5, r2, .LC0@toc@ha
 ; CHECK-NEXT:    cmpldi r4, 1
 ; CHECK-NEXT:    li r7, 1
 ; CHECK-NEXT:    addi r6, r3, 4009
-; CHECK-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    ld r5, .LC0@toc@l(r5)
 ; CHECK-NEXT:    iselgt r3, r4, r7
-; CHECK-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    li r4, -7
 ; CHECK-NEXT:    li r8, -6
 ; CHECK-NEXT:    li r9, 1
@@ -634,7 +634,7 @@ define i64 @test_ds_cross_basic_blocks(ptr %arg, i32 signext %arg1) {
 ; CHECK-NEXT:    mulld r0, r0, r10
 ; CHECK-NEXT:    mulld r0, r0, r9
 ; CHECK-NEXT:    maddld r3, r0, r7, r3
-; CHECK-NEXT:    bdz .LBB6_8
+; CHECK-NEXT:    bdz .LBB6_9
 ; CHECK-NEXT:  .LBB6_4: # %bb5
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lbzu r0, 1(r5)
@@ -666,13 +666,12 @@ define i64 @test_ds_cross_basic_blocks(ptr %arg, i32 signext %arg1) {
 ; CHECK-NEXT:    add r7, r0, r7
 ; CHECK-NEXT:    b .LBB6_3
 ; CHECK-NEXT:  .LBB6_8:
+; CHECK-NEXT:    li r3, 0
+; CHECK-NEXT:  .LBB6_9: # %bb64
 ; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    blr
-; CHECK-NEXT:  .LBB6_9:
-; CHECK-NEXT:    li r3, 0
-; CHECK-NEXT:    blr
 bb:
   %i = sext i32 %arg1 to i64
   %i2 = icmp eq i32 %arg1, 0

diff  --git a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll
index 79f2ef3e3746a..b91f20b710a2d 100644
--- a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll
+++ b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll
@@ -6,24 +6,24 @@ define void @foo(ptr readonly %0, ptr %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    cmpd 5, 7
-; CHECK-NEXT:    bgelr 0
-; CHECK-NEXT:  # %bb.1: # %.preheader
+; CHECK-NEXT:    std 22, -80(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 23, -72(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 24, -64(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 25, -56(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 26, -48(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 27, -40(1) # 8-byte Folded Spill
-; CHECK-NEXT:    addi 27, 5, 2
 ; CHECK-NEXT:    std 28, -32(1) # 8-byte Folded Spill
-; CHECK-NEXT:    addi 28, 5, 3
+; CHECK-NEXT:    std 29, -24(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 30, -16(1) # 8-byte Folded Spill
+; CHECK-NEXT:    bge 0, .LBB0_6
+; CHECK-NEXT:  # %bb.1: # %.preheader
 ; CHECK-NEXT:    addi 30, 5, 1
+; CHECK-NEXT:    addi 28, 5, 3
+; CHECK-NEXT:    addi 27, 5, 2
 ; CHECK-NEXT:    mulld 12, 8, 5
-; CHECK-NEXT:    mulld 0, 9, 8
-; CHECK-NEXT:    std 29, -24(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    addi 29, 3, 16
+; CHECK-NEXT:    mulld 0, 9, 8
 ; CHECK-NEXT:    sldi 11, 10, 3
-; CHECK-NEXT:    std 22, -80(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 23, -72(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 24, -64(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 25, -56(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 26, -48(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    mulld 30, 8, 30
 ; CHECK-NEXT:    mulld 28, 8, 28
 ; CHECK-NEXT:    mulld 8, 8, 27

diff  --git a/llvm/test/CodeGen/PowerPC/shrink-wrap.ll b/llvm/test/CodeGen/PowerPC/shrink-wrap.ll
index 612c3fdb6b9bf..98fa21c359054 100644
--- a/llvm/test/CodeGen/PowerPC/shrink-wrap.ll
+++ b/llvm/test/CodeGen/PowerPC/shrink-wrap.ll
@@ -28,18 +28,27 @@ entry:
 ; CHECK-LABEL: {{[\.]?}}shrinkwrapme:
 ; CHECK:            # %bb.0:
 ; CHECK-NEXT:         cmpwi
-; CHECKAIX:           blt
-
 ; Prolog code
-; CHECK:            # %bb.1:
 ; CHECK64-COUNT-18:   std
+
 ; CHECK32-COUNT-18:   stw
 
+; CHECK:              ble 0, {{.*}}BB0_3
+; CHECKAIX:           blt 0, {{.*}}BB0_3
+; CHECK:            # %bb.1:
+; CHECK:              li
+; CHECK:            {{.*}}BB0_2:
+; CHECK:              add
+; CHECK:              bdnz {{.*}}BB0_2
+; CHECK-NEXT:         b {{.*}}BB0_4
+; CHECK:            {{.*}}BB0_3:
+; CHECK-NEXT:         li
+; CHECK:            {{.*}}BB0_4:
+
 ; Epilog code
 ; CHECK64-COUNT-18:   ld
+;
 ; CHECK32-COUNT-18:   lwz
-; CHECK:              blr
 
-; CHECK:            {{.*}}BB0_4:
-; CHECK-NEXT:         li
+; CHECK:              blr
 }

diff  --git a/llvm/test/CodeGen/PowerPC/shrink-wrap.mir b/llvm/test/CodeGen/PowerPC/shrink-wrap.mir
index cd0a0d95f827d..f0540adad4937 100644
--- a/llvm/test/CodeGen/PowerPC/shrink-wrap.mir
+++ b/llvm/test/CodeGen/PowerPC/shrink-wrap.mir
@@ -1,4 +1,3 @@
-# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 # RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple powerpc64le-unknown-linux-gnu \
 # RUN:   -run-pass=shrink-wrap -o - %s | FileCheck %s
 # RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple powerpc-ibm-aix-xcoff \
@@ -123,15 +122,13 @@ body:             |
     BDNZ8 %bb.4, implicit-def dead $ctr8, implicit $ctr8
     B %bb.3
 
-  ; CHECK:      savePoint:       '%bb.2'
-  ; CHECK-NEXT: restorePoint:    '%bb.5'
+  ; CHECK:      savePoint:       ''
+  ; CHECK-NEXT: restorePoint:    ''
 
   ; CHECK:      bb.4.for.body:
-  ; CHECK-NEXT:   successors: %bb.4(0x7c000000), %bb.5(0x04000000)
+  ; CHECK-NEXT:   successors: %bb.4(0x7c000000), %bb.3(0x04000000)
   ; CHECK-NEXT:   liveins: $r4, $x3
   ; CHECK:        INLINEASM
   ; CHECK-NEXT:   BDNZ8 %bb.4
-  ; CHECK-NEXT:   B %bb.5
+  ; CHECK-NEXT:   B %bb.3
 ...
-## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-# CHECK: {{.*}}

diff  --git a/llvm/test/CodeGen/RISCV/aext-to-sext.ll b/llvm/test/CodeGen/RISCV/aext-to-sext.ll
index 0aa04f40f6a52..806c495fa6777 100644
--- a/llvm/test/CodeGen/RISCV/aext-to-sext.ll
+++ b/llvm/test/CodeGen/RISCV/aext-to-sext.ll
@@ -11,22 +11,21 @@
 define void @quux(i32 signext %arg, i32 signext %arg1) nounwind {
 ; RV64I-LABEL: quux:
 ; RV64I:       # %bb.0: # %bb
-; RV64I-NEXT:    beq a0, a1, .LBB0_4
-; RV64I-NEXT:  # %bb.1: # %bb2.preheader
 ; RV64I-NEXT:    addi sp, sp, -16
 ; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    beq a0, a1, .LBB0_3
+; RV64I-NEXT:  # %bb.1: # %bb2.preheader
 ; RV64I-NEXT:    subw s0, a1, a0
 ; RV64I-NEXT:  .LBB0_2: # %bb2
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    call hoge@plt
 ; RV64I-NEXT:    addiw s0, s0, -1
 ; RV64I-NEXT:    bnez s0, .LBB0_2
-; RV64I-NEXT:  # %bb.3:
+; RV64I-NEXT:  .LBB0_3: # %bb6
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 16
-; RV64I-NEXT:  .LBB0_4: # %bb6
 ; RV64I-NEXT:    ret
 bb:
   %tmp = icmp eq i32 %arg, %arg1

diff  --git a/llvm/test/CodeGen/RISCV/fli-licm.ll b/llvm/test/CodeGen/RISCV/fli-licm.ll
index f37ace801b159..93bb934c1cb0d 100644
--- a/llvm/test/CodeGen/RISCV/fli-licm.ll
+++ b/llvm/test/CodeGen/RISCV/fli-licm.ll
@@ -12,11 +12,11 @@
 define void @process_nodes(ptr %0) nounwind {
 ; RV32-LABEL: process_nodes:
 ; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    beqz a0, .LBB0_4
-; RV32-NEXT:  # %bb.1: # %loop.preheader
 ; RV32-NEXT:    addi sp, sp, -16
 ; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-NEXT:    beqz a0, .LBB0_3
+; RV32-NEXT:  # %bb.1: # %loop.preheader
 ; RV32-NEXT:    mv s0, a0
 ; RV32-NEXT:  .LBB0_2: # %loop
 ; RV32-NEXT:    # =>This Inner Loop Header: Depth=1
@@ -25,20 +25,19 @@ define void @process_nodes(ptr %0) nounwind {
 ; RV32-NEXT:    call do_it@plt
 ; RV32-NEXT:    lw s0, 0(s0)
 ; RV32-NEXT:    bnez s0, .LBB0_2
-; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:  .LBB0_3: # %exit
 ; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:  .LBB0_4: # %exit
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: process_nodes:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    beqz a0, .LBB0_4
-; RV64-NEXT:  # %bb.1: # %loop.preheader
 ; RV64-NEXT:    addi sp, sp, -16
 ; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64-NEXT:    beqz a0, .LBB0_3
+; RV64-NEXT:  # %bb.1: # %loop.preheader
 ; RV64-NEXT:    mv s0, a0
 ; RV64-NEXT:  .LBB0_2: # %loop
 ; RV64-NEXT:    # =>This Inner Loop Header: Depth=1
@@ -47,11 +46,10 @@ define void @process_nodes(ptr %0) nounwind {
 ; RV64-NEXT:    call do_it@plt
 ; RV64-NEXT:    ld s0, 0(s0)
 ; RV64-NEXT:    bnez s0, .LBB0_2
-; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:  .LBB0_3: # %exit
 ; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64-NEXT:    addi sp, sp, 16
-; RV64-NEXT:  .LBB0_4: # %exit
 ; RV64-NEXT:    ret
 entry:
   %1 = icmp eq ptr %0, null

diff  --git a/llvm/test/CodeGen/RISCV/jumptable.ll b/llvm/test/CodeGen/RISCV/jumptable.ll
index abbedf015064d..4cc17cee230e7 100644
--- a/llvm/test/CodeGen/RISCV/jumptable.ll
+++ b/llvm/test/CodeGen/RISCV/jumptable.ll
@@ -97,7 +97,7 @@ define void @above_threshold(i32 signext %in, ptr %out) nounwind {
 ; RV32I-SMALL-NEXT:    j .LBB1_8
 ; RV32I-SMALL-NEXT:  .LBB1_7: # %bb6
 ; RV32I-SMALL-NEXT:    li a0, 200
-; RV32I-SMALL-NEXT:  .LBB1_8:
+; RV32I-SMALL-NEXT:  .LBB1_8: # %exit
 ; RV32I-SMALL-NEXT:    sw a0, 0(a1)
 ; RV32I-SMALL-NEXT:  .LBB1_9: # %exit
 ; RV32I-SMALL-NEXT:    ret
@@ -132,7 +132,7 @@ define void @above_threshold(i32 signext %in, ptr %out) nounwind {
 ; RV32I-MEDIUM-NEXT:    j .LBB1_8
 ; RV32I-MEDIUM-NEXT:  .LBB1_7: # %bb6
 ; RV32I-MEDIUM-NEXT:    li a0, 200
-; RV32I-MEDIUM-NEXT:  .LBB1_8:
+; RV32I-MEDIUM-NEXT:  .LBB1_8: # %exit
 ; RV32I-MEDIUM-NEXT:    sw a0, 0(a1)
 ; RV32I-MEDIUM-NEXT:  .LBB1_9: # %exit
 ; RV32I-MEDIUM-NEXT:    ret
@@ -168,7 +168,7 @@ define void @above_threshold(i32 signext %in, ptr %out) nounwind {
 ; RV32I-PIC-NEXT:    j .LBB1_8
 ; RV32I-PIC-NEXT:  .LBB1_7: # %bb6
 ; RV32I-PIC-NEXT:    li a0, 200
-; RV32I-PIC-NEXT:  .LBB1_8:
+; RV32I-PIC-NEXT:  .LBB1_8: # %exit
 ; RV32I-PIC-NEXT:    sw a0, 0(a1)
 ; RV32I-PIC-NEXT:  .LBB1_9: # %exit
 ; RV32I-PIC-NEXT:    ret
@@ -202,7 +202,7 @@ define void @above_threshold(i32 signext %in, ptr %out) nounwind {
 ; RV64I-SMALL-NEXT:    j .LBB1_8
 ; RV64I-SMALL-NEXT:  .LBB1_7: # %bb6
 ; RV64I-SMALL-NEXT:    li a0, 200
-; RV64I-SMALL-NEXT:  .LBB1_8:
+; RV64I-SMALL-NEXT:  .LBB1_8: # %exit
 ; RV64I-SMALL-NEXT:    sw a0, 0(a1)
 ; RV64I-SMALL-NEXT:  .LBB1_9: # %exit
 ; RV64I-SMALL-NEXT:    ret
@@ -237,7 +237,7 @@ define void @above_threshold(i32 signext %in, ptr %out) nounwind {
 ; RV64I-MEDIUM-NEXT:    j .LBB1_8
 ; RV64I-MEDIUM-NEXT:  .LBB1_7: # %bb6
 ; RV64I-MEDIUM-NEXT:    li a0, 200
-; RV64I-MEDIUM-NEXT:  .LBB1_8:
+; RV64I-MEDIUM-NEXT:  .LBB1_8: # %exit
 ; RV64I-MEDIUM-NEXT:    sw a0, 0(a1)
 ; RV64I-MEDIUM-NEXT:  .LBB1_9: # %exit
 ; RV64I-MEDIUM-NEXT:    ret
@@ -273,7 +273,7 @@ define void @above_threshold(i32 signext %in, ptr %out) nounwind {
 ; RV64I-PIC-NEXT:    j .LBB1_8
 ; RV64I-PIC-NEXT:  .LBB1_7: # %bb6
 ; RV64I-PIC-NEXT:    li a0, 200
-; RV64I-PIC-NEXT:  .LBB1_8:
+; RV64I-PIC-NEXT:  .LBB1_8: # %exit
 ; RV64I-PIC-NEXT:    sw a0, 0(a1)
 ; RV64I-PIC-NEXT:  .LBB1_9: # %exit
 ; RV64I-PIC-NEXT:    ret

diff  --git a/llvm/test/CodeGen/Thumb/thumb-shrink-wrapping.ll b/llvm/test/CodeGen/Thumb/thumb-shrink-wrapping.ll
index 640615facbd18..07e19dd58fe5a 100644
--- a/llvm/test/CodeGen/Thumb/thumb-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/Thumb/thumb-shrink-wrapping.ll
@@ -1445,15 +1445,15 @@ if.end:
 define i1 @beq_to_bx(ptr %y, i32 %head) {
 ; ENABLE-V4T-LABEL: beq_to_bx:
 ; ENABLE-V4T:       @ %bb.0: @ %entry
-; ENABLE-V4T-NEXT:    movs r2, r0
-; ENABLE-V4T-NEXT:    movs r0, #1
-; ENABLE-V4T-NEXT:    cmp r2, #0
-; ENABLE-V4T-NEXT:    beq LBB11_4
-; ENABLE-V4T-NEXT:  @ %bb.1: @ %if.end
 ; ENABLE-V4T-NEXT:    push {r4, lr}
 ; ENABLE-V4T-NEXT:    .cfi_def_cfa_offset 8
 ; ENABLE-V4T-NEXT:    .cfi_offset lr, -4
 ; ENABLE-V4T-NEXT:    .cfi_offset r4, -8
+; ENABLE-V4T-NEXT:    movs r2, r0
+; ENABLE-V4T-NEXT:    movs r0, #1
+; ENABLE-V4T-NEXT:    cmp r2, #0
+; ENABLE-V4T-NEXT:    beq LBB11_3
+; ENABLE-V4T-NEXT:  @ %bb.1: @ %if.end
 ; ENABLE-V4T-NEXT:    ldr r3, [r2]
 ; ENABLE-V4T-NEXT:    lsls r4, r3, #30
 ; ENABLE-V4T-NEXT:    bpl LBB11_3
@@ -1461,24 +1461,22 @@ define i1 @beq_to_bx(ptr %y, i32 %head) {
 ; ENABLE-V4T-NEXT:    str r1, [r2]
 ; ENABLE-V4T-NEXT:    str r3, [r2]
 ; ENABLE-V4T-NEXT:    movs r0, #0
-; ENABLE-V4T-NEXT:  LBB11_3:
+; ENABLE-V4T-NEXT:  LBB11_3: @ %cleanup
 ; ENABLE-V4T-NEXT:    pop {r4}
 ; ENABLE-V4T-NEXT:    pop {r1}
-; ENABLE-V4T-NEXT:    mov lr, r1
-; ENABLE-V4T-NEXT:  LBB11_4: @ %cleanup
-; ENABLE-V4T-NEXT:    bx lr
+; ENABLE-V4T-NEXT:    bx r1
 ;
 ; ENABLE-V5T-LABEL: beq_to_bx:
 ; ENABLE-V5T:       @ %bb.0: @ %entry
-; ENABLE-V5T-NEXT:    movs r2, r0
-; ENABLE-V5T-NEXT:    movs r0, #1
-; ENABLE-V5T-NEXT:    cmp r2, #0
-; ENABLE-V5T-NEXT:    beq LBB11_4
-; ENABLE-V5T-NEXT:  @ %bb.1: @ %if.end
 ; ENABLE-V5T-NEXT:    push {r4, lr}
 ; ENABLE-V5T-NEXT:    .cfi_def_cfa_offset 8
 ; ENABLE-V5T-NEXT:    .cfi_offset lr, -4
 ; ENABLE-V5T-NEXT:    .cfi_offset r4, -8
+; ENABLE-V5T-NEXT:    movs r2, r0
+; ENABLE-V5T-NEXT:    movs r0, #1
+; ENABLE-V5T-NEXT:    cmp r2, #0
+; ENABLE-V5T-NEXT:    beq LBB11_3
+; ENABLE-V5T-NEXT:  @ %bb.1: @ %if.end
 ; ENABLE-V5T-NEXT:    ldr r3, [r2]
 ; ENABLE-V5T-NEXT:    lsls r4, r3, #30
 ; ENABLE-V5T-NEXT:    bpl LBB11_3
@@ -1486,10 +1484,8 @@ define i1 @beq_to_bx(ptr %y, i32 %head) {
 ; ENABLE-V5T-NEXT:    str r1, [r2]
 ; ENABLE-V5T-NEXT:    str r3, [r2]
 ; ENABLE-V5T-NEXT:    movs r0, #0
-; ENABLE-V5T-NEXT:  LBB11_3:
+; ENABLE-V5T-NEXT:  LBB11_3: @ %cleanup
 ; ENABLE-V5T-NEXT:    pop {r4, pc}
-; ENABLE-V5T-NEXT:  LBB11_4: @ %cleanup
-; ENABLE-V5T-NEXT:    bx lr
 ;
 ; DISABLE-V4T-LABEL: beq_to_bx:
 ; DISABLE-V4T:       @ %bb.0: @ %entry

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inlineasm.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inlineasm.ll
index 421b5b5364d35..d67e66d7a7131 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inlineasm.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inlineasm.ll
@@ -4,13 +4,11 @@
 define i32 @test(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) {
 ; CHECK-LABEL: test:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r2, #1
-; CHECK-NEXT:    itt lt
-; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    bxlt lr
-; CHECK-NEXT:  .LBB0_1: @ %for.body.preheader
 ; CHECK-NEXT:    .save {r7, lr}
 ; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    cmp r2, #1
+; CHECK-NEXT:    blt .LBB0_4
+; CHECK-NEXT:  @ %bb.1: @ %for.body.preheader
 ; CHECK-NEXT:    mov lr, r0
 ; CHECK-NEXT:    movs r0, #0
 ; CHECK-NEXT:  .LBB0_2: @ %for.body
@@ -23,7 +21,10 @@ define i32 @test(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) {
 ; CHECK-NEXT:    @NO_APP
 ; CHECK-NEXT:    add r0, r3
 ; CHECK-NEXT:    bne .LBB0_2
-; CHECK-NEXT:  @ %bb.3:
+; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
+; CHECK-NEXT:    pop {r7, pc}
+; CHECK-NEXT:  .LBB0_4:
+; CHECK-NEXT:    movs r0, #0
 ; CHECK-NEXT:    pop {r7, pc}
 entry:
   %cmp9 = icmp sgt i32 %n, 0
@@ -50,13 +51,11 @@ for.body:                                         ; preds = %entry, %for.body
 define i32 @testlr(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) {
 ; CHECK-LABEL: testlr:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r2, #1
-; CHECK-NEXT:    itt lt
-; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    bxlt lr
-; CHECK-NEXT:  .LBB1_1: @ %for.body.preheader
 ; CHECK-NEXT:    .save {r4, lr}
 ; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    cmp r2, #1
+; CHECK-NEXT:    blt .LBB1_4
+; CHECK-NEXT:  @ %bb.1: @ %for.body.preheader
 ; CHECK-NEXT:    mov r3, r0
 ; CHECK-NEXT:    movs r0, #0
 ; CHECK-NEXT:  .LBB1_2: @ %for.body
@@ -69,7 +68,10 @@ define i32 @testlr(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n)
 ; CHECK-NEXT:    @NO_APP
 ; CHECK-NEXT:    add r0, r4
 ; CHECK-NEXT:    bne .LBB1_2
-; CHECK-NEXT:  @ %bb.3:
+; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
+; CHECK-NEXT:    pop {r4, pc}
+; CHECK-NEXT:  .LBB1_4:
+; CHECK-NEXT:    movs r0, #0
 ; CHECK-NEXT:    pop {r4, pc}
 entry:
   %cmp9 = icmp sgt i32 %n, 0

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll
index 59b32a3f441c1..99d169e63e5a5 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll
@@ -4,12 +4,11 @@
 define void @test_memcpy(ptr nocapture %x, ptr nocapture readonly %y, i32 %n, i32 %m) {
 ; CHECK-LABEL: test_memcpy:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r2, #1
-; CHECK-NEXT:    it lt
-; CHECK-NEXT:    bxlt lr
-; CHECK-NEXT:  .LBB0_1: @ %for.body.preheader
 ; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
 ; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    cmp r2, #1
+; CHECK-NEXT:    blt .LBB0_5
+; CHECK-NEXT:  @ %bb.1: @ %for.body.preheader
 ; CHECK-NEXT:    lsl.w r12, r3, #2
 ; CHECK-NEXT:    movs r7, #0
 ; CHECK-NEXT:    b .LBB0_2
@@ -32,9 +31,8 @@ define void @test_memcpy(ptr nocapture %x, ptr nocapture readonly %y, i32 %n, i3
 ; CHECK-NEXT:    vstrb.8 q0, [r5], #16
 ; CHECK-NEXT:    letp lr, .LBB0_4
 ; CHECK-NEXT:    b .LBB0_3
-; CHECK-NEXT:  .LBB0_5:
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  .LBB0_5: @ %for.cond.cleanup
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
 entry:
   %cmp8 = icmp sgt i32 %n, 0
   br i1 %cmp8, label %for.body, label %for.cond.cleanup
@@ -57,12 +55,12 @@ for.body:                                         ; preds = %entry, %for.body
 define void @test_memset(ptr nocapture %x, i32 %n, i32 %m) {
 ; CHECK-LABEL: test_memset:
 ; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
 ; CHECK-NEXT:    cmp r1, #1
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    bxlt lr
+; CHECK-NEXT:    poplt {r7, pc}
 ; CHECK-NEXT:  .LBB1_1:
-; CHECK-NEXT:    .save {r7, lr}
-; CHECK-NEXT:    push {r7, lr}
 ; CHECK-NEXT:    vmov.i32 q0, #0x0
 ; CHECK-NEXT:    b .LBB1_2
 ; CHECK-NEXT:  .LBB1_2: @ %for.body
@@ -82,9 +80,8 @@ define void @test_memset(ptr nocapture %x, i32 %n, i32 %m) {
 ; CHECK-NEXT:    vstrb.8 q0, [r12], #16
 ; CHECK-NEXT:    letp lr, .LBB1_4
 ; CHECK-NEXT:    b .LBB1_3
-; CHECK-NEXT:  .LBB1_5:
-; CHECK-NEXT:    pop.w {r7, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  .LBB1_5: @ %for.cond.cleanup
+; CHECK-NEXT:    pop {r7, pc}
 entry:
   %cmp5 = icmp sgt i32 %n, 0
   br i1 %cmp5, label %for.body, label %for.cond.cleanup
@@ -105,14 +102,13 @@ for.body:                                         ; preds = %entry, %for.body
 define void @test_memmove(ptr nocapture %x, ptr nocapture readonly %y, i32 %n, i32 %m) {
 ; CHECK-LABEL: test_memmove:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r2, #1
-; CHECK-NEXT:    it lt
-; CHECK-NEXT:    bxlt lr
-; CHECK-NEXT:  .LBB2_1: @ %for.body.preheader
 ; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
 ; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
 ; CHECK-NEXT:    .pad #4
 ; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    cmp r2, #1
+; CHECK-NEXT:    blt .LBB2_3
+; CHECK-NEXT:  @ %bb.1: @ %for.body.preheader
 ; CHECK-NEXT:    mov r8, r3
 ; CHECK-NEXT:    mov r5, r2
 ; CHECK-NEXT:    mov r9, r1
@@ -128,10 +124,9 @@ define void @test_memmove(ptr nocapture %x, ptr nocapture readonly %y, i32 %n, i
 ; CHECK-NEXT:    add r6, r4
 ; CHECK-NEXT:    subs r5, #1
 ; CHECK-NEXT:    bne .LBB2_2
-; CHECK-NEXT:  @ %bb.3:
+; CHECK-NEXT:  .LBB2_3: @ %for.cond.cleanup
 ; CHECK-NEXT:    add sp, #4
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
 entry:
   %cmp8 = icmp sgt i32 %n, 0
   br i1 %cmp8, label %for.body, label %for.cond.cleanup

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll
index 23eb5900bb7d1..13e39a8f16e33 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll
@@ -4,11 +4,10 @@
 define arm_aapcs_vfpcc void @float_float_mul(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %c, i32 %N) {
 ; CHECK-LABEL: float_float_mul:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    bxeq lr
-; CHECK-NEXT:  .LBB0_1: @ %for.body.preheader
 ; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    beq .LBB0_10
+; CHECK-NEXT:  @ %bb.1: @ %for.body.preheader
 ; CHECK-NEXT:    cmp r3, #3
 ; CHECK-NEXT:    bhi .LBB0_3
 ; CHECK-NEXT:  @ %bb.2:
@@ -81,9 +80,8 @@ define arm_aapcs_vfpcc void @float_float_mul(ptr nocapture readonly %a, ptr noca
 ; CHECK-NEXT:    vmul.f32 s0, s2, s0
 ; CHECK-NEXT:    vstr s0, [r5, #12]
 ; CHECK-NEXT:    bne .LBB0_9
-; CHECK-NEXT:  .LBB0_10:
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  .LBB0_10: @ %for.cond.cleanup
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, pc}
 ; CHECK-NEXT:  .LBB0_11: @ %vector.ph
 ; CHECK-NEXT:    bic r12, r3, #3
 ; CHECK-NEXT:    movs r6, #1
@@ -217,11 +215,10 @@ for.body:                                         ; preds = %for.body.prol.loope
 define arm_aapcs_vfpcc void @float_float_add(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %c, i32 %N) {
 ; CHECK-LABEL: float_float_add:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    bxeq lr
-; CHECK-NEXT:  .LBB1_1: @ %for.body.preheader
 ; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    beq .LBB1_10
+; CHECK-NEXT:  @ %bb.1: @ %for.body.preheader
 ; CHECK-NEXT:    cmp r3, #3
 ; CHECK-NEXT:    bhi .LBB1_3
 ; CHECK-NEXT:  @ %bb.2:
@@ -294,9 +291,8 @@ define arm_aapcs_vfpcc void @float_float_add(ptr nocapture readonly %a, ptr noca
 ; CHECK-NEXT:    vadd.f32 s0, s2, s0
 ; CHECK-NEXT:    vstr s0, [r5, #12]
 ; CHECK-NEXT:    bne .LBB1_9
-; CHECK-NEXT:  .LBB1_10:
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  .LBB1_10: @ %for.cond.cleanup
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, pc}
 ; CHECK-NEXT:  .LBB1_11: @ %vector.ph
 ; CHECK-NEXT:    bic r12, r3, #3
 ; CHECK-NEXT:    movs r6, #1
@@ -430,11 +426,10 @@ for.body:                                         ; preds = %for.body.prol.loope
 define arm_aapcs_vfpcc void @float_float_sub(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %c, i32 %N) {
 ; CHECK-LABEL: float_float_sub:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    bxeq lr
-; CHECK-NEXT:  .LBB2_1: @ %for.body.preheader
 ; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    beq .LBB2_10
+; CHECK-NEXT:  @ %bb.1: @ %for.body.preheader
 ; CHECK-NEXT:    cmp r3, #3
 ; CHECK-NEXT:    bhi .LBB2_3
 ; CHECK-NEXT:  @ %bb.2:
@@ -507,9 +502,8 @@ define arm_aapcs_vfpcc void @float_float_sub(ptr nocapture readonly %a, ptr noca
 ; CHECK-NEXT:    vsub.f32 s0, s2, s0
 ; CHECK-NEXT:    vstr s0, [r5, #12]
 ; CHECK-NEXT:    bne .LBB2_9
-; CHECK-NEXT:  .LBB2_10:
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  .LBB2_10: @ %for.cond.cleanup
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, pc}
 ; CHECK-NEXT:  .LBB2_11: @ %vector.ph
 ; CHECK-NEXT:    bic r12, r3, #3
 ; CHECK-NEXT:    movs r6, #1
@@ -643,11 +637,10 @@ for.body:                                         ; preds = %for.body.prol.loope
 define arm_aapcs_vfpcc void @float_int_mul(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %c, i32 %N) {
 ; CHECK-LABEL: float_int_mul:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    bxeq lr
-; CHECK-NEXT:  .LBB3_1: @ %for.body.preheader
 ; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    beq.w .LBB3_13
+; CHECK-NEXT:  @ %bb.1: @ %for.body.preheader
 ; CHECK-NEXT:    cmp r3, #3
 ; CHECK-NEXT:    bls .LBB3_6
 ; CHECK-NEXT:  @ %bb.2: @ %vector.memcheck
@@ -736,9 +729,8 @@ define arm_aapcs_vfpcc void @float_int_mul(ptr nocapture readonly %a, ptr nocapt
 ; CHECK-NEXT:    vmul.f32 s0, s2, s0
 ; CHECK-NEXT:    vstr s0, [r6, #12]
 ; CHECK-NEXT:    bne .LBB3_12
-; CHECK-NEXT:  .LBB3_13:
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  .LBB3_13: @ %for.cond.cleanup
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, pc}
 entry:
   %cmp8 = icmp eq i32 %N, 0
   br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
index 93119eac2d564..eb98b85eafc90 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
@@ -411,12 +411,10 @@ for.cond.cleanup:                                 ; preds = %middle.block, %entr
 define dso_local arm_aapcs_vfpcc i32 @two_loops_mul_add_v4i32(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) local_unnamed_addr {
 ; CHECK-LABEL: two_loops_mul_add_v4i32:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    itt eq
-; CHECK-NEXT:    moveq r0, #0
-; CHECK-NEXT:    bxeq lr
-; CHECK-NEXT:  .LBB6_1: @ %vector.ph
 ; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    beq .LBB6_8
+; CHECK-NEXT:  @ %bb.1: @ %vector.ph
 ; CHECK-NEXT:    adds r3, r2, #3
 ; CHECK-NEXT:    vmov.i32 q1, #0x0
 ; CHECK-NEXT:    bic r3, r3, #3
@@ -463,10 +461,12 @@ define dso_local arm_aapcs_vfpcc i32 @two_loops_mul_add_v4i32(i8* nocapture read
 ; CHECK-NEXT:  @ %bb.6: @ %middle.block44
 ; CHECK-NEXT:    vpsel q0, q0, q1
 ; CHECK-NEXT:    vaddv.u32 r12, q0
-; CHECK-NEXT:  .LBB6_7:
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, lr}
+; CHECK-NEXT:  .LBB6_7: @ %for.cond.cleanup7
 ; CHECK-NEXT:    mov r0, r12
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT:  .LBB6_8:
+; CHECK-NEXT:    movs r0, #0
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
 entry:
   %cmp35 = icmp eq i32 %N, 0
   br i1 %cmp35, label %for.cond.cleanup7, label %vector.ph

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/sibling-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/sibling-loops.ll
index 1f3a43923db61..caf7a339805fc 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/sibling-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/sibling-loops.ll
@@ -4,11 +4,10 @@
 define arm_aapcs_vfpcc void @test(ptr noalias nocapture readonly %off, ptr noalias nocapture %data, ptr noalias nocapture %dst, i32 %n) {
 ; CHECK-LABEL: test:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r3, #1
-; CHECK-NEXT:    it lt
-; CHECK-NEXT:    bxlt lr
-; CHECK-NEXT:  .LBB0_1: @ %for.cond1.preheader.us.preheader
 ; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT:    cmp r3, #1
+; CHECK-NEXT:    blt .LBB0_7
+; CHECK-NEXT:  @ %bb.1: @ %for.cond1.preheader.us.preheader
 ; CHECK-NEXT:    mov r8, r3
 ; CHECK-NEXT:    lsl.w r12, r3, #1
 ; CHECK-NEXT:    movs r3, #0
@@ -48,9 +47,8 @@ define arm_aapcs_vfpcc void @test(ptr noalias nocapture readonly %off, ptr noali
 ; CHECK-NEXT:    add r4, r12
 ; CHECK-NEXT:    cmp r3, r8
 ; CHECK-NEXT:    bne .LBB0_2
-; CHECK-NEXT:  @ %bb.7:
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  .LBB0_7: @ %for.cond.cleanup
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, pc}
 entry:
   %cmp252 = icmp sgt i32 %n, 0
   br i1 %cmp252, label %for.cond1.preheader.us, label %for.cond.cleanup

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll
index 3b42ee36e7c2e..fc58873f9857b 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll
@@ -53,12 +53,10 @@ if.end:                                           ; preds = %do.body, %entry
 define void @nested(ptr nocapture readonly %x, ptr nocapture readnone %y, ptr nocapture %z, i32 %m, i32 %n) {
 ; CHECK-LABEL: nested:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    bxeq lr
-; CHECK-NEXT:  .LBB1_1: @ %for.body.preheader
 ; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, lr}
 ; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT:    cbz r3, .LBB1_8
+; CHECK-NEXT:  @ %bb.1: @ %for.body.preheader
 ; CHECK-NEXT:    ldr.w r12, [sp, #24]
 ; CHECK-NEXT:    movs r1, #0
 ; CHECK-NEXT:    b .LBB1_4
@@ -93,9 +91,8 @@ define void @nested(ptr nocapture readonly %x, ptr nocapture readnone %y, ptr no
 ; CHECK-NEXT:    sub.w r12, r12, r5
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    b .LBB1_3
-; CHECK-NEXT:  .LBB1_8:
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  .LBB1_8: @ %for.cond.cleanup
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, pc}
 entry:
   %cmp20.not = icmp eq i32 %m, 0
   br i1 %cmp20.not, label %for.cond.cleanup, label %for.body

diff  --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
index b7b19a477ab0f..6228d616b5842 100644
--- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
@@ -981,13 +981,6 @@ if.end61:                                         ; preds = %if.then59, %while.e
 define void @fir(ptr nocapture readonly %S, ptr nocapture readonly %pSrc, ptr nocapture %pDst, i32 %blockSize) {
 ; CHECK-LABEL: fir:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r3, #8
-; CHECK-NEXT:    blo.w .LBB16_13
-; CHECK-NEXT:  @ %bb.1: @ %if.then
-; CHECK-NEXT:    lsrs.w r12, r3, #2
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    bxeq lr
-; CHECK-NEXT:  .LBB16_2: @ %while.body.lr.ph
 ; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-NEXT:    .pad #4
@@ -996,6 +989,12 @@ define void @fir(ptr nocapture readonly %S, ptr nocapture readonly %pSrc, ptr no
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
 ; CHECK-NEXT:    .pad #32
 ; CHECK-NEXT:    sub sp, #32
+; CHECK-NEXT:    cmp r3, #8
+; CHECK-NEXT:    blo.w .LBB16_12
+; CHECK-NEXT:  @ %bb.1: @ %if.then
+; CHECK-NEXT:    lsrs.w r12, r3, #2
+; CHECK-NEXT:    beq.w .LBB16_12
+; CHECK-NEXT:  @ %bb.2: @ %while.body.lr.ph
 ; CHECK-NEXT:    ldrh r6, [r0]
 ; CHECK-NEXT:    movs r5, #1
 ; CHECK-NEXT:    ldrd r4, r10, [r0, #4]
@@ -1107,13 +1106,11 @@ define void @fir(ptr nocapture readonly %S, ptr nocapture readonly %pSrc, ptr no
 ; CHECK-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload
 ; CHECK-NEXT:    add.w r4, r4, r0, lsl #2
 ; CHECK-NEXT:    b .LBB16_4
-; CHECK-NEXT:  .LBB16_12:
+; CHECK-NEXT:  .LBB16_12: @ %if.end
 ; CHECK-NEXT:    add sp, #32
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
 ; CHECK-NEXT:    add sp, #4
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-NEXT:  .LBB16_13: @ %if.end
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 entry:
   %pState1 = getelementptr inbounds %struct.arm_fir_instance_f32, ptr %S, i32 0, i32 1
   %i = load ptr, ptr %pState1, align 4

diff  --git a/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll b/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll
index 0335d24c0a782..24f1831a3f07c 100644
--- a/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll
@@ -290,12 +290,12 @@ end:
 define arm_aapcs_vfpcc void @gather_inc_v4i32_simple(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n) {
 ; CHECK-LABEL: gather_inc_v4i32_simple:
 ; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, lr}
+; CHECK-NEXT:    push {r4, lr}
 ; CHECK-NEXT:    cmp r2, #1
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    bxlt lr
+; CHECK-NEXT:    poplt {r4, pc}
 ; CHECK-NEXT:  .LBB8_1: @ %vector.ph.preheader
-; CHECK-NEXT:    .save {r4, lr}
-; CHECK-NEXT:    push {r4, lr}
 ; CHECK-NEXT:    bic r12, r2, #3
 ; CHECK-NEXT:    movs r3, #1
 ; CHECK-NEXT:    sub.w lr, r12, #4
@@ -319,9 +319,8 @@ define arm_aapcs_vfpcc void @gather_inc_v4i32_simple(ptr noalias nocapture reado
 ; CHECK-NEXT:    @ in Loop: Header=BB8_2 Depth=1
 ; CHECK-NEXT:    cmp r12, r2
 ; CHECK-NEXT:    bne .LBB8_2
-; CHECK-NEXT:  @ %bb.5:
-; CHECK-NEXT:    pop.w {r4, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  @ %bb.5: @ %for.cond.cleanup
+; CHECK-NEXT:    pop {r4, pc}
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  @ %bb.6:
 ; CHECK-NEXT:  .LCPI8_0:
@@ -360,14 +359,13 @@ for.cond.cleanup:                                 ; preds = %for.body, %middle.b
 define arm_aapcs_vfpcc void @gather_inc_v4i32_complex(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n) {
 ; CHECK-LABEL: gather_inc_v4i32_complex:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r2, #1
-; CHECK-NEXT:    it lt
-; CHECK-NEXT:    bxlt lr
-; CHECK-NEXT:  .LBB9_1: @ %vector.ph.preheader
 ; CHECK-NEXT:    .save {r4, r5, r7, lr}
 ; CHECK-NEXT:    push {r4, r5, r7, lr}
 ; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEXT:    cmp r2, #1
+; CHECK-NEXT:    blt .LBB9_5
+; CHECK-NEXT:  @ %bb.1: @ %vector.ph.preheader
 ; CHECK-NEXT:    bic r12, r2, #3
 ; CHECK-NEXT:    movs r3, #1
 ; CHECK-NEXT:    sub.w lr, r12, #4
@@ -403,10 +401,9 @@ define arm_aapcs_vfpcc void @gather_inc_v4i32_complex(ptr noalias nocapture read
 ; CHECK-NEXT:    @ in Loop: Header=BB9_2 Depth=1
 ; CHECK-NEXT:    cmp r12, r2
 ; CHECK-NEXT:    bne .LBB9_2
-; CHECK-NEXT:  @ %bb.5:
+; CHECK-NEXT:  .LBB9_5: @ %for.cond.cleanup
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; CHECK-NEXT:    pop.w {r4, r5, r7, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:    pop {r4, r5, r7, pc}
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  @ %bb.6:
 ; CHECK-NEXT:  .LCPI9_0:
@@ -464,12 +461,12 @@ for.cond.cleanup:                                 ; preds = %for.body, %middle.b
 define arm_aapcs_vfpcc void @gather_inc_v4i32_large(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n) {
 ; CHECK-LABEL: gather_inc_v4i32_large:
 ; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, lr}
+; CHECK-NEXT:    push {r4, lr}
 ; CHECK-NEXT:    cmp r2, #1
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    bxlt lr
+; CHECK-NEXT:    poplt {r4, pc}
 ; CHECK-NEXT:  .LBB10_1: @ %vector.ph.preheader
-; CHECK-NEXT:    .save {r4, lr}
-; CHECK-NEXT:    push {r4, lr}
 ; CHECK-NEXT:    bic r12, r2, #3
 ; CHECK-NEXT:    movs r3, #1
 ; CHECK-NEXT:    sub.w lr, r12, #4
@@ -493,9 +490,8 @@ define arm_aapcs_vfpcc void @gather_inc_v4i32_large(ptr noalias nocapture readon
 ; CHECK-NEXT:    @ in Loop: Header=BB10_2 Depth=1
 ; CHECK-NEXT:    cmp r12, r2
 ; CHECK-NEXT:    bne .LBB10_2
-; CHECK-NEXT:  @ %bb.5:
-; CHECK-NEXT:    pop.w {r4, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  @ %bb.5: @ %for.cond.cleanup
+; CHECK-NEXT:    pop {r4, pc}
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  @ %bb.6:
 ; CHECK-NEXT:  .LCPI10_0:

diff  --git a/llvm/test/CodeGen/Thumb2/mve-gather-tailpred.ll b/llvm/test/CodeGen/Thumb2/mve-gather-tailpred.ll
index ea186cd6ed2d4..9093b9af00656 100644
--- a/llvm/test/CodeGen/Thumb2/mve-gather-tailpred.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-gather-tailpred.ll
@@ -4,12 +4,12 @@
 define arm_aapcs_vfpcc void @gather_inc_v4i32_simple(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n) {
 ; CHECK-LABEL: gather_inc_v4i32_simple:
 ; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, lr}
+; CHECK-NEXT:    push {r4, lr}
 ; CHECK-NEXT:    cmp r2, #1
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    bxlt lr
+; CHECK-NEXT:    poplt {r4, pc}
 ; CHECK-NEXT:  .LBB0_1: @ %vector.ph.preheader
-; CHECK-NEXT:    .save {r4, lr}
-; CHECK-NEXT:    push {r4, lr}
 ; CHECK-NEXT:    bic r12, r2, #3
 ; CHECK-NEXT:    movs r3, #1
 ; CHECK-NEXT:    sub.w lr, r12, #4
@@ -33,9 +33,8 @@ define arm_aapcs_vfpcc void @gather_inc_v4i32_simple(ptr noalias nocapture reado
 ; CHECK-NEXT:    @ in Loop: Header=BB0_2 Depth=1
 ; CHECK-NEXT:    cmp r12, r2
 ; CHECK-NEXT:    bne .LBB0_2
-; CHECK-NEXT:  @ %bb.5:
-; CHECK-NEXT:    pop.w {r4, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  @ %bb.5: @ %for.cond.cleanup
+; CHECK-NEXT:    pop {r4, pc}
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  @ %bb.6:
 ; CHECK-NEXT:  .LCPI0_0:

diff  --git a/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll b/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll
index da59cb259db61..5f3a12711dc0f 100644
--- a/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll
@@ -211,12 +211,12 @@ entry:
 define void @test11(ptr nocapture %x, ptr nocapture %y, i32 %n) {
 ; CHECK-LABEL: test11:
 ; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, lr}
+; CHECK-NEXT:    push {r4, lr}
 ; CHECK-NEXT:    cmp.w r2, #-1
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    bxgt lr
+; CHECK-NEXT:    popgt {r4, pc}
 ; CHECK-NEXT:  .LBB10_1: @ %prehead
-; CHECK-NEXT:    .save {r4, lr}
-; CHECK-NEXT:    push {r4, lr}
 ; CHECK-NEXT:    mov r12, r1
 ; CHECK-NEXT:    mov r4, r0
 ; CHECK-NEXT:    wlstp.8 lr, r2, .LBB10_3
@@ -230,9 +230,8 @@ define void @test11(ptr nocapture %x, ptr nocapture %y, i32 %n) {
 ; CHECK-NEXT:    subs r2, #2
 ; CHECK-NEXT:    strb r3, [r1], #1
 ; CHECK-NEXT:    bne .LBB10_3
-; CHECK-NEXT:  @ %bb.4:
-; CHECK-NEXT:    pop.w {r4, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  @ %bb.4: @ %for.cond.cleanup
+; CHECK-NEXT:    pop {r4, pc}
 entry:
   %cmp6 = icmp slt i32 %n, 0
   br i1 %cmp6, label %prehead, label %for.cond.cleanup
@@ -441,12 +440,12 @@ declare void @other()
 define void @multilooped_exit(i32 %b) {
 ; CHECK-LABEL: multilooped_exit:
 ; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, lr}
+; CHECK-NEXT:    push {r4, lr}
 ; CHECK-NEXT:    cmp r0, #1
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    bxlt lr
+; CHECK-NEXT:    poplt {r4, pc}
 ; CHECK-NEXT:  .LBB18_1: @ %loop.preheader
-; CHECK-NEXT:    .save {r4, lr}
-; CHECK-NEXT:    push {r4, lr}
 ; CHECK-NEXT:    mov.w r4, #-1
 ; CHECK-NEXT:    vmov.i32 q0, #0x0
 ; CHECK-NEXT:    b .LBB18_3
@@ -499,9 +498,8 @@ define void @multilooped_exit(i32 %b) {
 ; CHECK-NEXT:    vstrb.8 q0, [r3], #16
 ; CHECK-NEXT:    letp lr, .LBB18_11
 ; CHECK-NEXT:    b .LBB18_2
-; CHECK-NEXT:  .LBB18_12:
-; CHECK-NEXT:    pop.w {r4, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  .LBB18_12: @ %exit
+; CHECK-NEXT:    pop {r4, pc}
 entry:
   %cmp8 = icmp sgt i32 %b, 0
   br i1 %cmp8, label %loop, label %exit

diff  --git a/llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll b/llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll
index 3a14e650bd53a..94397f0ae587b 100644
--- a/llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll
@@ -127,16 +127,15 @@ define arm_aapcs_vfpcc void @scatter_inc_mini_16i8(<16 x i8> %data, ptr %dst, <1
 define arm_aapcs_vfpcc void @scatter_inc_v4i32_complex(<4 x i32> %data1, <4 x i32> %data2, <4 x i32> %data3, ptr %dst, i32 %n) {
 ; CHECK-LABEL: scatter_inc_v4i32_complex:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r1, #1
-; CHECK-NEXT:    it lt
-; CHECK-NEXT:    bxlt lr
-; CHECK-NEXT:  .LBB3_1: @ %vector.ph.preheader
 ; CHECK-NEXT:    .save {r4, lr}
 ; CHECK-NEXT:    push {r4, lr}
 ; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
 ; CHECK-NEXT:    .pad #16
 ; CHECK-NEXT:    sub sp, #16
+; CHECK-NEXT:    cmp r1, #1
+; CHECK-NEXT:    blt .LBB3_5
+; CHECK-NEXT:  @ %bb.1: @ %vector.ph.preheader
 ; CHECK-NEXT:    adr r4, .LCPI3_2
 ; CHECK-NEXT:    bic r2, r1, #3
 ; CHECK-NEXT:    vldrw.u32 q3, [r4]
@@ -169,11 +168,10 @@ define arm_aapcs_vfpcc void @scatter_inc_v4i32_complex(<4 x i32> %data1, <4 x i3
 ; CHECK-NEXT:    @ in Loop: Header=BB3_2 Depth=1
 ; CHECK-NEXT:    cmp r2, r1
 ; CHECK-NEXT:    bne .LBB3_2
-; CHECK-NEXT:  @ %bb.5:
+; CHECK-NEXT:  .LBB3_5: @ %for.cond.cleanup
 ; CHECK-NEXT:    add sp, #16
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; CHECK-NEXT:    pop.w {r4, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:    pop {r4, pc}
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  @ %bb.6:
 ; CHECK-NEXT:  .LCPI3_0:

diff  --git a/llvm/test/CodeGen/Thumb2/mve-tailpred-nonzerostart.ll b/llvm/test/CodeGen/Thumb2/mve-tailpred-nonzerostart.ll
index 42a00b61b4183..85425db1eb6c8 100644
--- a/llvm/test/CodeGen/Thumb2/mve-tailpred-nonzerostart.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-tailpred-nonzerostart.ll
@@ -58,12 +58,11 @@ for.cond.cleanup:                                 ; preds = %vector.body, %entry
 define arm_aapcs_vfpcc void @start11(ptr nocapture readonly %x, ptr nocapture readonly %y, ptr noalias nocapture %z, float %a, i32 %n) {
 ; CHECK-LABEL: start11:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r3, #1
-; CHECK-NEXT:    it lt
-; CHECK-NEXT:    bxlt lr
-; CHECK-NEXT:  .LBB1_1: @ %vector.ph
 ; CHECK-NEXT:    .save {r4, r5, r7, lr}
 ; CHECK-NEXT:    push {r4, r5, r7, lr}
+; CHECK-NEXT:    cmp r3, #1
+; CHECK-NEXT:    blt .LBB1_3
+; CHECK-NEXT:  @ %bb.1: @ %vector.ph
 ; CHECK-NEXT:    vmov r12, s0
 ; CHECK-NEXT:    adds r4, r3, #3
 ; CHECK-NEXT:    adr r5, .LCPI1_0
@@ -86,9 +85,8 @@ define arm_aapcs_vfpcc void @start11(ptr nocapture readonly %x, ptr nocapture re
 ; CHECK-NEXT:    vpst
 ; CHECK-NEXT:    vstrwt.32 q3, [r2], #16
 ; CHECK-NEXT:    bne .LBB1_2
-; CHECK-NEXT:  @ %bb.3:
-; CHECK-NEXT:    pop.w {r4, r5, r7, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  .LBB1_3: @ %for.cond.cleanup
+; CHECK-NEXT:    pop {r4, r5, r7, pc}
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  @ %bb.4:
 ; CHECK-NEXT:  .LCPI1_0:

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll b/llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll
index 0a26d9920981b..da0cd57d86dbb 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll
@@ -4,13 +4,11 @@
 define arm_aapcs_vfpcc void @test32(ptr noalias nocapture readonly %x, ptr noalias nocapture readonly %y, ptr nocapture %z, i32 %n) {
 ; CHECK-LABEL: test32:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r3, #1
-; CHECK-NEXT:    it lt
-; CHECK-NEXT:    bxlt lr
-; CHECK-NEXT:  .LBB0_1: @ %vector.body.preheader
 ; CHECK-NEXT:    .save {r4, r5, r7, lr}
 ; CHECK-NEXT:    push {r4, r5, r7, lr}
-; CHECK-NEXT:  .LBB0_2: @ %vector.body
+; CHECK-NEXT:    cmp r3, #1
+; CHECK-NEXT:    blt .LBB0_2
+; CHECK-NEXT:  .LBB0_1: @ %vector.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vldrw.u32 q0, [r0], #16
 ; CHECK-NEXT:    vldrw.u32 q1, [r1], #16
@@ -28,10 +26,9 @@ define arm_aapcs_vfpcc void @test32(ptr noalias nocapture readonly %x, ptr noali
 ; CHECK-NEXT:    lsrl r4, r5, #31
 ; CHECK-NEXT:    vmov q2[3], q2[1], r4, r12
 ; CHECK-NEXT:    vstrb.8 q2, [r2], #16
-; CHECK-NEXT:    bne .LBB0_2
-; CHECK-NEXT:  @ %bb.3:
-; CHECK-NEXT:    pop.w {r4, r5, r7, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:    bne .LBB0_1
+; CHECK-NEXT:  .LBB0_2: @ %for.cond.cleanup
+; CHECK-NEXT:    pop {r4, r5, r7, pc}
 entry:
   %0 = and i32 %n, 3
   %cmp = icmp eq i32 %0, 0

diff  --git a/llvm/test/CodeGen/X86/negative-stride-fptosi-user.ll b/llvm/test/CodeGen/X86/negative-stride-fptosi-user.ll
index d0d46b5f11836..e21d4de178719 100644
--- a/llvm/test/CodeGen/X86/negative-stride-fptosi-user.ll
+++ b/llvm/test/CodeGen/X86/negative-stride-fptosi-user.ll
@@ -9,14 +9,12 @@
 define void @foo(i32 %N) nounwind {
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    js .LBB0_1
-; CHECK-NEXT:  # %bb.4: # %return
-; CHECK-NEXT:    retq
-; CHECK-NEXT:  .LBB0_1: # %bb.preheader
 ; CHECK-NEXT:    pushq %rbp
 ; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    testl %edi, %edi
+; CHECK-NEXT:    jns .LBB0_3
+; CHECK-NEXT:  # %bb.1: # %bb.preheader
 ; CHECK-NEXT:    movl %edi, %ebx
 ; CHECK-NEXT:    xorl %ebp, %ebp
 ; CHECK-NEXT:    .p2align 4, 0x90
@@ -28,7 +26,7 @@ define void @foo(i32 %N) nounwind {
 ; CHECK-NEXT:    decl %ebp
 ; CHECK-NEXT:    cmpl %ebp, %ebx
 ; CHECK-NEXT:    jne .LBB0_2
-; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:  .LBB0_3: # %return
 ; CHECK-NEXT:    addq $8, %rsp
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %rbp

diff  --git a/llvm/test/CodeGen/X86/pr44412.ll b/llvm/test/CodeGen/X86/pr44412.ll
index 67579a5bb7c52..6c33666fb5c3a 100644
--- a/llvm/test/CodeGen/X86/pr44412.ll
+++ b/llvm/test/CodeGen/X86/pr44412.ll
@@ -4,10 +4,10 @@
 define void @bar(i32 %0, i32 %1) nounwind {
 ; CHECK-LABEL: bar:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    je .LBB0_4
+; CHECK-NEXT:    je .LBB0_3
 ; CHECK-NEXT:  # %bb.1: # %.preheader
-; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    movl %edi, %ebx
 ; CHECK-NEXT:    decl %ebx
 ; CHECK-NEXT:    .p2align 4, 0x90
@@ -16,9 +16,8 @@ define void @bar(i32 %0, i32 %1) nounwind {
 ; CHECK-NEXT:    callq foo@PLT
 ; CHECK-NEXT:    addl $-1, %ebx
 ; CHECK-NEXT:    jb .LBB0_2
-; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:  .LBB0_3:
 ; CHECK-NEXT:    popq %rbx
-; CHECK-NEXT:  .LBB0_4:
 ; CHECK-NEXT:    retq
   %3 = icmp eq i32 %0, 0
   br i1 %3, label %8, label %4
@@ -37,10 +36,10 @@ define void @bar(i32 %0, i32 %1) nounwind {
 define void @baz(i32 %0, i32 %1) nounwind {
 ; CHECK-LABEL: baz:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    je .LBB1_4
+; CHECK-NEXT:    je .LBB1_3
 ; CHECK-NEXT:  # %bb.1: # %.preheader
-; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    movl %edi, %ebx
 ; CHECK-NEXT:    decl %ebx
 ; CHECK-NEXT:    .p2align 4, 0x90
@@ -49,9 +48,8 @@ define void @baz(i32 %0, i32 %1) nounwind {
 ; CHECK-NEXT:    callq foo@PLT
 ; CHECK-NEXT:    addl $-1, %ebx
 ; CHECK-NEXT:    jae .LBB1_2
-; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:  .LBB1_3:
 ; CHECK-NEXT:    popq %rbx
-; CHECK-NEXT:  .LBB1_4:
 ; CHECK-NEXT:    retq
   %3 = icmp eq i32 %0, 0
   br i1 %3, label %8, label %4

diff  --git a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
index 379174fa04dda..ec4a12eadb94e 100644
--- a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
@@ -639,11 +639,11 @@ declare hidden fastcc ptr @find_temp_slot_from_address(ptr readonly)
 define void @useLEA(ptr readonly %x) {
 ; ENABLE-LABEL: useLEA:
 ; ENABLE:       ## %bb.0: ## %entry
-; ENABLE-NEXT:    testq %rdi, %rdi
-; ENABLE-NEXT:    je LBB8_8
-; ENABLE-NEXT:  ## %bb.1: ## %if.end
 ; ENABLE-NEXT:    pushq %rax
 ; ENABLE-NEXT:    .cfi_def_cfa_offset 16
+; ENABLE-NEXT:    testq %rdi, %rdi
+; ENABLE-NEXT:    je LBB8_7
+; ENABLE-NEXT:  ## %bb.1: ## %if.end
 ; ENABLE-NEXT:    cmpw $66, (%rdi)
 ; ENABLE-NEXT:    jne LBB8_7
 ; ENABLE-NEXT:  ## %bb.2: ## %lor.lhs.false
@@ -652,13 +652,12 @@ define void @useLEA(ptr readonly %x) {
 ; ENABLE-NEXT:    leal -54(%rax), %ecx
 ; ENABLE-NEXT:    cmpl $14, %ecx
 ; ENABLE-NEXT:    ja LBB8_3
-; ENABLE-NEXT:  ## %bb.9: ## %lor.lhs.false
+; ENABLE-NEXT:  ## %bb.8: ## %lor.lhs.false
 ; ENABLE-NEXT:    movl $24599, %edx ## imm = 0x6017
 ; ENABLE-NEXT:    btl %ecx, %edx
 ; ENABLE-NEXT:    jae LBB8_3
-; ENABLE-NEXT:  LBB8_7:
-; ENABLE-NEXT:    addq $8, %rsp
-; ENABLE-NEXT:  LBB8_8: ## %cleanup
+; ENABLE-NEXT:  LBB8_7: ## %cleanup
+; ENABLE-NEXT:    popq %rax
 ; ENABLE-NEXT:    retq
 ; ENABLE-NEXT:  LBB8_3: ## %lor.lhs.false
 ; ENABLE-NEXT:    cmpl $134, %eax
@@ -672,7 +671,8 @@ define void @useLEA(ptr readonly %x) {
 ; ENABLE-NEXT:    je LBB8_7
 ; ENABLE-NEXT:  ## %bb.6: ## %if.then.60
 ; ENABLE-NEXT:    movb $1, 57(%rax)
-; ENABLE-NEXT:    jmp LBB8_7
+; ENABLE-NEXT:    popq %rax
+; ENABLE-NEXT:    retq
 ;
 ; DISABLE-LABEL: useLEA:
 ; DISABLE:       ## %bb.0: ## %entry

diff  --git a/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
index 63a3c725ae89e..fa1c208ffbd77 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
@@ -182,12 +182,12 @@ exit:
 define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture %res, i32 %x, i32 %y, i32 %z) nounwind {
 ; X64-LABEL: extrastride:
 ; X64:       # %bb.0: # %entry
+; X64-NEXT:    pushq %rbx
 ; X64-NEXT:    # kill: def $ecx killed $ecx def $rcx
 ; X64-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-NEXT:    testl %r9d, %r9d
-; X64-NEXT:    je .LBB2_4
+; X64-NEXT:    je .LBB2_3
 ; X64-NEXT:  # %bb.1: # %for.body.lr.ph
-; X64-NEXT:    pushq %rbx
 ; X64-NEXT:    leal (%rsi,%rsi), %r10d
 ; X64-NEXT:    leal (%rsi,%rsi,2), %r11d
 ; X64-NEXT:    addl %esi, %ecx
@@ -213,9 +213,8 @@ define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture %
 ; X64-NEXT:    addq %r8, %rdx
 ; X64-NEXT:    decl %r9d
 ; X64-NEXT:    jne .LBB2_2
-; X64-NEXT:  # %bb.3:
+; X64-NEXT:  .LBB2_3: # %for.end
 ; X64-NEXT:    popq %rbx
-; X64-NEXT:  .LBB2_4: # %for.end
 ; X64-NEXT:    retq
 ;
 ; X32-LABEL: extrastride:


        


More information about the llvm-commits mailing list