[llvm] f4999d3 - Revert "[CodeGen][ShrinkWrap] Split restore point"

Alan Zhao via llvm-commits llvm-commits at lists.llvm.org
Mon May 8 16:28:29 PDT 2023


Author: Alan Zhao
Date: 2023-05-08T16:27:59-07:00
New Revision: f4999d3535af93919d58e3cc56ccb50f2ccb8453

URL: https://github.com/llvm/llvm-project/commit/f4999d3535af93919d58e3cc56ccb50f2ccb8453
DIFF: https://github.com/llvm/llvm-project/commit/f4999d3535af93919d58e3cc56ccb50f2ccb8453.diff

LOG: Revert "[CodeGen][ShrinkWrap] Split restore point"

This reverts commit 1ddfd1c8186735c62b642df05c505dc4907ffac4.

The original commit causes a Chrome build assertion failure with
ThinLTO: https://crbug.com/1443635

Added: 
    

Modified: 
    llvm/lib/CodeGen/ShrinkWrap.cpp
    llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
    llvm/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir
    llvm/test/CodeGen/AArch64/ragreedy-csr.ll
    llvm/test/CodeGen/AArch64/taildup-cfi.ll
    llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll
    llvm/test/CodeGen/ARM/code-placement.ll
    llvm/test/CodeGen/ARM/mbp.ll
    llvm/test/CodeGen/ARM/ssat-unroll-loops.ll
    llvm/test/CodeGen/PowerPC/common-chain-aix32.ll
    llvm/test/CodeGen/PowerPC/common-chain.ll
    llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll
    llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll
    llvm/test/CodeGen/PowerPC/shrink-wrap.ll
    llvm/test/CodeGen/PowerPC/shrink-wrap.mir
    llvm/test/CodeGen/RISCV/aext-to-sext.ll
    llvm/test/CodeGen/RISCV/fli-licm.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/inlineasm.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/sibling-loops.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll
    llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
    llvm/test/CodeGen/Thumb2/mve-gather-increment.ll
    llvm/test/CodeGen/Thumb2/mve-gather-tailpred.ll
    llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll
    llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll
    llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll
    llvm/test/CodeGen/Thumb2/mve-tailpred-nonzerostart.ll
    llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll
    llvm/test/CodeGen/X86/fold-call-3.ll
    llvm/test/CodeGen/X86/negative-stride-fptosi-user.ll
    llvm/test/CodeGen/X86/pr44412.ll
    llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
    llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll
    llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll

Removed: 
    llvm/test/CodeGen/AArch64/shrinkwrap-split-restore-point.mir


################################################################################
diff  --git a/llvm/lib/CodeGen/ShrinkWrap.cpp b/llvm/lib/CodeGen/ShrinkWrap.cpp
index 90cba95b7f199..b219b83bbc2fe 100644
--- a/llvm/lib/CodeGen/ShrinkWrap.cpp
+++ b/llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -98,9 +98,6 @@ STATISTIC(NumCandidatesDropped,
 static cl::opt<cl::boolOrDefault>
 EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden,
                     cl::desc("enable the shrink-wrapping pass"));
-static cl::opt<bool> EnablePostShrinkWrapOpt(
-    "enable-shrink-wrap-region-split", cl::init(true), cl::Hidden,
-    cl::desc("enable splitting of the restore block if possible"));
 
 namespace {
 
@@ -188,30 +185,6 @@ class ShrinkWrap : public MachineFunctionPass {
   /// this call.
   void updateSaveRestorePoints(MachineBasicBlock &MBB, RegScavenger *RS);
 
-  // Try to find safe point based on dominance and block frequency without
-  // any change in IR.
-  bool performShrinkWrapping(MachineFunction &MF, RegScavenger *RS);
-
-  /// This function tries to split the restore point if doing so can shrink the
-  /// save point further. \return True if restore point is split.
-  bool postShrinkWrapping(bool HasCandidate, MachineFunction &MF,
-                          RegScavenger *RS);
-
-  /// This function analyzes if the restore point can split to create a new
-  /// restore point. This function collects
-  /// 1. Any preds of current restore that are reachable by callee save/FI
-  /// blocks
-  /// - indicated by DirtyPreds
-  /// 2. Any preds of current restore that are not DirtyPreds - indicated by
-  /// CleanPreds
-  /// Both sets should be non-empty for considering restore point split.
-  bool checkIfRestoreSplittable(
-      const MachineBasicBlock *CurRestore,
-      const DenseSet<const MachineBasicBlock *> &ReachableByDirty,
-      SmallVectorImpl<MachineBasicBlock *> &DirtyPreds,
-      SmallVectorImpl<MachineBasicBlock *> &CleanPreds,
-      const TargetInstrInfo *TII, RegScavenger *RS);
-
   /// Initialize the pass for \p MF.
   void init(MachineFunction &MF) {
     RCI.runOnMachineFunction(MF);
@@ -365,311 +338,18 @@ bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI,
 /// Helper function to find the immediate (post) dominator.
 template <typename ListOfBBs, typename DominanceAnalysis>
 static MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs,
-                                   DominanceAnalysis &Dom, bool Strict = true) {
+                                   DominanceAnalysis &Dom) {
   MachineBasicBlock *IDom = &Block;
   for (MachineBasicBlock *BB : BBs) {
     IDom = Dom.findNearestCommonDominator(IDom, BB);
     if (!IDom)
       break;
   }
-  if (Strict && IDom == &Block)
+  if (IDom == &Block)
     return nullptr;
   return IDom;
 }
 
-static bool isAnalyzableBB(const TargetInstrInfo &TII,
-                           MachineBasicBlock &Entry) {
-  // Check if the block is analyzable.
-  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
-  SmallVector<MachineOperand, 4> Cond;
-  return !TII.analyzeBranch(Entry, TBB, FBB, Cond);
-}
-
-/// Determines if any predecessor of MBB is on the path from block that has use
-/// or def of CSRs/FI to MBB.
-/// ReachableByDirty: All blocks reachable from block that has use or def of
-/// CSR/FI.
-static bool
-hasDirtyPred(const DenseSet<const MachineBasicBlock *> &ReachableByDirty,
-             const MachineBasicBlock &MBB) {
-  for (const MachineBasicBlock *PredBB : MBB.predecessors())
-    if (ReachableByDirty.count(PredBB))
-      return true;
-  return false;
-}
-
-/// Derives the list of all the basic blocks reachable from MBB.
-static void markAllReachable(DenseSet<const MachineBasicBlock *> &Visited,
-                             const MachineBasicBlock &MBB) {
-  SmallVector<MachineBasicBlock *, 4> Worklist(MBB.succ_begin(),
-                                               MBB.succ_end());
-  Visited.insert(&MBB);
-  while (!Worklist.empty()) {
-    MachineBasicBlock *SuccMBB = Worklist.pop_back_val();
-    if (!Visited.insert(SuccMBB).second)
-      continue;
-    Worklist.append(SuccMBB->succ_begin(), SuccMBB->succ_end());
-  }
-}
-
-/// Collect blocks reachable by use or def of CSRs/FI.
-static void collectBlocksReachableByDirty(
-    const DenseSet<const MachineBasicBlock *> &DirtyBBs,
-    DenseSet<const MachineBasicBlock *> &ReachableByDirty) {
-  for (const MachineBasicBlock *MBB : DirtyBBs) {
-    if (ReachableByDirty.count(MBB))
-      continue;
-    // Mark all offsprings as reachable.
-    markAllReachable(ReachableByDirty, *MBB);
-  }
-}
-
-/// \return true if there is a clean path from SavePoint to the original
-/// Restore.
-static bool
-isSaveReachableThroughClean(const MachineBasicBlock *SavePoint,
-                            ArrayRef<MachineBasicBlock *> CleanPreds) {
-  DenseSet<const MachineBasicBlock *> Visited;
-  SmallVector<MachineBasicBlock *, 4> Worklist(CleanPreds.begin(),
-                                               CleanPreds.end());
-  while (!Worklist.empty()) {
-    MachineBasicBlock *CleanBB = Worklist.pop_back_val();
-    if (CleanBB == SavePoint)
-      return true;
-    if (!Visited.insert(CleanBB).second || !CleanBB->pred_size())
-      continue;
-    Worklist.append(CleanBB->pred_begin(), CleanBB->pred_end());
-  }
-  return false;
-}
-
-/// This function updates the branches post restore point split.
-///
-/// Restore point has been split.
-/// Old restore point: MBB
-/// New restore point: NMBB
-/// Any basic block(say BBToUpdate) which had a fallthrough to MBB
-/// previously should
-/// 1. Fallthrough to NMBB iff NMBB is inserted immediately above MBB in the
-/// block layout OR
-/// 2. Branch unconditionally to NMBB iff NMBB is inserted at any other place.
-static void updateTerminator(MachineBasicBlock *BBToUpdate,
-                             MachineBasicBlock *NMBB,
-                             const TargetInstrInfo *TII) {
-  DebugLoc DL = BBToUpdate->findBranchDebugLoc();
-  // if NMBB isn't the new layout successor for BBToUpdate, insert unconditional
-  // branch to it
-  if (!BBToUpdate->isLayoutSuccessor(NMBB))
-    TII->insertUnconditionalBranch(*BBToUpdate, NMBB, DL);
-}
-
-/// This function splits the restore point and returns new restore point/BB.
-///
-/// DirtyPreds: Predessors of \p MBB that are ReachableByDirty
-///
-/// Decision has been made to split the restore point.
-/// old restore point: \p MBB
-/// new restore point: \p NMBB
-/// This function makes the necessary block layout changes so that
-/// 1. \p NMBB points to \p MBB unconditionally
-/// 2. All dirtyPreds that previously pointed to \p MBB point to \p NMBB
-static MachineBasicBlock *
-tryToSplitRestore(MachineBasicBlock *MBB,
-                  ArrayRef<MachineBasicBlock *> DirtyPreds,
-                  const TargetInstrInfo *TII) {
-  MachineFunction *MF = MBB->getParent();
-
-  // get the list of DirtyPreds who have a fallthrough to MBB
-  // before the block layout change. This is just to ensure that if the NMBB is
-  // inserted after MBB, then we create unconditional branch from
-  // DirtyPred/CleanPred to NMBB
-  SmallPtrSet<MachineBasicBlock *, 8> MBBFallthrough;
-  for (MachineBasicBlock *BB : DirtyPreds)
-    if (BB->getFallThrough(false) == MBB)
-      MBBFallthrough.insert(BB);
-
-  MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
-  // Insert this block at the end of the function. Inserting in between may
-  // interfere with control flow optimizer decisions.
-  MF->insert(MF->end(), NMBB);
-
-  for (const MachineBasicBlock::RegisterMaskPair &LI : MBB->liveins())
-    NMBB->addLiveIn(LI.PhysReg);
-
-  TII->insertUnconditionalBranch(*NMBB, MBB, DebugLoc());
-
-  // After splitting, all predecessors of the restore point should be dirty
-  // blocks.
-  for (MachineBasicBlock *SuccBB : DirtyPreds)
-    SuccBB->ReplaceUsesOfBlockWith(MBB, NMBB);
-
-  NMBB->addSuccessor(MBB);
-
-  for (MachineBasicBlock *BBToUpdate : MBBFallthrough)
-    updateTerminator(BBToUpdate, NMBB, TII);
-
-  return NMBB;
-}
-
-/// This function undoes the restore point split done earlier.
-///
-/// DirtyPreds: All predecessors of \p NMBB that are ReachableByDirty.
-///
-/// Restore point was split and the change needs to be unrolled. Make necessary
-/// changes to reset restore point from \p NMBB to \p MBB.
-static void rollbackRestoreSplit(MachineFunction &MF, MachineBasicBlock *NMBB,
-                                 MachineBasicBlock *MBB,
-                                 ArrayRef<MachineBasicBlock *> DirtyPreds,
-                                 const TargetInstrInfo *TII) {
-  // For a BB, if NMBB is fallthrough in the current layout, then in the new
-  // layout a. BB should fallthrough to MBB OR b. BB should undconditionally
-  // branch to MBB
-  SmallPtrSet<MachineBasicBlock *, 8> NMBBFallthrough;
-  for (MachineBasicBlock *BB : DirtyPreds)
-    if (BB->getFallThrough(false) == NMBB)
-      NMBBFallthrough.insert(BB);
-
-  NMBB->removeSuccessor(MBB);
-  for (MachineBasicBlock *SuccBB : DirtyPreds)
-    SuccBB->ReplaceUsesOfBlockWith(NMBB, MBB);
-
-  NMBB->erase(NMBB->begin(), NMBB->end());
-  NMBB->eraseFromParent();
-
-  for (MachineBasicBlock *BBToUpdate : NMBBFallthrough)
-    updateTerminator(BBToUpdate, MBB, TII);
-}
-
-// A block is deemed fit for restore point split iff there exist
-// 1. DirtyPreds - preds of CurRestore reachable from use or def of CSR/FI
-// 2. CleanPreds - preds of CurRestore that arent DirtyPreds
-bool ShrinkWrap::checkIfRestoreSplittable(
-    const MachineBasicBlock *CurRestore,
-    const DenseSet<const MachineBasicBlock *> &ReachableByDirty,
-    SmallVectorImpl<MachineBasicBlock *> &DirtyPreds,
-    SmallVectorImpl<MachineBasicBlock *> &CleanPreds,
-    const TargetInstrInfo *TII, RegScavenger *RS) {
-  for (const MachineInstr &MI : *CurRestore)
-    if (useOrDefCSROrFI(MI, RS))
-      return false;
-
-  for (MachineBasicBlock *PredBB : CurRestore->predecessors()) {
-    if (!isAnalyzableBB(*TII, *PredBB))
-      return false;
-
-    if (ReachableByDirty.count(PredBB))
-      DirtyPreds.push_back(PredBB);
-    else
-      CleanPreds.push_back(PredBB);
-  }
-
-  return !(CleanPreds.empty() || DirtyPreds.empty());
-}
-
-bool ShrinkWrap::postShrinkWrapping(bool HasCandidate, MachineFunction &MF,
-                                    RegScavenger *RS) {
-  if (!EnablePostShrinkWrapOpt)
-    return false;
-
-  MachineBasicBlock *InitSave = nullptr;
-  MachineBasicBlock *InitRestore = nullptr;
-
-  if (HasCandidate) {
-    InitSave = Save;
-    InitRestore = Restore;
-  } else {
-    InitRestore = nullptr;
-    InitSave = &MF.front();
-    for (MachineBasicBlock &MBB : MF) {
-      if (MBB.isEHFuncletEntry())
-        return false;
-      if (MBB.isReturnBlock()) {
-        // Do not support multiple restore points.
-        if (InitRestore)
-          return false;
-        InitRestore = &MBB;
-      }
-    }
-  }
-
-  if (!InitSave || !InitRestore || InitRestore == InitSave ||
-      !MDT->dominates(InitSave, InitRestore) ||
-      !MPDT->dominates(InitRestore, InitSave))
-    return false;
-
-  // Bail out of the optimization if any of the basic block is target of
-  // INLINEASM_BR instruction
-  for (MachineBasicBlock &MBB : MF)
-    if (MBB.isInlineAsmBrIndirectTarget())
-      return false;
-
-  DenseSet<const MachineBasicBlock *> DirtyBBs;
-  for (MachineBasicBlock &MBB : MF) {
-    if (MBB.isEHPad()) {
-      DirtyBBs.insert(&MBB);
-      continue;
-    }
-    for (const MachineInstr &MI : MBB)
-      if (useOrDefCSROrFI(MI, RS)) {
-        DirtyBBs.insert(&MBB);
-        break;
-      }
-  }
-
-  // Find blocks reachable from the use or def of CSRs/FI.
-  DenseSet<const MachineBasicBlock *> ReachableByDirty;
-  collectBlocksReachableByDirty(DirtyBBs, ReachableByDirty);
-
-  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
-  SmallVector<MachineBasicBlock *, 2> DirtyPreds;
-  SmallVector<MachineBasicBlock *, 2> CleanPreds;
-  if (!checkIfRestoreSplittable(InitRestore, ReachableByDirty, DirtyPreds,
-                                CleanPreds, TII, RS))
-    return false;
-
-  // Trying to reach out to the new save point which dominates all dirty blocks.
-  MachineBasicBlock *NewSave =
-      FindIDom<>(**DirtyPreds.begin(), DirtyPreds, *MDT, false);
-
-  while (NewSave && (hasDirtyPred(ReachableByDirty, *NewSave) ||
-                     EntryFreq < MBFI->getBlockFreq(NewSave).getFrequency()))
-    NewSave = FindIDom<>(**NewSave->pred_begin(), NewSave->predecessors(), *MDT,
-                         false);
-
-  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
-  if (!NewSave || NewSave == InitSave ||
-      isSaveReachableThroughClean(NewSave, CleanPreds) ||
-      !TFI->canUseAsPrologue(*NewSave))
-    return false;
-
-  // Now we know that splitting a restore point can isolate the restore point
-  // from clean blocks and doing so can shrink the save point.
-  MachineBasicBlock *NewRestore =
-      tryToSplitRestore(InitRestore, DirtyPreds, TII);
-
-  // Make sure if the new restore point is valid as an epilogue, depending on
-  // targets.
-  if (!TFI->canUseAsEpilogue(*NewRestore)) {
-    rollbackRestoreSplit(MF, NewRestore, InitRestore, DirtyPreds, TII);
-    return false;
-  }
-
-  Save = NewSave;
-  Restore = NewRestore;
-
-  MDT->runOnMachineFunction(MF);
-  MPDT->runOnMachineFunction(MF);
-
-  assert((MDT->dominates(Save, Restore) && MPDT->dominates(Restore, Save)) &&
-         "Incorrect save or restore point due to dominance relations");
-  assert((!MLI->getLoopFor(Save) && !MLI->getLoopFor(Restore)) &&
-         "Unexpected save or restore point in a loop");
-  assert((EntryFreq >= MBFI->getBlockFreq(Save).getFrequency() &&
-          EntryFreq >= MBFI->getBlockFreq(Restore).getFrequency()) &&
-         "Incorrect save or restore point based on block frequency");
-  return true;
-}
-
 void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,
                                          RegScavenger *RS) {
   // Get rid of the easy cases first.
@@ -801,7 +481,31 @@ static bool giveUpWithRemarks(MachineOptimizationRemarkEmitter *ORE,
   return false;
 }
 
-bool ShrinkWrap::performShrinkWrapping(MachineFunction &MF, RegScavenger *RS) {
+bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF))
+    return false;
+
+  LLVM_DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');
+
+  init(MF);
+
+  ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
+  if (containsIrreducibleCFG<MachineBasicBlock *>(RPOT, *MLI)) {
+    // If MF is irreducible, a block may be in a loop without
+    // MachineLoopInfo reporting it. I.e., we may use the
+    // post-dominance property in loops, which lead to incorrect
+    // results. Moreover, we may miss that the prologue and
+    // epilogue are not in the same loop, leading to unbalanced
+    // construction/deconstruction of the stack frame.
+    return giveUpWithRemarks(ORE, "UnsupportedIrreducibleCFG",
+                             "Irreducible CFGs are not supported yet.",
+                             MF.getFunction().getSubprogram(), &MF.front());
+  }
+
+  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+  std::unique_ptr<RegScavenger> RS(
+      TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr);
+
   for (MachineBasicBlock &MBB : MF) {
     LLVM_DEBUG(dbgs() << "Look into: " << MBB.getNumber() << ' '
                       << MBB.getName() << '\n');
@@ -817,7 +521,7 @@ bool ShrinkWrap::performShrinkWrapping(MachineFunction &MF, RegScavenger *RS) {
       // are at least at the boundary of the save and restore points.  The
       // problem is that a basic block can jump out from the middle in these
       // cases, which we do not handle.
-      updateSaveRestorePoints(MBB, RS);
+      updateSaveRestorePoints(MBB, RS.get());
       if (!ArePointsInteresting()) {
         LLVM_DEBUG(dbgs() << "EHPad/inlineasm_br prevents shrink-wrapping\n");
         return false;
@@ -826,11 +530,11 @@ bool ShrinkWrap::performShrinkWrapping(MachineFunction &MF, RegScavenger *RS) {
     }
 
     for (const MachineInstr &MI : MBB) {
-      if (!useOrDefCSROrFI(MI, RS))
+      if (!useOrDefCSROrFI(MI, RS.get()))
         continue;
       // Save (resp. restore) point must dominate (resp. post dominate)
       // MI. Look for the proper basic block for those.
-      updateSaveRestorePoints(MBB, RS);
+      updateSaveRestorePoints(MBB, RS.get());
       // If we are at a point where we cannot improve the placement of
       // save/restore instructions, just give up.
       if (!ArePointsInteresting()) {
@@ -884,49 +588,13 @@ bool ShrinkWrap::performShrinkWrapping(MachineFunction &MF, RegScavenger *RS) {
         break;
       NewBB = Restore;
     }
-    updateSaveRestorePoints(*NewBB, RS);
+    updateSaveRestorePoints(*NewBB, RS.get());
   } while (Save && Restore);
 
   if (!ArePointsInteresting()) {
     ++NumCandidatesDropped;
     return false;
   }
-  return true;
-}
-
-bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
-  if (skipFunction(MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF))
-    return false;
-
-  LLVM_DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');
-
-  init(MF);
-
-  ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
-  if (containsIrreducibleCFG<MachineBasicBlock *>(RPOT, *MLI)) {
-    // If MF is irreducible, a block may be in a loop without
-    // MachineLoopInfo reporting it. I.e., we may use the
-    // post-dominance property in loops, which lead to incorrect
-    // results. Moreover, we may miss that the prologue and
-    // epilogue are not in the same loop, leading to unbalanced
-    // construction/deconstruction of the stack frame.
-    return giveUpWithRemarks(ORE, "UnsupportedIrreducibleCFG",
-                             "Irreducible CFGs are not supported yet.",
-                             MF.getFunction().getSubprogram(), &MF.front());
-  }
-
-  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
-  std::unique_ptr<RegScavenger> RS(
-      TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr);
-
-  bool Changed = false;
-
-  bool HasCandidate = performShrinkWrapping(MF, RS.get());
-  Changed = postShrinkWrapping(HasCandidate, MF, RS.get());
-  if (!HasCandidate && !Changed)
-    return false;
-  if (!ArePointsInteresting())
-    return Changed;
 
   LLVM_DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: "
                     << Save->getNumber() << ' ' << Save->getName()
@@ -937,7 +605,7 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
   MFI.setSavePoint(Save);
   MFI.setRestorePoint(Restore);
   ++NumCandidates;
-  return Changed;
+  return false;
 }
 
 bool ShrinkWrap::isShrinkWrapEnabled(const MachineFunction &MF) {

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
index 8dd4da1ee4401..49a15528c041a 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
@@ -424,8 +424,8 @@ define i16 @red_mla_dup_ext_u8_s8_s16(i8* noalias nocapture noundef readonly %A,
 ; CHECK-NEXT:    mov w8, wzr
 ; CHECK-NEXT:    b .LBB5_7
 ; CHECK-NEXT:  .LBB5_3:
-; CHECK-NEXT:    mov w8, wzr
-; CHECK-NEXT:    b .LBB5_9
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB5_4: // %vector.ph
 ; CHECK-NEXT:    and x11, x10, #0xfffffff0
 ; CHECK-NEXT:    add x8, x0, #8

diff  --git a/llvm/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir b/llvm/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir
index 34fafb750083c..bc60b7b571197 100644
--- a/llvm/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir
+++ b/llvm/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir
@@ -6,8 +6,8 @@
  ; RUN: llc -x=mir -simplify-mir -run-pass=shrink-wrap -o - %s | FileCheck %s
  ; CHECK:      name:            compiler_pop_stack
  ; CHECK:      frameInfo:       
- ; CHECK:      savePoint:       '%bb.1'
- ; CHECK-NEXT: restorePoint:    '%bb.7'
+ ; CHECK-NOT:  savePoint:
+ ; CHECK-NOT:  restorePoint:
  ; CHECK:      stack:
  ; CHECK:      name:            f
  ; CHECK:      frameInfo:       

diff  --git a/llvm/test/CodeGen/AArch64/ragreedy-csr.ll b/llvm/test/CodeGen/AArch64/ragreedy-csr.ll
index 99f01883dbfb1..98c95c38bbb6b 100644
--- a/llvm/test/CodeGen/AArch64/ragreedy-csr.ll
+++ b/llvm/test/CodeGen/AArch64/ragreedy-csr.ll
@@ -21,16 +21,16 @@ declare i32 @__maskrune(i32, i64) #7
 define fastcc i32 @prune_match(ptr nocapture readonly %a, ptr nocapture readonly %b) #9 {
 ; CHECK-LABEL: prune_match:
 ; CHECK:       ; %bb.0: ; %entry
-; CHECK-NEXT:    ldrh w8, [x0]
-; CHECK-NEXT:    ldrh w9, [x1]
-; CHECK-NEXT:    cmp w8, w9
-; CHECK-NEXT:    b.ne LBB0_47
-; CHECK-NEXT:  ; %bb.1: ; %if.end
 ; CHECK-NEXT:    sub sp, sp, #64
 ; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    stp x29, x30, [sp, #48] ; 16-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset w30, -8
 ; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    ldrh w8, [x0]
+; CHECK-NEXT:    ldrh w9, [x1]
+; CHECK-NEXT:    cmp w8, w9
+; CHECK-NEXT:    b.ne LBB0_42
+; CHECK-NEXT:  ; %bb.1: ; %if.end
 ; CHECK-NEXT:  Lloh0:
 ; CHECK-NEXT:    adrp x14, __DefaultRuneLocale@GOTPAGE
 ; CHECK-NEXT:    mov x9, xzr
@@ -243,7 +243,7 @@ define fastcc i32 @prune_match(ptr nocapture readonly %a, ptr nocapture readonly
 ; CHECK-NEXT:    b.eq LBB0_37
 ; CHECK-NEXT:  LBB0_42:
 ; CHECK-NEXT:    mov w0, wzr
-; CHECK-NEXT:  LBB0_43:
+; CHECK-NEXT:  LBB0_43: ; %return
 ; CHECK-NEXT:    ldp x29, x30, [sp, #48] ; 16-byte Folded Reload
 ; CHECK-NEXT:    add sp, sp, #64
 ; CHECK-NEXT:    ret
@@ -259,12 +259,6 @@ define fastcc i32 @prune_match(ptr nocapture readonly %a, ptr nocapture readonly
 ; CHECK-NEXT:  ; %bb.46: ; %land.lhs.true52
 ; CHECK-NEXT:    cbz w8, LBB0_43
 ; CHECK-NEXT:    b LBB0_12
-; CHECK-NEXT:  LBB0_47:
-; CHECK-NEXT:    .cfi_def_cfa wsp, 0
-; CHECK-NEXT:    .cfi_same_value w30
-; CHECK-NEXT:    .cfi_same_value w29
-; CHECK-NEXT:    mov w0, wzr
-; CHECK-NEXT:    ret
 ; CHECK-NEXT:    .loh AdrpLdrGot Lloh0, Lloh1
 ; CHECK-NEXT:    .loh AdrpLdrGot Lloh2, Lloh3
 ; CHECK-NEXT:    .loh AdrpLdrGot Lloh4, Lloh5

diff  --git a/llvm/test/CodeGen/AArch64/shrinkwrap-split-restore-point.mir b/llvm/test/CodeGen/AArch64/shrinkwrap-split-restore-point.mir
deleted file mode 100644
index 5b43dde0ae250..0000000000000
--- a/llvm/test/CodeGen/AArch64/shrinkwrap-split-restore-point.mir
+++ /dev/null
@@ -1,760 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
-# RUN: llc -mtriple=aarch64 -run-pass=shrink-wrap -o - %s | FileCheck %s
-
---- |
-  define void @shrink_test1(i32 %a) {
-  entry:
-    %cmp5 = icmp sgt i32 %a, 0
-    br i1 %cmp5, label %BB0, label %exit
-
-  BB0:                                              ; preds = %entry
-    %call = call i32 @fun()
-    %c = icmp eq i32 %call, 0
-    br i1 %c, label %BB1, label %exit
-
-  BB1:                                              ; preds = %BB0
-    %call2 = call i32 @fun()
-    br label %exit
-
-  exit:                                             ; preds = %BB1, %BB0, %entry
-    ret void
-  }
-
-  define void @shrink_test2(i32 %a, ptr %P1, ptr %P2) {
-  BB00:
-    %cmp5 = icmp sgt i32 %a, 0
-    br i1 %cmp5, label %BB01, label %exit
-
-  BB01:                                             ; preds = %BB00
-    store i32 %a, ptr %P1, align 4
-    %c1 = icmp sgt i32 %a, 1
-    br i1 %c1, label %BB02, label %BB03
-
-  BB02:                                             ; preds = %BB01
-    store i32 %a, ptr %P2, align 4
-    br label %BB03
-
-  BB03:                                             ; preds = %BB02, %BB01
-    %call03 = call i32 @fun()
-    %c03 = icmp eq i32 %call03, 0
-    br i1 %c03, label %BB04, label %BB05
-
-  BB04:                                             ; preds = %BB03
-    %call04 = call i32 @fun()
-    br label %BB05
-
-  BB05:                                             ; preds = %BB04, %BB03
-    %call05 = call i32 @fun()
-    %c05 = icmp eq i32 %call05, 0
-    br i1 %c05, label %BB06, label %BB07
-
-  BB06:                                             ; preds = %BB05
-    %call06 = call i32 @fun()
-    br label %exit
-
-  BB07:                                             ; preds = %BB05
-    %call07 = call i32 @fun2()
-    br label %exit
-
-  exit:                                             ; preds = %BB07, %BB06, %BB00
-    ret void
-  }
-
-  define void @noshrink_test1(i32 %a, i32 %v, i32 %v2) {
-  entry:
-    %cmp5 = icmp sgt i32 %a, 0
-    br i1 %cmp5, label %BB0, label %exit
-
-  BB0:                                              ; preds = %entry
-    %c = icmp eq i32 %a, 10
-    %c1 = icmp eq i32 %v, 10
-    %or.cond = select i1 %c, i1 %c1, i1 false
-    br i1 %or.cond, label %BB3, label %BB2
-
-  BB2:                                              ; preds = %BB0
-    %c2 = icmp eq i32 %v2, 10
-    br i1 %c2, label %BB4, label %exit
-
-  BB3:                                              ; preds = %BB0
-    %call3 = call i32 @fun()
-    br label %exit
-
-  BB4:                                              ; preds = %BB2
-    %call4 = call i32 @fun2()
-    br label %exit
-
-  exit:                                             ; preds = %BB4, %BB3, %BB2, %entry
-    ret void
-  }
-
-  define void @noshrink_test2(i32 %a) {
-  BB00:
-    %cmp5 = icmp sgt i32 %a, 0
-    br i1 %cmp5, label %BB01, label %InfLoop.preheader
-
-  InfLoop.preheader:                                ; preds = %BB00
-    br label %InfLoop
-
-  BB01:                                             ; preds = %BB00
-    %call = call i32 @fun()
-    %c = icmp eq i32 %call, 0
-    br i1 %c, label %BB02, label %exit
-
-  BB02:                                             ; preds = %BB01
-    %call2 = call i32 @fun()
-    br label %exit
-
-  InfLoop:                                          ; preds = %InfLoop.preheader, %InfLoop
-    %call3 = call i32 @fun()
-    br label %InfLoop
-
-  exit:                                             ; preds = %BB02, %BB01
-    ret void
-  }
-
-  define void @noshrink_test3(i32 %a) {
-  BB00:
-    %cmp5 = icmp sgt i32 %a, 0
-    %call02 = call i32 @fun()
-    br i1 %cmp5, label %BB02, label %BB01
-
-  BB01:                                             ; preds = %BB00
-    %0 = icmp eq i32 %call02, 0
-    br i1 %0, label %BB01.1, label %exit
-
-  BB01.1:                                           ; preds = %BB01
-    call void @abort() #0
-    unreachable
-
-  BB02:                                             ; preds = %BB00
-    %1 = icmp eq i32 %call02, 0
-    br i1 %1, label %BB03, label %BB04
-
-  BB03:                                             ; preds = %BB02
-    %call03 = call i32 @fun()
-    %c03 = icmp eq i32 %call03, 0
-    br i1 %c03, label %BB04, label %exit
-
-  BB04:                                             ; preds = %BB03, %BB02
-    %call04 = call i32 @fun()
-    br label %exit
-
-  exit:                                             ; preds = %BB04, %BB03, %BB01
-    ret void
-  }
-
-  define void @noshrink_bb_as_inlineasmbr_target(i1 %cond) {
-  entry:
-    br i1 %cond, label %0, label %exit
-
-  0:                                                ; preds = %entry
-    callbr void asm sideeffect "", "!i,~{flags}"()
-      to label %1 [label %exit]
-
-  1:                                                ; preds = %0
-    call void @dosomething()
-    br label %exit
-
-  exit:                                             ; preds = %1, %0, %entry
-    ret void
-  }
-
-  declare i32 @fun()
-  declare i32 @fun2()
-  declare void @abort()
-  declare void @dosomething()
-...
----
-name:            shrink_test1
-alignment:       4
-tracksRegLiveness: true
-tracksDebugUserValues: true
-liveins:
-  - { reg: '$w0' }
-frameInfo:
-  maxAlignment:    1
-  adjustsStack:    true
-  hasCalls:        true
-  maxCallFrameSize: 0
-machineFunctionInfo: {}
-body:             |
-  ; CHECK-LABEL: name: shrink_test1
-  ; CHECK: bb.0.entry:
-  ; CHECK-NEXT:   successors: %bb.1(0x50000000), %bb.3(0x30000000)
-  ; CHECK-NEXT:   liveins: $w0
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   dead $wzr = SUBSWri killed renamable $w0, 1, 0, implicit-def $nzcv
-  ; CHECK-NEXT:   Bcc 11, %bb.3, implicit killed $nzcv
-  ; CHECK-NEXT:   B %bb.1
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.1.BB0:
-  ; CHECK-NEXT:   successors: %bb.2(0x30000000), %bb.4(0x50000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   CBNZW killed renamable $w0, %bb.4
-  ; CHECK-NEXT:   B %bb.2
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.2.BB1:
-  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   B %bb.4
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.3.exit:
-  ; CHECK-NEXT:   RET_ReallyLR
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.4:
-  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   B %bb.3
-  bb.0.entry:
-    successors: %bb.1(0x50000000), %bb.3(0x30000000)
-    liveins: $w0
-
-    dead $wzr = SUBSWri killed renamable $w0, 1, 0, implicit-def $nzcv
-    Bcc 11, %bb.3, implicit killed $nzcv
-    B %bb.1
-
-  bb.1.BB0:
-    successors: %bb.2(0x30000000), %bb.3(0x50000000)
-
-    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-    BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
-    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-    CBNZW killed renamable $w0, %bb.3
-    B %bb.2
-
-  bb.2.BB1:
-    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-    BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-
-  bb.3.exit:
-    RET_ReallyLR
-
-...
----
-name:            shrink_test2
-alignment:       4
-tracksRegLiveness: true
-tracksDebugUserValues: true
-liveins:
-  - { reg: '$w0' }
-  - { reg: '$x1' }
-  - { reg: '$x2' }
-frameInfo:
-  maxAlignment:    1
-  adjustsStack:    true
-  hasCalls:        true
-  maxCallFrameSize: 0
-machineFunctionInfo: {}
-body:             |
-  ; CHECK-LABEL: name: shrink_test2
-  ; CHECK: bb.0.BB00:
-  ; CHECK-NEXT:   successors: %bb.1(0x50000000), %bb.8(0x30000000)
-  ; CHECK-NEXT:   liveins: $w0, $x1, $x2
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv
-  ; CHECK-NEXT:   Bcc 11, %bb.8, implicit killed $nzcv
-  ; CHECK-NEXT:   B %bb.1
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.1.BB01:
-  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
-  ; CHECK-NEXT:   liveins: $w0, $x1, $x2
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   dead $wzr = SUBSWri renamable $w0, 2, 0, implicit-def $nzcv
-  ; CHECK-NEXT:   STRWui renamable $w0, killed renamable $x1, 0 :: (store (s32) into %ir.P1)
-  ; CHECK-NEXT:   Bcc 11, %bb.3, implicit killed $nzcv
-  ; CHECK-NEXT:   B %bb.2
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.2.BB02:
-  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
-  ; CHECK-NEXT:   liveins: $w0, $x2
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   STRWui killed renamable $w0, killed renamable $x2, 0 :: (store (s32) into %ir.P2)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.3.BB03:
-  ; CHECK-NEXT:   successors: %bb.4(0x30000000), %bb.5(0x50000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   CBNZW killed renamable $w0, %bb.5
-  ; CHECK-NEXT:   B %bb.4
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.4.BB04:
-  ; CHECK-NEXT:   successors: %bb.5(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.5.BB05:
-  ; CHECK-NEXT:   successors: %bb.6(0x30000000), %bb.7(0x50000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   CBNZW killed renamable $w0, %bb.7
-  ; CHECK-NEXT:   B %bb.6
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.6.BB06:
-  ; CHECK-NEXT:   successors: %bb.9(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   B %bb.9
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.7.BB07:
-  ; CHECK-NEXT:   successors: %bb.9(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   BL @fun2, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   B %bb.9
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.8.exit:
-  ; CHECK-NEXT:   RET_ReallyLR
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.9:
-  ; CHECK-NEXT:   successors: %bb.8(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   B %bb.8
-  bb.0.BB00:
-    successors: %bb.1(0x50000000), %bb.8(0x30000000)
-    liveins: $w0, $x1, $x2
-
-    dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv
-    Bcc 11, %bb.8, implicit killed $nzcv
-    B %bb.1
-
-  bb.1.BB01:
-    successors: %bb.2, %bb.3
-    liveins: $w0, $x1, $x2
-
-    dead $wzr = SUBSWri renamable $w0, 2, 0, implicit-def $nzcv
-    STRWui renamable $w0, killed renamable $x1, 0 :: (store (s32) into %ir.P1)
-    Bcc 11, %bb.3, implicit killed $nzcv
-    B %bb.2
-
-  bb.2.BB02:
-    liveins: $w0, $x2
-
-    STRWui killed renamable $w0, killed renamable $x2, 0 :: (store (s32) into %ir.P2)
-
-  bb.3.BB03:
-    successors: %bb.4(0x30000000), %bb.5(0x50000000)
-
-    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-    BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
-    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-    CBNZW killed renamable $w0, %bb.5
-    B %bb.4
-
-  bb.4.BB04:
-    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-    BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-
-  bb.5.BB05:
-    successors: %bb.6(0x30000000), %bb.7(0x50000000)
-
-    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-    BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
-    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-    CBNZW killed renamable $w0, %bb.7
-    B %bb.6
-
-  bb.6.BB06:
-    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-    BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-    B %bb.8
-
-  bb.7.BB07:
-    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-    BL @fun2, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-
-  bb.8.exit:
-    RET_ReallyLR
-
-...
----
-name:            noshrink_test1
-alignment:       4
-tracksRegLiveness: true
-tracksDebugUserValues: true
-liveins:
-  - { reg: '$w0' }
-  - { reg: '$w1' }
-  - { reg: '$w2' }
-frameInfo:
-  maxAlignment:    1
-  adjustsStack:    true
-  hasCalls:        true
-  maxCallFrameSize: 0
-machineFunctionInfo: {}
-body:             |
-  ; CHECK-LABEL: name: noshrink_test1
-  ; CHECK: bb.0.entry:
-  ; CHECK-NEXT:   successors: %bb.1(0x50000000), %bb.6(0x30000000)
-  ; CHECK-NEXT:   liveins: $w0, $w1, $w2
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv
-  ; CHECK-NEXT:   Bcc 11, %bb.6, implicit killed $nzcv
-  ; CHECK-NEXT:   B %bb.1
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.1.BB0:
-  ; CHECK-NEXT:   successors: %bb.2(0x60000000), %bb.3(0x20000000)
-  ; CHECK-NEXT:   liveins: $w0, $w1, $w2
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   dead $wzr = SUBSWri killed renamable $w0, 10, 0, implicit-def $nzcv
-  ; CHECK-NEXT:   Bcc 1, %bb.3, implicit killed $nzcv
-  ; CHECK-NEXT:   B %bb.2
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.2.BB0:
-  ; CHECK-NEXT:   successors: %bb.4(0x55555555), %bb.3(0x2aaaaaab)
-  ; CHECK-NEXT:   liveins: $w1, $w2
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   dead $wzr = SUBSWri killed renamable $w1, 10, 0, implicit-def $nzcv
-  ; CHECK-NEXT:   Bcc 0, %bb.4, implicit killed $nzcv
-  ; CHECK-NEXT:   B %bb.3
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.3.BB2:
-  ; CHECK-NEXT:   successors: %bb.5(0x40000000), %bb.6(0x40000000)
-  ; CHECK-NEXT:   liveins: $w2
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   dead $wzr = SUBSWri killed renamable $w2, 10, 0, implicit-def $nzcv
-  ; CHECK-NEXT:   Bcc 0, %bb.5, implicit killed $nzcv
-  ; CHECK-NEXT:   B %bb.6
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.4.BB3:
-  ; CHECK-NEXT:   successors: %bb.6(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   B %bb.6
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.5.BB4:
-  ; CHECK-NEXT:   successors: %bb.6(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   BL @fun2, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.6.exit:
-  ; CHECK-NEXT:   RET_ReallyLR
-  bb.0.entry:
-    successors: %bb.1(0x50000000), %bb.6(0x30000000)
-    liveins: $w0, $w1, $w2
-
-    dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv
-    Bcc 11, %bb.6, implicit killed $nzcv
-    B %bb.1
-
-  bb.1.BB0:
-    successors: %bb.2(0x60000000), %bb.3(0x20000000)
-    liveins: $w0, $w1, $w2
-
-    dead $wzr = SUBSWri killed renamable $w0, 10, 0, implicit-def $nzcv
-    Bcc 1, %bb.3, implicit killed $nzcv
-    B %bb.2
-
-  bb.2.BB0:
-    successors: %bb.4(0x55555555), %bb.3(0x2aaaaaab)
-    liveins: $w1, $w2
-
-    dead $wzr = SUBSWri killed renamable $w1, 10, 0, implicit-def $nzcv
-    Bcc 0, %bb.4, implicit killed $nzcv
-    B %bb.3
-
-  bb.3.BB2:
-    liveins: $w2
-
-    dead $wzr = SUBSWri killed renamable $w2, 10, 0, implicit-def $nzcv
-    Bcc 0, %bb.5, implicit killed $nzcv
-    B %bb.6
-
-  bb.4.BB3:
-    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-    BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-    B %bb.6
-
-  bb.5.BB4:
-    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-    BL @fun2, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-
-  bb.6.exit:
-    RET_ReallyLR
-
-...
----
-name:            noshrink_test2
-alignment:       4
-tracksRegLiveness: true
-tracksDebugUserValues: true
-liveins:
-  - { reg: '$w0' }
-frameInfo:
-  maxAlignment:    1
-  adjustsStack:    true
-  hasCalls:        true
-  maxCallFrameSize: 0
-machineFunctionInfo: {}
-body:             |
-  ; CHECK-LABEL: name: noshrink_test2
-  ; CHECK: bb.0.BB00:
-  ; CHECK-NEXT:   successors: %bb.2(0x50000000), %bb.1(0x30000000)
-  ; CHECK-NEXT:   liveins: $w0
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   dead $wzr = SUBSWri killed renamable $w0, 0, 0, implicit-def $nzcv
-  ; CHECK-NEXT:   Bcc 12, %bb.2, implicit killed $nzcv
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.1:
-  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   B %bb.4
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.2.BB01:
-  ; CHECK-NEXT:   successors: %bb.3(0x30000000), %bb.5(0x50000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   CBNZW killed renamable $w0, %bb.5
-  ; CHECK-NEXT:   B %bb.3
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.3.BB02:
-  ; CHECK-NEXT:   successors: %bb.5(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   B %bb.5
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.4.InfLoop:
-  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   B %bb.4
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.5.exit:
-  ; CHECK-NEXT:   RET_ReallyLR
-  bb.0.BB00:
-    successors: %bb.2(0x50000000), %bb.1(0x30000000)
-    liveins: $w0
-
-    dead $wzr = SUBSWri killed renamable $w0, 0, 0, implicit-def $nzcv
-    Bcc 12, %bb.2, implicit killed $nzcv
-
-  bb.1:
-    B %bb.4
-
-  bb.2.BB01:
-    successors: %bb.3(0x30000000), %bb.5(0x50000000)
-
-    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-    BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
-    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-    CBNZW killed renamable $w0, %bb.5
-    B %bb.3
-
-  bb.3.BB02:
-    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-    BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-    B %bb.5
-
-  bb.4.InfLoop:
-    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-    BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-    B %bb.4
-
-  bb.5.exit:
-    RET_ReallyLR
-
-...
----
-name:            noshrink_test3
-alignment:       4
-tracksRegLiveness: true
-tracksDebugUserValues: true
-liveins:
-  - { reg: '$w0' }
-frameInfo:
-  maxAlignment:    1
-  adjustsStack:    true
-  hasCalls:        true
-  maxCallFrameSize: 0
-machineFunctionInfo: {}
-body:             |
-  ; CHECK-LABEL: name: noshrink_test3
-  ; CHECK: bb.0.BB00:
-  ; CHECK-NEXT:   successors: %bb.3(0x50000000), %bb.1(0x30000000)
-  ; CHECK-NEXT:   liveins: $w0
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w19 = COPY $w0
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   dead $wzr = SUBSWri killed renamable $w19, 0, 0, implicit-def $nzcv
-  ; CHECK-NEXT:   Bcc 12, %bb.3, implicit killed $nzcv
-  ; CHECK-NEXT:   B %bb.1
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.1.BB01:
-  ; CHECK-NEXT:   successors: %bb.2(0x00000800), %bb.6(0x7ffff800)
-  ; CHECK-NEXT:   liveins: $w0
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   CBNZW killed renamable $w0, %bb.6
-  ; CHECK-NEXT:   B %bb.2
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.2.BB01.1:
-  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   BL @abort, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.3.BB02:
-  ; CHECK-NEXT:   successors: %bb.4(0x30000000), %bb.5(0x50000000)
-  ; CHECK-NEXT:   liveins: $w0
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   CBNZW killed renamable $w0, %bb.5
-  ; CHECK-NEXT:   B %bb.4
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.4.BB03:
-  ; CHECK-NEXT:   successors: %bb.5(0x30000000), %bb.6(0x50000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   CBNZW killed renamable $w0, %bb.6
-  ; CHECK-NEXT:   B %bb.5
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.5.BB04:
-  ; CHECK-NEXT:   successors: %bb.6(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.6.exit:
-  ; CHECK-NEXT:   RET_ReallyLR
-  bb.0.BB00:
-    successors: %bb.3(0x50000000), %bb.1(0x30000000)
-    liveins: $w0
-
-    renamable $w19 = COPY $w0
-    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-    BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
-    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-    dead $wzr = SUBSWri killed renamable $w19, 0, 0, implicit-def $nzcv
-    Bcc 12, %bb.3, implicit killed $nzcv
-    B %bb.1
-
-  bb.1.BB01:
-    successors: %bb.2(0x00000800), %bb.6(0x7ffff800)
-    liveins: $w0
-
-    CBNZW killed renamable $w0, %bb.6
-    B %bb.2
-
-  bb.2.BB01.1:
-
-    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-    BL @abort, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
-    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-
-  bb.3.BB02:
-    successors: %bb.4(0x30000000), %bb.5(0x50000000)
-    liveins: $w0
-
-    CBNZW killed renamable $w0, %bb.5
-    B %bb.4
-
-  bb.4.BB03:
-    successors: %bb.5(0x30000000), %bb.6(0x50000000)
-
-    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-    BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
-    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-    CBNZW killed renamable $w0, %bb.6
-    B %bb.5
-
-  bb.5.BB04:
-    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-    BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
-    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-
-  bb.6.exit:
-    RET_ReallyLR
-
-...
----
-name:            noshrink_bb_as_inlineasmbr_target
-registers:       []
-liveins:
-  - { reg: '$w0', virtual-reg: '' }
-frameInfo:
-  savePoint:       ''
-  restorePoint:    ''
-body:             |
-  ; CHECK-LABEL: name: noshrink_bb_as_inlineasmbr_target
-  ; CHECK: bb.0.entry:
-  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
-  ; CHECK-NEXT:   liveins: $w0
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   TBZW killed renamable $w0, 0, %bb.3
-  ; CHECK-NEXT:   B %bb.1
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   successors: %bb.2(0x80000000), %bb.3(0x00000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   INLINEASM_BR &"", 1 /* sideeffect attdialect */, 13 /* imm */, %bb.3
-  ; CHECK-NEXT:   B %bb.2
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.2 (%ir-block.1):
-  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   BL @dosomething, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.3.exit (machine-block-address-taken, inlineasm-br-indirect-target):
-  ; CHECK-NEXT:   RET_ReallyLR
-  bb.0.entry:
-    successors: %bb.1(0x40000000), %bb.3(0x40000000)
-    liveins: $w0
-
-    TBZW killed renamable $w0, 0, %bb.3
-    B %bb.1
-
-  bb.1 (%ir-block.0):
-    successors: %bb.2(0x80000000), %bb.3(0x00000000)
-
-    INLINEASM_BR &"", 1 /* sideeffect attdialect */, 13 /* imm */, %bb.3
-    B %bb.2
-
-  bb.2 (%ir-block.1):
-    successors: %bb.3(0x80000000)
-
-    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
-    BL @dosomething, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
-    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-
-  bb.3.exit (machine-block-address-taken, inlineasm-br-indirect-target):
-    RET_ReallyLR
-
-...

diff  --git a/llvm/test/CodeGen/AArch64/taildup-cfi.ll b/llvm/test/CodeGen/AArch64/taildup-cfi.ll
index 4a87ceefbcf03..221503009cdb6 100644
--- a/llvm/test/CodeGen/AArch64/taildup-cfi.ll
+++ b/llvm/test/CodeGen/AArch64/taildup-cfi.ll
@@ -32,7 +32,7 @@ if.then:                                          ; preds = %entry
   store i32 0, ptr @f, align 4, !tbaa !2
   br label %if.end
 
-; DARWIN:             Merging into block
+; DARWIN-NOT:           Merging into block
 ; LINUX:    	      Merging into block
 
 if.end:                                           ; preds = %entry.if.end_crit_edge, %if.then

diff  --git a/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll b/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll
index e45985136cf34..050696ad653eb 100644
--- a/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll
+++ b/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll
@@ -5,11 +5,11 @@
 define i32 @add_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) {
 ; CHECK-LE-LABEL: add_user:
 ; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    .save {r4, lr}
+; CHECK-LE-NEXT:    push {r4, lr}
 ; CHECK-LE-NEXT:    cmp r0, #1
 ; CHECK-LE-NEXT:    blt .LBB0_4
 ; CHECK-LE-NEXT:  @ %bb.1: @ %for.body.preheader
-; CHECK-LE-NEXT:    .save {r4, lr}
-; CHECK-LE-NEXT:    push {r4, lr}
 ; CHECK-LE-NEXT:    sub.w lr, r3, #2
 ; CHECK-LE-NEXT:    subs r2, #2
 ; CHECK-LE-NEXT:    mov.w r12, #0
@@ -22,23 +22,22 @@ define i32 @add_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture reado
 ; CHECK-LE-NEXT:    sxtah r1, r1, r3
 ; CHECK-LE-NEXT:    smlad r12, r4, r3, r12
 ; CHECK-LE-NEXT:    bne .LBB0_2
-; CHECK-LE-NEXT:  @ %bb.3:
-; CHECK-LE-NEXT:    pop.w {r4, lr}
+; CHECK-LE-NEXT:  @ %bb.3: @ %for.cond.cleanup
 ; CHECK-LE-NEXT:    add.w r0, r12, r1
-; CHECK-LE-NEXT:    bx lr
+; CHECK-LE-NEXT:    pop {r4, pc}
 ; CHECK-LE-NEXT:  .LBB0_4:
 ; CHECK-LE-NEXT:    mov.w r12, #0
 ; CHECK-LE-NEXT:    movs r1, #0
 ; CHECK-LE-NEXT:    add.w r0, r12, r1
-; CHECK-LE-NEXT:    bx lr
+; CHECK-LE-NEXT:    pop {r4, pc}
 ;
 ; CHECK-BE-LABEL: add_user:
 ; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    .save {r4, r5, r7, lr}
+; CHECK-BE-NEXT:    push {r4, r5, r7, lr}
 ; CHECK-BE-NEXT:    cmp r0, #1
 ; CHECK-BE-NEXT:    blt .LBB0_4
 ; CHECK-BE-NEXT:  @ %bb.1: @ %for.body.preheader
-; CHECK-BE-NEXT:    .save {r4, r5, r7, lr}
-; CHECK-BE-NEXT:    push {r4, r5, r7, lr}
 ; CHECK-BE-NEXT:    subs r3, #2
 ; CHECK-BE-NEXT:    subs r2, #2
 ; CHECK-BE-NEXT:    mov.w r12, #0
@@ -54,15 +53,14 @@ define i32 @add_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture reado
 ; CHECK-BE-NEXT:    ldrsh.w r4, [r3, #2]
 ; CHECK-BE-NEXT:    smlabb r12, r5, r4, r12
 ; CHECK-BE-NEXT:    bne .LBB0_2
-; CHECK-BE-NEXT:  @ %bb.3:
-; CHECK-BE-NEXT:    pop.w {r4, r5, r7, lr}
+; CHECK-BE-NEXT:  @ %bb.3: @ %for.cond.cleanup
 ; CHECK-BE-NEXT:    add.w r0, r12, r1
-; CHECK-BE-NEXT:    bx lr
+; CHECK-BE-NEXT:    pop {r4, r5, r7, pc}
 ; CHECK-BE-NEXT:  .LBB0_4:
 ; CHECK-BE-NEXT:    mov.w r12, #0
 ; CHECK-BE-NEXT:    movs r1, #0
 ; CHECK-BE-NEXT:    add.w r0, r12, r1
-; CHECK-BE-NEXT:    bx lr
+; CHECK-BE-NEXT:    pop {r4, r5, r7, pc}
 entry:
   %cmp24 = icmp sgt i32 %arg, 0
   br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup
@@ -107,11 +105,11 @@ for.body:
 define i32 @mul_bottom_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) {
 ; CHECK-LE-LABEL: mul_bottom_user:
 ; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    .save {r4, lr}
+; CHECK-LE-NEXT:    push {r4, lr}
 ; CHECK-LE-NEXT:    cmp r0, #1
 ; CHECK-LE-NEXT:    blt .LBB1_4
 ; CHECK-LE-NEXT:  @ %bb.1: @ %for.body.preheader
-; CHECK-LE-NEXT:    .save {r4, lr}
-; CHECK-LE-NEXT:    push {r4, lr}
 ; CHECK-LE-NEXT:    sub.w lr, r3, #2
 ; CHECK-LE-NEXT:    subs r2, #2
 ; CHECK-LE-NEXT:    mov.w r12, #0
@@ -125,23 +123,22 @@ define i32 @mul_bottom_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocaptur
 ; CHECK-LE-NEXT:    sxth r3, r3
 ; CHECK-LE-NEXT:    mul r1, r3, r1
 ; CHECK-LE-NEXT:    bne .LBB1_2
-; CHECK-LE-NEXT:  @ %bb.3:
-; CHECK-LE-NEXT:    pop.w {r4, lr}
+; CHECK-LE-NEXT:  @ %bb.3: @ %for.cond.cleanup
 ; CHECK-LE-NEXT:    add.w r0, r12, r1
-; CHECK-LE-NEXT:    bx lr
+; CHECK-LE-NEXT:    pop {r4, pc}
 ; CHECK-LE-NEXT:  .LBB1_4:
 ; CHECK-LE-NEXT:    mov.w r12, #0
 ; CHECK-LE-NEXT:    movs r1, #0
 ; CHECK-LE-NEXT:    add.w r0, r12, r1
-; CHECK-LE-NEXT:    bx lr
+; CHECK-LE-NEXT:    pop {r4, pc}
 ;
 ; CHECK-BE-LABEL: mul_bottom_user:
 ; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    .save {r4, r5, r7, lr}
+; CHECK-BE-NEXT:    push {r4, r5, r7, lr}
 ; CHECK-BE-NEXT:    cmp r0, #1
 ; CHECK-BE-NEXT:    blt .LBB1_4
 ; CHECK-BE-NEXT:  @ %bb.1: @ %for.body.preheader
-; CHECK-BE-NEXT:    .save {r4, r5, r7, lr}
-; CHECK-BE-NEXT:    push {r4, r5, r7, lr}
 ; CHECK-BE-NEXT:    subs r3, #2
 ; CHECK-BE-NEXT:    subs r2, #2
 ; CHECK-BE-NEXT:    mov.w r12, #0
@@ -157,15 +154,14 @@ define i32 @mul_bottom_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocaptur
 ; CHECK-BE-NEXT:    ldrsh.w r4, [r3, #2]
 ; CHECK-BE-NEXT:    smlabb r12, r5, r4, r12
 ; CHECK-BE-NEXT:    bne .LBB1_2
-; CHECK-BE-NEXT:  @ %bb.3:
-; CHECK-BE-NEXT:    pop.w {r4, r5, r7, lr}
+; CHECK-BE-NEXT:  @ %bb.3: @ %for.cond.cleanup
 ; CHECK-BE-NEXT:    add.w r0, r12, r1
-; CHECK-BE-NEXT:    bx lr
+; CHECK-BE-NEXT:    pop {r4, r5, r7, pc}
 ; CHECK-BE-NEXT:  .LBB1_4:
 ; CHECK-BE-NEXT:    mov.w r12, #0
 ; CHECK-BE-NEXT:    movs r1, #0
 ; CHECK-BE-NEXT:    add.w r0, r12, r1
-; CHECK-BE-NEXT:    bx lr
+; CHECK-BE-NEXT:    pop {r4, r5, r7, pc}
 entry:
   %cmp24 = icmp sgt i32 %arg, 0
   br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup
@@ -210,11 +206,11 @@ for.body:
 define i32 @mul_top_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) {
 ; CHECK-LE-LABEL: mul_top_user:
 ; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    .save {r4, lr}
+; CHECK-LE-NEXT:    push {r4, lr}
 ; CHECK-LE-NEXT:    cmp r0, #1
 ; CHECK-LE-NEXT:    blt .LBB2_4
 ; CHECK-LE-NEXT:  @ %bb.1: @ %for.body.preheader
-; CHECK-LE-NEXT:    .save {r4, lr}
-; CHECK-LE-NEXT:    push {r4, lr}
 ; CHECK-LE-NEXT:    subs r3, #2
 ; CHECK-LE-NEXT:    subs r2, #2
 ; CHECK-LE-NEXT:    mov.w r12, #0
@@ -228,23 +224,22 @@ define i32 @mul_top_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture r
 ; CHECK-LE-NEXT:    asr.w r4, r4, #16
 ; CHECK-LE-NEXT:    mul r1, r4, r1
 ; CHECK-LE-NEXT:    bne .LBB2_2
-; CHECK-LE-NEXT:  @ %bb.3:
-; CHECK-LE-NEXT:    pop.w {r4, lr}
+; CHECK-LE-NEXT:  @ %bb.3: @ %for.cond.cleanup
 ; CHECK-LE-NEXT:    add.w r0, r12, r1
-; CHECK-LE-NEXT:    bx lr
+; CHECK-LE-NEXT:    pop {r4, pc}
 ; CHECK-LE-NEXT:  .LBB2_4:
 ; CHECK-LE-NEXT:    mov.w r12, #0
 ; CHECK-LE-NEXT:    movs r1, #0
 ; CHECK-LE-NEXT:    add.w r0, r12, r1
-; CHECK-LE-NEXT:    bx lr
+; CHECK-LE-NEXT:    pop {r4, pc}
 ;
 ; CHECK-BE-LABEL: mul_top_user:
 ; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    .save {r4, lr}
+; CHECK-BE-NEXT:    push {r4, lr}
 ; CHECK-BE-NEXT:    cmp r0, #1
 ; CHECK-BE-NEXT:    blt .LBB2_4
 ; CHECK-BE-NEXT:  @ %bb.1: @ %for.body.preheader
-; CHECK-BE-NEXT:    .save {r4, lr}
-; CHECK-BE-NEXT:    push {r4, lr}
 ; CHECK-BE-NEXT:    subs r3, #2
 ; CHECK-BE-NEXT:    subs r2, #2
 ; CHECK-BE-NEXT:    mov.w r12, #0
@@ -260,15 +255,14 @@ define i32 @mul_top_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture r
 ; CHECK-BE-NEXT:    mul r1, r4, r1
 ; CHECK-BE-NEXT:    smlabb r12, r4, lr, r12
 ; CHECK-BE-NEXT:    bne .LBB2_2
-; CHECK-BE-NEXT:  @ %bb.3:
-; CHECK-BE-NEXT:    pop.w {r4, lr}
+; CHECK-BE-NEXT:  @ %bb.3: @ %for.cond.cleanup
 ; CHECK-BE-NEXT:    add.w r0, r12, r1
-; CHECK-BE-NEXT:    bx lr
+; CHECK-BE-NEXT:    pop {r4, pc}
 ; CHECK-BE-NEXT:  .LBB2_4:
 ; CHECK-BE-NEXT:    mov.w r12, #0
 ; CHECK-BE-NEXT:    movs r1, #0
 ; CHECK-BE-NEXT:    add.w r0, r12, r1
-; CHECK-BE-NEXT:    bx lr
+; CHECK-BE-NEXT:    pop {r4, pc}
 entry:
   %cmp24 = icmp sgt i32 %arg, 0
   br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup
@@ -313,11 +307,11 @@ for.body:
 define i32 @and_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) {
 ; CHECK-LE-LABEL: and_user:
 ; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    .save {r4, lr}
+; CHECK-LE-NEXT:    push {r4, lr}
 ; CHECK-LE-NEXT:    cmp r0, #1
 ; CHECK-LE-NEXT:    blt .LBB3_4
 ; CHECK-LE-NEXT:  @ %bb.1: @ %for.body.preheader
-; CHECK-LE-NEXT:    .save {r4, lr}
-; CHECK-LE-NEXT:    push {r4, lr}
 ; CHECK-LE-NEXT:    sub.w lr, r3, #2
 ; CHECK-LE-NEXT:    subs r2, #2
 ; CHECK-LE-NEXT:    mov.w r12, #0
@@ -331,23 +325,22 @@ define i32 @and_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture reado
 ; CHECK-LE-NEXT:    uxth r3, r3
 ; CHECK-LE-NEXT:    mul r1, r3, r1
 ; CHECK-LE-NEXT:    bne .LBB3_2
-; CHECK-LE-NEXT:  @ %bb.3:
-; CHECK-LE-NEXT:    pop.w {r4, lr}
+; CHECK-LE-NEXT:  @ %bb.3: @ %for.cond.cleanup
 ; CHECK-LE-NEXT:    add.w r0, r12, r1
-; CHECK-LE-NEXT:    bx lr
+; CHECK-LE-NEXT:    pop {r4, pc}
 ; CHECK-LE-NEXT:  .LBB3_4:
 ; CHECK-LE-NEXT:    mov.w r12, #0
 ; CHECK-LE-NEXT:    movs r1, #0
 ; CHECK-LE-NEXT:    add.w r0, r12, r1
-; CHECK-LE-NEXT:    bx lr
+; CHECK-LE-NEXT:    pop {r4, pc}
 ;
 ; CHECK-BE-LABEL: and_user:
 ; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    .save {r4, r5, r7, lr}
+; CHECK-BE-NEXT:    push {r4, r5, r7, lr}
 ; CHECK-BE-NEXT:    cmp r0, #1
 ; CHECK-BE-NEXT:    blt .LBB3_4
 ; CHECK-BE-NEXT:  @ %bb.1: @ %for.body.preheader
-; CHECK-BE-NEXT:    .save {r4, r5, r7, lr}
-; CHECK-BE-NEXT:    push {r4, r5, r7, lr}
 ; CHECK-BE-NEXT:    subs r3, #2
 ; CHECK-BE-NEXT:    subs r2, #2
 ; CHECK-BE-NEXT:    mov.w r12, #0
@@ -363,15 +356,14 @@ define i32 @and_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture reado
 ; CHECK-BE-NEXT:    ldrsh.w r4, [r3, #2]
 ; CHECK-BE-NEXT:    smlabb r12, r5, r4, r12
 ; CHECK-BE-NEXT:    bne .LBB3_2
-; CHECK-BE-NEXT:  @ %bb.3:
-; CHECK-BE-NEXT:    pop.w {r4, r5, r7, lr}
+; CHECK-BE-NEXT:  @ %bb.3: @ %for.cond.cleanup
 ; CHECK-BE-NEXT:    add.w r0, r12, r1
-; CHECK-BE-NEXT:    bx lr
+; CHECK-BE-NEXT:    pop {r4, r5, r7, pc}
 ; CHECK-BE-NEXT:  .LBB3_4:
 ; CHECK-BE-NEXT:    mov.w r12, #0
 ; CHECK-BE-NEXT:    movs r1, #0
 ; CHECK-BE-NEXT:    add.w r0, r12, r1
-; CHECK-BE-NEXT:    bx lr
+; CHECK-BE-NEXT:    pop {r4, r5, r7, pc}
 entry:
   %cmp24 = icmp sgt i32 %arg, 0
   br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup

diff  --git a/llvm/test/CodeGen/ARM/code-placement.ll b/llvm/test/CodeGen/ARM/code-placement.ll
index 01d72f134aacb..7755ff53512ef 100644
--- a/llvm/test/CodeGen/ARM/code-placement.ll
+++ b/llvm/test/CodeGen/ARM/code-placement.ll
@@ -11,6 +11,7 @@ entry:
   br i1 %0, label %bb2, label %bb
 
 bb:
+; CHECK: LBB0_1:
 ; CHECK: LBB0_[[LABEL:[0-9]]]:
 ; CHECK: bne LBB0_[[LABEL]]
 ; CHECK-NOT: b LBB0_[[LABEL]]

diff  --git a/llvm/test/CodeGen/ARM/mbp.ll b/llvm/test/CodeGen/ARM/mbp.ll
index 4f96029e06b95..e7ab3860b52ac 100644
--- a/llvm/test/CodeGen/ARM/mbp.ll
+++ b/llvm/test/CodeGen/ARM/mbp.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc < %s | FileCheck %s
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 target triple = "thumbv7-unknown-linux-gnueabihf"
@@ -7,50 +6,16 @@ target triple = "thumbv7-unknown-linux-gnueabihf"
 %List = type { i32, ptr }
 
 ; The entry block should be the first block of the function.
+; CHECK-LABEL: foo
+; CHECK:       %entry
+; CHECK:       %for.body
+; CHECK:       %for.inc
+; CHECK:       %if.then
+; CHECK:       %for.cond.i
+; CHECK:       %for.body.i
+; CHECK:       %return
 
 define i1 @foo(ptr %ha, i32 %he) !prof !39 {
-; CHECK-LABEL: foo:
-; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    ldr r2, [r0]
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    itt eq
-; CHECK-NEXT:    moveq r0, #0
-; CHECK-NEXT:    bxeq lr
-; CHECK-NEXT:  .LBB0_1: @ %for.body.preheader
-; CHECK-NEXT:    .save {r7, lr}
-; CHECK-NEXT:    push {r7, lr}
-; CHECK-NEXT:    b .LBB0_3
-; CHECK-NEXT:  .LBB0_2: @ %for.inc
-; CHECK-NEXT:    @ in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT:    ldr r2, [r2]
-; CHECK-NEXT:    movs r0, #0
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    popeq {r7, pc}
-; CHECK-NEXT:  .LBB0_3: @ %for.body
-; CHECK-NEXT:    @ =>This Loop Header: Depth=1
-; CHECK-NEXT:    @ Child Loop BB0_5 Depth 2
-; CHECK-NEXT:    ldr r0, [r2, #4]
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    beq .LBB0_2
-; CHECK-NEXT:  @ %bb.4: @ %if.then
-; CHECK-NEXT:    @ in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT:    ldrd r3, r0, [r0]
-; CHECK-NEXT:    sub.w r12, r0, #4
-; CHECK-NEXT:  .LBB0_5: @ %for.cond.i
-; CHECK-NEXT:    @ Parent Loop BB0_3 Depth=1
-; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-NEXT:    cmp r3, #1
-; CHECK-NEXT:    blt .LBB0_2
-; CHECK-NEXT:  @ %bb.6: @ %for.body.i
-; CHECK-NEXT:    @ in Loop: Header=BB0_5 Depth=2
-; CHECK-NEXT:    ldr.w lr, [r12, r3, lsl #2]
-; CHECK-NEXT:    subs r3, #1
-; CHECK-NEXT:    movs r0, #1
-; CHECK-NEXT:    cmp lr, r1
-; CHECK-NEXT:    bne .LBB0_5
-; CHECK-NEXT:  @ %bb.7:
-; CHECK-NEXT:    pop {r7, pc}
 entry:
   %TargetPtr = load ptr, ptr %ha, align 4
   %cmp1 = icmp eq ptr %TargetPtr, null

diff  --git a/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll b/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll
index c9724674afd82..2755d354a6244 100644
--- a/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll
+++ b/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll
@@ -6,11 +6,11 @@
 define void @ssat_unroll(ptr %pSrcA, ptr %pSrcB, ptr %pDst, i32 %blockSize) {
 ; CHECK-LABEL: ssat_unroll:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    bxeq lr
-; CHECK-NEXT:  .LBB0_1: @ %while.body.preheader
 ; CHECK-NEXT:    .save {r11, lr}
 ; CHECK-NEXT:    push {r11, lr}
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    beq .LBB0_5
+; CHECK-NEXT:  @ %bb.1: @ %while.body.preheader
 ; CHECK-NEXT:    sub r12, r3, #1
 ; CHECK-NEXT:    tst r3, #1
 ; CHECK-NEXT:    beq .LBB0_3
@@ -23,7 +23,7 @@ define void @ssat_unroll(ptr %pSrcA, ptr %pSrcB, ptr %pDst, i32 %blockSize) {
 ; CHECK-NEXT:    mov r3, r12
 ; CHECK-NEXT:  .LBB0_3: @ %while.body.prol.loopexit
 ; CHECK-NEXT:    cmp r12, #0
-; CHECK-NEXT:    beq .LBB0_5
+; CHECK-NEXT:    popeq {r11, pc}
 ; CHECK-NEXT:  .LBB0_4: @ %while.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ldrsh r12, [r0]
@@ -41,9 +41,8 @@ define void @ssat_unroll(ptr %pSrcA, ptr %pSrcB, ptr %pDst, i32 %blockSize) {
 ; CHECK-NEXT:    strh r12, [r2, #2]
 ; CHECK-NEXT:    add r2, r2, #4
 ; CHECK-NEXT:    bne .LBB0_4
-; CHECK-NEXT:  .LBB0_5:
-; CHECK-NEXT:    pop {r11, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  .LBB0_5: @ %while.end
+; CHECK-NEXT:    pop {r11, pc}
 entry:
   %cmp.not7 = icmp eq i32 %blockSize, 0
   br i1 %cmp.not7, label %while.end, label %while.body.preheader
@@ -126,11 +125,11 @@ while.end:                                        ; preds = %while.body, %while.
 define void @ssat_unroll_minmax(ptr nocapture readonly %pSrcA, ptr nocapture readonly %pSrcB, ptr nocapture writeonly %pDst, i32 %blockSize) {
 ; CHECK-LABEL: ssat_unroll_minmax:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    bxeq lr
-; CHECK-NEXT:  .LBB1_1: @ %while.body.preheader
 ; CHECK-NEXT:    .save {r11, lr}
 ; CHECK-NEXT:    push {r11, lr}
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    beq .LBB1_5
+; CHECK-NEXT:  @ %bb.1: @ %while.body.preheader
 ; CHECK-NEXT:    sub r12, r3, #1
 ; CHECK-NEXT:    tst r3, #1
 ; CHECK-NEXT:    beq .LBB1_3
@@ -143,7 +142,7 @@ define void @ssat_unroll_minmax(ptr nocapture readonly %pSrcA, ptr nocapture rea
 ; CHECK-NEXT:    mov r3, r12
 ; CHECK-NEXT:  .LBB1_3: @ %while.body.prol.loopexit
 ; CHECK-NEXT:    cmp r12, #0
-; CHECK-NEXT:    beq .LBB1_5
+; CHECK-NEXT:    popeq {r11, pc}
 ; CHECK-NEXT:  .LBB1_4: @ %while.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ldrsh r12, [r0]
@@ -161,9 +160,8 @@ define void @ssat_unroll_minmax(ptr nocapture readonly %pSrcA, ptr nocapture rea
 ; CHECK-NEXT:    strh r12, [r2, #2]
 ; CHECK-NEXT:    add r2, r2, #4
 ; CHECK-NEXT:    bne .LBB1_4
-; CHECK-NEXT:  .LBB1_5:
-; CHECK-NEXT:    pop {r11, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  .LBB1_5: @ %while.end
+; CHECK-NEXT:    pop {r11, pc}
 entry:
   %cmp.not7 = icmp eq i32 %blockSize, 0
   br i1 %cmp.not7, label %while.end, label %while.body.preheader

diff  --git a/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll b/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll
index 35ddcfd9ba6d6..0cf7119eab84c 100644
--- a/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll
+++ b/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll
@@ -39,19 +39,19 @@ define i64 @two_chain_same_offset_succ_i32(ptr %p, i32 %offset, i32 %base1, i64
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    cmplwi r6, 0
 ; CHECK-NEXT:    cmpwi cr1, r6, 0
+; CHECK-NEXT:    stw r30, -8(r1) # 4-byte Folded Spill
+; CHECK-NEXT:    stw r31, -4(r1) # 4-byte Folded Spill
 ; CHECK-NEXT:    crandc 4*cr5+lt, 4*cr1+lt, eq
 ; CHECK-NEXT:    cmpwi cr1, r7, 0
-; CHECK-NEXT:    bc 12, 4*cr5+lt, L..BB0_6
+; CHECK-NEXT:    bc 12, 4*cr5+lt, L..BB0_5
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    crand 4*cr5+lt, eq, 4*cr1+eq
-; CHECK-NEXT:    bc 12, 4*cr5+lt, L..BB0_6
+; CHECK-NEXT:    bc 12, 4*cr5+lt, L..BB0_5
 ; CHECK-NEXT:  # %bb.2: # %for.body.preheader
 ; CHECK-NEXT:    slwi r8, r4, 1
 ; CHECK-NEXT:    li r10, 0
 ; CHECK-NEXT:    li r11, 0
-; CHECK-NEXT:    stw r30, -8(r1) # 4-byte Folded Spill
 ; CHECK-NEXT:    add r8, r4, r8
-; CHECK-NEXT:    stw r31, -4(r1) # 4-byte Folded Spill
 ; CHECK-NEXT:    add r9, r5, r8
 ; CHECK-NEXT:    add r5, r5, r4
 ; CHECK-NEXT:    add r8, r3, r5
@@ -83,15 +83,15 @@ define i64 @two_chain_same_offset_succ_i32(ptr %p, i32 %offset, i32 %base1, i64
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    crand 4*cr5+lt, eq, 4*cr1+lt
 ; CHECK-NEXT:    bc 12, 4*cr5+lt, L..BB0_3
-; CHECK-NEXT:  # %bb.5:
+; CHECK-NEXT:    b L..BB0_6
+; CHECK-NEXT:  L..BB0_5:
+; CHECK-NEXT:    li r3, 0
+; CHECK-NEXT:    li r5, 0
+; CHECK-NEXT:  L..BB0_6: # %for.cond.cleanup
 ; CHECK-NEXT:    lwz r31, -4(r1) # 4-byte Folded Reload
 ; CHECK-NEXT:    lwz r30, -8(r1) # 4-byte Folded Reload
 ; CHECK-NEXT:    mr r4, r5
 ; CHECK-NEXT:    blr
-; CHECK-NEXT:  L..BB0_6:
-; CHECK-NEXT:    li r3, 0
-; CHECK-NEXT:    li r4, 0
-; CHECK-NEXT:    blr
 entry:
   %add = add nsw i32 %base1, %offset
   %mul = shl nsw i32 %offset, 1

diff  --git a/llvm/test/CodeGen/PowerPC/common-chain.ll b/llvm/test/CodeGen/PowerPC/common-chain.ll
index 5f8c21e30f8fd..ea8a72e7d11e1 100644
--- a/llvm/test/CodeGen/PowerPC/common-chain.ll
+++ b/llvm/test/CodeGen/PowerPC/common-chain.ll
@@ -137,14 +137,14 @@ define i64 @not_perfect_chain_all_same_offset_fail(ptr %p, i64 %offset, i64 %bas
 ; CHECK-LABEL: not_perfect_chain_all_same_offset_fail:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    cmpdi r6, 0
+; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    ble cr0, .LBB1_4
 ; CHECK-NEXT:  # %bb.1: # %for.body.preheader
-; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    sldi r7, r4, 1
+; CHECK-NEXT:    sldi r9, r4, 2
 ; CHECK-NEXT:    add r5, r3, r5
 ; CHECK-NEXT:    li r3, 0
 ; CHECK-NEXT:    add r8, r4, r7
-; CHECK-NEXT:    sldi r9, r4, 2
 ; CHECK-NEXT:    mtctr r6
 ; CHECK-NEXT:    add r10, r4, r9
 ; CHECK-NEXT:    .p2align 4
@@ -161,11 +161,12 @@ define i64 @not_perfect_chain_all_same_offset_fail(ptr %p, i64 %offset, i64 %bas
 ; CHECK-NEXT:    mulld r6, r6, r0
 ; CHECK-NEXT:    maddld r3, r6, r30, r3
 ; CHECK-NEXT:    bdnz .LBB1_2
-; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:  # %bb.3: # %for.cond.cleanup
 ; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    blr
 ; CHECK-NEXT:  .LBB1_4:
 ; CHECK-NEXT:    li r3, 0
+; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    blr
 entry:
   %mul = shl nsw i64 %offset, 1
@@ -424,20 +425,20 @@ define i64 @not_same_offset_fail(ptr %p, i64 %offset, i64 %base1, i64 %n) {
 ; CHECK-LABEL: not_same_offset_fail:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    cmpdi r6, 0
-; CHECK-NEXT:    ble cr0, .LBB4_4
-; CHECK-NEXT:  # %bb.1: # %for.body.preheader
 ; CHECK-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    add r5, r3, r5
-; CHECK-NEXT:    li r3, 0
 ; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    mtctr r6
+; CHECK-NEXT:    ble cr0, .LBB4_3
+; CHECK-NEXT:  # %bb.1: # %for.body.preheader
 ; CHECK-NEXT:    mulli r11, r4, 10
 ; CHECK-NEXT:    sldi r8, r4, 2
+; CHECK-NEXT:    add r5, r3, r5
+; CHECK-NEXT:    li r3, 0
 ; CHECK-NEXT:    add r8, r4, r8
 ; CHECK-NEXT:    sldi r9, r4, 3
-; CHECK-NEXT:    sub r10, r9, r4
+; CHECK-NEXT:    mtctr r6
 ; CHECK-NEXT:    sldi r7, r4, 1
+; CHECK-NEXT:    sub r10, r9, r4
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB4_2: # %for.body
 ; CHECK-NEXT:    #
@@ -454,14 +455,14 @@ define i64 @not_same_offset_fail(ptr %p, i64 %offset, i64 %base1, i64 %n) {
 ; CHECK-NEXT:    mulld r6, r6, r29
 ; CHECK-NEXT:    maddld r3, r6, r28, r3
 ; CHECK-NEXT:    bdnz .LBB4_2
-; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    b .LBB4_4
+; CHECK-NEXT:  .LBB4_3:
+; CHECK-NEXT:    li r3, 0
+; CHECK-NEXT:  .LBB4_4: # %for.cond.cleanup
 ; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    blr
-; CHECK-NEXT:  .LBB4_4:
-; CHECK-NEXT:    li r3, 0
-; CHECK-NEXT:    blr
 entry:
   %mul = shl nsw i64 %offset, 1
   %mul2 = mul nsw i64 %offset, 5

diff  --git a/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll b/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll
index 37baef6043884..769b358131e9a 100644
--- a/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll
+++ b/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll
@@ -192,21 +192,21 @@ define i64 @test_max_number_reminder(ptr %arg, i32 signext %arg1) {
 ; CHECK-LABEL: test_max_number_reminder:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    cmplwi r4, 0
-; CHECK-NEXT:    beq cr0, .LBB2_4
+; CHECK-NEXT:    std r25, -56(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    beq cr0, .LBB2_3
 ; CHECK-NEXT:  # %bb.1: # %bb3.preheader
 ; CHECK-NEXT:    cmpldi r4, 1
 ; CHECK-NEXT:    li r5, 1
 ; CHECK-NEXT:    addi r9, r3, 4002
-; CHECK-NEXT:    std r25, -56(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    li r6, -1
-; CHECK-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    li r7, 3
 ; CHECK-NEXT:    li r8, 5
 ; CHECK-NEXT:    li r10, 9
-; CHECK-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    iselgt r3, r4, r5
 ; CHECK-NEXT:    mtctr r3
 ; CHECK-NEXT:    li r3, 0
@@ -232,7 +232,10 @@ define i64 @test_max_number_reminder(ptr %arg, i32 signext %arg1) {
 ; CHECK-NEXT:    mulld r11, r11, r26
 ; CHECK-NEXT:    maddld r3, r11, r25, r3
 ; CHECK-NEXT:    bdnz .LBB2_2
-; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    b .LBB2_4
+; CHECK-NEXT:  .LBB2_3:
+; CHECK-NEXT:    li r3, 0
+; CHECK-NEXT:  .LBB2_4: # %bb45
 ; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
@@ -241,9 +244,6 @@ define i64 @test_max_number_reminder(ptr %arg, i32 signext %arg1) {
 ; CHECK-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r25, -56(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    blr
-; CHECK-NEXT:  .LBB2_4:
-; CHECK-NEXT:    addi r3, r4, 0
-; CHECK-NEXT:    blr
 bb:
   %i = sext i32 %arg1 to i64
   %i2 = icmp eq i32 %arg1, 0
@@ -475,11 +475,11 @@ define dso_local i64 @test_ds_multiple_chains(ptr %arg, ptr %arg1, i32 signext %
 ; CHECK-LABEL: test_ds_multiple_chains:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    cmplwi r5, 0
-; CHECK-NEXT:    beq cr0, .LBB5_4
+; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    beq cr0, .LBB5_3
 ; CHECK-NEXT:  # %bb.1: # %bb4.preheader
 ; CHECK-NEXT:    cmpldi r5, 1
 ; CHECK-NEXT:    li r6, 1
-; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    addi r3, r3, 4001
 ; CHECK-NEXT:    addi r4, r4, 4001
 ; CHECK-NEXT:    li r7, 9
@@ -507,13 +507,13 @@ define dso_local i64 @test_ds_multiple_chains(ptr %arg, ptr %arg1, i32 signext %
 ; CHECK-NEXT:    mulld r8, r8, r30
 ; CHECK-NEXT:    maddld r6, r8, r9, r6
 ; CHECK-NEXT:    bdnz .LBB5_2
-; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    b .LBB5_4
+; CHECK-NEXT:  .LBB5_3:
+; CHECK-NEXT:    li r6, 0
+; CHECK-NEXT:  .LBB5_4: # %bb43
 ; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    add r3, r6, r5
 ; CHECK-NEXT:    blr
-; CHECK-NEXT:  .LBB5_4:
-; CHECK-NEXT:    addi r3, r5, 0
-; CHECK-NEXT:    blr
 bb:
   %i = sext i32 %arg2 to i64
   %i3 = icmp eq i32 %arg2, 0
@@ -595,17 +595,17 @@ define i64 @test_ds_cross_basic_blocks(ptr %arg, i32 signext %arg1) {
 ; CHECK-LABEL: test_ds_cross_basic_blocks:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    cmplwi r4, 0
-; CHECK-NEXT:    beq cr0, .LBB6_9
+; CHECK-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    beq cr0, .LBB6_8
 ; CHECK-NEXT:  # %bb.1: # %bb3
 ; CHECK-NEXT:    addis r5, r2, .LC0@toc@ha
 ; CHECK-NEXT:    cmpldi r4, 1
 ; CHECK-NEXT:    li r7, 1
 ; CHECK-NEXT:    addi r6, r3, 4009
-; CHECK-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    ld r5, .LC0@toc@l(r5)
 ; CHECK-NEXT:    iselgt r3, r4, r7
-; CHECK-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    li r4, -7
 ; CHECK-NEXT:    li r8, -6
 ; CHECK-NEXT:    li r9, 1
@@ -634,7 +634,7 @@ define i64 @test_ds_cross_basic_blocks(ptr %arg, i32 signext %arg1) {
 ; CHECK-NEXT:    mulld r0, r0, r10
 ; CHECK-NEXT:    mulld r0, r0, r9
 ; CHECK-NEXT:    maddld r3, r0, r7, r3
-; CHECK-NEXT:    bdz .LBB6_8
+; CHECK-NEXT:    bdz .LBB6_9
 ; CHECK-NEXT:  .LBB6_4: # %bb5
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lbzu r0, 1(r5)
@@ -666,13 +666,12 @@ define i64 @test_ds_cross_basic_blocks(ptr %arg, i32 signext %arg1) {
 ; CHECK-NEXT:    add r7, r0, r7
 ; CHECK-NEXT:    b .LBB6_3
 ; CHECK-NEXT:  .LBB6_8:
+; CHECK-NEXT:    li r3, 0
+; CHECK-NEXT:  .LBB6_9: # %bb64
 ; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    blr
-; CHECK-NEXT:  .LBB6_9:
-; CHECK-NEXT:    li r3, 0
-; CHECK-NEXT:    blr
 bb:
   %i = sext i32 %arg1 to i64
   %i2 = icmp eq i32 %arg1, 0

diff  --git a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll
index 79f2ef3e3746a..b91f20b710a2d 100644
--- a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll
+++ b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll
@@ -6,24 +6,24 @@ define void @foo(ptr readonly %0, ptr %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    cmpd 5, 7
-; CHECK-NEXT:    bgelr 0
-; CHECK-NEXT:  # %bb.1: # %.preheader
+; CHECK-NEXT:    std 22, -80(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 23, -72(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 24, -64(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 25, -56(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 26, -48(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 27, -40(1) # 8-byte Folded Spill
-; CHECK-NEXT:    addi 27, 5, 2
 ; CHECK-NEXT:    std 28, -32(1) # 8-byte Folded Spill
-; CHECK-NEXT:    addi 28, 5, 3
+; CHECK-NEXT:    std 29, -24(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 30, -16(1) # 8-byte Folded Spill
+; CHECK-NEXT:    bge 0, .LBB0_6
+; CHECK-NEXT:  # %bb.1: # %.preheader
 ; CHECK-NEXT:    addi 30, 5, 1
+; CHECK-NEXT:    addi 28, 5, 3
+; CHECK-NEXT:    addi 27, 5, 2
 ; CHECK-NEXT:    mulld 12, 8, 5
-; CHECK-NEXT:    mulld 0, 9, 8
-; CHECK-NEXT:    std 29, -24(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    addi 29, 3, 16
+; CHECK-NEXT:    mulld 0, 9, 8
 ; CHECK-NEXT:    sldi 11, 10, 3
-; CHECK-NEXT:    std 22, -80(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 23, -72(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 24, -64(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 25, -56(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 26, -48(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    mulld 30, 8, 30
 ; CHECK-NEXT:    mulld 28, 8, 28
 ; CHECK-NEXT:    mulld 8, 8, 27

diff  --git a/llvm/test/CodeGen/PowerPC/shrink-wrap.ll b/llvm/test/CodeGen/PowerPC/shrink-wrap.ll
index 12d0b056ca886..08c391e34c6f4 100644
--- a/llvm/test/CodeGen/PowerPC/shrink-wrap.ll
+++ b/llvm/test/CodeGen/PowerPC/shrink-wrap.ll
@@ -7,9 +7,6 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) {
 ; POWERPC64-LABEL: shrinkwrapme:
 ; POWERPC64:       # %bb.0: # %entry
 ; POWERPC64-NEXT:    cmpwi 4, 0
-; POWERPC64-NEXT:    ble 0, .LBB0_4
-; POWERPC64-NEXT:  # %bb.1: # %for.body.preheader
-; POWERPC64-NEXT:    addi 4, 4, -1
 ; POWERPC64-NEXT:    std 14, -144(1) # 8-byte Folded Spill
 ; POWERPC64-NEXT:    std 15, -136(1) # 8-byte Folded Spill
 ; POWERPC64-NEXT:    std 16, -128(1) # 8-byte Folded Spill
@@ -25,11 +22,14 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) {
 ; POWERPC64-NEXT:    std 26, -48(1) # 8-byte Folded Spill
 ; POWERPC64-NEXT:    std 27, -40(1) # 8-byte Folded Spill
 ; POWERPC64-NEXT:    std 28, -32(1) # 8-byte Folded Spill
-; POWERPC64-NEXT:    clrldi 4, 4, 32
-; POWERPC64-NEXT:    addi 4, 4, 1
 ; POWERPC64-NEXT:    std 29, -24(1) # 8-byte Folded Spill
 ; POWERPC64-NEXT:    std 30, -16(1) # 8-byte Folded Spill
 ; POWERPC64-NEXT:    std 31, -8(1) # 8-byte Folded Spill
+; POWERPC64-NEXT:    ble 0, .LBB0_3
+; POWERPC64-NEXT:  # %bb.1: # %for.body.preheader
+; POWERPC64-NEXT:    addi 4, 4, -1
+; POWERPC64-NEXT:    clrldi 4, 4, 32
+; POWERPC64-NEXT:    addi 4, 4, 1
 ; POWERPC64-NEXT:    mtctr 4
 ; POWERPC64-NEXT:    li 4, 0
 ; POWERPC64-NEXT:    .p2align 4
@@ -39,7 +39,10 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) {
 ; POWERPC64-NEXT:    add 4, 3, 4
 ; POWERPC64-NEXT:    #NO_APP
 ; POWERPC64-NEXT:    bdnz .LBB0_2
-; POWERPC64-NEXT:  # %bb.3:
+; POWERPC64-NEXT:    b .LBB0_4
+; POWERPC64-NEXT:  .LBB0_3:
+; POWERPC64-NEXT:    li 4, 0
+; POWERPC64-NEXT:  .LBB0_4: # %for.cond.cleanup
 ; POWERPC64-NEXT:    ld 31, -8(1) # 8-byte Folded Reload
 ; POWERPC64-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
 ; POWERPC64-NEXT:    ld 29, -24(1) # 8-byte Folded Reload
@@ -60,16 +63,10 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) {
 ; POWERPC64-NEXT:    ld 15, -136(1) # 8-byte Folded Reload
 ; POWERPC64-NEXT:    ld 14, -144(1) # 8-byte Folded Reload
 ; POWERPC64-NEXT:    blr
-; POWERPC64-NEXT:  .LBB0_4:
-; POWERPC64-NEXT:    li 4, 0
-; POWERPC64-NEXT:    extsw 3, 4
-; POWERPC64-NEXT:    blr
 ;
 ; POWERPC32-AIX-LABEL: shrinkwrapme:
 ; POWERPC32-AIX:       # %bb.0: # %entry
 ; POWERPC32-AIX-NEXT:    cmpwi 4, 0
-; POWERPC32-AIX-NEXT:    ble 0, L..BB0_4
-; POWERPC32-AIX-NEXT:  # %bb.1: # %for.body.preheader
 ; POWERPC32-AIX-NEXT:    stw 14, -72(1) # 4-byte Folded Spill
 ; POWERPC32-AIX-NEXT:    stw 15, -68(1) # 4-byte Folded Spill
 ; POWERPC32-AIX-NEXT:    stw 16, -64(1) # 4-byte Folded Spill
@@ -88,6 +85,8 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) {
 ; POWERPC32-AIX-NEXT:    stw 29, -12(1) # 4-byte Folded Spill
 ; POWERPC32-AIX-NEXT:    stw 30, -8(1) # 4-byte Folded Spill
 ; POWERPC32-AIX-NEXT:    stw 31, -4(1) # 4-byte Folded Spill
+; POWERPC32-AIX-NEXT:    ble 0, L..BB0_3
+; POWERPC32-AIX-NEXT:  # %bb.1: # %for.body.preheader
 ; POWERPC32-AIX-NEXT:    mtctr 4
 ; POWERPC32-AIX-NEXT:    li 4, 0
 ; POWERPC32-AIX-NEXT:    .align 4
@@ -97,7 +96,10 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) {
 ; POWERPC32-AIX-NEXT:    add 4, 3, 4
 ; POWERPC32-AIX-NEXT:    #NO_APP
 ; POWERPC32-AIX-NEXT:    bdnz L..BB0_2
-; POWERPC32-AIX-NEXT:  # %bb.3:
+; POWERPC32-AIX-NEXT:    b L..BB0_4
+; POWERPC32-AIX-NEXT:  L..BB0_3:
+; POWERPC32-AIX-NEXT:    li 4, 0
+; POWERPC32-AIX-NEXT:  L..BB0_4: # %for.cond.cleanup
 ; POWERPC32-AIX-NEXT:    lwz 31, -4(1) # 4-byte Folded Reload
 ; POWERPC32-AIX-NEXT:    lwz 30, -8(1) # 4-byte Folded Reload
 ; POWERPC32-AIX-NEXT:    lwz 29, -12(1) # 4-byte Folded Reload
@@ -118,16 +120,10 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) {
 ; POWERPC32-AIX-NEXT:    lwz 15, -68(1) # 4-byte Folded Reload
 ; POWERPC32-AIX-NEXT:    lwz 14, -72(1) # 4-byte Folded Reload
 ; POWERPC32-AIX-NEXT:    blr
-; POWERPC32-AIX-NEXT:  L..BB0_4:
-; POWERPC32-AIX-NEXT:    li 3, 0
-; POWERPC32-AIX-NEXT:    blr
 ;
 ; POWERPC64-AIX-LABEL: shrinkwrapme:
 ; POWERPC64-AIX:       # %bb.0: # %entry
 ; POWERPC64-AIX-NEXT:    cmpwi 4, 1
-; POWERPC64-AIX-NEXT:    blt 0, L..BB0_4
-; POWERPC64-AIX-NEXT:  # %bb.1: # %for.body.preheader
-; POWERPC64-AIX-NEXT:    addi 4, 4, -1
 ; POWERPC64-AIX-NEXT:    std 14, -144(1) # 8-byte Folded Spill
 ; POWERPC64-AIX-NEXT:    std 15, -136(1) # 8-byte Folded Spill
 ; POWERPC64-AIX-NEXT:    std 16, -128(1) # 8-byte Folded Spill
@@ -143,11 +139,14 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) {
 ; POWERPC64-AIX-NEXT:    std 26, -48(1) # 8-byte Folded Spill
 ; POWERPC64-AIX-NEXT:    std 27, -40(1) # 8-byte Folded Spill
 ; POWERPC64-AIX-NEXT:    std 28, -32(1) # 8-byte Folded Spill
-; POWERPC64-AIX-NEXT:    clrldi 4, 4, 32
-; POWERPC64-AIX-NEXT:    addi 4, 4, 1
 ; POWERPC64-AIX-NEXT:    std 29, -24(1) # 8-byte Folded Spill
 ; POWERPC64-AIX-NEXT:    std 30, -16(1) # 8-byte Folded Spill
 ; POWERPC64-AIX-NEXT:    std 31, -8(1) # 8-byte Folded Spill
+; POWERPC64-AIX-NEXT:    blt 0, L..BB0_3
+; POWERPC64-AIX-NEXT:  # %bb.1: # %for.body.preheader
+; POWERPC64-AIX-NEXT:    addi 4, 4, -1
+; POWERPC64-AIX-NEXT:    clrldi 4, 4, 32
+; POWERPC64-AIX-NEXT:    addi 4, 4, 1
 ; POWERPC64-AIX-NEXT:    mtctr 4
 ; POWERPC64-AIX-NEXT:    li 4, 0
 ; POWERPC64-AIX-NEXT:    .align 4
@@ -157,7 +156,10 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) {
 ; POWERPC64-AIX-NEXT:    add 4, 3, 4
 ; POWERPC64-AIX-NEXT:    #NO_APP
 ; POWERPC64-AIX-NEXT:    bdnz L..BB0_2
-; POWERPC64-AIX-NEXT:  # %bb.3:
+; POWERPC64-AIX-NEXT:    b L..BB0_4
+; POWERPC64-AIX-NEXT:  L..BB0_3:
+; POWERPC64-AIX-NEXT:    li 4, 0
+; POWERPC64-AIX-NEXT:  L..BB0_4: # %for.cond.cleanup
 ; POWERPC64-AIX-NEXT:    ld 31, -8(1) # 8-byte Folded Reload
 ; POWERPC64-AIX-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
 ; POWERPC64-AIX-NEXT:    ld 29, -24(1) # 8-byte Folded Reload
@@ -178,10 +180,6 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) {
 ; POWERPC64-AIX-NEXT:    ld 15, -136(1) # 8-byte Folded Reload
 ; POWERPC64-AIX-NEXT:    ld 14, -144(1) # 8-byte Folded Reload
 ; POWERPC64-AIX-NEXT:    blr
-; POWERPC64-AIX-NEXT:  L..BB0_4:
-; POWERPC64-AIX-NEXT:    li 4, 0
-; POWERPC64-AIX-NEXT:    extsw 3, 4
-; POWERPC64-AIX-NEXT:    blr
 entry:
   %cmp5 = icmp sgt i32 %lim, 0
   br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup

diff  --git a/llvm/test/CodeGen/PowerPC/shrink-wrap.mir b/llvm/test/CodeGen/PowerPC/shrink-wrap.mir
index 561b193086bf5..1b6ccb92527e7 100644
--- a/llvm/test/CodeGen/PowerPC/shrink-wrap.mir
+++ b/llvm/test/CodeGen/PowerPC/shrink-wrap.mir
@@ -48,7 +48,42 @@
 ...
 ---
 name:            shrinkwrapme
+alignment:       16
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
 tracksRegLiveness: true
+hasWinCFI:       false
+registers:       []
+liveins:
+  - { reg: '$x3', virtual-reg: '' }
+  - { reg: '$x4', virtual-reg: '' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    0
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+callSites:       []
+constants:       []
+machineFunctionInfo: {}
 body:             |
   ; CHECK-LABEL: name: shrinkwrapme
   ; CHECK: bb.0.entry:
@@ -82,17 +117,11 @@ body:             |
   ; CHECK-NEXT:   BLR8 implicit $lr8, implicit $rm, implicit $x3
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.4.for.body:
-  ; CHECK-NEXT:   successors: %bb.4(0x7c000000), %bb.5(0x04000000)
+  ; CHECK-NEXT:   successors: %bb.4(0x7c000000), %bb.3(0x04000000)
   ; CHECK-NEXT:   liveins: $r4, $x3
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   INLINEASM &"add $0, $1, $2", 0 /* attdialect */, 131082 /* regdef:GPRC */, def renamable $r4, 131081 /* reguse:GPRC */, renamable $r3, 131081 /* reguse:GPRC */, killed renamable $r4, 12 /* clobber */, implicit-def dead early-clobber $r14, 12 /* clobber */, implicit-def dead early-clobber $r15, 12 /* clobber */, implicit-def dead early-clobber $r16, 12 /* clobber */, implicit-def dead early-clobber $r17, 12 /* clobber */, implicit-def dead early-clobber $r18, 12 /* clobber */, implicit-def dead early-clobber $r19, 12 /* clobber */, implicit-def dead early-clobber $r20, 12 /* clobber */, implicit-def dead early-clobber $r21, 12 /* clobber */, implicit-def dead early-clobber $r22, 12 /* clobber */, implicit-def dead early-clobber $r23, 12 /* clobber */, implicit-def dead early-clobber $r24, 12 /* clobber */, implicit-def dead early-clobber $r25, 12 /* clobber */, implicit-def dead early-clobber $r26, 12 /* clobber */, implicit-def dead early-clobber $r27, 12 /* clobber */, implicit-def dead early-clobber $r28, 12 /* clobber */, implicit-def dead early-clobber $r29, 12 /* clobber */, implicit-def dead early-clobber $r30, 12 /* clobber */, implicit-def dead early-clobber $r31
   ; CHECK-NEXT:   BDNZ8 %bb.4, implicit-def dead $ctr8, implicit $ctr8
-  ; CHECK-NEXT:   B %bb.5
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.5:
-  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
-  ; CHECK-NEXT:   liveins: $r4
-  ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   B %bb.3
   bb.0.entry:
     successors: %bb.2(0x50000000), %bb.1(0x30000000)

diff  --git a/llvm/test/CodeGen/RISCV/aext-to-sext.ll b/llvm/test/CodeGen/RISCV/aext-to-sext.ll
index 0aa04f40f6a52..806c495fa6777 100644
--- a/llvm/test/CodeGen/RISCV/aext-to-sext.ll
+++ b/llvm/test/CodeGen/RISCV/aext-to-sext.ll
@@ -11,22 +11,21 @@
 define void @quux(i32 signext %arg, i32 signext %arg1) nounwind {
 ; RV64I-LABEL: quux:
 ; RV64I:       # %bb.0: # %bb
-; RV64I-NEXT:    beq a0, a1, .LBB0_4
-; RV64I-NEXT:  # %bb.1: # %bb2.preheader
 ; RV64I-NEXT:    addi sp, sp, -16
 ; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    beq a0, a1, .LBB0_3
+; RV64I-NEXT:  # %bb.1: # %bb2.preheader
 ; RV64I-NEXT:    subw s0, a1, a0
 ; RV64I-NEXT:  .LBB0_2: # %bb2
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    call hoge@plt
 ; RV64I-NEXT:    addiw s0, s0, -1
 ; RV64I-NEXT:    bnez s0, .LBB0_2
-; RV64I-NEXT:  # %bb.3:
+; RV64I-NEXT:  .LBB0_3: # %bb6
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 16
-; RV64I-NEXT:  .LBB0_4: # %bb6
 ; RV64I-NEXT:    ret
 bb:
   %tmp = icmp eq i32 %arg, %arg1

diff  --git a/llvm/test/CodeGen/RISCV/fli-licm.ll b/llvm/test/CodeGen/RISCV/fli-licm.ll
index f37ace801b159..93bb934c1cb0d 100644
--- a/llvm/test/CodeGen/RISCV/fli-licm.ll
+++ b/llvm/test/CodeGen/RISCV/fli-licm.ll
@@ -12,11 +12,11 @@
 define void @process_nodes(ptr %0) nounwind {
 ; RV32-LABEL: process_nodes:
 ; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    beqz a0, .LBB0_4
-; RV32-NEXT:  # %bb.1: # %loop.preheader
 ; RV32-NEXT:    addi sp, sp, -16
 ; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-NEXT:    beqz a0, .LBB0_3
+; RV32-NEXT:  # %bb.1: # %loop.preheader
 ; RV32-NEXT:    mv s0, a0
 ; RV32-NEXT:  .LBB0_2: # %loop
 ; RV32-NEXT:    # =>This Inner Loop Header: Depth=1
@@ -25,20 +25,19 @@ define void @process_nodes(ptr %0) nounwind {
 ; RV32-NEXT:    call do_it@plt
 ; RV32-NEXT:    lw s0, 0(s0)
 ; RV32-NEXT:    bnez s0, .LBB0_2
-; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:  .LBB0_3: # %exit
 ; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:  .LBB0_4: # %exit
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: process_nodes:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    beqz a0, .LBB0_4
-; RV64-NEXT:  # %bb.1: # %loop.preheader
 ; RV64-NEXT:    addi sp, sp, -16
 ; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64-NEXT:    beqz a0, .LBB0_3
+; RV64-NEXT:  # %bb.1: # %loop.preheader
 ; RV64-NEXT:    mv s0, a0
 ; RV64-NEXT:  .LBB0_2: # %loop
 ; RV64-NEXT:    # =>This Inner Loop Header: Depth=1
@@ -47,11 +46,10 @@ define void @process_nodes(ptr %0) nounwind {
 ; RV64-NEXT:    call do_it@plt
 ; RV64-NEXT:    ld s0, 0(s0)
 ; RV64-NEXT:    bnez s0, .LBB0_2
-; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:  .LBB0_3: # %exit
 ; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64-NEXT:    addi sp, sp, 16
-; RV64-NEXT:  .LBB0_4: # %exit
 ; RV64-NEXT:    ret
 entry:
   %1 = icmp eq ptr %0, null

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inlineasm.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inlineasm.ll
index 421b5b5364d35..d67e66d7a7131 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inlineasm.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inlineasm.ll
@@ -4,13 +4,11 @@
 define i32 @test(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) {
 ; CHECK-LABEL: test:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r2, #1
-; CHECK-NEXT:    itt lt
-; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    bxlt lr
-; CHECK-NEXT:  .LBB0_1: @ %for.body.preheader
 ; CHECK-NEXT:    .save {r7, lr}
 ; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    cmp r2, #1
+; CHECK-NEXT:    blt .LBB0_4
+; CHECK-NEXT:  @ %bb.1: @ %for.body.preheader
 ; CHECK-NEXT:    mov lr, r0
 ; CHECK-NEXT:    movs r0, #0
 ; CHECK-NEXT:  .LBB0_2: @ %for.body
@@ -23,7 +21,10 @@ define i32 @test(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) {
 ; CHECK-NEXT:    @NO_APP
 ; CHECK-NEXT:    add r0, r3
 ; CHECK-NEXT:    bne .LBB0_2
-; CHECK-NEXT:  @ %bb.3:
+; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
+; CHECK-NEXT:    pop {r7, pc}
+; CHECK-NEXT:  .LBB0_4:
+; CHECK-NEXT:    movs r0, #0
 ; CHECK-NEXT:    pop {r7, pc}
 entry:
   %cmp9 = icmp sgt i32 %n, 0
@@ -50,13 +51,11 @@ for.body:                                         ; preds = %entry, %for.body
 define i32 @testlr(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) {
 ; CHECK-LABEL: testlr:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r2, #1
-; CHECK-NEXT:    itt lt
-; CHECK-NEXT:    movlt r0, #0
-; CHECK-NEXT:    bxlt lr
-; CHECK-NEXT:  .LBB1_1: @ %for.body.preheader
 ; CHECK-NEXT:    .save {r4, lr}
 ; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    cmp r2, #1
+; CHECK-NEXT:    blt .LBB1_4
+; CHECK-NEXT:  @ %bb.1: @ %for.body.preheader
 ; CHECK-NEXT:    mov r3, r0
 ; CHECK-NEXT:    movs r0, #0
 ; CHECK-NEXT:  .LBB1_2: @ %for.body
@@ -69,7 +68,10 @@ define i32 @testlr(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n)
 ; CHECK-NEXT:    @NO_APP
 ; CHECK-NEXT:    add r0, r4
 ; CHECK-NEXT:    bne .LBB1_2
-; CHECK-NEXT:  @ %bb.3:
+; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
+; CHECK-NEXT:    pop {r4, pc}
+; CHECK-NEXT:  .LBB1_4:
+; CHECK-NEXT:    movs r0, #0
 ; CHECK-NEXT:    pop {r4, pc}
 entry:
   %cmp9 = icmp sgt i32 %n, 0

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll
index 59b32a3f441c1..99d169e63e5a5 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll
@@ -4,12 +4,11 @@
 define void @test_memcpy(ptr nocapture %x, ptr nocapture readonly %y, i32 %n, i32 %m) {
 ; CHECK-LABEL: test_memcpy:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r2, #1
-; CHECK-NEXT:    it lt
-; CHECK-NEXT:    bxlt lr
-; CHECK-NEXT:  .LBB0_1: @ %for.body.preheader
 ; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
 ; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    cmp r2, #1
+; CHECK-NEXT:    blt .LBB0_5
+; CHECK-NEXT:  @ %bb.1: @ %for.body.preheader
 ; CHECK-NEXT:    lsl.w r12, r3, #2
 ; CHECK-NEXT:    movs r7, #0
 ; CHECK-NEXT:    b .LBB0_2
@@ -32,9 +31,8 @@ define void @test_memcpy(ptr nocapture %x, ptr nocapture readonly %y, i32 %n, i3
 ; CHECK-NEXT:    vstrb.8 q0, [r5], #16
 ; CHECK-NEXT:    letp lr, .LBB0_4
 ; CHECK-NEXT:    b .LBB0_3
-; CHECK-NEXT:  .LBB0_5:
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  .LBB0_5: @ %for.cond.cleanup
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
 entry:
   %cmp8 = icmp sgt i32 %n, 0
   br i1 %cmp8, label %for.body, label %for.cond.cleanup
@@ -57,12 +55,12 @@ for.body:                                         ; preds = %entry, %for.body
 define void @test_memset(ptr nocapture %x, i32 %n, i32 %m) {
 ; CHECK-LABEL: test_memset:
 ; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
 ; CHECK-NEXT:    cmp r1, #1
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    bxlt lr
+; CHECK-NEXT:    poplt {r7, pc}
 ; CHECK-NEXT:  .LBB1_1:
-; CHECK-NEXT:    .save {r7, lr}
-; CHECK-NEXT:    push {r7, lr}
 ; CHECK-NEXT:    vmov.i32 q0, #0x0
 ; CHECK-NEXT:    b .LBB1_2
 ; CHECK-NEXT:  .LBB1_2: @ %for.body
@@ -82,9 +80,8 @@ define void @test_memset(ptr nocapture %x, i32 %n, i32 %m) {
 ; CHECK-NEXT:    vstrb.8 q0, [r12], #16
 ; CHECK-NEXT:    letp lr, .LBB1_4
 ; CHECK-NEXT:    b .LBB1_3
-; CHECK-NEXT:  .LBB1_5:
-; CHECK-NEXT:    pop.w {r7, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  .LBB1_5: @ %for.cond.cleanup
+; CHECK-NEXT:    pop {r7, pc}
 entry:
   %cmp5 = icmp sgt i32 %n, 0
   br i1 %cmp5, label %for.body, label %for.cond.cleanup
@@ -105,14 +102,13 @@ for.body:                                         ; preds = %entry, %for.body
 define void @test_memmove(ptr nocapture %x, ptr nocapture readonly %y, i32 %n, i32 %m) {
 ; CHECK-LABEL: test_memmove:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r2, #1
-; CHECK-NEXT:    it lt
-; CHECK-NEXT:    bxlt lr
-; CHECK-NEXT:  .LBB2_1: @ %for.body.preheader
 ; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
 ; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
 ; CHECK-NEXT:    .pad #4
 ; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    cmp r2, #1
+; CHECK-NEXT:    blt .LBB2_3
+; CHECK-NEXT:  @ %bb.1: @ %for.body.preheader
 ; CHECK-NEXT:    mov r8, r3
 ; CHECK-NEXT:    mov r5, r2
 ; CHECK-NEXT:    mov r9, r1
@@ -128,10 +124,9 @@ define void @test_memmove(ptr nocapture %x, ptr nocapture readonly %y, i32 %n, i
 ; CHECK-NEXT:    add r6, r4
 ; CHECK-NEXT:    subs r5, #1
 ; CHECK-NEXT:    bne .LBB2_2
-; CHECK-NEXT:  @ %bb.3:
+; CHECK-NEXT:  .LBB2_3: @ %for.cond.cleanup
 ; CHECK-NEXT:    add sp, #4
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
 entry:
   %cmp8 = icmp sgt i32 %n, 0
   br i1 %cmp8, label %for.body, label %for.cond.cleanup

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll
index 23eb5900bb7d1..13e39a8f16e33 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll
@@ -4,11 +4,10 @@
 define arm_aapcs_vfpcc void @float_float_mul(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %c, i32 %N) {
 ; CHECK-LABEL: float_float_mul:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    bxeq lr
-; CHECK-NEXT:  .LBB0_1: @ %for.body.preheader
 ; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    beq .LBB0_10
+; CHECK-NEXT:  @ %bb.1: @ %for.body.preheader
 ; CHECK-NEXT:    cmp r3, #3
 ; CHECK-NEXT:    bhi .LBB0_3
 ; CHECK-NEXT:  @ %bb.2:
@@ -81,9 +80,8 @@ define arm_aapcs_vfpcc void @float_float_mul(ptr nocapture readonly %a, ptr noca
 ; CHECK-NEXT:    vmul.f32 s0, s2, s0
 ; CHECK-NEXT:    vstr s0, [r5, #12]
 ; CHECK-NEXT:    bne .LBB0_9
-; CHECK-NEXT:  .LBB0_10:
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  .LBB0_10: @ %for.cond.cleanup
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, pc}
 ; CHECK-NEXT:  .LBB0_11: @ %vector.ph
 ; CHECK-NEXT:    bic r12, r3, #3
 ; CHECK-NEXT:    movs r6, #1
@@ -217,11 +215,10 @@ for.body:                                         ; preds = %for.body.prol.loope
 define arm_aapcs_vfpcc void @float_float_add(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %c, i32 %N) {
 ; CHECK-LABEL: float_float_add:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    bxeq lr
-; CHECK-NEXT:  .LBB1_1: @ %for.body.preheader
 ; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    beq .LBB1_10
+; CHECK-NEXT:  @ %bb.1: @ %for.body.preheader
 ; CHECK-NEXT:    cmp r3, #3
 ; CHECK-NEXT:    bhi .LBB1_3
 ; CHECK-NEXT:  @ %bb.2:
@@ -294,9 +291,8 @@ define arm_aapcs_vfpcc void @float_float_add(ptr nocapture readonly %a, ptr noca
 ; CHECK-NEXT:    vadd.f32 s0, s2, s0
 ; CHECK-NEXT:    vstr s0, [r5, #12]
 ; CHECK-NEXT:    bne .LBB1_9
-; CHECK-NEXT:  .LBB1_10:
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  .LBB1_10: @ %for.cond.cleanup
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, pc}
 ; CHECK-NEXT:  .LBB1_11: @ %vector.ph
 ; CHECK-NEXT:    bic r12, r3, #3
 ; CHECK-NEXT:    movs r6, #1
@@ -430,11 +426,10 @@ for.body:                                         ; preds = %for.body.prol.loope
 define arm_aapcs_vfpcc void @float_float_sub(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %c, i32 %N) {
 ; CHECK-LABEL: float_float_sub:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    bxeq lr
-; CHECK-NEXT:  .LBB2_1: @ %for.body.preheader
 ; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    beq .LBB2_10
+; CHECK-NEXT:  @ %bb.1: @ %for.body.preheader
 ; CHECK-NEXT:    cmp r3, #3
 ; CHECK-NEXT:    bhi .LBB2_3
 ; CHECK-NEXT:  @ %bb.2:
@@ -507,9 +502,8 @@ define arm_aapcs_vfpcc void @float_float_sub(ptr nocapture readonly %a, ptr noca
 ; CHECK-NEXT:    vsub.f32 s0, s2, s0
 ; CHECK-NEXT:    vstr s0, [r5, #12]
 ; CHECK-NEXT:    bne .LBB2_9
-; CHECK-NEXT:  .LBB2_10:
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  .LBB2_10: @ %for.cond.cleanup
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, pc}
 ; CHECK-NEXT:  .LBB2_11: @ %vector.ph
 ; CHECK-NEXT:    bic r12, r3, #3
 ; CHECK-NEXT:    movs r6, #1
@@ -643,11 +637,10 @@ for.body:                                         ; preds = %for.body.prol.loope
 define arm_aapcs_vfpcc void @float_int_mul(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %c, i32 %N) {
 ; CHECK-LABEL: float_int_mul:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    bxeq lr
-; CHECK-NEXT:  .LBB3_1: @ %for.body.preheader
 ; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    beq.w .LBB3_13
+; CHECK-NEXT:  @ %bb.1: @ %for.body.preheader
 ; CHECK-NEXT:    cmp r3, #3
 ; CHECK-NEXT:    bls .LBB3_6
 ; CHECK-NEXT:  @ %bb.2: @ %vector.memcheck
@@ -736,9 +729,8 @@ define arm_aapcs_vfpcc void @float_int_mul(ptr nocapture readonly %a, ptr nocapt
 ; CHECK-NEXT:    vmul.f32 s0, s2, s0
 ; CHECK-NEXT:    vstr s0, [r6, #12]
 ; CHECK-NEXT:    bne .LBB3_12
-; CHECK-NEXT:  .LBB3_13:
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  .LBB3_13: @ %for.cond.cleanup
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, pc}
 entry:
   %cmp8 = icmp eq i32 %N, 0
   br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
index 93119eac2d564..eb98b85eafc90 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
@@ -411,12 +411,10 @@ for.cond.cleanup:                                 ; preds = %middle.block, %entr
 define dso_local arm_aapcs_vfpcc i32 @two_loops_mul_add_v4i32(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) local_unnamed_addr {
 ; CHECK-LABEL: two_loops_mul_add_v4i32:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    itt eq
-; CHECK-NEXT:    moveq r0, #0
-; CHECK-NEXT:    bxeq lr
-; CHECK-NEXT:  .LBB6_1: @ %vector.ph
 ; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    beq .LBB6_8
+; CHECK-NEXT:  @ %bb.1: @ %vector.ph
 ; CHECK-NEXT:    adds r3, r2, #3
 ; CHECK-NEXT:    vmov.i32 q1, #0x0
 ; CHECK-NEXT:    bic r3, r3, #3
@@ -463,10 +461,12 @@ define dso_local arm_aapcs_vfpcc i32 @two_loops_mul_add_v4i32(i8* nocapture read
 ; CHECK-NEXT:  @ %bb.6: @ %middle.block44
 ; CHECK-NEXT:    vpsel q0, q0, q1
 ; CHECK-NEXT:    vaddv.u32 r12, q0
-; CHECK-NEXT:  .LBB6_7:
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, lr}
+; CHECK-NEXT:  .LBB6_7: @ %for.cond.cleanup7
 ; CHECK-NEXT:    mov r0, r12
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT:  .LBB6_8:
+; CHECK-NEXT:    movs r0, #0
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
 entry:
   %cmp35 = icmp eq i32 %N, 0
   br i1 %cmp35, label %for.cond.cleanup7, label %vector.ph

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/sibling-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/sibling-loops.ll
index 1f3a43923db61..caf7a339805fc 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/sibling-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/sibling-loops.ll
@@ -4,11 +4,10 @@
 define arm_aapcs_vfpcc void @test(ptr noalias nocapture readonly %off, ptr noalias nocapture %data, ptr noalias nocapture %dst, i32 %n) {
 ; CHECK-LABEL: test:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r3, #1
-; CHECK-NEXT:    it lt
-; CHECK-NEXT:    bxlt lr
-; CHECK-NEXT:  .LBB0_1: @ %for.cond1.preheader.us.preheader
 ; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT:    cmp r3, #1
+; CHECK-NEXT:    blt .LBB0_7
+; CHECK-NEXT:  @ %bb.1: @ %for.cond1.preheader.us.preheader
 ; CHECK-NEXT:    mov r8, r3
 ; CHECK-NEXT:    lsl.w r12, r3, #1
 ; CHECK-NEXT:    movs r3, #0
@@ -48,9 +47,8 @@ define arm_aapcs_vfpcc void @test(ptr noalias nocapture readonly %off, ptr noali
 ; CHECK-NEXT:    add r4, r12
 ; CHECK-NEXT:    cmp r3, r8
 ; CHECK-NEXT:    bne .LBB0_2
-; CHECK-NEXT:  @ %bb.7:
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  .LBB0_7: @ %for.cond.cleanup
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, pc}
 entry:
   %cmp252 = icmp sgt i32 %n, 0
   br i1 %cmp252, label %for.cond1.preheader.us, label %for.cond.cleanup

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll
index be1f1de71be3d..9ef5a46edf934 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll
@@ -5,19 +5,17 @@
 define void @__arm_2d_impl_rgb16_colour_filling_with_alpha(ptr noalias nocapture %phwTargetBase, i16 signext %iTargetStride, ptr noalias nocapture readonly %ptCopySize, i16 zeroext %hwColour, i32 %chRatio) {
 ; CHECK-LABEL: __arm_2d_impl_rgb16_colour_filling_with_alpha:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    ldrsh.w r12, [r2, #2]
-; CHECK-NEXT:    cmp.w r12, #1
-; CHECK-NEXT:    it lt
-; CHECK-NEXT:    bxlt lr
-; CHECK-NEXT:  .LBB0_1: @ %for.cond3.preheader.lr.ph
 ; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
 ; CHECK-NEXT:    sub sp, #64
-; CHECK-NEXT:    ldrsh.w r7, [r2]
-; CHECK-NEXT:    cmp r7, #1
-; CHECK-NEXT:    blt.w .LBB0_6
-; CHECK-NEXT:  @ %bb.2: @ %for.cond3.preheader.us.preheader
+; CHECK-NEXT:    ldrsh.w r12, [r2, #2]
+; CHECK-NEXT:    cmp.w r12, #1
+; CHECK-NEXT:    itt ge
+; CHECK-NEXT:    ldrshge.w r7, [r2]
+; CHECK-NEXT:    cmpge r7, #1
+; CHECK-NEXT:    blt.w .LBB0_5
+; CHECK-NEXT:  @ %bb.1: @ %for.cond3.preheader.us.preheader
 ; CHECK-NEXT:    movs r2, #252
 ; CHECK-NEXT:    ldr r4, [sp, #152]
 ; CHECK-NEXT:    and.w r6, r2, r3, lsr #3
@@ -48,14 +46,14 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha(ptr noalias nocapture
 ; CHECK-NEXT:    vstrw.32 q0, [sp] @ 16-byte Spill
 ; CHECK-NEXT:    vstrw.32 q2, [sp, #32] @ 16-byte Spill
 ; CHECK-NEXT:    vstrw.32 q3, [sp, #16] @ 16-byte Spill
-; CHECK-NEXT:  .LBB0_3: @ %vector.ph
+; CHECK-NEXT:  .LBB0_2: @ %vector.ph
 ; CHECK-NEXT:    @ =>This Loop Header: Depth=1
-; CHECK-NEXT:    @ Child Loop BB0_4 Depth 2
+; CHECK-NEXT:    @ Child Loop BB0_3 Depth 2
 ; CHECK-NEXT:    mov r5, r0
 ; CHECK-NEXT:    mov r6, r7
 ; CHECK-NEXT:    dls lr, r3
-; CHECK-NEXT:  .LBB0_4: @ %vector.body
-; CHECK-NEXT:    @ Parent Loop BB0_3 Depth=1
+; CHECK-NEXT:  .LBB0_3: @ %vector.body
+; CHECK-NEXT:    @ Parent Loop BB0_2 Depth=1
 ; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    vctp.16 r6
 ; CHECK-NEXT:    subs r6, #8
@@ -91,19 +89,18 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha(ptr noalias nocapture
 ; CHECK-NEXT:    vorr q0, q1, q0
 ; CHECK-NEXT:    vpst
 ; CHECK-NEXT:    vstrht.16 q0, [r5], #16
-; CHECK-NEXT:    le lr, .LBB0_4
-; CHECK-NEXT:  @ %bb.5: @ %for.cond3.for.cond.cleanup7_crit_edge.us
-; CHECK-NEXT:    @ in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    le lr, .LBB0_3
+; CHECK-NEXT:  @ %bb.4: @ %for.cond3.for.cond.cleanup7_crit_edge.us
+; CHECK-NEXT:    @ in Loop: Header=BB0_2 Depth=1
 ; CHECK-NEXT:    adds r4, #1
 ; CHECK-NEXT:    add.w r0, r0, r1, lsl #1
 ; CHECK-NEXT:    cmp r4, r12
-; CHECK-NEXT:    bne .LBB0_3
-; CHECK-NEXT:  .LBB0_6:
+; CHECK-NEXT:    bne .LBB0_2
+; CHECK-NEXT:  .LBB0_5: @ %for.cond.cleanup
 ; CHECK-NEXT:    add sp, #64
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
 ; CHECK-NEXT:    add sp, #4
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
 entry:
   %iHeight = getelementptr inbounds %struct.arm_2d_size_t, ptr %ptCopySize, i32 0, i32 1
   %0 = load i16, ptr %iHeight, align 2
@@ -187,19 +184,18 @@ for.cond.cleanup:                                 ; preds = %for.cond3.for.cond.
 define void @__arm_2d_impl_rgb16_colour_filling_with_alpha_sched(ptr noalias nocapture %phwTargetBase, i16 signext %iTargetStride, ptr noalias nocapture readonly %ptCopySize, i16 zeroext %hwColour, i32 %chRatio) "target-cpu"="cortex-m55" {
 ; CHECK-LABEL: __arm_2d_impl_rgb16_colour_filling_with_alpha_sched:
 ; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEXT:    sub sp, #80
 ; CHECK-NEXT:    ldrsh.w r12, [r2, #2]
 ; CHECK-NEXT:    cmp.w r12, #1
-; CHECK-NEXT:    blt.w .LBB1_7
+; CHECK-NEXT:    blt.w .LBB1_6
 ; CHECK-NEXT:  @ %bb.1: @ %for.cond3.preheader.lr.ph
 ; CHECK-NEXT:    ldrsh.w r2, [r2]
 ; CHECK-NEXT:    cmp r2, #1
-; CHECK-NEXT:    it lt
-; CHECK-NEXT:    bxlt lr
-; CHECK-NEXT:  .LBB1_2: @ %for.cond3.preheader.us.preheader
-; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
-; CHECK-NEXT:    sub sp, #4
-; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; CHECK-NEXT:    sub sp, #80
+; CHECK-NEXT:    blt .LBB1_6
+; CHECK-NEXT:  @ %bb.2: @ %for.cond3.preheader.us.preheader
 ; CHECK-NEXT:    ldr r7, [sp, #168]
 ; CHECK-NEXT:    movs r5, #120
 ; CHECK-NEXT:    lsls r6, r3, #3
@@ -269,13 +265,11 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha_sched(ptr noalias noc
 ; CHECK-NEXT:    adds r4, #1
 ; CHECK-NEXT:    cmp r4, r12
 ; CHECK-NEXT:    bne .LBB1_3
-; CHECK-NEXT:  @ %bb.6:
+; CHECK-NEXT:  .LBB1_6: @ %for.cond.cleanup
 ; CHECK-NEXT:    add sp, #80
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
 ; CHECK-NEXT:    add sp, #4
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, lr}
-; CHECK-NEXT:  .LBB1_7: @ %for.cond.cleanup
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
 entry:
   %iHeight = getelementptr inbounds %struct.arm_2d_size_t, ptr %ptCopySize, i32 0, i32 1
   %0 = load i16, ptr %iHeight, align 2

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll
index 3b42ee36e7c2e..fc58873f9857b 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll
@@ -53,12 +53,10 @@ if.end:                                           ; preds = %do.body, %entry
 define void @nested(ptr nocapture readonly %x, ptr nocapture readnone %y, ptr nocapture %z, i32 %m, i32 %n) {
 ; CHECK-LABEL: nested:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r3, #0
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    bxeq lr
-; CHECK-NEXT:  .LBB1_1: @ %for.body.preheader
 ; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, lr}
 ; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT:    cbz r3, .LBB1_8
+; CHECK-NEXT:  @ %bb.1: @ %for.body.preheader
 ; CHECK-NEXT:    ldr.w r12, [sp, #24]
 ; CHECK-NEXT:    movs r1, #0
 ; CHECK-NEXT:    b .LBB1_4
@@ -93,9 +91,8 @@ define void @nested(ptr nocapture readonly %x, ptr nocapture readnone %y, ptr no
 ; CHECK-NEXT:    sub.w r12, r12, r5
 ; CHECK-NEXT:    mov r0, r8
 ; CHECK-NEXT:    b .LBB1_3
-; CHECK-NEXT:  .LBB1_8:
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  .LBB1_8: @ %for.cond.cleanup
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, pc}
 entry:
   %cmp20.not = icmp eq i32 %m, 0
   br i1 %cmp20.not, label %for.cond.cleanup, label %for.body

diff  --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
index b7b19a477ab0f..6228d616b5842 100644
--- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
@@ -981,13 +981,6 @@ if.end61:                                         ; preds = %if.then59, %while.e
 define void @fir(ptr nocapture readonly %S, ptr nocapture readonly %pSrc, ptr nocapture %pDst, i32 %blockSize) {
 ; CHECK-LABEL: fir:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r3, #8
-; CHECK-NEXT:    blo.w .LBB16_13
-; CHECK-NEXT:  @ %bb.1: @ %if.then
-; CHECK-NEXT:    lsrs.w r12, r3, #2
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    bxeq lr
-; CHECK-NEXT:  .LBB16_2: @ %while.body.lr.ph
 ; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-NEXT:    .pad #4
@@ -996,6 +989,12 @@ define void @fir(ptr nocapture readonly %S, ptr nocapture readonly %pSrc, ptr no
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
 ; CHECK-NEXT:    .pad #32
 ; CHECK-NEXT:    sub sp, #32
+; CHECK-NEXT:    cmp r3, #8
+; CHECK-NEXT:    blo.w .LBB16_12
+; CHECK-NEXT:  @ %bb.1: @ %if.then
+; CHECK-NEXT:    lsrs.w r12, r3, #2
+; CHECK-NEXT:    beq.w .LBB16_12
+; CHECK-NEXT:  @ %bb.2: @ %while.body.lr.ph
 ; CHECK-NEXT:    ldrh r6, [r0]
 ; CHECK-NEXT:    movs r5, #1
 ; CHECK-NEXT:    ldrd r4, r10, [r0, #4]
@@ -1107,13 +1106,11 @@ define void @fir(ptr nocapture readonly %S, ptr nocapture readonly %pSrc, ptr no
 ; CHECK-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload
 ; CHECK-NEXT:    add.w r4, r4, r0, lsl #2
 ; CHECK-NEXT:    b .LBB16_4
-; CHECK-NEXT:  .LBB16_12:
+; CHECK-NEXT:  .LBB16_12: @ %if.end
 ; CHECK-NEXT:    add sp, #32
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
 ; CHECK-NEXT:    add sp, #4
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-NEXT:  .LBB16_13: @ %if.end
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 entry:
   %pState1 = getelementptr inbounds %struct.arm_fir_instance_f32, ptr %S, i32 0, i32 1
   %i = load ptr, ptr %pState1, align 4

diff  --git a/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll b/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll
index 0335d24c0a782..24f1831a3f07c 100644
--- a/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll
@@ -290,12 +290,12 @@ end:
 define arm_aapcs_vfpcc void @gather_inc_v4i32_simple(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n) {
 ; CHECK-LABEL: gather_inc_v4i32_simple:
 ; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, lr}
+; CHECK-NEXT:    push {r4, lr}
 ; CHECK-NEXT:    cmp r2, #1
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    bxlt lr
+; CHECK-NEXT:    poplt {r4, pc}
 ; CHECK-NEXT:  .LBB8_1: @ %vector.ph.preheader
-; CHECK-NEXT:    .save {r4, lr}
-; CHECK-NEXT:    push {r4, lr}
 ; CHECK-NEXT:    bic r12, r2, #3
 ; CHECK-NEXT:    movs r3, #1
 ; CHECK-NEXT:    sub.w lr, r12, #4
@@ -319,9 +319,8 @@ define arm_aapcs_vfpcc void @gather_inc_v4i32_simple(ptr noalias nocapture reado
 ; CHECK-NEXT:    @ in Loop: Header=BB8_2 Depth=1
 ; CHECK-NEXT:    cmp r12, r2
 ; CHECK-NEXT:    bne .LBB8_2
-; CHECK-NEXT:  @ %bb.5:
-; CHECK-NEXT:    pop.w {r4, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  @ %bb.5: @ %for.cond.cleanup
+; CHECK-NEXT:    pop {r4, pc}
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  @ %bb.6:
 ; CHECK-NEXT:  .LCPI8_0:
@@ -360,14 +359,13 @@ for.cond.cleanup:                                 ; preds = %for.body, %middle.b
 define arm_aapcs_vfpcc void @gather_inc_v4i32_complex(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n) {
 ; CHECK-LABEL: gather_inc_v4i32_complex:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r2, #1
-; CHECK-NEXT:    it lt
-; CHECK-NEXT:    bxlt lr
-; CHECK-NEXT:  .LBB9_1: @ %vector.ph.preheader
 ; CHECK-NEXT:    .save {r4, r5, r7, lr}
 ; CHECK-NEXT:    push {r4, r5, r7, lr}
 ; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEXT:    cmp r2, #1
+; CHECK-NEXT:    blt .LBB9_5
+; CHECK-NEXT:  @ %bb.1: @ %vector.ph.preheader
 ; CHECK-NEXT:    bic r12, r2, #3
 ; CHECK-NEXT:    movs r3, #1
 ; CHECK-NEXT:    sub.w lr, r12, #4
@@ -403,10 +401,9 @@ define arm_aapcs_vfpcc void @gather_inc_v4i32_complex(ptr noalias nocapture read
 ; CHECK-NEXT:    @ in Loop: Header=BB9_2 Depth=1
 ; CHECK-NEXT:    cmp r12, r2
 ; CHECK-NEXT:    bne .LBB9_2
-; CHECK-NEXT:  @ %bb.5:
+; CHECK-NEXT:  .LBB9_5: @ %for.cond.cleanup
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; CHECK-NEXT:    pop.w {r4, r5, r7, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:    pop {r4, r5, r7, pc}
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  @ %bb.6:
 ; CHECK-NEXT:  .LCPI9_0:
@@ -464,12 +461,12 @@ for.cond.cleanup:                                 ; preds = %for.body, %middle.b
 define arm_aapcs_vfpcc void @gather_inc_v4i32_large(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n) {
 ; CHECK-LABEL: gather_inc_v4i32_large:
 ; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, lr}
+; CHECK-NEXT:    push {r4, lr}
 ; CHECK-NEXT:    cmp r2, #1
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    bxlt lr
+; CHECK-NEXT:    poplt {r4, pc}
 ; CHECK-NEXT:  .LBB10_1: @ %vector.ph.preheader
-; CHECK-NEXT:    .save {r4, lr}
-; CHECK-NEXT:    push {r4, lr}
 ; CHECK-NEXT:    bic r12, r2, #3
 ; CHECK-NEXT:    movs r3, #1
 ; CHECK-NEXT:    sub.w lr, r12, #4
@@ -493,9 +490,8 @@ define arm_aapcs_vfpcc void @gather_inc_v4i32_large(ptr noalias nocapture readon
 ; CHECK-NEXT:    @ in Loop: Header=BB10_2 Depth=1
 ; CHECK-NEXT:    cmp r12, r2
 ; CHECK-NEXT:    bne .LBB10_2
-; CHECK-NEXT:  @ %bb.5:
-; CHECK-NEXT:    pop.w {r4, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  @ %bb.5: @ %for.cond.cleanup
+; CHECK-NEXT:    pop {r4, pc}
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  @ %bb.6:
 ; CHECK-NEXT:  .LCPI10_0:

diff  --git a/llvm/test/CodeGen/Thumb2/mve-gather-tailpred.ll b/llvm/test/CodeGen/Thumb2/mve-gather-tailpred.ll
index ea186cd6ed2d4..9093b9af00656 100644
--- a/llvm/test/CodeGen/Thumb2/mve-gather-tailpred.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-gather-tailpred.ll
@@ -4,12 +4,12 @@
 define arm_aapcs_vfpcc void @gather_inc_v4i32_simple(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n) {
 ; CHECK-LABEL: gather_inc_v4i32_simple:
 ; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, lr}
+; CHECK-NEXT:    push {r4, lr}
 ; CHECK-NEXT:    cmp r2, #1
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    bxlt lr
+; CHECK-NEXT:    poplt {r4, pc}
 ; CHECK-NEXT:  .LBB0_1: @ %vector.ph.preheader
-; CHECK-NEXT:    .save {r4, lr}
-; CHECK-NEXT:    push {r4, lr}
 ; CHECK-NEXT:    bic r12, r2, #3
 ; CHECK-NEXT:    movs r3, #1
 ; CHECK-NEXT:    sub.w lr, r12, #4
@@ -33,9 +33,8 @@ define arm_aapcs_vfpcc void @gather_inc_v4i32_simple(ptr noalias nocapture reado
 ; CHECK-NEXT:    @ in Loop: Header=BB0_2 Depth=1
 ; CHECK-NEXT:    cmp r12, r2
 ; CHECK-NEXT:    bne .LBB0_2
-; CHECK-NEXT:  @ %bb.5:
-; CHECK-NEXT:    pop.w {r4, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  @ %bb.5: @ %for.cond.cleanup
+; CHECK-NEXT:    pop {r4, pc}
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  @ %bb.6:
 ; CHECK-NEXT:  .LCPI0_0:

diff  --git a/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll b/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll
index da59cb259db61..5f3a12711dc0f 100644
--- a/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll
@@ -211,12 +211,12 @@ entry:
 define void @test11(ptr nocapture %x, ptr nocapture %y, i32 %n) {
 ; CHECK-LABEL: test11:
 ; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, lr}
+; CHECK-NEXT:    push {r4, lr}
 ; CHECK-NEXT:    cmp.w r2, #-1
 ; CHECK-NEXT:    it gt
-; CHECK-NEXT:    bxgt lr
+; CHECK-NEXT:    popgt {r4, pc}
 ; CHECK-NEXT:  .LBB10_1: @ %prehead
-; CHECK-NEXT:    .save {r4, lr}
-; CHECK-NEXT:    push {r4, lr}
 ; CHECK-NEXT:    mov r12, r1
 ; CHECK-NEXT:    mov r4, r0
 ; CHECK-NEXT:    wlstp.8 lr, r2, .LBB10_3
@@ -230,9 +230,8 @@ define void @test11(ptr nocapture %x, ptr nocapture %y, i32 %n) {
 ; CHECK-NEXT:    subs r2, #2
 ; CHECK-NEXT:    strb r3, [r1], #1
 ; CHECK-NEXT:    bne .LBB10_3
-; CHECK-NEXT:  @ %bb.4:
-; CHECK-NEXT:    pop.w {r4, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  @ %bb.4: @ %for.cond.cleanup
+; CHECK-NEXT:    pop {r4, pc}
 entry:
   %cmp6 = icmp slt i32 %n, 0
   br i1 %cmp6, label %prehead, label %for.cond.cleanup
@@ -441,12 +440,12 @@ declare void @other()
 define void @multilooped_exit(i32 %b) {
 ; CHECK-LABEL: multilooped_exit:
 ; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, lr}
+; CHECK-NEXT:    push {r4, lr}
 ; CHECK-NEXT:    cmp r0, #1
 ; CHECK-NEXT:    it lt
-; CHECK-NEXT:    bxlt lr
+; CHECK-NEXT:    poplt {r4, pc}
 ; CHECK-NEXT:  .LBB18_1: @ %loop.preheader
-; CHECK-NEXT:    .save {r4, lr}
-; CHECK-NEXT:    push {r4, lr}
 ; CHECK-NEXT:    mov.w r4, #-1
 ; CHECK-NEXT:    vmov.i32 q0, #0x0
 ; CHECK-NEXT:    b .LBB18_3
@@ -499,9 +498,8 @@ define void @multilooped_exit(i32 %b) {
 ; CHECK-NEXT:    vstrb.8 q0, [r3], #16
 ; CHECK-NEXT:    letp lr, .LBB18_11
 ; CHECK-NEXT:    b .LBB18_2
-; CHECK-NEXT:  .LBB18_12:
-; CHECK-NEXT:    pop.w {r4, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  .LBB18_12: @ %exit
+; CHECK-NEXT:    pop {r4, pc}
 entry:
   %cmp8 = icmp sgt i32 %b, 0
   br i1 %cmp8, label %loop, label %exit

diff  --git a/llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll
index 45bb70ec44b73..7e059ae726fc6 100644
--- a/llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll
@@ -6,14 +6,13 @@
 define void @DCT_mve1(ptr nocapture readonly %S, ptr nocapture readonly %pIn, ptr nocapture %pOut) {
 ; CHECK-LABEL: DCT_mve1:
 ; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
 ; CHECK-NEXT:    ldr r3, [r0, #4]
 ; CHECK-NEXT:    sub.w r12, r3, #1
 ; CHECK-NEXT:    cmp.w r12, #2
-; CHECK-NEXT:    it lo
-; CHECK-NEXT:    bxlo lr
-; CHECK-NEXT:  .LBB0_1: @ %for.body.preheader
-; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
-; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-NEXT:    blo .LBB0_5
+; CHECK-NEXT:  @ %bb.1: @ %for.body.preheader
 ; CHECK-NEXT:    ldr r5, [r0, #8]
 ; CHECK-NEXT:    ldr r3, [r0]
 ; CHECK-NEXT:    add.w r3, r3, r5, lsl #2
@@ -44,9 +43,8 @@ define void @DCT_mve1(ptr nocapture readonly %S, ptr nocapture readonly %pIn, pt
 ; CHECK-NEXT:    vadd.f32 s0, s0, s2
 ; CHECK-NEXT:    vstr s0, [r7]
 ; CHECK-NEXT:    bne .LBB0_2
-; CHECK-NEXT:  @ %bb.5:
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  .LBB0_5: @ %for.cond.cleanup
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
 entry:
   %NumInputs = getelementptr inbounds %struct.DCT_InstanceTypeDef, ptr %S, i32 0, i32 2
   %i = load i32, ptr %NumInputs, align 4

diff  --git a/llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll b/llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll
index 3a14e650bd53a..94397f0ae587b 100644
--- a/llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll
@@ -127,16 +127,15 @@ define arm_aapcs_vfpcc void @scatter_inc_mini_16i8(<16 x i8> %data, ptr %dst, <1
 define arm_aapcs_vfpcc void @scatter_inc_v4i32_complex(<4 x i32> %data1, <4 x i32> %data2, <4 x i32> %data3, ptr %dst, i32 %n) {
 ; CHECK-LABEL: scatter_inc_v4i32_complex:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r1, #1
-; CHECK-NEXT:    it lt
-; CHECK-NEXT:    bxlt lr
-; CHECK-NEXT:  .LBB3_1: @ %vector.ph.preheader
 ; CHECK-NEXT:    .save {r4, lr}
 ; CHECK-NEXT:    push {r4, lr}
 ; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
 ; CHECK-NEXT:    .pad #16
 ; CHECK-NEXT:    sub sp, #16
+; CHECK-NEXT:    cmp r1, #1
+; CHECK-NEXT:    blt .LBB3_5
+; CHECK-NEXT:  @ %bb.1: @ %vector.ph.preheader
 ; CHECK-NEXT:    adr r4, .LCPI3_2
 ; CHECK-NEXT:    bic r2, r1, #3
 ; CHECK-NEXT:    vldrw.u32 q3, [r4]
@@ -169,11 +168,10 @@ define arm_aapcs_vfpcc void @scatter_inc_v4i32_complex(<4 x i32> %data1, <4 x i3
 ; CHECK-NEXT:    @ in Loop: Header=BB3_2 Depth=1
 ; CHECK-NEXT:    cmp r2, r1
 ; CHECK-NEXT:    bne .LBB3_2
-; CHECK-NEXT:  @ %bb.5:
+; CHECK-NEXT:  .LBB3_5: @ %for.cond.cleanup
 ; CHECK-NEXT:    add sp, #16
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; CHECK-NEXT:    pop.w {r4, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:    pop {r4, pc}
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  @ %bb.6:
 ; CHECK-NEXT:  .LCPI3_0:

diff  --git a/llvm/test/CodeGen/Thumb2/mve-tailpred-nonzerostart.ll b/llvm/test/CodeGen/Thumb2/mve-tailpred-nonzerostart.ll
index 42a00b61b4183..85425db1eb6c8 100644
--- a/llvm/test/CodeGen/Thumb2/mve-tailpred-nonzerostart.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-tailpred-nonzerostart.ll
@@ -58,12 +58,11 @@ for.cond.cleanup:                                 ; preds = %vector.body, %entry
 define arm_aapcs_vfpcc void @start11(ptr nocapture readonly %x, ptr nocapture readonly %y, ptr noalias nocapture %z, float %a, i32 %n) {
 ; CHECK-LABEL: start11:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r3, #1
-; CHECK-NEXT:    it lt
-; CHECK-NEXT:    bxlt lr
-; CHECK-NEXT:  .LBB1_1: @ %vector.ph
 ; CHECK-NEXT:    .save {r4, r5, r7, lr}
 ; CHECK-NEXT:    push {r4, r5, r7, lr}
+; CHECK-NEXT:    cmp r3, #1
+; CHECK-NEXT:    blt .LBB1_3
+; CHECK-NEXT:  @ %bb.1: @ %vector.ph
 ; CHECK-NEXT:    vmov r12, s0
 ; CHECK-NEXT:    adds r4, r3, #3
 ; CHECK-NEXT:    adr r5, .LCPI1_0
@@ -86,9 +85,8 @@ define arm_aapcs_vfpcc void @start11(ptr nocapture readonly %x, ptr nocapture re
 ; CHECK-NEXT:    vpst
 ; CHECK-NEXT:    vstrwt.32 q3, [r2], #16
 ; CHECK-NEXT:    bne .LBB1_2
-; CHECK-NEXT:  @ %bb.3:
-; CHECK-NEXT:    pop.w {r4, r5, r7, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  .LBB1_3: @ %for.cond.cleanup
+; CHECK-NEXT:    pop {r4, r5, r7, pc}
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  @ %bb.4:
 ; CHECK-NEXT:  .LCPI1_0:

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll b/llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll
index 0a26d9920981b..da0cd57d86dbb 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll
@@ -4,13 +4,11 @@
 define arm_aapcs_vfpcc void @test32(ptr noalias nocapture readonly %x, ptr noalias nocapture readonly %y, ptr nocapture %z, i32 %n) {
 ; CHECK-LABEL: test32:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    cmp r3, #1
-; CHECK-NEXT:    it lt
-; CHECK-NEXT:    bxlt lr
-; CHECK-NEXT:  .LBB0_1: @ %vector.body.preheader
 ; CHECK-NEXT:    .save {r4, r5, r7, lr}
 ; CHECK-NEXT:    push {r4, r5, r7, lr}
-; CHECK-NEXT:  .LBB0_2: @ %vector.body
+; CHECK-NEXT:    cmp r3, #1
+; CHECK-NEXT:    blt .LBB0_2
+; CHECK-NEXT:  .LBB0_1: @ %vector.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vldrw.u32 q0, [r0], #16
 ; CHECK-NEXT:    vldrw.u32 q1, [r1], #16
@@ -28,10 +26,9 @@ define arm_aapcs_vfpcc void @test32(ptr noalias nocapture readonly %x, ptr noali
 ; CHECK-NEXT:    lsrl r4, r5, #31
 ; CHECK-NEXT:    vmov q2[3], q2[1], r4, r12
 ; CHECK-NEXT:    vstrb.8 q2, [r2], #16
-; CHECK-NEXT:    bne .LBB0_2
-; CHECK-NEXT:  @ %bb.3:
-; CHECK-NEXT:    pop.w {r4, r5, r7, lr}
-; CHECK-NEXT:    bx lr
+; CHECK-NEXT:    bne .LBB0_1
+; CHECK-NEXT:  .LBB0_2: @ %for.cond.cleanup
+; CHECK-NEXT:    pop {r4, r5, r7, pc}
 entry:
   %0 = and i32 %n, 3
   %cmp = icmp eq i32 %0, 0

diff  --git a/llvm/test/CodeGen/X86/fold-call-3.ll b/llvm/test/CodeGen/X86/fold-call-3.ll
index 691f46b9eeb0e..9c9a50d3e9ce1 100644
--- a/llvm/test/CodeGen/X86/fold-call-3.ll
+++ b/llvm/test/CodeGen/X86/fold-call-3.ll
@@ -13,12 +13,12 @@
 define void @_Z25RawPointerPerformanceTestPvRN5clang6ActionE(ptr %Val, ptr %Actions) nounwind {
 ; CHECK-LABEL: _Z25RawPointerPerformanceTestPvRN5clang6ActionE:
 ; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    cmpl $0, _NumTrials(%rip)
-; CHECK-NEXT:    je LBB0_4
-; CHECK-NEXT:  ## %bb.1: ## %bb.nph
 ; CHECK-NEXT:    pushq %rbp
 ; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    subq $24, %rsp
+; CHECK-NEXT:    cmpl $0, _NumTrials(%rip)
+; CHECK-NEXT:    je LBB0_3
+; CHECK-NEXT:  ## %bb.1: ## %bb.nph
 ; CHECK-NEXT:    movq %rsi, %rbx
 ; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    xorl %ebp, %ebp
@@ -34,21 +34,20 @@ define void @_Z25RawPointerPerformanceTestPvRN5clang6ActionE(ptr %Val, ptr %Acti
 ; CHECK-NEXT:    incl %ebp
 ; CHECK-NEXT:    cmpl _NumTrials(%rip), %ebp
 ; CHECK-NEXT:    jb LBB0_2
-; CHECK-NEXT:  ## %bb.3:
+; CHECK-NEXT:  LBB0_3: ## %return
 ; CHECK-NEXT:    addq $24, %rsp
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %rbp
-; CHECK-NEXT:  LBB0_4: ## %return
 ; CHECK-NEXT:    retq
 ;
 ; pre-RA-LABEL: _Z25RawPointerPerformanceTestPvRN5clang6ActionE:
 ; pre-RA:       ## %bb.0: ## %entry
-; pre-RA-NEXT:    cmpl $0, _NumTrials(%rip)
-; pre-RA-NEXT:    je LBB0_4
-; pre-RA-NEXT:  ## %bb.1: ## %bb.nph
 ; pre-RA-NEXT:    pushq %rbp
 ; pre-RA-NEXT:    pushq %rbx
 ; pre-RA-NEXT:    subq $24, %rsp
+; pre-RA-NEXT:    cmpl $0, _NumTrials(%rip)
+; pre-RA-NEXT:    je LBB0_3
+; pre-RA-NEXT:  ## %bb.1: ## %bb.nph
 ; pre-RA-NEXT:    movq %rsi, %rbx
 ; pre-RA-NEXT:    movq %rdi, %rax
 ; pre-RA-NEXT:    xorl %ebp, %ebp
@@ -64,11 +63,10 @@ define void @_Z25RawPointerPerformanceTestPvRN5clang6ActionE(ptr %Val, ptr %Acti
 ; pre-RA-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
 ; pre-RA-NEXT:    cmpl _NumTrials(%rip), %ebp
 ; pre-RA-NEXT:    jb LBB0_2
-; pre-RA-NEXT:  ## %bb.3:
+; pre-RA-NEXT:  LBB0_3: ## %return
 ; pre-RA-NEXT:    addq $24, %rsp
 ; pre-RA-NEXT:    popq %rbx
 ; pre-RA-NEXT:    popq %rbp
-; pre-RA-NEXT:  LBB0_4: ## %return
 ; pre-RA-NEXT:    retq
 entry:
   %i = alloca %"struct.clang::ActionBase::ActionResult<0u>", align 8

diff  --git a/llvm/test/CodeGen/X86/negative-stride-fptosi-user.ll b/llvm/test/CodeGen/X86/negative-stride-fptosi-user.ll
index d0d46b5f11836..e21d4de178719 100644
--- a/llvm/test/CodeGen/X86/negative-stride-fptosi-user.ll
+++ b/llvm/test/CodeGen/X86/negative-stride-fptosi-user.ll
@@ -9,14 +9,12 @@
 define void @foo(i32 %N) nounwind {
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    js .LBB0_1
-; CHECK-NEXT:  # %bb.4: # %return
-; CHECK-NEXT:    retq
-; CHECK-NEXT:  .LBB0_1: # %bb.preheader
 ; CHECK-NEXT:    pushq %rbp
 ; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    testl %edi, %edi
+; CHECK-NEXT:    jns .LBB0_3
+; CHECK-NEXT:  # %bb.1: # %bb.preheader
 ; CHECK-NEXT:    movl %edi, %ebx
 ; CHECK-NEXT:    xorl %ebp, %ebp
 ; CHECK-NEXT:    .p2align 4, 0x90
@@ -28,7 +26,7 @@ define void @foo(i32 %N) nounwind {
 ; CHECK-NEXT:    decl %ebp
 ; CHECK-NEXT:    cmpl %ebp, %ebx
 ; CHECK-NEXT:    jne .LBB0_2
-; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:  .LBB0_3: # %return
 ; CHECK-NEXT:    addq $8, %rsp
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %rbp

diff  --git a/llvm/test/CodeGen/X86/pr44412.ll b/llvm/test/CodeGen/X86/pr44412.ll
index 67579a5bb7c52..6c33666fb5c3a 100644
--- a/llvm/test/CodeGen/X86/pr44412.ll
+++ b/llvm/test/CodeGen/X86/pr44412.ll
@@ -4,10 +4,10 @@
 define void @bar(i32 %0, i32 %1) nounwind {
 ; CHECK-LABEL: bar:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    je .LBB0_4
+; CHECK-NEXT:    je .LBB0_3
 ; CHECK-NEXT:  # %bb.1: # %.preheader
-; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    movl %edi, %ebx
 ; CHECK-NEXT:    decl %ebx
 ; CHECK-NEXT:    .p2align 4, 0x90
@@ -16,9 +16,8 @@ define void @bar(i32 %0, i32 %1) nounwind {
 ; CHECK-NEXT:    callq foo@PLT
 ; CHECK-NEXT:    addl $-1, %ebx
 ; CHECK-NEXT:    jb .LBB0_2
-; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:  .LBB0_3:
 ; CHECK-NEXT:    popq %rbx
-; CHECK-NEXT:  .LBB0_4:
 ; CHECK-NEXT:    retq
   %3 = icmp eq i32 %0, 0
   br i1 %3, label %8, label %4
@@ -37,10 +36,10 @@ define void @bar(i32 %0, i32 %1) nounwind {
 define void @baz(i32 %0, i32 %1) nounwind {
 ; CHECK-LABEL: baz:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    je .LBB1_4
+; CHECK-NEXT:    je .LBB1_3
 ; CHECK-NEXT:  # %bb.1: # %.preheader
-; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    movl %edi, %ebx
 ; CHECK-NEXT:    decl %ebx
 ; CHECK-NEXT:    .p2align 4, 0x90
@@ -49,9 +48,8 @@ define void @baz(i32 %0, i32 %1) nounwind {
 ; CHECK-NEXT:    callq foo@PLT
 ; CHECK-NEXT:    addl $-1, %ebx
 ; CHECK-NEXT:    jae .LBB1_2
-; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:  .LBB1_3:
 ; CHECK-NEXT:    popq %rbx
-; CHECK-NEXT:  .LBB1_4:
 ; CHECK-NEXT:    retq
   %3 = icmp eq i32 %0, 0
   br i1 %3, label %8, label %4

diff  --git a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
index f22ea739092f6..ec4a12eadb94e 100644
--- a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
@@ -639,40 +639,40 @@ declare hidden fastcc ptr @find_temp_slot_from_address(ptr readonly)
 define void @useLEA(ptr readonly %x) {
 ; ENABLE-LABEL: useLEA:
 ; ENABLE:       ## %bb.0: ## %entry
+; ENABLE-NEXT:    pushq %rax
+; ENABLE-NEXT:    .cfi_def_cfa_offset 16
 ; ENABLE-NEXT:    testq %rdi, %rdi
-; ENABLE-NEXT:    je LBB8_9
+; ENABLE-NEXT:    je LBB8_7
 ; ENABLE-NEXT:  ## %bb.1: ## %if.end
 ; ENABLE-NEXT:    cmpw $66, (%rdi)
-; ENABLE-NEXT:    jne LBB8_9
+; ENABLE-NEXT:    jne LBB8_7
 ; ENABLE-NEXT:  ## %bb.2: ## %lor.lhs.false
-; ENABLE-NEXT:    pushq %rax
-; ENABLE-NEXT:    .cfi_def_cfa_offset 16
 ; ENABLE-NEXT:    movq 8(%rdi), %rdi
 ; ENABLE-NEXT:    movzwl (%rdi), %eax
 ; ENABLE-NEXT:    leal -54(%rax), %ecx
 ; ENABLE-NEXT:    cmpl $14, %ecx
 ; ENABLE-NEXT:    ja LBB8_3
-; ENABLE-NEXT:  ## %bb.7: ## %lor.lhs.false
+; ENABLE-NEXT:  ## %bb.8: ## %lor.lhs.false
 ; ENABLE-NEXT:    movl $24599, %edx ## imm = 0x6017
 ; ENABLE-NEXT:    btl %ecx, %edx
 ; ENABLE-NEXT:    jae LBB8_3
-; ENABLE-NEXT:  LBB8_8:
-; ENABLE-NEXT:    addq $8, %rsp
-; ENABLE-NEXT:  LBB8_9: ## %cleanup
+; ENABLE-NEXT:  LBB8_7: ## %cleanup
+; ENABLE-NEXT:    popq %rax
 ; ENABLE-NEXT:    retq
 ; ENABLE-NEXT:  LBB8_3: ## %lor.lhs.false
 ; ENABLE-NEXT:    cmpl $134, %eax
-; ENABLE-NEXT:    je LBB8_8
+; ENABLE-NEXT:    je LBB8_7
 ; ENABLE-NEXT:  ## %bb.4: ## %lor.lhs.false
 ; ENABLE-NEXT:    cmpl $140, %eax
-; ENABLE-NEXT:    je LBB8_8
+; ENABLE-NEXT:    je LBB8_7
 ; ENABLE-NEXT:  ## %bb.5: ## %if.end.55
 ; ENABLE-NEXT:    callq _find_temp_slot_from_address
 ; ENABLE-NEXT:    testq %rax, %rax
-; ENABLE-NEXT:    je LBB8_8
+; ENABLE-NEXT:    je LBB8_7
 ; ENABLE-NEXT:  ## %bb.6: ## %if.then.60
 ; ENABLE-NEXT:    movb $1, 57(%rax)
-; ENABLE-NEXT:    jmp LBB8_8
+; ENABLE-NEXT:    popq %rax
+; ENABLE-NEXT:    retq
 ;
 ; DISABLE-LABEL: useLEA:
 ; DISABLE:       ## %bb.0: ## %entry

diff  --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll
index 536f9912f1b6f..2069e974c6905 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll
@@ -23,7 +23,7 @@ define i32 @test(i32 %c, ptr %a, ptr %b) {
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB0_5:
-; CHECK-NEXT:    mov w0, #1 // =0x1
+; CHECK-NEXT:    mov w0, #1
 ; CHECK-NEXT:    ret
 entry:
   %cmp13 = icmp sgt i32 %c, 0
@@ -62,7 +62,7 @@ define i64 @IVIncHoist_not_all_user_in_header(i32 %c, ptr %a, ptr %b) {
 ; CHECK-NEXT:    mov w9, w0
 ; CHECK-NEXT:    add x10, x1, #4
 ; CHECK-NEXT:    add x11, x2, #8
-; CHECK-NEXT:    mov w0, #1 // =0x1
+; CHECK-NEXT:    mov w0, #1
 ; CHECK-NEXT:  .LBB1_2: // %for.body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ldr w12, [x10, x8, lsl #2]
@@ -142,7 +142,7 @@ define i32 @negative_test_type_is_struct(i32 %c, ptr %a, ptr %b) {
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB2_5:
-; CHECK-NEXT:    mov w0, #1 // =0x1
+; CHECK-NEXT:    mov w0, #1
 ; CHECK-NEXT:    ret
 entry:
   %cmp13 = icmp sgt i32 %c, 0

diff  --git a/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
index 63a3c725ae89e..fa1c208ffbd77 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
@@ -182,12 +182,12 @@ exit:
 define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture %res, i32 %x, i32 %y, i32 %z) nounwind {
 ; X64-LABEL: extrastride:
 ; X64:       # %bb.0: # %entry
+; X64-NEXT:    pushq %rbx
 ; X64-NEXT:    # kill: def $ecx killed $ecx def $rcx
 ; X64-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-NEXT:    testl %r9d, %r9d
-; X64-NEXT:    je .LBB2_4
+; X64-NEXT:    je .LBB2_3
 ; X64-NEXT:  # %bb.1: # %for.body.lr.ph
-; X64-NEXT:    pushq %rbx
 ; X64-NEXT:    leal (%rsi,%rsi), %r10d
 ; X64-NEXT:    leal (%rsi,%rsi,2), %r11d
 ; X64-NEXT:    addl %esi, %ecx
@@ -213,9 +213,8 @@ define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture %
 ; X64-NEXT:    addq %r8, %rdx
 ; X64-NEXT:    decl %r9d
 ; X64-NEXT:    jne .LBB2_2
-; X64-NEXT:  # %bb.3:
+; X64-NEXT:  .LBB2_3: # %for.end
 ; X64-NEXT:    popq %rbx
-; X64-NEXT:  .LBB2_4: # %for.end
 ; X64-NEXT:    retq
 ;
 ; X32-LABEL: extrastride:


        


More information about the llvm-commits mailing list