[llvm] 80c4910 - Revert "[MachineSink] replace MachineLoop with MachineCycle"

Tue May 24 19:54:21 PDT 2022

Author: Chen Zheng
Date: 2022-05-24T22:43:37-04:00
New Revision: 80c4910f3d4c8b36120bcdb6670d15c693f0f0df

URL: https://github.com/llvm/llvm-project/commit/80c4910f3d4c8b36120bcdb6670d15c693f0f0df
DIFF: https://github.com/llvm/llvm-project/commit/80c4910f3d4c8b36120bcdb6670d15c693f0f0df.diff

LOG: Revert "[MachineSink] replace MachineLoop with MachineCycle"

This reverts commit 62a9b36fcf728b104ea87e6eb84c0be69b779df7.
Cause build failure on lldb incremental buildbot:
https://green.lab.llvm.org/green/view/LLDB/job/lldb-cmake/43994/changes

Added: 
    

Modified: 
    llvm/include/llvm/ADT/GenericCycleImpl.h
    llvm/include/llvm/ADT/GenericCycleInfo.h
    llvm/include/llvm/CodeGen/MachineCycleAnalysis.h
    llvm/include/llvm/CodeGen/MachineSSAContext.h
    llvm/lib/CodeGen/MachineCycleAnalysis.cpp
    llvm/lib/CodeGen/MachineSink.cpp
    llvm/test/CodeGen/AArch64/O3-pipeline.ll
    llvm/test/CodeGen/AArch64/loop-sink-limit.mir
    llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
    llvm/test/CodeGen/ARM/O3-pipeline.ll
    llvm/test/CodeGen/PowerPC/O3-pipeline.ll
    llvm/test/CodeGen/RISCV/O3-pipeline.ll
    llvm/test/CodeGen/X86/opt-pipeline.ll
    llvm/test/CodeGen/X86/pr38795.ll
    llvm/test/CodeGen/X86/switch-phi-const.ll
    llvm/test/CodeGen/X86/x86-shrink-wrapping.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/ADT/GenericCycleImpl.h b/llvm/include/llvm/ADT/GenericCycleImpl.h
index ea2847f8c8ee..17f3a793c985 100644

--- a/llvm/include/llvm/ADT/GenericCycleImpl.h
+++ b/llvm/include/llvm/ADT/GenericCycleImpl.h
@@ -66,44 +66,6 @@ void GenericCycle<ContextT>::getExitBlocks(
   }
 }
 
-template <typename ContextT>
-auto GenericCycle<ContextT>::getCyclePreheader() const -> BlockT * {
-  BlockT *Predecessor = getCyclePredecessor();
-  if (!Predecessor)
-    return nullptr;
-
-  assert(isReducible() && "Cycle Predecessor must be in a reducible cycle!");
-
-  if (succ_size(Predecessor) != 1)
-    return nullptr;
-
-  // Make sure we are allowed to hoist instructions into the predecessor.
-  if (!Predecessor->isLegalToHoistInto())
-    return nullptr;
-
-  return Predecessor;
-}
-
-template <typename ContextT>
-auto GenericCycle<ContextT>::getCyclePredecessor() const -> BlockT * {
-  if (!isReducible())
-    return nullptr;
-
-  BlockT *Out = nullptr;
-
-  // Loop over the predecessors of the header node...
-  BlockT *Header = getHeader();
-  for (const auto Pred : predecessors(Header)) {
-    if (!contains(Pred)) {
-      if (Out && Out != Pred)
-        return nullptr;
-      Out = Pred;
-    }
-  }
-
-  return Out;
-}
-
 /// \brief Helper class for computing cycle information.
 template <typename ContextT> class GenericCycleInfoCompute {
   using BlockT = typename ContextT::BlockT;
@@ -364,18 +326,6 @@ auto GenericCycleInfo<ContextT>::getCycle(const BlockT *Block) const
   return nullptr;
 }
 
-/// \brief get the depth for the cycle which containing a given block.
-///
-/// \returns the depth for the innermost cycle containing \p Block or 0 if it is
-///          not contained in any cycle.
-template <typename ContextT>
-unsigned GenericCycleInfo<ContextT>::getCycleDepth(const BlockT *Block) const {
-  CycleT *Cycle = getCycle(Block);
-  if (!Cycle)
-    return 0;
-  return Cycle->getDepth();
-}
-
 #ifndef NDEBUG
 /// \brief Validate the internal consistency of the cycle tree.
 ///

diff  --git a/llvm/include/llvm/ADT/GenericCycleInfo.h b/llvm/include/llvm/ADT/GenericCycleInfo.h
index 970664b85715..5c2190411c1e 100644
--- a/llvm/include/llvm/ADT/GenericCycleInfo.h
+++ b/llvm/include/llvm/ADT/GenericCycleInfo.h
@@ -100,10 +100,6 @@ template <typename ContextT> class GenericCycle {
 
   BlockT *getHeader() const { return Entries[0]; }
 
-  const SmallVectorImpl<BlockT *> & getEntries() const {
-    return Entries;
-  }
-
   /// \brief Return whether \p Block is an entry block of the cycle.
   bool isEntry(BlockT *Block) const { return is_contained(Entries, Block); }
 
@@ -128,16 +124,6 @@ template <typename ContextT> class GenericCycle {
   /// branched to.
   void getExitBlocks(SmallVectorImpl<BlockT *> &TmpStorage) const;
 
-  /// Return the preheader block for this cycle. Pre-header is well-defined for
-  /// reducible cycle in docs/LoopTerminology.rst as: the only one entering
-  /// block and its only edge is to the entry block. Return null for irreducible
-  /// cycles.
-  BlockT *getCyclePreheader() const;
-
-  /// If the cycle has exactly one entry with exactly one predecessor, return
-  /// it, otherwise return nullptr.
-  BlockT *getCyclePredecessor() const;
-
   /// Iteration over child cycles.
   //@{
   using const_child_iterator_base =
@@ -253,7 +239,6 @@ template <typename ContextT> class GenericCycleInfo {
   const ContextT &getSSAContext() const { return Context; }
 
   CycleT *getCycle(const BlockT *Block) const;
-  unsigned getCycleDepth(const BlockT *Block) const;
   CycleT *getTopLevelParentCycle(const BlockT *Block) const;
 
   /// Move \p Child to \p NewParent by manipulating Children vectors.

diff  --git a/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h b/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h
index bdcf64a20753..54d9860e54c9 100644
--- a/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h
+++ b/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h
@@ -16,8 +16,6 @@
 
 #include "llvm/ADT/GenericCycleInfo.h"
 #include "llvm/CodeGen/MachineSSAContext.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/InitializePasses.h"
 
 namespace llvm {
 
@@ -27,39 +25,6 @@ extern template class GenericCycle<MachineSSAContext>;
 using MachineCycleInfo = GenericCycleInfo<MachineSSAContext>;
 using MachineCycle = MachineCycleInfo::CycleT;
 
-/// Legacy analysis pass which computes a \ref MachineCycleInfo.
-class MachineCycleInfoWrapperPass : public MachineFunctionPass {
-  MachineFunction *F = nullptr;
-  MachineCycleInfo CI;
-
-public:
-  static char ID;
-
-  MachineCycleInfoWrapperPass();
-
-  MachineCycleInfo &getCycleInfo() { return CI; }
-  const MachineCycleInfo &getCycleInfo() const { return CI; }
-
-  bool runOnMachineFunction(MachineFunction &F) override;
-  void getAnalysisUsage(AnalysisUsage &AU) const override;
-  void releaseMemory() override;
-  void print(raw_ostream &OS, const Module *M = nullptr) const override;
-};
-
-class MachineCycleInfoPrinterPass : public MachineFunctionPass {
-public:
-  static char ID;
-
-  MachineCycleInfoPrinterPass();
-
-  bool runOnMachineFunction(MachineFunction &F) override;
-  void getAnalysisUsage(AnalysisUsage &AU) const override;
-};
-
-// TODO: add this function to GenericCycle template after implementing IR
-//       version.
-bool isCycleInvariant(const MachineCycle *Cycle, MachineInstr &I);
-
 } // end namespace llvm
 
 #endif // LLVM_CODEGEN_MACHINECYCLEANALYSIS_H

diff  --git a/llvm/include/llvm/CodeGen/MachineSSAContext.h b/llvm/include/llvm/CodeGen/MachineSSAContext.h
index f59d7cf8a522..c945b8fefc80 100644
--- a/llvm/include/llvm/CodeGen/MachineSSAContext.h
+++ b/llvm/include/llvm/CodeGen/MachineSSAContext.h
@@ -28,8 +28,6 @@ template <typename, bool> class DominatorTreeBase;
 
 inline auto successors(MachineBasicBlock *BB) { return BB->successors(); }
 inline auto predecessors(MachineBasicBlock *BB) { return BB->predecessors(); }
-inline unsigned succ_size(MachineBasicBlock *BB) { return BB->succ_size(); }
-inline unsigned pred_size(MachineBasicBlock *BB) { return BB->pred_size(); }
 
 template <> class GenericSSAContext<MachineFunction> {
   const MachineRegisterInfo *RegInfo = nullptr;

diff  --git a/llvm/lib/CodeGen/MachineCycleAnalysis.cpp b/llvm/lib/CodeGen/MachineCycleAnalysis.cpp
index be171c2aa409..42a5e2b7af01 100644
--- a/llvm/lib/CodeGen/MachineCycleAnalysis.cpp
+++ b/llvm/lib/CodeGen/MachineCycleAnalysis.cpp
@@ -8,15 +8,50 @@
 
 #include "llvm/CodeGen/MachineCycleAnalysis.h"
 #include "llvm/ADT/GenericCycleImpl.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineSSAContext.h"
+#include "llvm/InitializePasses.h"
 
 using namespace llvm;
 
 template class llvm::GenericCycleInfo<llvm::MachineSSAContext>;
 template class llvm::GenericCycle<llvm::MachineSSAContext>;
 
+namespace {
+
+/// Legacy analysis pass which computes a \ref MachineCycleInfo.
+class MachineCycleInfoWrapperPass : public MachineFunctionPass {
+  MachineFunction *F = nullptr;
+  MachineCycleInfo CI;
+
+public:
+  static char ID;
+
+  MachineCycleInfoWrapperPass();
+
+  MachineCycleInfo &getCycleInfo() { return CI; }
+  const MachineCycleInfo &getCycleInfo() const { return CI; }
+
+  bool runOnMachineFunction(MachineFunction &F) override;
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+  void releaseMemory() override;
+  void print(raw_ostream &OS, const Module *M = nullptr) const override;
+
+  // TODO: verify analysis
+};
+
+class MachineCycleInfoPrinterPass : public MachineFunctionPass {
+public:
+  static char ID;
+
+  MachineCycleInfoPrinterPass();
+
+  bool runOnMachineFunction(MachineFunction &F) override;
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+
+} // namespace
+
 char MachineCycleInfoWrapperPass::ID = 0;
 
 MachineCycleInfoWrapperPass::MachineCycleInfoWrapperPass()
@@ -76,62 +111,3 @@ bool MachineCycleInfoPrinterPass::runOnMachineFunction(MachineFunction &F) {
   CI.print(errs());
   return false;
 }
-
-bool llvm::isCycleInvariant(const MachineCycle *Cycle, MachineInstr &I) {
-  MachineFunction *MF = I.getParent()->getParent();
-  MachineRegisterInfo *MRI = &MF->getRegInfo();
-  const TargetSubtargetInfo &ST = MF->getSubtarget();
-  const TargetRegisterInfo *TRI = ST.getRegisterInfo();
-  const TargetInstrInfo *TII = ST.getInstrInfo();
-
-  // The instruction is cycle invariant if all of its operands are.
-  for (const MachineOperand &MO : I.operands()) {
-    if (!MO.isReg())
-      continue;
-
-    Register Reg = MO.getReg();
-    if (Reg == 0)
-      continue;
-
-    // An instruction that uses or defines a physical register can't e.g. be
-    // hoisted, so mark this as not invariant.
-    if (Register::isPhysicalRegister(Reg)) {
-      if (MO.isUse()) {
-        // If the physreg has no defs anywhere, it's just an ambient register
-        // and we can freely move its uses. Alternatively, if it's allocatable,
-        // it could get allocated to something with a def during allocation.
-        // However, if the physreg is known to always be caller saved/restored
-        // then this use is safe to hoist.
-        if (!MRI->isConstantPhysReg(Reg) &&
-            !(TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *I.getMF())) &&
-            !TII->isIgnorableUse(MO))
-          return false;
-        // Otherwise it's safe to move.
-        continue;
-      } else if (!MO.isDead()) {
-        // A def that isn't dead can't be moved.
-        return false;
-      } else if (any_of(Cycle->getEntries(),
-                        [&](const MachineBasicBlock *Block) {
-                          return Block->isLiveIn(Reg);
-                        })) {
-        // If the reg is live into any header of the cycle we can't hoist an
-        // instruction which would clobber it.
-        return false;
-      }
-    }
-
-    if (!MO.isUse())
-      continue;
-
-    assert(MRI->getVRegDef(Reg) && "Machine instr not mapped for this vreg?!");
-
-    // If the cycle contains the definition of an operand, then the instruction
-    // isn't cycle invariant.
-    if (Cycle->contains(MRI->getVRegDef(Reg)->getParent()))
-      return false;
-  }
-
-  // If we got this far, the instruction is cycle invariant!
-  return true;
-}

diff  --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index 797433e7a280..966b012725d8 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -29,7 +29,6 @@
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
-#include "llvm/CodeGen/MachineCycleAnalysis.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -96,18 +95,18 @@ static cl::opt<unsigned> SinkLoadBlocksThreshold(
     cl::init(20), cl::Hidden);
 
 static cl::opt<bool>
-    SinkInstsIntoCycle("sink-insts-to-avoid-spills",
-                       cl::desc("Sink instructions into cycles to avoid "
-                                "register spills"),
-                       cl::init(false), cl::Hidden);
-
-static cl::opt<unsigned> SinkIntoCycleLimit(
-    "machine-sink-cycle-limit",
-    cl::desc("The maximum number of instructions considered for cycle sinking."),
+SinkInstsIntoLoop("sink-insts-to-avoid-spills",
+                  cl::desc("Sink instructions into loops to avoid "
+                           "register spills"),
+                  cl::init(false), cl::Hidden);
+
+static cl::opt<unsigned> SinkIntoLoopLimit(
+    "machine-sink-loop-limit",
+    cl::desc("The maximum number of instructions considered for loop sinking."),
     cl::init(50), cl::Hidden);
 
 STATISTIC(NumSunk,      "Number of machine instructions sunk");
-STATISTIC(NumCycleSunk,  "Number of machine instructions sunk into a cycle");
+STATISTIC(NumLoopSunk,  "Number of machine instructions sunk into a loop");
 STATISTIC(NumSplit,     "Number of critical edges split");
 STATISTIC(NumCoalesces, "Number of copies coalesced");
 STATISTIC(NumPostRACopySink, "Number of copies sunk after RA");
@@ -120,7 +119,7 @@ namespace {
     MachineRegisterInfo  *MRI;     // Machine register information
     MachineDominatorTree *DT;      // Machine dominator tree
     MachinePostDominatorTree *PDT; // Machine post dominator tree
-    MachineCycleInfo *CI;
+    MachineLoopInfo *LI;
     MachineBlockFrequencyInfo *MBFI;
     const MachineBranchProbabilityInfo *MBPI;
     AliasAnalysis *AA;
@@ -181,9 +180,8 @@ namespace {
       AU.addRequired<AAResultsWrapperPass>();
       AU.addRequired<MachineDominatorTree>();
       AU.addRequired<MachinePostDominatorTree>();
-      AU.addRequired<MachineCycleInfoWrapperPass>();
+      AU.addRequired<MachineLoopInfo>();
       AU.addRequired<MachineBranchProbabilityInfo>();
-      AU.addPreserved<MachineCycleInfoWrapperPass>();
       AU.addPreserved<MachineLoopInfo>();
       if (UseBlockFreqInfo)
         AU.addRequired<MachineBlockFrequencyInfo>();
@@ -234,9 +232,9 @@ namespace {
     MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
                bool &BreakPHIEdge, AllSuccsCache &AllSuccessors);
 
-    void FindCycleSinkCandidates(MachineCycle *Cycle, MachineBasicBlock *BB,
-                                 SmallVectorImpl<MachineInstr *> &Candidates);
-    bool SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I);
+    void FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB,
+                                SmallVectorImpl<MachineInstr *> &Candidates);
+    bool SinkIntoLoop(MachineLoop *L, MachineInstr &I);
 
     bool isProfitableToSinkTo(Register Reg, MachineInstr &MI,
                               MachineBasicBlock *MBB,
@@ -263,7 +261,7 @@ INITIALIZE_PASS_BEGIN(MachineSinking, DEBUG_TYPE,
                       "Machine code sinking", false, false)
 INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_END(MachineSinking, DEBUG_TYPE,
                     "Machine code sinking", false, false)
@@ -380,27 +378,26 @@ static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
   return false;
 }
 
-void MachineSinking::FindCycleSinkCandidates(
-    MachineCycle *Cycle, MachineBasicBlock *BB,
+void MachineSinking::FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB,
     SmallVectorImpl<MachineInstr *> &Candidates) {
   for (auto &MI : *BB) {
-    LLVM_DEBUG(dbgs() << "CycleSink: Analysing candidate: " << MI);
+    LLVM_DEBUG(dbgs() << "LoopSink: Analysing candidate: " << MI);
     if (!TII->shouldSink(MI)) {
-      LLVM_DEBUG(dbgs() << "CycleSink: Instruction not a candidate for this "
+      LLVM_DEBUG(dbgs() << "LoopSink: Instruction not a candidate for this "
                            "target\n");
       continue;
     }
-    if (!isCycleInvariant(Cycle, MI)) {
-      LLVM_DEBUG(dbgs() << "CycleSink: Instruction is not cycle invariant\n");
+    if (!L->isLoopInvariant(MI)) {
+      LLVM_DEBUG(dbgs() << "LoopSink: Instruction is not loop invariant\n");
       continue;
     }
     bool DontMoveAcrossStore = true;
     if (!MI.isSafeToMove(AA, DontMoveAcrossStore)) {
-      LLVM_DEBUG(dbgs() << "CycleSink: Instruction not safe to move.\n");
+      LLVM_DEBUG(dbgs() << "LoopSink: Instruction not safe to move.\n");
       continue;
     }
     if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI)) {
-      LLVM_DEBUG(dbgs() << "CycleSink: Dont sink GOT or constant pool loads\n");
+      LLVM_DEBUG(dbgs() << "LoopSink: Dont sink GOT or constant pool loads\n");
       continue;
     }
     if (MI.isConvergent())
@@ -412,7 +409,7 @@ void MachineSinking::FindCycleSinkCandidates(
     if (!MRI->hasOneDef(MO.getReg()))
       continue;
 
-    LLVM_DEBUG(dbgs() << "CycleSink: Instruction added as candidate.\n");
+    LLVM_DEBUG(dbgs() << "LoopSink: Instruction added as candidate.\n");
     Candidates.push_back(&MI);
   }
 }
@@ -428,12 +425,22 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
   MRI = &MF.getRegInfo();
   DT = &getAnalysis<MachineDominatorTree>();
   PDT = &getAnalysis<MachinePostDominatorTree>();
-  CI = &getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo();
+  LI = &getAnalysis<MachineLoopInfo>();
   MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr;
   MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
   RegClassInfo.runOnMachineFunction(MF);
 
+  // MachineSink currently uses MachineLoopInfo, which only recognizes natural
+  // loops. As such, we could sink instructions into irreducible cycles, which
+  // would be non-profitable.
+  // WARNING: The current implementation of hasStoreBetween() is incorrect for
+  // sinking into irreducible cycles (PR53990), this bailout is currently
+  // necessary for correctness, not just profitability.
+  ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
+  if (containsIrreducibleCFG<MachineBasicBlock *>(RPOT, *LI))
+    return false;
+
   bool EverMadeChange = false;
 
   while (true) {
@@ -466,33 +473,32 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
     EverMadeChange = true;
   }
 
-  if (SinkInstsIntoCycle) {
-    SmallVector<MachineCycle *, 8> Cycles(CI->toplevel_begin(),
-                                          CI->toplevel_end());
-    for (auto *Cycle : Cycles) {
-      MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
+  if (SinkInstsIntoLoop) {
+    SmallVector<MachineLoop *, 8> Loops(LI->begin(), LI->end());
+    for (auto *L : Loops) {
+      MachineBasicBlock *Preheader = LI->findLoopPreheader(L);
       if (!Preheader) {
-        LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n");
+        LLVM_DEBUG(dbgs() << "LoopSink: Can't find preheader\n");
         continue;
       }
       SmallVector<MachineInstr *, 8> Candidates;
-      FindCycleSinkCandidates(Cycle, Preheader, Candidates);
+      FindLoopSinkCandidates(L, Preheader, Candidates);
 
       // Walk the candidates in reverse order so that we start with the use
       // of a def-use chain, if there is any.
       // TODO: Sort the candidates using a cost-model.
       unsigned i = 0;
       for (MachineInstr *I : llvm::reverse(Candidates)) {
-        if (i++ == SinkIntoCycleLimit) {
-          LLVM_DEBUG(dbgs() << "CycleSink:   Limit reached of instructions to "
+        if (i++ == SinkIntoLoopLimit) {
+          LLVM_DEBUG(dbgs() << "LoopSink:   Limit reached of instructions to "
                                "be analysed.");
           break;
         }
 
-        if (!SinkIntoCycle(Cycle, *I))
+        if (!SinkIntoLoop(L, *I))
           break;
         EverMadeChange = true;
-        ++NumCycleSunk;
+        ++NumLoopSunk;
       }
     }
   }
@@ -514,12 +520,12 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
 
   // Don't bother sinking code out of unreachable blocks. In addition to being
   // unprofitable, it can also lead to infinite looping, because in an
-  // unreachable cycle there may be nowhere to stop.
+  // unreachable loop there may be nowhere to stop.
   if (!DT->isReachableFromEntry(&MBB)) return false;
 
   bool MadeChange = false;
 
-  // Cache all successors, sorted by frequency info and cycle depth.
+  // Cache all successors, sorted by frequency info and loop depth.
   AllSuccsCache AllSuccessors;
 
   // Walk the basic block bottom-up.  Remember if we saw a store.
@@ -638,16 +644,13 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
   if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB))
     return false;
 
-  // Avoid breaking back edge. From == To means backedge for single BB cycle.
+  // Avoid breaking back edge. From == To means backedge for single BB loop.
   if (!SplitEdges || FromBB == ToBB)
     return false;
 
-  MachineCycle *FromCycle = CI->getCycle(FromBB);
-  MachineCycle *ToCycle = CI->getCycle(ToBB);
-
-  // Check for backedges of more "complex" cycles.
-  if (FromCycle == ToCycle && FromCycle &&
-      (!FromCycle->isReducible() || FromCycle->getHeader() == ToBB))
+  // Check for backedges of more "complex" loops.
+  if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) &&
+      LI->isLoopHeader(ToBB))
     return false;
 
   // It's not always legal to break critical edges and sink the computation
@@ -750,9 +753,9 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
   if (!PDT->dominates(SuccToSinkTo, MBB))
     return true;
 
-  // It is profitable to sink an instruction from a deeper cycle to a shallower
-  // cycle, even if the latter post-dominates the former (PR21115).
-  if (CI->getCycleDepth(MBB) > CI->getCycleDepth(SuccToSinkTo))
+  // It is profitable to sink an instruction from a deeper loop to a shallower
+  // loop, even if the latter post-dominates the former (PR21115).
+  if (LI->getLoopDepth(MBB) > LI->getLoopDepth(SuccToSinkTo))
     return true;
 
   // Check if only use in post dominated block is PHI instruction.
@@ -773,11 +776,11 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
           FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors))
     return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors);
 
-  MachineCycle *MCycle = CI->getCycle(MBB);
+  MachineLoop *ML = LI->getLoopFor(MBB);
 
-  // If the instruction is not inside a cycle, it is not profitable to sink MI to
+  // If the instruction is not inside a loop, it is not profitable to sink MI to
   // a post dominate block SuccToSinkTo.
-  if (!MCycle)
+  if (!ML)
     return false;
 
   auto isRegisterPressureSetExceedLimit = [&](const TargetRegisterClass *RC) {
@@ -795,7 +798,7 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
     return false;
   };
 
-  // If this instruction is inside a Cycle and sinking this instruction can make
+  // If this instruction is inside a loop and sinking this instruction can make
   // more registers live range shorten, it is still prifitable.
   for (const MachineOperand &MO : MI.operands()) {
     // Ignore non-register operands.
@@ -823,15 +826,14 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
         return false;
     } else {
       MachineInstr *DefMI = MRI->getVRegDef(Reg);
-      MachineCycle *Cycle = CI->getCycle(DefMI->getParent());
-      // DefMI is defined outside of cycle. There should be no live range
-      // impact for this operand. Defination outside of cycle means:
-      // 1: defination is outside of cycle.
-      // 2: defination is in this cycle, but it is a PHI in the cycle header.
-      if (Cycle != MCycle || (DefMI->isPHI() && Cycle && Cycle->isReducible() &&
-                              Cycle->getHeader() == DefMI->getParent()))
+      // DefMI is defined outside of loop. There should be no live range
+      // impact for this operand. Defination outside of loop means:
+      // 1: defination is outside of loop.
+      // 2: defination is in this loop, but it is a PHI in the loop header.
+      if (LI->getLoopFor(DefMI->getParent()) != ML ||
+          (DefMI->isPHI() && LI->isLoopHeader(DefMI->getParent())))
         continue;
-      // The DefMI is defined inside the cycle.
+      // The DefMI is defined inside the loop.
       // If sinking this operand makes some register pressure set exceed limit,
       // it is not profitable.
       if (isRegisterPressureSetExceedLimit(MRI->getRegClass(Reg))) {
@@ -841,8 +843,8 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
     }
   }
 
-  // If MI is in cycle and all its operands are alive across the whole cycle or
-  // if no operand sinking make register pressure set exceed limit, it is
+  // If MI is in loop and all its operands are alive across the whole loop or if
+  // no operand sinking make register pressure set exceed limit, it is
   // profitable to sink MI.
   return true;
 }
@@ -874,14 +876,14 @@ MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
       AllSuccs.push_back(DTChild->getBlock());
   }
 
-  // Sort Successors according to their cycle depth or block frequency info.
+  // Sort Successors according to their loop depth or block frequency info.
   llvm::stable_sort(
       AllSuccs, [this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
         uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
         uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
         bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0;
         return HasBlockFreq ? LHSFreq < RHSFreq
-                            : CI->getCycleDepth(L) < CI->getCycleDepth(R);
+                            : LI->getLoopDepth(L) < LI->getLoopDepth(R);
       });
 
   auto it = AllSuccessors.insert(std::make_pair(MBB, AllSuccs));
@@ -896,7 +898,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
                                  AllSuccsCache &AllSuccessors) {
   assert (MBB && "Invalid MachineBasicBlock!");
 
-  // loop over all the operands of the specified instruction.  If there is
+  // Loop over all the operands of the specified instruction.  If there is
   // anything we can't handle, bail out.
 
   // SuccToSinkTo - This is the successor to sink this instruction to, once we
@@ -943,7 +945,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
       // Otherwise, we should look at all the successors and decide which one
       // we should sink to. If we have reliable block frequency information
       // (frequency != 0) available, give successors with smaller frequencies
-      // higher priority, otherwise prioritize smaller cycle depths.
+      // higher priority, otherwise prioritize smaller loop depths.
       for (MachineBasicBlock *SuccBlock :
            GetAllSortedSuccessors(MI, MBB, AllSuccessors)) {
         bool LocalUse = false;
@@ -966,7 +968,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
   }
 
   // It is not possible to sink an instruction into its own block.  This can
-  // happen with cycles.
+  // happen with loops.
   if (MBB == SuccToSinkTo)
     return nullptr;
 
@@ -1220,70 +1222,68 @@ bool MachineSinking::hasStoreBetween(MachineBasicBlock *From,
   return HasAliasedStore;
 }
 
-/// Sink instructions into cycles if profitable. This especially tries to
-/// prevent register spills caused by register pressure if there is little to no
-/// overhead moving instructions into cycles.
-bool MachineSinking::SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I) {
-  LLVM_DEBUG(dbgs() << "CycleSink: Finding sink block for: " << I);
-  MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
-  assert(Preheader && "Cycle sink needs a preheader block");
+/// Sink instructions into loops if profitable. This especially tries to prevent
+/// register spills caused by register pressure if there is little to no
+/// overhead moving instructions into loops.
+bool MachineSinking::SinkIntoLoop(MachineLoop *L, MachineInstr &I) {
+  LLVM_DEBUG(dbgs() << "LoopSink: Finding sink block for: " << I);
+  MachineBasicBlock *Preheader = L->getLoopPreheader();
+  assert(Preheader && "Loop sink needs a preheader block");
   MachineBasicBlock *SinkBlock = nullptr;
   bool CanSink = true;
   const MachineOperand &MO = I.getOperand(0);
 
   for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
-    LLVM_DEBUG(dbgs() << "CycleSink:   Analysing use: " << MI);
-    if (!Cycle->contains(MI.getParent())) {
-      LLVM_DEBUG(dbgs() << "CycleSink:   Use not in cycle, can't sink.\n");
+    LLVM_DEBUG(dbgs() << "LoopSink:   Analysing use: " << MI);
+    if (!L->contains(&MI)) {
+      LLVM_DEBUG(dbgs() << "LoopSink:   Use not in loop, can't sink.\n");
       CanSink = false;
       break;
     }
 
     // FIXME: Come up with a proper cost model that estimates whether sinking
-    // the instruction (and thus possibly executing it on every cycle
+    // the instruction (and thus possibly executing it on every loop
     // iteration) is more expensive than a register.
     // For now assumes that copies are cheap and thus almost always worth it.
     if (!MI.isCopy()) {
-      LLVM_DEBUG(dbgs() << "CycleSink:   Use is not a copy\n");
+      LLVM_DEBUG(dbgs() << "LoopSink:   Use is not a copy\n");
       CanSink = false;
       break;
     }
     if (!SinkBlock) {
       SinkBlock = MI.getParent();
-      LLVM_DEBUG(dbgs() << "CycleSink:   Setting sink block to: "
+      LLVM_DEBUG(dbgs() << "LoopSink:   Setting sink block to: "
                         << printMBBReference(*SinkBlock) << "\n");
       continue;
     }
     SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent());
     if (!SinkBlock) {
-      LLVM_DEBUG(dbgs() << "CycleSink:   Can't find nearest dominator\n");
+      LLVM_DEBUG(dbgs() << "LoopSink:   Can't find nearest dominator\n");
       CanSink = false;
       break;
     }
-    LLVM_DEBUG(dbgs() << "CycleSink:   Setting nearest common dom block: " <<
+    LLVM_DEBUG(dbgs() << "LoopSink:   Setting nearest common dom block: " <<
                printMBBReference(*SinkBlock) << "\n");
   }
 
   if (!CanSink) {
-    LLVM_DEBUG(dbgs() << "CycleSink: Can't sink instruction.\n");
+    LLVM_DEBUG(dbgs() << "LoopSink: Can't sink instruction.\n");
     return false;
   }
   if (!SinkBlock) {
-    LLVM_DEBUG(dbgs() << "CycleSink: Not sinking, can't find sink block.\n");
+    LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, can't find sink block.\n");
     return false;
   }
   if (SinkBlock == Preheader) {
-    LLVM_DEBUG(
-        dbgs() << "CycleSink: Not sinking, sink block is the preheader\n");
+    LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, sink block is the preheader\n");
     return false;
   }
   if (SinkBlock->size() > SinkLoadInstsPerBlockThreshold) {
-    LLVM_DEBUG(
-        dbgs() << "CycleSink: Not Sinking, block too large to analyse.\n");
+    LLVM_DEBUG(dbgs() << "LoopSink: Not Sinking, block too large to analyse.\n");
     return false;
   }
 
-  LLVM_DEBUG(dbgs() << "CycleSink: Sinking instruction!\n");
+  LLVM_DEBUG(dbgs() << "LoopSink: Sinking instruction!\n");
   SinkBlock->splice(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()), Preheader,
                     I);
 
@@ -1407,11 +1407,9 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
       TryBreak = true;
     }
 
-    // Don't sink instructions into a cycle.
-    if (!TryBreak && CI->getCycle(SuccToSinkTo) &&
-        (!CI->getCycle(SuccToSinkTo)->isReducible() ||
-         CI->getCycle(SuccToSinkTo)->getHeader() == SuccToSinkTo)) {
-      LLVM_DEBUG(dbgs() << " *** NOTE: cycle header found\n");
+    // Don't sink instructions into a loop.
+    if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) {
+      LLVM_DEBUG(dbgs() << " *** NOTE: Loop header found\n");
       TryBreak = true;
     }
 

diff  --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
index 2f205f9ab373..0f532f4fe324 100644
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -132,7 +132,6 @@
 ; CHECK-NEXT:       Machine Block Frequency Analysis
 ; CHECK-NEXT:       Machine Common Subexpression Elimination
 ; CHECK-NEXT:       MachinePostDominator Tree Construction
-; CHECK-NEXT:       Machine Cycle Info Analysis
 ; CHECK-NEXT:       Machine code sinking
 ; CHECK-NEXT:       Peephole Optimizations
 ; CHECK-NEXT:       Remove dead machine instructions

diff  --git a/llvm/test/CodeGen/AArch64/loop-sink-limit.mir b/llvm/test/CodeGen/AArch64/loop-sink-limit.mir
index 0a886925ffab..bdb99f296fca 100644
--- a/llvm/test/CodeGen/AArch64/loop-sink-limit.mir
+++ b/llvm/test/CodeGen/AArch64/loop-sink-limit.mir
@@ -1,10 +1,10 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple aarch64 -run-pass=machine-sink -sink-insts-to-avoid-spills \
-# RUN:   -machine-sink-cycle-limit=1 -verify-machineinstrs %s -o - 2>&1 | \
+# RUN:   -machine-sink-loop-limit=1 -verify-machineinstrs %s -o - 2>&1 | \
 # RUN:   FileCheck %s --check-prefix=SINK1
 #
 # RUN: llc -mtriple aarch64 -run-pass=machine-sink -sink-insts-to-avoid-spills \
-# RUN:   -machine-sink-cycle-limit=2 -verify-machineinstrs %s -o - 2>&1 | \
+# RUN:   -machine-sink-loop-limit=2 -verify-machineinstrs %s -o - 2>&1 | \
 # RUN:   FileCheck %s --check-prefix=SINK2
 
 --- |

diff  --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
index 684774b18308..e23bea234be9 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -296,7 +296,6 @@
 ; GCN-O1-NEXT:        Machine Block Frequency Analysis
 ; GCN-O1-NEXT:        Machine Common Subexpression Elimination
 ; GCN-O1-NEXT:        MachinePostDominator Tree Construction
-; GCN-O1-NEXT:        Machine Cycle Info Analysis
 ; GCN-O1-NEXT:        Machine code sinking
 ; GCN-O1-NEXT:        Peephole Optimizations
 ; GCN-O1-NEXT:        Remove dead machine instructions
@@ -575,7 +574,6 @@
 ; GCN-O1-OPTS-NEXT:        Machine Block Frequency Analysis
 ; GCN-O1-OPTS-NEXT:        Machine Common Subexpression Elimination
 ; GCN-O1-OPTS-NEXT:        MachinePostDominator Tree Construction
-; GCN-O1-OPTS-NEXT:        Machine Cycle Info Analysis
 ; GCN-O1-OPTS-NEXT:        Machine code sinking
 ; GCN-O1-OPTS-NEXT:        Peephole Optimizations
 ; GCN-O1-OPTS-NEXT:        Remove dead machine instructions
@@ -863,7 +861,6 @@
 ; GCN-O2-NEXT:        Machine Block Frequency Analysis
 ; GCN-O2-NEXT:        Machine Common Subexpression Elimination
 ; GCN-O2-NEXT:        MachinePostDominator Tree Construction
-; GCN-O2-NEXT:        Machine Cycle Info Analysis
 ; GCN-O2-NEXT:        Machine code sinking
 ; GCN-O2-NEXT:        Peephole Optimizations
 ; GCN-O2-NEXT:        Remove dead machine instructions
@@ -1164,7 +1161,6 @@
 ; GCN-O3-NEXT:        Machine Block Frequency Analysis
 ; GCN-O3-NEXT:        Machine Common Subexpression Elimination
 ; GCN-O3-NEXT:        MachinePostDominator Tree Construction
-; GCN-O3-NEXT:        Machine Cycle Info Analysis
 ; GCN-O3-NEXT:        Machine code sinking
 ; GCN-O3-NEXT:        Peephole Optimizations
 ; GCN-O3-NEXT:        Remove dead machine instructions

diff  --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll
index 098422a4a770..d2507bf6365b 100644
--- a/llvm/test/CodeGen/ARM/O3-pipeline.ll
+++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll
@@ -91,7 +91,6 @@
 ; CHECK-NEXT:      Machine Block Frequency Analysis
 ; CHECK-NEXT:      Machine Common Subexpression Elimination
 ; CHECK-NEXT:      MachinePostDominator Tree Construction
-; CHECK-NEXT:      Machine Cycle Info Analysis
 ; CHECK-NEXT:      Machine code sinking
 ; CHECK-NEXT:      Peephole Optimizations
 ; CHECK-NEXT:      Remove dead machine instructions

diff  --git a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll
index eff5184e9c84..d67fed77d356 100644
--- a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll
+++ b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll
@@ -111,7 +111,6 @@
 ; CHECK-NEXT:       Machine Block Frequency Analysis
 ; CHECK-NEXT:       Machine Common Subexpression Elimination
 ; CHECK-NEXT:       MachinePostDominator Tree Construction
-; CHECK-NEXT:       Machine Cycle Info Analysis
 ; CHECK-NEXT:       Machine code sinking
 ; CHECK-NEXT:       Peephole Optimizations
 ; CHECK-NEXT:       Remove dead machine instructions

diff  --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index 21b8b8f7842b..89b14bef9bce 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -90,7 +90,6 @@
 ; CHECK-NEXT:       Machine Block Frequency Analysis
 ; CHECK-NEXT:       Machine Common Subexpression Elimination
 ; CHECK-NEXT:       MachinePostDominator Tree Construction
-; CHECK-NEXT:       Machine Cycle Info Analysis
 ; CHECK-NEXT:       Machine code sinking
 ; CHECK-NEXT:       Peephole Optimizations
 ; CHECK-NEXT:       Remove dead machine instructions

diff  --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll
index 8552ebc348cb..e83266f72e48 100644
--- a/llvm/test/CodeGen/X86/opt-pipeline.ll
+++ b/llvm/test/CodeGen/X86/opt-pipeline.ll
@@ -107,7 +107,6 @@
 ; CHECK-NEXT:       Machine Block Frequency Analysis
 ; CHECK-NEXT:       Machine Common Subexpression Elimination
 ; CHECK-NEXT:       MachinePostDominator Tree Construction
-; CHECK-NEXT:       Machine Cycle Info Analysis
 ; CHECK-NEXT:       Machine code sinking
 ; CHECK-NEXT:       Peephole Optimizations
 ; CHECK-NEXT:       Remove dead machine instructions

diff  --git a/llvm/test/CodeGen/X86/pr38795.ll b/llvm/test/CodeGen/X86/pr38795.ll
index d805dcad8b6e..b526e4f471b1 100644
--- a/llvm/test/CodeGen/X86/pr38795.ll
+++ b/llvm/test/CodeGen/X86/pr38795.ll
@@ -32,13 +32,14 @@ define dso_local void @fn() {
 ; CHECK-NEXT:    # implicit-def: $ebp
 ; CHECK-NEXT:    jmp .LBB0_1
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB0_16: # %for.inc
+; CHECK-NEXT:  .LBB0_15: # %for.inc
 ; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    movl %esi, %ecx
 ; CHECK-NEXT:    movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
 ; CHECK-NEXT:    movb %dh, %dl
 ; CHECK-NEXT:  .LBB0_1: # %for.cond
 ; CHECK-NEXT:    # =>This Loop Header: Depth=1
-; CHECK-NEXT:    # Child Loop BB0_20 Depth 2
+; CHECK-NEXT:    # Child Loop BB0_19 Depth 2
 ; CHECK-NEXT:    cmpb $8, %dl
 ; CHECK-NEXT:    movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
 ; CHECK-NEXT:    ja .LBB0_3
@@ -55,7 +56,7 @@ define dso_local void @fn() {
 ; CHECK-NEXT:    movb %cl, %dh
 ; CHECK-NEXT:    movl $0, h
 ; CHECK-NEXT:    cmpb $8, %dl
-; CHECK-NEXT:    jg .LBB0_8
+; CHECK-NEXT:    jg .LBB0_9
 ; CHECK-NEXT:  # %bb.5: # %if.then13
 ; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:    movl %eax, %esi
@@ -64,12 +65,10 @@ define dso_local void @fn() {
 ; CHECK-NEXT:    calll printf
 ; CHECK-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %dh # 1-byte Reload
 ; CHECK-NEXT:    testb %bl, %bl
-; CHECK-NEXT:    movl %esi, %ecx
 ; CHECK-NEXT:    # implicit-def: $eax
-; CHECK-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload
-; CHECK-NEXT:    movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; CHECK-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
 ; CHECK-NEXT:    movb %dh, %dl
-; CHECK-NEXT:    jne .LBB0_16
+; CHECK-NEXT:    jne .LBB0_15
 ; CHECK-NEXT:    jmp .LBB0_6
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB0_3: # %if.then
@@ -78,82 +77,82 @@ define dso_local void @fn() {
 ; CHECK-NEXT:    calll printf
 ; CHECK-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload
 ; CHECK-NEXT:    # implicit-def: $eax
+; CHECK-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; CHECK-NEXT:    jmp .LBB0_6
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB0_9: # %if.end21
+; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    # implicit-def: $ebp
+; CHECK-NEXT:    jmp .LBB0_10
+; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB0_6: # %for.cond35
 ; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    movb %dl, %dh
 ; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    je .LBB0_7
-; CHECK-NEXT:  .LBB0_11: # %af
+; CHECK-NEXT:    movl %edi, %esi
+; CHECK-NEXT:    movl $0, %edi
+; CHECK-NEXT:    movb %cl, %dl
+; CHECK-NEXT:    je .LBB0_19
+; CHECK-NEXT:  # %bb.7: # %af
 ; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:    testb %bl, %bl
-; CHECK-NEXT:    jne .LBB0_12
-; CHECK-NEXT:  .LBB0_17: # %if.end39
+; CHECK-NEXT:    jne .LBB0_8
+; CHECK-NEXT:  .LBB0_16: # %if.end39
 ; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:    testl %eax, %eax
-; CHECK-NEXT:    je .LBB0_19
-; CHECK-NEXT:  # %bb.18: # %if.then41
+; CHECK-NEXT:    je .LBB0_18
+; CHECK-NEXT:  # %bb.17: # %if.then41
 ; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:    movl $0, {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    movl $fn, {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    movl $.str, (%esp)
 ; CHECK-NEXT:    calll printf
-; CHECK-NEXT:  .LBB0_19: # %for.end46
+; CHECK-NEXT:  .LBB0_18: # %for.end46
 ; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    movl %esi, %edi
 ; CHECK-NEXT:    # implicit-def: $dl
 ; CHECK-NEXT:    # implicit-def: $dh
 ; CHECK-NEXT:    # implicit-def: $ebp
-; CHECK-NEXT:    jmp .LBB0_20
-; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB0_8: # %if.end21
-; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    # implicit-def: $ebp
-; CHECK-NEXT:    jmp .LBB0_9
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB0_7: # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    xorl %edi, %edi
-; CHECK-NEXT:    movb %dl, %dh
-; CHECK-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload
-; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB0_20: # %for.cond47
+; CHECK-NEXT:  .LBB0_19: # %for.cond47
 ; CHECK-NEXT:    # Parent Loop BB0_1 Depth=1
 ; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    testb %bl, %bl
-; CHECK-NEXT:    jne .LBB0_20
-; CHECK-NEXT:  # %bb.21: # %for.cond47
-; CHECK-NEXT:    # in Loop: Header=BB0_20 Depth=2
+; CHECK-NEXT:    jne .LBB0_19
+; CHECK-NEXT:  # %bb.20: # %for.cond47
+; CHECK-NEXT:    # in Loop: Header=BB0_19 Depth=2
 ; CHECK-NEXT:    testb %bl, %bl
-; CHECK-NEXT:    jne .LBB0_20
-; CHECK-NEXT:  .LBB0_9: # %ae
+; CHECK-NEXT:    jne .LBB0_19
+; CHECK-NEXT:  .LBB0_10: # %ae
 ; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:    testb %bl, %bl
-; CHECK-NEXT:    jne .LBB0_10
-; CHECK-NEXT:  # %bb.13: # %if.end26
+; CHECK-NEXT:    jne .LBB0_11
+; CHECK-NEXT:  # %bb.12: # %if.end26
 ; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    xorl %esi, %esi
 ; CHECK-NEXT:    testb %dl, %dl
-; CHECK-NEXT:    je .LBB0_16
-; CHECK-NEXT:  # %bb.14: # %if.end26
+; CHECK-NEXT:    je .LBB0_15
+; CHECK-NEXT:  # %bb.13: # %if.end26
 ; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:    testl %ebp, %ebp
-; CHECK-NEXT:    jne .LBB0_16
-; CHECK-NEXT:  # %bb.15: # %if.then31
+; CHECK-NEXT:    jne .LBB0_15
+; CHECK-NEXT:  # %bb.14: # %if.then31
 ; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    xorl %esi, %esi
 ; CHECK-NEXT:    xorl %ebp, %ebp
-; CHECK-NEXT:    jmp .LBB0_16
+; CHECK-NEXT:    jmp .LBB0_15
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB0_10: # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:  .LBB0_11: # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    movl %edi, %esi
 ; CHECK-NEXT:    # implicit-def: $eax
 ; CHECK-NEXT:    testb %bl, %bl
-; CHECK-NEXT:    je .LBB0_17
-; CHECK-NEXT:  .LBB0_12: # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    je .LBB0_16
+; CHECK-NEXT:  .LBB0_8: # in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:    # implicit-def: $edi
 ; CHECK-NEXT:    # implicit-def: $cl
-; CHECK-NEXT:    # kill: killed $cl
 ; CHECK-NEXT:    # implicit-def: $dl
 ; CHECK-NEXT:    # implicit-def: $ebp
-; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    jne .LBB0_11
-; CHECK-NEXT:    jmp .LBB0_7
+; CHECK-NEXT:    jmp .LBB0_6
 entry:
   br label %for.cond
 

diff  --git a/llvm/test/CodeGen/X86/switch-phi-const.ll b/llvm/test/CodeGen/X86/switch-phi-const.ll
index 981a60d09545..e28169217c01 100644
--- a/llvm/test/CodeGen/X86/switch-phi-const.ll
+++ b/llvm/test/CodeGen/X86/switch-phi-const.ll
@@ -93,12 +93,12 @@ define void @switch_trunc_phi_const(i32 %x) {
 ; CHECK:       # %bb.0: # %bb0
 ; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
 ; CHECK-NEXT:    movzbl %dil, %ecx
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    movl $3895, %edx # imm = 0xF37
 ; CHECK-NEXT:    decl %ecx
 ; CHECK-NEXT:    cmpl $54, %ecx
 ; CHECK-NEXT:    ja .LBB1_8
 ; CHECK-NEXT:  # %bb.1: # %bb0
-; CHECK-NEXT:    movzbl %dil, %eax
-; CHECK-NEXT:    movl $3895, %edx # imm = 0xF37
 ; CHECK-NEXT:    jmpq *.LJTI1_0(,%rcx,8)
 ; CHECK-NEXT:  .LBB1_8: # %default
 ; CHECK-NEXT:    retq

diff  --git a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
index 0f8bb837f82a..b44895293b41 100644
--- a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
@@ -1377,6 +1377,8 @@ define i32 @irreducibleCFG() #4 {
 ; ENABLE-NEXT:    pushq %rbx
 ; ENABLE-NEXT:    pushq %rax
 ; ENABLE-NEXT:    .cfi_offset %rbx, -24
+; ENABLE-NEXT:    movq _irreducibleCFGa at GOTPCREL(%rip), %rax
+; ENABLE-NEXT:    movl (%rax), %edi
 ; ENABLE-NEXT:    movq _irreducibleCFGf at GOTPCREL(%rip), %rax
 ; ENABLE-NEXT:    cmpb $0, (%rax)
 ; ENABLE-NEXT:    je LBB16_2
@@ -1386,24 +1388,20 @@ define i32 @irreducibleCFG() #4 {
 ; ENABLE-NEXT:    jmp LBB16_1
 ; ENABLE-NEXT:  LBB16_2: ## %split
 ; ENABLE-NEXT:    movq _irreducibleCFGb at GOTPCREL(%rip), %rax
+; ENABLE-NEXT:    xorl %ebx, %ebx
 ; ENABLE-NEXT:    cmpl $0, (%rax)
-; ENABLE-NEXT:    je LBB16_3
-; ENABLE-NEXT:  ## %bb.4: ## %for.body4.i
-; ENABLE-NEXT:    movq _irreducibleCFGa at GOTPCREL(%rip), %rax
-; ENABLE-NEXT:    movl (%rax), %edi
+; ENABLE-NEXT:    je LBB16_4
+; ENABLE-NEXT:  ## %bb.3: ## %for.body4.i
 ; ENABLE-NEXT:    xorl %ebx, %ebx
 ; ENABLE-NEXT:    xorl %eax, %eax
 ; ENABLE-NEXT:    callq _something
-; ENABLE-NEXT:    jmp LBB16_5
-; ENABLE-NEXT:  LBB16_3:
-; ENABLE-NEXT:    xorl %ebx, %ebx
 ; ENABLE-NEXT:    .p2align 4, 0x90
-; ENABLE-NEXT:  LBB16_5: ## %for.inc
+; ENABLE-NEXT:  LBB16_4: ## %for.inc
 ; ENABLE-NEXT:    ## =>This Inner Loop Header: Depth=1
 ; ENABLE-NEXT:    incl %ebx
 ; ENABLE-NEXT:    cmpl $7, %ebx
-; ENABLE-NEXT:    jl LBB16_5
-; ENABLE-NEXT:  ## %bb.6: ## %fn1.exit
+; ENABLE-NEXT:    jl LBB16_4
+; ENABLE-NEXT:  ## %bb.5: ## %fn1.exit
 ; ENABLE-NEXT:    xorl %eax, %eax
 ; ENABLE-NEXT:    addq $8, %rsp
 ; ENABLE-NEXT:    popq %rbx
@@ -1420,6 +1418,8 @@ define i32 @irreducibleCFG() #4 {
 ; DISABLE-NEXT:    pushq %rbx
 ; DISABLE-NEXT:    pushq %rax
 ; DISABLE-NEXT:    .cfi_offset %rbx, -24
+; DISABLE-NEXT:    movq _irreducibleCFGa at GOTPCREL(%rip), %rax
+; DISABLE-NEXT:    movl (%rax), %edi
 ; DISABLE-NEXT:    movq _irreducibleCFGf at GOTPCREL(%rip), %rax
 ; DISABLE-NEXT:    cmpb $0, (%rax)
 ; DISABLE-NEXT:    je LBB16_2
@@ -1429,24 +1429,20 @@ define i32 @irreducibleCFG() #4 {
 ; DISABLE-NEXT:    jmp LBB16_1
 ; DISABLE-NEXT:  LBB16_2: ## %split
 ; DISABLE-NEXT:    movq _irreducibleCFGb at GOTPCREL(%rip), %rax
+; DISABLE-NEXT:    xorl %ebx, %ebx
 ; DISABLE-NEXT:    cmpl $0, (%rax)
-; DISABLE-NEXT:    je LBB16_3
-; DISABLE-NEXT:  ## %bb.4: ## %for.body4.i
-; DISABLE-NEXT:    movq _irreducibleCFGa at GOTPCREL(%rip), %rax
-; DISABLE-NEXT:    movl (%rax), %edi
+; DISABLE-NEXT:    je LBB16_4
+; DISABLE-NEXT:  ## %bb.3: ## %for.body4.i
 ; DISABLE-NEXT:    xorl %ebx, %ebx
 ; DISABLE-NEXT:    xorl %eax, %eax
 ; DISABLE-NEXT:    callq _something
-; DISABLE-NEXT:    jmp LBB16_5
-; DISABLE-NEXT:  LBB16_3:
-; DISABLE-NEXT:    xorl %ebx, %ebx
 ; DISABLE-NEXT:    .p2align 4, 0x90
-; DISABLE-NEXT:  LBB16_5: ## %for.inc
+; DISABLE-NEXT:  LBB16_4: ## %for.inc
 ; DISABLE-NEXT:    ## =>This Inner Loop Header: Depth=1
 ; DISABLE-NEXT:    incl %ebx
 ; DISABLE-NEXT:    cmpl $7, %ebx
-; DISABLE-NEXT:    jl LBB16_5
-; DISABLE-NEXT:  ## %bb.6: ## %fn1.exit
+; DISABLE-NEXT:    jl LBB16_4
+; DISABLE-NEXT:  ## %bb.5: ## %fn1.exit
 ; DISABLE-NEXT:    xorl %eax, %eax
 ; DISABLE-NEXT:    addq $8, %rsp
 ; DISABLE-NEXT:    popq %rbx