[llvm] r363471 - [MBP] Move a latch block with conditional exit and multi predecessors to top of loop

Guozhi Wei via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 14 16:08:59 PDT 2019


Author: carrot
Date: Fri Jun 14 16:08:59 2019
New Revision: 363471

URL: http://llvm.org/viewvc/llvm-project?rev=363471&view=rev
Log:
[MBP] Move a latch block with conditional exit and multi predecessors to top of loop

The current findBestLoopTop can find and move only one kind of block to the top of a loop: a latch block with a single successor. Another common case is:

    * a latch block
    * it has two successors: one is the loop header, the other is an exit
    * it has more than one predecessor

If such a block is laid out below one of its predecessors P, only P can fall through to it; every other predecessor needs a jump to it, followed by another conditional jump to the loop header. If it is instead moved before the loop header, all of its predecessors jump to it and then fall through to the loop header. So every predecessor except P saves one taken branch.

Differential Revision: https://reviews.llvm.org/D43256



Added:
    llvm/trunk/test/CodeGen/X86/loop-rotate.ll
    llvm/trunk/test/CodeGen/X86/move_latch_to_loop_top.ll
Modified:
    llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp
    llvm/trunk/test/CodeGen/AArch64/cmpxchg-idioms.ll
    llvm/trunk/test/CodeGen/AArch64/neg-imm.ll
    llvm/trunk/test/CodeGen/AArch64/tailmerging_in_mbp.ll
    llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.ll
    llvm/trunk/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
    llvm/trunk/test/CodeGen/AMDGPU/global_smrd_cfg.ll
    llvm/trunk/test/CodeGen/AMDGPU/hoist-cond.ll
    llvm/trunk/test/CodeGen/AMDGPU/i1-copy-from-loop.ll
    llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll
    llvm/trunk/test/CodeGen/AMDGPU/loop_break.ll
    llvm/trunk/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
    llvm/trunk/test/CodeGen/AMDGPU/madmk.ll
    llvm/trunk/test/CodeGen/AMDGPU/multilevel-break.ll
    llvm/trunk/test/CodeGen/AMDGPU/optimize-negated-cond.ll
    llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf.ll
    llvm/trunk/test/CodeGen/AMDGPU/valu-i1.ll
    llvm/trunk/test/CodeGen/AMDGPU/wqm.ll
    llvm/trunk/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll
    llvm/trunk/test/CodeGen/ARM/arm-and-tst-peephole.ll
    llvm/trunk/test/CodeGen/ARM/atomic-cmp.ll
    llvm/trunk/test/CodeGen/ARM/atomic-cmpxchg.ll
    llvm/trunk/test/CodeGen/ARM/code-placement.ll
    llvm/trunk/test/CodeGen/ARM/pr32578.ll
    llvm/trunk/test/CodeGen/ARM/swifterror.ll
    llvm/trunk/test/CodeGen/Hexagon/bug6757-endloop.ll
    llvm/trunk/test/CodeGen/Hexagon/early-if-merge-loop.ll
    llvm/trunk/test/CodeGen/Hexagon/prof-early-if.ll
    llvm/trunk/test/CodeGen/Hexagon/redundant-branching2.ll
    llvm/trunk/test/CodeGen/PowerPC/atomics-regression.ll
    llvm/trunk/test/CodeGen/PowerPC/cmp_elimination.ll
    llvm/trunk/test/CodeGen/PowerPC/ctrloop-shortLoops.ll
    llvm/trunk/test/CodeGen/PowerPC/expand-foldable-isel.ll
    llvm/trunk/test/CodeGen/PowerPC/knowCRBitSpill.ll
    llvm/trunk/test/CodeGen/PowerPC/licm-remat.ll
    llvm/trunk/test/CodeGen/SystemZ/atomicrmw-minmax-01.ll
    llvm/trunk/test/CodeGen/SystemZ/atomicrmw-minmax-02.ll
    llvm/trunk/test/CodeGen/SystemZ/loop-01.ll
    llvm/trunk/test/CodeGen/SystemZ/loop-02.ll
    llvm/trunk/test/CodeGen/SystemZ/swifterror.ll
    llvm/trunk/test/CodeGen/Thumb/consthoist-physical-addr.ll
    llvm/trunk/test/CodeGen/X86/block-placement.ll
    llvm/trunk/test/CodeGen/X86/code_placement.ll
    llvm/trunk/test/CodeGen/X86/code_placement_cold_loop_blocks.ll
    llvm/trunk/test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll
    llvm/trunk/test/CodeGen/X86/code_placement_loop_rotation2.ll
    llvm/trunk/test/CodeGen/X86/code_placement_no_header_change.ll
    llvm/trunk/test/CodeGen/X86/conditional-tailcall.ll
    llvm/trunk/test/CodeGen/X86/loop-blocks.ll
    llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll
    llvm/trunk/test/CodeGen/X86/pr38185.ll
    llvm/trunk/test/CodeGen/X86/ragreedy-hoist-spill.ll
    llvm/trunk/test/CodeGen/X86/reverse_branches.ll
    llvm/trunk/test/CodeGen/X86/speculative-load-hardening.ll
    llvm/trunk/test/CodeGen/X86/swifterror.ll
    llvm/trunk/test/CodeGen/X86/tail-dup-merge-loop-headers.ll
    llvm/trunk/test/CodeGen/X86/tail-dup-repeat.ll
    llvm/trunk/test/CodeGen/X86/vector-shift-by-select-loop.ll
    llvm/trunk/test/CodeGen/X86/widen_arith-1.ll
    llvm/trunk/test/CodeGen/X86/widen_arith-2.ll
    llvm/trunk/test/CodeGen/X86/widen_arith-3.ll
    llvm/trunk/test/CodeGen/X86/widen_arith-4.ll
    llvm/trunk/test/CodeGen/X86/widen_arith-5.ll
    llvm/trunk/test/CodeGen/X86/widen_arith-6.ll
    llvm/trunk/test/CodeGen/X86/widen_cast-4.ll
    llvm/trunk/test/CodeGen/X86/x86-cmov-converter.ll
    llvm/trunk/test/DebugInfo/X86/PR37234.ll
    llvm/trunk/test/DebugInfo/X86/dbg-value-transfer-order.ll

Modified: llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp (original)
+++ llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp Fri Jun 14 16:08:59 2019
@@ -455,15 +455,24 @@ class MachineBlockPlacement : public Mac
                                const MachineBasicBlock *OldTop);
   bool hasViableTopFallthrough(const MachineBasicBlock *Top,
                                const BlockFilterSet &LoopBlockSet);
+  BlockFrequency TopFallThroughFreq(const MachineBasicBlock *Top,
+                                    const BlockFilterSet &LoopBlockSet);
+  BlockFrequency FallThroughGains(const MachineBasicBlock *NewTop,
+                                  const MachineBasicBlock *OldTop,
+                                  const MachineBasicBlock *ExitBB,
+                                  const BlockFilterSet &LoopBlockSet);
+  MachineBasicBlock *findBestLoopTopHelper(MachineBasicBlock *OldTop,
+      const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
   MachineBasicBlock *findBestLoopTop(
       const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
   MachineBasicBlock *findBestLoopExit(
-      const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
+      const MachineLoop &L, const BlockFilterSet &LoopBlockSet,
+      BlockFrequency &ExitFreq);
   BlockFilterSet collectLoopBlockSet(const MachineLoop &L);
   void buildLoopChains(const MachineLoop &L);
   void rotateLoop(
       BlockChain &LoopChain, const MachineBasicBlock *ExitingBB,
-      const BlockFilterSet &LoopBlockSet);
+      BlockFrequency ExitFreq, const BlockFilterSet &LoopBlockSet);
   void rotateLoopWithProfile(
       BlockChain &LoopChain, const MachineLoop &L,
       const BlockFilterSet &LoopBlockSet);
@@ -1790,66 +1799,205 @@ MachineBlockPlacement::canMoveBottomBloc
   return true;
 }
 
-/// Find the best loop top block for layout.
+// Find out the possible fall through frequence to the top of a loop.
+BlockFrequency
+MachineBlockPlacement::TopFallThroughFreq(
+    const MachineBasicBlock *Top,
+    const BlockFilterSet &LoopBlockSet) {
+  BlockFrequency MaxFreq = 0;
+  for (MachineBasicBlock *Pred : Top->predecessors()) {
+    BlockChain *PredChain = BlockToChain[Pred];
+    if (!LoopBlockSet.count(Pred) &&
+        (!PredChain || Pred == *std::prev(PredChain->end()))) {
+      // Found a Pred block can be placed before Top.
+      // Check if Top is the best successor of Pred.
+      auto TopProb = MBPI->getEdgeProbability(Pred, Top);
+      bool TopOK = true;
+      for (MachineBasicBlock *Succ : Pred->successors()) {
+        auto SuccProb = MBPI->getEdgeProbability(Pred, Succ);
+        BlockChain *SuccChain = BlockToChain[Succ];
+        // Check if Succ can be placed after Pred.
+        // Succ should not be in any chain, or it is the head of some chain.
+        if (!LoopBlockSet.count(Succ) && (SuccProb > TopProb) &&
+            (!SuccChain || Succ == *SuccChain->begin())) {
+          TopOK = false;
+          break;
+        }
+      }
+      if (TopOK) {
+        BlockFrequency EdgeFreq = MBFI->getBlockFreq(Pred) *
+                                  MBPI->getEdgeProbability(Pred, Top);
+        if (EdgeFreq > MaxFreq)
+          MaxFreq = EdgeFreq;
+      }
+    }
+  }
+  return MaxFreq;
+}
+
+// Compute the fall through gains when move NewTop before OldTop.
+//
+// In following diagram, edges marked as "-" are reduced fallthrough, edges
+// marked as "+" are increased fallthrough, this function computes
+//
+//      SUM(increased fallthrough) - SUM(decreased fallthrough)
+//
+//              |
+//              | -
+//              V
+//        --->OldTop
+//        |     .
+//        |     .
+//       +|     .    +
+//        |   Pred --->
+//        |     |-
+//        |     V
+//        --- NewTop <---
+//              |-
+//              V
+//
+BlockFrequency
+MachineBlockPlacement::FallThroughGains(
+    const MachineBasicBlock *NewTop,
+    const MachineBasicBlock *OldTop,
+    const MachineBasicBlock *ExitBB,
+    const BlockFilterSet &LoopBlockSet) {
+  BlockFrequency FallThrough2Top = TopFallThroughFreq(OldTop, LoopBlockSet);
+  BlockFrequency FallThrough2Exit = 0;
+  if (ExitBB)
+    FallThrough2Exit = MBFI->getBlockFreq(NewTop) *
+        MBPI->getEdgeProbability(NewTop, ExitBB);
+  BlockFrequency BackEdgeFreq = MBFI->getBlockFreq(NewTop) *
+      MBPI->getEdgeProbability(NewTop, OldTop);
+
+  // Find the best Pred of NewTop.
+   MachineBasicBlock *BestPred = nullptr;
+   BlockFrequency FallThroughFromPred = 0;
+   for (MachineBasicBlock *Pred : NewTop->predecessors()) {
+     if (!LoopBlockSet.count(Pred))
+       continue;
+     BlockChain *PredChain = BlockToChain[Pred];
+     if (!PredChain || Pred == *std::prev(PredChain->end())) {
+       BlockFrequency EdgeFreq = MBFI->getBlockFreq(Pred) *
+           MBPI->getEdgeProbability(Pred, NewTop);
+       if (EdgeFreq > FallThroughFromPred) {
+         FallThroughFromPred = EdgeFreq;
+         BestPred = Pred;
+       }
+     }
+   }
+
+   // If NewTop is not placed after Pred, another successor can be placed
+   // after Pred.
+   BlockFrequency NewFreq = 0;
+   if (BestPred) {
+     for (MachineBasicBlock *Succ : BestPred->successors()) {
+       if ((Succ == NewTop) || (Succ == BestPred) || !LoopBlockSet.count(Succ))
+         continue;
+       if (ComputedEdges.find(Succ) != ComputedEdges.end())
+         continue;
+       BlockChain *SuccChain = BlockToChain[Succ];
+       if ((SuccChain && (Succ != *SuccChain->begin())) ||
+           (SuccChain == BlockToChain[BestPred]))
+         continue;
+       BlockFrequency EdgeFreq = MBFI->getBlockFreq(BestPred) *
+           MBPI->getEdgeProbability(BestPred, Succ);
+       if (EdgeFreq > NewFreq)
+         NewFreq = EdgeFreq;
+     }
+     BlockFrequency OrigEdgeFreq = MBFI->getBlockFreq(BestPred) *
+         MBPI->getEdgeProbability(BestPred, NewTop);
+     if (NewFreq > OrigEdgeFreq) {
+       // If NewTop is not the best successor of Pred, then Pred doesn't
+       // fallthrough to NewTop. So there is no FallThroughFromPred and
+       // NewFreq.
+       NewFreq = 0;
+       FallThroughFromPred = 0;
+     }
+   }
+
+   BlockFrequency Result = 0;
+   BlockFrequency Gains = BackEdgeFreq + NewFreq;
+   BlockFrequency Lost = FallThrough2Top + FallThrough2Exit +
+       FallThroughFromPred;
+   if (Gains > Lost)
+     Result = Gains - Lost;
+   return Result;
+}
+
+/// Helper function of findBestLoopTop. Find the best loop top block
+/// from predecessors of old top.
 ///
-/// Look for a block which is strictly better than the loop header for laying
-/// out at the top of the loop. This looks for one and only one pattern:
-/// a latch block with no conditional exit. This block will cause a conditional
-/// jump around it or will be the bottom of the loop if we lay it out in place,
-/// but if it it doesn't end up at the bottom of the loop for any reason,
-/// rotation alone won't fix it. Because such a block will always result in an
-/// unconditional jump (for the backedge) rotating it in front of the loop
-/// header is always profitable.
+/// Look for a block which is strictly better than the old top for laying
+/// out before the old top of the loop. This looks for only two patterns:
+///
+///     1. a block has only one successor, the old loop top
+///
+///        Because such a block will always result in an unconditional jump,
+///        rotating it in front of the old top is always profitable.
+///
+///     2. a block has two successors, one is old top, another is exit
+///        and it has more than one predecessors
+///
+///        If it is below one of its predecessors P, only P can fall through to
+///        it, all other predecessors need a jump to it, and another conditional
+///        jump to loop header. If it is moved before loop header, all its
+///        predecessors jump to it, then fall through to loop header. So all its
+///        predecessors except P can reduce one taken branch.
+///        At the same time, move it before old top increases the taken branch
+///        to loop exit block, so the reduced taken branch will be compared with
+///        the increased taken branch to the loop exit block.
 MachineBasicBlock *
-MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
-                                       const BlockFilterSet &LoopBlockSet) {
-  // Placing the latch block before the header may introduce an extra branch
-  // that skips this block the first time the loop is executed, which we want
-  // to avoid when optimising for size.
-  // FIXME: in theory there is a case that does not introduce a new branch,
-  // i.e. when the layout predecessor does not fallthrough to the loop header.
-  // In practice this never happens though: there always seems to be a preheader
-  // that can fallthrough and that is also placed before the header.
-  if (F->getFunction().hasOptSize())
-    return L.getHeader();
-
+MachineBlockPlacement::findBestLoopTopHelper(
+    MachineBasicBlock *OldTop,
+    const MachineLoop &L,
+    const BlockFilterSet &LoopBlockSet) {
   // Check that the header hasn't been fused with a preheader block due to
   // crazy branches. If it has, we need to start with the header at the top to
   // prevent pulling the preheader into the loop body.
-  BlockChain &HeaderChain = *BlockToChain[L.getHeader()];
+  BlockChain &HeaderChain = *BlockToChain[OldTop];
   if (!LoopBlockSet.count(*HeaderChain.begin()))
-    return L.getHeader();
+    return OldTop;
 
-  LLVM_DEBUG(dbgs() << "Finding best loop top for: "
-                    << getBlockName(L.getHeader()) << "\n");
+  LLVM_DEBUG(dbgs() << "Finding best loop top for: " << getBlockName(OldTop)
+                    << "\n");
 
-  BlockFrequency BestPredFreq;
+  BlockFrequency BestGains = 0;
   MachineBasicBlock *BestPred = nullptr;
-  for (MachineBasicBlock *Pred : L.getHeader()->predecessors()) {
+  for (MachineBasicBlock *Pred : OldTop->predecessors()) {
     if (!LoopBlockSet.count(Pred))
       continue;
-    LLVM_DEBUG(dbgs() << "    header pred: " << getBlockName(Pred) << ", has "
+    if (Pred == L.getHeader())
+      continue;
+    LLVM_DEBUG(dbgs() << "   old top pred: " << getBlockName(Pred) << ", has "
                       << Pred->succ_size() << " successors, ";
                MBFI->printBlockFreq(dbgs(), Pred) << " freq\n");
-    if (Pred->succ_size() > 1)
+    if (Pred->succ_size() > 2)
       continue;
 
-    if (!canMoveBottomBlockToTop(Pred, L.getHeader()))
+    MachineBasicBlock *OtherBB = nullptr;
+    if (Pred->succ_size() == 2) {
+      OtherBB = *Pred->succ_begin();
+      if (OtherBB == OldTop)
+        OtherBB = *Pred->succ_rbegin();
+    }
+
+    if (!canMoveBottomBlockToTop(Pred, OldTop))
       continue;
 
-    BlockFrequency PredFreq = MBFI->getBlockFreq(Pred);
-    if (!BestPred || PredFreq > BestPredFreq ||
-        (!(PredFreq < BestPredFreq) &&
-         Pred->isLayoutSuccessor(L.getHeader()))) {
+    BlockFrequency Gains = FallThroughGains(Pred, OldTop, OtherBB,
+                                            LoopBlockSet);
+    if ((Gains > 0) && (Gains > BestGains ||
+        ((Gains == BestGains) && Pred->isLayoutSuccessor(OldTop)))) {
       BestPred = Pred;
-      BestPredFreq = PredFreq;
+      BestGains = Gains;
     }
   }
 
   // If no direct predecessor is fine, just use the loop header.
   if (!BestPred) {
     LLVM_DEBUG(dbgs() << "    final top unchanged\n");
-    return L.getHeader();
+    return OldTop;
   }
 
   // Walk backwards through any straight line of predecessors.
@@ -1862,6 +2010,34 @@ MachineBlockPlacement::findBestLoopTop(c
   return BestPred;
 }
 
+/// Find the best loop top block for layout.
+///
+/// This function iteratively calls findBestLoopTopHelper, until no new better
+/// BB can be found.
+MachineBasicBlock *
+MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
+                                       const BlockFilterSet &LoopBlockSet) {
+  // Placing the latch block before the header may introduce an extra branch
+  // that skips this block the first time the loop is executed, which we want
+  // to avoid when optimising for size.
+  // FIXME: in theory there is a case that does not introduce a new branch,
+  // i.e. when the layout predecessor does not fallthrough to the loop header.
+  // In practice this never happens though: there always seems to be a preheader
+  // that can fallthrough and that is also placed before the header.
+  if (F->getFunction().hasOptSize())
+    return L.getHeader();
+
+  MachineBasicBlock *OldTop = nullptr;
+  MachineBasicBlock *NewTop = L.getHeader();
+  while (NewTop != OldTop) {
+    OldTop = NewTop;
+    NewTop = findBestLoopTopHelper(OldTop, L, LoopBlockSet);
+    if (NewTop != OldTop)
+      ComputedEdges[NewTop] = { OldTop, false };
+  }
+  return NewTop;
+}
+
 /// Find the best loop exiting block for layout.
 ///
 /// This routine implements the logic to analyze the loop looking for the best
@@ -1869,7 +2045,8 @@ MachineBlockPlacement::findBestLoopTop(c
 /// fallthrough opportunities.
 MachineBasicBlock *
 MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,
-                                        const BlockFilterSet &LoopBlockSet) {
+                                        const BlockFilterSet &LoopBlockSet,
+                                        BlockFrequency &ExitFreq) {
   // We don't want to layout the loop linearly in all cases. If the loop header
   // is just a normal basic block in the loop, we want to look for what block
   // within the loop is the best one to layout at the top. However, if the loop
@@ -1980,6 +2157,7 @@ MachineBlockPlacement::findBestLoopExit(
 
   LLVM_DEBUG(dbgs() << "  Best exiting block: " << getBlockName(ExitingBB)
                     << "\n");
+  ExitFreq = BestExitEdgeFreq;
   return ExitingBB;
 }
 
@@ -2024,6 +2202,7 @@ MachineBlockPlacement::hasViableTopFallt
 /// of its bottom already, don't rotate it.
 void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
                                        const MachineBasicBlock *ExitingBB,
+                                       BlockFrequency ExitFreq,
                                        const BlockFilterSet &LoopBlockSet) {
   if (!ExitingBB)
     return;
@@ -2047,6 +2226,12 @@ void MachineBlockPlacement::rotateLoop(B
           (!SuccChain || Succ == *SuccChain->begin()))
         return;
     }
+
+    // Rotate will destroy the top fallthrough, we need to ensure the new exit
+    // frequency is larger than top fallthrough.
+    BlockFrequency FallThrough2Top = TopFallThroughFreq(Top, LoopBlockSet);
+    if (FallThrough2Top >= ExitFreq)
+      return;
   }
 
   BlockChain::iterator ExitIt = llvm::find(LoopChain, ExitingBB);
@@ -2102,8 +2287,6 @@ void MachineBlockPlacement::rotateLoop(B
 void MachineBlockPlacement::rotateLoopWithProfile(
     BlockChain &LoopChain, const MachineLoop &L,
     const BlockFilterSet &LoopBlockSet) {
-  auto HeaderBB = L.getHeader();
-  auto HeaderIter = llvm::find(LoopChain, HeaderBB);
   auto RotationPos = LoopChain.end();
 
   BlockFrequency SmallestRotationCost = BlockFrequency::getMaxFrequency();
@@ -2123,12 +2306,13 @@ void MachineBlockPlacement::rotateLoopWi
   // chain head is not the loop header. As we only consider natural loops with
   // single header, this computation can be done only once.
   BlockFrequency HeaderFallThroughCost(0);
-  for (auto *Pred : HeaderBB->predecessors()) {
+  MachineBasicBlock *ChainHeaderBB = *LoopChain.begin();
+  for (auto *Pred : ChainHeaderBB->predecessors()) {
     BlockChain *PredChain = BlockToChain[Pred];
     if (!LoopBlockSet.count(Pred) &&
         (!PredChain || Pred == *std::prev(PredChain->end()))) {
-      auto EdgeFreq =
-          MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, HeaderBB);
+      auto EdgeFreq = MBFI->getBlockFreq(Pred) *
+          MBPI->getEdgeProbability(Pred, ChainHeaderBB);
       auto FallThruCost = ScaleBlockFrequency(EdgeFreq, MisfetchCost);
       // If the predecessor has only an unconditional jump to the header, we
       // need to consider the cost of this jump.
@@ -2178,7 +2362,7 @@ void MachineBlockPlacement::rotateLoopWi
     // If the current BB is the loop header, we need to take into account the
     // cost of the missed fall through edge from outside of the loop to the
     // header.
-    if (Iter != HeaderIter)
+    if (Iter != LoopChain.begin())
       Cost += HeaderFallThroughCost;
 
     // Collect the loop exit cost by summing up frequencies of all exit edges
@@ -2299,9 +2483,7 @@ void MachineBlockPlacement::buildLoopCha
   // loop. This will default to the header, but may end up as one of the
   // predecessors to the header if there is one which will result in strictly
   // fewer branches in the loop body.
-  // When we use profile data to rotate the loop, this is unnecessary.
-  MachineBasicBlock *LoopTop =
-      RotateLoopWithProfile ? L.getHeader() : findBestLoopTop(L, LoopBlockSet);
+  MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet);
 
   // If we selected just the header for the loop top, look for a potentially
   // profitable exit block in the event that rotating the loop can eliminate
@@ -2310,8 +2492,9 @@ void MachineBlockPlacement::buildLoopCha
   // Loops are processed innermost to uttermost, make sure we clear
   // PreferredLoopExit before processing a new loop.
   PreferredLoopExit = nullptr;
+  BlockFrequency ExitFreq;
   if (!RotateLoopWithProfile && LoopTop == L.getHeader())
-    PreferredLoopExit = findBestLoopExit(L, LoopBlockSet);
+    PreferredLoopExit = findBestLoopExit(L, LoopBlockSet, ExitFreq);
 
   BlockChain &LoopChain = *BlockToChain[LoopTop];
 
@@ -2331,7 +2514,7 @@ void MachineBlockPlacement::buildLoopCha
   if (RotateLoopWithProfile)
     rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
   else
-    rotateLoop(LoopChain, PreferredLoopExit, LoopBlockSet);
+    rotateLoop(LoopChain, PreferredLoopExit, ExitFreq, LoopBlockSet);
 
   LLVM_DEBUG({
     // Crash at the end so we get all of the debugging output first.

Modified: llvm/trunk/test/CodeGen/AArch64/cmpxchg-idioms.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/cmpxchg-idioms.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/cmpxchg-idioms.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/cmpxchg-idioms.ll Fri Jun 14 16:08:59 2019
@@ -111,7 +111,7 @@ define i1 @test_conditional2(i32 %a, i32
 ; CHECK: mov w22, #2
 ; CHECK-NOT: mov w22, #4
 ; CHECK-NOT: cmn w22, #4
-; CHECK: b [[LOOP2:LBB[0-9]+_[0-9]+]]
+; CHECK: [[LOOP2:LBB[0-9]+_[0-9]+]]: ; %for.cond
 ; CHECK-NOT: b.ne [[LOOP2]]
 ; CHECK-NOT: b {{LBB[0-9]+_[0-9]+}}
 ; CHECK: bl _foo

Modified: llvm/trunk/test/CodeGen/AArch64/neg-imm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neg-imm.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neg-imm.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neg-imm.ll Fri Jun 14 16:08:59 2019
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -disable-block-placement -o - %s | FileCheck %s
 ; LSR used to pick a sub-optimal solution due to the target responding
 ; conservatively to isLegalAddImmediate for negative values.
 

Modified: llvm/trunk/test/CodeGen/AArch64/tailmerging_in_mbp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/tailmerging_in_mbp.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/tailmerging_in_mbp.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/tailmerging_in_mbp.ll Fri Jun 14 16:08:59 2019
@@ -1,9 +1,8 @@
 ; RUN: llc <%s -mtriple=aarch64-eabi -verify-machine-dom-info | FileCheck %s
 
 ; CHECK-LABEL: test:
-; CHECK:       LBB0_7:
-; CHECK:         b.hi	
-; CHECK-NEXT:    b	
+; CHECK-LABEL: %cond.false12.i
+; CHECK:         b.gt	
 ; CHECK-NEXT:  LBB0_8:
 ; CHECK-NEXT:    mov	 x8, x9
 ; CHECK-NEXT:  LBB0_9:

Modified: llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.ll Fri Jun 14 16:08:59 2019
@@ -230,6 +230,11 @@ bb.end:
 ; Make sure scc liveness is updated if sor_b64 is removed
 ; ALL-LABEL: {{^}}scc_liveness:
 
+; GCN: %bb10
+; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
+; GCN: s_andn2_b64
+; GCN-NEXT: s_cbranch_execz
+
 ; GCN: [[BB1_LOOP:BB[0-9]+_[0-9]+]]:
 ; GCN: s_andn2_b64 exec, exec,
 ; GCN-NEXT: s_cbranch_execnz [[BB1_LOOP]]
@@ -240,10 +245,6 @@ bb.end:
 ; GCN-NOT: s_or_b64 exec, exec
 
 ; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
-; GCN: s_andn2_b64
-; GCN-NEXT: s_cbranch_execnz
-
-; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
 ; GCN: buffer_store_dword
 ; GCN: buffer_store_dword
 ; GCN: buffer_store_dword

Modified: llvm/trunk/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll Fri Jun 14 16:08:59 2019
@@ -19,38 +19,39 @@ define amdgpu_ps void @main(i32, float)
 ; CHECK-NEXT:    v_mov_b32_e32 v1, 0
 ; CHECK-NEXT:    ; implicit-def: $sgpr2_sgpr3
 ; CHECK-NEXT:    ; implicit-def: $sgpr6_sgpr7
-; CHECK-NEXT:  BB0_1: ; %loop
+; CHECK-NEXT:    s_branch BB0_3
+; CHECK-NEXT:  BB0_1: ; in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    ; implicit-def: $vgpr1
+; CHECK-NEXT:  BB0_2: ; %Flow
+; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    s_and_b64 s[8:9], exec, s[6:7]
+; CHECK-NEXT:    s_or_b64 s[8:9], s[8:9], s[4:5]
+; CHECK-NEXT:    s_mov_b64 s[4:5], s[8:9]
+; CHECK-NEXT:    s_andn2_b64 exec, exec, s[8:9]
+; CHECK-NEXT:    s_cbranch_execz BB0_7
+; CHECK-NEXT:  BB0_3: ; %loop
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, 32, v1
 ; CHECK-NEXT:    s_and_b64 vcc, exec, vcc
 ; CHECK-NEXT:    s_or_b64 s[6:7], s[6:7], exec
 ; CHECK-NEXT:    s_or_b64 s[2:3], s[2:3], exec
-; CHECK-NEXT:    s_cbranch_vccz BB0_5
-; CHECK-NEXT:  ; %bb.2: ; %endif1
-; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    s_cbranch_vccz BB0_1
+; CHECK-NEXT:  ; %bb.4: ; %endif1
+; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
 ; CHECK-NEXT:    s_mov_b64 s[6:7], -1
 ; CHECK-NEXT:    s_and_saveexec_b64 s[8:9], s[0:1]
 ; CHECK-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; CHECK-NEXT:    ; mask branch BB0_4
-; CHECK-NEXT:  BB0_3: ; %endif2
-; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    ; mask branch BB0_6
+; CHECK-NEXT:  BB0_5: ; %endif2
+; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
 ; CHECK-NEXT:    v_add_u32_e32 v1, 1, v1
 ; CHECK-NEXT:    s_xor_b64 s[6:7], exec, -1
-; CHECK-NEXT:  BB0_4: ; %Flow1
-; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:  BB0_6: ; %Flow1
+; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[8:9]
 ; CHECK-NEXT:    s_andn2_b64 s[2:3], s[2:3], exec
-; CHECK-NEXT:    s_branch BB0_6
-; CHECK-NEXT:  BB0_5: ; in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    ; implicit-def: $vgpr1
-; CHECK-NEXT:  BB0_6: ; %Flow
-; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    s_and_b64 s[8:9], exec, s[6:7]
-; CHECK-NEXT:    s_or_b64 s[8:9], s[8:9], s[4:5]
-; CHECK-NEXT:    s_mov_b64 s[4:5], s[8:9]
-; CHECK-NEXT:    s_andn2_b64 exec, exec, s[8:9]
-; CHECK-NEXT:    s_cbranch_execnz BB0_1
-; CHECK-NEXT:  ; %bb.7: ; %Flow2
+; CHECK-NEXT:    s_branch BB0_2
+; CHECK-NEXT:  BB0_7: ; %Flow2
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[8:9]
 ; CHECK-NEXT:    v_mov_b32_e32 v1, 0
 ; this is the divergent branch with the condition not marked as divergent

Modified: llvm/trunk/test/CodeGen/AMDGPU/global_smrd_cfg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/global_smrd_cfg.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/global_smrd_cfg.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/global_smrd_cfg.ll Fri Jun 14 16:08:59 2019
@@ -1,27 +1,28 @@
 ; RUN: llc -mtriple amdgcn--amdhsa -mcpu=fiji -amdgpu-scalarize-global-loads=true -verify-machineinstrs  < %s | FileCheck %s
 
-; CHECK-LABEL: %bb11
+; CHECK-LABEL: %bb22
 
-; Load from %arg in a Loop body has alias store
+; Load from %arg has alias store in Loop
 
 ; CHECK: flat_load_dword
 
-; CHECK-LABEL: %bb20
-; CHECK: flat_store_dword
+; #####################################################################
+
+; Load from %arg1 has no-alias store in Loop - arg1[i+1] never alias arg1[i]
+
+; CHECK: s_load_dword
 
 ; #####################################################################
 
-; CHECK-LABEL: %bb22
+; CHECK-LABEL: %bb11
 
-; Load from %arg has alias store in Loop
+; Load from %arg in a Loop body has alias store
 
 ; CHECK: flat_load_dword
 
-; #####################################################################
-
-; Load from %arg1 has no-alias store in Loop - arg1[i+1] never alias arg1[i]
+; CHECK-LABEL: %bb20
 
-; CHECK: s_load_dword
+; CHECK: flat_store_dword
 
 define amdgpu_kernel void @cfg(i32 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) #0 {
 bb:

Modified: llvm/trunk/test/CodeGen/AMDGPU/hoist-cond.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hoist-cond.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hoist-cond.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/hoist-cond.ll Fri Jun 14 16:08:59 2019
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck %s
 
 ; Check that invariant compare is hoisted out of the loop.
 ; At the same time condition shall not be serialized into a VGPR and deserialized later

Modified: llvm/trunk/test/CodeGen/AMDGPU/i1-copy-from-loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/i1-copy-from-loop.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/i1-copy-from-loop.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/i1-copy-from-loop.ll Fri Jun 14 16:08:59 2019
@@ -3,20 +3,20 @@
 
 ; SI-LABEL: {{^}}i1_copy_from_loop:
 ;
+; SI: ; %Flow
+; SI-DAG:  s_andn2_b64       [[LCSSA_ACCUM:s\[[0-9]+:[0-9]+\]]], [[LCSSA_ACCUM]], exec
+; SI-DAG:  s_and_b64         [[CC_MASK2:s\[[0-9]+:[0-9]+\]]], [[CC_ACCUM:s\[[0-9]+:[0-9]+\]]], exec
+; SI:      s_or_b64          [[LCSSA_ACCUM]], [[LCSSA_ACCUM]], [[CC_MASK2]]
+
 ; SI: ; %for.body
 ; SI:      v_cmp_gt_u32_e64  [[CC_SREG:s\[[0-9]+:[0-9]+\]]], 4,
-; SI-DAG:  s_andn2_b64       [[CC_ACCUM:s\[[0-9]+:[0-9]+\]]], [[CC_ACCUM]], exec
+; SI-DAG:  s_andn2_b64       [[CC_ACCUM]], [[CC_ACCUM]], exec
 ; SI-DAG:  s_and_b64         [[CC_MASK:s\[[0-9]+:[0-9]+\]]], [[CC_SREG]], exec
 ; SI:      s_or_b64          [[CC_ACCUM]], [[CC_ACCUM]], [[CC_MASK]]
 
 ; SI: ; %Flow1
 ; SI:      s_or_b64          [[CC_ACCUM]], [[CC_ACCUM]], exec
 
-; SI: ; %Flow
-; SI-DAG:  s_andn2_b64       [[LCSSA_ACCUM:s\[[0-9]+:[0-9]+\]]], [[LCSSA_ACCUM]], exec
-; SI-DAG:  s_and_b64         [[CC_MASK2:s\[[0-9]+:[0-9]+\]]], [[CC_ACCUM]], exec
-; SI:      s_or_b64          [[LCSSA_ACCUM]], [[LCSSA_ACCUM]], [[CC_MASK2]]
-
 ; SI: ; %for.end
 ; SI:      s_and_saveexec_b64 {{s\[[0-9]+:[0-9]+\]}}, [[LCSSA_ACCUM]]
 

Modified: llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-si.ll Fri Jun 14 16:08:59 2019
@@ -630,12 +630,7 @@ define amdgpu_kernel void @insertelement
 ; GCN-LABEL: {{^}}broken_phi_bb:
 ; GCN: v_mov_b32_e32 [[PHIREG:v[0-9]+]], 8
 
-; GCN: s_branch [[BB2:BB[0-9]+_[0-9]+]]
-
-; GCN: {{^BB[0-9]+_[0-9]+}}:
-; GCN: s_mov_b64 exec,
-
-; GCN: [[BB2]]:
+; GCN: [[BB2:BB[0-9]+_[0-9]+]]:
 ; GCN: v_cmp_le_i32_e32 vcc, s{{[0-9]+}}, [[PHIREG]]
 ; GCN: buffer_load_dword
 
@@ -647,6 +642,11 @@ define amdgpu_kernel void @insertelement
 ; IDXMODE: s_set_gpr_idx_off
 
 ; GCN: s_cbranch_execnz [[REGLOOP]]
+
+; GCN: {{^; %bb.[0-9]}}:
+; GCN: s_mov_b64 exec,
+; GCN: s_branch [[BB2]]
+
 define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) #0 {
 bb:
   br label %bb2

Modified: llvm/trunk/test/CodeGen/AMDGPU/loop_break.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/loop_break.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/loop_break.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/loop_break.ll Fri Jun 14 16:08:59 2019
@@ -1,5 +1,5 @@
 ; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck -check-prefix=GCN %s
 
 ; Uses llvm.amdgcn.break
 

Modified: llvm/trunk/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/loop_exit_with_xor.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/loop_exit_with_xor.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/loop_exit_with_xor.ll Fri Jun 14 16:08:59 2019
@@ -61,9 +61,9 @@ loopexit:
 
 ; GCN-LABEL: {{^}}break_cond_is_arg:
 ; GCN: s_xor_b64 [[REG1:[^ ,]*]], {{[^ ,]*, -1$}}
+; GCN: s_andn2_b64 exec, exec, [[REG3:[^ ,]*]]
 ; GCN: s_and_b64 [[REG2:[^ ,]*]], exec, [[REG1]]
-; GCN: s_or_b64 [[REG3:[^ ,]*]], [[REG2]],
-; GCN: s_andn2_b64 exec, exec, [[REG3]]
+; GCN: s_or_b64 [[REG3]], [[REG2]],
 
 define void @break_cond_is_arg(i32 %arg, i1 %breakcond) {
 entry:

Modified: llvm/trunk/test/CodeGen/AMDGPU/madmk.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/madmk.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/madmk.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/madmk.ll Fri Jun 14 16:08:59 2019
@@ -188,9 +188,9 @@ define amdgpu_kernel void @madmk_add_inl
 }
 
 ; SI-LABEL: {{^}}kill_madmk_verifier_error:
+; SI: s_or_b64
 ; SI: s_xor_b64
 ; SI: v_mac_f32_e32 {{v[0-9]+}}, 0x472aee8c, {{v[0-9]+}}
-; SI: s_or_b64
 define amdgpu_kernel void @kill_madmk_verifier_error() nounwind {
 bb:
   br label %bb2

Modified: llvm/trunk/test/CodeGen/AMDGPU/multilevel-break.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/multilevel-break.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/multilevel-break.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/multilevel-break.ll Fri Jun 14 16:08:59 2019
@@ -24,13 +24,29 @@
 ; GCN: ; %main_body
 ; GCN:      s_mov_b64           [[LEFT_OUTER:s\[[0-9]+:[0-9]+\]]], 0{{$}}
 
+; GCN: [[FLOW2:BB[0-9]+_[0-9]+]]: ; %Flow2
+; GCN:      s_or_b64            exec, exec, [[TMP0:s\[[0-9]+:[0-9]+\]]]
+; GCN:      s_and_b64           [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_OUTER:s\[[0-9]+:[0-9]+\]]]
+; GCN:      s_or_b64            [[TMP1]], [[TMP1]], [[LEFT_OUTER]]
+; GCN:      s_mov_b64           [[LEFT_OUTER]], [[TMP1]]
+; GCN:      s_andn2_b64         exec, exec, [[TMP1]]
+; GCN:      s_cbranch_execz    [[IF_BLOCK:BB[0-9]+_[0-9]+]]
+
 ; GCN: [[OUTER_LOOP:BB[0-9]+_[0-9]+]]: ; %LOOP.outer{{$}}
 ; GCN:      s_mov_b64           [[LEFT_INNER:s\[[0-9]+:[0-9]+\]]], 0{{$}}
 
+; GCN: ; %Flow
+; GCN:      s_or_b64            exec, exec, [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]]
+; GCN:      s_and_b64           [[TMP0]], exec, [[BREAK_INNER:s\[[0-9]+:[0-9]+\]]]
+; GCN:      s_or_b64            [[TMP0]], [[TMP0]], [[LEFT_INNER]]
+; GCN:      s_mov_b64           [[LEFT_INNER]], [[TMP0]]
+; GCN:      s_andn2_b64         exec, exec, [[TMP0]]
+; GCN:      s_cbranch_execz    [[FLOW2]]
+
 ; GCN: [[INNER_LOOP:BB[0-9]+_[0-9]+]]: ; %LOOP{{$}}
-; GCN:      s_or_b64            [[BREAK_OUTER:s\[[0-9]+:[0-9]+\]]], [[BREAK_OUTER]], exec
-; GCN:      s_or_b64            [[BREAK_INNER:s\[[0-9]+:[0-9]+\]]], [[BREAK_INNER]], exec
-; GCN:      s_and_saveexec_b64  [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]], vcc
+; GCN:      s_or_b64            [[BREAK_OUTER]], [[BREAK_OUTER]], exec
+; GCN:      s_or_b64            [[BREAK_INNER]], [[BREAK_INNER]], exec
+; GCN:      s_and_saveexec_b64  [[SAVE_EXEC]], vcc
 
 ; FIXME: duplicate comparison
 ; GCN: ; %ENDIF
@@ -43,23 +59,7 @@
 ; GCN-DAG:  s_or_b64            [[BREAK_OUTER]], [[BREAK_OUTER]], [[TMP_EQ]]
 ; GCN-DAG:  s_or_b64            [[BREAK_INNER]], [[BREAK_INNER]], [[TMP_NE]]
 
-; GCN: ; %Flow
-; GCN:      s_or_b64            exec, exec, [[SAVE_EXEC]]
-; GCN:      s_and_b64           [[TMP0:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_INNER]]
-; GCN:      s_or_b64            [[TMP0]], [[TMP0]], [[LEFT_INNER]]
-; GCN:      s_mov_b64           [[LEFT_INNER]], [[TMP0]]
-; GCN:      s_andn2_b64         exec, exec, [[TMP0]]
-; GCN:      s_cbranch_execnz    [[INNER_LOOP]]
-
-; GCN: ; %Flow2
-; GCN:      s_or_b64            exec, exec, [[TMP0]]
-; GCN:      s_and_b64           [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_OUTER]]
-; GCN:      s_or_b64            [[TMP1]], [[TMP1]], [[LEFT_OUTER]]
-; GCN:      s_mov_b64           [[LEFT_OUTER]], [[TMP1]]
-; GCN:      s_andn2_b64         exec, exec, [[TMP1]]
-; GCN:      s_cbranch_execnz    [[OUTER_LOOP]]
-
-; GCN: ; %IF
+; GCN: [[IF_BLOCK]]: ; %IF
 ; GCN-NEXT: s_endpgm
 define amdgpu_vs void @multi_else_break(<4 x float> %vec, i32 %ub, i32 %cont) {
 main_body:
@@ -92,12 +92,18 @@ ENDIF:
 ; GCN-LABEL: {{^}}multi_if_break_loop:
 ; GCN:      s_mov_b64          [[LEFT:s\[[0-9]+:[0-9]+\]]], 0{{$}}
 
+; GCN: ; %Flow4
+; GCN:      s_and_b64          [[BREAK:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK]]
+; GCN:      s_or_b64           [[LEFT]], [[BREAK]], [[OLD_LEFT:s\[[0-9]+:[0-9]+\]]]
+; GCN:      s_andn2_b64        exec, exec, [[LEFT]]
+; GCN-NEXT: s_cbranch_execz
+
 ; GCN: [[LOOP:BB[0-9]+_[0-9]+]]: ; %bb1{{$}}
-; GCN:      s_mov_b64          [[OLD_LEFT:s\[[0-9]+:[0-9]+\]]], [[LEFT]]
+; GCN:      s_mov_b64          [[OLD_LEFT]], [[LEFT]]
 
 ; GCN: ; %LeafBlock1
 ; GCN:      s_mov_b64
-; GCN:      s_mov_b64          [[BREAK:s\[[0-9]+:[0-9]+\]]], -1{{$}}
+; GCN:      s_mov_b64          [[BREAK]], -1{{$}}
 
 ; GCN: ; %case1
 ; GCN:      buffer_load_dword  [[LOAD2:v[0-9]+]],
@@ -118,12 +124,6 @@ ENDIF:
 ; GCN-DAG:  s_and_b64          [[TMP:s\[[0-9]+:[0-9]+\]]], vcc, exec
 ; GCN:      s_or_b64           [[BREAK]], [[BREAK]], [[TMP]]
 
-; GCN: ; %Flow4
-; GCN:      s_and_b64          [[BREAK]], exec, [[BREAK]]
-; GCN:      s_or_b64           [[LEFT]], [[BREAK]], [[OLD_LEFT]]
-; GCN:      s_andn2_b64        exec, exec, [[LEFT]]
-; GCN-NEXT: s_cbranch_execnz
-
 define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
 bb:
   %id = call i32 @llvm.amdgcn.workitem.id.x()

Modified: llvm/trunk/test/CodeGen/AMDGPU/optimize-negated-cond.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/optimize-negated-cond.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/optimize-negated-cond.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/optimize-negated-cond.ll Fri Jun 14 16:08:59 2019
@@ -3,11 +3,11 @@
 ; GCN-LABEL: {{^}}negated_cond:
 ; GCN: BB0_1:
 ; GCN:   v_cmp_eq_u32_e64 [[CC:[^,]+]],
-; GCN: BB0_2:
+; GCN: BB0_3:
 ; GCN-NOT: v_cndmask_b32
 ; GCN-NOT: v_cmp
 ; GCN:   s_andn2_b64 vcc, exec, [[CC]]
-; GCN:   s_cbranch_vccnz BB0_4
+; GCN:   s_cbranch_vccnz BB0_2
 define amdgpu_kernel void @negated_cond(i32 addrspace(1)* %arg1) {
 bb:
   br label %bb1
@@ -36,11 +36,11 @@ bb4:
 
 ; GCN-LABEL: {{^}}negated_cond_dominated_blocks:
 ; GCN:   v_cmp_eq_u32_e64 [[CC:[^,]+]],
-; GCN: BB1_1:
+; GCN: %bb4
 ; GCN-NOT: v_cndmask_b32
 ; GCN-NOT: v_cmp
 ; GCN:   s_andn2_b64 vcc, exec, [[CC]]
-; GCN:   s_cbranch_vccz BB1_3
+; GCN:   s_cbranch_vccnz BB1_1
 define amdgpu_kernel void @negated_cond_dominated_blocks(i32 addrspace(1)* %arg1) {
 bb:
   br label %bb2

Modified: llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf.ll Fri Jun 14 16:08:59 2019
@@ -96,20 +96,20 @@ declare float @llvm.fabs.f32(float) noun
 ; FUNC-LABEL: {{^}}loop_land_info_assert:
 ; SI:      v_cmp_lt_i32_e64 [[CMP4:s\[[0-9:]+\]]], s{{[0-9]+}}, 4{{$}}
 ; SI:      s_and_b64        [[CMP4M:s\[[0-9]+:[0-9]+\]]], exec, [[CMP4]]
-; SI:      s_branch         [[INFLOOP:BB[0-9]+_[0-9]+]]
+
+; SI: [[WHILELOOP:BB[0-9]+_[0-9]+]]: ; %while.cond
+; SI:      s_cbranch_vccz [[FOR_COND_PH:BB[0-9]+_[0-9]+]]
 
 ; SI:      [[CONVEX_EXIT:BB[0-9_]+]]
 ; SI:      s_mov_b64        vcc,
 ; SI-NEXT: s_cbranch_vccnz  [[ENDPGM:BB[0-9]+_[0-9]+]]
-; SI:      s_cbranch_vccnz  [[INFLOOP]]
+
+; SI:      s_cbranch_vccnz  [[WHILELOOP]]
 
 ; SI: ; %if.else
 ; SI:      buffer_store_dword
 
-; SI:      [[INFLOOP]]:
-; SI:      s_cbranch_vccnz [[CONVEX_EXIT]]
-
-; SI: ; %for.cond.preheader
+; SI: [[FOR_COND_PH]]: ; %for.cond.preheader
 ; SI:      s_cbranch_vccz [[ENDPGM]]
 
 ; SI:      [[ENDPGM]]:

Modified: llvm/trunk/test/CodeGen/AMDGPU/valu-i1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/valu-i1.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/valu-i1.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/valu-i1.ll Fri Jun 14 16:08:59 2019
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs -enable-misched -asm-verbose < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -verify-machineinstrs -enable-misched -asm-verbose -disable-block-placement < %s | FileCheck -check-prefix=SI %s
 
 declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 

Modified: llvm/trunk/test/CodeGen/AMDGPU/wqm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/wqm.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/wqm.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/wqm.ll Fri Jun 14 16:08:59 2019
@@ -650,12 +650,15 @@ main_body:
 ; CHECK-DAG: v_mov_b32_e32 [[CTR:v[0-9]+]], 0
 ; CHECK-DAG: s_mov_b32 [[SEVEN:s[0-9]+]], 0x40e00000
 
-; CHECK: [[LOOPHDR:BB[0-9]+_[0-9]+]]: ; %body
-; CHECK: v_add_f32_e32 [[CTR]], 2.0, [[CTR]]
+; CHECK: [[LOOPHDR:BB[0-9]+_[0-9]+]]: ; %loop
 ; CHECK: v_cmp_lt_f32_e32 vcc, [[SEVEN]], [[CTR]]
-; CHECK: s_cbranch_vccz [[LOOPHDR]]
-; CHECK: ; %break
+; CHECK: s_cbranch_vccnz
 
+; CHECK: ; %body
+; CHECK: v_add_f32_e32 [[CTR]], 2.0, [[CTR]]
+; CHECK: s_branch [[LOOPHDR]]
+
+; CHECK: ; %break
 ; CHECK: ; return
 define amdgpu_ps <4 x float> @test_loop_vcc(<4 x float> %in) nounwind {
 entry:

Modified: llvm/trunk/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll Fri Jun 14 16:08:59 2019
@@ -26,7 +26,7 @@ bb1:
 
 bb2:                                              ; preds = %bb1, %entry
 ; CHECK: cmp [[REG]], #0
-; CHECK: ble
+; CHECK: bgt
   %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ]
   %tries.0 = sub i32 2147483647, %indvar
   %tmp1 = icmp sgt i32 %tries.0, 0

Modified: llvm/trunk/test/CodeGen/ARM/arm-and-tst-peephole.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/arm-and-tst-peephole.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/arm-and-tst-peephole.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/arm-and-tst-peephole.ll Fri Jun 14 16:08:59 2019
@@ -47,9 +47,8 @@ tailrecurse.switch:
 ; V8-NEXT: beq
 ; V8-NEXT: %tailrecurse.switch
 ; V8: cmp
-; V8-NEXT: beq
-; V8-NEXT: %sw.epilog
-; V8-NEXT: bx lr
+; V8-NEXT: bne
+; V8-NEXT: %sw.bb
   switch i32 %and, label %sw.epilog [
     i32 1, label %sw.bb
     i32 3, label %sw.bb6

Modified: llvm/trunk/test/CodeGen/ARM/atomic-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/atomic-cmp.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/atomic-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/atomic-cmp.ll Fri Jun 14 16:08:59 2019
@@ -9,8 +9,8 @@ define i8 @t(i8* %a, i8 %b, i8 %c) nounw
 ; ARM: clrex
 
 ; T2-LABEL: t:
-; T2: strexb
 ; T2: ldrexb
+; T2: strexb
 ; T2: clrex
   %tmp0 = cmpxchg i8* %a, i8 %b, i8 %c monotonic monotonic
   %tmp1 = extractvalue { i8, i1 } %tmp0, 0

Modified: llvm/trunk/test/CodeGen/ARM/atomic-cmpxchg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/atomic-cmpxchg.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/atomic-cmpxchg.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/atomic-cmpxchg.ll Fri Jun 14 16:08:59 2019
@@ -52,16 +52,16 @@ entry:
 ; CHECK-ARMV7-LABEL: test_cmpxchg_res_i8:
 ; CHECK-ARMV7-NEXT: .fnstart
 ; CHECK-ARMV7-NEXT: uxtb [[DESIRED:r[0-9]+]], r1
-; CHECK-ARMV7-NEXT: b [[TRY:.LBB[0-9_]+]]
-; CHECK-ARMV7-NEXT: [[HEAD:.LBB[0-9_]+]]:
-; CHECK-ARMV7-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, [r0]
+; CHECK-ARMV7-NEXT: [[TRY:.LBB[0-9_]+]]:
+; CHECK-ARMV7-NEXT: ldrexb [[SUCCESS:r[0-9]+]], [r0]
+; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], r1
+; CHECK-ARMV7-NEXT: bne [[EXIT:.LBB[0-9_]+]]
+; CHECK-ARMV7-NEXT: strexb [[SUCCESS]], r2, [r0]
 ; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], #0
 ; CHECK-ARMV7-NEXT: moveq r0, #1
 ; CHECK-ARMV7-NEXT: bxeq lr
-; CHECK-ARMV7-NEXT: [[TRY]]:
-; CHECK-ARMV7-NEXT: ldrexb [[SUCCESS]], [r0]
-; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], r1
-; CHECK-ARMV7-NEXT: beq [[HEAD]]
+; CHECK-ARMV7-NEXT: b [[TRY]]
+; CHECK-ARMV7-NEXT: [[EXIT]]:
 ; CHECK-ARMV7-NEXT: mov r0, #0
 ; CHECK-ARMV7-NEXT: clrex
 ; CHECK-ARMV7-NEXT: bx lr
@@ -69,17 +69,17 @@ entry:
 ; CHECK-THUMBV7-LABEL: test_cmpxchg_res_i8:
 ; CHECK-THUMBV7-NEXT: .fnstart
 ; CHECK-THUMBV7-NEXT: uxtb [[DESIRED:r[0-9]+]], r1
-; CHECK-THUMBV7-NEXT: b [[TRYLD:.LBB[0-9_]+]]
-; CHECK-THUMBV7-NEXT: [[TRYST:.LBB[0-9_]+]]:
+; CHECK-THUMBV7-NEXT: [[TRYLD:.LBB[0-9_]+]]
+; CHECK-THUMBV7-NEXT: ldrexb [[LD:r[0-9]+]], [r0]
+; CHECK-THUMBV7-NEXT: cmp [[LD]], [[DESIRED]]
+; CHECK-THUMBV7-NEXT: bne [[EXIT:.LBB[0-9_]+]]
 ; CHECK-THUMBV7-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, [r0]
 ; CHECK-THUMBV7-NEXT: cmp [[SUCCESS]], #0
 ; CHECK-THUMBV7-NEXT: itt eq
 ; CHECK-THUMBV7-NEXT: moveq r0, #1
 ; CHECK-THUMBV7-NEXT: bxeq lr
-; CHECK-THUMBV7-NEXT: [[TRYLD]]:
-; CHECK-THUMBV7-NEXT: ldrexb [[LD:r[0-9]+]], [r0]
-; CHECK-THUMBV7-NEXT: cmp [[LD]], [[DESIRED]]
-; CHECK-THUMBV7-NEXT: beq [[TRYST:.LBB[0-9_]+]]
+; CHECK-THUMBV7-NEXT: b [[TRYLD]]
+; CHECK-THUMBV7-NEXT: [[EXIT]]:
 ; CHECK-THUMBV7-NEXT: movs r0, #0
 ; CHECK-THUMBV7-NEXT: clrex
 ; CHECK-THUMBV7-NEXT: bx lr

Modified: llvm/trunk/test/CodeGen/ARM/code-placement.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/code-placement.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/code-placement.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/code-placement.ll Fri Jun 14 16:08:59 2019
@@ -38,8 +38,9 @@ entry:
   br i1 %0, label %bb5, label %bb.nph15
 
 bb1:                                              ; preds = %bb2.preheader, %bb1
+; CHECK: LBB1_[[BB3:.]]: @ %bb3
 ; CHECK: LBB1_[[PREHDR:.]]: @ %bb2.preheader
-; CHECK: blt LBB1_[[BB3:.]]
+; CHECK: blt LBB1_[[BB3]]
   %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %bb2.preheader ] ; <i32> [#uses=2]
   %sum.08 = phi i32 [ %2, %bb1 ], [ %sum.110, %bb2.preheader ] ; <i32> [#uses=1]
   %tmp17 = sub i32 %i.07, %indvar                 ; <i32> [#uses=1]
@@ -53,7 +54,6 @@ bb1:
 bb3:                                              ; preds = %bb1, %bb2.preheader
 ; CHECK: LBB1_[[BB1:.]]: @ %bb1
 ; CHECK: bne LBB1_[[BB1]]
-; CHECK: LBB1_[[BB3]]: @ %bb3
   %sum.0.lcssa = phi i32 [ %sum.110, %bb2.preheader ], [ %2, %bb1 ] ; <i32> [#uses=2]
   %3 = add i32 %pass.011, 1                       ; <i32> [#uses=2]
   %exitcond18 = icmp eq i32 %3, %passes           ; <i1> [#uses=1]

Modified: llvm/trunk/test/CodeGen/ARM/pr32578.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/pr32578.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/pr32578.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/pr32578.ll Fri Jun 14 16:08:59 2019
@@ -4,7 +4,7 @@ target triple = "armv7"
 ; CHECK-LABEL: func:
 ; CHECK: push {r11, lr}
 ; CHECK: vpush {d8}
-; CHECK: b .LBB0_2
+; CHECK: .LBB0_1: @ %tailrecurse
 define arm_aapcscc double @func() {
   br label %tailrecurse
 

Modified: llvm/trunk/test/CodeGen/ARM/swifterror.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/swifterror.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/swifterror.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/swifterror.ll Fri Jun 14 16:08:59 2019
@@ -182,7 +182,7 @@ define float @foo_loop(%swift_error** sw
 ; CHECK-APPLE: mov r0, #16
 ; CHECK-APPLE: malloc
 ; CHECK-APPLE: strb r{{.*}}, [r0, #8]
-; CHECK-APPLE: ble
+; CHECK-APPLE: b
 
 ; CHECK-O0-LABEL: foo_loop:
 ; CHECK-O0: cmp r{{.*}}, #0

Modified: llvm/trunk/test/CodeGen/Hexagon/bug6757-endloop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/bug6757-endloop.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/bug6757-endloop.ll (original)
+++ llvm/trunk/test/CodeGen/Hexagon/bug6757-endloop.ll Fri Jun 14 16:08:59 2019
@@ -4,10 +4,10 @@
 ; This situation can arise due to tail duplication.
 
 ; CHECK: loop1([[LP:.LBB0_[0-9]+]]
+; CHECK: endloop1
 ; CHECK: [[LP]]:
 ; CHECK-NOT: loop1(
 ; CHECK: endloop1
-; CHECK: endloop1
 
 %s.0 = type { i32, i8* }
 %s.1 = type { i32, i32, i32, i32 }

Modified: llvm/trunk/test/CodeGen/Hexagon/early-if-merge-loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/early-if-merge-loop.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/early-if-merge-loop.ll (original)
+++ llvm/trunk/test/CodeGen/Hexagon/early-if-merge-loop.ll Fri Jun 14 16:08:59 2019
@@ -2,9 +2,11 @@
 ; Make sure that the loop in the end has only one basic block.
 
 ; CHECK-LABEL: fred
+; CHECK: %b2
 ; Rely on the comments, make sure the one for the loop header is present.
 ; CHECK: %loop
-; CHECK-NOT: %should_merge
+; CHECK: %should_merge
+; CHECK: %exit
 
 target triple = "hexagon"
 

Modified: llvm/trunk/test/CodeGen/Hexagon/prof-early-if.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/prof-early-if.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/prof-early-if.ll (original)
+++ llvm/trunk/test/CodeGen/Hexagon/prof-early-if.ll Fri Jun 14 16:08:59 2019
@@ -1,8 +1,8 @@
 ; RUN: llc -O2 -march=hexagon < %s | FileCheck %s
 ; Rely on the comments generated by llc. Check that "if.then" was not predicated.
+; CHECK: b5
 ; CHECK: b2
 ; CHECK-NOT: if{{.*}}memd
-; CHECK: b5
 
 %s.0 = type { [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [3 x i32], [24 x i32], [8 x %s.1], [5 x i32] }
 %s.1 = type { i32, i32 }

Modified: llvm/trunk/test/CodeGen/Hexagon/redundant-branching2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/redundant-branching2.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/redundant-branching2.ll (original)
+++ llvm/trunk/test/CodeGen/Hexagon/redundant-branching2.ll Fri Jun 14 16:08:59 2019
@@ -3,9 +3,9 @@
 
 ; CHECK: memub
 ; CHECK: memub
+; CHECK: cmp.eq
 ; CHECK: memub
 ; CHECK-NOT: if{{.*}}jump .LBB
-; CHECK: cmp.eq
 
 target triple = "hexagon-unknown--elf"
 

Modified: llvm/trunk/test/CodeGen/PowerPC/atomics-regression.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/atomics-regression.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/atomics-regression.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/atomics-regression.ll Fri Jun 14 16:08:59 2019
@@ -401,16 +401,15 @@ define void @test40(i8* %ptr, i8 %cmp, i
 ; PPC64LE-LABEL: test40:
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    rlwinm 4, 4, 0, 24, 31
-; PPC64LE-NEXT:    b .LBB40_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB40_1:
-; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB40_2:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB40_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB40_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    stbcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB40_1
+; PPC64LE-NEXT:  .LBB40_3:
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val monotonic monotonic
@@ -466,16 +465,15 @@ define void @test43(i8* %ptr, i8 %cmp, i
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    rlwinm 4, 4, 0, 24, 31
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    b .LBB43_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB43_1:
-; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB43_2:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB43_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB43_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    stbcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB43_1
+; PPC64LE-NEXT:  .LBB43_3:
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val release monotonic
@@ -487,16 +485,15 @@ define void @test44(i8* %ptr, i8 %cmp, i
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    rlwinm 4, 4, 0, 24, 31
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    b .LBB44_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB44_1:
-; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB44_2:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB44_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB44_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    stbcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB44_1
+; PPC64LE-NEXT:  .LBB44_3:
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val release acquire
@@ -622,16 +619,15 @@ define void @test50(i16* %ptr, i16 %cmp,
 ; PPC64LE-LABEL: test50:
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    rlwinm 4, 4, 0, 16, 31
-; PPC64LE-NEXT:    b .LBB50_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB50_1:
-; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB50_2:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB50_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB50_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    sthcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB50_1
+; PPC64LE-NEXT:  .LBB50_3:
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val monotonic monotonic
@@ -687,16 +683,15 @@ define void @test53(i16* %ptr, i16 %cmp,
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    rlwinm 4, 4, 0, 16, 31
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    b .LBB53_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB53_1:
-; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB53_2:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB53_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB53_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    sthcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB53_1
+; PPC64LE-NEXT:  .LBB53_3:
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val release monotonic
@@ -708,16 +703,15 @@ define void @test54(i16* %ptr, i16 %cmp,
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    rlwinm 4, 4, 0, 16, 31
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    b .LBB54_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB54_1:
-; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB54_2:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB54_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB54_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    sthcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB54_1
+; PPC64LE-NEXT:  .LBB54_3:
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val release acquire
@@ -842,16 +836,15 @@ define void @test59(i16* %ptr, i16 %cmp,
 define void @test60(i32* %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE-LABEL: test60:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    b .LBB60_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB60_1:
-; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB60_2:
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB60_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB60_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    stwcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB60_1
+; PPC64LE-NEXT:  .LBB60_3:
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val monotonic monotonic
@@ -904,16 +897,15 @@ define void @test63(i32* %ptr, i32 %cmp,
 ; PPC64LE-LABEL: test63:
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    b .LBB63_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB63_1:
-; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB63_2:
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB63_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB63_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    stwcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB63_1
+; PPC64LE-NEXT:  .LBB63_3:
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val release monotonic
@@ -924,16 +916,15 @@ define void @test64(i32* %ptr, i32 %cmp,
 ; PPC64LE-LABEL: test64:
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    b .LBB64_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB64_1:
-; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB64_2:
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB64_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB64_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    stwcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB64_1
+; PPC64LE-NEXT:  .LBB64_3:
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val release acquire
@@ -1053,16 +1044,15 @@ define void @test69(i32* %ptr, i32 %cmp,
 define void @test70(i64* %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE-LABEL: test70:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    b .LBB70_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB70_1:
-; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB70_2:
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpd 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB70_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB70_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    stdcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB70_1
+; PPC64LE-NEXT:  .LBB70_3:
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val monotonic monotonic
@@ -1115,16 +1105,15 @@ define void @test73(i64* %ptr, i64 %cmp,
 ; PPC64LE-LABEL: test73:
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    b .LBB73_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB73_1:
-; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB73_2:
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpd 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB73_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB73_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    stdcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB73_1
+; PPC64LE-NEXT:  .LBB73_3:
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val release monotonic
@@ -1135,16 +1124,15 @@ define void @test74(i64* %ptr, i64 %cmp,
 ; PPC64LE-LABEL: test74:
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    b .LBB74_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB74_1:
-; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB74_2:
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpd 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB74_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB74_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    stdcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB74_1
+; PPC64LE-NEXT:  .LBB74_3:
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val release acquire
@@ -1265,16 +1253,15 @@ define void @test80(i8* %ptr, i8 %cmp, i
 ; PPC64LE-LABEL: test80:
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    rlwinm 4, 4, 0, 24, 31
-; PPC64LE-NEXT:    b .LBB80_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB80_1:
-; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB80_2:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB80_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB80_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    stbcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB80_1
+; PPC64LE-NEXT:  .LBB80_3:
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val syncscope("singlethread") monotonic monotonic
@@ -1330,16 +1317,15 @@ define void @test83(i8* %ptr, i8 %cmp, i
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    rlwinm 4, 4, 0, 24, 31
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    b .LBB83_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB83_1:
-; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB83_2:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB83_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB83_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    stbcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB83_1
+; PPC64LE-NEXT:  .LBB83_3:
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val syncscope("singlethread") release monotonic
@@ -1351,16 +1337,15 @@ define void @test84(i8* %ptr, i8 %cmp, i
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    rlwinm 4, 4, 0, 24, 31
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    b .LBB84_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB84_1:
-; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB84_2:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB84_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB84_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    stbcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB84_1
+; PPC64LE-NEXT:  .LBB84_3:
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val syncscope("singlethread") release acquire
@@ -1486,16 +1471,15 @@ define void @test90(i16* %ptr, i16 %cmp,
 ; PPC64LE-LABEL: test90:
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    rlwinm 4, 4, 0, 16, 31
-; PPC64LE-NEXT:    b .LBB90_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB90_1:
-; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB90_2:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB90_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB90_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    sthcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB90_1
+; PPC64LE-NEXT:  .LBB90_3:
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val syncscope("singlethread") monotonic monotonic
@@ -1551,16 +1535,15 @@ define void @test93(i16* %ptr, i16 %cmp,
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    rlwinm 4, 4, 0, 16, 31
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    b .LBB93_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB93_1:
-; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB93_2:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB93_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB93_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    sthcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB93_1
+; PPC64LE-NEXT:  .LBB93_3:
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val syncscope("singlethread") release monotonic
@@ -1572,16 +1555,15 @@ define void @test94(i16* %ptr, i16 %cmp,
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    rlwinm 4, 4, 0, 16, 31
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    b .LBB94_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB94_1:
-; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB94_2:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB94_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB94_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    sthcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB94_1
+; PPC64LE-NEXT:  .LBB94_3:
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val syncscope("singlethread") release acquire
@@ -1706,16 +1688,15 @@ define void @test99(i16* %ptr, i16 %cmp,
 define void @test100(i32* %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE-LABEL: test100:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    b .LBB100_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB100_1:
-; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB100_2:
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB100_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB100_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    stwcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB100_1
+; PPC64LE-NEXT:  .LBB100_3:
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val syncscope("singlethread") monotonic monotonic
@@ -1768,16 +1749,15 @@ define void @test103(i32* %ptr, i32 %cmp
 ; PPC64LE-LABEL: test103:
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    b .LBB103_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB103_1:
-; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB103_2:
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB103_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB103_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    stwcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB103_1
+; PPC64LE-NEXT:  .LBB103_3:
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val syncscope("singlethread") release monotonic
@@ -1788,16 +1768,15 @@ define void @test104(i32* %ptr, i32 %cmp
 ; PPC64LE-LABEL: test104:
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    b .LBB104_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB104_1:
-; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB104_2:
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB104_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB104_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    stwcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB104_1
+; PPC64LE-NEXT:  .LBB104_3:
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val syncscope("singlethread") release acquire
@@ -1917,16 +1896,15 @@ define void @test109(i32* %ptr, i32 %cmp
 define void @test110(i64* %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE-LABEL: test110:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    b .LBB110_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB110_1:
-; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB110_2:
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpd 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB110_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB110_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    stdcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB110_1
+; PPC64LE-NEXT:  .LBB110_3:
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val syncscope("singlethread") monotonic monotonic
@@ -1979,16 +1957,15 @@ define void @test113(i64* %ptr, i64 %cmp
 ; PPC64LE-LABEL: test113:
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    b .LBB113_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB113_1:
-; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB113_2:
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpd 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB113_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB113_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    stdcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB113_1
+; PPC64LE-NEXT:  .LBB113_3:
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val syncscope("singlethread") release monotonic
@@ -1999,16 +1976,15 @@ define void @test114(i64* %ptr, i64 %cmp
 ; PPC64LE-LABEL: test114:
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    b .LBB114_2
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB114_1:
-; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:  .LBB114_2:
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpd 4, 6
-; PPC64LE-NEXT:    beq 0, .LBB114_1
-; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    bne 0, .LBB114_3
+; PPC64LE-NEXT:  # %bb.2:
+; PPC64LE-NEXT:    stdcx. 5, 0, 3
+; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    b .LBB114_1
+; PPC64LE-NEXT:  .LBB114_3:
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val syncscope("singlethread") release acquire

Modified: llvm/trunk/test/CodeGen/PowerPC/cmp_elimination.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/cmp_elimination.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/cmp_elimination.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/cmp_elimination.ll Fri Jun 14 16:08:59 2019
@@ -718,13 +718,14 @@ if.end:
 define void @func28(i32 signext %a) {
 ; CHECK-LABEL: @func28
 ; CHECK: cmplwi	 [[REG1:[0-9]+]], [[REG2:[0-9]+]]
-; CHECK: .[[LABEL1:[A-Z0-9_]+]]:
+; CHECK: .[[LABEL2:[A-Z0-9_]+]]:
+; CHECK: cmpwi   [[REG1]], [[REG2]]
+; CHECK: ble     0, .[[LABEL1:[A-Z0-9_]+]]
 ; CHECK-NOT: cmp
-; CHECK: bne	 0, .[[LABEL2:[A-Z0-9_]+]]
+; CHECK: bne     0, .[[LABEL2]]
 ; CHECK: bl dummy1
-; CHECK: .[[LABEL2]]:
-; CHECK: cmpwi	 [[REG1]], [[REG2]]
-; CHECK: bgt	 0, .[[LABEL1]]
+; CHECK: b .[[LABEL2]]
+; CHECK: .[[LABEL1]]:
 ; CHECK: blr
 entry:
   br label %do.body

Modified: llvm/trunk/test/CodeGen/PowerPC/ctrloop-shortLoops.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/ctrloop-shortLoops.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/ctrloop-shortLoops.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/ctrloop-shortLoops.ll Fri Jun 14 16:08:59 2019
@@ -88,7 +88,8 @@ for.body:
 ; Function Attrs: norecurse nounwind
 define signext i32 @testTripCount2NonSmallLoop() {
 ; CHECK-LABEL: testTripCount2NonSmallLoop:
-; CHECK: bge
+; CHECK: blt
+; CHECK: beq
 ; CHECK: blr
 
 entry:

Modified: llvm/trunk/test/CodeGen/PowerPC/expand-foldable-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/expand-foldable-isel.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/expand-foldable-isel.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/expand-foldable-isel.ll Fri Jun 14 16:08:59 2019
@@ -29,13 +29,13 @@ define void @_ZN3pov6ot_insEPPNS_14ot_no
 ;
 ; CHECK-LABEL: _ZN3pov6ot_insEPPNS_14ot_node_structEPNS_15ot_block_structEPNS_12ot_id_structE:
 ; CHECK:    mr r4, r3
-; CHECK:    bc 12, 4*cr5+lt, .LBB0_3
-; CHECK:   # %bb.2:
+; CHECK:    bc 12, 4*cr5+lt, [[CASE1:.LBB[0-9_]+]]
+; CHECK:   # %bb.
 ; CHECK:    ori r29, r6, 0
-; CHECK:    b .LBB0_4
-; CHECK:  .LBB0_3:
+; CHECK:    b [[MERGE:.LBB[0-9_]+]]
+; CHECK:  [[CASE1]]:
 ; CHECK:    addi r29, r5, 0
-; CHECK:  .LBB0_4:
+; CHECK:  [[MERGE]]:
 ; CHECK:    blr
 entry:
   br label %while.cond11

Modified: llvm/trunk/test/CodeGen/PowerPC/knowCRBitSpill.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/knowCRBitSpill.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/knowCRBitSpill.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/knowCRBitSpill.ll Fri Jun 14 16:08:59 2019
@@ -86,7 +86,7 @@ define dso_local signext i32 @spillCRUNS
 ; CHECK-NOT:    mfocrf [[REG2:.*]], [[CREG]]
 ; CHECK-NOT:    rlwinm [[REG2]], [[REG2]]
 ; CHECK:        stw [[REG1]]
-; CHECK:        .LBB1_1: # %redo_first_pass
+; CHECK:        .LBB1_1:
 entry:
   %and = and i32 %p3, 128
   %tobool = icmp eq i32 %and, 0

Modified: llvm/trunk/test/CodeGen/PowerPC/licm-remat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/licm-remat.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/licm-remat.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/licm-remat.ll Fri Jun 14 16:08:59 2019
@@ -24,8 +24,7 @@ define linkonce_odr void @ZN6snappyDecom
 ; CHECK-DAG:   addi 25, 3, _ZN6snappy8internalL8wordmaskE@toc@l
 ; CHECK-DAG:   addis 5, 2, _ZN6snappy8internalL10char_tableE@toc@ha
 ; CHECK-DAG:   addi 24, 5, _ZN6snappy8internalL10char_tableE@toc@l
-; CHECK:       b .[[LABEL1:[A-Z0-9_]+]]
-; CHECK:       .[[LABEL1]]: # %for.cond
+; CHECK:       .LBB0_2: # %for.cond
 ; CHECK-NOT:   addis {{[0-9]+}}, 2, _ZN6snappy8internalL8wordmaskE@toc@ha
 ; CHECK-NOT:   addis {{[0-9]+}}, 2, _ZN6snappy8internalL10char_tableE@toc@ha
 ; CHECK:       bctrl

Modified: llvm/trunk/test/CodeGen/SystemZ/atomicrmw-minmax-01.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/atomicrmw-minmax-01.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/atomicrmw-minmax-01.ll (original)
+++ llvm/trunk/test/CodeGen/SystemZ/atomicrmw-minmax-01.ll Fri Jun 14 16:08:59 2019
@@ -1,8 +1,8 @@
 ; Test 8-bit atomic min/max operations.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT1
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT2
+; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-block-placement | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-block-placement | FileCheck %s -check-prefix=CHECK-SHIFT1
+; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-block-placement | FileCheck %s -check-prefix=CHECK-SHIFT2
 
 ; Check signed minimum.
 ; - CHECK is for the main loop.

Modified: llvm/trunk/test/CodeGen/SystemZ/atomicrmw-minmax-02.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/atomicrmw-minmax-02.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/atomicrmw-minmax-02.ll (original)
+++ llvm/trunk/test/CodeGen/SystemZ/atomicrmw-minmax-02.ll Fri Jun 14 16:08:59 2019
@@ -1,8 +1,8 @@
 ; Test 8-bit atomic min/max operations.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT1
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT2
+; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-block-placement | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-block-placement | FileCheck %s -check-prefix=CHECK-SHIFT1
+; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-block-placement | FileCheck %s -check-prefix=CHECK-SHIFT2
 
 ; Check signed minimum.
 ; - CHECK is for the main loop.

Modified: llvm/trunk/test/CodeGen/SystemZ/loop-01.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/loop-01.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/loop-01.ll (original)
+++ llvm/trunk/test/CodeGen/SystemZ/loop-01.ll Fri Jun 14 16:08:59 2019
@@ -1,7 +1,7 @@
 ; Test loop tuning.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -disable-block-placement | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -disable-block-placement \
 ; RUN:  | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-Z13
 
 ; Test that strength reduction is applied to addresses with a scale factor,

Modified: llvm/trunk/test/CodeGen/SystemZ/loop-02.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/loop-02.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/loop-02.ll (original)
+++ llvm/trunk/test/CodeGen/SystemZ/loop-02.ll Fri Jun 14 16:08:59 2019
@@ -1,7 +1,7 @@
 ; Test BRCTH.
 
 ; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -mcpu=z196 \
-; RUN:   -no-integrated-as | FileCheck %s
+; RUN:   -no-integrated-as -disable-block-placement | FileCheck %s
 
 ; Test a loop that should be converted into dbr form and then use BRCTH.
 define void @f2(i32 *%src, i32 *%dest) {

Modified: llvm/trunk/test/CodeGen/SystemZ/swifterror.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/swifterror.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/swifterror.ll (original)
+++ llvm/trunk/test/CodeGen/SystemZ/swifterror.ll Fri Jun 14 16:08:59 2019
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=s390x-linux-gnu| FileCheck %s
-; RUN: llc < %s -O0 -mtriple=s390x-linux-gnu | FileCheck --check-prefix=CHECK-O0 %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-block-placement | FileCheck %s
+; RUN: llc < %s -O0 -mtriple=s390x-linux-gnu -disable-block-placement | FileCheck --check-prefix=CHECK-O0 %s
 
 declare i8* @malloc(i64)
 declare void @free(i8*)

Modified: llvm/trunk/test/CodeGen/Thumb/consthoist-physical-addr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb/consthoist-physical-addr.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb/consthoist-physical-addr.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb/consthoist-physical-addr.ll Fri Jun 14 16:08:59 2019
@@ -10,8 +10,9 @@ define i32 @C(i32 %x, i32* nocapture %y)
 ; CHECK-NEXT:    push {r4, r5, r7, lr}
 ; CHECK-NEXT:    movs r2, #0
 ; CHECK-NEXT:    ldr r3, .LCPI0_0
-; CHECK-NEXT:    b .LBB0_4
 ; CHECK-NEXT:  .LBB0_1:
+; CHECK-NEXT:    cmp r2, #128
+; CHECK-NEXT:    beq .LBB0_5
 ; CHECK-NEXT:    movs r4, #0
 ; CHECK-NEXT:    str r4, [r3, #8]
 ; CHECK-NEXT:    lsls r4, r2, #2
@@ -20,16 +21,15 @@ define i32 @C(i32 %x, i32* nocapture %y)
 ; CHECK-NEXT:    movs r5, #1
 ; CHECK-NEXT:    str r5, [r3, #12]
 ; CHECK-NEXT:    isb sy
-; CHECK-NEXT:  .LBB0_2:
+; CHECK-NEXT:  .LBB0_3:
 ; CHECK-NEXT:    ldr r5, [r3, #12]
 ; CHECK-NEXT:    cmp r5, #0
-; CHECK-NEXT:    bne .LBB0_2
+; CHECK-NEXT:    bne .LBB0_3
 ; CHECK-NEXT:    ldr r5, [r3, #4]
 ; CHECK-NEXT:    str r5, [r1, r4]
 ; CHECK-NEXT:    adds r2, r2, #1
-; CHECK-NEXT:  .LBB0_4:
-; CHECK-NEXT:    cmp r2, #128
-; CHECK-NEXT:    bne .LBB0_1
+; CHECK-NEXT:    b .LBB0_1
+; CHECK-NEXT:  .LBB0_5:
 ; CHECK-NEXT:    movs r0, #0
 ; CHECK-NEXT:    pop {r4, r5, r7, pc}
 ; CHECK-NEXT:    .p2align 2

Modified: llvm/trunk/test/CodeGen/X86/block-placement.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/block-placement.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/block-placement.ll (original)
+++ llvm/trunk/test/CodeGen/X86/block-placement.ll Fri Jun 14 16:08:59 2019
@@ -82,14 +82,14 @@ define i32 @test_loop_cold_blocks(i32 %i
 ; Check that we sink cold loop blocks after the hot loop body.
 ; CHECK-LABEL: test_loop_cold_blocks:
 ; CHECK: %entry
-; CHECK-NOT: .p2align
-; CHECK: %unlikely1
-; CHECK-NOT: .p2align
-; CHECK: %unlikely2
 ; CHECK: .p2align
 ; CHECK: %body1
 ; CHECK: %body2
 ; CHECK: %body3
+; CHECK-NOT: .p2align
+; CHECK: %unlikely1
+; CHECK-NOT: .p2align
+; CHECK: %unlikely2
 ; CHECK: %exit
 
 entry:
@@ -125,7 +125,7 @@ exit:
   ret i32 %sum
 }
 
-!0 = !{!"branch_weights", i32 4, i32 64}
+!0 = !{!"branch_weights", i32 1, i32 64}
 
 define i32 @test_loop_early_exits(i32 %i, i32* %a) {
 ; Check that we sink early exit blocks out of loop bodies.
@@ -189,8 +189,8 @@ define i32 @test_loop_rotate(i32 %i, i32
 ; loop, eliminating unconditional branches to the top.
 ; CHECK-LABEL: test_loop_rotate:
 ; CHECK: %entry
-; CHECK: %body1
 ; CHECK: %body0
+; CHECK: %body1
 ; CHECK: %exit
 
 entry:
@@ -957,16 +957,15 @@ define void @benchmark_heapsort(i32 %n,
 ; CHECK: %if.else
 ; CHECK: %if.end10
 ; Second rotated loop top
-; CHECK: .p2align
-; CHECK: %if.then24
 ; CHECK: %while.cond.outer
 ; Third rotated loop top
 ; CHECK: .p2align
+; CHECK: %if.end20
 ; CHECK: %while.cond
 ; CHECK: %while.body
 ; CHECK: %land.lhs.true
 ; CHECK: %if.then19
-; CHECK: %if.end20
+; CHECK: %if.then24
 ; CHECK: %if.then8
 ; CHECK: ret
 
@@ -1546,8 +1545,8 @@ define i32 @not_rotate_if_extra_branch_r
 ; CHECK-LABEL: not_rotate_if_extra_branch_regression
 ; CHECK: %.entry
 ; CHECK: %.first_backedge
-; CHECK: %.slow
 ; CHECK: %.second_header
+; CHECK: %.slow
 .entry:
   %sum.0 = shl nsw i32 %count, 1
   br label %.first_header

Modified: llvm/trunk/test/CodeGen/X86/code_placement.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/code_placement.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/code_placement.ll (original)
+++ llvm/trunk/test/CodeGen/X86/code_placement.ll Fri Jun 14 16:08:59 2019
@@ -4,6 +4,11 @@
 @Te1 = external global [256 x i32]		; <[256 x i32]*> [#uses=4]
 @Te3 = external global [256 x i32]		; <[256 x i32]*> [#uses=2]
 
+; CHECK: %entry
+; CHECK: %bb
+; CHECK: %bb1
+; CHECK: %bb2
+
 define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r) nounwind ssp {
 entry:
 	%0 = load i32, i32* %rk, align 4		; <i32> [#uses=1]
@@ -12,8 +17,6 @@ entry:
 	%tmp15 = add i32 %r, -1		; <i32> [#uses=1]
 	%tmp.16 = zext i32 %tmp15 to i64		; <i64> [#uses=2]
 	br label %bb
-; CHECK: jmp
-; CHECK-NEXT: align
 
 bb:		; preds = %bb1, %entry
 	%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %bb1 ]		; <i64> [#uses=3]

Modified: llvm/trunk/test/CodeGen/X86/code_placement_cold_loop_blocks.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/code_placement_cold_loop_blocks.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/code_placement_cold_loop_blocks.ll (original)
+++ llvm/trunk/test/CodeGen/X86/code_placement_cold_loop_blocks.ll Fri Jun 14 16:08:59 2019
@@ -44,8 +44,8 @@ define void @nested_loop_0(i1 %flag) !pr
 ; CHECK-LABEL: nested_loop_0:
 ; CHECK: callq c
 ; CHECK: callq d
-; CHECK: callq e
 ; CHECK: callq b
+; CHECK: callq e
 ; CHECK: callq f
 
 entry:

Modified: llvm/trunk/test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll (original)
+++ llvm/trunk/test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll Fri Jun 14 16:08:59 2019
@@ -1,13 +1,12 @@
 ; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s | FileCheck %s
 
 define void @foo() {
-; Test that when determining the edge probability from a node in an inner loop
-; to a node in an outer loop, the weights on edges in the inner loop should be
-; ignored if we are building the chain for the outer loop.
+; After moving the latch to the top of loop, there is no fall through from the
+; latch to outer loop.
 ;
 ; CHECK-LABEL: foo:
-; CHECK: callq c
 ; CHECK: callq b
+; CHECK: callq c
 
 entry:
   %call = call zeroext i1 @a()

Modified: llvm/trunk/test/CodeGen/X86/code_placement_loop_rotation2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/code_placement_loop_rotation2.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/code_placement_loop_rotation2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/code_placement_loop_rotation2.ll Fri Jun 14 16:08:59 2019
@@ -5,13 +5,13 @@ define void @foo() {
 ; Test a nested loop case when profile data is not available.
 ;
 ; CHECK-LABEL: foo:
+; CHECK: callq g
+; CHECK: callq h
 ; CHECK: callq b
-; CHECK: callq c
-; CHECK: callq d
 ; CHECK: callq e
 ; CHECK: callq f
-; CHECK: callq g
-; CHECK: callq h
+; CHECK: callq c
+; CHECK: callq d
 
 entry:
   br label %header
@@ -59,13 +59,13 @@ define void @bar() !prof !1 {
 ; Test a nested loop case when profile data is available.
 ;
 ; CHECK-PROFILE-LABEL: bar:
+; CHECK-PROFILE: callq h
+; CHECK-PROFILE: callq b
+; CHECK-PROFILE: callq g
 ; CHECK-PROFILE: callq e
 ; CHECK-PROFILE: callq f
 ; CHECK-PROFILE: callq c
 ; CHECK-PROFILE: callq d
-; CHECK-PROFILE: callq h
-; CHECK-PROFILE: callq b
-; CHECK-PROFILE: callq g
 
 entry:
   br label %header

Modified: llvm/trunk/test/CodeGen/X86/code_placement_no_header_change.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/code_placement_no_header_change.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/code_placement_no_header_change.ll (original)
+++ llvm/trunk/test/CodeGen/X86/code_placement_no_header_change.ll Fri Jun 14 16:08:59 2019
@@ -7,9 +7,9 @@ define i32 @bar(i32 %count) {
 ; Later backedge1 and backedge2 is rotated before loop header.
 ; CHECK-LABEL: bar
 ; CHECK: %.entry
+; CHECK: %.header
 ; CHECK: %.backedge1
 ; CHECK: %.backedge2
-; CHECK: %.header
 ; CHECK: %.exit
 .entry:
   %c = shl nsw i32 %count, 2

Modified: llvm/trunk/test/CodeGen/X86/conditional-tailcall.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/conditional-tailcall.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/conditional-tailcall.ll (original)
+++ llvm/trunk/test/CodeGen/X86/conditional-tailcall.ll Fri Jun 14 16:08:59 2019
@@ -258,9 +258,12 @@ define zeroext i1 @pr31257(%"class.std::
 ; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
 ; CHECK32-NEXT:    xorl %edi, %edi # encoding: [0x31,0xff]
 ; CHECK32-NEXT:    incl %edi # encoding: [0x47]
-; CHECK32-NEXT:    jmp .LBB3_1 # encoding: [0xeb,A]
-; CHECK32-NEXT:    # fixup A - offset: 1, value: .LBB3_1-1, kind: FK_PCRel_1
-; CHECK32-NEXT:  .LBB3_2: # %for.body
+; CHECK32-NEXT:  .LBB3_1: # %for.cond
+; CHECK32-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK32-NEXT:    testl %edx, %edx # encoding: [0x85,0xd2]
+; CHECK32-NEXT:    je .LBB3_13 # encoding: [0x74,A]
+; CHECK32-NEXT:    # fixup A - offset: 1, value: .LBB3_13-1, kind: FK_PCRel_1
+; CHECK32-NEXT:  # %bb.2: # %for.body
 ; CHECK32-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; CHECK32-NEXT:    cmpl $2, %ebx # encoding: [0x83,0xfb,0x02]
 ; CHECK32-NEXT:    je .LBB3_11 # encoding: [0x74,A]
@@ -314,12 +317,9 @@ define zeroext i1 @pr31257(%"class.std::
 ; CHECK32-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; CHECK32-NEXT:    incl %eax # encoding: [0x40]
 ; CHECK32-NEXT:    decl %edx # encoding: [0x4a]
-; CHECK32-NEXT:  .LBB3_1: # %for.cond
-; CHECK32-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK32-NEXT:    testl %edx, %edx # encoding: [0x85,0xd2]
-; CHECK32-NEXT:    jne .LBB3_2 # encoding: [0x75,A]
-; CHECK32-NEXT:    # fixup A - offset: 1, value: .LBB3_2-1, kind: FK_PCRel_1
-; CHECK32-NEXT:  # %bb.13:
+; CHECK32-NEXT:    jmp .LBB3_1 # encoding: [0xeb,A]
+; CHECK32-NEXT:    # fixup A - offset: 1, value: .LBB3_1-1, kind: FK_PCRel_1
+; CHECK32-NEXT:  .LBB3_13:
 ; CHECK32-NEXT:    cmpl $2, %ebx # encoding: [0x83,0xfb,0x02]
 ; CHECK32-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
 ; CHECK32-NEXT:    jmp .LBB3_14 # encoding: [0xeb,A]
@@ -369,56 +369,59 @@ define zeroext i1 @pr31257(%"class.std::
 ; CHECK64-NEXT:    .cfi_adjust_cfa_offset 8
 ; CHECK64-NEXT:    popq %r8 # encoding: [0x41,0x58]
 ; CHECK64-NEXT:    .cfi_adjust_cfa_offset -8
-; CHECK64-NEXT:    jmp .LBB3_11 # encoding: [0xeb,A]
-; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1
-; CHECK64-NEXT:  .LBB3_1: # %for.body
-; CHECK64-NEXT:    # in Loop: Header=BB3_11 Depth=1
-; CHECK64-NEXT:    cmpl $2, %ecx # encoding: [0x83,0xf9,0x02]
-; CHECK64-NEXT:    je .LBB3_9 # encoding: [0x74,A]
-; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1
+; CHECK64-NEXT:  .LBB3_1: # %for.cond
+; CHECK64-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK64-NEXT:    testq %rax, %rax # encoding: [0x48,0x85,0xc0]
+; CHECK64-NEXT:    je .LBB3_12 # encoding: [0x74,A]
+; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_12-1, kind: FK_PCRel_1
 ; CHECK64-NEXT:  # %bb.2: # %for.body
-; CHECK64-NEXT:    # in Loop: Header=BB3_11 Depth=1
-; CHECK64-NEXT:    cmpl $1, %ecx # encoding: [0x83,0xf9,0x01]
-; CHECK64-NEXT:    je .LBB3_7 # encoding: [0x74,A]
-; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_7-1, kind: FK_PCRel_1
+; CHECK64-NEXT:    # in Loop: Header=BB3_1 Depth=1
+; CHECK64-NEXT:    cmpl $2, %ecx # encoding: [0x83,0xf9,0x02]
+; CHECK64-NEXT:    je .LBB3_10 # encoding: [0x74,A]
+; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
 ; CHECK64-NEXT:  # %bb.3: # %for.body
-; CHECK64-NEXT:    # in Loop: Header=BB3_11 Depth=1
+; CHECK64-NEXT:    # in Loop: Header=BB3_1 Depth=1
+; CHECK64-NEXT:    cmpl $1, %ecx # encoding: [0x83,0xf9,0x01]
+; CHECK64-NEXT:    je .LBB3_8 # encoding: [0x74,A]
+; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_8-1, kind: FK_PCRel_1
+; CHECK64-NEXT:  # %bb.4: # %for.body
+; CHECK64-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; CHECK64-NEXT:    testl %ecx, %ecx # encoding: [0x85,0xc9]
-; CHECK64-NEXT:    jne .LBB3_10 # encoding: [0x75,A]
-; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
-; CHECK64-NEXT:  # %bb.4: # %sw.bb
-; CHECK64-NEXT:    # in Loop: Header=BB3_11 Depth=1
+; CHECK64-NEXT:    jne .LBB3_11 # encoding: [0x75,A]
+; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1
+; CHECK64-NEXT:  # %bb.5: # %sw.bb
+; CHECK64-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; CHECK64-NEXT:    movzbl (%rdi), %edx # encoding: [0x0f,0xb6,0x17]
 ; CHECK64-NEXT:    cmpl $43, %edx # encoding: [0x83,0xfa,0x2b]
 ; CHECK64-NEXT:    movl %r8d, %ecx # encoding: [0x44,0x89,0xc1]
-; CHECK64-NEXT:    je .LBB3_10 # encoding: [0x74,A]
-; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
-; CHECK64-NEXT:  # %bb.5: # %sw.bb
-; CHECK64-NEXT:    # in Loop: Header=BB3_11 Depth=1
+; CHECK64-NEXT:    je .LBB3_11 # encoding: [0x74,A]
+; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1
+; CHECK64-NEXT:  # %bb.6: # %sw.bb
+; CHECK64-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; CHECK64-NEXT:    cmpb $45, %dl # encoding: [0x80,0xfa,0x2d]
 ; CHECK64-NEXT:    movl %r8d, %ecx # encoding: [0x44,0x89,0xc1]
-; CHECK64-NEXT:    je .LBB3_10 # encoding: [0x74,A]
-; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
-; CHECK64-NEXT:  # %bb.6: # %if.else
-; CHECK64-NEXT:    # in Loop: Header=BB3_11 Depth=1
+; CHECK64-NEXT:    je .LBB3_11 # encoding: [0x74,A]
+; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1
+; CHECK64-NEXT:  # %bb.7: # %if.else
+; CHECK64-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; CHECK64-NEXT:    addl $-48, %edx # encoding: [0x83,0xc2,0xd0]
 ; CHECK64-NEXT:    cmpl $10, %edx # encoding: [0x83,0xfa,0x0a]
-; CHECK64-NEXT:    jmp .LBB3_8 # encoding: [0xeb,A]
-; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_8-1, kind: FK_PCRel_1
-; CHECK64-NEXT:  .LBB3_7: # %sw.bb14
-; CHECK64-NEXT:    # in Loop: Header=BB3_11 Depth=1
+; CHECK64-NEXT:    jmp .LBB3_9 # encoding: [0xeb,A]
+; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1
+; CHECK64-NEXT:  .LBB3_8: # %sw.bb14
+; CHECK64-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; CHECK64-NEXT:    movzbl (%rdi), %ecx # encoding: [0x0f,0xb6,0x0f]
 ; CHECK64-NEXT:    addl $-48, %ecx # encoding: [0x83,0xc1,0xd0]
 ; CHECK64-NEXT:    cmpl $10, %ecx # encoding: [0x83,0xf9,0x0a]
-; CHECK64-NEXT:  .LBB3_8: # %if.else
-; CHECK64-NEXT:    # in Loop: Header=BB3_11 Depth=1
+; CHECK64-NEXT:  .LBB3_9: # %if.else
+; CHECK64-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; CHECK64-NEXT:    movl %r9d, %ecx # encoding: [0x44,0x89,0xc9]
-; CHECK64-NEXT:    jb .LBB3_10 # encoding: [0x72,A]
-; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
+; CHECK64-NEXT:    jb .LBB3_11 # encoding: [0x72,A]
+; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1
 ; CHECK64-NEXT:    jmp .LBB3_13 # encoding: [0xeb,A]
 ; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_13-1, kind: FK_PCRel_1
-; CHECK64-NEXT:  .LBB3_9: # %sw.bb22
-; CHECK64-NEXT:    # in Loop: Header=BB3_11 Depth=1
+; CHECK64-NEXT:  .LBB3_10: # %sw.bb22
+; CHECK64-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; CHECK64-NEXT:    movzbl (%rdi), %ecx # encoding: [0x0f,0xb6,0x0f]
 ; CHECK64-NEXT:    addl $-48, %ecx # encoding: [0x83,0xc1,0xd0]
 ; CHECK64-NEXT:    cmpl $10, %ecx # encoding: [0x83,0xf9,0x0a]
@@ -426,16 +429,13 @@ define zeroext i1 @pr31257(%"class.std::
 ; CHECK64-NEXT:    jae _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_ # TAILCALL
 ; CHECK64-NEXT:    # encoding: [0x73,A]
 ; CHECK64-NEXT:    # fixup A - offset: 1, value: _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_-1, kind: FK_PCRel_1
-; CHECK64-NEXT:  .LBB3_10: # %for.inc
-; CHECK64-NEXT:    # in Loop: Header=BB3_11 Depth=1
+; CHECK64-NEXT:  .LBB3_11: # %for.inc
+; CHECK64-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; CHECK64-NEXT:    incq %rdi # encoding: [0x48,0xff,0xc7]
 ; CHECK64-NEXT:    decq %rax # encoding: [0x48,0xff,0xc8]
-; CHECK64-NEXT:  .LBB3_11: # %for.cond
-; CHECK64-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK64-NEXT:    testq %rax, %rax # encoding: [0x48,0x85,0xc0]
-; CHECK64-NEXT:    jne .LBB3_1 # encoding: [0x75,A]
+; CHECK64-NEXT:    jmp .LBB3_1 # encoding: [0xeb,A]
 ; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_1-1, kind: FK_PCRel_1
-; CHECK64-NEXT:  # %bb.12:
+; CHECK64-NEXT:  .LBB3_12:
 ; CHECK64-NEXT:    cmpl $2, %ecx # encoding: [0x83,0xf9,0x02]
 ; CHECK64-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
 ; CHECK64-NEXT:    # kill: def $al killed $al killed $eax
@@ -451,51 +451,54 @@ define zeroext i1 @pr31257(%"class.std::
 ; WIN64-NEXT:    movq -24(%rcx), %r8 # encoding: [0x4c,0x8b,0x41,0xe8]
 ; WIN64-NEXT:    leaq (%rcx,%r8), %rdx # encoding: [0x4a,0x8d,0x14,0x01]
 ; WIN64-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
-; WIN64-NEXT:    jmp .LBB3_10 # encoding: [0xeb,A]
-; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
-; WIN64-NEXT:  .LBB3_1: # %for.body
-; WIN64-NEXT:    # in Loop: Header=BB3_10 Depth=1
-; WIN64-NEXT:    cmpl $2, %eax # encoding: [0x83,0xf8,0x02]
-; WIN64-NEXT:    je .LBB3_8 # encoding: [0x74,A]
-; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_8-1, kind: FK_PCRel_1
+; WIN64-NEXT:  .LBB3_1: # %for.cond
+; WIN64-NEXT:    # =>This Inner Loop Header: Depth=1
+; WIN64-NEXT:    testq %r8, %r8 # encoding: [0x4d,0x85,0xc0]
+; WIN64-NEXT:    je .LBB3_11 # encoding: [0x74,A]
+; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1
 ; WIN64-NEXT:  # %bb.2: # %for.body
-; WIN64-NEXT:    # in Loop: Header=BB3_10 Depth=1
-; WIN64-NEXT:    cmpl $1, %eax # encoding: [0x83,0xf8,0x01]
-; WIN64-NEXT:    je .LBB3_6 # encoding: [0x74,A]
-; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_6-1, kind: FK_PCRel_1
+; WIN64-NEXT:    # in Loop: Header=BB3_1 Depth=1
+; WIN64-NEXT:    cmpl $2, %eax # encoding: [0x83,0xf8,0x02]
+; WIN64-NEXT:    je .LBB3_9 # encoding: [0x74,A]
+; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1
 ; WIN64-NEXT:  # %bb.3: # %for.body
-; WIN64-NEXT:    # in Loop: Header=BB3_10 Depth=1
+; WIN64-NEXT:    # in Loop: Header=BB3_1 Depth=1
+; WIN64-NEXT:    cmpl $1, %eax # encoding: [0x83,0xf8,0x01]
+; WIN64-NEXT:    je .LBB3_7 # encoding: [0x74,A]
+; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_7-1, kind: FK_PCRel_1
+; WIN64-NEXT:  # %bb.4: # %for.body
+; WIN64-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; WIN64-NEXT:    testl %eax, %eax # encoding: [0x85,0xc0]
-; WIN64-NEXT:    jne .LBB3_9 # encoding: [0x75,A]
-; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1
-; WIN64-NEXT:  # %bb.4: # %sw.bb
-; WIN64-NEXT:    # in Loop: Header=BB3_10 Depth=1
+; WIN64-NEXT:    jne .LBB3_10 # encoding: [0x75,A]
+; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
+; WIN64-NEXT:  # %bb.5: # %sw.bb
+; WIN64-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; WIN64-NEXT:    movzbl (%rcx), %r9d # encoding: [0x44,0x0f,0xb6,0x09]
 ; WIN64-NEXT:    cmpl $43, %r9d # encoding: [0x41,0x83,0xf9,0x2b]
 ; WIN64-NEXT:    movl $1, %eax # encoding: [0xb8,0x01,0x00,0x00,0x00]
-; WIN64-NEXT:    je .LBB3_9 # encoding: [0x74,A]
-; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1
-; WIN64-NEXT:  # %bb.5: # %sw.bb
-; WIN64-NEXT:    # in Loop: Header=BB3_10 Depth=1
+; WIN64-NEXT:    je .LBB3_10 # encoding: [0x74,A]
+; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
+; WIN64-NEXT:  # %bb.6: # %sw.bb
+; WIN64-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; WIN64-NEXT:    cmpb $45, %r9b # encoding: [0x41,0x80,0xf9,0x2d]
-; WIN64-NEXT:    je .LBB3_9 # encoding: [0x74,A]
-; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1
-; WIN64-NEXT:    jmp .LBB3_7 # encoding: [0xeb,A]
-; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_7-1, kind: FK_PCRel_1
-; WIN64-NEXT:  .LBB3_6: # %sw.bb14
-; WIN64-NEXT:    # in Loop: Header=BB3_10 Depth=1
+; WIN64-NEXT:    je .LBB3_10 # encoding: [0x74,A]
+; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
+; WIN64-NEXT:    jmp .LBB3_8 # encoding: [0xeb,A]
+; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_8-1, kind: FK_PCRel_1
+; WIN64-NEXT:  .LBB3_7: # %sw.bb14
+; WIN64-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; WIN64-NEXT:    movzbl (%rcx), %r9d # encoding: [0x44,0x0f,0xb6,0x09]
-; WIN64-NEXT:  .LBB3_7: # %if.else
-; WIN64-NEXT:    # in Loop: Header=BB3_10 Depth=1
+; WIN64-NEXT:  .LBB3_8: # %if.else
+; WIN64-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; WIN64-NEXT:    addl $-48, %r9d # encoding: [0x41,0x83,0xc1,0xd0]
 ; WIN64-NEXT:    movl $2, %eax # encoding: [0xb8,0x02,0x00,0x00,0x00]
 ; WIN64-NEXT:    cmpl $10, %r9d # encoding: [0x41,0x83,0xf9,0x0a]
-; WIN64-NEXT:    jb .LBB3_9 # encoding: [0x72,A]
-; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1
+; WIN64-NEXT:    jb .LBB3_10 # encoding: [0x72,A]
+; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
 ; WIN64-NEXT:    jmp .LBB3_12 # encoding: [0xeb,A]
 ; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_12-1, kind: FK_PCRel_1
-; WIN64-NEXT:  .LBB3_8: # %sw.bb22
-; WIN64-NEXT:    # in Loop: Header=BB3_10 Depth=1
+; WIN64-NEXT:  .LBB3_9: # %sw.bb22
+; WIN64-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; WIN64-NEXT:    movzbl (%rcx), %r9d # encoding: [0x44,0x0f,0xb6,0x09]
 ; WIN64-NEXT:    addl $-48, %r9d # encoding: [0x41,0x83,0xc1,0xd0]
 ; WIN64-NEXT:    movl $2, %eax # encoding: [0xb8,0x02,0x00,0x00,0x00]
@@ -503,16 +506,13 @@ define zeroext i1 @pr31257(%"class.std::
 ; WIN64-NEXT:    jae _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_ # TAILCALL
 ; WIN64-NEXT:    # encoding: [0x73,A]
 ; WIN64-NEXT:    # fixup A - offset: 1, value: _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_-1, kind: FK_PCRel_1
-; WIN64-NEXT:  .LBB3_9: # %for.inc
-; WIN64-NEXT:    # in Loop: Header=BB3_10 Depth=1
+; WIN64-NEXT:  .LBB3_10: # %for.inc
+; WIN64-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; WIN64-NEXT:    incq %rcx # encoding: [0x48,0xff,0xc1]
 ; WIN64-NEXT:    decq %r8 # encoding: [0x49,0xff,0xc8]
-; WIN64-NEXT:  .LBB3_10: # %for.cond
-; WIN64-NEXT:    # =>This Inner Loop Header: Depth=1
-; WIN64-NEXT:    testq %r8, %r8 # encoding: [0x4d,0x85,0xc0]
-; WIN64-NEXT:    jne .LBB3_1 # encoding: [0x75,A]
+; WIN64-NEXT:    jmp .LBB3_1 # encoding: [0xeb,A]
 ; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_1-1, kind: FK_PCRel_1
-; WIN64-NEXT:  # %bb.11:
+; WIN64-NEXT:  .LBB3_11:
 ; WIN64-NEXT:    cmpl $2, %eax # encoding: [0x83,0xf8,0x02]
 ; WIN64-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
 ; WIN64-NEXT:    # kill: def $al killed $al killed $eax

Modified: llvm/trunk/test/CodeGen/X86/loop-blocks.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/loop-blocks.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/loop-blocks.ll (original)
+++ llvm/trunk/test/CodeGen/X86/loop-blocks.ll Fri Jun 14 16:08:59 2019
@@ -7,12 +7,14 @@
 ; order to avoid a branch within the loop.
 
 ; CHECK-LABEL: simple:
-;      CHECK:   jmp   .LBB0_1
-; CHECK-NEXT:   align
-; CHECK-NEXT: .LBB0_2:
-; CHECK-NEXT:   callq loop_latch
+;      CHECK:   align
 ; CHECK-NEXT: .LBB0_1:
 ; CHECK-NEXT:   callq loop_header
+;      CHECK:   js .LBB0_3
+; CHECK-NEXT:   callq loop_latch
+; CHECK-NEXT:   jmp .LBB0_1
+; CHECK-NEXT: .LBB0_3:
+; CHECK-NEXT:   callq exit
 
 define void @simple() nounwind {
 entry:
@@ -75,17 +77,21 @@ exit:
 ; CHECK-LABEL: yet_more_involved:
 ;      CHECK:   jmp .LBB2_1
 ; CHECK-NEXT:   align
-; CHECK-NEXT: .LBB2_5:
-; CHECK-NEXT:   callq block_a_true_func
-; CHECK-NEXT:   callq block_a_merge_func
-; CHECK-NEXT: .LBB2_1:
+
+;      CHECK: .LBB2_1:
 ; CHECK-NEXT:   callq body
-;
-; LBB2_4
-;      CHECK:   callq bar99
+; CHECK-NEXT:   callq get
+; CHECK-NEXT:   cmpl $2, %eax
+; CHECK-NEXT:   jge .LBB2_2
+; CHECK-NEXT:   callq bar99
 ; CHECK-NEXT:   callq get
 ; CHECK-NEXT:   cmpl $2999, %eax
-; CHECK-NEXT:   jle .LBB2_5
+; CHECK-NEXT:   jg .LBB2_6
+; CHECK-NEXT:   callq block_a_true_func
+; CHECK-NEXT:   callq block_a_merge_func
+; CHECK-NEXT:   jmp .LBB2_1
+; CHECK-NEXT:   align
+; CHECK-NEXT: .LBB2_6:
 ; CHECK-NEXT:   callq block_a_false_func
 ; CHECK-NEXT:   callq block_a_merge_func
 ; CHECK-NEXT:   jmp .LBB2_1
@@ -201,12 +207,12 @@ block102:
 }
 
 ; CHECK-LABEL: check_minsize:
-;      CHECK:   jmp   .LBB4_1
 ; CHECK-NOT:   align
-; CHECK-NEXT: .LBB4_2:
-; CHECK-NEXT:   callq loop_latch
-; CHECK-NEXT: .LBB4_1:
+; CHECK:      .LBB4_1:
 ; CHECK-NEXT:   callq loop_header
+; CHECK:        callq loop_latch
+; CHECK:      .LBB4_3:
+; CHECK:        callq exit
 
 
 define void @check_minsize() minsize nounwind {

Added: llvm/trunk/test/CodeGen/X86/loop-rotate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/loop-rotate.ll?rev=363471&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/loop-rotate.ll (added)
+++ llvm/trunk/test/CodeGen/X86/loop-rotate.ll Fri Jun 14 16:08:59 2019
@@ -0,0 +1,120 @@
+; RUN: llc -mtriple=i686-linux < %s | FileCheck %s
+
+; Don't rotate the loop if the number of fall through to exit is not larger
+; than the number of fall through to header.
+define void @no_rotate() {
+; CHECK-LABEL: no_rotate
+; CHECK: %entry
+; CHECK: %header
+; CHECK: %middle
+; CHECK: %latch1
+; CHECK: %latch2
+; CHECK: %end
+entry:
+  br label %header
+
+header:
+  %val1 = call i1 @foo()
+  br i1 %val1, label %middle, label %end
+
+middle:
+  %val2 = call i1 @foo()
+  br i1 %val2, label %latch1, label %end
+
+latch1:
+  %val3 = call i1 @foo()
+  br i1 %val3, label %latch2, label %header
+
+latch2:
+  %val4 = call i1 @foo()
+  br label %header
+
+end:
+  ret void
+}
+
+define void @do_rotate() {
+; CHECK-LABEL: do_rotate
+; CHECK: %entry
+; CHECK: %then
+; CHECK: %else
+; CHECK: %latch1
+; CHECK: %latch2
+; CHECK: %header
+; CHECK: %end
+entry:
+  %val0 = call i1 @foo()
+  br i1 %val0, label %then, label %else
+
+then:
+  call void @a()
+  br label %header
+
+else:
+  call void @b()
+  br label %header
+
+header:
+  %val1 = call i1 @foo()
+  br i1 %val1, label %latch1, label %end
+
+latch1:
+  %val3 = call i1 @foo()
+  br i1 %val3, label %latch2, label %header
+
+latch2:
+  %val4 = call i1 @foo()
+  br label %header
+
+end:
+  ret void
+}
+
+; The loop structure is same as in @no_rotate, but the loop header's predecessor
+; doesn't fall through to it, so it should be rotated to get exit fall through.
+define void @do_rotate2() {
+; CHECK-LABEL: do_rotate2
+; CHECK: %entry
+; CHECK: %then
+; CHECK: %middle
+; CHECK: %latch1
+; CHECK: %latch2
+; CHECK: %header
+; CHECK: %exit
+entry:
+  %val0 = call i1 @foo()
+  br i1 %val0, label %then, label %header, !prof !1
+
+then:
+  call void @a()
+  br label %end
+
+header:
+  %val1 = call i1 @foo()
+  br i1 %val1, label %middle, label %exit
+
+middle:
+  %val2 = call i1 @foo()
+  br i1 %val2, label %latch1, label %exit
+
+latch1:
+  %val3 = call i1 @foo()
+  br i1 %val3, label %latch2, label %header
+
+latch2:
+  %val4 = call i1 @foo()
+  br label %header
+
+exit:
+  call void @b()
+  br label %end
+
+end:
+  ret void
+}
+
+declare i1 @foo()
+declare void @a()
+declare void @b()
+
+!1 = !{!"branch_weights", i32 10, i32 1}

Modified: llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll (original)
+++ llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll Fri Jun 14 16:08:59 2019
@@ -21,22 +21,7 @@ define void @t(i8* nocapture %in, i8* no
 ; GENERIC-NEXT:    movq _Te1@{{.*}}(%rip), %r8
 ; GENERIC-NEXT:    movq _Te3@{{.*}}(%rip), %r10
 ; GENERIC-NEXT:    movq %rcx, %r11
-; GENERIC-NEXT:    jmp LBB0_1
 ; GENERIC-NEXT:    .p2align 4, 0x90
-; GENERIC-NEXT:  LBB0_2: ## %bb1
-; GENERIC-NEXT:    ## in Loop: Header=BB0_1 Depth=1
-; GENERIC-NEXT:    movl %edi, %ebx
-; GENERIC-NEXT:    shrl $16, %ebx
-; GENERIC-NEXT:    movzbl %bl, %ebx
-; GENERIC-NEXT:    xorl (%r8,%rbx,4), %eax
-; GENERIC-NEXT:    xorl -4(%r14), %eax
-; GENERIC-NEXT:    shrl $24, %edi
-; GENERIC-NEXT:    movzbl %bpl, %ebx
-; GENERIC-NEXT:    movl (%r10,%rbx,4), %ebx
-; GENERIC-NEXT:    xorl (%r9,%rdi,4), %ebx
-; GENERIC-NEXT:    xorl (%r14), %ebx
-; GENERIC-NEXT:    decq %r11
-; GENERIC-NEXT:    addq $16, %r14
 ; GENERIC-NEXT:  LBB0_1: ## %bb
 ; GENERIC-NEXT:    ## =>This Inner Loop Header: Depth=1
 ; GENERIC-NEXT:    movzbl %al, %edi
@@ -56,8 +41,23 @@ define void @t(i8* nocapture %in, i8* no
 ; GENERIC-NEXT:    shrl $24, %eax
 ; GENERIC-NEXT:    movl (%r9,%rax,4), %eax
 ; GENERIC-NEXT:    testq %r11, %r11
-; GENERIC-NEXT:    jne LBB0_2
-; GENERIC-NEXT:  ## %bb.3: ## %bb2
+; GENERIC-NEXT:    je LBB0_3
+; GENERIC-NEXT:  ## %bb.2: ## %bb1
+; GENERIC-NEXT:    ## in Loop: Header=BB0_1 Depth=1
+; GENERIC-NEXT:    movl %edi, %ebx
+; GENERIC-NEXT:    shrl $16, %ebx
+; GENERIC-NEXT:    movzbl %bl, %ebx
+; GENERIC-NEXT:    xorl (%r8,%rbx,4), %eax
+; GENERIC-NEXT:    xorl -4(%r14), %eax
+; GENERIC-NEXT:    shrl $24, %edi
+; GENERIC-NEXT:    movzbl %bpl, %ebx
+; GENERIC-NEXT:    movl (%r10,%rbx,4), %ebx
+; GENERIC-NEXT:    xorl (%r9,%rdi,4), %ebx
+; GENERIC-NEXT:    xorl (%r14), %ebx
+; GENERIC-NEXT:    decq %r11
+; GENERIC-NEXT:    addq $16, %r14
+; GENERIC-NEXT:    jmp LBB0_1
+; GENERIC-NEXT:  LBB0_3: ## %bb2
 ; GENERIC-NEXT:    shlq $4, %rcx
 ; GENERIC-NEXT:    andl $-16777216, %eax ## imm = 0xFF000000
 ; GENERIC-NEXT:    movl %edi, %ebx
@@ -105,21 +105,7 @@ define void @t(i8* nocapture %in, i8* no
 ; ATOM-NEXT:    movq _Te3@{{.*}}(%rip), %r10
 ; ATOM-NEXT:    decl %ecx
 ; ATOM-NEXT:    movq %rcx, %r11
-; ATOM-NEXT:    jmp LBB0_1
 ; ATOM-NEXT:    .p2align 4, 0x90
-; ATOM-NEXT:  LBB0_2: ## %bb1
-; ATOM-NEXT:    ## in Loop: Header=BB0_1 Depth=1
-; ATOM-NEXT:    shrl $16, %eax
-; ATOM-NEXT:    shrl $24, %edi
-; ATOM-NEXT:    decq %r11
-; ATOM-NEXT:    movzbl %al, %ebp
-; ATOM-NEXT:    movzbl %bl, %eax
-; ATOM-NEXT:    movl (%r10,%rax,4), %eax
-; ATOM-NEXT:    xorl (%r8,%rbp,4), %r15d
-; ATOM-NEXT:    xorl (%r9,%rdi,4), %eax
-; ATOM-NEXT:    xorl -4(%r14), %r15d
-; ATOM-NEXT:    xorl (%r14), %eax
-; ATOM-NEXT:    addq $16, %r14
 ; ATOM-NEXT:  LBB0_1: ## %bb
 ; ATOM-NEXT:    ## =>This Inner Loop Header: Depth=1
 ; ATOM-NEXT:    movl %eax, %edi
@@ -140,8 +126,22 @@ define void @t(i8* nocapture %in, i8* no
 ; ATOM-NEXT:    movl (%r9,%rax,4), %r15d
 ; ATOM-NEXT:    testq %r11, %r11
 ; ATOM-NEXT:    movl %edi, %eax
-; ATOM-NEXT:    jne LBB0_2
-; ATOM-NEXT:  ## %bb.3: ## %bb2
+; ATOM-NEXT:    je LBB0_3
+; ATOM-NEXT:  ## %bb.2: ## %bb1
+; ATOM-NEXT:    ## in Loop: Header=BB0_1 Depth=1
+; ATOM-NEXT:    shrl $16, %eax
+; ATOM-NEXT:    shrl $24, %edi
+; ATOM-NEXT:    decq %r11
+; ATOM-NEXT:    movzbl %al, %ebp
+; ATOM-NEXT:    movzbl %bl, %eax
+; ATOM-NEXT:    movl (%r10,%rax,4), %eax
+; ATOM-NEXT:    xorl (%r8,%rbp,4), %r15d
+; ATOM-NEXT:    xorl (%r9,%rdi,4), %eax
+; ATOM-NEXT:    xorl -4(%r14), %r15d
+; ATOM-NEXT:    xorl (%r14), %eax
+; ATOM-NEXT:    addq $16, %r14
+; ATOM-NEXT:    jmp LBB0_1
+; ATOM-NEXT:  LBB0_3: ## %bb2
 ; ATOM-NEXT:    shrl $16, %eax
 ; ATOM-NEXT:    shrl $8, %edi
 ; ATOM-NEXT:    movzbl %bl, %ebp

Added: llvm/trunk/test/CodeGen/X86/move_latch_to_loop_top.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/move_latch_to_loop_top.ll?rev=363471&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/move_latch_to_loop_top.ll (added)
+++ llvm/trunk/test/CodeGen/X86/move_latch_to_loop_top.ll Fri Jun 14 16:08:59 2019
@@ -0,0 +1,239 @@
+; RUN: llc  -mcpu=corei7 -mtriple=x86_64-linux < %s | FileCheck %s
+
+; The block latch should be moved before header.
+;CHECK-LABEL: test1:
+;CHECK:       %latch
+;CHECK:       %header
+;CHECK:       %false
+define i32 @test1(i32* %p) {
+entry:
+  br label %header
+
+header:
+  %x1 = phi i64 [0, %entry], [%x2, %latch]
+  %count1 = phi i32 [0, %entry], [%count4, %latch]
+  %0 = ptrtoint i32* %p to i64
+  %1 = add i64 %0, %x1
+  %2 = inttoptr i64 %1 to i32*
+  %data = load i32, i32* %2
+  %3 = icmp eq i32 %data, 0
+  br i1 %3, label %latch, label %false
+
+false:
+  %count2 = add i32 %count1, 1
+  br label %latch
+
+latch:
+  %count4 = phi i32 [%count2, %false], [%count1, %header]
+  %x2 = add i64 %x1, 1
+  %4 = icmp eq i64 %x2, 100
+  br i1 %4, label %exit, label %header
+
+exit:
+  ret i32 %count4
+}
+
+; The block latch and one of false/true should be moved before header.
+;CHECK-LABEL: test2:
+;CHECK:       %true
+;CHECK:       %latch
+;CHECK:       %header
+;CHECK:       %false
+define i32 @test2(i32* %p) {
+entry:
+  br label %header
+
+header:
+  %x1 = phi i64 [0, %entry], [%x2, %latch]
+  %count1 = phi i32 [0, %entry], [%count4, %latch]
+  %0 = ptrtoint i32* %p to i64
+  %1 = add i64 %0, %x1
+  %2 = inttoptr i64 %1 to i32*
+  %data = load i32, i32* %2
+  %3 = icmp eq i32 %data, 0
+  br i1 %3, label %true, label %false
+
+false:
+  %count2 = add i32 %count1, 1
+  br label %latch
+
+true:
+  %count3 = add i32 %count1, 2
+  br label %latch
+
+latch:
+  %count4 = phi i32 [%count2, %false], [%count3, %true]
+  %x2 = add i64 %x1, 1
+  %4 = icmp eq i64 %x2, 100
+  br i1 %4, label %exit, label %header
+
+exit:
+  ret i32 %count4
+}
+
+; More blocks can be moved before header.
+;            header <------------
+;              /\               |
+;             /  \              |
+;            /    \             |
+;           /      \            |
+;          /        \           |
+;        true      false        |
+;         /\         /\         |
+;        /  \       /  \        |
+;       /    \     /    \       |
+;    true3 false3 /      \      |
+;      \    /   true2  false2   |
+;       \  /      \      /      |
+;        \/        \    /       |
+;      endif3       \  /        |
+;         \          \/         |
+;          \       endif2       |
+;           \        /          |
+;            \      /           |
+;             \    /            |
+;              \  /             |
+;               \/              |
+;              latch-------------
+;                |
+;                |
+;              exit
+;
+; Blocks true3,endif3,latch should be moved before header.
+;
+;CHECK-LABEL: test3:
+;CHECK:       %true3
+;CHECK:       %endif3
+;CHECK:       %latch
+;CHECK:       %header
+;CHECK:       %false
+define i32 @test3(i32* %p) {
+entry:
+  br label %header
+
+header:
+  %x1 = phi i64 [0, %entry], [%x2, %latch]
+  %count1 = phi i32 [0, %entry], [%count12, %latch]
+  %0 = ptrtoint i32* %p to i64
+  %1 = add i64 %0, %x1
+  %2 = inttoptr i64 %1 to i32*
+  %data = load i32, i32* %2
+  %3 = icmp eq i32 %data, 0
+  br i1 %3, label %true, label %false, !prof !3
+
+false:
+  %count2 = add i32 %count1, 1
+  %cond = icmp sgt i32 %count2, 10
+  br i1 %cond, label %true2, label %false2
+
+false2:
+  %count3 = and i32 %count2, 7
+  br label %endif2
+
+true2:
+  %count4 = mul i32 %count2, 3
+  br label %endif2
+
+endif2:
+  %count5 = phi i32 [%count3, %false2], [%count4, %true2]
+  %count6 = sub i32 %count5, 5
+  br label %latch
+
+true:
+  %count7 = add i32 %count1, 2
+  %cond2 = icmp slt i32 %count7, 20
+  br i1 %cond2, label %true3, label %false3
+
+false3:
+  %count8 = or i32 %count7, 3
+  br label %endif3
+
+true3:
+  %count9 = xor i32 %count7, 55
+  br label %endif3
+
+endif3:
+  %count10 = phi i32 [%count8, %false3], [%count9, %true3]
+  %count11 = add i32 %count10, 3
+  br label %latch
+
+latch:
+  %count12 = phi i32 [%count6, %endif2], [%count11, %endif3]
+  %x2 = add i64 %x1, 1
+  %4 = icmp eq i64 %x2, 100
+  br i1 %4, label %exit, label %header
+
+exit:
+  ret i32 %count12
+}
+
+; The exit block has higher frequency than false block, so latch block
+; should not moved before header.
+;CHECK-LABEL: test4:
+;CHECK:       %header
+;CHECK:       %true
+;CHECK:       %latch
+;CHECK:       %false
+;CHECK:       %exit
+define i32 @test4(i32 %t, i32* %p) {
+entry:
+  br label %header
+
+header:
+  %x1 = phi i64 [0, %entry], [%x2, %latch]
+  %count1 = phi i32 [0, %entry], [%count4, %latch]
+  %0 = ptrtoint i32* %p to i64
+  %1 = add i64 %0, %x1
+  %2 = inttoptr i64 %1 to i32*
+  %data = load i32, i32* %2
+  %3 = icmp eq i32 %data, 0
+  br i1 %3, label %true, label %false, !prof !1
+
+false:
+  %count2 = add i32 %count1, 1
+  br label %latch
+
+true:
+  %count3 = add i32 %count1, 2
+  br label %latch
+
+latch:
+  %count4 = phi i32 [%count2, %false], [%count3, %true]
+  %x2 = add i64 %x1, 1
+  %4 = icmp eq i64 %x2, 100
+  br i1 %4, label %exit, label %header, !prof !2
+
+exit:
+  ret i32 %count4
+}
+
+!1 = !{!"branch_weights", i32 100, i32 1}
+!2 = !{!"branch_weights", i32 16, i32 16}
+!3 = !{!"branch_weights", i32 51, i32 49}
+
+; If move latch to loop top doesn't reduce taken branch, don't do it.
+;CHECK-LABEL: test5:
+;CHECK:       %entry
+;CHECK:       %header
+;CHECK:       %latch
+define void @test5(i32* %p) {
+entry:
+  br label %header
+
+header:
+  %x1 = phi i64 [0, %entry], [%x1, %header], [%x2, %latch]
+  %0 = ptrtoint i32* %p to i64
+  %1 = add i64 %0, %x1
+  %2 = inttoptr i64 %1 to i32*
+  %data = load i32, i32* %2
+  %3 = icmp eq i32 %data, 0
+  br i1 %3, label %latch, label %header
+
+latch:
+  %x2 = add i64 %x1, 1
+  br label %header
+
+exit:
+  ret void
+}
+

Modified: llvm/trunk/test/CodeGen/X86/pr38185.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr38185.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr38185.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr38185.ll Fri Jun 14 16:08:59 2019
@@ -5,9 +5,13 @@ define void @foo(i32* %a, i32* %b, i32*
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:    jmp .LBB0_1
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB0_2: # %body
+; CHECK-NEXT:  .LBB0_1: # %loop
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movq -{{[0-9]+}}(%rsp), %r9
+; CHECK-NEXT:    cmpq %rcx, %r9
+; CHECK-NEXT:    je .LBB0_3
+; CHECK-NEXT:  # %bb.2: # %body
 ; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:    movl $1, (%rdx,%r9,4)
 ; CHECK-NEXT:    movzbl (%rdi,%r9,4), %r8d
@@ -17,12 +21,8 @@ define void @foo(i32* %a, i32* %b, i32*
 ; CHECK-NEXT:    movl %eax, (%rdi,%r9,4)
 ; CHECK-NEXT:    incq %r9
 ; CHECK-NEXT:    movq %r9, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:  .LBB0_1: # %loop
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movq -{{[0-9]+}}(%rsp), %r9
-; CHECK-NEXT:    cmpq %rcx, %r9
-; CHECK-NEXT:    jne .LBB0_2
-; CHECK-NEXT:  # %bb.3: # %endloop
+; CHECK-NEXT:    jmp .LBB0_1
+; CHECK-NEXT:  .LBB0_3: # %endloop
 ; CHECK-NEXT:    retq
 %i = alloca i64
 store i64 0, i64* %i

Modified: llvm/trunk/test/CodeGen/X86/ragreedy-hoist-spill.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/ragreedy-hoist-spill.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/ragreedy-hoist-spill.ll (original)
+++ llvm/trunk/test/CodeGen/X86/ragreedy-hoist-spill.ll Fri Jun 14 16:08:59 2019
@@ -103,6 +103,34 @@ define i8* @SyFgets(i8* %line, i64 %leng
 ; CHECK-NEXT:    xorl %r13d, %r13d
 ; CHECK-NEXT:    jmp LBB0_13
 ; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  LBB0_20: ## %sw.bb256
+; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
+; CHECK-NEXT:    movl %r14d, %r13d
+; CHECK-NEXT:  LBB0_21: ## %while.cond197.backedge
+; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
+; CHECK-NEXT:    decl %r15d
+; CHECK-NEXT:    testl %r15d, %r15d
+; CHECK-NEXT:    movl %r13d, %r14d
+; CHECK-NEXT:    jle LBB0_22
+; CHECK-NEXT:  LBB0_13: ## %while.body200
+; CHECK-NEXT:    ## =>This Loop Header: Depth=1
+; CHECK-NEXT:    ## Child Loop BB0_30 Depth 2
+; CHECK-NEXT:    ## Child Loop BB0_38 Depth 2
+; CHECK-NEXT:    leal -268(%r14), %eax
+; CHECK-NEXT:    cmpl $105, %eax
+; CHECK-NEXT:    ja LBB0_14
+; CHECK-NEXT:  ## %bb.56: ## %while.body200
+; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
+; CHECK-NEXT:    movslq (%rdi,%rax,4), %rax
+; CHECK-NEXT:    addq %rdi, %rax
+; CHECK-NEXT:    jmpq *%rax
+; CHECK-NEXT:  LBB0_44: ## %while.cond1037.preheader
+; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
+; CHECK-NEXT:    testb %dl, %dl
+; CHECK-NEXT:    movl %r14d, %r13d
+; CHECK-NEXT:    jne LBB0_21
+; CHECK-NEXT:    jmp LBB0_55
+; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  LBB0_14: ## %while.body200
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    leal 1(%r14), %eax
@@ -118,12 +146,6 @@ define i8* @SyFgets(i8* %line, i64 %leng
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    movl $1, %r13d
 ; CHECK-NEXT:    jmp LBB0_21
-; CHECK-NEXT:  LBB0_44: ## %while.cond1037.preheader
-; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    testb %dl, %dl
-; CHECK-NEXT:    movl %r14d, %r13d
-; CHECK-NEXT:    jne LBB0_21
-; CHECK-NEXT:    jmp LBB0_55
 ; CHECK-NEXT:  LBB0_26: ## %sw.bb474
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    testb %dl, %dl
@@ -137,30 +159,52 @@ define i8* @SyFgets(i8* %line, i64 %leng
 ; CHECK-NEXT:  ## %bb.28: ## %land.rhs485.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    ## implicit-def: $rax
+; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    jns LBB0_30
+; CHECK-NEXT:    jmp LBB0_55
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  LBB0_29: ## %land.rhs485
-; CHECK-NEXT:    ## Parent Loop BB0_13 Depth=1
-; CHECK-NEXT:    ## => This Inner Loop Header: Depth=2
+; CHECK-NEXT:  LBB0_32: ## %do.body479.backedge
+; CHECK-NEXT:    ## in Loop: Header=BB0_30 Depth=2
+; CHECK-NEXT:    leaq 1(%r12), %rax
+; CHECK-NEXT:    testb %dl, %dl
+; CHECK-NEXT:    je LBB0_33
+; CHECK-NEXT:  ## %bb.29: ## %land.rhs485
+; CHECK-NEXT:    ## in Loop: Header=BB0_30 Depth=2
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:    js LBB0_55
-; CHECK-NEXT:  ## %bb.30: ## %cond.true.i.i2780
-; CHECK-NEXT:    ## in Loop: Header=BB0_29 Depth=2
+; CHECK-NEXT:  LBB0_30: ## %cond.true.i.i2780
+; CHECK-NEXT:    ## Parent Loop BB0_13 Depth=1
+; CHECK-NEXT:    ## => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    movq %rax, %r12
 ; CHECK-NEXT:    testb %dl, %dl
 ; CHECK-NEXT:    jne LBB0_32
 ; CHECK-NEXT:  ## %bb.31: ## %lor.rhs500
-; CHECK-NEXT:    ## in Loop: Header=BB0_29 Depth=2
+; CHECK-NEXT:    ## in Loop: Header=BB0_30 Depth=2
 ; CHECK-NEXT:    movl $256, %esi ## imm = 0x100
 ; CHECK-NEXT:    callq ___maskrune
 ; CHECK-NEXT:    xorl %edx, %edx
 ; CHECK-NEXT:    testb %dl, %dl
-; CHECK-NEXT:    je LBB0_34
-; CHECK-NEXT:  LBB0_32: ## %do.body479.backedge
-; CHECK-NEXT:    ## in Loop: Header=BB0_29 Depth=2
-; CHECK-NEXT:    leaq 1(%r12), %rax
-; CHECK-NEXT:    testb %dl, %dl
-; CHECK-NEXT:    jne LBB0_29
-; CHECK-NEXT:  ## %bb.33: ## %if.end517.loopexitsplit
+; CHECK-NEXT:    jne LBB0_32
+; CHECK-NEXT:    jmp LBB0_34
+; CHECK-NEXT:  LBB0_45: ## %sw.bb1134
+; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
+; CHECK-NEXT:    leaq {{[0-9]+}}(%rsp), %rax
+; CHECK-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; CHECK-NEXT:    cmpq %rax, %rcx
+; CHECK-NEXT:    jb LBB0_55
+; CHECK-NEXT:  ## %bb.46: ## in Loop: Header=BB0_13 Depth=1
+; CHECK-NEXT:    xorl %ebp, %ebp
+; CHECK-NEXT:    movl $268, %r13d ## imm = 0x10C
+; CHECK-NEXT:    jmp LBB0_21
+; CHECK-NEXT:  LBB0_19: ## %sw.bb243
+; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
+; CHECK-NEXT:    movl $2, %r13d
+; CHECK-NEXT:    jmp LBB0_21
+; CHECK-NEXT:  LBB0_40: ## %sw.bb566
+; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
+; CHECK-NEXT:    movl $20, %r13d
+; CHECK-NEXT:    jmp LBB0_21
+; CHECK-NEXT:  LBB0_33: ## %if.end517.loopexitsplit
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    incq %r12
 ; CHECK-NEXT:  LBB0_34: ## %if.end517
@@ -199,47 +243,6 @@ define i8* @SyFgets(i8* %line, i64 %leng
 ; CHECK-NEXT:    leaq {{.*}}(%rip), %rsi
 ; CHECK-NEXT:    leaq {{.*}}(%rip), %rdi
 ; CHECK-NEXT:    jmp LBB0_21
-; CHECK-NEXT:  LBB0_45: ## %sw.bb1134
-; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    leaq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; CHECK-NEXT:    cmpq %rax, %rcx
-; CHECK-NEXT:    jb LBB0_55
-; CHECK-NEXT:  ## %bb.46: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    xorl %ebp, %ebp
-; CHECK-NEXT:    movl $268, %r13d ## imm = 0x10C
-; CHECK-NEXT:    jmp LBB0_21
-; CHECK-NEXT:  LBB0_19: ## %sw.bb243
-; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl $2, %r13d
-; CHECK-NEXT:    jmp LBB0_21
-; CHECK-NEXT:  LBB0_40: ## %sw.bb566
-; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl $20, %r13d
-; CHECK-NEXT:    jmp LBB0_21
-; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  LBB0_13: ## %while.body200
-; CHECK-NEXT:    ## =>This Loop Header: Depth=1
-; CHECK-NEXT:    ## Child Loop BB0_29 Depth 2
-; CHECK-NEXT:    ## Child Loop BB0_38 Depth 2
-; CHECK-NEXT:    leal -268(%r14), %eax
-; CHECK-NEXT:    cmpl $105, %eax
-; CHECK-NEXT:    ja LBB0_14
-; CHECK-NEXT:  ## %bb.56: ## %while.body200
-; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movslq (%rdi,%rax,4), %rax
-; CHECK-NEXT:    addq %rdi, %rax
-; CHECK-NEXT:    jmpq *%rax
-; CHECK-NEXT:  LBB0_20: ## %sw.bb256
-; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl %r14d, %r13d
-; CHECK-NEXT:  LBB0_21: ## %while.cond197.backedge
-; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    decl %r15d
-; CHECK-NEXT:    testl %r15d, %r15d
-; CHECK-NEXT:    movl %r13d, %r14d
-; CHECK-NEXT:    jg LBB0_13
-; CHECK-NEXT:    jmp LBB0_22
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  LBB0_42: ## %while.cond864
 ; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1

Modified: llvm/trunk/test/CodeGen/X86/reverse_branches.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/reverse_branches.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/reverse_branches.ll (original)
+++ llvm/trunk/test/CodeGen/X86/reverse_branches.ll Fri Jun 14 16:08:59 2019
@@ -85,25 +85,36 @@ define i32 @test_branches_order() uwtabl
 ; CHECK-NEXT:    jg LBB0_16
 ; CHECK-NEXT:  LBB0_9: ## %for.cond18.preheader
 ; CHECK-NEXT:    ## =>This Loop Header: Depth=1
-; CHECK-NEXT:    ## Child Loop BB0_10 Depth 2
+; CHECK-NEXT:    ## Child Loop BB0_11 Depth 2
 ; CHECK-NEXT:    ## Child Loop BB0_12 Depth 3
 ; CHECK-NEXT:    movq %rcx, %rdx
 ; CHECK-NEXT:    xorl %esi, %esi
 ; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    cmpl $999, %edi ## imm = 0x3E7
+; CHECK-NEXT:    jle LBB0_11
+; CHECK-NEXT:    jmp LBB0_15
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  LBB0_10: ## %for.cond18
+; CHECK-NEXT:  LBB0_14: ## %exit
+; CHECK-NEXT:    ## in Loop: Header=BB0_11 Depth=2
+; CHECK-NEXT:    addq %rsi, %rbp
+; CHECK-NEXT:    incq %rdi
+; CHECK-NEXT:    decq %rsi
+; CHECK-NEXT:    addq $1001, %rdx ## imm = 0x3E9
+; CHECK-NEXT:    cmpq $-1000, %rbp ## imm = 0xFC18
+; CHECK-NEXT:    jne LBB0_5
+; CHECK-NEXT:  ## %bb.10: ## %for.cond18
+; CHECK-NEXT:    ## in Loop: Header=BB0_11 Depth=2
+; CHECK-NEXT:    cmpl $999, %edi ## imm = 0x3E7
+; CHECK-NEXT:    jg LBB0_15
+; CHECK-NEXT:  LBB0_11: ## %for.body20
 ; CHECK-NEXT:    ## Parent Loop BB0_9 Depth=1
 ; CHECK-NEXT:    ## => This Loop Header: Depth=2
 ; CHECK-NEXT:    ## Child Loop BB0_12 Depth 3
-; CHECK-NEXT:    cmpl $999, %edi ## imm = 0x3E7
-; CHECK-NEXT:    jg LBB0_15
-; CHECK-NEXT:  ## %bb.11: ## %for.body20
-; CHECK-NEXT:    ## in Loop: Header=BB0_10 Depth=2
 ; CHECK-NEXT:    movq $-1000, %rbp ## imm = 0xFC18
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  LBB0_12: ## %do.body.i
 ; CHECK-NEXT:    ## Parent Loop BB0_9 Depth=1
-; CHECK-NEXT:    ## Parent Loop BB0_10 Depth=2
+; CHECK-NEXT:    ## Parent Loop BB0_11 Depth=2
 ; CHECK-NEXT:    ## => This Inner Loop Header: Depth=3
 ; CHECK-NEXT:    cmpb $120, 1000(%rdx,%rbp)
 ; CHECK-NEXT:    je LBB0_14
@@ -111,16 +122,6 @@ define i32 @test_branches_order() uwtabl
 ; CHECK-NEXT:    ## in Loop: Header=BB0_12 Depth=3
 ; CHECK-NEXT:    incq %rbp
 ; CHECK-NEXT:    jne LBB0_12
-; CHECK-NEXT:    jmp LBB0_5
-; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  LBB0_14: ## %exit
-; CHECK-NEXT:    ## in Loop: Header=BB0_10 Depth=2
-; CHECK-NEXT:    addq %rsi, %rbp
-; CHECK-NEXT:    incq %rdi
-; CHECK-NEXT:    decq %rsi
-; CHECK-NEXT:    addq $1001, %rdx ## imm = 0x3E9
-; CHECK-NEXT:    cmpq $-1000, %rbp ## imm = 0xFC18
-; CHECK-NEXT:    je LBB0_10
 ; CHECK-NEXT:  LBB0_5: ## %if.then
 ; CHECK-NEXT:    leaq {{.*}}(%rip), %rdi
 ; CHECK-NEXT:    callq _puts

Modified: llvm/trunk/test/CodeGen/X86/speculative-load-hardening.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/speculative-load-hardening.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/speculative-load-hardening.ll (original)
+++ llvm/trunk/test/CodeGen/X86/speculative-load-hardening.ll Fri Jun 14 16:08:59 2019
@@ -215,10 +215,7 @@ define void @test_basic_loop(i32 %a, i32
 ; X64-NEXT:    movl %esi, %ebp
 ; X64-NEXT:    cmovneq %r15, %rax
 ; X64-NEXT:    xorl %ebx, %ebx
-; X64-NEXT:    jmp .LBB2_3
 ; X64-NEXT:    .p2align 4, 0x90
-; X64-NEXT:  .LBB2_6: # in Loop: Header=BB2_3 Depth=1
-; X64-NEXT:    cmovgeq %r15, %rax
 ; X64-NEXT:  .LBB2_3: # %l.header
 ; X64-NEXT:    # =>This Inner Loop Header: Depth=1
 ; X64-NEXT:    movslq (%r12), %rcx
@@ -237,8 +234,11 @@ define void @test_basic_loop(i32 %a, i32
 ; X64-NEXT:    cmovneq %r15, %rax
 ; X64-NEXT:    incl %ebx
 ; X64-NEXT:    cmpl %ebp, %ebx
-; X64-NEXT:    jl .LBB2_6
-; X64-NEXT:  # %bb.4:
+; X64-NEXT:    jge .LBB2_4
+; X64-NEXT:  # %bb.6: # in Loop: Header=BB2_3 Depth=1
+; X64-NEXT:    cmovgeq %r15, %rax
+; X64-NEXT:    jmp .LBB2_3
+; X64-NEXT:  .LBB2_4:
 ; X64-NEXT:    cmovlq %r15, %rax
 ; X64-NEXT:  .LBB2_5: # %exit
 ; X64-NEXT:    shlq $47, %rax
@@ -328,20 +328,12 @@ define void @test_basic_nested_loop(i32
 ; X64-NEXT:    xorl %r13d, %r13d
 ; X64-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
 ; X64-NEXT:    testl %r15d, %r15d
-; X64-NEXT:    jg .LBB3_5
-; X64-NEXT:    jmp .LBB3_4
-; X64-NEXT:    .p2align 4, 0x90
-; X64-NEXT:  .LBB3_12:
-; X64-NEXT:    cmovgeq %rbp, %rax
-; X64-NEXT:    testl %r15d, %r15d
 ; X64-NEXT:    jle .LBB3_4
+; X64-NEXT:    .p2align 4, 0x90
 ; X64-NEXT:  .LBB3_5: # %l2.header.preheader
 ; X64-NEXT:    cmovleq %rbp, %rax
 ; X64-NEXT:    xorl %r15d, %r15d
-; X64-NEXT:    jmp .LBB3_6
 ; X64-NEXT:    .p2align 4, 0x90
-; X64-NEXT:  .LBB3_11: # in Loop: Header=BB3_6 Depth=1
-; X64-NEXT:    cmovgeq %rbp, %rax
 ; X64-NEXT:  .LBB3_6: # %l2.header
 ; X64-NEXT:    # =>This Inner Loop Header: Depth=1
 ; X64-NEXT:    movslq (%rbx), %rcx
@@ -360,8 +352,12 @@ define void @test_basic_nested_loop(i32
 ; X64-NEXT:    cmovneq %rbp, %rax
 ; X64-NEXT:    incl %r15d
 ; X64-NEXT:    cmpl %r12d, %r15d
-; X64-NEXT:    jl .LBB3_11
-; X64-NEXT:  # %bb.7:
+; X64-NEXT:    jge .LBB3_7
+; X64-NEXT:  # %bb.11: # in Loop: Header=BB3_6 Depth=1
+; X64-NEXT:    cmovgeq %rbp, %rax
+; X64-NEXT:    jmp .LBB3_6
+; X64-NEXT:    .p2align 4, 0x90
+; X64-NEXT:  .LBB3_7:
 ; X64-NEXT:    cmovlq %rbp, %rax
 ; X64-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %r15d # 4-byte Reload
 ; X64-NEXT:    jmp .LBB3_8
@@ -385,8 +381,13 @@ define void @test_basic_nested_loop(i32
 ; X64-NEXT:    cmovneq %rbp, %rax
 ; X64-NEXT:    incl %r13d
 ; X64-NEXT:    cmpl %r15d, %r13d
-; X64-NEXT:    jl .LBB3_12
-; X64-NEXT:  # %bb.9:
+; X64-NEXT:    jge .LBB3_9
+; X64-NEXT:  # %bb.12:
+; X64-NEXT:    cmovgeq %rbp, %rax
+; X64-NEXT:    testl %r15d, %r15d
+; X64-NEXT:    jg .LBB3_5
+; X64-NEXT:    jmp .LBB3_4
+; X64-NEXT:  .LBB3_9:
 ; X64-NEXT:    cmovlq %rbp, %rax
 ; X64-NEXT:  .LBB3_10: # %exit
 ; X64-NEXT:    shlq $47, %rax
@@ -418,7 +419,17 @@ define void @test_basic_nested_loop(i32
 ; X64-LFENCE-NEXT:    movl %esi, %r15d
 ; X64-LFENCE-NEXT:    lfence
 ; X64-LFENCE-NEXT:    xorl %r12d, %r12d
+; X64-LFENCE-NEXT:    jmp .LBB3_2
 ; X64-LFENCE-NEXT:    .p2align 4, 0x90
+; X64-LFENCE-NEXT:  .LBB3_5: # %l1.latch
+; X64-LFENCE-NEXT:    # in Loop: Header=BB3_2 Depth=1
+; X64-LFENCE-NEXT:    lfence
+; X64-LFENCE-NEXT:    movslq (%rbx), %rax
+; X64-LFENCE-NEXT:    movl (%r14,%rax,4), %edi
+; X64-LFENCE-NEXT:    callq sink
+; X64-LFENCE-NEXT:    incl %r12d
+; X64-LFENCE-NEXT:    cmpl %r15d, %r12d
+; X64-LFENCE-NEXT:    jge .LBB3_6
 ; X64-LFENCE-NEXT:  .LBB3_2: # %l1.header
 ; X64-LFENCE-NEXT:    # =>This Loop Header: Depth=1
 ; X64-LFENCE-NEXT:    # Child Loop BB3_4 Depth 2
@@ -440,15 +451,7 @@ define void @test_basic_nested_loop(i32
 ; X64-LFENCE-NEXT:    incl %ebp
 ; X64-LFENCE-NEXT:    cmpl %r13d, %ebp
 ; X64-LFENCE-NEXT:    jl .LBB3_4
-; X64-LFENCE-NEXT:  .LBB3_5: # %l1.latch
-; X64-LFENCE-NEXT:    # in Loop: Header=BB3_2 Depth=1
-; X64-LFENCE-NEXT:    lfence
-; X64-LFENCE-NEXT:    movslq (%rbx), %rax
-; X64-LFENCE-NEXT:    movl (%r14,%rax,4), %edi
-; X64-LFENCE-NEXT:    callq sink
-; X64-LFENCE-NEXT:    incl %r12d
-; X64-LFENCE-NEXT:    cmpl %r15d, %r12d
-; X64-LFENCE-NEXT:    jl .LBB3_2
+; X64-LFENCE-NEXT:    jmp .LBB3_5
 ; X64-LFENCE-NEXT:  .LBB3_6: # %exit
 ; X64-LFENCE-NEXT:    lfence
 ; X64-LFENCE-NEXT:    addq $8, %rsp

Modified: llvm/trunk/test/CodeGen/X86/swifterror.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/swifterror.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/swifterror.ll (original)
+++ llvm/trunk/test/CodeGen/X86/swifterror.ll Fri Jun 14 16:08:59 2019
@@ -1,6 +1,6 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-apple-darwin | FileCheck --check-prefix=CHECK-APPLE %s
-; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-apple-darwin -O0 | FileCheck --check-prefix=CHECK-O0 %s
-; RUN: llc < %s -verify-machineinstrs -mtriple=i386-apple-darwin | FileCheck --check-prefix=CHECK-i386 %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=x86_64-apple-darwin -disable-block-placement | FileCheck --check-prefix=CHECK-APPLE %s
+; RUN: llc -verify-machineinstrs -O0 < %s -mtriple=x86_64-apple-darwin -disable-block-placement | FileCheck --check-prefix=CHECK-O0 %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=i386-apple-darwin -disable-block-placement | FileCheck --check-prefix=CHECK-i386 %s
 
 declare i8* @malloc(i64)
 declare void @free(i8*)

Modified: llvm/trunk/test/CodeGen/X86/tail-dup-merge-loop-headers.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/tail-dup-merge-loop-headers.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/tail-dup-merge-loop-headers.ll (original)
+++ llvm/trunk/test/CodeGen/X86/tail-dup-merge-loop-headers.ll Fri Jun 14 16:08:59 2019
@@ -12,14 +12,17 @@ define void @tail_dup_merge_loops(i32 %a
 ; CHECK-NEXT:    # in Loop: Header=BB0_2 Depth=1
 ; CHECK-NEXT:    incq %rsi
 ; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    jne .LBB0_2
-; CHECK-NEXT:    jmp .LBB0_5
+; CHECK-NEXT:    je .LBB0_5
 ; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB0_2: # %inner_loop_top
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB0_4 Depth 2
+; CHECK-NEXT:    cmpb $0, (%rsi)
+; CHECK-NEXT:    js .LBB0_3
 ; CHECK-NEXT:  .LBB0_4: # %inner_loop_latch
-; CHECK-NEXT:    # in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT:    # Parent Loop BB0_2 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    addq $2, %rsi
-; CHECK-NEXT:  .LBB0_2: # %inner_loop_top
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    cmpb $0, (%rsi)
 ; CHECK-NEXT:    jns .LBB0_4
 ; CHECK-NEXT:    jmp .LBB0_3
@@ -130,58 +133,58 @@ define i32 @loop_shared_header(i8* %exe,
 ; CHECK-NEXT:    testl %ebp, %ebp
 ; CHECK-NEXT:    je .LBB1_18
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB1_8: # %shared_loop_header
+; CHECK-NEXT:  .LBB1_9: # %shared_loop_header
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    testq %rbx, %rbx
 ; CHECK-NEXT:    jne .LBB1_27
-; CHECK-NEXT:  # %bb.9: # %inner_loop_body
-; CHECK-NEXT:    # in Loop: Header=BB1_8 Depth=1
+; CHECK-NEXT:  # %bb.10: # %inner_loop_body
+; CHECK-NEXT:    # in Loop: Header=BB1_9 Depth=1
 ; CHECK-NEXT:    testl %eax, %eax
-; CHECK-NEXT:    jns .LBB1_8
-; CHECK-NEXT:  # %bb.10: # %if.end96.i
-; CHECK-NEXT:    # in Loop: Header=BB1_8 Depth=1
+; CHECK-NEXT:    jns .LBB1_9
+; CHECK-NEXT:  # %bb.11: # %if.end96.i
+; CHECK-NEXT:    # in Loop: Header=BB1_9 Depth=1
 ; CHECK-NEXT:    cmpl $3, %ebp
 ; CHECK-NEXT:    jae .LBB1_22
-; CHECK-NEXT:  # %bb.11: # %if.end287.i
-; CHECK-NEXT:    # in Loop: Header=BB1_8 Depth=1
+; CHECK-NEXT:  # %bb.12: # %if.end287.i
+; CHECK-NEXT:    # in Loop: Header=BB1_9 Depth=1
 ; CHECK-NEXT:    xorl %esi, %esi
 ; CHECK-NEXT:    cmpl $1, %ebp
 ; CHECK-NEXT:    setne %dl
 ; CHECK-NEXT:    testb %al, %al
-; CHECK-NEXT:    jne .LBB1_15
-; CHECK-NEXT:  # %bb.12: # %if.end308.i
-; CHECK-NEXT:    # in Loop: Header=BB1_8 Depth=1
+; CHECK-NEXT:    jne .LBB1_16
+; CHECK-NEXT:  # %bb.13: # %if.end308.i
+; CHECK-NEXT:    # in Loop: Header=BB1_9 Depth=1
 ; CHECK-NEXT:    testb %al, %al
-; CHECK-NEXT:    je .LBB1_17
-; CHECK-NEXT:  # %bb.13: # %if.end335.i
-; CHECK-NEXT:    # in Loop: Header=BB1_8 Depth=1
+; CHECK-NEXT:    je .LBB1_7
+; CHECK-NEXT:  # %bb.14: # %if.end335.i
+; CHECK-NEXT:    # in Loop: Header=BB1_9 Depth=1
 ; CHECK-NEXT:    xorl %edx, %edx
 ; CHECK-NEXT:    testb %dl, %dl
 ; CHECK-NEXT:    movl $0, %esi
-; CHECK-NEXT:    jne .LBB1_7
-; CHECK-NEXT:  # %bb.14: # %merge_other
-; CHECK-NEXT:    # in Loop: Header=BB1_8 Depth=1
+; CHECK-NEXT:    jne .LBB1_8
+; CHECK-NEXT:  # %bb.15: # %merge_other
+; CHECK-NEXT:    # in Loop: Header=BB1_9 Depth=1
 ; CHECK-NEXT:    xorl %esi, %esi
-; CHECK-NEXT:    jmp .LBB1_16
-; CHECK-NEXT:  .LBB1_15: # in Loop: Header=BB1_8 Depth=1
+; CHECK-NEXT:    jmp .LBB1_17
+; CHECK-NEXT:  .LBB1_16: # in Loop: Header=BB1_9 Depth=1
 ; CHECK-NEXT:    movb %dl, %sil
 ; CHECK-NEXT:    addl $3, %esi
-; CHECK-NEXT:  .LBB1_16: # %outer_loop_latch
-; CHECK-NEXT:    # in Loop: Header=BB1_8 Depth=1
+; CHECK-NEXT:  .LBB1_17: # %outer_loop_latch
+; CHECK-NEXT:    # in Loop: Header=BB1_9 Depth=1
 ; CHECK-NEXT:    # implicit-def: $dl
-; CHECK-NEXT:    jmp .LBB1_7
-; CHECK-NEXT:  .LBB1_17: # %merge_predecessor_split
-; CHECK-NEXT:    # in Loop: Header=BB1_8 Depth=1
+; CHECK-NEXT:    jmp .LBB1_8
+; CHECK-NEXT:  .LBB1_7: # %merge_predecessor_split
+; CHECK-NEXT:    # in Loop: Header=BB1_9 Depth=1
 ; CHECK-NEXT:    movb $32, %dl
 ; CHECK-NEXT:    xorl %esi, %esi
-; CHECK-NEXT:  .LBB1_7: # %outer_loop_latch
-; CHECK-NEXT:    # in Loop: Header=BB1_8 Depth=1
+; CHECK-NEXT:  .LBB1_8: # %outer_loop_latch
+; CHECK-NEXT:    # in Loop: Header=BB1_9 Depth=1
 ; CHECK-NEXT:    movzwl %si, %esi
 ; CHECK-NEXT:    decl %esi
 ; CHECK-NEXT:    movzwl %si, %esi
 ; CHECK-NEXT:    leaq 1(%rcx,%rsi), %rcx
 ; CHECK-NEXT:    testl %ebp, %ebp
-; CHECK-NEXT:    jne .LBB1_8
+; CHECK-NEXT:    jne .LBB1_9
 ; CHECK-NEXT:  .LBB1_18: # %while.cond.us1412.i
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    testb %al, %al

Modified: llvm/trunk/test/CodeGen/X86/tail-dup-repeat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/tail-dup-repeat.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/tail-dup-repeat.ll (original)
+++ llvm/trunk/test/CodeGen/X86/tail-dup-repeat.ll Fri Jun 14 16:08:59 2019
@@ -10,35 +10,30 @@
 define void @repeated_tail_dup(i1 %a1, i1 %a2, i32* %a4, i32* %a5, i8* %a6, i32 %a7) #0 align 2 {
 ; CHECK-LABEL: repeated_tail_dup:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB0_1: # %for.cond
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    testb $1, %dil
 ; CHECK-NEXT:    je .LBB0_3
-; CHECK-NEXT:  # %bb.2: # %land.lhs.true
-; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    movl $10, (%rdx)
-; CHECK-NEXT:    jmp .LBB0_6
 ; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB0_2: # %land.lhs.true
+; CHECK-NEXT:    movl $10, (%rdx)
+; CHECK-NEXT:  .LBB0_6: # %dup2
+; CHECK-NEXT:    movl $2, (%rcx)
+; CHECK-NEXT:    testl %r9d, %r9d
+; CHECK-NEXT:    jne .LBB0_8
+; CHECK-NEXT:  .LBB0_1: # %for.cond
+; CHECK-NEXT:    testb $1, %dil
+; CHECK-NEXT:    jne .LBB0_2
 ; CHECK-NEXT:  .LBB0_3: # %if.end56
-; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:    testb $1, %sil
 ; CHECK-NEXT:    je .LBB0_5
 ; CHECK-NEXT:  # %bb.4: # %if.then64
-; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:    movb $1, (%r8)
 ; CHECK-NEXT:    testl %r9d, %r9d
 ; CHECK-NEXT:    je .LBB0_1
 ; CHECK-NEXT:    jmp .LBB0_8
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB0_5: # %if.end70
-; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:    movl $12, (%rdx)
-; CHECK-NEXT:  .LBB0_6: # %dup2
-; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    movl $2, (%rcx)
-; CHECK-NEXT:    testl %r9d, %r9d
-; CHECK-NEXT:    je .LBB0_1
+; CHECK-NEXT:    jmp .LBB0_6
 ; CHECK-NEXT:  .LBB0_8: # %for.end
 ; CHECK-NEXT:    retq
 entry:

Modified: llvm/trunk/test/CodeGen/X86/vector-shift-by-select-loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shift-by-select-loop.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shift-by-select-loop.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shift-by-select-loop.ll Fri Jun 14 16:08:59 2019
@@ -136,8 +136,17 @@ define void @vector_variable_shift_left_
 ; SSE-NEXT:    jne .LBB0_4
 ; SSE-NEXT:  # %bb.5: # %middle.block
 ; SSE-NEXT:    cmpq %rax, %rdx
-; SSE-NEXT:    je .LBB0_9
+; SSE-NEXT:    jne .LBB0_6
+; SSE-NEXT:  .LBB0_9: # %for.cond.cleanup
+; SSE-NEXT:    retq
 ; SSE-NEXT:    .p2align 4, 0x90
+; SSE-NEXT:  .LBB0_8: # %for.body
+; SSE-NEXT:    # in Loop: Header=BB0_6 Depth=1
+; SSE-NEXT:    # kill: def $cl killed $cl killed $ecx
+; SSE-NEXT:    shll %cl, (%rdi,%rdx,4)
+; SSE-NEXT:    incq %rdx
+; SSE-NEXT:    cmpq %rdx, %rax
+; SSE-NEXT:    je .LBB0_9
 ; SSE-NEXT:  .LBB0_6: # %for.body
 ; SSE-NEXT:    # =>This Inner Loop Header: Depth=1
 ; SSE-NEXT:    cmpb $0, (%rsi,%rdx)
@@ -146,15 +155,7 @@ define void @vector_variable_shift_left_
 ; SSE-NEXT:  # %bb.7: # %for.body
 ; SSE-NEXT:    # in Loop: Header=BB0_6 Depth=1
 ; SSE-NEXT:    movl %r8d, %ecx
-; SSE-NEXT:  .LBB0_8: # %for.body
-; SSE-NEXT:    # in Loop: Header=BB0_6 Depth=1
-; SSE-NEXT:    # kill: def $cl killed $cl killed $ecx
-; SSE-NEXT:    shll %cl, (%rdi,%rdx,4)
-; SSE-NEXT:    incq %rdx
-; SSE-NEXT:    cmpq %rdx, %rax
-; SSE-NEXT:    jne .LBB0_6
-; SSE-NEXT:  .LBB0_9: # %for.cond.cleanup
-; SSE-NEXT:    retq
+; SSE-NEXT:    jmp .LBB0_8
 ;
 ; AVX1-LABEL: vector_variable_shift_left_loop:
 ; AVX1:       # %bb.0: # %entry
@@ -258,8 +259,18 @@ define void @vector_variable_shift_left_
 ; AVX1-NEXT:    jne .LBB0_4
 ; AVX1-NEXT:  # %bb.5: # %middle.block
 ; AVX1-NEXT:    cmpq %rax, %rdx
-; AVX1-NEXT:    je .LBB0_9
+; AVX1-NEXT:    jne .LBB0_6
+; AVX1-NEXT:  .LBB0_9: # %for.cond.cleanup
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
 ; AVX1-NEXT:    .p2align 4, 0x90
+; AVX1-NEXT:  .LBB0_8: # %for.body
+; AVX1-NEXT:    # in Loop: Header=BB0_6 Depth=1
+; AVX1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; AVX1-NEXT:    shll %cl, (%rdi,%rdx,4)
+; AVX1-NEXT:    incq %rdx
+; AVX1-NEXT:    cmpq %rdx, %rax
+; AVX1-NEXT:    je .LBB0_9
 ; AVX1-NEXT:  .LBB0_6: # %for.body
 ; AVX1-NEXT:    # =>This Inner Loop Header: Depth=1
 ; AVX1-NEXT:    cmpb $0, (%rsi,%rdx)
@@ -268,16 +279,7 @@ define void @vector_variable_shift_left_
 ; AVX1-NEXT:  # %bb.7: # %for.body
 ; AVX1-NEXT:    # in Loop: Header=BB0_6 Depth=1
 ; AVX1-NEXT:    movl %r8d, %ecx
-; AVX1-NEXT:  .LBB0_8: # %for.body
-; AVX1-NEXT:    # in Loop: Header=BB0_6 Depth=1
-; AVX1-NEXT:    # kill: def $cl killed $cl killed $ecx
-; AVX1-NEXT:    shll %cl, (%rdi,%rdx,4)
-; AVX1-NEXT:    incq %rdx
-; AVX1-NEXT:    cmpq %rdx, %rax
-; AVX1-NEXT:    jne .LBB0_6
-; AVX1-NEXT:  .LBB0_9: # %for.cond.cleanup
-; AVX1-NEXT:    vzeroupper
-; AVX1-NEXT:    retq
+; AVX1-NEXT:    jmp .LBB0_8
 ;
 ; AVX2-LABEL: vector_variable_shift_left_loop:
 ; AVX2:       # %bb.0: # %entry
@@ -332,8 +334,18 @@ define void @vector_variable_shift_left_
 ; AVX2-NEXT:    jne .LBB0_4
 ; AVX2-NEXT:  # %bb.5: # %middle.block
 ; AVX2-NEXT:    cmpq %rax, %rdx
-; AVX2-NEXT:    je .LBB0_9
+; AVX2-NEXT:    jne .LBB0_6
+; AVX2-NEXT:  .LBB0_9: # %for.cond.cleanup
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
 ; AVX2-NEXT:    .p2align 4, 0x90
+; AVX2-NEXT:  .LBB0_8: # %for.body
+; AVX2-NEXT:    # in Loop: Header=BB0_6 Depth=1
+; AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; AVX2-NEXT:    shll %cl, (%rdi,%rdx,4)
+; AVX2-NEXT:    incq %rdx
+; AVX2-NEXT:    cmpq %rdx, %rax
+; AVX2-NEXT:    je .LBB0_9
 ; AVX2-NEXT:  .LBB0_6: # %for.body
 ; AVX2-NEXT:    # =>This Inner Loop Header: Depth=1
 ; AVX2-NEXT:    cmpb $0, (%rsi,%rdx)
@@ -342,16 +354,7 @@ define void @vector_variable_shift_left_
 ; AVX2-NEXT:  # %bb.7: # %for.body
 ; AVX2-NEXT:    # in Loop: Header=BB0_6 Depth=1
 ; AVX2-NEXT:    movl %r8d, %ecx
-; AVX2-NEXT:  .LBB0_8: # %for.body
-; AVX2-NEXT:    # in Loop: Header=BB0_6 Depth=1
-; AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; AVX2-NEXT:    shll %cl, (%rdi,%rdx,4)
-; AVX2-NEXT:    incq %rdx
-; AVX2-NEXT:    cmpq %rdx, %rax
-; AVX2-NEXT:    jne .LBB0_6
-; AVX2-NEXT:  .LBB0_9: # %for.cond.cleanup
-; AVX2-NEXT:    vzeroupper
-; AVX2-NEXT:    retq
+; AVX2-NEXT:    jmp .LBB0_8
 entry:
   %cmp12 = icmp sgt i32 %count, 0
   br i1 %cmp12, label %for.body.preheader, label %for.cond.cleanup

Modified: llvm/trunk/test/CodeGen/X86/widen_arith-1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widen_arith-1.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widen_arith-1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widen_arith-1.ll Fri Jun 14 16:08:59 2019
@@ -8,9 +8,13 @@ define void @update(<3 x i8>* %dst, <3 x
 ; CHECK-NEXT:    movl $0, (%esp)
 ; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
 ; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
-; CHECK-NEXT:    jmp .LBB0_1
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB0_2: # %forbody
+; CHECK-NEXT:  .LBB0_1: # %forcond
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movl (%esp), %eax
+; CHECK-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    jge .LBB0_3
+; CHECK-NEXT:  # %bb.2: # %forbody
 ; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:    movl (%esp), %eax
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -21,12 +25,8 @@ define void @update(<3 x i8>* %dst, <3 x
 ; CHECK-NEXT:    pshufb %xmm1, %xmm2
 ; CHECK-NEXT:    pextrw $0, %xmm2, (%ecx,%eax,4)
 ; CHECK-NEXT:    incl (%esp)
-; CHECK-NEXT:  .LBB0_1: # %forcond
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movl (%esp), %eax
-; CHECK-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    jl .LBB0_2
-; CHECK-NEXT:  # %bb.3: # %afterfor
+; CHECK-NEXT:    jmp .LBB0_1
+; CHECK-NEXT:  .LBB0_3: # %afterfor
 ; CHECK-NEXT:    addl $12, %esp
 ; CHECK-NEXT:    retl
 entry:

Modified: llvm/trunk/test/CodeGen/X86/widen_arith-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widen_arith-2.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widen_arith-2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widen_arith-2.ll Fri Jun 14 16:08:59 2019
@@ -10,9 +10,13 @@ define void @update(i64* %dst_i, i64* %s
 ; CHECK-NEXT:    movl $0, (%esp)
 ; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
 ; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4]
-; CHECK-NEXT:    jmp .LBB0_1
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB0_2: # %forbody
+; CHECK-NEXT:  .LBB0_1: # %forcond
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movl (%esp), %eax
+; CHECK-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    jge .LBB0_3
+; CHECK-NEXT:  # %bb.2: # %forbody
 ; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:    movl (%esp), %eax
 ; CHECK-NEXT:    leal (,%eax,8), %ecx
@@ -27,12 +31,8 @@ define void @update(i64* %dst_i, i64* %s
 ; CHECK-NEXT:    packuswb %xmm0, %xmm2
 ; CHECK-NEXT:    movq %xmm2, (%edx,%eax,8)
 ; CHECK-NEXT:    incl (%esp)
-; CHECK-NEXT:  .LBB0_1: # %forcond
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movl (%esp), %eax
-; CHECK-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    jl .LBB0_2
-; CHECK-NEXT:  # %bb.3: # %afterfor
+; CHECK-NEXT:    jmp .LBB0_1
+; CHECK-NEXT:  .LBB0_3: # %afterfor
 ; CHECK-NEXT:    addl $12, %esp
 ; CHECK-NEXT:    retl
 entry:

Modified: llvm/trunk/test/CodeGen/X86/widen_arith-3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widen_arith-3.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widen_arith-3.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widen_arith-3.ll Fri Jun 14 16:08:59 2019
@@ -18,9 +18,13 @@ define void @update(<3 x i16>* %dst, <3
 ; CHECK-NEXT:    movw $1, {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    movl $65537, {{[0-9]+}}(%esp) # imm = 0x10001
 ; CHECK-NEXT:    movl $0, {{[0-9]+}}(%esp)
-; CHECK-NEXT:    jmp .LBB0_1
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB0_2: # %forbody
+; CHECK-NEXT:  .LBB0_1: # %forcond
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    cmpl 16(%ebp), %eax
+; CHECK-NEXT:    jge .LBB0_3
+; CHECK-NEXT:  # %bb.2: # %forbody
 ; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    movl 12(%ebp), %edx
@@ -31,12 +35,8 @@ define void @update(<3 x i16>* %dst, <3
 ; CHECK-NEXT:    pshufb %xmm1, %xmm2
 ; CHECK-NEXT:    movd %xmm2, (%ecx,%eax,8)
 ; CHECK-NEXT:    incl {{[0-9]+}}(%esp)
-; CHECK-NEXT:  .LBB0_1: # %forcond
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    cmpl 16(%ebp), %eax
-; CHECK-NEXT:    jl .LBB0_2
-; CHECK-NEXT:  # %bb.3: # %afterfor
+; CHECK-NEXT:    jmp .LBB0_1
+; CHECK-NEXT:  .LBB0_3: # %afterfor
 ; CHECK-NEXT:    movl %ebp, %esp
 ; CHECK-NEXT:    popl %ebp
 ; CHECK-NEXT:    retl

Modified: llvm/trunk/test/CodeGen/X86/widen_arith-4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widen_arith-4.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widen_arith-4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widen_arith-4.ll Fri Jun 14 16:08:59 2019
@@ -16,9 +16,13 @@ define void @update(<5 x i16>* %dst, <5
 ; SSE2-NEXT:    movl $0, -{{[0-9]+}}(%rsp)
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = <271,271,271,271,271,u,u,u>
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = <2,4,2,2,2,u,u,u>
-; SSE2-NEXT:    jmp .LBB0_1
 ; SSE2-NEXT:    .p2align 4, 0x90
-; SSE2-NEXT:  .LBB0_2: # %forbody
+; SSE2-NEXT:  .LBB0_1: # %forcond
+; SSE2-NEXT:    # =>This Inner Loop Header: Depth=1
+; SSE2-NEXT:    movl -{{[0-9]+}}(%rsp), %eax
+; SSE2-NEXT:    cmpl -{{[0-9]+}}(%rsp), %eax
+; SSE2-NEXT:    jge .LBB0_3
+; SSE2-NEXT:  # %bb.2: # %forbody
 ; SSE2-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; SSE2-NEXT:    movslq -{{[0-9]+}}(%rsp), %rax
 ; SSE2-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
@@ -31,12 +35,8 @@ define void @update(<5 x i16>* %dst, <5
 ; SSE2-NEXT:    pextrw $4, %xmm2, %edx
 ; SSE2-NEXT:    movw %dx, 8(%rcx,%rax)
 ; SSE2-NEXT:    incl -{{[0-9]+}}(%rsp)
-; SSE2-NEXT:  .LBB0_1: # %forcond
-; SSE2-NEXT:    # =>This Inner Loop Header: Depth=1
-; SSE2-NEXT:    movl -{{[0-9]+}}(%rsp), %eax
-; SSE2-NEXT:    cmpl -{{[0-9]+}}(%rsp), %eax
-; SSE2-NEXT:    jl .LBB0_2
-; SSE2-NEXT:  # %bb.3: # %afterfor
+; SSE2-NEXT:    jmp .LBB0_1
+; SSE2-NEXT:  .LBB0_3: # %afterfor
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: update:
@@ -49,9 +49,13 @@ define void @update(<5 x i16>* %dst, <5
 ; SSE41-NEXT:    movw $0, -{{[0-9]+}}(%rsp)
 ; SSE41-NEXT:    movl $0, -{{[0-9]+}}(%rsp)
 ; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = <271,271,271,271,271,u,u,u>
-; SSE41-NEXT:    jmp .LBB0_1
 ; SSE41-NEXT:    .p2align 4, 0x90
-; SSE41-NEXT:  .LBB0_2: # %forbody
+; SSE41-NEXT:  .LBB0_1: # %forcond
+; SSE41-NEXT:    # =>This Inner Loop Header: Depth=1
+; SSE41-NEXT:    movl -{{[0-9]+}}(%rsp), %eax
+; SSE41-NEXT:    cmpl -{{[0-9]+}}(%rsp), %eax
+; SSE41-NEXT:    jge .LBB0_3
+; SSE41-NEXT:  # %bb.2: # %forbody
 ; SSE41-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; SSE41-NEXT:    movslq -{{[0-9]+}}(%rsp), %rax
 ; SSE41-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
@@ -66,12 +70,8 @@ define void @update(<5 x i16>* %dst, <5
 ; SSE41-NEXT:    pextrw $4, %xmm1, 8(%rcx,%rax)
 ; SSE41-NEXT:    movq %xmm2, (%rcx,%rax)
 ; SSE41-NEXT:    incl -{{[0-9]+}}(%rsp)
-; SSE41-NEXT:  .LBB0_1: # %forcond
-; SSE41-NEXT:    # =>This Inner Loop Header: Depth=1
-; SSE41-NEXT:    movl -{{[0-9]+}}(%rsp), %eax
-; SSE41-NEXT:    cmpl -{{[0-9]+}}(%rsp), %eax
-; SSE41-NEXT:    jl .LBB0_2
-; SSE41-NEXT:  # %bb.3: # %afterfor
+; SSE41-NEXT:    jmp .LBB0_1
+; SSE41-NEXT:  .LBB0_3: # %afterfor
 ; SSE41-NEXT:    retq
 entry:
 	%dst.addr = alloca <5 x i16>*

Modified: llvm/trunk/test/CodeGen/X86/widen_arith-5.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widen_arith-5.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widen_arith-5.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widen_arith-5.ll Fri Jun 14 16:08:59 2019
@@ -14,9 +14,13 @@ define void @update(<3 x i32>* %dst, <3
 ; CHECK-NEXT:    movl $1, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movl $0, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movdqa {{.*#+}} xmm0 = <3,3,3,u>
-; CHECK-NEXT:    jmp .LBB0_1
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB0_2: # %forbody
+; CHECK-NEXT:  .LBB0_1: # %forcond
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT:    cmpl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT:    jge .LBB0_3
+; CHECK-NEXT:  # %bb.2: # %forbody
 ; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:    movslq -{{[0-9]+}}(%rsp), %rax
 ; CHECK-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
@@ -28,12 +32,8 @@ define void @update(<3 x i32>* %dst, <3
 ; CHECK-NEXT:    pextrd $2, %xmm1, 8(%rcx,%rax)
 ; CHECK-NEXT:    movq %xmm1, (%rcx,%rax)
 ; CHECK-NEXT:    incl -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:  .LBB0_1: # %forcond
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movl -{{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    cmpl -{{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    jl .LBB0_2
-; CHECK-NEXT:  # %bb.3: # %afterfor
+; CHECK-NEXT:    jmp .LBB0_1
+; CHECK-NEXT:  .LBB0_3: # %afterfor
 ; CHECK-NEXT:    retq
 entry:
 	%dst.addr = alloca <3 x i32>*

Modified: llvm/trunk/test/CodeGen/X86/widen_arith-6.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widen_arith-6.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widen_arith-6.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widen_arith-6.ll Fri Jun 14 16:08:59 2019
@@ -15,9 +15,13 @@ define void @update(<3 x float>* %dst, <
 ; CHECK-NEXT:    movl $1065353216, {{[0-9]+}}(%esp) # imm = 0x3F800000
 ; CHECK-NEXT:    movl $0, {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    movaps {{.*#+}} xmm0 = <1.97604004E+3,1.97604004E+3,1.97604004E+3,u>
-; CHECK-NEXT:    jmp .LBB0_1
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB0_2: # %forbody
+; CHECK-NEXT:  .LBB0_1: # %forcond
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    cmpl 16(%ebp), %eax
+; CHECK-NEXT:    jge .LBB0_3
+; CHECK-NEXT:  # %bb.2: # %forbody
 ; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    movl 8(%ebp), %ecx
@@ -30,12 +34,8 @@ define void @update(<3 x float>* %dst, <
 ; CHECK-NEXT:    extractps $1, %xmm1, 4(%ecx,%eax)
 ; CHECK-NEXT:    movss %xmm1, (%ecx,%eax)
 ; CHECK-NEXT:    incl {{[0-9]+}}(%esp)
-; CHECK-NEXT:  .LBB0_1: # %forcond
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    cmpl 16(%ebp), %eax
-; CHECK-NEXT:    jl .LBB0_2
-; CHECK-NEXT:  # %bb.3: # %afterfor
+; CHECK-NEXT:    jmp .LBB0_1
+; CHECK-NEXT:  .LBB0_3: # %afterfor
 ; CHECK-NEXT:    movl %ebp, %esp
 ; CHECK-NEXT:    popl %ebp
 ; CHECK-NEXT:    retl

Modified: llvm/trunk/test/CodeGen/X86/widen_cast-4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widen_cast-4.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widen_cast-4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widen_cast-4.ll Fri Jun 14 16:08:59 2019
@@ -11,9 +11,13 @@ define void @update(i64* %dst_i, i64* %s
 ; NARROW-NEXT:    movl $0, (%esp)
 ; NARROW-NEXT:    pcmpeqd %xmm0, %xmm0
 ; NARROW-NEXT:    movdqa {{.*#+}} xmm1 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; NARROW-NEXT:    jmp .LBB0_1
 ; NARROW-NEXT:    .p2align 4, 0x90
-; NARROW-NEXT:  .LBB0_2: # %forbody
+; NARROW-NEXT:  .LBB0_1: # %forcond
+; NARROW-NEXT:    # =>This Inner Loop Header: Depth=1
+; NARROW-NEXT:    movl (%esp), %eax
+; NARROW-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
+; NARROW-NEXT:    jge .LBB0_3
+; NARROW-NEXT:  # %bb.2: # %forbody
 ; NARROW-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; NARROW-NEXT:    movl (%esp), %eax
 ; NARROW-NEXT:    leal (,%eax,8), %ecx
@@ -30,12 +34,8 @@ define void @update(i64* %dst_i, i64* %s
 ; NARROW-NEXT:    pshufb %xmm1, %xmm2
 ; NARROW-NEXT:    movq %xmm2, (%edx,%eax,8)
 ; NARROW-NEXT:    incl (%esp)
-; NARROW-NEXT:  .LBB0_1: # %forcond
-; NARROW-NEXT:    # =>This Inner Loop Header: Depth=1
-; NARROW-NEXT:    movl (%esp), %eax
-; NARROW-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
-; NARROW-NEXT:    jl .LBB0_2
-; NARROW-NEXT:  # %bb.3: # %afterfor
+; NARROW-NEXT:    jmp .LBB0_1
+; NARROW-NEXT:  .LBB0_3: # %afterfor
 ; NARROW-NEXT:    addl $12, %esp
 ; NARROW-NEXT:    retl
 ;
@@ -46,9 +46,13 @@ define void @update(i64* %dst_i, i64* %s
 ; WIDE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; WIDE-NEXT:    movdqa {{.*#+}} xmm1 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
 ; WIDE-NEXT:    movdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
-; WIDE-NEXT:    jmp .LBB0_1
 ; WIDE-NEXT:    .p2align 4, 0x90
-; WIDE-NEXT:  .LBB0_2: # %forbody
+; WIDE-NEXT:  .LBB0_1: # %forcond
+; WIDE-NEXT:    # =>This Inner Loop Header: Depth=1
+; WIDE-NEXT:    movl (%esp), %eax
+; WIDE-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
+; WIDE-NEXT:    jge .LBB0_3
+; WIDE-NEXT:  # %bb.2: # %forbody
 ; WIDE-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; WIDE-NEXT:    movl (%esp), %eax
 ; WIDE-NEXT:    leal (,%eax,8), %ecx
@@ -65,12 +69,8 @@ define void @update(i64* %dst_i, i64* %s
 ; WIDE-NEXT:    psubb %xmm2, %xmm3
 ; WIDE-NEXT:    movq %xmm3, (%edx,%eax,8)
 ; WIDE-NEXT:    incl (%esp)
-; WIDE-NEXT:  .LBB0_1: # %forcond
-; WIDE-NEXT:    # =>This Inner Loop Header: Depth=1
-; WIDE-NEXT:    movl (%esp), %eax
-; WIDE-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
-; WIDE-NEXT:    jl .LBB0_2
-; WIDE-NEXT:  # %bb.3: # %afterfor
+; WIDE-NEXT:    jmp .LBB0_1
+; WIDE-NEXT:  .LBB0_3: # %afterfor
 ; WIDE-NEXT:    addl $12, %esp
 ; WIDE-NEXT:    retl
 entry:

Modified: llvm/trunk/test/CodeGen/X86/x86-cmov-converter.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/x86-cmov-converter.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/x86-cmov-converter.ll (original)
+++ llvm/trunk/test/CodeGen/X86/x86-cmov-converter.ll Fri Jun 14 16:08:59 2019
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=x86_64-pc-linux -x86-cmov-converter=true -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap %s
+; RUN: llc -mtriple=x86_64-pc-linux -x86-cmov-converter=true -verify-machineinstrs -disable-block-placement < %s | FileCheck -allow-deprecated-dag-overlap %s
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; This test checks that x86-cmov-converter optimization transform CMOV

Modified: llvm/trunk/test/DebugInfo/X86/PR37234.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/DebugInfo/X86/PR37234.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/DebugInfo/X86/PR37234.ll (original)
+++ llvm/trunk/test/DebugInfo/X86/PR37234.ll Fri Jun 14 16:08:59 2019
@@ -21,18 +21,18 @@
 ; CHECK-LABEL: # %bb.{{.*}}:
 ; CHECK:        #DEBUG_VALUE: main:aa <- 0
 ; CHECK: 	#DEBUG_VALUE: main:aa <- $[[REG:[0-9a-z]+]]
-; CHECK: 	jmp	.LBB0_1
-; CHECK: .LBB0_2:
+; CHECK: .LBB0_1:
+; CHECK:        #DEBUG_VALUE: main:aa <- $[[REG]]
+; CHECK:        je      .LBB0_4
+; CHECK: # %bb.{{.*}}:
 ; CHECK:        #DEBUG_VALUE: main:aa <- $[[REG]]
 ; CHECK:        jne     .LBB0_1
 ; CHECK: # %bb.{{.*}}:
 ; CHECK:        #DEBUG_VALUE: main:aa <- $[[REG]]
 ; CHECK:        incl    %[[REG]]
 ; CHECK:        #DEBUG_VALUE: main:aa <- $[[REG]]
-; CHECK: .LBB0_1:
-; CHECK: 	#DEBUG_VALUE: main:aa <- $[[REG]]
-; CHECK:        jne     .LBB0_2
-; CHECK: # %bb.{{.*}}:
+; CHECK:        jmp     .LBB0_1
+; CHECK: .LBB0_4:
 ; CHECK: 	#DEBUG_VALUE: main:aa <- $[[REG]]
 ; CHECK: 	retq
 

Modified: llvm/trunk/test/DebugInfo/X86/dbg-value-transfer-order.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/DebugInfo/X86/dbg-value-transfer-order.ll?rev=363471&r1=363470&r2=363471&view=diff
==============================================================================
--- llvm/trunk/test/DebugInfo/X86/dbg-value-transfer-order.ll (original)
+++ llvm/trunk/test/DebugInfo/X86/dbg-value-transfer-order.ll Fri Jun 14 16:08:59 2019
@@ -24,6 +24,12 @@
 ; with the Orders insertion point vector.
 
 ; CHECK-LABEL: f: # @f
+; CHECK: .LBB0_4:
+;        Check that this DEBUG_VALUE comes before the left shift.
+; CHECK:         #DEBUG_VALUE: bit_offset <- $ecx
+; CHECK:         .cv_loc 0 1 8 28                # t.c:8:28
+; CHECK:         movl    $1, %[[reg:[^ ]*]]
+; CHECK:         shll    %cl, %[[reg]]
 ; CHECK: .LBB0_2:                                # %while.body
 ; CHECK:         movl    $32, %ecx
 ; CHECK:         testl   {{.*}}
@@ -31,12 +37,7 @@
 ; CHECK: # %bb.3:                                 # %if.then
 ; CHECK:         callq   if_then
 ; CHECK:         movl    %eax, %ecx
-; CHECK: .LBB0_4:                                # %if.end
-;        Check that this DEBUG_VALUE comes before the left shift.
-; CHECK:         #DEBUG_VALUE: bit_offset <- $ecx
-; CHECK:         .cv_loc 0 1 8 28                # t.c:8:28
-; CHECK:         movl    $1, %[[reg:[^ ]*]]
-; CHECK:         shll    %cl, %[[reg]]
+; CHECK:         jmp     .LBB0_4
 
 ; ModuleID = 't.c'
 source_filename = "t.c"




More information about the llvm-commits mailing list