[clang] [llvm] [IndVarSimplify] Sink unused l-invariant loads in preheader. (PR #157559)

Vigneshwar Jayakumar via cfe-commits cfe-commits at lists.llvm.org
Wed Sep 24 11:27:59 PDT 2025


https://github.com/VigneshwarJ updated https://github.com/llvm/llvm-project/pull/157559

>From 63d81f0c3e09d27cd048657d81f12a63bcb89796 Mon Sep 17 00:00:00 2001
From: vigneshwar jayakumar <vigneshwar.jayakumar at amd.com>
Date: Mon, 8 Sep 2025 16:24:50 -0500
Subject: [PATCH 1/3] [IndVarSimplify] Sink unused l-invariant loads in
 preheader.

Unused loop invariant loads were not sunk from preheader to exit block
which increased live range.

This commit sinks unused invariant loads from preheader to exit block
when the load's memory location is not modified within the loop body.
---
 llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 50 ++++++++++++---
 .../IndVarSimplify/sink-from-preheader.ll     | 61 +++++++++++++++++++
 .../Transforms/LoopSimplify/ashr-crash.ll     |  2 +-
 3 files changed, 102 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index c32731185afd0..61138e379607c 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -130,6 +130,7 @@ class IndVarSimplify {
   const DataLayout &DL;
   TargetLibraryInfo *TLI;
   const TargetTransformInfo *TTI;
+  AliasAnalysis *AA;
   std::unique_ptr<MemorySSAUpdater> MSSAU;
 
   SmallVector<WeakTrackingVH, 16> DeadInsts;
@@ -162,8 +163,9 @@ class IndVarSimplify {
 public:
   IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                  const DataLayout &DL, TargetLibraryInfo *TLI,
-                 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
-      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
+                 TargetTransformInfo *TTI, AliasAnalysis *AA, MemorySSA *MSSA,
+                 bool WidenIndVars)
+      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI), AA(AA),
         WidenIndVars(WidenIndVars) {
     if (MSSA)
       MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
@@ -1089,6 +1091,16 @@ bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
   if (!Preheader) return false;
 
   bool MadeAnyChanges = false;
+
+  // Collect all store instructions that may modify memory in the loop.
+  SmallPtrSet<Instruction *, 8> Stores;
+  for (BasicBlock *BB : L->blocks()) {
+    for (Instruction &I : *BB) {
+      if (I.mayWriteToMemory())
+        Stores.insert(&I);
+    }
+  }
+
   for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) {
 
     // Skip BB Terminator.
@@ -1100,14 +1112,32 @@ bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
       break;
 
     // Don't move instructions which might have side effects, since the side
-    // effects need to complete before instructions inside the loop.  Also don't
-    // move instructions which might read memory, since the loop may modify
-    // memory. Note that it's okay if the instruction might have undefined
-    // behavior: LoopSimplify guarantees that the preheader dominates the exit
-    // block.
-    if (I.mayHaveSideEffects() || I.mayReadFromMemory())
+    // effects need to complete before instructions inside the loop. Note that
+    // it's okay if the instruction might have undefined behavior: LoopSimplify
+    // guarantees that the preheader dominates the exit block.
+    if (I.mayHaveSideEffects())
       continue;
 
+    // Don't sink read instruction which's memory might be modified in the loop.
+    if (I.mayReadFromMemory()) {
+      if (LoadInst *Load = dyn_cast<LoadInst>(&I)) {
+        MemoryLocation Loc = MemoryLocation::get(Load);
+        bool isModified = false;
+
+        // Check if any store instruction in the loop modifies the loaded memory
+        // location.
+        for (Instruction *S : Stores) {
+          if (isModSet(AA->getModRefInfo(S, Loc))) {
+            isModified = true;
+            break;
+          }
+        }
+        if (isModified)
+          continue;
+      } else {
+        continue;
+      }
+    }
     // Skip debug or pseudo instructions.
     if (I.isDebugOrPseudoInst())
       continue;
@@ -2042,8 +2072,8 @@ PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
   Function *F = L.getHeader()->getParent();
   const DataLayout &DL = F->getDataLayout();
 
-  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
-                     WidenIndVars && AllowIVWidening);
+  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, &AR.AA,
+                     AR.MSSA, WidenIndVars && AllowIVWidening);
   if (!IVS.run(&L))
     return PreservedAnalyses::all();
 
diff --git a/llvm/test/Transforms/IndVarSimplify/sink-from-preheader.ll b/llvm/test/Transforms/IndVarSimplify/sink-from-preheader.ll
index 89583f9131518..8d714c775bde5 100644
--- a/llvm/test/Transforms/IndVarSimplify/sink-from-preheader.ll
+++ b/llvm/test/Transforms/IndVarSimplify/sink-from-preheader.ll
@@ -30,3 +30,64 @@ loop:
 exit:
   ret i32 %add
 }
+
+define i32 @test_with_unused_load(i32 %a, ptr %b, i32 %N) {
+; CHECK-LABEL: @test_with_unused_load(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[IV_NEXT]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[B:%.*]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[A:%.*]], [[LOAD]]
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+entry:
+  %load = load i32, ptr %b
+  %add = add i32 %a, %load
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  %iv.next = add i32 %iv, 1
+  %cmp = icmp slt i32 %iv.next, %N
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i32 %add
+}
+
+; load's memory location is modified inside the loop, don't sink load.
+define i32 @test_with_unused_load_modified_store(i32 %a, ptr %b, i32 %N) {
+; CHECK-LABEL: @test_with_unused_load_modified_store(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[B:%.*]], align 4
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT:    store i32 [[IV_NEXT]], ptr [[B]], align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[IV_NEXT]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[A:%.*]], [[LOAD]]
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+entry:
+  %load = load i32, ptr %b
+  %add = add i32 %a, %load
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  %iv.next = add i32 %iv, 1
+  store i32 %iv.next, ptr %b
+  %cmp = icmp slt i32 %iv.next, %N
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i32 %add
+}
diff --git a/llvm/test/Transforms/LoopSimplify/ashr-crash.ll b/llvm/test/Transforms/LoopSimplify/ashr-crash.ll
index 85198eb19aad6..ae4f9adf8e16c 100644
--- a/llvm/test/Transforms/LoopSimplify/ashr-crash.ll
+++ b/llvm/test/Transforms/LoopSimplify/ashr-crash.ll
@@ -29,11 +29,11 @@ define void @foo() {
 ; CHECK-LABEL: define void @foo() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    store i32 0, ptr @d, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @c, align 4
 ; CHECK-NEXT:    br label [[FOR_COND1_PREHEADER:%.*]]
 ; CHECK:       for.cond1.preheader:
 ; CHECK-NEXT:    br label [[FOR_BODY3:%.*]]
 ; CHECK:       for.body3:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @c, align 4
 ; CHECK-NEXT:    store i32 1, ptr @a, align 4
 ; CHECK-NEXT:    store i32 1, ptr @d, align 4
 ; CHECK-NEXT:    [[CMP4_LE_LE_INV:%.*]] = icmp sgt i32 [[TMP0]], 0

>From 61ad654c0b2c45e3235d065ab5c4162e79b23be2 Mon Sep 17 00:00:00 2001
From: vigneshwar jayakumar <vigneshwar.jayakumar at amd.com>
Date: Mon, 8 Sep 2025 18:44:18 -0500
Subject: [PATCH 2/3] fix tests

---
 clang/test/OpenMP/bug56913.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/clang/test/OpenMP/bug56913.c b/clang/test/OpenMP/bug56913.c
index fad9e17ac4dd8..8409f2933922b 100644
--- a/clang/test/OpenMP/bug56913.c
+++ b/clang/test/OpenMP/bug56913.c
@@ -19,12 +19,12 @@ void loop(int n) {
 // CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[N]], 0
 // CHECK-NEXT:    br i1 [[CMP]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END:%.*]]
 // CHECK:       simd.if.then:
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @j, align 4, !tbaa [[TBAA2:![0-9]+]]
 // CHECK-NEXT:    call void @llvm.lifetime.start.p0(ptr nonnull [[J]]) #[[ATTR2:[0-9]+]]
-// CHECK-NEXT:    store ptr [[J]], ptr @u, align 8, !tbaa [[TBAA6:![0-9]+]], !llvm.access.group [[ACC_GRP8:![0-9]+]]
+// CHECK-NEXT:    store ptr [[J]], ptr @u, align 8, !tbaa [[TBAA2:![0-9]+]], !llvm.access.group [[ACC_GRP7:![0-9]+]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @j, align 4, !tbaa [[TBAA8:![0-9]+]]
 // CHECK-NEXT:    [[INC_LE:%.*]] = add i32 [[TMP0]], [[N]]
-// CHECK-NEXT:    store i32 [[INC_LE]], ptr [[J]], align 4, !tbaa [[TBAA2]]
-// CHECK-NEXT:    store i32 [[INC_LE]], ptr @j, align 4, !tbaa [[TBAA2]]
+// CHECK-NEXT:    store i32 [[INC_LE]], ptr [[J]], align 4, !tbaa [[TBAA8]]
+// CHECK-NEXT:    store i32 [[INC_LE]], ptr @j, align 4, !tbaa [[TBAA8]]
 // CHECK-NEXT:    call void @llvm.lifetime.end.p0(ptr nonnull [[J]]) #[[ATTR2]]
 // CHECK-NEXT:    br label [[SIMD_IF_END]]
 // CHECK:       simd.if.end:

>From fe37acbd4cc96118944f6dbb297694d1729ceabc Mon Sep 17 00:00:00 2001
From: vigneshwar jayakumar <vigneshwar.jayakumar at amd.com>
Date: Wed, 24 Sep 2025 13:27:15 -0500
Subject: [PATCH 3/3] moved the code to LICM

---
 clang/test/OpenMP/bug56913.c                  |  12 +-
 llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 113 -----------
 llvm/lib/Transforms/Scalar/LICM.cpp           | 109 ++++++++++
 .../AMDGPU/schedule-amdgpu-trackers.ll        |   6 +-
 .../AMDGPU/addrspace-7-doesnt-crash.ll        |   2 +-
 .../X86/inner-loop-by-latch-cond.ll           |   2 +-
 .../IndVarSimplify/exit-count-select.ll       |  14 +-
 .../IndVarSimplify/finite-exit-comparisons.ll |   6 +-
 .../Transforms/IndVarSimplify/pr116483.ll     |   8 +-
 .../test/Transforms/IndVarSimplify/pr24783.ll |   2 +-
 .../test/Transforms/IndVarSimplify/pr39673.ll |   2 +-
 .../test/Transforms/IndVarSimplify/pr63763.ll |   6 +-
 .../IndVarSimplify/replace-loop-exit-folds.ll |  21 +-
 .../rewrite-loop-exit-values-phi.ll           |   8 +-
 .../scev-expander-preserve-lcssa.ll           |  14 +-
 .../IndVarSimplify/scev-invalidation.ll       |   4 +-
 .../Transforms/IndVarSimplify/sentinel.ll     |  14 +-
 .../IndVarSimplify/sink-from-preheader.ll     |  93 ---------
 .../IndVarSimplify/sink-trapping.ll           |  19 --
 .../Transforms/IndVarSimplify/zext-nuw.ll     |   2 +-
 llvm/test/Transforms/LICM/scalar-promote.ll   |   6 +-
 .../Transforms/LICM/sink-from-preheader.ll    | 187 ++++++++++++++++++
 llvm/test/Transforms/LICM/sink-trapping.ll    |  19 ++
 .../invalidate-scev-after-hoisting.ll         |   2 +-
 .../LoopDistribute/laa-invalidation.ll        |   2 +-
 .../Transforms/LoopSimplify/ashr-crash.ll     |   2 +-
 .../invariant-store-vectorization.ll          |   2 +-
 .../X86/pr48844-br-to-switch-vectorization.ll |  10 +-
 28 files changed, 388 insertions(+), 299 deletions(-)
 delete mode 100644 llvm/test/Transforms/IndVarSimplify/sink-from-preheader.ll
 delete mode 100644 llvm/test/Transforms/IndVarSimplify/sink-trapping.ll
 create mode 100644 llvm/test/Transforms/LICM/sink-from-preheader.ll
 create mode 100644 llvm/test/Transforms/LICM/sink-trapping.ll

diff --git a/clang/test/OpenMP/bug56913.c b/clang/test/OpenMP/bug56913.c
index 945fe4834d5cd..fa5e46d30ae85 100644
--- a/clang/test/OpenMP/bug56913.c
+++ b/clang/test/OpenMP/bug56913.c
@@ -17,14 +17,14 @@ void loop(int n) {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[J:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[N]], 0
-// CHECK-NEXT:    br i1 [[CMP]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END:%.*]]
-// CHECK:       simd.if.then:
+// CHECK-NEXT:    br i1 [[CMP]], label %[[SIMD_IF_THEN:.*]], label %[[SIMD_IF_END:.*]]
+// CHECK:       [[SIMD_IF_THEN]]:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @j, align 4, !tbaa [[INT_TBAA2:![0-9]+]]
 // CHECK-NEXT:    call void @llvm.lifetime.start.p0(ptr nonnull [[J]]) #[[ATTR2:[0-9]+]]
-// CHECK-NEXT:    store ptr [[J]], ptr @u, align 8, !tbaa [[TBAA2:![0-9]+]], !llvm.access.group [[ACC_GRP7:![0-9]+]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @j, align 4, !tbaa [[TBAA8:![0-9]+]]
+// CHECK-NEXT:    store ptr [[J]], ptr @u, align 8, !tbaa [[INTPTR_TBAA6:![0-9]+]], !llvm.access.group [[ACC_GRP9:![0-9]+]]
 // CHECK-NEXT:    [[INC_LE:%.*]] = add i32 [[TMP0]], [[N]]
-// CHECK-NEXT:    store i32 [[INC_LE]], ptr [[J]], align 4, !tbaa [[TBAA8]]
-// CHECK-NEXT:    store i32 [[INC_LE]], ptr @j, align 4, !tbaa [[TBAA8]]
+// CHECK-NEXT:    store i32 [[INC_LE]], ptr [[J]], align 4, !tbaa [[INT_TBAA2]]
+// CHECK-NEXT:    store i32 [[INC_LE]], ptr @j, align 4, !tbaa [[INT_TBAA2]]
 // CHECK-NEXT:    call void @llvm.lifetime.end.p0(ptr nonnull [[J]]) #[[ATTR2]]
 // CHECK-NEXT:    br label %[[SIMD_IF_END]]
 // CHECK:       [[SIMD_IF_END]]:
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 61138e379607c..4d60c9cf6755b 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -158,8 +158,6 @@ class IndVarSimplify {
                                  const SCEV *ExitCount,
                                  PHINode *IndVar, SCEVExpander &Rewriter);
 
-  bool sinkUnusedInvariants(Loop *L);
-
 public:
   IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                  const DataLayout &DL, TargetLibraryInfo *TLI,
@@ -1076,113 +1074,6 @@ linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
   return true;
 }
 
-//===----------------------------------------------------------------------===//
-//  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
-//===----------------------------------------------------------------------===//
-
-/// If there's a single exit block, sink any loop-invariant values that
-/// were defined in the preheader but not used inside the loop into the
-/// exit block to reduce register pressure in the loop.
-bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
-  BasicBlock *ExitBlock = L->getExitBlock();
-  if (!ExitBlock) return false;
-
-  BasicBlock *Preheader = L->getLoopPreheader();
-  if (!Preheader) return false;
-
-  bool MadeAnyChanges = false;
-
-  // Collect all store instructions that may modify memory in the loop.
-  SmallPtrSet<Instruction *, 8> Stores;
-  for (BasicBlock *BB : L->blocks()) {
-    for (Instruction &I : *BB) {
-      if (I.mayWriteToMemory())
-        Stores.insert(&I);
-    }
-  }
-
-  for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) {
-
-    // Skip BB Terminator.
-    if (Preheader->getTerminator() == &I)
-      continue;
-
-    // New instructions were inserted at the end of the preheader.
-    if (isa<PHINode>(I))
-      break;
-
-    // Don't move instructions which might have side effects, since the side
-    // effects need to complete before instructions inside the loop. Note that
-    // it's okay if the instruction might have undefined behavior: LoopSimplify
-    // guarantees that the preheader dominates the exit block.
-    if (I.mayHaveSideEffects())
-      continue;
-
-    // Don't sink read instruction which's memory might be modified in the loop.
-    if (I.mayReadFromMemory()) {
-      if (LoadInst *Load = dyn_cast<LoadInst>(&I)) {
-        MemoryLocation Loc = MemoryLocation::get(Load);
-        bool isModified = false;
-
-        // Check if any store instruction in the loop modifies the loaded memory
-        // location.
-        for (Instruction *S : Stores) {
-          if (isModSet(AA->getModRefInfo(S, Loc))) {
-            isModified = true;
-            break;
-          }
-        }
-        if (isModified)
-          continue;
-      } else {
-        continue;
-      }
-    }
-    // Skip debug or pseudo instructions.
-    if (I.isDebugOrPseudoInst())
-      continue;
-
-    // Skip eh pad instructions.
-    if (I.isEHPad())
-      continue;
-
-    // Don't sink alloca: we never want to sink static alloca's out of the
-    // entry block, and correctly sinking dynamic alloca's requires
-    // checks for stacksave/stackrestore intrinsics.
-    // FIXME: Refactor this check somehow?
-    if (isa<AllocaInst>(&I))
-      continue;
-
-    // Determine if there is a use in or before the loop (direct or
-    // otherwise).
-    bool UsedInLoop = false;
-    for (Use &U : I.uses()) {
-      Instruction *User = cast<Instruction>(U.getUser());
-      BasicBlock *UseBB = User->getParent();
-      if (PHINode *P = dyn_cast<PHINode>(User)) {
-        unsigned i =
-          PHINode::getIncomingValueNumForOperand(U.getOperandNo());
-        UseBB = P->getIncomingBlock(i);
-      }
-      if (UseBB == Preheader || L->contains(UseBB)) {
-        UsedInLoop = true;
-        break;
-      }
-    }
-
-    // If there is, the def must remain in the preheader.
-    if (UsedInLoop)
-      continue;
-
-    // Otherwise, sink it to the exit block.
-    I.moveBefore(ExitBlock->getFirstInsertionPt());
-    SE->forgetValue(&I);
-    MadeAnyChanges = true;
-  }
-
-  return MadeAnyChanges;
-}
-
 static void replaceExitCond(BranchInst *BI, Value *NewCond,
                             SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
   auto *OldCond = BI->getCondition();
@@ -2045,10 +1936,6 @@ bool IndVarSimplify::run(Loop *L) {
 
   // The Rewriter may not be used from this point on.
 
-  // Loop-invariant instructions in the preheader that aren't used in the
-  // loop may be sunk below the loop to reduce register pressure.
-  Changed |= sinkUnusedInvariants(L);
-
   // rewriteFirstIterationLoopExitValues does not rely on the computation of
   // trip count and therefore can further simplify exit values in addition to
   // rewriteLoopExitValues.
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index bab1f2a90a8fd..da07a2f0e2c45 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -38,6 +38,7 @@
 
 #include "llvm/Transforms/Scalar/LICM.h"
 #include "llvm/ADT/PriorityWorklist.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetOperations.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
@@ -213,6 +214,10 @@ static void moveInstructionBefore(Instruction &I, BasicBlock::iterator Dest,
                                   ICFLoopSafetyInfo &SafetyInfo,
                                   MemorySSAUpdater &MSSAU, ScalarEvolution *SE);
 
+static bool sinkUnusedInvariantsFromPreheaderToExit(
+    Loop *L, AAResults *AA, ICFLoopSafetyInfo *SafetyInfo,
+    MemorySSAUpdater &MSSAU, ScalarEvolution *SE);
+
 static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
                                 function_ref<void(Instruction *)> Fn);
 using PointersAndHasReadsOutsideSet =
@@ -546,6 +551,12 @@ bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI,
 
   if (Changed && SE)
     SE->forgetLoopDispositions();
+
+  // sink pre-header defs that are unused in-loop into the unique exit to reduce
+  // pressure.
+  Changed |=
+      sinkUnusedInvariantsFromPreheaderToExit(L, AA, &SafetyInfo, MSSAU, SE);
+
   return Changed;
 }
 
@@ -1469,6 +1480,104 @@ static void moveInstructionBefore(Instruction &I, BasicBlock::iterator Dest,
     SE->forgetBlockAndLoopDispositions(&I);
 }
 
+// If there's a single exit block, sink any loop-invariant values that were
+// defined in the preheader but not used inside the loop into the exit block
+// to reduce register pressure in the loop.
+static bool sinkUnusedInvariantsFromPreheaderToExit(
+    Loop *L, AAResults *AA, ICFLoopSafetyInfo *SafetyInfo,
+    MemorySSAUpdater &MSSAU, ScalarEvolution *SE) {
+  BasicBlock *ExitBlock = L->getExitBlock();
+  if (!ExitBlock)
+    return false;
+
+  BasicBlock *Preheader = L->getLoopPreheader();
+  if (!Preheader)
+    return false;
+
+  bool MadeAnyChanges = false;
+
+  for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) {
+
+    // Skip terminator.
+    if (Preheader->getTerminator() == &I)
+      continue;
+
+    // New instructions were inserted at the end of the preheader.
+    if (isa<PHINode>(I))
+      break;
+
+    // Don't move instructions which might have side effects, since the side
+    // effects need to complete before instructions inside the loop. Note that
+    // it's okay if the instruction might have undefined behavior: LoopSimplify
+    // guarantees that the preheader dominates the exit block.
+    if (I.mayHaveSideEffects())
+      continue;
+
+    // Don't sink reads unless MemorySSA says the location is not clobbered
+    // within the loop.
+    if (I.mayReadFromMemory()) {
+      auto *LI = dyn_cast<LoadInst>(&I);
+      // Only handle loads here; be conservative for other memory-reading ops.
+      if (!LI)
+        continue;
+      // Do not move volatile or ordered/atomic loads.
+      if (!LI->isUnordered() || LI->isAtomic())
+        continue;
+
+      MemorySSA *MSSA = MSSAU.getMemorySSA();
+      auto *MU = dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(LI));
+      if (!MU)
+        // Conservatively skip if MSSA has no use for this load.
+        continue;
+
+      SinkAndHoistLICMFlags Flags(SetLicmMssaOptCap,
+                                  SetLicmMssaNoAccForPromotionCap,
+                                  /*IsSink=*/true, *L, *MSSA);
+      bool InvariantGroup = LI->hasMetadata(LLVMContext::MD_invariant_group);
+
+      // If the pointer could be invalidated in the loop, do not sink.
+      if (pointerInvalidatedByLoop(MSSA, MU, L, I, Flags, InvariantGroup))
+        continue;
+    }
+
+    // Skip debug/pseudo and EH pad.
+    if (I.isDebugOrPseudoInst() || I.isEHPad())
+      continue;
+
+    // Don't sink alloca: we never want to sink static alloca's out of the
+    // entry block, and correctly sinking dynamic alloca's requires
+    // checks for stacksave/stackrestore intrinsics.
+    // FIXME: Refactor this check somehow?
+    if (isa<AllocaInst>(&I))
+      continue;
+
+    // Determine if there is a use in or before the loop (direct or
+    // otherwise).
+    bool UsedInLoopOrPreheader = false;
+    for (Use &U : I.uses()) {
+      auto *UserI = cast<Instruction>(U.getUser());
+      BasicBlock *UseBB = UserI->getParent();
+      if (auto *PN = dyn_cast<PHINode>(UserI)) {
+        unsigned OpIdx =
+            PHINode::getIncomingValueNumForOperand(U.getOperandNo());
+        UseBB = PN->getIncomingBlock(OpIdx);
+      }
+      if (UseBB == Preheader || L->contains(UseBB)) {
+        UsedInLoopOrPreheader = true;
+        break;
+      }
+    }
+    if (UsedInLoopOrPreheader)
+      continue;
+
+    moveInstructionBefore(I, ExitBlock->getFirstInsertionPt(), *SafetyInfo,
+                          MSSAU, SE);
+    MadeAnyChanges = true;
+  }
+
+  return MadeAnyChanges;
+}
+
 static Instruction *sinkThroughTriviallyReplaceablePHI(
     PHINode *TPN, Instruction *I, LoopInfo *LI,
     SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies,
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-trackers.ll b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-trackers.ll
index c5732531f5423..48ed5c4dedfb2 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-trackers.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-trackers.ll
@@ -73,10 +73,10 @@ define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(ptr addrspace(1) %
 }
 
 ; CHECK-LABEL: {{^}}excess_soft_clause_reg_pressure:
-; GFX908:    NumSgprs: 64
-; GFX908-GCNTRACKERS:    NumSgprs: 64
+; GFX908:    NumSgprs: 56
+; GFX908-GCNTRACKERS:    NumSgprs: 56
 ; GFX908:    NumVgprs: 43
-; GFX908-GCNTRACKERS:    NumVgprs: 39
+; GFX908-GCNTRACKERS:    NumVgprs: 40
 ; GFX908:    Occupancy: 5
 ; GFX908-GCNTRACKERS:    Occupancy: 6
 
diff --git a/llvm/test/Transforms/IndVarSimplify/AMDGPU/addrspace-7-doesnt-crash.ll b/llvm/test/Transforms/IndVarSimplify/AMDGPU/addrspace-7-doesnt-crash.ll
index 08dcf1d7a0091..8e932e0c00d4f 100644
--- a/llvm/test/Transforms/IndVarSimplify/AMDGPU/addrspace-7-doesnt-crash.ll
+++ b/llvm/test/Transforms/IndVarSimplify/AMDGPU/addrspace-7-doesnt-crash.ll
@@ -7,11 +7,11 @@ define void @f(ptr addrspace(7) %arg) {
 ; CHECK-LABEL: define void @f
 ; CHECK-SAME: (ptr addrspace(7) [[ARG:%.*]]) {
 ; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr addrspace(7) [[ARG]], i32 8
 ; CHECK-NEXT:    br label [[BB1:%.*]]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    br i1 false, label [[BB2:%.*]], label [[BB1]]
 ; CHECK:       bb2:
-; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr addrspace(7) [[ARG]], i32 8
 ; CHECK-NEXT:    br label [[BB3:%.*]]
 ; CHECK:       bb3:
 ; CHECK-NEXT:    [[I4:%.*]] = load i32, ptr addrspace(7) [[SCEVGEP]], align 4
diff --git a/llvm/test/Transforms/IndVarSimplify/X86/inner-loop-by-latch-cond.ll b/llvm/test/Transforms/IndVarSimplify/X86/inner-loop-by-latch-cond.ll
index 0fa6e34cf186e..0eb9debce8177 100644
--- a/llvm/test/Transforms/IndVarSimplify/X86/inner-loop-by-latch-cond.ll
+++ b/llvm/test/Transforms/IndVarSimplify/X86/inner-loop-by-latch-cond.ll
@@ -14,6 +14,7 @@ define void @test(i64 %a) {
 ; CHECK:       outer_header:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[OUTER_LATCH:%.*]] ], [ 21, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ 20, [[ENTRY]] ], [ [[I_NEXT:%.*]], [[OUTER_LATCH]] ]
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
 ; CHECK-NEXT:    br label [[INNER_HEADER:%.*]]
 ; CHECK:       inner_header:
 ; CHECK-NEXT:    [[J:%.*]] = phi i64 [ 1, [[OUTER_HEADER]] ], [ [[J_NEXT:%.*]], [[INNER_HEADER]] ]
@@ -22,7 +23,6 @@ define void @test(i64 %a) {
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[J_NEXT]], [[INDVARS_IV]]
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[INNER_HEADER]], label [[OUTER_LATCH]]
 ; CHECK:       outer_latch:
-; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
 ; CHECK-NEXT:    [[COND2:%.*]] = icmp ne i64 [[I_NEXT]], 40
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    br i1 [[COND2]], label [[OUTER_HEADER]], label [[RETURN:%.*]]
diff --git a/llvm/test/Transforms/IndVarSimplify/exit-count-select.ll b/llvm/test/Transforms/IndVarSimplify/exit-count-select.ll
index 1592b84480e3f..829092f2f4bd4 100644
--- a/llvm/test/Transforms/IndVarSimplify/exit-count-select.ll
+++ b/llvm/test/Transforms/IndVarSimplify/exit-count-select.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=indvars -S | FileCheck %s
+; RUN: opt < %s -passes='require<scalar-evolution>,indvars,loop-mssa(licm)' -S | FileCheck %s
 
 define i32 @logical_and_2ops(i32 %n, i32 %m) {
 ; CHECK-LABEL: @logical_and_2ops(
@@ -56,10 +56,10 @@ define i32 @logical_and_3ops(i32 %n, i32 %m, i32 %k) {
 ; CHECK:       loop:
 ; CHECK-NEXT:    br i1 false, label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[K:%.*]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = freeze i32 [[M:%.*]]
-; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[TMP1]])
-; CHECK-NEXT:    [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[N:%.*]])
+; CHECK-NEXT:    [[N:%.*]] = freeze i32 [[K:%.*]]
+; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[N]])
+; CHECK-NEXT:    [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[N1:%.*]])
 ; CHECK-NEXT:    ret i32 [[UMIN1]]
 ;
 entry:
@@ -84,10 +84,10 @@ define i32 @logical_or_3ops(i32 %n, i32 %m, i32 %k) {
 ; CHECK:       loop:
 ; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[LOOP]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[K:%.*]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = freeze i32 [[M:%.*]]
-; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[TMP1]])
-; CHECK-NEXT:    [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[N:%.*]])
+; CHECK-NEXT:    [[N:%.*]] = freeze i32 [[K:%.*]]
+; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[N]])
+; CHECK-NEXT:    [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[N1:%.*]])
 ; CHECK-NEXT:    ret i32 [[UMIN1]]
 ;
 entry:
diff --git a/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll b/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll
index e006d9f6696ca..f798eb281f51a 100644
--- a/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll
+++ b/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll
@@ -932,6 +932,9 @@ for.end:                                          ; preds = %for.body, %entry
 define i16 @ult_multiuse_profit(i16 %n.raw, i8 %start) mustprogress {
 ; CHECK-LABEL: @ult_multiuse_profit(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP2:%.*]] = add i8 [[START:%.*]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[TMP2]] to i16
+; CHECK-NEXT:    [[UMAX:%.*]] = call i16 @llvm.umax.i16(i16 [[TMP1]], i16 254)
 ; CHECK-NEXT:    [[TMP0:%.*]] = trunc i16 254 to i8
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
@@ -940,9 +943,6 @@ define i16 @ult_multiuse_profit(i16 %n.raw, i8 %start) mustprogress {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[TMP0]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
 ; CHECK:       for.end:
-; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[START:%.*]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[TMP1]] to i16
-; CHECK-NEXT:    [[UMAX:%.*]] = call i16 @llvm.umax.i16(i16 [[TMP2]], i16 254)
 ; CHECK-NEXT:    ret i16 [[UMAX]]
 ;
 entry:
diff --git a/llvm/test/Transforms/IndVarSimplify/pr116483.ll b/llvm/test/Transforms/IndVarSimplify/pr116483.ll
index 093e25a3caa81..e9e0d22bf960a 100644
--- a/llvm/test/Transforms/IndVarSimplify/pr116483.ll
+++ b/llvm/test/Transforms/IndVarSimplify/pr116483.ll
@@ -4,16 +4,16 @@
 define i32 @test() {
 ; CHECK-LABEL: define i32 @test() {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    br label %[[LOOP_BODY:.*]]
-; CHECK:       [[LOOP_BODY]]:
-; CHECK-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[LOOP_BODY]]
-; CHECK:       [[EXIT]]:
 ; CHECK-NEXT:    [[XOR:%.*]] = xor i32 0, 3
 ; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[XOR]], 329
 ; CHECK-NEXT:    [[CONV:%.*]] = trunc i32 [[MUL]] to i16
 ; CHECK-NEXT:    [[SEXT:%.*]] = shl i16 [[CONV]], 8
 ; CHECK-NEXT:    [[CONV1:%.*]] = ashr i16 [[SEXT]], 8
 ; CHECK-NEXT:    [[CONV3:%.*]] = zext i16 [[CONV1]] to i32
+; CHECK-NEXT:    br label %[[LOOP_BODY:.*]]
+; CHECK:       [[LOOP_BODY]]:
+; CHECK-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[LOOP_BODY]]
+; CHECK:       [[EXIT]]:
 ; CHECK-NEXT:    ret i32 [[CONV3]]
 ;
 entry:
diff --git a/llvm/test/Transforms/IndVarSimplify/pr24783.ll b/llvm/test/Transforms/IndVarSimplify/pr24783.ll
index c521bcaf59d49..37ecf42ea0fd3 100644
--- a/llvm/test/Transforms/IndVarSimplify/pr24783.ll
+++ b/llvm/test/Transforms/IndVarSimplify/pr24783.ll
@@ -7,11 +7,11 @@ target triple = "powerpc64-unknown-linux-gnu"
 define void @f(ptr %end.s, ptr %loc, i32 %p) {
 ; CHECK-LABEL: @f(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[END:%.*]] = getelementptr inbounds i32, ptr [[END_S:%.*]], i32 [[P:%.*]]
 ; CHECK-NEXT:    br label [[WHILE_BODY_I:%.*]]
 ; CHECK:       while.body.i:
 ; CHECK-NEXT:    br i1 true, label [[LOOP_EXIT:%.*]], label [[WHILE_BODY_I]]
 ; CHECK:       loop.exit:
-; CHECK-NEXT:    [[END:%.*]] = getelementptr inbounds i32, ptr [[END_S:%.*]], i32 [[P:%.*]]
 ; CHECK-NEXT:    store ptr [[END]], ptr [[LOC:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/IndVarSimplify/pr39673.ll b/llvm/test/Transforms/IndVarSimplify/pr39673.ll
index 7b093b34b91ad..3cee1ab7be881 100644
--- a/llvm/test/Transforms/IndVarSimplify/pr39673.ll
+++ b/llvm/test/Transforms/IndVarSimplify/pr39673.ll
@@ -148,6 +148,7 @@ loop2.end:                                       ; preds = %loop2
 define i16 @neg_loop_carried(i16 %arg) {
 ; CHECK-LABEL: @neg_loop_carried(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = add i16 [[ARG:%.*]], 2
 ; CHECK-NEXT:    br label [[LOOP1:%.*]]
 ; CHECK:       loop1:
 ; CHECK-NEXT:    [[L1:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[L1_ADD:%.*]], [[LOOP1]] ]
@@ -155,7 +156,6 @@ define i16 @neg_loop_carried(i16 %arg) {
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult i16 [[L1_ADD]], 2
 ; CHECK-NEXT:    br i1 [[CMP1]], label [[LOOP1]], label [[LOOP2_PREHEADER:%.*]]
 ; CHECK:       loop2.preheader:
-; CHECK-NEXT:    [[TMP0:%.*]] = add i16 [[ARG:%.*]], 2
 ; CHECK-NEXT:    br label [[LOOP2:%.*]]
 ; CHECK:       loop2:
 ; CHECK-NEXT:    [[K2:%.*]] = phi i16 [ [[K2_ADD:%.*]], [[LOOP2]] ], [ [[TMP0]], [[LOOP2_PREHEADER]] ]
diff --git a/llvm/test/Transforms/IndVarSimplify/pr63763.ll b/llvm/test/Transforms/IndVarSimplify/pr63763.ll
index 427db1e67410a..a5fde67d6140a 100644
--- a/llvm/test/Transforms/IndVarSimplify/pr63763.ll
+++ b/llvm/test/Transforms/IndVarSimplify/pr63763.ll
@@ -16,13 +16,13 @@ define i32 @test(i1 %c) {
 ; CHECK-NEXT:    [[CONV2:%.*]] = ashr exact i32 [[SEXT]], 24
 ; CHECK-NEXT:    [[INVARIANT_OP:%.*]] = sub nsw i32 7, [[CONV2]]
 ; CHECK-NEXT:    call void @use(i32 [[INVARIANT_OP]])
+; CHECK-NEXT:    [[SEXT_US:%.*]] = shl i32 [[SEL]], 24
+; CHECK-NEXT:    [[CONV2_US:%.*]] = ashr exact i32 [[SEXT_US]], 24
+; CHECK-NEXT:    [[INVARIANT_OP_US:%.*]] = sub nsw i32 7, [[CONV2_US]]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[LOOP]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[SEXT_US:%.*]] = shl i32 [[SEL]], 24
-; CHECK-NEXT:    [[CONV2_US:%.*]] = ashr exact i32 [[SEXT_US]], 24
-; CHECK-NEXT:    [[INVARIANT_OP_US:%.*]] = sub nsw i32 7, [[CONV2_US]]
 ; CHECK-NEXT:    ret i32 [[INVARIANT_OP_US]]
 ;
 entry:
diff --git a/llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll b/llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll
index b3162de0f2245..7cdc98a6c4f7c 100644
--- a/llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll
+++ b/llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll
@@ -4,22 +4,21 @@
 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
 
 define i32 @remove_loop(i32 %size) {
-; CHECK-LABEL: define i32 @remove_loop(
-; CHECK-SAME: i32 [[SIZE:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*]]:
-; CHECK-NEXT:    br label %[[WHILE_COND:.*]]
-; CHECK:       [[WHILE_COND]]:
-; CHECK-NEXT:    [[SIZE_ADDR_0:%.*]] = phi i32 [ [[SIZE]], %[[ENTRY]] ], [ [[SUB:%.*]], %[[WHILE_COND]] ]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[SIZE_ADDR_0]], 31
-; CHECK-NEXT:    [[SUB]] = add i32 [[SIZE_ADDR_0]], -32
-; CHECK-NEXT:    br i1 [[CMP]], label %[[WHILE_COND]], label %[[WHILE_END:.*]]
-; CHECK:       [[WHILE_END]]:
-; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[SIZE]], 31
+; CHECK-LABEL: @remove_loop(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[SIZE:%.*]], 31
 ; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SIZE]], i32 31)
 ; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[TMP0]], [[UMIN]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP1]], 5
 ; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 5
 ; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 [[SIZE]], [[TMP3]]
+; CHECK-NEXT:    br label [[WHILE_COND:%.*]]
+; CHECK:       while.cond:
+; CHECK-NEXT:    [[SIZE_ADDR_0:%.*]] = phi i32 [ [[SIZE]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[WHILE_COND]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[SIZE_ADDR_0]], 31
+; CHECK-NEXT:    [[SUB]] = add i32 [[SIZE_ADDR_0]], -32
+; CHECK-NEXT:    br i1 [[CMP]], label [[WHILE_COND]], label [[WHILE_END:%.*]]
+; CHECK:       while.end:
 ; CHECK-NEXT:    ret i32 [[TMP4]]
 ;
 entry:
diff --git a/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll b/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll
index 84ae79d53e25e..41fce3681c3a3 100644
--- a/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll
+++ b/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll
@@ -76,6 +76,10 @@ define i64 @narow_canonical_iv_wide_multiplied_iv(i32 %x, i64 %y, ptr %0) {
 ; CHECK-LABEL: @narow_canonical_iv_wide_multiplied_iv(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[SMAX:%.*]] = tail call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 1)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext nneg i32 [[SMAX]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 [[Y:%.*]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = add nuw nsw i64 [[TMP3]], 1
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
@@ -84,10 +88,6 @@ define i64 @narow_canonical_iv_wide_multiplied_iv(i32 %x, i64 %y, ptr %0) {
 ; CHECK-NEXT:    [[EC:%.*]] = icmp ne i32 [[IV_NEXT]], [[SMAX]]
 ; CHECK-NEXT:    br i1 [[EC]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[TMP1:%.*]] = zext nneg i32 [[SMAX]] to i64
-; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 [[Y:%.*]], [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 1
-; CHECK-NEXT:    [[TMP6:%.*]] = add nuw nsw i64 [[TMP3]], 1
 ; CHECK-NEXT:    ret i64 [[TMP6]]
 ;
 entry:
diff --git a/llvm/test/Transforms/IndVarSimplify/scev-expander-preserve-lcssa.ll b/llvm/test/Transforms/IndVarSimplify/scev-expander-preserve-lcssa.ll
index 14e06fe06b412..aca553e536119 100644
--- a/llvm/test/Transforms/IndVarSimplify/scev-expander-preserve-lcssa.ll
+++ b/llvm/test/Transforms/IndVarSimplify/scev-expander-preserve-lcssa.ll
@@ -23,8 +23,8 @@ define void @test1(i8 %x, ptr %ptr) {
 ; CHECK-NEXT:    br label [[WHILE_COND192:%.*]]
 ; CHECK:       while.cond192:
 ; CHECK-NEXT:    switch i8 [[X:%.*]], label [[WHILE_BODY205:%.*]] [
-; CHECK-NEXT:    i8 59, label [[WHILE_COND215_PREHEADER:%.*]]
-; CHECK-NEXT:    i8 10, label [[IF_END224_LOOPEXIT1:%.*]]
+; CHECK-NEXT:      i8 59, label [[WHILE_COND215_PREHEADER:%.*]]
+; CHECK-NEXT:      i8 10, label [[IF_END224_LOOPEXIT1:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       while.cond215.preheader:
 ; CHECK-NEXT:    br label [[WHILE_COND215:%.*]]
@@ -103,8 +103,8 @@ define void @test2(i16 %x)  {
 ; CHECK-NEXT:    br label [[FOR_COND:%.*]]
 ; CHECK:       for.cond:
 ; CHECK-NEXT:    switch i16 [[X:%.*]], label [[RETURN_LOOPEXIT1:%.*]] [
-; CHECK-NEXT:    i16 41, label [[FOR_END:%.*]]
-; CHECK-NEXT:    i16 43, label [[FOR_COND]]
+; CHECK-NEXT:      i16 41, label [[FOR_END:%.*]]
+; CHECK-NEXT:      i16 43, label [[FOR_COND]]
 ; CHECK-NEXT:    ]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    [[I_0_LCSSA2:%.*]] = phi i32 [ 0, [[FOR_COND]] ]
@@ -336,6 +336,7 @@ if.end1824:                                       ; preds = %for.end1326
 define void @test5(ptr %header, i32 %conv, i8 %n) {
 ; CHECK-LABEL: @test5(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SHL:%.*]] = shl nuw nsw i32 [[CONV:%.*]], 2
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    br label [[FOR_INNER:%.*]]
@@ -358,7 +359,6 @@ define void @test5(ptr %header, i32 %conv, i8 %n) {
 ; CHECK-NEXT:    br i1 false, label [[FOR_BODY]], label [[WHILE_COND_PREHEADER:%.*]]
 ; CHECK:       while.cond.preheader:
 ; CHECK-NEXT:    [[ADD85_LCSSA:%.*]] = phi i32 [ [[ADD85]], [[FOR_INC]] ]
-; CHECK-NEXT:    [[SHL:%.*]] = shl nuw nsw i32 [[CONV:%.*]], 2
 ; CHECK-NEXT:    br label [[WHILE_COND:%.*]]
 ; CHECK:       while.cond:
 ; CHECK-NEXT:    [[POS_8:%.*]] = phi i32 [ [[INC114:%.*]], [[WHILE_BODY:%.*]] ], [ [[ADD85_LCSSA]], [[WHILE_COND_PREHEADER]] ]
@@ -427,8 +427,8 @@ define void @test6(i8 %x) {
 ; CHECK-NEXT:    br label [[WHILE_COND192:%.*]]
 ; CHECK:       while.cond192:
 ; CHECK-NEXT:    switch i8 [[X:%.*]], label [[WHILE_BODY205:%.*]] [
-; CHECK-NEXT:    i8 59, label [[WHILE_COND215_PREHEADER:%.*]]
-; CHECK-NEXT:    i8 10, label [[IF_END224:%.*]]
+; CHECK-NEXT:      i8 59, label [[WHILE_COND215_PREHEADER:%.*]]
+; CHECK-NEXT:      i8 10, label [[IF_END224:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       while.cond215.preheader:
 ; CHECK-NEXT:    [[I_7_LCSSA:%.*]] = phi i32 [ 0, [[WHILE_COND192]] ]
diff --git a/llvm/test/Transforms/IndVarSimplify/scev-invalidation.ll b/llvm/test/Transforms/IndVarSimplify/scev-invalidation.ll
index a92d328df99ca..ad69812838569 100644
--- a/llvm/test/Transforms/IndVarSimplify/scev-invalidation.ll
+++ b/llvm/test/Transforms/IndVarSimplify/scev-invalidation.ll
@@ -46,12 +46,12 @@ for.end106:                                       ; preds = %for.cond
 define i32 @test_pr58439(i32 %a) {
 ; CHECK-LABEL: @test_pr58439(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[A:%.*]], 1
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    br i1 false, label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[C_EXT_LCSSA:%.*]] = phi i32 [ 0, [[LOOP]] ]
-; CHECK-NEXT:    [[OR:%.*]] = or i32 [[A:%.*]], 1
 ; CHECK-NEXT:    [[RES:%.*]] = add i32 [[C_EXT_LCSSA]], [[OR]]
 ; CHECK-NEXT:    ret i32 [[RES]]
 ;
@@ -76,6 +76,7 @@ define i8 @l(i32 %inc, i1 %tobool.not.i) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[OUTER_HEADER:%.*]]
 ; CHECK:       outer.header:
+; CHECK-NEXT:    [[AND:%.*]] = and i32 1, [[INC:%.*]]
 ; CHECK-NEXT:    br label [[INNER:%.*]]
 ; CHECK:       inner:
 ; CHECK-NEXT:    [[C_05_I:%.*]] = phi i32 [ [[INC_I:%.*]], [[INNER]] ], [ 0, [[OUTER_HEADER]] ]
@@ -86,7 +87,6 @@ define i8 @l(i32 %inc, i1 %tobool.not.i) {
 ; CHECK:       outer.latch:
 ; CHECK-NEXT:    [[C_05_I_LCSSA:%.*]] = phi i32 [ [[C_05_I]], [[INNER]] ]
 ; CHECK-NEXT:    [[LCSSA:%.*]] = phi i32 [ 0, [[INNER]] ]
-; CHECK-NEXT:    [[AND:%.*]] = and i32 1, [[INC:%.*]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[AND]] to i8
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[C_05_I_LCSSA]] to i8
 ; CHECK-NEXT:    [[TMP2:%.*]] = sub i8 [[TMP0]], [[TMP1]]
diff --git a/llvm/test/Transforms/IndVarSimplify/sentinel.ll b/llvm/test/Transforms/IndVarSimplify/sentinel.ll
index 523414167956b..4f12308f3b01a 100644
--- a/llvm/test/Transforms/IndVarSimplify/sentinel.ll
+++ b/llvm/test/Transforms/IndVarSimplify/sentinel.ll
@@ -9,19 +9,19 @@ define void @test(i1 %arg) personality ptr @snork {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    br label [[BB4:%.*]]
 ; CHECK:       bb1:
-; CHECK-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add i32 [[INDVARS_IV:%.*]], 1
-; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[TMP6:%.*]], [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMAX:%.*]]
 ; CHECK-NEXT:    br i1 [[ARG:%.*]], label [[BB2:%.*]], label [[BB4]]
 ; CHECK:       bb2:
-; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ [[TMP1]], [[BB1:%.*]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ [[TMP1:%.*]], [[BB1:%.*]] ]
 ; CHECK-NEXT:    ret void
 ; CHECK:       bb4:
-; CHECK-NEXT:    [[INDVARS_IV]] = phi i32 [ [[INDVARS_IV_NEXT]], [[BB1]] ], [ undef, [[BB:%.*]] ]
-; CHECK-NEXT:    [[SMAX]] = call i32 @llvm.smax.i32(i32 [[INDVARS_IV]], i32 36)
-; CHECK-NEXT:    [[TMP6]] = invoke i32 @quux() [ "deopt"(i32 0, i32 0, i32 0, i32 180, i32 0, i32 25, i32 0, i32 7, ptr null, i32 7, ptr null, i32 7, ptr null, i32 3, i32 [[INDVARS_IV]], i32 3, i32 undef, i32 7, ptr null, i32 3, i32 undef, i32 3, i32 undef, i32 3, i32 undef, i32 3, i32 undef, i32 4, double undef, i32 7, ptr null, i32 4, i64 undef, i32 7, ptr null, i32 0, ptr addrspace(1) undef, i32 3, i32 undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 7, ptr null) ]
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[BB1]] ], [ undef, [[BB:%.*]] ]
+; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[INDVARS_IV]], i32 36)
+; CHECK-NEXT:    [[TMP6:%.*]] = invoke i32 @quux() [ "deopt"(i32 0, i32 0, i32 0, i32 180, i32 0, i32 25, i32 0, i32 7, ptr null, i32 7, ptr null, i32 7, ptr null, i32 3, i32 [[INDVARS_IV]], i32 3, i32 undef, i32 7, ptr null, i32 3, i32 undef, i32 3, i32 undef, i32 3, i32 undef, i32 3, i32 undef, i32 4, double undef, i32 7, ptr null, i32 4, i64 undef, i32 7, ptr null, i32 0, ptr addrspace(1) undef, i32 3, i32 undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 7, ptr null) ]
 ; CHECK-NEXT:            to label [[BB7:%.*]] unwind label [[BB15:%.*]]
 ; CHECK:       bb7:
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[TMP6]], [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP1]] = sub i32 [[TMP0]], [[SMAX]]
 ; CHECK-NEXT:    br label [[BB9:%.*]]
 ; CHECK:       bb9:
 ; CHECK-NEXT:    br i1 true, label [[BB1]], label [[BB9]]
diff --git a/llvm/test/Transforms/IndVarSimplify/sink-from-preheader.ll b/llvm/test/Transforms/IndVarSimplify/sink-from-preheader.ll
deleted file mode 100644
index 8d714c775bde5..0000000000000
--- a/llvm/test/Transforms/IndVarSimplify/sink-from-preheader.ll
+++ /dev/null
@@ -1,93 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=indvars -indvars-predicate-loops=0 -S | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i386-apple-darwin10.0"
-
-; We make sinking here, Changed flag should be set properly.
-define i32 @test(i32 %a, i32 %b, i32 %N) {
-; CHECK-LABEL: @test(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[LOOP:%.*]]
-; CHECK:       loop:
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[IV_NEXT]], [[N:%.*]]
-; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
-; CHECK:       exit:
-; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    ret i32 [[ADD]]
-;
-entry:
-  %add = add i32 %a, %b
-  br label %loop
-
-loop:
-  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-  %iv.next = add i32 %iv, 1
-  %cmp = icmp slt i32 %iv.next, %N
-  br i1 %cmp, label %loop, label %exit
-
-exit:
-  ret i32 %add
-}
-
-define i32 @test_with_unused_load(i32 %a, ptr %b, i32 %N) {
-; CHECK-LABEL: @test_with_unused_load(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[LOOP:%.*]]
-; CHECK:       loop:
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[IV_NEXT]], [[N:%.*]]
-; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
-; CHECK:       exit:
-; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[B:%.*]], align 4
-; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[A:%.*]], [[LOAD]]
-; CHECK-NEXT:    ret i32 [[ADD]]
-;
-entry:
-  %load = load i32, ptr %b
-  %add = add i32 %a, %load
-  br label %loop
-
-loop:
-  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-  %iv.next = add i32 %iv, 1
-  %cmp = icmp slt i32 %iv.next, %N
-  br i1 %cmp, label %loop, label %exit
-
-exit:
-  ret i32 %add
-}
-
-; load's memory location is modified inside the loop, don't sink load.
-define i32 @test_with_unused_load_modified_store(i32 %a, ptr %b, i32 %N) {
-; CHECK-LABEL: @test_with_unused_load_modified_store(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[B:%.*]], align 4
-; CHECK-NEXT:    br label [[LOOP:%.*]]
-; CHECK:       loop:
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
-; CHECK-NEXT:    store i32 [[IV_NEXT]], ptr [[B]], align 4
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[IV_NEXT]], [[N:%.*]]
-; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
-; CHECK:       exit:
-; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[A:%.*]], [[LOAD]]
-; CHECK-NEXT:    ret i32 [[ADD]]
-;
-entry:
-  %load = load i32, ptr %b
-  %add = add i32 %a, %load
-  br label %loop
-
-loop:
-  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-  %iv.next = add i32 %iv, 1
-  store i32 %iv.next, ptr %b
-  %cmp = icmp slt i32 %iv.next, %N
-  br i1 %cmp, label %loop, label %exit
-
-exit:
-  ret i32 %add
-}
diff --git a/llvm/test/Transforms/IndVarSimplify/sink-trapping.ll b/llvm/test/Transforms/IndVarSimplify/sink-trapping.ll
deleted file mode 100644
index d2478be5a8fcc..0000000000000
--- a/llvm/test/Transforms/IndVarSimplify/sink-trapping.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: opt < %s -passes=indvars -S | FileCheck %s
-
-declare i1 @b()
-
-define i32 @a(i32 %x) nounwind {
-for.body.preheader:
-    %y = sdiv i32 10, %x
-	br label %for.body
-
-for.body:
-    %cmp = call i1 @b()
-	br i1 %cmp, label %for.body, label %for.end.loopexit
-
-for.end.loopexit:
-	ret i32 %y
-}
-; CHECK: for.end.loopexit:
-; CHECK: sdiv
-; CHECK: ret
diff --git a/llvm/test/Transforms/IndVarSimplify/zext-nuw.ll b/llvm/test/Transforms/IndVarSimplify/zext-nuw.ll
index 17921afc5ff06..abe7a3e618dd8 100644
--- a/llvm/test/Transforms/IndVarSimplify/zext-nuw.ll
+++ b/llvm/test/Transforms/IndVarSimplify/zext-nuw.ll
@@ -24,13 +24,13 @@ define void @_Z3fn1v() {
 ; CHECK-NEXT:    [[X8:%.*]] = icmp ult i32 0, 4
 ; CHECK-NEXT:    br i1 [[X8]], label [[DOTPREHEADER_LR_PH:%.*]], label [[X22]]
 ; CHECK:       .preheader.lr.ph:
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[K_09]], i64 [[TMP5]]
 ; CHECK-NEXT:    br label [[DOTPREHEADER:%.*]]
 ; CHECK:       .preheader:
 ; CHECK-NEXT:    br label [[X17:%.*]]
 ; CHECK:       x17:
 ; CHECK-NEXT:    br i1 false, label [[DOTPREHEADER]], label [[DOT_CRIT_EDGE_8:%.*]]
 ; CHECK:       ._crit_edge.8:
-; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[K_09]], i64 [[TMP5]]
 ; CHECK-NEXT:    br label [[X22]]
 ; CHECK:       x22:
 ; CHECK-NEXT:    [[K_1_LCSSA:%.*]] = phi ptr [ [[SCEVGEP]], [[DOT_CRIT_EDGE_8]] ], [ [[K_09]], [[DOTPREHEADER4]] ]
diff --git a/llvm/test/Transforms/LICM/scalar-promote.ll b/llvm/test/Transforms/LICM/scalar-promote.ll
index 3af65df55a099..915fc5d9f745b 100644
--- a/llvm/test/Transforms/LICM/scalar-promote.ll
+++ b/llvm/test/Transforms/LICM/scalar-promote.ll
@@ -43,9 +43,8 @@ define void @test2(i32 %i) {
 ; CHECK-LABEL: define void @test2(
 ; CHECK-SAME: i32 [[I:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
-; CHECK-NEXT:    [[X1:%.*]] = getelementptr i32, ptr @X, i64 1
 ; CHECK-NEXT:    [[X2:%.*]] = getelementptr i32, ptr @X, i64 1
-; CHECK-NEXT:    [[X1_PROMOTED:%.*]] = load i32, ptr [[X1]], align 4
+; CHECK-NEXT:    [[X1_PROMOTED:%.*]] = load i32, ptr [[X2]], align 4
 ; CHECK-NEXT:    br label %[[LOOP:.*]]
 ; CHECK:       [[LOOP]]:
 ; CHECK-NEXT:    [[A1:%.*]] = phi i32 [ [[V:%.*]], %[[LOOP]] ], [ [[X1_PROMOTED]], %[[ENTRY]] ]
@@ -53,7 +52,8 @@ define void @test2(i32 %i) {
 ; CHECK-NEXT:    br i1 false, label %[[LOOP]], label %[[EXIT:.*]]
 ; CHECK:       [[EXIT]]:
 ; CHECK-NEXT:    [[V_LCSSA:%.*]] = phi i32 [ [[V]], %[[LOOP]] ]
-; CHECK-NEXT:    store i32 [[V_LCSSA]], ptr [[X1]], align 4
+; CHECK-NEXT:    [[X3:%.*]] = getelementptr i32, ptr @X, i64 1
+; CHECK-NEXT:    store i32 [[V_LCSSA]], ptr [[X2]], align 4
 ; CHECK-NEXT:    ret void
 ;
 Entry:
diff --git a/llvm/test/Transforms/LICM/sink-from-preheader.ll b/llvm/test/Transforms/LICM/sink-from-preheader.ll
new file mode 100644
index 0000000000000..057eec0607ff6
--- /dev/null
+++ b/llvm/test/Transforms/LICM/sink-from-preheader.ll
@@ -0,0 +1,187 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=licm -S | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin10.0"
+
+; We make sinking here, Changed flag should be set properly.
+define i32 @test(i32 %a, i32 %b, i32 %N) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[IV_NEXT]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+entry:
+  %add = add i32 %a, %b
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  %iv.next = add i32 %iv, 1
+  %cmp = icmp slt i32 %iv.next, %N
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i32 %add
+}
+
+define i32 @test_with_unused_load(i32 %a, ptr %b, i32 %N) {
+; CHECK-LABEL: @test_with_unused_load(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[IV_NEXT]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[B:%.*]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[A:%.*]], [[LOAD]]
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+entry:
+  %load = load i32, ptr %b
+  %add = add i32 %a, %load
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  %iv.next = add i32 %iv, 1
+  %cmp = icmp slt i32 %iv.next, %N
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i32 %add
+}
+
+define i32 @test_with_unused_load_modified_store(i32 %a, ptr %b, i32 %N) {
+; CHECK-LABEL: @test_with_unused_load_modified_store(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], [[A:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[IV_NEXT]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[SMAX:%.*]] = phi i32 [ [[IV_NEXT]], [[LOOP]] ]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[B:%.*]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[A]], [[LOAD]]
+; CHECK-NEXT:    store i32 [[SMAX]], ptr [[B]], align 4
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+entry:
+  %load = load i32, ptr %b
+  %add = add i32 %a, %load
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  %iv.next = add i32 %iv, %a
+  store i32 %iv.next, ptr %b
+  %cmp = icmp slt i32 %iv.next, %N
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i32 %add
+}
+
+; Volatile loads must not be sunk.
+define i32 @test_with_volatile_load_no_sink(i32 %a, ptr %b, i32 %N) {
+; CHECK-LABEL: @test_with_volatile_load_no_sink(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LD:%.*]] = load volatile i32, ptr [[B:%.*]], align 4
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[IV_NEXT]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[A:%.*]], [[LD]]
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+entry:
+  %ld = load volatile i32, ptr %b, align 4
+  %add = add i32 %a, %ld
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  %iv.next = add i32 %iv, 1
+  %cmp = icmp slt i32 %iv.next, %N
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i32 %add
+}
+
+; Ordered/atomic loads must not be sunk.
+define i32 @test_with_atomic_load_no_sink(i32 %a, ptr %b, i32 %N) {
+; CHECK-LABEL: @test_with_atomic_load_no_sink(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LD:%.*]] = load atomic i32, ptr [[B:%.*]] acquire, align 4
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[IV_NEXT]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[A:%.*]], [[LD]]
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+entry:
+  %ld = load atomic i32, ptr %b acquire, align 4
+  %add = add i32 %a, %ld
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  %iv.next = add i32 %iv, 1
+  %cmp = icmp slt i32 %iv.next, %N
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i32 %add
+}
+
+declare void @clobber(ptr)
+
+; Calls that may write memory in the loop should prevent sinking the load.
+define i32 @test_with_unused_load_clobbered_by_call(i32 %a, ptr %b, i32 %N) {
+; CHECK-LABEL: @test_with_unused_load_clobbered_by_call(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LD:%.*]] = load i32, ptr [[B:%.*]], align 4
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    call void @clobber(ptr [[B]])
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[IV_NEXT]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[A:%.*]], [[LD]]
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+entry:
+  %ld = load i32, ptr %b, align 4
+  %add = add i32 %a, %ld
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  %iv.next = add i32 %iv, 1
+  call void @clobber(ptr %b)
+  %cmp = icmp slt i32 %iv.next, %N
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i32 %add
+}
diff --git a/llvm/test/Transforms/LICM/sink-trapping.ll b/llvm/test/Transforms/LICM/sink-trapping.ll
new file mode 100644
index 0000000000000..4321b5859551c
--- /dev/null
+++ b/llvm/test/Transforms/LICM/sink-trapping.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -passes=licm -S | FileCheck %s
+
+declare i1 @b()
+
+define i32 @a(i32 %x) nounwind {
+for.body.preheader:
+  %y = sdiv i32 10, %x
+  br label %for.body
+
+for.body:
+  %cmp = call i1 @b()
+  br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:
+  ret i32 %y
+}
+; CHECK: for.end.loopexit:
+; CHECK: sdiv
+; CHECK: ret
diff --git a/llvm/test/Transforms/LoopDeletion/invalidate-scev-after-hoisting.ll b/llvm/test/Transforms/LoopDeletion/invalidate-scev-after-hoisting.ll
index bdd51c2b6bc53..6c19aaad03ba8 100644
--- a/llvm/test/Transforms/LoopDeletion/invalidate-scev-after-hoisting.ll
+++ b/llvm/test/Transforms/LoopDeletion/invalidate-scev-after-hoisting.ll
@@ -84,13 +84,13 @@ define i32 @scev_invalidation_after_deleting(ptr %src) {
 ; CHECK:       inner.2.preheader:
 ; CHECK-NEXT:    br label [[INNER_3_PH:%.*]]
 ; CHECK:       inner.3.ph:
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i64 0 to i32
 ; CHECK-NEXT:    br label [[INNER_3:%.*]]
 ; CHECK:       inner.3:
 ; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[SRC:%.*]], align 4
 ; CHECK-NEXT:    br i1 false, label [[OUTER_LATCH]], label [[INNER_3]]
 ; CHECK:       outer.latch:
 ; CHECK-NEXT:    [[L_LCSSA:%.*]] = phi i32 [ [[L]], [[INNER_3]] ]
-; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i64 0 to i32
 ; CHECK-NEXT:    [[OUTER_IV_NEXT]] = add nsw i32 [[L_LCSSA]], [[TRUNC]]
 ; CHECK-NEXT:    br label [[OUTER_HEADER]]
 ;
diff --git a/llvm/test/Transforms/LoopDistribute/laa-invalidation.ll b/llvm/test/Transforms/LoopDistribute/laa-invalidation.ll
index 62c5627ac2d38..4a55c0e9e11d5 100644
--- a/llvm/test/Transforms/LoopDistribute/laa-invalidation.ll
+++ b/llvm/test/Transforms/LoopDistribute/laa-invalidation.ll
@@ -4,11 +4,11 @@
 define void @test_pr50940(ptr %A, ptr %B) {
 ; CHECK-LABEL: @test_pr50940(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 4
 ; CHECK-NEXT:    br label [[OUTER_HEADER:%.*]]
 ; CHECK:       outer.header:
 ; CHECK-NEXT:    br i1 false, label [[OUTER_LATCH:%.*]], label [[INNER_PH:%.*]]
 ; CHECK:       inner.ph:
-; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 4
 ; CHECK-NEXT:    [[GEP_A_3:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 3
 ; CHECK-NEXT:    br label [[INNER_LVER_CHECK:%.*]]
 ; CHECK:       inner.lver.check:
diff --git a/llvm/test/Transforms/LoopSimplify/ashr-crash.ll b/llvm/test/Transforms/LoopSimplify/ashr-crash.ll
index ae4f9adf8e16c..85198eb19aad6 100644
--- a/llvm/test/Transforms/LoopSimplify/ashr-crash.ll
+++ b/llvm/test/Transforms/LoopSimplify/ashr-crash.ll
@@ -29,11 +29,11 @@ define void @foo() {
 ; CHECK-LABEL: define void @foo() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    store i32 0, ptr @d, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @c, align 4
 ; CHECK-NEXT:    br label [[FOR_COND1_PREHEADER:%.*]]
 ; CHECK:       for.cond1.preheader:
 ; CHECK-NEXT:    br label [[FOR_BODY3:%.*]]
 ; CHECK:       for.body3:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @c, align 4
 ; CHECK-NEXT:    store i32 1, ptr @a, align 4
 ; CHECK-NEXT:    store i32 1, ptr @d, align 4
 ; CHECK-NEXT:    [[CMP4_LE_LE_INV:%.*]] = icmp sgt i32 [[TMP0]], 0
diff --git a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
index 742ee649db3a2..44f50713d69c0 100644
--- a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
@@ -380,7 +380,6 @@ define i32 @multiple_uniform_stores(ptr nocapture %var1, ptr nocapture readonly
 ; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP8]], 8589934588
-; CHECK-NEXT:    [[IND_END:%.*]] = add nuw nsw i64 [[N_VEC]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[ARRAYIDX5_PROMOTED]], i64 0
 ; CHECK-NEXT:    [[INVARIANT_GEP:%.*]] = getelementptr i32, ptr [[VAR2]], i64 [[TMP4]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -396,6 +395,7 @@ define i32 @multiple_uniform_stores(ptr nocapture %var1, ptr nocapture readonly
 ; CHECK-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi <4 x i32> [ [[TMP17]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[IND_END:%.*]] = add nuw nsw i64 [[N_VEC]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[DOTLCSSA]])
 ; CHECK-NEXT:    store i32 [[TMP19]], ptr [[ARRAYIDX5]], align 4, !alias.scope [[META27:![0-9]+]], !noalias [[META23]]
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP8]], [[N_VEC]]
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll
index 2fe420183c683..44703bbcd1220 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll
@@ -86,12 +86,10 @@ define dso_local void @test(ptr %start, ptr %end) #0 {
 ; AVX2-NEXT:    [[IND_END11:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP26]]
 ; AVX2-NEXT:    [[N_VEC_REMAINING:%.*]] = and i64 [[TMP3]], 24
 ; AVX2-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0
-; AVX2-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[BB12_PREHEADER1]], label [[BB12_PREHEADER11]]
+; AVX2-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[BB12_PREHEADER1]], label [[BB12_PREHEADER11]], !prof [[PROF3:![0-9]+]]
 ; AVX2:       vec.epilog.ph:
 ; AVX2-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; AVX2-NEXT:    [[N_VEC10:%.*]] = and i64 [[TMP3]], 9223372036854775800
-; AVX2-NEXT:    [[TMP21:%.*]] = shl i64 [[N_VEC10]], 2
-; AVX2-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP21]]
 ; AVX2-NEXT:    br label [[BB12:%.*]]
 ; AVX2:       vec.epilog.vector.body:
 ; AVX2-NEXT:    [[INDEX12:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[BB12_PREHEADER11]] ], [ [[INDEX_NEXT16:%.*]], [[BB12]] ]
@@ -104,8 +102,10 @@ define dso_local void @test(ptr %start, ptr %end) #0 {
 ; AVX2-NEXT:    tail call void @llvm.masked.store.v8i32.p0(<8 x i32> splat (i32 42), ptr [[NEXT_GEP14]], i32 4, <8 x i1> [[TMP24]])
 ; AVX2-NEXT:    [[INDEX_NEXT16]] = add nuw i64 [[INDEX12]], 8
 ; AVX2-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT16]], [[N_VEC10]]
-; AVX2-NEXT:    br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[BB12]], !llvm.loop [[LOOP3:![0-9]+]]
+; AVX2-NEXT:    br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[BB12]], !llvm.loop [[LOOP4:![0-9]+]]
 ; AVX2:       vec.epilog.middle.block:
+; AVX2-NEXT:    [[TMP27:%.*]] = shl i64 [[N_VEC10]], 2
+; AVX2-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP27]]
 ; AVX2-NEXT:    [[CMP_N17:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC10]]
 ; AVX2-NEXT:    br i1 [[CMP_N17]], label [[EXIT]], label [[BB12_PREHEADER1]]
 ; AVX2:       bb12.preheader:
@@ -124,7 +124,7 @@ define dso_local void @test(ptr %start, ptr %end) #0 {
 ; AVX2:       latch:
 ; AVX2-NEXT:    [[PTR_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR2]], i64 4
 ; AVX2-NEXT:    [[I11_NOT:%.*]] = icmp eq ptr [[PTR_NEXT]], [[END]]
-; AVX2-NEXT:    br i1 [[I11_NOT]], label [[EXIT]], label [[BB13]], !llvm.loop [[LOOP4:![0-9]+]]
+; AVX2-NEXT:    br i1 [[I11_NOT]], label [[EXIT]], label [[BB13]], !llvm.loop [[LOOP5:![0-9]+]]
 ; AVX2:       exit:
 ; AVX2-NEXT:    ret void
 ;



More information about the cfe-commits mailing list