[llvm] 0af3e6a - [InstCombine] Sink instructions with multiple users in a successor block.

Thu Mar 17 21:09:48 PDT 2022

Author: Andrew Wei
Date: 2022-03-18T11:53:45+08:00
New Revision: 0af3e6a22da2eda5021b5fad656d0b9db7702e0a

URL: https://github.com/llvm/llvm-project/commit/0af3e6a22da2eda5021b5fad656d0b9db7702e0a
DIFF: https://github.com/llvm/llvm-project/commit/0af3e6a22da2eda5021b5fad656d0b9db7702e0a.diff

LOG: [InstCombine] Sink instructions with multiple users in a successor block.

This patch tries to sink instructions when they are only used in a successor block.

This is a further enhancement patch based on Anna's commit:
D109700, which allows sinking an instruction having multiple uses in a single user.

In this patch, sink instructions with multiple users in a single successor block will be supported.
It could fix a known issue from rust:
  https://github.com/rust-lang/rust/issues/51346#issuecomment-394443610

Reviewed By: nikic, reames

Differential Revision: https://reviews.llvm.org/D121585

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
    llvm/test/Transforms/InstCombine/intptr7.ll
    llvm/test/Transforms/InstCombine/lifetime-no-null-opt.ll
    llvm/test/Transforms/InstCombine/lifetime.ll
    llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll
    llvm/test/Transforms/InstCombine/pr33689_same_bitwidth.ll
    llvm/test/Transforms/InstCombine/shift-by-signext.ll
    llvm/test/Transforms/InstCombine/sink_instruction.ll
    llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
    llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll
    llvm/test/Transforms/LoopVectorize/X86/small-size.ll
    llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll
    llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
    llvm/test/Transforms/LoopVectorize/induction.ll
    llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-dominance.ll
    llvm/test/Transforms/PGOProfile/chr.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 3dda6895c1df9..c8368d559e253 100644

--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -138,6 +138,10 @@ static cl::opt<bool>
 EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"),
                                               cl::init(true));
 
+static cl::opt<unsigned> MaxSinkNumUsers(
+    "instcombine-max-sink-users", cl::init(32),
+    cl::desc("Maximum number of undroppable users for instruction sinking"));
+
 static cl::opt<unsigned> LimitMaxIterations(
     "instcombine-max-iterations",
     cl::desc("Limit the maximum number of instruction combining iterations"),
@@ -3859,7 +3863,6 @@ static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI) {
 /// block.
 static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock,
                                  TargetLibraryInfo &TLI) {
-  assert(I->getUniqueUndroppableUser() && "Invariants didn't hold!");
   BasicBlock *SrcBlock = I->getParent();
 
   // Cannot move control-flow-involving, volatile loads, vaarg, etc.
@@ -4026,48 +4029,68 @@ bool InstCombinerImpl::run() {
         [this](Instruction *I) -> Optional<BasicBlock *> {
       if (!EnableCodeSinking)
         return None;
-      auto *UserInst = cast_or_null<Instruction>(I->getUniqueUndroppableUser());
-      if (!UserInst)
-        return None;
 
       BasicBlock *BB = I->getParent();
       BasicBlock *UserParent = nullptr;
+      unsigned NumUsers = 0;
 
-      // Special handling for Phi nodes - get the block the use occurs in.
-      if (PHINode *PN = dyn_cast<PHINode>(UserInst)) {
-        for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
-          if (PN->getIncomingValue(i) == I) {
-            // Bail out if we have uses in 
diff erent blocks. We don't do any
-            // sophisticated analysis (i.e finding NearestCommonDominator of these
-            // use blocks).
-            if (UserParent && UserParent != PN->getIncomingBlock(i))
-              return None;
-            UserParent = PN->getIncomingBlock(i);
+      for (auto *U : I->users()) {
+        if (U->isDroppable())
+          continue;
+        if (NumUsers > MaxSinkNumUsers)
+          return None;
+
+        Instruction *UserInst = cast<Instruction>(U);
+        // Special handling for Phi nodes - get the block the use occurs in.
+        if (PHINode *PN = dyn_cast<PHINode>(UserInst)) {
+          for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
+            if (PN->getIncomingValue(i) == I) {
+              // Bail out if we have uses in 
diff erent blocks. We don't do any
+              // sophisticated analysis (i.e finding NearestCommonDominator of
+              // these use blocks).
+              if (UserParent && UserParent != PN->getIncomingBlock(i))
+                return None;
+              UserParent = PN->getIncomingBlock(i);
+            }
           }
+          assert(UserParent && "expected to find user block!");
+        } else {
+          if (UserParent && UserParent != UserInst->getParent())
+            return None;
+          UserParent = UserInst->getParent();
         }
-        assert(UserParent && "expected to find user block!");
-      } else
-        UserParent = UserInst->getParent();
 
-      // Try sinking to another block. If that block is unreachable, then do
-      // not bother. SimplifyCFG should handle it.
-      if (UserParent == BB || !DT.isReachableFromEntry(UserParent))
-        return None;
+        // Make sure these checks are done only once, naturally we do the checks
+        // the first time we get the userparent, this will save compile time.
+        if (NumUsers == 0) {
+          // Try sinking to another block. If that block is unreachable, then do
+          // not bother. SimplifyCFG should handle it.
+          if (UserParent == BB || !DT.isReachableFromEntry(UserParent))
+            return None;
+
+          auto *Term = UserParent->getTerminator();
+          // See if the user is one of our successors that has only one
+          // predecessor, so that we don't have to split the critical edge.
+          // Another option where we can sink is a block that ends with a
+          // terminator that does not pass control to other block (such as
+          // return or unreachable or resume). In this case:
+          //   - I dominates the User (by SSA form);
+          //   - the User will be executed at most once.
+          // So sinking I down to User is always profitable or neutral.
+          if (UserParent->getUniquePredecessor() != BB && !succ_empty(Term))
+            return None;
+
+          assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
+        }
 
-      auto *Term = UserParent->getTerminator();
-      // See if the user is one of our successors that has only one
-      // predecessor, so that we don't have to split the critical edge.
-      // Another option where we can sink is a block that ends with a
-      // terminator that does not pass control to other block (such as
-      // return or unreachable or resume). In this case:
-      //   - I dominates the User (by SSA form);
-      //   - the User will be executed at most once.
-      // So sinking I down to User is always profitable or neutral.
-      if (UserParent->getUniquePredecessor() == BB || succ_empty(Term)) {
-        assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
-        return UserParent;
+        NumUsers++;
       }
-      return None;
+
+      // No user or only has droppable users.
+      if (!UserParent)
+        return None;
+
+      return UserParent;
     };
 
     auto OptBB = getOptionalSinkBlockForInst(I);

diff  --git a/llvm/test/Transforms/InstCombine/intptr7.ll b/llvm/test/Transforms/InstCombine/intptr7.ll
index a7e38754f4e32..303fb714ef42a 100644
--- a/llvm/test/Transforms/InstCombine/intptr7.ll
+++ b/llvm/test/Transforms/InstCombine/intptr7.ll
@@ -4,17 +4,17 @@
 define void @matching_phi(i64 %a, float* %b, i1 %cond) {
 ; CHECK-LABEL: @matching_phi(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[BB2:%.*]], label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[ADDB:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 2
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       bb2:
 ; CHECK-NEXT:    [[ADD_INT:%.*]] = add i64 [[A:%.*]], 1
 ; CHECK-NEXT:    [[ADD:%.*]] = inttoptr i64 [[ADD_INT]] to float*
-; CHECK-NEXT:    br i1 [[COND:%.*]], label [[BBB:%.*]], label [[A:%.*]]
-; CHECK:       A:
-; CHECK-NEXT:    [[ADDB:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 2
-; CHECK-NEXT:    br label [[C:%.*]]
-; CHECK:       Bbb:
 ; CHECK-NEXT:    store float 1.000000e+01, float* [[ADD]], align 4
-; CHECK-NEXT:    br label [[C]]
-; CHECK:       C:
-; CHECK-NEXT:    [[A_ADDR_03:%.*]] = phi float* [ [[ADDB]], [[A]] ], [ [[ADD]], [[BBB]] ]
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    [[A_ADDR_03:%.*]] = phi float* [ [[ADDB]], [[BB1]] ], [ [[ADD]], [[BB2]] ]
 ; CHECK-NEXT:    [[I1:%.*]] = load float, float* [[A_ADDR_03]], align 4
 ; CHECK-NEXT:    [[MUL_I:%.*]] = fmul float [[I1]], 4.200000e+01
 ; CHECK-NEXT:    store float [[MUL_I]], float* [[A_ADDR_03]], align 4
@@ -27,16 +27,16 @@ entry:
 
   %addb = getelementptr inbounds float, float* %b, i64 2
   %addb.int = ptrtoint float* %addb to i64
-  br i1 %cmp1, label %A, label %Bbb
-A:
-  br label %C
-Bbb:
+  br i1 %cmp1, label %bb1, label %bb2
+bb1:
+  br label %bb3
+bb2:
   store float 1.0e+01, float* %add, align 4
-  br label %C
+  br label %bb3
 
-C:
-  %a.addr.03 = phi float* [ %addb, %A ], [ %add, %Bbb ]
-  %b.addr.02 = phi i64 [ %addb.int, %A ], [ %add.int, %Bbb ]
+bb3:
+  %a.addr.03 = phi float* [ %addb, %bb1 ], [ %add, %bb2 ]
+  %b.addr.02 = phi i64 [ %addb.int, %bb1 ], [ %add.int, %bb2 ]
   %i0 = inttoptr i64 %b.addr.02 to float*
   %i1 = load float, float* %i0, align 4
   %mul.i = fmul float %i1, 4.200000e+01
@@ -48,18 +48,20 @@ define void @no_matching_phi(i64 %a, float* %b, i1 %cond) {
 ; CHECK-LABEL: @no_matching_phi(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[ADD_INT:%.*]] = add i64 [[A:%.*]], 1
-; CHECK-NEXT:    [[ADD:%.*]] = inttoptr i64 [[ADD_INT]] to float*
 ; CHECK-NEXT:    [[ADDB:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 2
 ; CHECK-NEXT:    br i1 [[COND:%.*]], label [[B:%.*]], label [[A:%.*]]
 ; CHECK:       A:
 ; CHECK-NEXT:    br label [[C:%.*]]
 ; CHECK:       B:
+; CHECK-NEXT:    [[ADDB_INT:%.*]] = ptrtoint float* [[ADDB]] to i64
+; CHECK-NEXT:    [[ADD:%.*]] = inttoptr i64 [[ADD_INT]] to float*
 ; CHECK-NEXT:    store float 1.000000e+01, float* [[ADD]], align 4
 ; CHECK-NEXT:    br label [[C]]
 ; CHECK:       C:
 ; CHECK-NEXT:    [[A_ADDR_03:%.*]] = phi float* [ [[ADDB]], [[A]] ], [ [[ADD]], [[B]] ]
-; CHECK-NEXT:    [[B_ADDR_02_PTR:%.*]] = phi float* [ [[ADD]], [[A]] ], [ [[ADDB]], [[B]] ]
-; CHECK-NEXT:    [[I1:%.*]] = load float, float* [[B_ADDR_02_PTR]], align 4
+; CHECK-NEXT:    [[B_ADDR_02:%.*]] = phi i64 [ [[ADD_INT]], [[A]] ], [ [[ADDB_INT]], [[B]] ]
+; CHECK-NEXT:    [[I0:%.*]] = inttoptr i64 [[B_ADDR_02]] to float*
+; CHECK-NEXT:    [[I1:%.*]] = load float, float* [[I0]], align 4
 ; CHECK-NEXT:    [[MUL_I:%.*]] = fmul float [[I1]], 4.200000e+01
 ; CHECK-NEXT:    store float [[MUL_I]], float* [[A_ADDR_03]], align 4
 ; CHECK-NEXT:    ret void

diff  --git a/llvm/test/Transforms/InstCombine/lifetime-no-null-opt.ll b/llvm/test/Transforms/InstCombine/lifetime-no-null-opt.ll
index bdc8d6ea2585d..46bfa8d8ed213 100644
--- a/llvm/test/Transforms/InstCombine/lifetime-no-null-opt.ll
+++ b/llvm/test/Transforms/InstCombine/lifetime-no-null-opt.ll
@@ -11,17 +11,17 @@ define void @bar(i1 %flag) #0 !dbg !4 {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TEXT:%.*]] = alloca [1 x i8], align 1
 ; CHECK-NEXT:    [[BUFF:%.*]] = alloca [1 x i8], align 1
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x i8], [1 x i8]* [[TEXT]], i64 0, i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [1 x i8], [1 x i8]* [[BUFF]], i64 0, i64 0
 ; CHECK-NEXT:    br i1 [[FLAG:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
 ; CHECK:       if:
 ; CHECK-NEXT:    br label [[BB2:%.*]]
 ; CHECK:       bb2:
 ; CHECK-NEXT:    br label [[BB3:%.*]]
 ; CHECK:       bb3:
-; CHECK-NEXT:    call void @llvm.dbg.declare(metadata [1 x i8]* [[TEXT]], [[META16:metadata !.*]], metadata !DIExpression()), [[DBG24:!dbg !.*]]
+; CHECK-NEXT:    call void @llvm.dbg.declare(metadata [1 x i8]* [[TEXT]], metadata [[META16:![0-9]+]], metadata !DIExpression()), !dbg [[DBG24:![0-9]+]]
 ; CHECK-NEXT:    br label [[FIN:%.*]]
 ; CHECK:       else:
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x i8], [1 x i8]* [[TEXT]], i64 0, i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [1 x i8], [1 x i8]* [[BUFF]], i64 0, i64 0
 ; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 1, i8* [[TMP0]])
 ; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 1, i8* [[TMP1]])
 ; CHECK-NEXT:    call void @foo(i8* [[TMP1]], i8* [[TMP0]])

diff  --git a/llvm/test/Transforms/InstCombine/lifetime.ll b/llvm/test/Transforms/InstCombine/lifetime.ll
index 1aef5cfcee27f..7f79972a529ec 100644
--- a/llvm/test/Transforms/InstCombine/lifetime.ll
+++ b/llvm/test/Transforms/InstCombine/lifetime.ll
@@ -11,17 +11,17 @@ define void @bar(i1 %flag) !dbg !4 {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TEXT:%.*]] = alloca [1 x i8], align 1
 ; CHECK-NEXT:    [[BUFF:%.*]] = alloca [1 x i8], align 1
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x i8], [1 x i8]* [[TEXT]], i64 0, i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [1 x i8], [1 x i8]* [[BUFF]], i64 0, i64 0
 ; CHECK-NEXT:    br i1 [[FLAG:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
 ; CHECK:       if:
 ; CHECK-NEXT:    br label [[BB2:%.*]]
 ; CHECK:       bb2:
 ; CHECK-NEXT:    br label [[BB3:%.*]]
 ; CHECK:       bb3:
-; CHECK-NEXT:    call void @llvm.dbg.declare(metadata [1 x i8]* [[TEXT]], [[META16:metadata !.*]], metadata !DIExpression()), [[DBG24:!dbg !.*]]
+; CHECK-NEXT:    call void @llvm.dbg.declare(metadata [1 x i8]* [[TEXT]], metadata [[META16:![0-9]+]], metadata !DIExpression()), !dbg [[DBG24:![0-9]+]]
 ; CHECK-NEXT:    br label [[FIN:%.*]]
 ; CHECK:       else:
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x i8], [1 x i8]* [[TEXT]], i64 0, i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [1 x i8], [1 x i8]* [[BUFF]], i64 0, i64 0
 ; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 1, i8* nonnull [[TMP0]])
 ; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 1, i8* nonnull [[TMP1]])
 ; CHECK-NEXT:    call void @foo(i8* nonnull [[TMP1]], i8* nonnull [[TMP0]])

diff  --git a/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll b/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll
index 608109ac56335..4a68861d3f1fa 100644
--- a/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll
+++ b/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll
@@ -15,7 +15,6 @@ define void @_Z4testv() {
 ; CHECK-NEXT:    [[I:%.*]] = load i8, i8* @var_7, align 1
 ; CHECK-NEXT:    [[I1:%.*]] = icmp eq i8 [[I]], -1
 ; CHECK-NEXT:    [[I4:%.*]] = load i16, i16* @var_0, align 2
-; CHECK-NEXT:    [[I8:%.*]] = sext i16 [[I4]] to i32
 ; CHECK-NEXT:    br i1 [[I1]], label [[BB10:%.*]], label [[BB9:%.*]]
 ; CHECK:       bb9:
 ; CHECK-NEXT:    br label [[BB12:%.*]]
@@ -31,6 +30,7 @@ define void @_Z4testv() {
 ; CHECK-NEXT:    [[STOREMERGE1:%.*]] = phi i32 [ [[I11]], [[BB10]] ], [ 1, [[BB9]] ]
 ; CHECK-NEXT:    store i32 [[STOREMERGE1]], i32* getelementptr inbounds ([0 x i32], [0 x i32]* @arr_2, i64 0, i64 0), align 4
 ; CHECK-NEXT:    store i16 [[I4]], i16* getelementptr inbounds ([0 x i16], [0 x i16]* @arr_4, i64 0, i64 0), align 2
+; CHECK-NEXT:    [[I8:%.*]] = sext i16 [[I4]] to i32
 ; CHECK-NEXT:    store i32 [[I8]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @arr_3, i64 0, i64 0), align 16
 ; CHECK-NEXT:    store i32 [[STOREMERGE1]], i32* getelementptr inbounds ([0 x i32], [0 x i32]* @arr_2, i64 0, i64 1), align 4
 ; CHECK-NEXT:    store i16 [[I4]], i16* getelementptr inbounds ([0 x i16], [0 x i16]* @arr_4, i64 0, i64 1), align 2

diff  --git a/llvm/test/Transforms/InstCombine/pr33689_same_bitwidth.ll b/llvm/test/Transforms/InstCombine/pr33689_same_bitwidth.ll
index be50716fa131a..3d0ae632e9fab 100644
--- a/llvm/test/Transforms/InstCombine/pr33689_same_bitwidth.ll
+++ b/llvm/test/Transforms/InstCombine/pr33689_same_bitwidth.ll
@@ -14,11 +14,11 @@ define void @f(i1 %cond) {
 ; CHECK-LABEL: @f(
 ; CHECK-NEXT:  bb0:
 ; CHECK-NEXT:    [[T12:%.*]] = alloca [2 x i32], align 8
-; CHECK-NEXT:    [[T12_SUB:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[T12]], i16 0, i16 0
 ; CHECK-NEXT:    br i1 [[COND:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    unreachable
 ; CHECK:       bb2:
+; CHECK-NEXT:    [[T12_SUB:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[T12]], i16 0, i16 0
 ; CHECK-NEXT:    [[T9:%.*]] = load i16*, i16** @b, align 2
 ; CHECK-NEXT:    store i16 0, i16* [[T9]], align 2
 ; CHECK-NEXT:    [[T10:%.*]] = load i32, i32* [[T12_SUB]], align 8

diff  --git a/llvm/test/Transforms/InstCombine/shift-by-signext.ll b/llvm/test/Transforms/InstCombine/shift-by-signext.ll
index 8e1445803c3d3..b72f33fc65023 100644
--- a/llvm/test/Transforms/InstCombine/shift-by-signext.ll
+++ b/llvm/test/Transforms/InstCombine/shift-by-signext.ll
@@ -69,11 +69,11 @@ define <2 x i32> @t5_vec_ashr(<2 x i32> %x, <2 x i8> %shamt) {
 define i32 @t6_twoshifts(i32 %x, i8 %shamt) {
 ; CHECK-LABEL: @t6_twoshifts(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[SHAMT_WIDE:%.*]] = sext i8 [[SHAMT:%.*]] to i32
 ; CHECK-NEXT:    br label [[WORK:%.*]]
 ; CHECK:       work:
 ; CHECK-NEXT:    br label [[END:%.*]]
 ; CHECK:       end:
+; CHECK-NEXT:    [[SHAMT_WIDE:%.*]] = sext i8 [[SHAMT:%.*]] to i32
 ; CHECK-NEXT:    [[N0:%.*]] = shl i32 [[X:%.*]], [[SHAMT_WIDE]]
 ; CHECK-NEXT:    [[R:%.*]] = ashr i32 [[N0]], [[SHAMT_WIDE]]
 ; CHECK-NEXT:    ret i32 [[R]]
@@ -151,11 +151,11 @@ define i32 @n11_extrause(i32 %x, i8 %shamt) {
 }
 define i32 @n12_twoshifts_and_extrause(i32 %x, i8 %shamt) {
 ; CHECK-LABEL: @n12_twoshifts_and_extrause(
-; CHECK-NEXT:    [[SHAMT_WIDE:%.*]] = sext i8 [[SHAMT:%.*]] to i32
 ; CHECK-NEXT:    br label [[WORK:%.*]]
 ; CHECK:       work:
 ; CHECK-NEXT:    br label [[END:%.*]]
 ; CHECK:       end:
+; CHECK-NEXT:    [[SHAMT_WIDE:%.*]] = sext i8 [[SHAMT:%.*]] to i32
 ; CHECK-NEXT:    [[N0:%.*]] = shl i32 [[X:%.*]], [[SHAMT_WIDE]]
 ; CHECK-NEXT:    [[R:%.*]] = ashr i32 [[N0]], [[SHAMT_WIDE]]
 ; CHECK-NEXT:    call void @use32(i32 [[SHAMT_WIDE]])

diff  --git a/llvm/test/Transforms/InstCombine/sink_instruction.ll b/llvm/test/Transforms/InstCombine/sink_instruction.ll
index fb40a9c5f2a7c..7ae0da3133b2f 100644
--- a/llvm/test/Transforms/InstCombine/sink_instruction.ll
+++ b/llvm/test/Transforms/InstCombine/sink_instruction.ll
@@ -241,16 +241,15 @@ else:
 declare void @abort()
 declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64)
 declare void @dummy(i64)
-; Todo: Two uses in two 
diff erent users of a single successor block. We can sink.
+; Two uses in two 
diff erent users of a single successor block. We can sink.
 define i64 @test8(i64 %c) {
 ; CHECK-LABEL: @test8(
 ; CHECK-NEXT:  bb1:
 ; CHECK-NEXT:    [[OVERFLOW:%.*]] = icmp ugt i64 [[C:%.*]], 2305843009213693951
-; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[OVERFLOW]], i64 0, i64 8
 ; CHECK-NEXT:    br i1 [[OVERFLOW]], label [[ABORT:%.*]], label [[BB2:%.*]]
 ; CHECK:       bb2:
-; CHECK-NEXT:    call void @dummy(i64 [[SELECT]])
-; CHECK-NEXT:    ret i64 [[SELECT]]
+; CHECK-NEXT:    call void @dummy(i64 8)
+; CHECK-NEXT:    ret i64 8
 ; CHECK:       abort:
 ; CHECK-NEXT:    call void @abort()
 ; CHECK-NEXT:    unreachable

diff  --git a/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll b/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
index eb963303a3612..832452d0f5268 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
@@ -33,20 +33,20 @@ define i32 @unroll(i32* nocapture readonly %a, i32* nocapture readonly %b, i32 %
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL]], align 4
 ; CHECK-NEXT:    [[MUL_EPIL:%.*]] = mul nsw i32 [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[ADD_EPIL:%.*]] = add nsw i32 [[MUL_EPIL]], [[C_010_UNR]]
-; CHECK-NEXT:    [[INDVARS_IV_NEXT_EPIL:%.*]] = add nuw nsw i64 [[INDVARS_IV_UNR]], 1
 ; CHECK-NEXT:    [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 1
 ; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA:%.*]], label [[FOR_BODY_EPIL_1:%.*]]
 ; CHECK:       for.body.epil.1:
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_EPIL:%.*]] = add nuw nsw i64 [[INDVARS_IV_UNR]], 1
 ; CHECK-NEXT:    [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_EPIL]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_1]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX2_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV_NEXT_EPIL]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL_1]], align 4
 ; CHECK-NEXT:    [[MUL_EPIL_1:%.*]] = mul nsw i32 [[TMP5]], [[TMP4]]
 ; CHECK-NEXT:    [[ADD_EPIL_1:%.*]] = add nsw i32 [[MUL_EPIL_1]], [[ADD_EPIL]]
-; CHECK-NEXT:    [[INDVARS_IV_NEXT_EPIL_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_UNR]], 2
 ; CHECK-NEXT:    [[EPIL_ITER_CMP_1_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 2
 ; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP_1_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]], label [[FOR_BODY_EPIL_2:%.*]]
 ; CHECK:       for.body.epil.2:
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_EPIL_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_UNR]], 2
 ; CHECK-NEXT:    [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_EPIL_1]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_2]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX2_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV_NEXT_EPIL_1]]

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll b/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll
index db662f11f8cf5..1db93f4c9665f 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll
@@ -32,15 +32,15 @@ define i32 @inv_load_conditional(i32* %a, i64 %n, i32* %b, i32 %k) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <16 x i32*> [[BROADCAST_SPLAT]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[TMP0]] to <16 x i32>*
-; CHECK-NEXT:    store <16 x i32> [[BROADCAST_SPLAT9]], <16 x i32>* [[TMP2]], align 4, !alias.scope !0, !noalias !3
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <16 x i32>*
+; CHECK-NEXT:    store <16 x i32> [[BROADCAST_SPLAT9]], <16 x i32>* [[TMP1]], align 4, !alias.scope !0, !noalias !3
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> [[BROADCAST_SPLAT]], i32 4, <16 x i1> [[TMP1]], <16 x i32> undef), !alias.scope !3
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select <16 x i1> [[TMP1]], <16 x i32> [[WIDE_MASKED_GATHER]], <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <16 x i32*> [[BROADCAST_SPLAT]], zeroinitializer
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> [[BROADCAST_SPLAT]], i32 4, <16 x i1> [[TMP3]], <16 x i32> undef), !alias.scope !3
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <16 x i1> [[TMP3]], <16 x i32> [[WIDE_MASKED_GATHER]], <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1>
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX6]], [[N_VEC]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <16 x i32> [[PREDPHI]], i64 15
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
@@ -52,26 +52,26 @@ define i32 @inv_load_conditional(i32* %a, i64 %n, i32* %b, i32 %k) {
 ; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; CHECK-NEXT:    [[SMAX11:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
 ; CHECK-NEXT:    [[N_VEC13:%.*]] = and i64 [[SMAX11]], 9223372036854775800
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT18:%.*]] = insertelement <8 x i32*> poison, i32* [[A]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT19:%.*]] = shufflevector <8 x i32*> [[BROADCAST_SPLATINSERT18]], <8 x i32*> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT20:%.*]] = insertelement <8 x i32> poison, i32 [[NTRUNC]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT21:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT20]], <8 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT17:%.*]] = insertelement <8 x i32*> poison, i32* [[A]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT18:%.*]] = shufflevector <8 x i32*> [[BROADCAST_SPLATINSERT17]], <8 x i32*> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT19:%.*]] = insertelement <8 x i32> poison, i32 [[NTRUNC]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT20:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT19]], <8 x i32> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
 ; CHECK:       vec.epilog.vector.body:
-; CHECK-NEXT:    [[INDEX14:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT15:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX14]]
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp ne <8 x i32*> [[BROADCAST_SPLAT19]], zeroinitializer
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP5]] to <8 x i32>*
-; CHECK-NEXT:    store <8 x i32> [[BROADCAST_SPLAT21]], <8 x i32>* [[TMP7]], align 4
-; CHECK-NEXT:    [[INDEX_NEXT15]] = add nuw i64 [[INDEX14]], 8
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT15]], [[N_VEC13]]
-; CHECK-NEXT:    br i1 [[TMP8]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT23:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <8 x i32>*
+; CHECK-NEXT:    store <8 x i32> [[BROADCAST_SPLAT20]], <8 x i32>* [[TMP6]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT23]] = add nuw i64 [[OFFSET_IDX]], 8
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT23]], [[N_VEC13]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
 ; CHECK:       vec.epilog.middle.block:
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER22:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[BROADCAST_SPLAT19]], i32 4, <8 x i1> [[TMP6]], <8 x i32> undef)
-; CHECK-NEXT:    [[PREDPHI23:%.*]] = select <8 x i1> [[TMP6]], <8 x i32> [[WIDE_MASKED_GATHER22]], <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1>
-; CHECK-NEXT:    [[CMP_N16:%.*]] = icmp eq i64 [[SMAX11]], [[N_VEC13]]
-; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <8 x i32> [[PREDPHI23]], i64 7
-; CHECK-NEXT:    br i1 [[CMP_N16]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp ne <8 x i32*> [[BROADCAST_SPLAT18]], zeroinitializer
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER21:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[BROADCAST_SPLAT18]], i32 4, <8 x i1> [[TMP8]], <8 x i32> undef)
+; CHECK-NEXT:    [[PREDPHI22:%.*]] = select <8 x i1> [[TMP8]], <8 x i32> [[WIDE_MASKED_GATHER21]], <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1>
+; CHECK-NEXT:    [[CMP_N14:%.*]] = icmp eq i64 [[SMAX11]], [[N_VEC13]]
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <8 x i32> [[PREDPHI22]], i64 7
+; CHECK-NEXT:    br i1 [[CMP_N14]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
 ; CHECK:       vec.epilog.scalar.ph:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC13]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
index fbd9009186a5a..d11b8fa3dd6bf 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
@@ -76,10 +76,10 @@ define void @example2(i32 %n, i32 %x) optsize {
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[DOTLR_PH5_PREHEADER:%.*]], label [[DOTPREHEADER:%.*]]
 ; CHECK:       .lr.ph5.preheader:
-; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[N]], -1
-; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[N]], -1
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
 ; CHECK-NEXT:    [[N_RND_UP:%.*]] = add nuw nsw i64 [[TMP3]], 4
 ; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[N_RND_UP]], 8589934588
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP3]], i64 0
@@ -136,24 +136,24 @@ define void @example2(i32 %n, i32 %x) optsize {
 ; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i32 [[N]], 0
 ; CHECK-NEXT:    br i1 [[TMP17]], label [[DOT_CRIT_EDGE:%.*]], label [[DOTLR_PH_PREHEADER:%.*]]
 ; CHECK:       .lr.ph.preheader:
-; CHECK-NEXT:    [[TMP18:%.*]] = add i32 [[N]], -1
-; CHECK-NEXT:    [[TMP19:%.*]] = zext i32 [[TMP18]] to i64
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH8:%.*]], label [[VECTOR_PH10:%.*]]
 ; CHECK:       vector.ph10:
+; CHECK-NEXT:    [[TMP18:%.*]] = add i32 [[N]], -1
+; CHECK-NEXT:    [[TMP19:%.*]] = zext i32 [[TMP18]] to i64
 ; CHECK-NEXT:    [[N_RND_UP11:%.*]] = add nuw nsw i64 [[TMP19]], 4
 ; CHECK-NEXT:    [[N_VEC13:%.*]] = and i64 [[N_RND_UP11]], 8589934588
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT18:%.*]] = insertelement <4 x i64> poison, i64 [[TMP19]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT19:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT18]], <4 x i64> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY9:%.*]]
 ; CHECK:       vector.body9:
-; CHECK-NEXT:    [[INDEX38:%.*]] = phi i64 [ 0, [[VECTOR_PH10]] ], [ [[INDEX_NEXT37:%.*]], [[PRED_STORE_CONTINUE36:%.*]] ]
-; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 [[I_0_LCSSA]], [[INDEX38]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT27:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX38]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT28:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT27]], <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[VEC_IV:%.*]] = or <4 x i64> [[BROADCAST_SPLAT28]], <i64 0, i64 1, i64 2, i64 3>
+; CHECK-NEXT:    [[INDEX20:%.*]] = phi i64 [ 0, [[VECTOR_PH10]] ], [ [[INDEX_NEXT31:%.*]], [[PRED_STORE_CONTINUE30:%.*]] ]
+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 [[I_0_LCSSA]], [[INDEX20]]
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT21:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX20]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT22:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT21]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[VEC_IV:%.*]] = or <4 x i64> [[BROADCAST_SPLAT22]], <i64 0, i64 1, i64 2, i64 3>
 ; CHECK-NEXT:    [[TMP20:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT19]]
 ; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <4 x i1> [[TMP20]], i64 0
-; CHECK-NEXT:    br i1 [[TMP21]], label [[PRED_STORE_IF29:%.*]], label [[PRED_STORE_CONTINUE30:%.*]]
+; CHECK-NEXT:    br i1 [[TMP21]], label [[PRED_STORE_IF23:%.*]], label [[PRED_STORE_CONTINUE24:%.*]]
 ; CHECK:       pred.store.if23:
 ; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[OFFSET_IDX]]
 ; CHECK-NEXT:    [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4
@@ -162,10 +162,10 @@ define void @example2(i32 %n, i32 %x) optsize {
 ; CHECK-NEXT:    [[TMP26:%.*]] = and i32 [[TMP25]], [[TMP23]]
 ; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[OFFSET_IDX]]
 ; CHECK-NEXT:    store i32 [[TMP26]], i32* [[TMP27]], align 4
-; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE30]]
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE24]]
 ; CHECK:       pred.store.continue24:
 ; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <4 x i1> [[TMP20]], i64 1
-; CHECK-NEXT:    br i1 [[TMP28]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE32:%.*]]
+; CHECK-NEXT:    br i1 [[TMP28]], label [[PRED_STORE_IF25:%.*]], label [[PRED_STORE_CONTINUE26:%.*]]
 ; CHECK:       pred.store.if25:
 ; CHECK-NEXT:    [[TMP29:%.*]] = add i64 [[OFFSET_IDX]], 1
 ; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP29]]
@@ -175,10 +175,10 @@ define void @example2(i32 %n, i32 %x) optsize {
 ; CHECK-NEXT:    [[TMP34:%.*]] = and i32 [[TMP33]], [[TMP31]]
 ; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[TMP29]]
 ; CHECK-NEXT:    store i32 [[TMP34]], i32* [[TMP35]], align 4
-; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE32]]
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE26]]
 ; CHECK:       pred.store.continue26:
 ; CHECK-NEXT:    [[TMP36:%.*]] = extractelement <4 x i1> [[TMP20]], i64 2
-; CHECK-NEXT:    br i1 [[TMP36]], label [[PRED_STORE_IF33:%.*]], label [[PRED_STORE_CONTINUE34:%.*]]
+; CHECK-NEXT:    br i1 [[TMP36]], label [[PRED_STORE_IF27:%.*]], label [[PRED_STORE_CONTINUE28:%.*]]
 ; CHECK:       pred.store.if27:
 ; CHECK-NEXT:    [[TMP37:%.*]] = add i64 [[OFFSET_IDX]], 2
 ; CHECK-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP37]]
@@ -188,10 +188,10 @@ define void @example2(i32 %n, i32 %x) optsize {
 ; CHECK-NEXT:    [[TMP42:%.*]] = and i32 [[TMP41]], [[TMP39]]
 ; CHECK-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[TMP37]]
 ; CHECK-NEXT:    store i32 [[TMP42]], i32* [[TMP43]], align 4
-; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE34]]
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE28]]
 ; CHECK:       pred.store.continue28:
 ; CHECK-NEXT:    [[TMP44:%.*]] = extractelement <4 x i1> [[TMP20]], i64 3
-; CHECK-NEXT:    br i1 [[TMP44]], label [[PRED_STORE_IF35:%.*]], label [[PRED_STORE_CONTINUE36]]
+; CHECK-NEXT:    br i1 [[TMP44]], label [[PRED_STORE_IF29:%.*]], label [[PRED_STORE_CONTINUE30]]
 ; CHECK:       pred.store.if29:
 ; CHECK-NEXT:    [[TMP45:%.*]] = add i64 [[OFFSET_IDX]], 3
 ; CHECK-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP45]]
@@ -201,10 +201,10 @@ define void @example2(i32 %n, i32 %x) optsize {
 ; CHECK-NEXT:    [[TMP50:%.*]] = and i32 [[TMP49]], [[TMP47]]
 ; CHECK-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[TMP45]]
 ; CHECK-NEXT:    store i32 [[TMP50]], i32* [[TMP51]], align 4
-; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE36]]
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE30]]
 ; CHECK:       pred.store.continue30:
-; CHECK-NEXT:    [[INDEX_NEXT37]] = add i64 [[INDEX38]], 4
-; CHECK-NEXT:    [[TMP52:%.*]] = icmp eq i64 [[INDEX_NEXT37]], [[N_VEC13]]
+; CHECK-NEXT:    [[INDEX_NEXT31]] = add i64 [[INDEX20]], 4
+; CHECK-NEXT:    [[TMP52:%.*]] = icmp eq i64 [[INDEX_NEXT31]], [[N_VEC13]]
 ; CHECK-NEXT:    br i1 [[TMP52]], label [[MIDDLE_BLOCK7:%.*]], label [[VECTOR_BODY9]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       middle.block7:
 ; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH8]]
@@ -266,62 +266,62 @@ define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N:%.*]], 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[DOT_CRIT_EDGE:%.*]], label [[DOTLR_PH_PREHEADER:%.*]]
 ; CHECK:       .lr.ph.preheader:
-; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[N]], -1
-; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[N]], -1
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
 ; CHECK-NEXT:    [[N_RND_UP:%.*]] = add nuw nsw i64 [[TMP3]], 4
 ; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[N_RND_UP]], 8589934588
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP3]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE21:%.*]] ]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT14:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT15:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT14]], <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[VEC_IV:%.*]] = or <4 x i64> [[BROADCAST_SPLAT15]], <i64 0, i64 1, i64 2, i64 3>
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE19:%.*]] ]
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT12:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT13:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT12]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[VEC_IV:%.*]] = or <4 x i64> [[BROADCAST_SPLAT13]], <i64 0, i64 1, i64 2, i64 3>
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
 ; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[NEXT_GEP10:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[NEXT_GEP10]], align 16
+; CHECK-NEXT:    [[NEXT_GEP8:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[NEXT_GEP8]], align 16
 ; CHECK-NEXT:    store i32 [[TMP6]], i32* [[NEXT_GEP]], align 16
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK:       pred.store.continue:
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP4]], i64 1
-; CHECK-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17:%.*]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15:%.*]]
 ; CHECK:       pred.store.if14:
 ; CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[INDEX]], 1
-; CHECK-NEXT:    [[NEXT_GEP7:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP8]]
+; CHECK-NEXT:    [[NEXT_GEP5:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP8]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[INDEX]], 1
-; CHECK-NEXT:    [[NEXT_GEP11:%.*]] = getelementptr i32, i32* [[Q]], i64 [[TMP9]]
-; CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* [[NEXT_GEP11]], align 16
-; CHECK-NEXT:    store i32 [[TMP10]], i32* [[NEXT_GEP7]], align 16
-; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE17]]
+; CHECK-NEXT:    [[NEXT_GEP9:%.*]] = getelementptr i32, i32* [[Q]], i64 [[TMP9]]
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* [[NEXT_GEP9]], align 16
+; CHECK-NEXT:    store i32 [[TMP10]], i32* [[NEXT_GEP5]], align 16
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE15]]
 ; CHECK:       pred.store.continue15:
 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x i1> [[TMP4]], i64 2
-; CHECK-NEXT:    br i1 [[TMP11]], label [[PRED_STORE_IF18:%.*]], label [[PRED_STORE_CONTINUE19:%.*]]
+; CHECK-NEXT:    br i1 [[TMP11]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17:%.*]]
 ; CHECK:       pred.store.if16:
 ; CHECK-NEXT:    [[TMP12:%.*]] = or i64 [[INDEX]], 2
-; CHECK-NEXT:    [[NEXT_GEP8:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP12]]
+; CHECK-NEXT:    [[NEXT_GEP6:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP12]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = or i64 [[INDEX]], 2
-; CHECK-NEXT:    [[NEXT_GEP12:%.*]] = getelementptr i32, i32* [[Q]], i64 [[TMP13]]
-; CHECK-NEXT:    [[TMP14:%.*]] = load i32, i32* [[NEXT_GEP12]], align 16
-; CHECK-NEXT:    store i32 [[TMP14]], i32* [[NEXT_GEP8]], align 16
-; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE19]]
+; CHECK-NEXT:    [[NEXT_GEP10:%.*]] = getelementptr i32, i32* [[Q]], i64 [[TMP13]]
+; CHECK-NEXT:    [[TMP14:%.*]] = load i32, i32* [[NEXT_GEP10]], align 16
+; CHECK-NEXT:    store i32 [[TMP14]], i32* [[NEXT_GEP6]], align 16
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE17]]
 ; CHECK:       pred.store.continue17:
 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x i1> [[TMP4]], i64 3
-; CHECK-NEXT:    br i1 [[TMP15]], label [[PRED_STORE_IF20:%.*]], label [[PRED_STORE_CONTINUE21]]
+; CHECK-NEXT:    br i1 [[TMP15]], label [[PRED_STORE_IF18:%.*]], label [[PRED_STORE_CONTINUE19]]
 ; CHECK:       pred.store.if18:
 ; CHECK-NEXT:    [[TMP16:%.*]] = or i64 [[INDEX]], 3
-; CHECK-NEXT:    [[NEXT_GEP9:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP16]]
+; CHECK-NEXT:    [[NEXT_GEP7:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP16]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = or i64 [[INDEX]], 3
-; CHECK-NEXT:    [[NEXT_GEP13:%.*]] = getelementptr i32, i32* [[Q]], i64 [[TMP17]]
-; CHECK-NEXT:    [[TMP18:%.*]] = load i32, i32* [[NEXT_GEP13]], align 16
-; CHECK-NEXT:    store i32 [[TMP18]], i32* [[NEXT_GEP9]], align 16
-; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE21]]
+; CHECK-NEXT:    [[NEXT_GEP11:%.*]] = getelementptr i32, i32* [[Q]], i64 [[TMP17]]
+; CHECK-NEXT:    [[TMP18:%.*]] = load i32, i32* [[NEXT_GEP11]], align 16
+; CHECK-NEXT:    store i32 [[TMP18]], i32* [[NEXT_GEP7]], align 16
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE19]]
 ; CHECK:       pred.store.continue19:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -455,8 +455,8 @@ define void @example23c(i16* noalias nocapture %src, i32* noalias nocapture %dst
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE16:%.*]] ]
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[INDUCTION:%.*]] = or <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <4 x i64> [[INDUCTION]], <i64 257, i64 257, i64 257, i64 257>
+; CHECK-NEXT:    [[VEC_IV:%.*]] = or <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <4 x i64> [[VEC_IV]], <i64 257, i64 257, i64 257, i64 257>
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll
index 98c2b81cf792f..00b4fe382e9a9 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll
@@ -133,8 +133,8 @@ define dso_local void @test2(i16* noalias nocapture %points, i32 %numPoints, i16
 ; DISABLED_MASKED_STRIDED-LABEL: @test2(
 ; DISABLED_MASKED_STRIDED-NEXT:  entry:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[CMP15:%.*]] = icmp sgt i32 [[NUMPOINTS:%.*]], 0
-; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[CMP15]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
-; DISABLED_MASKED_STRIDED:       for.body.preheader:
+; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[CMP15]], label [[VECTOR_PH:%.*]], label [[FOR_END:%.*]]
+; DISABLED_MASKED_STRIDED:       vector.ph:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[NUMPOINTS]] to i64
 ; DISABLED_MASKED_STRIDED-NEXT:    [[N_RND_UP:%.*]] = add nuw nsw i64 [[WIDE_TRIP_COUNT]], 3
 ; DISABLED_MASKED_STRIDED-NEXT:    [[N_VEC:%.*]] = and i64 [[N_RND_UP]], 8589934588
@@ -143,8 +143,8 @@ define dso_local void @test2(i16* noalias nocapture %points, i32 %numPoints, i16
 ; DISABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; DISABLED_MASKED_STRIDED:       vector.body:
-; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE15:%.*]] ]
-; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[FOR_BODY_PREHEADER]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE15]] ]
+; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE15:%.*]] ]
+; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE15]] ]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i64 [[INDEX]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = bitcast i16* [[TMP1]] to <4 x i16>*
@@ -238,8 +238,8 @@ define dso_local void @test2(i16* noalias nocapture %points, i32 %numPoints, i16
 ; ENABLED_MASKED_STRIDED-LABEL: @test2(
 ; ENABLED_MASKED_STRIDED-NEXT:  entry:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[CMP15:%.*]] = icmp sgt i32 [[NUMPOINTS:%.*]], 0
-; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[CMP15]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
-; ENABLED_MASKED_STRIDED:       for.body.preheader:
+; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[CMP15]], label [[VECTOR_PH:%.*]], label [[FOR_END:%.*]]
+; ENABLED_MASKED_STRIDED:       vector.ph:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[NUMPOINTS]] to i64
 ; ENABLED_MASKED_STRIDED-NEXT:    [[N_RND_UP:%.*]] = add nuw nsw i64 [[WIDE_TRIP_COUNT]], 3
 ; ENABLED_MASKED_STRIDED-NEXT:    [[N_VEC:%.*]] = and i64 [[N_RND_UP]], 8589934588
@@ -249,11 +249,11 @@ define dso_local void @test2(i16* noalias nocapture %points, i32 %numPoints, i16
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i16, i16* [[POINTS:%.*]], i64 -1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; ENABLED_MASKED_STRIDED:       vector.body:
-; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0
 ; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer
-; ENABLED_MASKED_STRIDED-NEXT:    [[INDUCTION:%.*]] = or <4 x i64> [[BROADCAST_SPLAT2]], <i64 0, i64 1, i64 2, i64 3>
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = icmp ule <4 x i64> [[INDUCTION]], [[BROADCAST_SPLAT]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IV:%.*]] = or <4 x i64> [[BROADCAST_SPLAT2]], <i64 0, i64 1, i64 2, i64 3>
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i64 [[INDEX]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <4 x i16>*
 ; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* [[TMP3]], i32 2, <4 x i1> [[TMP1]], <4 x i16> poison)

diff  --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
index 85d72dd7f9d3f..7da1b810e22d3 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
@@ -4367,11 +4367,11 @@ define i32 @sink_into_replication_region(i32 %y) {
 ;
 ; CHECK-LABEL: @sink_into_replication_region(
 ; CHECK-NEXT:  bb:
+; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
 ; CHECK-NEXT:    [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
 ; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
-; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_RND_UP:%.*]] = add i32 [[TMP1]], 3
 ; CHECK-NEXT:    [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -4
 ; CHECK-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[TMP1]], -1
@@ -4441,11 +4441,11 @@ define i32 @sink_into_replication_region(i32 %y) {
 ;
 ; UNROLL-LABEL: @sink_into_replication_region(
 ; UNROLL-NEXT:  bb:
+; UNROLL-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; UNROLL:       vector.ph:
 ; UNROLL-NEXT:    [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
 ; UNROLL-NEXT:    [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
 ; UNROLL-NEXT:    [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
-; UNROLL-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; UNROLL:       vector.ph:
 ; UNROLL-NEXT:    [[N_RND_UP:%.*]] = add i32 [[TMP1]], 7
 ; UNROLL-NEXT:    [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
 ; UNROLL-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[TMP1]], -1
@@ -4864,11 +4864,11 @@ define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) {
 ;
 ; CHECK-LABEL: @sink_into_replication_region_multiple(
 ; CHECK-NEXT:  bb:
+; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
 ; CHECK-NEXT:    [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
 ; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
-; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_RND_UP:%.*]] = add i32 [[TMP1]], 3
 ; CHECK-NEXT:    [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -4
 ; CHECK-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[TMP1]], -1
@@ -4972,11 +4972,11 @@ define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) {
 ;
 ; UNROLL-LABEL: @sink_into_replication_region_multiple(
 ; UNROLL-NEXT:  bb:
+; UNROLL-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; UNROLL:       vector.ph:
 ; UNROLL-NEXT:    [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
 ; UNROLL-NEXT:    [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
 ; UNROLL-NEXT:    [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
-; UNROLL-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; UNROLL:       vector.ph:
 ; UNROLL-NEXT:    [[N_RND_UP:%.*]] = add i32 [[TMP1]], 7
 ; UNROLL-NEXT:    [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
 ; UNROLL-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[TMP1]], -1

diff  --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll
index fcb19229aa719..4bfb94fc87fbd 100644
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll
@@ -1194,11 +1194,11 @@ define float @scalarize_induction_variable_02(float* %a, float* %b, i64 %n) {
 ; INTERLEAVE-NEXT:  entry:
 ; INTERLEAVE-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 8)
 ; INTERLEAVE-NEXT:    [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
-; INTERLEAVE-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 3
-; INTERLEAVE-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; INTERLEAVE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 64
 ; INTERLEAVE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; INTERLEAVE:       vector.ph:
+; INTERLEAVE-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 3
+; INTERLEAVE-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; INTERLEAVE-NEXT:    [[N_MOD_VF:%.*]] = and i64 [[TMP2]], 7
 ; INTERLEAVE-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
 ; INTERLEAVE-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i64 8, i64 [[N_MOD_VF]]

diff  --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
index eddfd9febaa8d..54ccd7f070003 100644
--- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
@@ -115,11 +115,11 @@ define void @test_struct_array_load3_store3() {
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[NEXT_GEP]] to <12 x i32>*
 ; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <12 x i32>, <12 x i32>* [[TMP1]], align 4
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
-; CHECK-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
-; CHECK-NEXT:    [[STRIDED_VEC3:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
+; CHECK-NEXT:    [[STRIDED_VEC5:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+; CHECK-NEXT:    [[STRIDED_VEC6:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
 ; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[STRIDED_VEC]], <i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[STRIDED_VEC2]], <i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[STRIDED_VEC3]], <i32 3, i32 3, i32 3, i32 3>
+; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[STRIDED_VEC5]], <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[STRIDED_VEC6]], <i32 3, i32 3, i32 3, i32 3>
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1024 x %struct.ST3], [1024 x %struct.ST3]* @S, i64 0, i64 [[INDEX]], i32 2
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i64 -2
 ; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <12 x i32>*
@@ -496,11 +496,11 @@ define void @even_load_dynamic_tc(i32* noalias nocapture readonly %A, i32* noali
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 2)
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[UMAX]], -1
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = and i64 [[TMP2]], 3
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i64 4, i64 [[N_MOD_VF]]
@@ -759,12 +759,12 @@ define void @mixed_load3_store3(i32* nocapture %A) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[NEXT_GEP]] to <12 x i32>*
 ; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <12 x i32>, <12 x i32>* [[TMP1]], align 4
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
-; CHECK-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
-; CHECK-NEXT:    [[STRIDED_VEC3:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
+; CHECK-NEXT:    [[STRIDED_VEC5:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+; CHECK-NEXT:    [[STRIDED_VEC6:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
 ; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i32> [[STRIDED_VEC]], [[VEC_IND]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[NEXT_GEP]], i64 2
-; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i32> [[STRIDED_VEC2]], [[VEC_IND]]
-; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[STRIDED_VEC3]], [[VEC_IND]]
+; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i32> [[STRIDED_VEC5]], [[VEC_IND]]
+; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[STRIDED_VEC6]], [[VEC_IND]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i64 -2
 ; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <12 x i32>*
 ; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-dominance.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-dominance.ll
index 22024ce1c6179..d8109cc5f921e 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-dominance.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-dominance.ll
@@ -299,22 +299,22 @@ entry:
 define void @multiply_dont_hoist_phi(<4 x double>* noalias %A, <4 x double> * %B, [4 x double]* %C) {
 ; CHECK-LABEL: @multiply_dont_hoist_phi(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast <4 x double>* [[A:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST]], align 8
-; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr <4 x double>, <4 x double>* [[A]], i64 0, i64 2
+; CHECK-NEXT:    br label [[NEXT:%.*]]
+; CHECK:       next:
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr <4 x double>, <4 x double>* [[A:%.*]], i64 0, i64 2
 ; CHECK-NEXT:    [[VEC_CAST1:%.*]] = bitcast double* [[VEC_GEP]] to <2 x double>*
 ; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST1]], align 8
-; CHECK-NEXT:    [[VEC_CAST3:%.*]] = bitcast <4 x double>* [[B:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[COL_LOAD4:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST3]], align 8
-; CHECK-NEXT:    [[VEC_GEP5:%.*]] = getelementptr <4 x double>, <4 x double>* [[B]], i64 0, i64 2
+; CHECK-NEXT:    [[VEC_GEP5:%.*]] = getelementptr <4 x double>, <4 x double>* [[B:%.*]], i64 0, i64 2
 ; CHECK-NEXT:    [[VEC_CAST6:%.*]] = bitcast double* [[VEC_GEP5]] to <2 x double>*
 ; CHECK-NEXT:    [[COL_LOAD7:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST6]], align 8
-; CHECK-NEXT:    br label [[NEXT:%.*]]
-; CHECK:       next:
 ; CHECK-NEXT:    [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x double> [[COL_LOAD7]], <2 x double> undef, <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast <4 x double>* [[A]] to <2 x double>*
+; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST]], align 8
 ; CHECK-NEXT:    [[SPLAT_SPLAT13:%.*]] = shufflevector <2 x double> [[COL_LOAD7]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = fmul contract <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT13]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[TMP0]])
+; CHECK-NEXT:    [[VEC_CAST3:%.*]] = bitcast <4 x double>* [[B]] to <2 x double>*
+; CHECK-NEXT:    [[COL_LOAD4:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST3]], align 8
 ; CHECK-NEXT:    [[SPLAT_SPLAT10:%.*]] = shufflevector <2 x double> [[COL_LOAD4]], <2 x double> undef, <2 x i32> <i32 1, i32 1>
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[COL_LOAD4]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul contract <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT]]

diff  --git a/llvm/test/Transforms/PGOProfile/chr.ll b/llvm/test/Transforms/PGOProfile/chr.ll
index ec215269e1ae4..bcf73e2dc7a18 100644
--- a/llvm/test/Transforms/PGOProfile/chr.ll
+++ b/llvm/test/Transforms/PGOProfile/chr.ll
@@ -1381,10 +1381,6 @@ define i32 @test_chr_15(i32* %i, i32* %j, i32 %sum0, i1 %pred, i32 %z) !prof !14
 ; CHECK-NEXT:    br label [[BB1]]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    [[J0:%.*]] = load i32, i32* [[J:%.*]], align 4
-; CHECK-NEXT:    [[V6:%.*]] = and i32 [[I0]], 2
-; CHECK-NEXT:    [[V4:%.*]] = icmp eq i32 [[V6]], [[J0]]
-; CHECK-NEXT:    [[V8:%.*]] = add i32 [[SUM0:%.*]], 43
-; CHECK-NEXT:    [[SUM2:%.*]] = select i1 [[V4]], i32 [[SUM0]], i32 [[V8]], !prof [[PROF16]]
 ; CHECK-NEXT:    call void @foo()
 ; CHECK-NEXT:    [[V9:%.*]] = and i32 [[I0]], 4
 ; CHECK-NEXT:    [[V10:%.*]] = icmp eq i32 [[V9]], 0
@@ -1393,6 +1389,10 @@ define i32 @test_chr_15(i32* %i, i32* %j, i32 %sum0, i1 %pred, i32 %z) !prof !14
 ; CHECK-NEXT:    call void @foo()
 ; CHECK-NEXT:    br label [[BB3]]
 ; CHECK:       bb3:
+; CHECK-NEXT:    [[V6:%.*]] = and i32 [[I0]], 2
+; CHECK-NEXT:    [[V4:%.*]] = icmp eq i32 [[V6]], [[J0]]
+; CHECK-NEXT:    [[V8:%.*]] = add i32 [[SUM0:%.*]], 43
+; CHECK-NEXT:    [[SUM2:%.*]] = select i1 [[V4]], i32 [[SUM0]], i32 [[V8]], !prof [[PROF16]]
 ; CHECK-NEXT:    [[V5:%.*]] = icmp eq i32 [[I0]], [[SUM2]]
 ; CHECK-NEXT:    [[SUM3:%.*]] = select i1 [[V5]], i32 [[SUM2]], i32 [[V8]], !prof [[PROF16]]
 ; CHECK-NEXT:    [[V11:%.*]] = add i32 [[I0]], [[SUM3]]