[llvm] [VPlan] Remove ILV::sinkScalarOperands. (PR #136024)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 16 13:35:44 PDT 2025
https://github.com/fhahn created https://github.com/llvm/llvm-project/pull/136024
Remove legacy ILV sinkScalarOperands, which is superseded by the sinkScalarOperands VPlan transforms.
There are a few cases that aren't handled by VPlan's sinkScalarOperands, namely when we have scalar-steps where the first lane is used outside a replicate region; the IR-based sinking is able to sink computing the steps for all lanes except the first.
We could improve this further, by duplicating such scalar-steps recipes, one to serve the first-lane-only users and users in replicate regions, but I am not sure if the extra complexity is warrented.
>From b983c4f06e02c99af2dcecc837996b6d0e053bdb Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 29 Apr 2024 12:43:43 +0100
Subject: [PATCH] [VPlan] Remove ILV::sinkScalarOperands.
Remove legacy ILV sinkScalarOperands, which is superseded by the
sinkScalarOperands VPlan transforms.
There are a few cases that aren't handled by VPlan's sinkScalarOperands,
namely when we have scalar-steps where the first lane is used outside a
replicate region; the IR-based sinking is able to sink computing the
steps for all lanes except the first.
We could improve this further, by duplicating such scalar-steps recipes,
one to serve the first-lane-only users and users in replicate regions,
but I am not sure if the extra complexity is warrented.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 88 ----------
.../LoopVectorize/AArch64/blend-costs.ll | 30 ++--
.../LoopVectorize/AArch64/masked-call.ll | 37 ++--
.../LoopVectorize/RISCV/uniform-load-store.ll | 6 +-
.../LoopVectorize/X86/cost-model.ll | 30 ++--
...6-sunk-instruction-used-outside-of-loop.ll | 4 +-
.../LoopVectorize/X86/small-size.ll | 80 ++++-----
.../LoopVectorize/consecutive-ptr-uniforms.ll | 6 +-
...able-info-from-assumption-constant-size.ll | 6 +-
.../LoopVectorize/if-pred-stores.ll | 8 +-
.../Transforms/LoopVectorize/induction.ll | 30 ++--
.../interleaved-accesses-pred-stores.ll | 2 +-
.../LoopVectorize/load-deref-pred-align.ll | 30 ++--
.../load-of-struct-deref-pred.ll | 160 +++++++++---------
.../Transforms/LoopVectorize/loop-form.ll | 2 +-
.../LoopVectorize/pointer-induction.ll | 56 +++---
.../LoopVectorize/reduction-inloop-cond.ll | 6 +-
.../LoopVectorize/select-cmp-multiuse.ll | 20 +--
.../LoopVectorize/select-cmp-predicated.ll | 2 +-
.../Transforms/LoopVectorize/struct-return.ll | 2 +-
.../tail-folding-vectorization-factor-1.ll | 14 +-
.../trip-count-expansion-may-introduce-ub.ll | 12 +-
22 files changed, 273 insertions(+), 358 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index dd7f05465a50b..d2f93bb7de2c8 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -541,10 +541,6 @@ class InnerLoopVectorizer {
protected:
friend class LoopVectorizationPlanner;
- /// Iteratively sink the scalarized operands of a predicated instruction into
- /// the block that was created for it.
- void sinkScalarOperands(Instruction *PredInst);
-
/// Returns (and creates if needed) the trip count of the widened loop.
Value *getOrCreateVectorTripCount(BasicBlock *InsertBlock);
@@ -629,9 +625,6 @@ class InnerLoopVectorizer {
/// A list of all bypass blocks. The first block is the entry of the loop.
SmallVector<BasicBlock *, 4> LoopBypassBlocks;
- /// Store instructions that were predicated.
- SmallVector<Instruction *, 4> PredicatedInstructions;
-
/// Trip count of the original loop.
Value *TripCount = nullptr;
@@ -2385,15 +2378,12 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
// End if-block.
VPRegionBlock *Parent = RepRecipe->getParent()->getParent();
- bool IfPredicateInstr = Parent ? Parent->isReplicator() : false;
assert(
(Parent || !RepRecipe->getParent()->getPlan()->getVectorLoopRegion() ||
all_of(RepRecipe->operands(),
[](VPValue *Op) { return Op->isDefinedOutsideLoopRegions(); })) &&
"Expected a recipe is either within a region or all of its operands "
"are defined outside the vectorized region.");
- if (IfPredicateInstr)
- PredicatedInstructions.push_back(Cloned);
}
Value *
@@ -2867,8 +2857,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
if (!State.Plan->getVectorLoopRegion())
return;
- for (Instruction *PI : PredicatedInstructions)
- sinkScalarOperands(&*PI);
VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion();
VPBasicBlock *HeaderVPBB = VectorRegion->getEntryBasicBlock();
@@ -2895,82 +2883,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
VF.getKnownMinValue() * UF);
}
-void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) {
- // The basic block and loop containing the predicated instruction.
- auto *PredBB = PredInst->getParent();
- auto *VectorLoop = LI->getLoopFor(PredBB);
-
- // Initialize a worklist with the operands of the predicated instruction.
- SetVector<Value *> Worklist(PredInst->op_begin(), PredInst->op_end());
-
- // Holds instructions that we need to analyze again. An instruction may be
- // reanalyzed if we don't yet know if we can sink it or not.
- SmallVector<Instruction *, 8> InstsToReanalyze;
-
- // Returns true if a given use occurs in the predicated block. Phi nodes use
- // their operands in their corresponding predecessor blocks.
- auto IsBlockOfUsePredicated = [&](Use &U) -> bool {
- auto *I = cast<Instruction>(U.getUser());
- BasicBlock *BB = I->getParent();
- if (auto *Phi = dyn_cast<PHINode>(I))
- BB = Phi->getIncomingBlock(
- PHINode::getIncomingValueNumForOperand(U.getOperandNo()));
- return BB == PredBB;
- };
-
- // Iteratively sink the scalarized operands of the predicated instruction
- // into the block we created for it. When an instruction is sunk, it's
- // operands are then added to the worklist. The algorithm ends after one pass
- // through the worklist doesn't sink a single instruction.
- bool Changed;
- do {
- // Add the instructions that need to be reanalyzed to the worklist, and
- // reset the changed indicator.
- Worklist.insert_range(InstsToReanalyze);
- InstsToReanalyze.clear();
- Changed = false;
-
- while (!Worklist.empty()) {
- auto *I = dyn_cast<Instruction>(Worklist.pop_back_val());
-
- // We can't sink an instruction if it is a phi node, is not in the loop,
- // may have side effects or may read from memory.
- // TODO: Could do more granular checking to allow sinking
- // a load past non-store instructions.
- if (!I || isa<PHINode>(I) || !VectorLoop->contains(I) ||
- I->mayHaveSideEffects() || I->mayReadFromMemory())
- continue;
-
- // If the instruction is already in PredBB, check if we can sink its
- // operands. In that case, VPlan's sinkScalarOperands() succeeded in
- // sinking the scalar instruction I, hence it appears in PredBB; but it
- // may have failed to sink I's operands (recursively), which we try
- // (again) here.
- if (I->getParent() == PredBB) {
- Worklist.insert_range(I->operands());
- continue;
- }
-
- // It's legal to sink the instruction if all its uses occur in the
- // predicated block. Otherwise, there's nothing to do yet, and we may
- // need to reanalyze the instruction.
- if (!llvm::all_of(I->uses(), IsBlockOfUsePredicated)) {
- InstsToReanalyze.push_back(I);
- continue;
- }
-
- // Move the instruction to the beginning of the predicated block, and add
- // it's operands to the worklist.
- I->moveBefore(PredBB->getFirstInsertionPt());
- Worklist.insert_range(I->operands());
-
- // The sinking may have enabled other instructions to be sunk, so we will
- // need to iterate.
- Changed = true;
- }
- } while (Changed);
-}
-
void InnerLoopVectorizer::fixNonInductionPHIs(VPTransformState &State) {
auto Iter = vp_depth_first_deep(Plan.getEntry());
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
index e302bf195ef8e..acc41d21af1f5 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
@@ -213,6 +213,21 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE30:.*]] ]
; CHECK-NEXT: [[IV:%.*]] = add i32 [[INDEX]], 0
+; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[INDEX]], 1
+; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[INDEX]], 2
+; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[INDEX]], 3
+; CHECK-NEXT: [[TMP24:%.*]] = add i32 [[INDEX]], 4
+; CHECK-NEXT: [[TMP28:%.*]] = add i32 [[INDEX]], 5
+; CHECK-NEXT: [[TMP32:%.*]] = add i32 [[INDEX]], 6
+; CHECK-NEXT: [[TMP36:%.*]] = add i32 [[INDEX]], 7
+; CHECK-NEXT: [[TMP40:%.*]] = add i32 [[INDEX]], 8
+; CHECK-NEXT: [[TMP44:%.*]] = add i32 [[INDEX]], 9
+; CHECK-NEXT: [[TMP48:%.*]] = add i32 [[INDEX]], 10
+; CHECK-NEXT: [[TMP52:%.*]] = add i32 [[INDEX]], 11
+; CHECK-NEXT: [[TMP56:%.*]] = add i32 [[INDEX]], 12
+; CHECK-NEXT: [[TMP60:%.*]] = add i32 [[INDEX]], 13
+; CHECK-NEXT: [[TMP64:%.*]] = add i32 [[INDEX]], 14
+; CHECK-NEXT: [[TMP68:%.*]] = add i32 [[INDEX]], 15
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[IV]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[GEP_SRC]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
@@ -232,7 +247,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <16 x i1> [[TMP7]], i32 1
; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
; CHECK: [[PRED_STORE_IF1]]:
-; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP12]]
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 1
; CHECK-NEXT: store i8 [[TMP14]], ptr [[TMP13]], align 1
@@ -241,7 +255,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i1> [[TMP7]], i32 2
; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
; CHECK: [[PRED_STORE_IF3]]:
-; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP16]]
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 2
; CHECK-NEXT: store i8 [[TMP18]], ptr [[TMP17]], align 1
@@ -250,7 +263,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i1> [[TMP7]], i32 3
; CHECK-NEXT: br i1 [[TMP19]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
; CHECK: [[PRED_STORE_IF5]]:
-; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[INDEX]], 3
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP20]]
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 3
; CHECK-NEXT: store i8 [[TMP22]], ptr [[TMP21]], align 1
@@ -259,7 +271,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i1> [[TMP7]], i32 4
; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
; CHECK: [[PRED_STORE_IF7]]:
-; CHECK-NEXT: [[TMP24:%.*]] = add i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP24]]
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 4
; CHECK-NEXT: store i8 [[TMP26]], ptr [[TMP25]], align 1
@@ -268,7 +279,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i1> [[TMP7]], i32 5
; CHECK-NEXT: br i1 [[TMP27]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
; CHECK: [[PRED_STORE_IF9]]:
-; CHECK-NEXT: [[TMP28:%.*]] = add i32 [[INDEX]], 5
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP28]]
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 5
; CHECK-NEXT: store i8 [[TMP30]], ptr [[TMP29]], align 1
@@ -277,7 +287,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i1> [[TMP7]], i32 6
; CHECK-NEXT: br i1 [[TMP31]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
; CHECK: [[PRED_STORE_IF11]]:
-; CHECK-NEXT: [[TMP32:%.*]] = add i32 [[INDEX]], 6
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP32]]
; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 6
; CHECK-NEXT: store i8 [[TMP34]], ptr [[TMP33]], align 1
@@ -286,7 +295,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i1> [[TMP7]], i32 7
; CHECK-NEXT: br i1 [[TMP35]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
; CHECK: [[PRED_STORE_IF13]]:
-; CHECK-NEXT: [[TMP36:%.*]] = add i32 [[INDEX]], 7
; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP36]]
; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 7
; CHECK-NEXT: store i8 [[TMP38]], ptr [[TMP37]], align 1
@@ -295,7 +303,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i1> [[TMP7]], i32 8
; CHECK-NEXT: br i1 [[TMP39]], label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
; CHECK: [[PRED_STORE_IF15]]:
-; CHECK-NEXT: [[TMP40:%.*]] = add i32 [[INDEX]], 8
; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP40]]
; CHECK-NEXT: [[TMP42:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 8
; CHECK-NEXT: store i8 [[TMP42]], ptr [[TMP41]], align 1
@@ -304,7 +311,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP43:%.*]] = extractelement <16 x i1> [[TMP7]], i32 9
; CHECK-NEXT: br i1 [[TMP43]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
; CHECK: [[PRED_STORE_IF17]]:
-; CHECK-NEXT: [[TMP44:%.*]] = add i32 [[INDEX]], 9
; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP44]]
; CHECK-NEXT: [[TMP46:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 9
; CHECK-NEXT: store i8 [[TMP46]], ptr [[TMP45]], align 1
@@ -313,7 +319,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP47:%.*]] = extractelement <16 x i1> [[TMP7]], i32 10
; CHECK-NEXT: br i1 [[TMP47]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
; CHECK: [[PRED_STORE_IF19]]:
-; CHECK-NEXT: [[TMP48:%.*]] = add i32 [[INDEX]], 10
; CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP48]]
; CHECK-NEXT: [[TMP50:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 10
; CHECK-NEXT: store i8 [[TMP50]], ptr [[TMP49]], align 1
@@ -322,7 +327,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP51:%.*]] = extractelement <16 x i1> [[TMP7]], i32 11
; CHECK-NEXT: br i1 [[TMP51]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
; CHECK: [[PRED_STORE_IF21]]:
-; CHECK-NEXT: [[TMP52:%.*]] = add i32 [[INDEX]], 11
; CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP52]]
; CHECK-NEXT: [[TMP54:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 11
; CHECK-NEXT: store i8 [[TMP54]], ptr [[TMP53]], align 1
@@ -331,7 +335,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP55:%.*]] = extractelement <16 x i1> [[TMP7]], i32 12
; CHECK-NEXT: br i1 [[TMP55]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
; CHECK: [[PRED_STORE_IF23]]:
-; CHECK-NEXT: [[TMP56:%.*]] = add i32 [[INDEX]], 12
; CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP56]]
; CHECK-NEXT: [[TMP58:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 12
; CHECK-NEXT: store i8 [[TMP58]], ptr [[TMP57]], align 1
@@ -340,7 +343,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP59:%.*]] = extractelement <16 x i1> [[TMP7]], i32 13
; CHECK-NEXT: br i1 [[TMP59]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
; CHECK: [[PRED_STORE_IF25]]:
-; CHECK-NEXT: [[TMP60:%.*]] = add i32 [[INDEX]], 13
; CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP60]]
; CHECK-NEXT: [[TMP62:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 13
; CHECK-NEXT: store i8 [[TMP62]], ptr [[TMP61]], align 1
@@ -349,7 +351,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP63:%.*]] = extractelement <16 x i1> [[TMP7]], i32 14
; CHECK-NEXT: br i1 [[TMP63]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
; CHECK: [[PRED_STORE_IF27]]:
-; CHECK-NEXT: [[TMP64:%.*]] = add i32 [[INDEX]], 14
; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP64]]
; CHECK-NEXT: [[TMP66:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 14
; CHECK-NEXT: store i8 [[TMP66]], ptr [[TMP65]], align 1
@@ -358,7 +359,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP67:%.*]] = extractelement <16 x i1> [[TMP7]], i32 15
; CHECK-NEXT: br i1 [[TMP67]], label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30]]
; CHECK: [[PRED_STORE_IF29]]:
-; CHECK-NEXT: [[TMP68:%.*]] = add i32 [[INDEX]], 15
; CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP68]]
; CHECK-NEXT: [[TMP70:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 15
; CHECK-NEXT: store i8 [[TMP70]], ptr [[TMP69]], align 1
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
index 585c2df08f7d6..c662cf8478651 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
@@ -996,28 +996,31 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = icmp ult i64 1, [[TMP0]]
; TFA_INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]]
; TFA_INTERLEAVE: [[VECTOR_BODY]]:
-; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[TMP27:%.*]], %[[PRED_STORE_CONTINUE5:.*]] ]
-; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[PRED_STORE_CONTINUE5]] ]
-; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT6:%.*]], %[[PRED_STORE_CONTINUE5]] ]
+; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[TMP27:%.*]], %[[TMP19:.*]] ]
+; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[TMP19]] ]
+; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT6:%.*]], %[[TMP19]] ]
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = load double, ptr [[P2]], align 8
-; TFA_INTERLEAVE-NEXT: br i1 [[ACTIVE_LANE_MASK]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
-; TFA_INTERLEAVE: [[PRED_STORE_IF]]:
; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR7:[0-9]+]]
-; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = fcmp ogt double [[TMP5]], 0.000000e+00
-; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = xor i1 [[TMP6]], true
-; TFA_INTERLEAVE-NEXT: [[TMP24:%.*]] = select i1 [[TMP7]], double 1.000000e+00, double 0.000000e+00
-; TFA_INTERLEAVE-NEXT: store double [[TMP24]], ptr [[P]], align 8
-; TFA_INTERLEAVE-NEXT: br label %[[PRED_STORE_CONTINUE]]
-; TFA_INTERLEAVE: [[PRED_STORE_CONTINUE]]:
-; TFA_INTERLEAVE-NEXT: br i1 [[ACTIVE_LANE_MASK2]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5]]
-; TFA_INTERLEAVE: [[PRED_STORE_IF4]]:
; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR7]]
+; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = fcmp ogt double [[TMP5]], 0.000000e+00
; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = fcmp ogt double [[TMP8]], 0.000000e+00
-; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = xor i1 [[TMP9]], true
+; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = xor i1 [[TMP6]], true
+; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = xor i1 [[TMP9]], true
+; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = select i1 [[ACTIVE_LANE_MASK]], i1 [[TMP18]], i1 false
+; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = select i1 [[ACTIVE_LANE_MASK2]], i1 [[TMP20]], i1 false
; TFA_INTERLEAVE-NEXT: [[TMP26:%.*]] = select i1 [[TMP10]], double 1.000000e+00, double 0.000000e+00
-; TFA_INTERLEAVE-NEXT: store double [[TMP26]], ptr [[P]], align 8
-; TFA_INTERLEAVE-NEXT: br label %[[PRED_STORE_CONTINUE5]]
-; TFA_INTERLEAVE: [[PRED_STORE_CONTINUE5]]:
+; TFA_INTERLEAVE-NEXT: [[PREDPHI3:%.*]] = select i1 [[TMP21]], double 1.000000e+00, double 0.000000e+00
+; TFA_INTERLEAVE-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[ACTIVE_LANE_MASK2]], double [[PREDPHI3]], double [[TMP26]]
+; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = xor i1 [[ACTIVE_LANE_MASK]], true
+; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = xor i1 [[ACTIVE_LANE_MASK2]], true
+; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = xor i1 [[TMP13]], true
+; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = xor i1 [[TMP14]], true
+; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]]
+; TFA_INTERLEAVE-NEXT: br i1 [[TMP17]], label %[[BB18:.*]], label %[[TMP19]]
+; TFA_INTERLEAVE: [[BB18]]:
+; TFA_INTERLEAVE-NEXT: store double [[SPEC_SELECT]], ptr [[P]], align 8
+; TFA_INTERLEAVE-NEXT: br label %[[TMP19]]
+; TFA_INTERLEAVE: [[TMP19]]:
; TFA_INTERLEAVE-NEXT: [[TMP27]] = add i64 [[INDEX]], 2
; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = icmp ult i64 [[INDEX]], [[TMP3]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
index d8713bdda689a..d065fdd2d31c9 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
@@ -1046,6 +1046,9 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias
; TF-FIXEDLEN: [[VECTOR_BODY]]:
; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 1
+; TF-FIXEDLEN-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 2
+; TF-FIXEDLEN-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 3
; TF-FIXEDLEN-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP0]], i64 1025)
; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 0
; TF-FIXEDLEN-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
@@ -1056,21 +1059,18 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias
; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 1
; TF-FIXEDLEN-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
; TF-FIXEDLEN: [[PRED_STORE_IF1]]:
-; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 1
; TF-FIXEDLEN-NEXT: store i64 [[TMP3]], ptr [[B]], align 8
; TF-FIXEDLEN-NEXT: br label %[[PRED_STORE_CONTINUE2]]
; TF-FIXEDLEN: [[PRED_STORE_CONTINUE2]]:
; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 2
; TF-FIXEDLEN-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
; TF-FIXEDLEN: [[PRED_STORE_IF3]]:
-; TF-FIXEDLEN-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 2
; TF-FIXEDLEN-NEXT: store i64 [[TMP5]], ptr [[B]], align 8
; TF-FIXEDLEN-NEXT: br label %[[PRED_STORE_CONTINUE4]]
; TF-FIXEDLEN: [[PRED_STORE_CONTINUE4]]:
; TF-FIXEDLEN-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 3
; TF-FIXEDLEN-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6]]
; TF-FIXEDLEN: [[PRED_STORE_IF5]]:
-; TF-FIXEDLEN-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 3
; TF-FIXEDLEN-NEXT: store i64 [[TMP7]], ptr [[B]], align 8
; TF-FIXEDLEN-NEXT: br label %[[PRED_STORE_CONTINUE6]]
; TF-FIXEDLEN: [[PRED_STORE_CONTINUE6]]:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
index a1c727f62ba7a..3d77e6d3a94af 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
@@ -604,9 +604,21 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE37:%.*]] ]
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP35:%.*]] = add i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP42:%.*]] = add i64 [[INDEX]], 5
+; CHECK-NEXT: [[TMP46:%.*]] = add i64 [[INDEX]], 6
+; CHECK-NEXT: [[TMP50:%.*]] = add i64 [[INDEX]], 7
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 8
+; CHECK-NEXT: [[TMP57:%.*]] = add i64 [[INDEX]], 9
+; CHECK-NEXT: [[TMP61:%.*]] = add i64 [[INDEX]], 10
+; CHECK-NEXT: [[TMP65:%.*]] = add i64 [[INDEX]], 11
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 12
+; CHECK-NEXT: [[TMP72:%.*]] = add i64 [[INDEX]], 13
+; CHECK-NEXT: [[TMP76:%.*]] = add i64 [[INDEX]], 14
+; CHECK-NEXT: [[TMP80:%.*]] = add i64 [[INDEX]], 15
; CHECK-NEXT: [[TMP7:%.*]] = shl nsw i64 [[TMP3]], 2
; CHECK-NEXT: [[TMP8:%.*]] = shl nsw i64 [[TMP4]], 2
; CHECK-NEXT: [[TMP9:%.*]] = shl nsw i64 [[TMP5]], 2
@@ -638,7 +650,6 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i1> [[TMP19]], i32 1
; CHECK-NEXT: br i1 [[TMP26]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]]
; CHECK: pred.store.if8:
-; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP28:%.*]] = shl nsw i64 [[TMP27]], 2
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP28]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP29]], align 8
@@ -647,7 +658,6 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP19]], i32 2
; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]]
; CHECK: pred.store.if10:
-; CHECK-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP32:%.*]] = shl nsw i64 [[TMP31]], 2
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP32]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP33]], align 8
@@ -656,7 +666,6 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
; CHECK-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP19]], i32 3
; CHECK-NEXT: br i1 [[TMP34]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]]
; CHECK: pred.store.if12:
-; CHECK-NEXT: [[TMP35:%.*]] = add i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP36:%.*]] = shl nsw i64 [[TMP35]], 2
; CHECK-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP36]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP37]], align 8
@@ -673,7 +682,6 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
; CHECK-NEXT: [[TMP41:%.*]] = extractelement <4 x i1> [[TMP20]], i32 1
; CHECK-NEXT: br i1 [[TMP41]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17:%.*]]
; CHECK: pred.store.if16:
-; CHECK-NEXT: [[TMP42:%.*]] = add i64 [[INDEX]], 5
; CHECK-NEXT: [[TMP43:%.*]] = shl nsw i64 [[TMP42]], 2
; CHECK-NEXT: [[TMP44:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP43]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP44]], align 8
@@ -682,7 +690,6 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
; CHECK-NEXT: [[TMP45:%.*]] = extractelement <4 x i1> [[TMP20]], i32 2
; CHECK-NEXT: br i1 [[TMP45]], label [[PRED_STORE_IF18:%.*]], label [[PRED_STORE_CONTINUE19:%.*]]
; CHECK: pred.store.if18:
-; CHECK-NEXT: [[TMP46:%.*]] = add i64 [[INDEX]], 6
; CHECK-NEXT: [[TMP47:%.*]] = shl nsw i64 [[TMP46]], 2
; CHECK-NEXT: [[TMP48:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP47]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP48]], align 8
@@ -691,7 +698,6 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
; CHECK-NEXT: [[TMP49:%.*]] = extractelement <4 x i1> [[TMP20]], i32 3
; CHECK-NEXT: br i1 [[TMP49]], label [[PRED_STORE_IF20:%.*]], label [[PRED_STORE_CONTINUE21:%.*]]
; CHECK: pred.store.if20:
-; CHECK-NEXT: [[TMP50:%.*]] = add i64 [[INDEX]], 7
; CHECK-NEXT: [[TMP51:%.*]] = shl nsw i64 [[TMP50]], 2
; CHECK-NEXT: [[TMP52:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP51]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP52]], align 8
@@ -708,7 +714,6 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
; CHECK-NEXT: [[TMP56:%.*]] = extractelement <4 x i1> [[TMP21]], i32 1
; CHECK-NEXT: br i1 [[TMP56]], label [[PRED_STORE_IF24:%.*]], label [[PRED_STORE_CONTINUE25:%.*]]
; CHECK: pred.store.if24:
-; CHECK-NEXT: [[TMP57:%.*]] = add i64 [[INDEX]], 9
; CHECK-NEXT: [[TMP58:%.*]] = shl nsw i64 [[TMP57]], 2
; CHECK-NEXT: [[TMP59:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP58]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP59]], align 8
@@ -717,7 +722,6 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
; CHECK-NEXT: [[TMP60:%.*]] = extractelement <4 x i1> [[TMP21]], i32 2
; CHECK-NEXT: br i1 [[TMP60]], label [[PRED_STORE_IF26:%.*]], label [[PRED_STORE_CONTINUE27:%.*]]
; CHECK: pred.store.if26:
-; CHECK-NEXT: [[TMP61:%.*]] = add i64 [[INDEX]], 10
; CHECK-NEXT: [[TMP62:%.*]] = shl nsw i64 [[TMP61]], 2
; CHECK-NEXT: [[TMP63:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP62]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP63]], align 8
@@ -726,7 +730,6 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
; CHECK-NEXT: [[TMP64:%.*]] = extractelement <4 x i1> [[TMP21]], i32 3
; CHECK-NEXT: br i1 [[TMP64]], label [[PRED_STORE_IF28:%.*]], label [[PRED_STORE_CONTINUE29:%.*]]
; CHECK: pred.store.if28:
-; CHECK-NEXT: [[TMP65:%.*]] = add i64 [[INDEX]], 11
; CHECK-NEXT: [[TMP66:%.*]] = shl nsw i64 [[TMP65]], 2
; CHECK-NEXT: [[TMP67:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP66]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP67]], align 8
@@ -743,7 +746,6 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
; CHECK-NEXT: [[TMP71:%.*]] = extractelement <4 x i1> [[TMP22]], i32 1
; CHECK-NEXT: br i1 [[TMP71]], label [[PRED_STORE_IF32:%.*]], label [[PRED_STORE_CONTINUE33:%.*]]
; CHECK: pred.store.if32:
-; CHECK-NEXT: [[TMP72:%.*]] = add i64 [[INDEX]], 13
; CHECK-NEXT: [[TMP73:%.*]] = shl nsw i64 [[TMP72]], 2
; CHECK-NEXT: [[TMP74:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP73]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP74]], align 8
@@ -752,7 +754,6 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
; CHECK-NEXT: [[TMP75:%.*]] = extractelement <4 x i1> [[TMP22]], i32 2
; CHECK-NEXT: br i1 [[TMP75]], label [[PRED_STORE_IF34:%.*]], label [[PRED_STORE_CONTINUE35:%.*]]
; CHECK: pred.store.if34:
-; CHECK-NEXT: [[TMP76:%.*]] = add i64 [[INDEX]], 14
; CHECK-NEXT: [[TMP77:%.*]] = shl nsw i64 [[TMP76]], 2
; CHECK-NEXT: [[TMP78:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP77]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP78]], align 8
@@ -761,7 +762,6 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
; CHECK-NEXT: [[TMP79:%.*]] = extractelement <4 x i1> [[TMP22]], i32 3
; CHECK-NEXT: br i1 [[TMP79]], label [[PRED_STORE_IF36:%.*]], label [[PRED_STORE_CONTINUE37]]
; CHECK: pred.store.if36:
-; CHECK-NEXT: [[TMP80:%.*]] = add i64 [[INDEX]], 15
; CHECK-NEXT: [[TMP81:%.*]] = shl nsw i64 [[TMP80]], 2
; CHECK-NEXT: [[TMP82:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP81]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP82]], align 8
@@ -786,6 +786,9 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX40:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL1]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT51:%.*]], [[PRED_STORE_CONTINUE50:%.*]] ]
; CHECK-NEXT: [[TMP86:%.*]] = add i64 [[INDEX40]], 0
+; CHECK-NEXT: [[TMP95:%.*]] = add i64 [[INDEX40]], 1
+; CHECK-NEXT: [[TMP99:%.*]] = add i64 [[INDEX40]], 2
+; CHECK-NEXT: [[TMP103:%.*]] = add i64 [[INDEX40]], 3
; CHECK-NEXT: [[TMP87:%.*]] = shl nsw i64 [[TMP86]], 2
; CHECK-NEXT: [[TMP89:%.*]] = getelementptr nusw double, ptr [[A]], i64 [[TMP87]]
; CHECK-NEXT: [[WIDE_VEC41:%.*]] = load <16 x double>, ptr [[TMP89]], align 8
@@ -802,7 +805,6 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
; CHECK-NEXT: [[TMP94:%.*]] = extractelement <4 x i1> [[TMP90]], i32 1
; CHECK-NEXT: br i1 [[TMP94]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]]
; CHECK: pred.store.if45:
-; CHECK-NEXT: [[TMP95:%.*]] = add i64 [[INDEX40]], 1
; CHECK-NEXT: [[TMP96:%.*]] = shl nsw i64 [[TMP95]], 2
; CHECK-NEXT: [[TMP97:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP96]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP97]], align 8
@@ -811,7 +813,6 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
; CHECK-NEXT: [[TMP98:%.*]] = extractelement <4 x i1> [[TMP90]], i32 2
; CHECK-NEXT: br i1 [[TMP98]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
; CHECK: pred.store.if47:
-; CHECK-NEXT: [[TMP99:%.*]] = add i64 [[INDEX40]], 2
; CHECK-NEXT: [[TMP100:%.*]] = shl nsw i64 [[TMP99]], 2
; CHECK-NEXT: [[TMP101:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP100]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP101]], align 8
@@ -820,7 +821,6 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
; CHECK-NEXT: [[TMP102:%.*]] = extractelement <4 x i1> [[TMP90]], i32 3
; CHECK-NEXT: br i1 [[TMP102]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50]]
; CHECK: pred.store.if49:
-; CHECK-NEXT: [[TMP103:%.*]] = add i64 [[INDEX40]], 3
; CHECK-NEXT: [[TMP104:%.*]] = shl nsw i64 [[TMP103]], 2
; CHECK-NEXT: [[TMP105:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP104]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP105]], align 8
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll b/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll
index c91ead00a950d..644f10b617eb7 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll
@@ -8,13 +8,13 @@ define ptr @test(ptr noalias %src, ptr noalias %dst) {
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %vector.ph ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE2]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true)
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK: pred.load.if:
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
index 6424fb5565a63..58ae5022b9f90 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
@@ -275,7 +275,19 @@ define void @example3(i32 %n, ptr noalias nocapture %p, ptr noalias nocapture %q
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE16:%.*]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[TMP9]], i64 4
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[TMP10]], i64 8
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[TMP11]], i64 12
; CHECK-NEXT: [[OFFSET_IDX6:%.*]] = shl i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[Q:%.*]], i64 [[OFFSET_IDX6]]
+; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[TMP6]], i64 4
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Q]], i64 [[OFFSET_IDX6]]
+; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[TMP7]], i64 8
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[Q]], i64 [[OFFSET_IDX6]]
+; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[TMP8]], i64 12
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[VEC_IV:%.*]] = or disjoint <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
@@ -283,43 +295,31 @@ define void @example3(i32 %n, ptr noalias nocapture %p, ptr noalias nocapture %q
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i64 0
; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[Q:%.*]], i64 [[OFFSET_IDX6]]
+; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[Q]], i64 [[OFFSET_IDX6]]
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[NEXT_GEP7]], align 16
; CHECK-NEXT: store i32 [[TMP5]], ptr [[NEXT_GEP]], align 16
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP3]], i64 1
-; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]]
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP3]], i64 1
+; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]]
; CHECK: pred.store.if11:
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[TMP7]], i64 4
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[Q]], i64 [[OFFSET_IDX6]]
-; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[TMP8]], i64 4
-; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[NEXT_GEP8]], align 16
-; CHECK-NEXT: store i32 [[TMP9]], ptr [[NEXT_GEP3]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[NEXT_GEP6]], align 16
+; CHECK-NEXT: store i32 [[TMP13]], ptr [[NEXT_GEP1]], align 16
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]]
; CHECK: pred.store.continue12:
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP3]], i64 2
-; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]]
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i64 2
+; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]]
; CHECK: pred.store.if13:
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[TMP11]], i64 8
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[Q]], i64 [[OFFSET_IDX6]]
-; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[TMP12]], i64 8
-; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[NEXT_GEP9]], align 16
-; CHECK-NEXT: store i32 [[TMP13]], ptr [[NEXT_GEP4]], align 16
+; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[NEXT_GEP9]], align 16
+; CHECK-NEXT: store i32 [[TMP15]], ptr [[NEXT_GEP2]], align 16
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]]
; CHECK: pred.store.continue14:
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i64 3
-; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16]]
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP3]], i64 3
+; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16]]
; CHECK: pred.store.if15:
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[TMP15]], i64 12
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Q]], i64 [[OFFSET_IDX6]]
-; CHECK-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i8, ptr [[TMP16]], i64 12
-; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[NEXT_GEP10]], align 16
-; CHECK-NEXT: store i32 [[TMP17]], ptr [[NEXT_GEP5]], align 16
+; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[NEXT_GEP8]], align 16
+; CHECK-NEXT: store i32 [[TMP17]], ptr [[NEXT_GEP3]], align 16
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE16]]
; CHECK: pred.store.continue16:
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -453,7 +453,19 @@ define void @example23c(ptr noalias nocapture %src, ptr noalias nocapture %dst)
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE15:%.*]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[TMP7]], i64 2
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[TMP8]], i64 4
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[TMP13]], i64 6
; CHECK-NEXT: [[OFFSET_IDX5:%.*]] = shl i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[OFFSET_IDX5]]
+; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[TMP14]], i64 4
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX5]]
+; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[TMP19]], i64 8
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX5]]
+; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[TMP20]], i64 12
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[VEC_IV:%.*]] = or disjoint <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
@@ -461,8 +473,8 @@ define void @example23c(ptr noalias nocapture %src, ptr noalias nocapture %dst)
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0
; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[OFFSET_IDX5]]
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX5]]
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[NEXT_GEP]], align 2
; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP3]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i32 [[TMP4]], 7
@@ -472,10 +484,6 @@ define void @example23c(ptr noalias nocapture %src, ptr noalias nocapture %dst)
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP1]], i64 1
; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
; CHECK: pred.store.if9:
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX5]]
-; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[TMP7]], i64 4
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[TMP8]], i64 2
; CHECK-NEXT: [[TMP9:%.*]] = load i16, ptr [[NEXT_GEP2]], align 2
; CHECK-NEXT: [[TMP10:%.*]] = zext i16 [[TMP9]] to i32
; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i32 [[TMP10]], 7
@@ -485,10 +493,6 @@ define void @example23c(ptr noalias nocapture %src, ptr noalias nocapture %dst)
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP1]], i64 2
; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE12:%.*]]
; CHECK: pred.store.if11:
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX5]]
-; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[TMP13]], i64 8
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[TMP14]], i64 4
; CHECK-NEXT: [[TMP15:%.*]] = load i16, ptr [[NEXT_GEP3]], align 2
; CHECK-NEXT: [[TMP16:%.*]] = zext i16 [[TMP15]] to i32
; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i32 [[TMP16]], 7
@@ -498,10 +502,6 @@ define void @example23c(ptr noalias nocapture %src, ptr noalias nocapture %dst)
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP1]], i64 3
; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE15]]
; CHECK: pred.store.if13:
-; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX5]]
-; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[TMP19]], i64 12
-; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[TMP20]], i64 6
; CHECK-NEXT: [[TMP21:%.*]] = load i16, ptr [[NEXT_GEP4]], align 2
; CHECK-NEXT: [[TMP22:%.*]] = zext i16 [[TMP21]] to i32
; CHECK-NEXT: [[TMP23:%.*]] = shl nuw nsw i32 [[TMP22]], 7
diff --git a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll
index 736f88e7f2d04..57c35e874a55c 100644
--- a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll
+++ b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll
@@ -200,13 +200,13 @@ for.end:
; INTER-NOT: LV: Found uniform instruction: %tmp0 = getelementptr inbounds %pair, ptr %p, i64 %i, i32 0
; INTER: vector.body
; INTER: %index = phi i64 [ 0, %vector.ph ], [ %index.next, {{.*}} ]
+; INTER: %[[I1:.+]] = or disjoint i64 %index, 1
+; INTER: %[[I2:.+]] = or disjoint i64 %index, 2
+; INTER: %[[I3:.+]] = or disjoint i64 %index, 3
; INTER: %[[G0:.+]] = getelementptr inbounds %pair, ptr %p, i64 %index, i32 0
; INTER: %wide.vec = load <8 x i32>, ptr %[[G0]], align 8
-; INTER: %[[I1:.+]] = or disjoint i64 %index, 1
; INTER: getelementptr inbounds %pair, ptr %p, i64 %[[I1]], i32 0
-; INTER: %[[I2:.+]] = or disjoint i64 %index, 2
; INTER: getelementptr inbounds %pair, ptr %p, i64 %[[I2]], i32 0
-; INTER: %[[I3:.+]] = or disjoint i64 %index, 3
; INTER: getelementptr inbounds %pair, ptr %p, i64 %[[I3]], i32 0
; INTER: br i1 {{.*}}, label %middle.block, label %vector.body
;
diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll
index eb688f86d84ee..af8bcef775e62 100644
--- a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll
@@ -969,6 +969,7 @@ define void @deref_assumption_too_small_in_preheader_constant_trip_count_align_1
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4
@@ -986,7 +987,6 @@ define void @deref_assumption_too_small_in_preheader_constant_trip_count_align_1
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
; CHECK: [[PRED_LOAD_IF1]]:
-; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 1
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1
@@ -1219,6 +1219,7 @@ define void @deref_assumption_in_preheader_constant_trip_count_align_4_not_known
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4
@@ -1236,7 +1237,6 @@ define void @deref_assumption_in_preheader_constant_trip_count_align_4_not_known
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
; CHECK: [[PRED_LOAD_IF1]]:
-; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1
@@ -1314,6 +1314,7 @@ define void @deref_assumption_too_small_in_preheader_constant_trip_count_align_4
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4
@@ -1331,7 +1332,6 @@ define void @deref_assumption_too_small_in_preheader_constant_trip_count_align_4
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
; CHECK: [[PRED_LOAD_IF1]]:
-; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
index 7ac0eb038cb7a..767df2e38d862 100644
--- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
@@ -94,6 +94,7 @@ define i32 @test(ptr nocapture %f) #0 {
; VEC: vector.body:
; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
; VEC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; VEC-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1
; VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[F:%.*]], i64 [[TMP0]]
; VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4
@@ -110,7 +111,6 @@ define i32 @test(ptr nocapture %f) #0 {
; VEC-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
; VEC-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
; VEC: pred.store.if1:
-; VEC-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1
; VEC-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[F]], i64 [[TMP9]]
; VEC-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1
; VEC-NEXT: [[TMP12:%.*]] = add nsw i32 [[TMP11]], 20
@@ -322,6 +322,7 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) {
; VEC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ [[TMP5]], [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[PRED_STORE_CONTINUE2]] ]
; VEC-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[V_1]], [[INDEX]]
; VEC-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0
+; VEC-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 1
; VEC-NEXT: [[TMP7:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR:%.*]], i64 0, i64 [[TMP6]]
; VEC-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4
@@ -330,7 +331,6 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) {
; VEC-NEXT: [[TMP10:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR]], i64 0, i64 [[TMP6]]
; VEC-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0
; VEC-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4
-; VEC-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 1
; VEC-NEXT: [[TMP14:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR]], i64 0, i64 [[TMP13]]
; VEC-NEXT: [[TMP15:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1
; VEC-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4
@@ -482,6 +482,7 @@ define void @minimal_bit_widths(i1 %c) {
; VEC: vector.body:
; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
; VEC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; VEC-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1
; VEC-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr undef, i64 [[TMP0]]
; VEC-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP2]], align 1
@@ -490,7 +491,6 @@ define void @minimal_bit_widths(i1 %c) {
; VEC-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr undef, i64 [[TMP0]]
; VEC-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 0
; VEC-NEXT: store i8 [[TMP5]], ptr [[TMP4]], align 1
-; VEC-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1
; VEC-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr undef, i64 [[TMP7]]
; VEC-NEXT: [[TMP9:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 1
; VEC-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 1
@@ -612,6 +612,7 @@ define void @minimal_bit_widths_with_aliasing_store(i1 %c, ptr %ptr) {
; VEC: vector.body:
; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
; VEC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; VEC-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1
; VEC-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[TMP0]]
; VEC-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP2]], i32 0
; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP3]], align 1
@@ -621,7 +622,6 @@ define void @minimal_bit_widths_with_aliasing_store(i1 %c, ptr %ptr) {
; VEC-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP0]]
; VEC-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 0
; VEC-NEXT: store i8 [[TMP5]], ptr [[TMP4]], align 1
-; VEC-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1
; VEC-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP7]]
; VEC-NEXT: [[TMP9:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 1
; VEC-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 1
diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll
index 9c3ba8fbcf036..7057fded1c3ac 100644
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll
@@ -1961,6 +1961,7 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE2:%.*]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[PRED_UDIV_CONTINUE2]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4
@@ -1974,7 +1975,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) {
; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_UDIV_IF]] ]
; CHECK-NEXT: br i1 [[C]], label [[PRED_UDIV_IF1:%.*]], label [[PRED_UDIV_CONTINUE2]]
; CHECK: pred.udiv.if1:
-; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1
; CHECK-NEXT: [[TMP11:%.*]] = udiv i32 [[TMP10]], [[TMP9]]
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP11]], i32 1
@@ -2026,6 +2026,7 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) {
; IND: vector.body:
; IND-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE2:%.*]] ]
; IND-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[PRED_UDIV_CONTINUE2]] ]
+; IND-NEXT: [[TMP6:%.*]] = or disjoint i32 [[INDEX]], 1
; IND-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
; IND-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
; IND-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4
@@ -2039,7 +2040,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) {
; IND-NEXT: [[TMP5:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_UDIV_IF]] ]
; IND-NEXT: br i1 [[C]], label [[PRED_UDIV_IF1:%.*]], label [[PRED_UDIV_CONTINUE2]]
; IND: pred.udiv.if1:
-; IND-NEXT: [[TMP6:%.*]] = or disjoint i32 [[INDEX]], 1
; IND-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i64 1
; IND-NEXT: [[TMP8:%.*]] = udiv i32 [[TMP7]], [[TMP6]]
; IND-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP8]], i64 1
@@ -2093,6 +2093,9 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) {
; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE8:%.*]] ]
; UNROLL-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[PRED_UDIV_CONTINUE8]] ]
; UNROLL-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[PRED_UDIV_CONTINUE8]] ]
+; UNROLL-NEXT: [[TMP7:%.*]] = or disjoint i32 [[INDEX]], 1
+; UNROLL-NEXT: [[TMP12:%.*]] = or disjoint i32 [[INDEX]], 2
+; UNROLL-NEXT: [[TMP17:%.*]] = or disjoint i32 [[INDEX]], 3
; UNROLL-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
; UNROLL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
; UNROLL-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 8
@@ -2108,7 +2111,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) {
; UNROLL-NEXT: [[TMP6:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_UDIV_IF]] ]
; UNROLL-NEXT: br i1 [[C]], label [[PRED_UDIV_IF3:%.*]], label [[PRED_UDIV_CONTINUE4:%.*]]
; UNROLL: pred.udiv.if3:
-; UNROLL-NEXT: [[TMP7:%.*]] = or disjoint i32 [[INDEX]], 1
; UNROLL-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i64 1
; UNROLL-NEXT: [[TMP9:%.*]] = udiv i32 [[TMP8]], [[TMP7]]
; UNROLL-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP9]], i64 1
@@ -2117,7 +2119,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) {
; UNROLL-NEXT: [[TMP11:%.*]] = phi <2 x i32> [ [[TMP6]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP10]], [[PRED_UDIV_IF3]] ]
; UNROLL-NEXT: br i1 [[C]], label [[PRED_UDIV_IF5:%.*]], label [[PRED_UDIV_CONTINUE6:%.*]]
; UNROLL: pred.udiv.if5:
-; UNROLL-NEXT: [[TMP12:%.*]] = or disjoint i32 [[INDEX]], 2
; UNROLL-NEXT: [[TMP13:%.*]] = extractelement <2 x i32> [[WIDE_LOAD2]], i64 0
; UNROLL-NEXT: [[TMP14:%.*]] = udiv i32 [[TMP13]], [[TMP12]]
; UNROLL-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> poison, i32 [[TMP14]], i64 0
@@ -2126,7 +2127,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) {
; UNROLL-NEXT: [[TMP16:%.*]] = phi <2 x i32> [ poison, [[PRED_UDIV_CONTINUE4]] ], [ [[TMP15]], [[PRED_UDIV_IF5]] ]
; UNROLL-NEXT: br i1 [[C]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8]]
; UNROLL: pred.udiv.if7:
-; UNROLL-NEXT: [[TMP17:%.*]] = or disjoint i32 [[INDEX]], 3
; UNROLL-NEXT: [[TMP18:%.*]] = extractelement <2 x i32> [[WIDE_LOAD2]], i64 1
; UNROLL-NEXT: [[TMP19:%.*]] = udiv i32 [[TMP18]], [[TMP17]]
; UNROLL-NEXT: [[TMP20:%.*]] = insertelement <2 x i32> [[TMP16]], i32 [[TMP19]], i64 1
@@ -2185,6 +2185,9 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) {
; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[PRED_UDIV_CONTINUE8]] ]
; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[PRED_UDIV_CONTINUE8]] ]
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
+; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add i32 [[INDEX]], 1
+; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = add i32 [[INDEX]], 2
+; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = add i32 [[INDEX]], 3
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP0]]
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2
@@ -2200,7 +2203,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) {
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_UDIV_IF]] ]
; UNROLL-NO-IC-NEXT: br i1 [[C]], label [[PRED_UDIV_IF3:%.*]], label [[PRED_UDIV_CONTINUE4:%.*]]
; UNROLL-NO-IC: pred.udiv.if3:
-; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add i32 [[INDEX]], 1
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = udiv i32 [[TMP11]], [[TMP10]]
; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[TMP12]], i32 1
@@ -2209,7 +2211,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) {
; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = phi <2 x i32> [ [[TMP8]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP13]], [[PRED_UDIV_IF3]] ]
; UNROLL-NO-IC-NEXT: br i1 [[C]], label [[PRED_UDIV_IF5:%.*]], label [[PRED_UDIV_CONTINUE6:%.*]]
; UNROLL-NO-IC: pred.udiv.if5:
-; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = add i32 [[INDEX]], 2
; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[WIDE_LOAD2]], i32 0
; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = udiv i32 [[TMP17]], [[TMP16]]
; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i32 0
@@ -2218,7 +2219,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) {
; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = phi <2 x i32> [ poison, [[PRED_UDIV_CONTINUE4]] ], [ [[TMP19]], [[PRED_UDIV_IF5]] ]
; UNROLL-NO-IC-NEXT: br i1 [[C]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8]]
; UNROLL-NO-IC: pred.udiv.if7:
-; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = add i32 [[INDEX]], 3
; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = extractelement <2 x i32> [[WIDE_LOAD2]], i32 1
; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = udiv i32 [[TMP23]], [[TMP22]]
; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = insertelement <2 x i32> [[TMP20]], i32 [[TMP24]], i32 1
@@ -2274,6 +2274,13 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) {
; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE16:%.*]] ]
; INTERLEAVE-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_UDIV_CONTINUE16]] ]
; INTERLEAVE-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_UDIV_CONTINUE16]] ]
+; INTERLEAVE-NEXT: [[TMP7:%.*]] = or disjoint i32 [[INDEX]], 1
+; INTERLEAVE-NEXT: [[TMP12:%.*]] = or disjoint i32 [[INDEX]], 2
+; INTERLEAVE-NEXT: [[TMP17:%.*]] = or disjoint i32 [[INDEX]], 3
+; INTERLEAVE-NEXT: [[TMP22:%.*]] = or disjoint i32 [[INDEX]], 4
+; INTERLEAVE-NEXT: [[TMP27:%.*]] = or disjoint i32 [[INDEX]], 5
+; INTERLEAVE-NEXT: [[TMP32:%.*]] = or disjoint i32 [[INDEX]], 6
+; INTERLEAVE-NEXT: [[TMP37:%.*]] = or disjoint i32 [[INDEX]], 7
; INTERLEAVE-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
@@ -2289,7 +2296,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) {
; INTERLEAVE-NEXT: [[TMP6:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_UDIV_IF]] ]
; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF3:%.*]], label [[PRED_UDIV_CONTINUE4:%.*]]
; INTERLEAVE: pred.udiv.if3:
-; INTERLEAVE-NEXT: [[TMP7:%.*]] = or disjoint i32 [[INDEX]], 1
; INTERLEAVE-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 1
; INTERLEAVE-NEXT: [[TMP9:%.*]] = udiv i32 [[TMP8]], [[TMP7]]
; INTERLEAVE-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP9]], i64 1
@@ -2298,7 +2304,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) {
; INTERLEAVE-NEXT: [[TMP11:%.*]] = phi <4 x i32> [ [[TMP6]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP10]], [[PRED_UDIV_IF3]] ]
; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF5:%.*]], label [[PRED_UDIV_CONTINUE6:%.*]]
; INTERLEAVE: pred.udiv.if5:
-; INTERLEAVE-NEXT: [[TMP12:%.*]] = or disjoint i32 [[INDEX]], 2
; INTERLEAVE-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 2
; INTERLEAVE-NEXT: [[TMP14:%.*]] = udiv i32 [[TMP13]], [[TMP12]]
; INTERLEAVE-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP14]], i64 2
@@ -2307,7 +2312,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) {
; INTERLEAVE-NEXT: [[TMP16:%.*]] = phi <4 x i32> [ [[TMP11]], [[PRED_UDIV_CONTINUE4]] ], [ [[TMP15]], [[PRED_UDIV_IF5]] ]
; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8:%.*]]
; INTERLEAVE: pred.udiv.if7:
-; INTERLEAVE-NEXT: [[TMP17:%.*]] = or disjoint i32 [[INDEX]], 3
; INTERLEAVE-NEXT: [[TMP18:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 3
; INTERLEAVE-NEXT: [[TMP19:%.*]] = udiv i32 [[TMP18]], [[TMP17]]
; INTERLEAVE-NEXT: [[TMP20:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP19]], i64 3
@@ -2316,7 +2320,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) {
; INTERLEAVE-NEXT: [[TMP21:%.*]] = phi <4 x i32> [ [[TMP16]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP20]], [[PRED_UDIV_IF7]] ]
; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF9:%.*]], label [[PRED_UDIV_CONTINUE10:%.*]]
; INTERLEAVE: pred.udiv.if9:
-; INTERLEAVE-NEXT: [[TMP22:%.*]] = or disjoint i32 [[INDEX]], 4
; INTERLEAVE-NEXT: [[TMP23:%.*]] = extractelement <4 x i32> [[WIDE_LOAD2]], i64 0
; INTERLEAVE-NEXT: [[TMP24:%.*]] = udiv i32 [[TMP23]], [[TMP22]]
; INTERLEAVE-NEXT: [[TMP25:%.*]] = insertelement <4 x i32> poison, i32 [[TMP24]], i64 0
@@ -2325,7 +2328,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) {
; INTERLEAVE-NEXT: [[TMP26:%.*]] = phi <4 x i32> [ poison, [[PRED_UDIV_CONTINUE8]] ], [ [[TMP25]], [[PRED_UDIV_IF9]] ]
; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF11:%.*]], label [[PRED_UDIV_CONTINUE12:%.*]]
; INTERLEAVE: pred.udiv.if11:
-; INTERLEAVE-NEXT: [[TMP27:%.*]] = or disjoint i32 [[INDEX]], 5
; INTERLEAVE-NEXT: [[TMP28:%.*]] = extractelement <4 x i32> [[WIDE_LOAD2]], i64 1
; INTERLEAVE-NEXT: [[TMP29:%.*]] = udiv i32 [[TMP28]], [[TMP27]]
; INTERLEAVE-NEXT: [[TMP30:%.*]] = insertelement <4 x i32> [[TMP26]], i32 [[TMP29]], i64 1
@@ -2334,7 +2336,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) {
; INTERLEAVE-NEXT: [[TMP31:%.*]] = phi <4 x i32> [ [[TMP26]], [[PRED_UDIV_CONTINUE10]] ], [ [[TMP30]], [[PRED_UDIV_IF11]] ]
; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF13:%.*]], label [[PRED_UDIV_CONTINUE14:%.*]]
; INTERLEAVE: pred.udiv.if13:
-; INTERLEAVE-NEXT: [[TMP32:%.*]] = or disjoint i32 [[INDEX]], 6
; INTERLEAVE-NEXT: [[TMP33:%.*]] = extractelement <4 x i32> [[WIDE_LOAD2]], i64 2
; INTERLEAVE-NEXT: [[TMP34:%.*]] = udiv i32 [[TMP33]], [[TMP32]]
; INTERLEAVE-NEXT: [[TMP35:%.*]] = insertelement <4 x i32> [[TMP31]], i32 [[TMP34]], i64 2
@@ -2343,7 +2344,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) {
; INTERLEAVE-NEXT: [[TMP36:%.*]] = phi <4 x i32> [ [[TMP31]], [[PRED_UDIV_CONTINUE12]] ], [ [[TMP35]], [[PRED_UDIV_IF13]] ]
; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF15:%.*]], label [[PRED_UDIV_CONTINUE16]]
; INTERLEAVE: pred.udiv.if15:
-; INTERLEAVE-NEXT: [[TMP37:%.*]] = or disjoint i32 [[INDEX]], 7
; INTERLEAVE-NEXT: [[TMP38:%.*]] = extractelement <4 x i32> [[WIDE_LOAD2]], i64 3
; INTERLEAVE-NEXT: [[TMP39:%.*]] = udiv i32 [[TMP38]], [[TMP37]]
; INTERLEAVE-NEXT: [[TMP40:%.*]] = insertelement <4 x i32> [[TMP36]], i32 [[TMP39]], i64 3
diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll
index fb4545cb715bb..c2f912fb4bc8d 100644
--- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll
@@ -27,6 +27,7 @@ define void @interleaved_with_cond_store_0(ptr %p, i64 %x, i64 %n) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = or disjoint i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 1
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP0]], align 8
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
@@ -42,7 +43,6 @@ define void @interleaved_with_cond_store_0(ptr %p, i64 %x, i64 %n) {
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP1]], i64 1
; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
; CHECK: pred.store.if1:
-; CHECK-NEXT: [[TMP6:%.*]] = or disjoint i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP6]], i32 1
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 2
; CHECK-NEXT: store i64 [[TMP8]], ptr [[TMP7]], align 8
diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
index 402632b51698a..25df4db3c6c99 100644
--- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
+++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
@@ -19,6 +19,7 @@ define i16 @test_access_size_not_multiple_of_align(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE2:%.*]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[PRED_LOAD_CONTINUE2]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TEST_BASE:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP2]], align 1
@@ -35,7 +36,6 @@ define i16 @test_access_size_not_multiple_of_align(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2]]
; CHECK: pred.load.if1:
-; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, ptr [[ALLOCA]], i64 [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = load i16, ptr [[TMP11]], align 4
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i16> [[TMP8]], i16 [[TMP12]], i32 1
@@ -116,6 +116,7 @@ define i32 @test_access_size_multiple_of_align_but_offset_by_1(i64 %len, ptr %te
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE2:%.*]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[PRED_LOAD_CONTINUE2]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TEST_BASE:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP2]], align 1
@@ -132,7 +133,6 @@ define i32 @test_access_size_multiple_of_align_but_offset_by_1(i64 %len, ptr %te
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2]]
; CHECK: pred.load.if1:
-; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[START]], i64 [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[TMP12]], i32 1
@@ -230,6 +230,7 @@ define i32 @loop_requires_scev_predicate(ptr %dest, i32 %end) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ]
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP10]], align 4
@@ -250,7 +251,6 @@ define i32 @loop_requires_scev_predicate(ptr %dest, i32 %end) {
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1
; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4]]
; CHECK: pred.store.if3:
-; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 [[TMP20]]
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i32> [[WIDE_LOAD3]], i32 1
@@ -260,7 +260,7 @@ define i32 @loop_requires_scev_predicate(ptr %dest, i32 %end) {
; CHECK: pred.store.continue4:
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -287,7 +287,7 @@ define i32 @loop_requires_scev_predicate(ptr %dest, i32 %end) {
; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[IND_NEXT]] to i32
; CHECK-NEXT: [[GEP_IND_NEXT]] = add i64 [[GEP_IND]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[CONV]], [[END_CLAMPED]]
-; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret i32 0
;
@@ -344,6 +344,7 @@ define void @test_rev_loops_deref_loads(ptr nocapture noundef writeonly %dest) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], -1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 -1
@@ -368,7 +369,6 @@ define void @test_rev_loops_deref_loads(ptr nocapture noundef writeonly %dest) {
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4]]
; CHECK: pred.store.if3:
-; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], -1
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP14]]
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[REVERSE2]], i32 1
; CHECK-NEXT: [[TMP17:%.*]] = shl nsw i32 [[TMP16]], 2
@@ -377,7 +377,7 @@ define void @test_rev_loops_deref_loads(ptr nocapture noundef writeonly %dest) {
; CHECK: pred.store.continue4:
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
-; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
@@ -399,7 +399,7 @@ define void @test_rev_loops_deref_loads(ptr nocapture noundef writeonly %dest) {
; CHECK: for.inc:
; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
; CHECK-NEXT: [[CMP2_NOT:%.*]] = icmp eq i64 [[IV]], 0
-; CHECK-NEXT: br i1 [[CMP2_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-NEXT: br i1 [[CMP2_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DEST:%.*]], ptr [[LOCAL_DEST]], i64 1024, i1 false)
; CHECK-NEXT: ret void
@@ -490,7 +490,7 @@ define void @test_rev_loops_non_deref_loads(ptr nocapture noundef writeonly %des
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 -2)
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
-; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
@@ -513,7 +513,7 @@ define void @test_rev_loops_non_deref_loads(ptr nocapture noundef writeonly %des
; CHECK: for.inc:
; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
; CHECK-NEXT: [[CMP2_NOT:%.*]] = icmp eq i64 [[IV]], 0
-; CHECK-NEXT: br i1 [[CMP2_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-NEXT: br i1 [[CMP2_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DEST:%.*]], ptr [[LOCAL_DEST]], i64 1024, i1 false)
; CHECK-NEXT: ret void
@@ -584,7 +584,7 @@ define i16 @test_strided_access(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
-; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP15:%.*]] = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> [[TMP13]])
; CHECK-NEXT: br i1 true, label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
@@ -609,7 +609,7 @@ define i16 @test_strided_access(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i16 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ]
; CHECK-NEXT: [[ACCUM_NEXT]] = add i16 [[ACCUM]], [[VAL_PHI]]
; CHECK-NEXT: [[EXIT:%.*]] = icmp eq i64 [[IV]], 4095
-; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK: loop_exit:
; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i16 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i16 [[ACCUM_NEXT_LCSSA]]
@@ -659,6 +659,7 @@ define void @test_rev_loops_strided_deref_loads(ptr nocapture noundef writeonly
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 511, i64 510>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 511, [[INDEX]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], -1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 -1
@@ -684,7 +685,6 @@ define void @test_rev_loops_strided_deref_loads(ptr nocapture noundef writeonly
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
; CHECK: pred.store.if1:
-; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], -1
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP17]]
; CHECK-NEXT: [[TMP19:%.*]] = shl nsw i32 [[TMP12]], 2
; CHECK-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4
@@ -693,7 +693,7 @@ define void @test_rev_loops_strided_deref_loads(ptr nocapture noundef writeonly
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 -2)
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512
-; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
@@ -716,7 +716,7 @@ define void @test_rev_loops_strided_deref_loads(ptr nocapture noundef writeonly
; CHECK: for.inc:
; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
; CHECK-NEXT: [[CMP2_NOT:%.*]] = icmp eq i64 [[IV]], 0
-; CHECK-NEXT: br i1 [[CMP2_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
+; CHECK-NEXT: br i1 [[CMP2_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DEST:%.*]], ptr [[LOCAL_DEST]], i64 1024, i1 false)
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/LoopVectorize/load-of-struct-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/load-of-struct-deref-pred.ll
index 7e00cb74a69ed..ca15b65ec5c36 100644
--- a/llvm/test/Transforms/LoopVectorize/load-of-struct-deref-pred.ll
+++ b/llvm/test/Transforms/LoopVectorize/load-of-struct-deref-pred.ll
@@ -16,21 +16,21 @@ define void @accesses_to_struct_dereferenceable(ptr noalias %dst) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr @foo, i64 0, i32 1, i64 [[INDEX]]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_FOO]], ptr @foo, i64 0, i32 0, i64 [[INDEX]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[WIDE_LOAD2]], <4 x i32> [[WIDE_LOAD1]]
-; CHECK-NEXT: store <4 x i32> [[PREDPHI]], ptr [[TMP2]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <4 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr @foo, i64 0, i32 1, i64 [[INDEX]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[TMP3]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_FOO]], ptr @foo, i64 0, i32 0, i64 [[INDEX]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[WIDE_LOAD2]], <4 x i32> [[WIDE_LOAD1]]
+; CHECK-NEXT: store <4 x i32> [[PREDPHI]], ptr [[TMP1]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32000
-; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32000
+; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
@@ -100,55 +100,55 @@ define void @accesses_to_struct_may_not_be_dereferenceable_due_to_loop_bound(ptr
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
-; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ult <4 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true)
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0
+; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK: pred.load.if:
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr @foo, i64 0, i32 1, i64 [[TMP0]]
-; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i32 0
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr @foo, i64 0, i32 1, i64 [[TMP0]]
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP10]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
-; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_LOAD_IF]] ]
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1
-; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
+; CHECK-NEXT: [[TMP12:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP11]], [[PRED_LOAD_IF]] ]
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1
+; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:
-; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP11]]
-; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
-; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP13]], i32 1
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP1]]
+; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
+; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP15]], i32 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]]
; CHECK: pred.load.continue2:
-; CHECK-NEXT: [[TMP15:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2
-; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
+; CHECK-NEXT: [[TMP17:%.*]] = phi <4 x i32> [ [[TMP12]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], [[PRED_LOAD_IF1]] ]
+; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP7]], i32 2
+; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK: pred.load.if3:
-; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP17]]
-; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4
-; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP19]], i32 2
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP2]]
+; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
+; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP20]], i32 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]]
; CHECK: pred.load.continue4:
-; CHECK-NEXT: [[TMP21:%.*]] = phi <4 x i32> [ [[TMP15]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP20]], [[PRED_LOAD_IF3]] ]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3
-; CHECK-NEXT: br i1 [[TMP22]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
+; CHECK-NEXT: [[TMP22:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP21]], [[PRED_LOAD_IF3]] ]
+; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3
+; CHECK-NEXT: br i1 [[TMP23]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
; CHECK: pred.load.if5:
-; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[INDEX]], 3
-; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP23]]
+; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP3]]
; CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
-; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x i32> [[TMP21]], i32 [[TMP25]], i32 3
+; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x i32> [[TMP22]], i32 [[TMP25]], i32 3
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
; CHECK: pred.load.continue6:
-; CHECK-NEXT: [[TMP27:%.*]] = phi <4 x i32> [ [[TMP21]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP26]], [[PRED_LOAD_IF5]] ]
+; CHECK-NEXT: [[TMP27:%.*]] = phi <4 x i32> [ [[TMP22]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP26]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_FOO]], ptr @foo, i64 0, i32 0, i64 [[TMP0]]
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP28]], i32 0
; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[WIDE_LOAD7]], <4 x i32> [[TMP27]]
-; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[WIDE_LOAD7]], <4 x i32> [[TMP27]]
+; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0
; CHECK-NEXT: store <4 x i32> [[PREDPHI]], ptr [[TMP30]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32000
@@ -222,56 +222,56 @@ define void @accesses_to_struct_may_not_be_dereferenceable_access_size(ptr noali
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
-; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ult <4 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true)
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0
+; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK: pred.load.if:
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr @foo, i64 0, i32 1, i64 [[TMP0]]
-; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 4
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i64> poison, i64 [[TMP7]], i32 0
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr @foo, i64 0, i32 1, i64 [[TMP0]]
+; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 4
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i64> poison, i64 [[TMP10]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
-; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i64> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_LOAD_IF]] ]
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1
-; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
+; CHECK-NEXT: [[TMP12:%.*]] = phi <4 x i64> [ poison, [[VECTOR_BODY]] ], [ [[TMP11]], [[PRED_LOAD_IF]] ]
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1
+; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:
-; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP11]]
-; CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP12]], align 4
-; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i64> [[TMP9]], i64 [[TMP13]], i32 1
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP1]]
+; CHECK-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP14]], align 4
+; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i64> [[TMP12]], i64 [[TMP15]], i32 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]]
; CHECK: pred.load.continue2:
-; CHECK-NEXT: [[TMP15:%.*]] = phi <4 x i64> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2
-; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
+; CHECK-NEXT: [[TMP17:%.*]] = phi <4 x i64> [ [[TMP12]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], [[PRED_LOAD_IF1]] ]
+; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP7]], i32 2
+; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK: pred.load.if3:
-; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP17]]
-; CHECK-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP18]], align 4
-; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP15]], i64 [[TMP19]], i32 2
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP2]]
+; CHECK-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP19]], align 4
+; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP20]], i32 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]]
; CHECK: pred.load.continue4:
-; CHECK-NEXT: [[TMP21:%.*]] = phi <4 x i64> [ [[TMP15]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP20]], [[PRED_LOAD_IF3]] ]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3
-; CHECK-NEXT: br i1 [[TMP22]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
+; CHECK-NEXT: [[TMP22:%.*]] = phi <4 x i64> [ [[TMP17]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP21]], [[PRED_LOAD_IF3]] ]
+; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3
+; CHECK-NEXT: br i1 [[TMP23]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
; CHECK: pred.load.if5:
-; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[INDEX]], 3
-; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP23]]
+; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP3]]
; CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP24]], align 4
-; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP25]], i32 3
+; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP25]], i32 3
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
; CHECK: pred.load.continue6:
-; CHECK-NEXT: [[TMP27:%.*]] = phi <4 x i64> [ [[TMP21]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP26]], [[PRED_LOAD_IF5]] ]
+; CHECK-NEXT: [[TMP27:%.*]] = phi <4 x i64> [ [[TMP22]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP26]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP28:%.*]] = trunc <4 x i64> [[TMP27]] to <4 x i32>
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_FOO]], ptr @foo, i64 0, i32 0, i64 [[TMP0]]
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i32, ptr [[TMP29]], i32 0
; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP30]], align 4
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[WIDE_LOAD7]], <4 x i32> [[TMP28]]
-; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[WIDE_LOAD7]], <4 x i32> [[TMP28]]
+; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0
; CHECK-NEXT: store <4 x i32> [[PREDPHI]], ptr [[TMP31]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32000
diff --git a/llvm/test/Transforms/LoopVectorize/loop-form.ll b/llvm/test/Transforms/LoopVectorize/loop-form.ll
index 914b9ad4a9e5a..f1925c39c75ac 100644
--- a/llvm/test/Transforms/LoopVectorize/loop-form.ll
+++ b/llvm/test/Transforms/LoopVectorize/loop-form.ll
@@ -1077,6 +1077,7 @@ define void @scalar_predication(ptr %addr) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[ADDR:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP2]], align 4
@@ -1092,7 +1093,6 @@ define void @scalar_predication(ptr %addr) {
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
; CHECK: pred.store.if1:
-; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr float, ptr [[ADDR]], i64 [[TMP8]]
; CHECK-NEXT: store float 1.000000e+01, ptr [[TMP9]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]]
diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
index 06b6a2b29c01e..c911aa2947066 100644
--- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
@@ -25,44 +25,44 @@ define void @a(ptr readnone %b) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE10:%.*]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 0, [[INDEX]]
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -1
+; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], -2
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], -3
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP2]]
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 -3
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP3]]
+; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP4]]
+; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr null, i64 [[TMP5]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 -3
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP8]], align 1
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i8> [[REVERSE]], zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true)
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0
-; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i8> [[REVERSE]], zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP11]], splat (i1 true)
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP10]], i32 0
+; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1
-; CHECK-NEXT: store i8 95, ptr [[TMP9]], align 1
-; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1
+; CHECK-NEXT: store i8 95, ptr [[TMP14]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
; CHECK: pred.store.continue:
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1
-; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
+; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP10]], i32 1
+; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE7:%.*]]
; CHECK: pred.store.if5:
-; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], -1
-; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]]
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP2]], i64 -1
-; CHECK-NEXT: store i8 95, ptr [[TMP12]], align 1
-; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP2]], i64 -1
+; CHECK-NEXT: store i8 95, ptr [[TMP22]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE7]]
; CHECK: pred.store.continue6:
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP7]], i32 2
-; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP10]], i32 2
+; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
; CHECK: pred.store.if7:
-; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], -2
-; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP14]]
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP3]], i64 -1
-; CHECK-NEXT: store i8 95, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP3]], i64 -1
+; CHECK-NEXT: store i8 95, ptr [[TMP16]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
; CHECK: pred.store.continue8:
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3
-; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10]]
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i1> [[TMP10]], i32 3
+; CHECK-NEXT: br i1 [[TMP17]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10]]
; CHECK: pred.store.if9:
-; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], -3
-; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr null, i64 [[TMP17]]
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP4]], i64 -1
; CHECK-NEXT: store i8 95, ptr [[TMP18]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]]
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll
index a85718d1a382f..4e1ba0cd230a7 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll
@@ -360,6 +360,9 @@ define i32 @simple_chained_rdx(ptr noalias %a, ptr noalias %b, ptr noalias %cond
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 5, [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT: [[TMP12:%.*]] = or disjoint i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP22:%.*]] = or disjoint i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP32:%.*]] = or disjoint i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[COND]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[WIDE_LOAD]], zeroinitializer
@@ -379,7 +382,6 @@ define i32 @simple_chained_rdx(ptr noalias %a, ptr noalias %b, ptr noalias %cond
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP1]], i64 1
; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:
-; CHECK-NEXT: [[TMP12:%.*]] = or disjoint i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP12]]
; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP14]], i64 1
@@ -393,7 +395,6 @@ define i32 @simple_chained_rdx(ptr noalias %a, ptr noalias %b, ptr noalias %cond
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP1]], i64 2
; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK: pred.load.if3:
-; CHECK-NEXT: [[TMP22:%.*]] = or disjoint i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP22]]
; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
; CHECK-NEXT: [[TMP25:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP24]], i64 2
@@ -407,7 +408,6 @@ define i32 @simple_chained_rdx(ptr noalias %a, ptr noalias %b, ptr noalias %cond
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP1]], i64 3
; CHECK-NEXT: br i1 [[TMP31]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
; CHECK: pred.load.if5:
-; CHECK-NEXT: [[TMP32:%.*]] = or disjoint i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP32]]
; CHECK-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4
; CHECK-NEXT: [[TMP35:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP34]], i64 3
diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll
index 4c9b4a4df0e37..77dfca6f659ae 100644
--- a/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll
+++ b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll
@@ -484,6 +484,9 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
; CHECK-VF4-IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[PRED_STORE_CONTINUE8]] ]
; CHECK-VF4-IC1-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[PRED_STORE_CONTINUE8]] ]
; CHECK-VF4-IC1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
+; CHECK-VF4-IC1-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 1
+; CHECK-VF4-IC1-NEXT: [[TMP20:%.*]] = add i64 [[INDEX]], 2
+; CHECK-VF4-IC1-NEXT: [[TMP26:%.*]] = add i64 [[INDEX]], 3
; CHECK-VF4-IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
; CHECK-VF4-IC1-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
; CHECK-VF4-IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4, !alias.scope [[META6:![0-9]+]]
@@ -503,7 +506,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
; CHECK-VF4-IC1-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1
; CHECK-VF4-IC1-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
; CHECK-VF4-IC1: pred.store.if3:
-; CHECK-VF4-IC1-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 1
; CHECK-VF4-IC1-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP14]]
; CHECK-VF4-IC1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !alias.scope [[META9]], !noalias [[META6]]
; CHECK-VF4-IC1-NEXT: [[TMP17:%.*]] = add nsw i32 [[TMP16]], 1
@@ -513,7 +515,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
; CHECK-VF4-IC1-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2
; CHECK-VF4-IC1-NEXT: br i1 [[TMP19]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
; CHECK-VF4-IC1: pred.store.if5:
-; CHECK-VF4-IC1-NEXT: [[TMP20:%.*]] = add i64 [[INDEX]], 2
; CHECK-VF4-IC1-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP20]]
; CHECK-VF4-IC1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4, !alias.scope [[META9]], !noalias [[META6]]
; CHECK-VF4-IC1-NEXT: [[TMP23:%.*]] = add nsw i32 [[TMP22]], 1
@@ -523,7 +524,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
; CHECK-VF4-IC1-NEXT: [[TMP25:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3
; CHECK-VF4-IC1-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8]]
; CHECK-VF4-IC1: pred.store.if7:
-; CHECK-VF4-IC1-NEXT: [[TMP26:%.*]] = add i64 [[INDEX]], 3
; CHECK-VF4-IC1-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP26]]
; CHECK-VF4-IC1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4, !alias.scope [[META9]], !noalias [[META6]]
; CHECK-VF4-IC1-NEXT: [[TMP29:%.*]] = add nsw i32 [[TMP28]], 1
@@ -598,6 +598,13 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
; CHECK-VF4-IC2-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[PRED_STORE_CONTINUE19]] ]
; CHECK-VF4-IC2-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[PRED_STORE_CONTINUE19]] ]
; CHECK-VF4-IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
+; CHECK-VF4-IC2-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 1
+; CHECK-VF4-IC2-NEXT: [[TMP27:%.*]] = add i64 [[INDEX]], 2
+; CHECK-VF4-IC2-NEXT: [[TMP33:%.*]] = add i64 [[INDEX]], 3
+; CHECK-VF4-IC2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 4
+; CHECK-VF4-IC2-NEXT: [[TMP44:%.*]] = add i64 [[INDEX]], 5
+; CHECK-VF4-IC2-NEXT: [[TMP50:%.*]] = add i64 [[INDEX]], 6
+; CHECK-VF4-IC2-NEXT: [[TMP56:%.*]] = add i64 [[INDEX]], 7
; CHECK-VF4-IC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
; CHECK-VF4-IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 0
; CHECK-VF4-IC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 4
@@ -623,7 +630,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
; CHECK-VF4-IC2-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1
; CHECK-VF4-IC2-NEXT: br i1 [[TMP20]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]]
; CHECK-VF4-IC2: pred.store.if6:
-; CHECK-VF4-IC2-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 1
; CHECK-VF4-IC2-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP21]]
; CHECK-VF4-IC2-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4, !alias.scope [[META9]], !noalias [[META6]]
; CHECK-VF4-IC2-NEXT: [[TMP24:%.*]] = add nsw i32 [[TMP23]], 1
@@ -633,7 +639,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
; CHECK-VF4-IC2-NEXT: [[TMP26:%.*]] = extractelement <4 x i1> [[TMP7]], i32 2
; CHECK-VF4-IC2-NEXT: br i1 [[TMP26]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]]
; CHECK-VF4-IC2: pred.store.if8:
-; CHECK-VF4-IC2-NEXT: [[TMP27:%.*]] = add i64 [[INDEX]], 2
; CHECK-VF4-IC2-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP27]]
; CHECK-VF4-IC2-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4, !alias.scope [[META9]], !noalias [[META6]]
; CHECK-VF4-IC2-NEXT: [[TMP30:%.*]] = add nsw i32 [[TMP29]], 1
@@ -643,7 +648,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
; CHECK-VF4-IC2-NEXT: [[TMP32:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3
; CHECK-VF4-IC2-NEXT: br i1 [[TMP32]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]]
; CHECK-VF4-IC2: pred.store.if10:
-; CHECK-VF4-IC2-NEXT: [[TMP33:%.*]] = add i64 [[INDEX]], 3
; CHECK-VF4-IC2-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP33]]
; CHECK-VF4-IC2-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4, !alias.scope [[META9]], !noalias [[META6]]
; CHECK-VF4-IC2-NEXT: [[TMP36:%.*]] = add nsw i32 [[TMP35]], 1
@@ -653,7 +657,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
; CHECK-VF4-IC2-NEXT: [[TMP38:%.*]] = extractelement <4 x i1> [[TMP8]], i32 0
; CHECK-VF4-IC2-NEXT: br i1 [[TMP38]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]]
; CHECK-VF4-IC2: pred.store.if12:
-; CHECK-VF4-IC2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 4
; CHECK-VF4-IC2-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP2]]
; CHECK-VF4-IC2-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4, !alias.scope [[META9]], !noalias [[META6]]
; CHECK-VF4-IC2-NEXT: [[TMP41:%.*]] = add nsw i32 [[TMP40]], 1
@@ -663,7 +666,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
; CHECK-VF4-IC2-NEXT: [[TMP43:%.*]] = extractelement <4 x i1> [[TMP8]], i32 1
; CHECK-VF4-IC2-NEXT: br i1 [[TMP43]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15:%.*]]
; CHECK-VF4-IC2: pred.store.if14:
-; CHECK-VF4-IC2-NEXT: [[TMP44:%.*]] = add i64 [[INDEX]], 5
; CHECK-VF4-IC2-NEXT: [[TMP45:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP44]]
; CHECK-VF4-IC2-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4, !alias.scope [[META9]], !noalias [[META6]]
; CHECK-VF4-IC2-NEXT: [[TMP47:%.*]] = add nsw i32 [[TMP46]], 1
@@ -673,7 +675,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
; CHECK-VF4-IC2-NEXT: [[TMP49:%.*]] = extractelement <4 x i1> [[TMP8]], i32 2
; CHECK-VF4-IC2-NEXT: br i1 [[TMP49]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17:%.*]]
; CHECK-VF4-IC2: pred.store.if16:
-; CHECK-VF4-IC2-NEXT: [[TMP50:%.*]] = add i64 [[INDEX]], 6
; CHECK-VF4-IC2-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP50]]
; CHECK-VF4-IC2-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4, !alias.scope [[META9]], !noalias [[META6]]
; CHECK-VF4-IC2-NEXT: [[TMP53:%.*]] = add nsw i32 [[TMP52]], 1
@@ -683,7 +684,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
; CHECK-VF4-IC2-NEXT: [[TMP55:%.*]] = extractelement <4 x i1> [[TMP8]], i32 3
; CHECK-VF4-IC2-NEXT: br i1 [[TMP55]], label [[PRED_STORE_IF18:%.*]], label [[PRED_STORE_CONTINUE19]]
; CHECK-VF4-IC2: pred.store.if18:
-; CHECK-VF4-IC2-NEXT: [[TMP56:%.*]] = add i64 [[INDEX]], 7
; CHECK-VF4-IC2-NEXT: [[TMP57:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP56]]
; CHECK-VF4-IC2-NEXT: [[TMP58:%.*]] = load i32, ptr [[TMP57]], align 4, !alias.scope [[META9]], !noalias [[META6]]
; CHECK-VF4-IC2-NEXT: [[TMP59:%.*]] = add nsw i32 [[TMP58]], 1
diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll b/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll
index 11294b8fef3ee..fe8c64f0536a3 100644
--- a/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll
+++ b/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll
@@ -16,6 +16,7 @@ define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1
; CHECK-VF2IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
; CHECK-VF2IC1-NEXT: [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PREDPHI:%.*]], %[[PRED_LOAD_CONTINUE2]] ]
; CHECK-VF2IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-VF2IC1-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
; CHECK-VF2IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 [[TMP0]]
; CHECK-VF2IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; CHECK-VF2IC1-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4
@@ -32,7 +33,6 @@ define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1
; CHECK-VF2IC1-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
; CHECK-VF2IC1-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
; CHECK-VF2IC1: [[PRED_LOAD_IF1]]:
-; CHECK-VF2IC1-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
; CHECK-VF2IC1-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 [[TMP11]]
; CHECK-VF2IC1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
; CHECK-VF2IC1-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/struct-return.ll b/llvm/test/Transforms/LoopVectorize/struct-return.ll
index 1b2a809a552d8..7e0e1410932c9 100644
--- a/llvm/test/Transforms/LoopVectorize/struct-return.ll
+++ b/llvm/test/Transforms/LoopVectorize/struct-return.ll
@@ -215,7 +215,7 @@ define void @scalarized_predicated_struct_return(ptr %a) optsize {
; CHECK-LABEL: define void @scalarized_predicated_struct_return
; CHECK: vector.body:
; CHECK: pred.store.if:
-; CHECK: tail call { i64, i64 } @bar_i64(i64 %5)
+; CHECK: tail call { i64, i64 } @bar_i64(i64 {{.+}})
entry:
br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll
index caa5969bbc365..855d401686b7b 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll
@@ -92,6 +92,13 @@ define void @VF1-VPWidenCanonicalIVRecipeExe(ptr %ptr1) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE12:%.*]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
+; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 8
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 16
+; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 24
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR1]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[PTR1]], i64 [[TMP4]]
+; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PTR1]], i64 [[TMP5]]
+; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PTR1]], i64 [[TMP6]]
; CHECK-NEXT: [[VEC_IV:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[VEC_IV4:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[VEC_IV5:%.*]] = add i64 [[INDEX]], 2
@@ -102,28 +109,21 @@ define void @VF1-VPWidenCanonicalIVRecipeExe(ptr %ptr1) {
; CHECK-NEXT: [[TMP3:%.*]] = icmp ule i64 [[VEC_IV6]], 14
; CHECK-NEXT: br i1 [[TMP0]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR1]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[NEXT_GEP]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
; CHECK: pred.store.if7:
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 8
-; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[PTR1]], i64 [[TMP5]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[NEXT_GEP1]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
; CHECK: pred.store.continue8:
; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
; CHECK: pred.store.if9:
-; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 16
-; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PTR1]], i64 [[TMP6]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[NEXT_GEP2]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]]
; CHECK: pred.store.continue10:
; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12]]
; CHECK: pred.store.if11:
-; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 24
-; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PTR1]], i64 [[TMP7]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[NEXT_GEP3]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]]
; CHECK: pred.store.continue12:
diff --git a/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll b/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll
index f31885219f53f..27160e6f62470 100644
--- a/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll
+++ b/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll
@@ -153,6 +153,9 @@ define i64 @multi_exit_2_exit_count_with_udiv_by_value_in_block_executed_uncondi
; CHECK: vector.body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[IV]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[IV]], 2
+; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[IV]], 3
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4
@@ -167,7 +170,6 @@ define i64 @multi_exit_2_exit_count_with_udiv_by_value_in_block_executed_uncondi
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
; CHECK: pred.store.if1:
-; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[IV]], 1
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
; CHECK-NEXT: store i32 1, ptr [[TMP12]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]]
@@ -175,7 +177,6 @@ define i64 @multi_exit_2_exit_count_with_udiv_by_value_in_block_executed_uncondi
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP7]], i32 2
; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
; CHECK: pred.store.if3:
-; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[IV]], 2
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]]
; CHECK-NEXT: store i32 1, ptr [[TMP15]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
@@ -183,7 +184,6 @@ define i64 @multi_exit_2_exit_count_with_udiv_by_value_in_block_executed_uncondi
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3
; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF5:%.*]], label [[LOOP_LATCH]]
; CHECK: pred.store.if5:
-; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[IV]], 3
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP17]]
; CHECK-NEXT: store i32 1, ptr [[TMP18]], align 4
; CHECK-NEXT: br label [[LOOP_LATCH]]
@@ -265,6 +265,9 @@ define i64 @multi_exit_2_exit_count_with_udiv_by_constant_in_block_executed_unco
; CHECK: vector.body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[IV]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[IV]], 2
+; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[IV]], 3
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4
@@ -279,7 +282,6 @@ define i64 @multi_exit_2_exit_count_with_udiv_by_constant_in_block_executed_unco
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
; CHECK: pred.store.if1:
-; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[IV]], 1
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
; CHECK-NEXT: store i32 1, ptr [[TMP12]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]]
@@ -287,7 +289,6 @@ define i64 @multi_exit_2_exit_count_with_udiv_by_constant_in_block_executed_unco
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP7]], i32 2
; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
; CHECK: pred.store.if3:
-; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[IV]], 2
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]]
; CHECK-NEXT: store i32 1, ptr [[TMP15]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
@@ -295,7 +296,6 @@ define i64 @multi_exit_2_exit_count_with_udiv_by_constant_in_block_executed_unco
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3
; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF5:%.*]], label [[LOOP_LATCH]]
; CHECK: pred.store.if5:
-; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[IV]], 3
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP17]]
; CHECK-NEXT: store i32 1, ptr [[TMP18]], align 4
; CHECK-NEXT: br label [[LOOP_LATCH]]
More information about the llvm-commits
mailing list