[llvm] [VPlan] Expand VPWidenIntOrFpInductionRecipe into separate recipes (PR #118638)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 26 05:36:41 PST 2025
https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/118638
>From e72140a173c1fd620e7148f2f0135be1b4045a4b Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Wed, 4 Dec 2024 21:26:23 +0800
Subject: [PATCH 01/16] [LV] Expand VPWidenIntOrFpInductionRecipe into separate
recipes
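
The expansion splits the single WIDEN-INDUCTION recipe into the three steps
that print as WIDEN-INDUCTION-START, WIDEN-INDUCTION-PHI and
WIDEN-INDUCTION-INC. As a rough standalone sketch (plain C++, not VPlan or
LLVM IR; Start, Step and VF here are illustrative placeholders), the per-lane
values those three steps model are:

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  const int64_t Start = 0, Step = 1;
  const size_t VF = 4;

  // WIDEN-INDUCTION-START: splat the start value and add a stepped offset,
  // giving <Start, Start+Step, ..., Start+(VF-1)*Step> in the preheader.
  std::vector<int64_t> VecInd(VF);
  for (size_t Lane = 0; Lane < VF; ++Lane)
    VecInd[Lane] = Start + static_cast<int64_t>(Lane) * Step;

  for (unsigned Iter = 0; Iter < 3; ++Iter) {
    // WIDEN-INDUCTION-PHI: VecInd carries the per-lane IV for this iteration.
    for (size_t Lane = 0; Lane < VF; ++Lane)
      std::printf("%lld ", static_cast<long long>(VecInd[Lane]));
    std::printf("\n");

    // WIDEN-INDUCTION-INC: advance every lane by VF * Step on the backedge.
    for (int64_t &Lane : VecInd)
      Lane += static_cast<int64_t>(VF) * Step;
  }
  return 0;
}

The actual recipes operate on VPValues and emit vector IR (and additionally
handle truncation, FP inductions and unrolling); the sketch only shows the
values each lane carries across iterations.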
---
llvm/lib/Transforms/Vectorize/VPlan.cpp | 21 +--
llvm/lib/Transforms/Vectorize/VPlan.h | 157 +++++++++++++++++-
.../Transforms/Vectorize/VPlanAnalysis.cpp | 19 ++-
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 109 +++++++-----
.../Transforms/Vectorize/VPlanTransforms.cpp | 79 +++++++++
llvm/lib/Transforms/Vectorize/VPlanValue.h | 3 +
.../AArch64/clamped-trip-count.ll | 12 +-
.../AArch64/conditional-branches-cost.ll | 4 +-
.../AArch64/divs-with-scalable-vfs.ll | 14 +-
.../AArch64/simple_early_exit.ll | 10 +-
.../AArch64/sve-interleaved-accesses.ll | 46 ++---
.../sve-interleaved-masked-accesses.ll | 32 ++--
.../RISCV/blocks-with-dead-instructions.ll | 36 ++--
.../LoopVectorize/RISCV/mask-index-type.ll | 4 +-
...ruction-or-drop-poison-generating-flags.ll | 6 +-
.../Transforms/LoopVectorize/RISCV/pr88802.ll | 2 +-
.../LoopVectorize/RISCV/strided-accesses.ll | 10 +-
.../LoopVectorize/RISCV/uniform-load-store.ll | 26 +--
...rize-force-tail-with-evl-cond-reduction.ll | 4 +-
.../X86/drop-poison-generating-flags.ll | 2 +-
.../X86/epilog-vectorization-inductions.ll | 10 +-
.../LoopVectorize/X86/induction-costs.ll | 2 +-
.../LoopVectorize/X86/interleave-cost.ll | 4 +-
...outer_loop_test1_no_explicit_vect_width.ll | 2 +-
.../LoopVectorize/X86/scatter_crash.ll | 2 +-
.../LoopVectorize/create-induction-resume.ll | 10 +-
.../epilog-vectorization-any-of-reductions.ll | 4 +-
.../LoopVectorize/first-order-recurrence.ll | 4 +-
.../LoopVectorize/float-induction.ll | 20 +--
.../LoopVectorize/induction-step.ll | 20 +--
.../Transforms/LoopVectorize/induction.ll | 8 +-
.../LoopVectorize/reduction-inloop-pred.ll | 36 ++--
.../single_early_exit_live_outs.ll | 38 ++---
33 files changed, 500 insertions(+), 256 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index d3c195d4a70ea..d2c732b7230e3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1050,17 +1050,12 @@ void VPlan::execute(VPTransformState *State) {
if (isa<VPWidenPHIRecipe>(&R))
continue;
- if (isa<VPWidenInductionRecipe>(&R)) {
- PHINode *Phi = nullptr;
- if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
- Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
- } else {
- auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
- assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
- "recipe generating only scalars should have been replaced");
- auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
- Phi = cast<PHINode>(GEP->getPointerOperand());
- }
+ if (isa<VPWidenPointerInductionRecipe>(&R)) {
+ auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
+ assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
+ "recipe generating only scalars should have been replaced");
+ auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
+ PHINode *Phi = cast<PHINode>(GEP->getPointerOperand());
Phi->setIncomingBlock(1, VectorLatchBB);
@@ -1068,10 +1063,6 @@ void VPlan::execute(VPTransformState *State) {
// consistent placement of all induction updates.
Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
Inc->moveBefore(std::prev(VectorLatchBB->getTerminator()->getIterator()));
-
- // Use the steps for the last part as backedge value for the induction.
- if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
- Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
continue;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 1f1af7f87e554..e41ee6ce18656 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1749,7 +1749,9 @@ class VPWidenInductionRecipe : public VPHeaderPHIRecipe {
};
/// A recipe for handling phi nodes of integer and floating-point inductions,
-/// producing their vector values.
+/// producing their vector values. This recipe does not generate any LLVM IR
+/// itself and is expanded later into VPWidenIntOrFpInductionInitialRecipe,
+/// VPWidenIntOrFpInductionPHIRecipe and VPWidenIntOrFpInductionBackedgeRecipe.
class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
TruncInst *Trunc;
@@ -1782,9 +1784,10 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
- /// Generate the vectorized and scalarized versions of the phi node as
- /// needed by their users.
- void execute(VPTransformState &State) override;
+ void execute(VPTransformState &State) override {
+ llvm_unreachable("cannot execute this recipe, should be expanded via "
+                     "expandVPWidenIntOrFpInduction");
+ }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
@@ -1817,10 +1820,152 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
}
/// Returns the VPValue representing the value of this induction at
- /// the last unrolled part, if it exists. Returns itself if unrolling did not
+ /// the last unrolled part, if it exists. Returns nullptr if unrolling did not
/// take place.
VPValue *getLastUnrolledPartOperand() {
- return getNumOperands() == 5 ? getOperand(4) : this;
+ return getNumOperands() == 5 ? getOperand(4) : nullptr;
+ }
+};
+
+/// A recipe to compute the initial value for a widened IV, expanded from
+/// VPWidenIntOrFpInductionRecipe.
+class VPWidenIntOrFpInductionInitialRecipe : public VPSingleDefRecipe {
+ Instruction *IV;
+ const InductionDescriptor &ID;
+
+public:
+ VPWidenIntOrFpInductionInitialRecipe(Instruction *IV, VPValue *Start,
+ VPValue *Step,
+ const InductionDescriptor &ID)
+ : VPSingleDefRecipe(VPDef::VPWidenIntOrFpInductionStartSC, {Start, Step}),
+ IV(IV), ID(ID) {
+ assert((isa<PHINode>(IV) || isa<TruncInst>(IV)) &&
+ "Expected either an induction phi-node or a truncate of it!");
+ }
+
+ ~VPWidenIntOrFpInductionInitialRecipe() override = default;
+
+ VPWidenIntOrFpInductionInitialRecipe *clone() override {
+ return new VPWidenIntOrFpInductionInitialRecipe(IV, getOperand(0),
+ getOperand(1), ID);
+ }
+
+ VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionStartSC)
+
+ void execute(VPTransformState &State) override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+
+ VPValue *getStartValue() { return getOperand(0); }
+ const VPValue *getStartValue() const { return getOperand(0); }
+
+ VPValue *getStepValue() { return getOperand(1); }
+ const VPValue *getStepValue() const { return getOperand(1); }
+
+ /// Returns the scalar type of the induction.
+ Type *getScalarType() const { return IV->getType(); }
+
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return true;
+ }
+};
+
+/// A recipe to generate the PHI of a widened IV, expanded from
+/// VPWidenIntOrFpInductionRecipe.
+class VPWidenIntOrFpInductionPHIRecipe : public VPHeaderPHIRecipe {
+ Instruction *IV;
+
+public:
+ VPWidenIntOrFpInductionPHIRecipe(Instruction *IV, VPValue *Start)
+ : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionPHISC, IV, Start),
+ IV(IV) {
+ assert((isa<PHINode>(IV) || isa<TruncInst>(IV)) &&
+ "Expected either an induction phi-node or a truncate of it!");
+ }
+
+ ~VPWidenIntOrFpInductionPHIRecipe() override = default;
+
+ VPWidenIntOrFpInductionPHIRecipe *clone() override {
+ auto *R = new VPWidenIntOrFpInductionPHIRecipe(IV, getOperand(0));
+ R->addOperand(getBackedgeValue());
+ return R;
+ }
+
+ VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionPHISC)
+
+ void execute(VPTransformState &State) override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
+/// A recipe to compute the backedge value for a widened IV, expanded from
+/// VPWidenIntOrFpInductionRecipe.
+class VPWidenIntOrFpInductionBackedgeRecipe : public VPSingleDefRecipe {
+ Instruction *IV;
+ const InductionDescriptor &ID;
+
+public:
+ VPWidenIntOrFpInductionBackedgeRecipe(Instruction *IV, VPValue *Step,
+ VPValue *VF, VPValue *Prev,
+ VPValue *SplatVF,
+ const InductionDescriptor &ID)
+ : VPSingleDefRecipe(VPDef::VPWidenIntOrFpInductionSC, {Step, VF, Prev}),
+ IV(IV), ID(ID) {
+ assert((isa<PHINode>(IV) || isa<TruncInst>(IV)) &&
+ "Expected either an induction phi-node or a truncate of it!");
+ if (SplatVF)
+ addOperand(SplatVF);
+ }
+
+ ~VPWidenIntOrFpInductionBackedgeRecipe() override = default;
+
+ VPWidenIntOrFpInductionBackedgeRecipe *clone() override {
+ return new VPWidenIntOrFpInductionBackedgeRecipe(
+ IV, getOperand(0), getOperand(1), getOperand(2), getOperand(3), ID);
+ }
+
+ VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionIncSC)
+
+ void execute(VPTransformState &State) override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+
+ VPValue *getStepValue() { return getOperand(0); }
+ const VPValue *getStepValue() const { return getOperand(0); }
+
+ VPValue *getVFValue() { return getOperand(1); }
+ const VPValue *getVFValue() const { return getOperand(1); }
+
+ VPValue *getPrevValue() { return getOperand(2); }
+ const VPValue *getPrevValue() const { return getOperand(2); }
+
+ VPValue *getSplatVFValue() {
+ // If the recipe has been unrolled (4 operands), return the VPValue for the
+ // induction increment.
+ return getNumOperands() == 4 ? getOperand(3) : nullptr;
+ }
+
+ /// Returns the scalar type of the induction.
+ Type *getScalarType() const { return IV->getType(); }
+
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return Op == getOperand(0) || Op == getOperand(1);
}
};
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 8e5b37a35dda7..ef0f12524b5a7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -236,14 +236,17 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
.Case<VPActiveLaneMaskPHIRecipe, VPCanonicalIVPHIRecipe,
VPFirstOrderRecurrencePHIRecipe, VPReductionPHIRecipe,
VPWidenPointerInductionRecipe, VPEVLBasedIVPHIRecipe,
- VPScalarPHIRecipe>([this](const auto *R) {
- // Handle header phi recipes, except VPWidenIntOrFpInduction
- // which needs special handling due it being possibly truncated.
- // TODO: consider inferring/caching type of siblings, e.g.,
- // backedge value, here and in cases below.
- return inferScalarType(R->getStartValue());
- })
- .Case<VPWidenIntOrFpInductionRecipe, VPDerivedIVRecipe>(
+ VPScalarPHIRecipe, VPWidenIntOrFpInductionPHIRecipe>(
+ [this](const auto *R) {
+ // Handle header phi recipes, except VPWidenIntOrFpInduction
+                    // which needs special handling due to it being possibly truncated.
+ // TODO: consider inferring/caching type of siblings, e.g.,
+ // backedge value, here and in cases below.
+ return inferScalarType(R->getStartValue());
+ })
+ .Case<VPWidenIntOrFpInductionRecipe, VPDerivedIVRecipe,
+ VPWidenIntOrFpInductionInitialRecipe,
+ VPWidenIntOrFpInductionBackedgeRecipe>(
[](const auto *R) { return R->getScalarType(); })
.Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe,
VPScalarIVStepsRecipe, VPWidenGEPRecipe, VPVectorPointerRecipe,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 8a928fa71fab8..82cfe1774f678 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1751,47 +1751,72 @@ static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
: ConstantFP::get(Ty, C);
}
-void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
+void VPWidenIntOrFpInductionInitialRecipe::execute(VPTransformState &State) {
assert(!State.Lane && "Int or FP induction being replicated.");
- Value *Start = getStartValue()->getLiveInIRValue();
- const InductionDescriptor &ID = getInductionDescriptor();
- TruncInst *Trunc = getTruncInst();
+ Value *Start = State.get(getStartValue(), true);
IRBuilderBase &Builder = State.Builder;
- assert(getPHINode()->getType() == ID.getStartValue()->getType() &&
- "Types must match");
assert(State.VF.isVector() && "must have vector VF");
- // The value from the original loop to which we are mapping the new induction
- // variable.
- Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : getPHINode();
-
// Fast-math-flags propagate from the original induction instruction.
IRBuilder<>::FastMathFlagGuard FMFG(Builder);
- if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
+ if (isa_and_nonnull<FPMathOperator>(ID.getInductionBinOp()))
Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());
// Now do the actual transformations, and start with fetching the step value.
Value *Step = State.get(getStepValue(), VPLane(0));
- assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
- "Expected either an induction phi-node or a truncate of it!");
+ // Construct the initial value of the vector IV
+ Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
+ Value *SteppedStart = getStepVector(SplatStart, Step, ID.getInductionOpcode(),
+ State.VF, State.Builder);
+ State.set(this, SteppedStart);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPWidenIntOrFpInductionInitialRecipe::print(
+ raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const {
+ O << Indent;
+ printAsOperand(O, SlotTracker);
+ O << " = WIDEN-INDUCTION-START ";
+ printOperands(O, SlotTracker);
+}
+#endif
+
+void VPWidenIntOrFpInductionPHIRecipe::execute(VPTransformState &State) {
+ BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
+
+ Value *Start = State.get(getOperand(0));
+ PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, "vec.ind");
+ Phi->addIncoming(Start, VectorPH);
+ Phi->setDebugLoc(IV->getDebugLoc());
+ State.set(this, Phi);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPWidenIntOrFpInductionPHIRecipe::print(raw_ostream &O,
+ const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent;
+ printAsOperand(O, SlotTracker);
+ O << " = WIDEN-INDUCTION-PHI ";
+ printOperands(O, SlotTracker);
+}
+#endif
+
+void VPWidenIntOrFpInductionBackedgeRecipe::execute(VPTransformState &State) {
+ IRBuilderBase &Builder = State.Builder;
+
+ // Fast-math-flags propagate from the original induction instruction.
+ IRBuilder<>::FastMathFlagGuard FMFG(Builder);
+ if (isa_and_nonnull<FPMathOperator>(ID.getInductionBinOp()))
+ Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());
+
+ Value *Step = State.get(getStepValue(), VPLane(0));
- // Construct the initial value of the vector IV in the vector loop preheader
auto CurrIP = Builder.saveIP();
BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
Builder.SetInsertPoint(VectorPH->getTerminator());
- if (isa<TruncInst>(EntryVal)) {
- assert(Start->getType()->isIntegerTy() &&
- "Truncation requires an integer type");
- auto *TruncType = cast<IntegerType>(EntryVal->getType());
- Step = Builder.CreateTrunc(Step, TruncType);
- Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
- }
-
- Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
- Value *SteppedStart = getStepVector(SplatStart, Step, ID.getInductionOpcode(),
- State.VF, State.Builder);
// We create vector phi nodes for both integer and floating-point induction
// variables. Here, we determine the kind of arithmetic we will perform.
@@ -1826,28 +1851,26 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
}
Builder.restoreIP(CurrIP);
-
- // We may need to add the step a number of times, depending on the unroll
- // factor. The last of those goes into the PHI.
- PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind");
- VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
- VecInd->setDebugLoc(getDebugLoc());
- State.set(this, VecInd);
+ Value *PrevVal = State.get(getPrevValue());
Instruction *LastInduction = cast<Instruction>(
- Builder.CreateBinOp(AddOp, VecInd, SplatVF, "vec.ind.next"));
- if (isa<TruncInst>(EntryVal))
- State.addMetadata(LastInduction, EntryVal);
- LastInduction->setDebugLoc(getDebugLoc());
+ Builder.CreateBinOp(AddOp, PrevVal, SplatVF, "vec.ind.next"));
+ if (isa<TruncInst>(IV))
+ State.addMetadata(LastInduction, IV);
+ LastInduction->setDebugLoc(IV->getDebugLoc());
- VecInd->addIncoming(SteppedStart, VectorPH);
- // Add induction update using an incorrect block temporarily. The phi node
- // will be fixed after VPlan execution. Note that at this point the latch
- // block cannot be used, as it does not exist yet.
- // TODO: Model increment value in VPlan, by turning the recipe into a
- // multi-def and a subclass of VPHeaderPHIRecipe.
- VecInd->addIncoming(LastInduction, VectorPH);
+ State.set(this, LastInduction);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPWidenIntOrFpInductionBackedgeRecipe::print(
+ raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const {
+ O << Indent;
+ printAsOperand(O, SlotTracker);
+ O << " = WIDEN-INDUCTION-INC ";
+ printOperands(O, SlotTracker);
}
+#endif
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 13ef3029023f1..950eb5a2a01b3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2013,10 +2013,89 @@ void VPlanTransforms::createInterleaveGroups(
}
}
+/// Expand a VPWidenIntOrFpInduction into separate recipes for the initial
+/// value, phi and backedge value. In the following example:
+///
+/// vector.ph:
+/// Successor(s): vector loop
+///
+/// <x1> vector loop: {
+/// vector.body:
+/// WIDEN-INDUCTION %i = phi %bc.resume.val, %i.next, ir<1>, ir<%5>
+/// ...
+/// EMIT branch-on-count vp<%index.next>, ir<%n.vec>
+/// No successors
+/// }
+///
+/// WIDEN-INDUCTION will get expanded to:
+///
+/// vector.ph:
+/// vp<%0> = WIDEN-INDUCTION-START ir<0>, ir<1>
+/// Successor(s): vector loop
+///
+/// <x1> vector loop: {
+/// vector.body:
+/// ir<%i> = WIDEN-INDUCTION-PHI vp<%0>, vp<%4>
+/// ...
+/// vp<%4> = WIDEN-INDUCTION-INC ir<1>, ir<%5>, ir<%i>
+/// EMIT branch-on-count vp<%index.next>, ir<%n.vec>
+/// No successors
+/// }
+static void
+expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR) {
+ VPlan *Plan = WidenIVR->getParent()->getPlan();
+ VPValue *Start = WidenIVR->getStartValue();
+ VPValue *Step = WidenIVR->getStepValue();
+ VPValue *VF = WidenIVR->getVFValue();
+ const InductionDescriptor &ID = WidenIVR->getInductionDescriptor();
+ TruncInst *Trunc = WidenIVR->getTruncInst();
+ DebugLoc DL = WidenIVR->getDebugLoc();
+
+ // The value from the original loop to which we are mapping the new induction
+ // variable.
+ Instruction *IV = Trunc ? cast<Instruction>(Trunc) : WidenIVR->getPHINode();
+
+ // If the phi is truncated, truncate the start and step values.
+ VPBuilder Builder(Plan->getVectorPreheader());
+ if (isa<TruncInst>(IV)) {
+ assert(Start->getUnderlyingValue()->getType()->isIntegerTy() &&
+ "Truncation requires an integer type");
+ auto *TruncType = cast<IntegerType>(IV->getType());
+ Step = Builder.createScalarCast(Instruction::Trunc, Step, TruncType, DL);
+ Start = Builder.createScalarCast(Instruction::Trunc, Start, TruncType, DL);
+ }
+
+ // Construct the initial value of the vector IV in the vector loop preheader.
+ auto *StartR = new VPWidenIntOrFpInductionInitialRecipe(IV, Start, Step, ID);
+ Plan->getVectorPreheader()->insert(StartR, Builder.getInsertPoint());
+
+ // Create the widened phi of the vector IV.
+ auto *PhiR = new VPWidenIntOrFpInductionPHIRecipe(IV, StartR);
+ PhiR->insertBefore(WidenIVR);
+
+ // Create the backedge value for the vector IV.
+ VPValue *Prev = PhiR;
+ // If unrolled, use the last unrolled part in the increment.
+ if (auto *UnrolledPart = WidenIVR->getLastUnrolledPartOperand())
+ Prev = UnrolledPart;
+ auto *IncR = new VPWidenIntOrFpInductionBackedgeRecipe(
+ IV, Step, VF, Prev, WidenIVR->getSplatVFValue(), ID);
+ VPBasicBlock *ExitingBB = Plan->getVectorLoopRegion()->getExitingBasicBlock();
+ ExitingBB->insert(IncR, ExitingBB->getTerminator()->getIterator());
+ PhiR->addOperand(IncR);
+
+ WidenIVR->replaceAllUsesWith(PhiR);
+ WidenIVR->eraseFromParent();
+}
+
void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) {
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_deep(Plan.getEntry()))) {
for (VPRecipeBase &R : make_early_inc_range(VPBB->phis())) {
+ if (auto *WidenIVR = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) {
+ expandVPWidenIntOrFpInduction(WidenIVR);
+ continue;
+ }
if (!isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(&R))
continue;
auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index a058b2a121d59..8e5148b1616bd 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -354,6 +354,8 @@ class VPDef {
VPWidenSelectSC,
VPBlendSC,
VPHistogramSC,
+ VPWidenIntOrFpInductionStartSC,
+ VPWidenIntOrFpInductionIncSC,
// START: Phi-like recipes. Need to be kept together.
VPWidenPHISC,
VPPredInstPHISC,
@@ -364,6 +366,7 @@ class VPDef {
VPEVLBasedIVPHISC,
VPFirstOrderRecurrencePHISC,
VPWidenIntOrFpInductionSC,
+ VPWidenIntOrFpInductionPHISC,
VPWidenPointerInductionSC,
VPScalarPHISC,
VPReductionPHISC,
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
index 1f46073d025a9..44148aac54407 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
@@ -20,11 +20,11 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -38,8 +38,8 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP14]], ptr [[TMP17]], i32 1, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 8)
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
@@ -106,11 +106,11 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -124,8 +124,8 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP14]], ptr [[TMP17]], i32 1, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 [[WIDE_TRIP_COUNT]])
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
index cf4fc143fe8c3..a9f42ff1d1e57 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
@@ -978,8 +978,8 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) {
; DEFAULT-NEXT: store i8 [[TMP33]], ptr [[TMP32]], align 1
; DEFAULT-NEXT: br label [[PRED_STORE_CONTINUE14]]
; DEFAULT: pred.store.continue14:
-; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
; DEFAULT-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; DEFAULT: middle.block:
; DEFAULT-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -1080,8 +1080,8 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) {
; PRED-NEXT: store i8 [[TMP33]], ptr [[TMP32]], align 1
; PRED-NEXT: br label [[PRED_STORE_CONTINUE14]]
; PRED: pred.store.continue14:
-; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
; PRED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
; PRED-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; PRED: middle.block:
; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
index ecf49a31ba79f..dfcb1e561551f 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
@@ -127,11 +127,11 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP15]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[M]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP20]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[M]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -251,10 +251,10 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP15]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]]
-; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP20]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[MUL_2_I]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[MUL_2_I]], i64 0
+; CHECK-NEXT: [[TMP18:%.*]] = mul i64 1, [[TMP9]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP18]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
@@ -262,7 +262,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP23:%.*]] = udiv <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT4]]
+; CHECK-NEXT: [[TMP23:%.*]] = udiv <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP24:%.*]] = urem i64 [[TMP21]], [[MUL_2_I]]
; CHECK-NEXT: [[TMP25:%.*]] = udiv i64 [[TMP24]], [[MUL_1_I]]
; CHECK-NEXT: [[TMP26:%.*]] = urem i64 [[TMP24]], [[MUL_1_I]]
@@ -283,7 +283,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP14]])
; CHECK-NEXT: [[TMP47:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT4]]
; CHECK-NEXT: [[TMP48:%.*]] = extractelement <vscale x 2 x i1> [[TMP47]], i32 0
; CHECK-NEXT: br i1 [[TMP48]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
index 5f926db1131f6..42600a132c2c3 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
@@ -48,8 +48,8 @@ define i64 @same_exit_block_pre_inc_use1() #1 {
; CHECK-NEXT: [[TMP16:%.*]] = xor <vscale x 16 x i1> [[TMP15]], splat (i1 true)
; CHECK-NEXT: [[TMP17:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP16]])
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT3]], [[N_VEC]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i64> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP19:%.*]] = or i1 [[TMP17]], [[TMP18]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i64> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP17]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
@@ -131,8 +131,8 @@ define i64 @same_exit_block_pre_inc_use4() {
; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true)
; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP4]])
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP5]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
@@ -209,8 +209,8 @@ define i64 @loop_contains_safe_call() #1 {
; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP6]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
@@ -304,8 +304,8 @@ define i64 @loop_contains_safe_div() #1 {
; CHECK-NEXT: [[TMP15:%.*]] = xor <vscale x 4 x i1> [[TMP14]], splat (i1 true)
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP15]])
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[INDEX1]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP6]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
@@ -389,8 +389,8 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align(
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[WIDE_LOAD2]], i32 3
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
index eb805b1f8c571..19b1e869a914a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
@@ -108,13 +108,13 @@ define void @test_array_load2_i16_store2(i32 %C, i32 %D) #1 {
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP3:%.*]] = shl <vscale x 4 x i64> [[TMP2]], splat (i64 1)
-; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP0]], 3
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP5]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[C:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[D:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT2]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP0]], 3
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP4]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -197,13 +197,13 @@ define void @test_array_load2_store2_i16(i32 noundef %C, i32 noundef %D) #1 {
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP3:%.*]] = shl <vscale x 4 x i64> [[TMP2]], splat (i64 1)
-; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP0]], 3
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP5]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[C:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[D:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP0]], 3
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP4]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -637,16 +637,16 @@ define void @load_gap_reverse(ptr noalias nocapture readonly %P1, ptr noalias no
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[INDUCTION:%.*]] = sub <vscale x 4 x i64> splat (i64 1023), [[TMP2]]
-; CHECK-NEXT: [[DOTNEG:%.*]] = sub nsw i64 0, [[TMP1]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[DOTNEG]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[DOTNEG:%.*]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[X:%.*]], i64 0
+; CHECK-NEXT: [[X:%.*]] = sub nsw i64 0, [[TMP1]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[X]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = add nsw <vscale x 4 x i64> [[BROADCAST_SPLAT]], [[VEC_IND]]
+; CHECK-NEXT: [[TMP4:%.*]] = add nsw <vscale x 4 x i64> [[DOTSPLAT]], [[VEC_IND]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P1:%.*]], <vscale x 4 x i64> [[VEC_IND]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P2:%.*]], <vscale x 4 x i64> [[VEC_IND]], i32 1
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> [[TMP6]], i32 8, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i64> poison)
@@ -654,7 +654,7 @@ define void @load_gap_reverse(ptr noalias nocapture readonly %P1, ptr noalias no
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> [[TMP4]], <vscale x 4 x ptr> [[TMP5]], i32 8, <vscale x 4 x i1> splat (i1 true))
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> [[TMP7]], <vscale x 4 x ptr> [[TMP6]], i32 8, <vscale x 4 x i1> splat (i1 true))
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK: middle.block:
@@ -885,10 +885,10 @@ define void @PR27626_0(ptr %p, i32 %z, i64 %n) #1 {
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP8]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Z:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP8]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1063,10 +1063,10 @@ define void @PR27626_2(ptr %p, i64 %n, i32 %z) #1 {
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP8]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Z:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP8]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1253,15 +1253,15 @@ define void @PR27626_4(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 {
; CHECK-NEXT: [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP9:%.*]] = shl <vscale x 4 x i64> [[TMP8]], splat (i64 1)
-; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP6]], 3
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP11]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Y:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Z:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT3]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP6]], 3
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP11]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1346,15 +1346,15 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 {
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 4 x i64> [[TMP9]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> [[TMP10]], splat (i64 3)
-; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i64 [[TMP7]], 3
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP12]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Y:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Z:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT3]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i64 [[TMP7]], 3
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP12]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1452,12 +1452,12 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 {
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw i64 [[TMP9]], 2
; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
+; CHECK-NEXT: [[TMP15:%.*]] = shl <vscale x 4 x i64> [[TMP14]], splat (i64 1)
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i32 [[TMP11]], 2
; CHECK-NEXT: [[TMP13:%.*]] = add nsw i32 [[TMP12]], -1
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 [[DOTPRE]], i32 [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
-; CHECK-NEXT: [[TMP15:%.*]] = shl <vscale x 4 x i64> [[TMP14]], splat (i64 1)
; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i64 [[TMP9]], 3
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP17]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll
index a2da67c2cbb81..94e57c7ca023f 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll
@@ -38,10 +38,10 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no
; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4
; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
-; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP5]], i64 0
-; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[DOTSPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP5]], i64 0
+; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[DOTSPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALAR_TAIL_FOLDING: vector.body:
; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -114,10 +114,10 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no
; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP3]])
; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024)
; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
-; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP1]], i64 0
-; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[DOTSPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP1]], i64 0
+; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[DOTSPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]]
; PREDICATED_TAIL_FOLDING: vector.body:
; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -222,10 +222,10 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no
; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4
; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
-; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP5]], i64 0
-; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[DOTSPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP5]], i64 0
+; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[DOTSPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALAR_TAIL_FOLDING: vector.body:
; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -283,10 +283,10 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no
; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP3]])
; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024)
; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
-; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP1]], i64 0
-; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[DOTSPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP1]], i64 0
+; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[DOTSPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]]
; PREDICATED_TAIL_FOLDING: vector.body:
; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -380,12 +380,12 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no
; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4
; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
-; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP5]], i64 0
-; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[DOTSPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV3]], i64 0
; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP5]], i64 0
+; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[DOTSPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALAR_TAIL_FOLDING: vector.body:
; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -450,12 +450,12 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no
; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP3]])
; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024)
; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
-; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP1]], i64 0
-; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[DOTSPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV3]], i64 0
; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP1]], i64 0
+; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[DOTSPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]]
; PREDICATED_TAIL_FOLDING: vector.body:
; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -567,10 +567,10 @@ define dso_local void @masked_strided_factor4(ptr noalias nocapture readonly %p,
; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4
; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
-; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP5]], i64 0
-; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[DOTSPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP5]], i64 0
+; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[DOTSPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALAR_TAIL_FOLDING: vector.body:
; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -673,10 +673,10 @@ define dso_local void @masked_strided_factor4(ptr noalias nocapture readonly %p,
; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP3]])
; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024)
; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
-; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP1]], i64 0
-; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[DOTSPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP1]], i64 0
+; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[DOTSPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]]
; PREDICATED_TAIL_FOLDING: vector.body:
; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll
index 50dfb689c2060..6e6944b904ae0 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll
@@ -28,13 +28,13 @@ define void @block_with_dead_inst_1(ptr %src, i64 %N) #0 {
; CHECK-NEXT: [[TMP11:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
; CHECK-NEXT: [[TMP13:%.*]] = mul <vscale x 8 x i64> [[TMP11]], splat (i64 3)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP13]]
-; CHECK-NEXT: [[TMP16:%.*]] = mul i64 3, [[TMP10]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP16]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], 8
; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 1, i32 [[TMP19]]
+; CHECK-NEXT: [[TMP26:%.*]] = mul i64 3, [[TMP10]]
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP26]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -122,13 +122,13 @@ define void @block_with_dead_inst_2(ptr %src) #0 {
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 4 x i64> [[TMP8]], splat (i64 3)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP10]]
-; CHECK-NEXT: [[TMP13:%.*]] = mul i64 3, [[TMP7]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP13]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 4
; CHECK-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 0, i32 [[TMP16]]
+; CHECK-NEXT: [[TMP22:%.*]] = mul i64 3, [[TMP7]]
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP22]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -216,13 +216,13 @@ define void @multiple_blocks_with_dead_insts_3(ptr %src) #0 {
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 4 x i64> [[TMP8]], splat (i64 3)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP10]]
-; CHECK-NEXT: [[TMP13:%.*]] = mul i64 3, [[TMP7]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP13]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 4
; CHECK-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 0, i32 [[TMP16]]
+; CHECK-NEXT: [[TMP22:%.*]] = mul i64 3, [[TMP7]]
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP22]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -320,13 +320,13 @@ define void @multiple_blocks_with_dead_insts_4(ptr %src, i64 %N) #0 {
; CHECK-NEXT: [[TMP11:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
; CHECK-NEXT: [[TMP13:%.*]] = mul <vscale x 8 x i64> [[TMP11]], splat (i64 3)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP13]]
-; CHECK-NEXT: [[TMP16:%.*]] = mul i64 3, [[TMP10]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP16]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], 8
; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 1, i32 [[TMP19]]
+; CHECK-NEXT: [[TMP25:%.*]] = mul i64 3, [[TMP10]]
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP25]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -426,13 +426,13 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_5(ptr %src) #0 {
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 4 x i64> [[TMP8]], splat (i64 3)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP10]]
-; CHECK-NEXT: [[TMP13:%.*]] = mul i64 3, [[TMP7]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP13]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 4
; CHECK-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 1, i32 [[TMP16]]
+; CHECK-NEXT: [[TMP22:%.*]] = mul i64 3, [[TMP7]]
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP22]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -540,13 +540,13 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 %
; CHECK-NEXT: [[TMP11:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
; CHECK-NEXT: [[TMP13:%.*]] = mul <vscale x 8 x i64> [[TMP11]], splat (i64 3)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP13]]
-; CHECK-NEXT: [[TMP16:%.*]] = mul i64 3, [[TMP10]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP16]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], 8
; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 1, i32 [[TMP19]]
+; CHECK-NEXT: [[TMP25:%.*]] = mul i64 3, [[TMP10]]
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP25]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll
index c00868c3e0be2..7baba88061cd4 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll
@@ -25,11 +25,11 @@ define void @test(ptr noalias nocapture %a, ptr noalias nocapture %b, i32 %v) {
; VLENUNK-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; VLENUNK-NEXT: [[TMP8:%.*]] = mul <vscale x 4 x i64> [[TMP6]], splat (i64 1)
; VLENUNK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP8]]
+; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[V:%.*]], i64 0
+; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; VLENUNK-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP5]]
; VLENUNK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP11]], i64 0
; VLENUNK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[V:%.*]], i64 0
-; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]]
; VLENUNK: vector.body:
; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll
index 09e8a760a355c..44af6c77a573c 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll
@@ -23,15 +23,15 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 8 x i64> [[TMP6]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP8]]
-; CHECK-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP5]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP11]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[A]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[B]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[C]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT3]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]]
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP9]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
index 305d26d7f3bc1..a0c56faf913fc 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
@@ -54,8 +54,8 @@ define void @test(ptr %p, i64 %a, i8 %b) {
; CHECK-NEXT: store i8 [[TMP16]], ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
; CHECK: pred.store.continue8:
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY1]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
index f7b8758084056..9602d4cbf5df3 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
@@ -560,23 +560,23 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
; STRIDED-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; STRIDED-NEXT: [[TMP14:%.*]] = mul <vscale x 4 x i64> [[TMP12]], splat (i64 1)
; STRIDED-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP14]]
-; STRIDED-NEXT: [[TMP17:%.*]] = mul i64 1, [[TMP11]]
-; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP17]], i64 0
+; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[STRIDE]], i64 0
; STRIDED-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[STRIDE]], i64 0
+; STRIDED-NEXT: [[TMP15:%.*]] = mul i64 1, [[TMP11]]
+; STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP15]], i64 0
; STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; STRIDED: vector.body:
; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; STRIDED-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; STRIDED-NEXT: [[TMP18:%.*]] = mul nuw nsw <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; STRIDED-NEXT: [[TMP18:%.*]] = mul nuw nsw <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
; STRIDED-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[P]], <vscale x 4 x i64> [[TMP18]]
; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP19]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison), !alias.scope [[META8:![0-9]+]]
; STRIDED-NEXT: [[TMP20:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], splat (i32 1)
; STRIDED-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[P2]], <vscale x 4 x i64> [[TMP18]]
; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP20]], <vscale x 4 x ptr> [[TMP21]], i32 4, <vscale x 4 x i1> splat (i1 true)), !alias.scope [[META11:![0-9]+]], !noalias [[META8]]
; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
-; STRIDED-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; STRIDED-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; STRIDED-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; STRIDED-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; STRIDED: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
index 95496bb51a1bc..e3f0700cf352e 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
@@ -325,11 +325,11 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
; SCALABLE-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; SCALABLE-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i64> [[TMP6]], splat (i64 1)
; SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP8]]
+; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[B:%.*]], i64 0
+; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]]
; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i64 0
; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[B:%.*]], i64 0
-; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -432,11 +432,11 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul <vscale x 2 x i64> [[TMP5]], splat (i64 1)
; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP7]]
+; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[B:%.*]], i64 0
+; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP4]]
; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8]], i64 0
; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[B:%.*]], i64 0
-; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; TF-SCALABLE: vector.body:
; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -995,13 +995,13 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias
; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul <vscale x 2 x i64> [[TMP5]], splat (i64 1)
; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP7]]
-; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP4]]
-; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8]], i64 0
-; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[B:%.*]], i64 0
; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP4]]
+; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8]], i64 0
+; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; TF-SCALABLE: vector.body:
; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1125,13 +1125,13 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
; SCALABLE-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; SCALABLE-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i64> [[TMP6]], splat (i64 1)
; SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP8]]
-; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]]
-; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i64 0
-; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[B:%.*]], i64 0
; SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
+; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]]
+; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i64 0
+; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1231,13 +1231,13 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul <vscale x 2 x i64> [[TMP5]], splat (i64 1)
; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP7]]
-; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP4]]
-; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8]], i64 0
-; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[B:%.*]], i64 0
; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
+; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP4]]
+; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8]], i64 0
+; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; TF-SCALABLE: vector.body:
; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll
index fefbdb473e2ab..7045b4e03ed8c 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll
@@ -601,10 +601,10 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-OUTLOOP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-OUTLOOP-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
-; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul <vscale x 4 x i32> [[TMP12]], splat (i32 1)
; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP14]]
+; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP10]] to i32
; NO-VP-OUTLOOP-NEXT: [[TMP17:%.*]] = mul i32 1, [[TMP16]]
; NO-VP-OUTLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP17]], i64 0
@@ -793,10 +793,10 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-OUTLOOP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-OUTLOOP-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
-; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul <vscale x 4 x i32> [[TMP12]], splat (i32 1)
; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP14]]
+; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP10]] to i32
; NO-VP-OUTLOOP-NEXT: [[TMP17:%.*]] = mul i32 1, [[TMP16]]
; NO-VP-OUTLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP17]], i64 0
diff --git a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
index 68695a8b1282c..843afa95687c4 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
@@ -608,8 +608,8 @@ define void @pr70590_recipe_without_underlying_instr(i64 %n, ptr noalias %dst) {
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr %dst, i64 [[TMP0]]
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP19]], i32 0
; CHECK-NEXT: store <4 x i8> [[PREDPHI]], ptr [[TMP20]], align 4
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: br i1 true, label %middle.block, label %vector.body
; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
index 7614be0bd3fce..3d47d8217b5f5 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
@@ -203,22 +203,22 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l,
; CHECK-NEXT: [[DOTSPLAT16:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT15]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = mul <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, [[DOTSPLAT16]]
; CHECK-NEXT: [[INDUCTION17:%.*]] = add <8 x i16> [[DOTSPLAT14]], [[TMP14]]
-; CHECK-NEXT: [[TMP15:%.*]] = mul i16 [[TMP0]], 8
-; CHECK-NEXT: [[DOTSPLATINSERT18:%.*]] = insertelement <8 x i16> poison, i16 [[TMP15]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT18:%.*]] = insertelement <8 x i16> poison, i16 [[OFF]], i64 0
; CHECK-NEXT: [[DOTSPLAT19:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT18]], <8 x i16> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT22:%.*]] = insertelement <8 x i16> poison, i16 [[OFF]], i64 0
+; CHECK-NEXT: [[TMP15:%.*]] = mul i16 [[TMP0]], 8
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT22:%.*]] = insertelement <8 x i16> poison, i16 [[TMP15]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT23:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT22]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX12:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT24:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND20:%.*]] = phi <8 x i16> [ [[INDUCTION17]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT21:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX12]], 0
-; CHECK-NEXT: [[TMP17:%.*]] = sub <8 x i16> [[VEC_IND20]], [[BROADCAST_SPLAT23]]
+; CHECK-NEXT: [[TMP17:%.*]] = sub <8 x i16> [[VEC_IND20]], [[DOTSPLAT19]]
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[K]], i64 [[TMP16]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[TMP18]], i32 0
; CHECK-NEXT: store <8 x i16> [[TMP17]], ptr [[TMP19]], align 2
; CHECK-NEXT: [[INDEX_NEXT24]] = add nuw i64 [[INDEX12]], 8
-; CHECK-NEXT: [[VEC_IND_NEXT21]] = add <8 x i16> [[VEC_IND20]], [[DOTSPLAT19]]
+; CHECK-NEXT: [[VEC_IND_NEXT21]] = add <8 x i16> [[VEC_IND20]], [[BROADCAST_SPLAT23]]
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT24]], [[N_VEC5]]
; CHECK-NEXT: br i1 [[TMP20]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: vec.epilog.middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
index b6e9dfc2322ff..769d1e3569cda 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
@@ -418,9 +418,9 @@ define i16 @iv_and_step_trunc() {
; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 1)
; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i64> [[TMP0]] to <2 x i16>
; CHECK-NEXT: [[TMP2]] = mul <2 x i16> [[VEC_IND1]], [[TMP1]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], splat (i16 2)
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i16> [[TMP2]], i32 0
diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll
index b885d85a96800..576d749c22120 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll
@@ -523,8 +523,8 @@ define void @interleave_store_double_i64(ptr %dst) {
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
@@ -645,8 +645,8 @@ define void @interleave_store_i64_double_2(ptr %dst) {
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll b/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll
index 6480c0ab1099d..02d48cbda1aab 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll
@@ -71,8 +71,8 @@
; AVX: br i1 %[[InnerCond]], label %[[ForInc]], label %[[InnerLoop]]
; AVX: [[ForInc]]:
-; AVX: %[[VecIndNext]] = add <8 x i64> %[[VecInd]], splat (i64 8)
; AVX: %[[IndNext]] = add nuw i64 %[[Ind]], 8
+; AVX: %[[VecIndNext]] = add <8 x i64> %[[VecInd]], splat (i64 8)
; AVX: br i1 true, label %middle.block, label %vector.body
@arr2 = external global [8 x i32], align 16
diff --git a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
index 6bed344d4d73b..312125608c87e 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
@@ -183,7 +183,7 @@ define void @_Z3fn1v() #0 {
; CHECK-NEXT: [[DOTSPLAT68:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT67]], <8 x i64> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION69:%.*]] = add <8 x i64> [[DOTSPLAT68]], <i64 0, i64 2, i64 4, i64 6, i64 8, i64 10, i64 12, i64 14>
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY51:%.*]]
-; CHECK: vec.epilog.vector.body51:
+; CHECK: vec.epilog.vector.body57:
; CHECK-NEXT: [[INDEX61:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL51]], [[VEC_EPILOG_PH42]] ], [ [[INDEX_NEXT74:%.*]], [[VEC_EPILOG_VECTOR_BODY51]] ]
; CHECK-NEXT: [[VEC_IND65:%.*]] = phi <8 x i64> [ [[INDUCTION64]], [[VEC_EPILOG_PH42]] ], [ [[VEC_IND_NEXT66:%.*]], [[VEC_EPILOG_VECTOR_BODY51]] ]
; CHECK-NEXT: [[VEC_IND70:%.*]] = phi <8 x i64> [ [[INDUCTION69]], [[VEC_EPILOG_PH42]] ], [ [[VEC_IND_NEXT71:%.*]], [[VEC_EPILOG_VECTOR_BODY51]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll b/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll
index c745b4f74786c..e10b801844985 100644
--- a/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll
+++ b/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll
@@ -50,10 +50,10 @@ define void @test(i32 %arg, i32 %L1.limit, i32 %L2.switch, i1 %c, ptr %dst) {
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> splat (i32 1), [[TMP4]]
-; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[INDUCTION_IV_LCSSA1]], 4
-; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <4 x i32> poison, i32 [[L1_EXIT_VAL]], i64 0
; CHECK-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT3]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[L1_EXIT_VAL]], i64 0
+; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[INDUCTION_IV_LCSSA1]], 4
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
@@ -61,13 +61,13 @@ define void @test(i32 %arg, i32 %L1.limit, i32 %L2.switch, i1 %c, ptr %dst) {
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = sub <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP7:%.*]] = sub <4 x i32> [[VEC_IND]], [[DOTSPLAT4]]
; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i32> [[TMP7]] to <4 x i64>
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST:%.*]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP9]], i32 0
; CHECK-NEXT: store <4 x i64> [[TMP8]], ptr [[TMP10]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], [[DOTSPLAT4]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 12
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll
index a829decb5231e..6aa44533c4a15 100644
--- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll
@@ -248,11 +248,11 @@ define i1 @any_of_reduction_i1_epilog(i64 %N, i32 %a) {
; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP0]], 4
; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF2]]
; CHECK-NEXT: [[IND_END5:%.*]] = trunc i64 [[N_VEC3]] to i32
-; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP7]], i64 0
-; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i1> [[MINMAX_IDENT_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[BC_RESUME_VAL]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <4 x i1> poison, i1 [[TMP7]], i64 0
+; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT10]], <4 x i1> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT13:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT14:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT13]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
index 509b8f99e4b61..553462207386b 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
@@ -892,10 +892,10 @@ define i32 @PR27246() {
; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[I_016]], 8
; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[I_016]], [[N_MOD_VF]]
; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = sub i32 [[I_016]], [[N_VEC]]
-; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[E_015]], i32 3
; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[I_016]], i64 0
; UNROLL-NO-IC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; UNROLL-NO-IC-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], <i32 0, i32 -1, i32 -2, i32 -3>
+; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[E_015]], i32 3
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -986,10 +986,10 @@ define i32 @PR27246() {
; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[I_016]], 4
; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i32 [[I_016]], [[N_MOD_VF]]
; SINK-AFTER-NEXT: [[IND_END:%.*]] = sub i32 [[I_016]], [[N_VEC]]
-; SINK-AFTER-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[E_015]], i32 3
; SINK-AFTER-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[I_016]], i64 0
; SINK-AFTER-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; SINK-AFTER-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], <i32 0, i32 -1, i32 -2, i32 -3>
+; SINK-AFTER-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[E_015]], i32 3
; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; SINK-AFTER: vector.body:
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll
index 29b29c500c46e..b94520ce5abfe 100644
--- a/llvm/test/Transforms/LoopVectorize/float-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll
@@ -770,10 +770,10 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca
; VEC4_INTERL1-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT5]], <4 x float> poison, <4 x i32> zeroinitializer
; VEC4_INTERL1-NEXT: [[TMP4:%.*]] = fmul fast <4 x float> [[DOTSPLAT6]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fadd fast <4 x float> [[DOTSPLAT]], [[TMP4]]
-; VEC4_INTERL1-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP0]], 4.000000e+00
-; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT7:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0
+; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT7:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT8:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT7]], <4 x float> poison, <4 x i32> zeroinitializer
-; VEC4_INTERL1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0
+; VEC4_INTERL1-NEXT: [[TMP13:%.*]] = fmul fast float [[TMP0]], 4.000000e+00
+; VEC4_INTERL1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP13]], i64 0
; VEC4_INTERL1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]]
; VEC4_INTERL1: vector.body:
@@ -782,7 +782,7 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca
; VEC4_INTERL1-NEXT: [[VEC_IND9:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT10:%.*]], [[VECTOR_BODY]] ]
; VEC4_INTERL1-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
; VEC4_INTERL1-NEXT: store <4 x float> [[VEC_IND9]], ptr [[TMP6]], align 4
-; VEC4_INTERL1-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[VEC_IND9]], [[BROADCAST_SPLAT]]
+; VEC4_INTERL1-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[VEC_IND9]], [[DOTSPLAT8]]
; VEC4_INTERL1-NEXT: [[TMP8:%.*]] = fadd fast <4 x float> [[VEC_IND]], splat (float -5.000000e-01)
; VEC4_INTERL1-NEXT: [[TMP9:%.*]] = fadd fast <4 x float> [[TMP8]], [[TMP7]]
; VEC4_INTERL1-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
@@ -791,7 +791,7 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca
; VEC4_INTERL1-NEXT: store <4 x float> [[TMP8]], ptr [[TMP11]], align 4
; VEC4_INTERL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], splat (float -2.000000e+00)
-; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT10]] = fadd fast <4 x float> [[VEC_IND9]], [[DOTSPLAT8]]
+; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT10]] = fadd fast <4 x float> [[VEC_IND9]], [[BROADCAST_SPLAT]]
; VEC4_INTERL1-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VEC4_INTERL1-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; VEC4_INTERL1: middle.block:
@@ -1012,10 +1012,10 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT5]], <2 x float> poison, <2 x i32> zeroinitializer
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[DOTSPLAT6]], <float 0.000000e+00, float 1.000000e+00>
; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fadd fast <2 x float> [[DOTSPLAT]], [[TMP4]]
-; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP0]], 2.000000e+00
-; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT7:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i64 0
+; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT7:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT8:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT7]], <2 x float> poison, <2 x i32> zeroinitializer
-; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0
+; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP13:%.*]] = fmul fast float [[TMP0]], 2.000000e+00
+; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP13]], i64 0
; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]]
; VEC2_INTERL1_PRED_STORE: vector.body:
@@ -1024,7 +1024,7 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca
; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND9:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT10:%.*]], [[VECTOR_BODY]] ]
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND9]], ptr [[TMP6]], align 4
-; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = fadd fast <2 x float> [[VEC_IND9]], [[BROADCAST_SPLAT]]
+; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = fadd fast <2 x float> [[VEC_IND9]], [[DOTSPLAT8]]
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP8:%.*]] = fadd fast <2 x float> [[VEC_IND]], splat (float -5.000000e-01)
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP9:%.*]] = fadd fast <2 x float> [[TMP8]], [[TMP7]]
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
@@ -1033,7 +1033,7 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca
; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[TMP8]], ptr [[TMP11]], align 4
; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], splat (float -1.000000e+00)
-; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT10]] = fadd fast <2 x float> [[VEC_IND9]], [[DOTSPLAT8]]
+; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT10]] = fadd fast <2 x float> [[VEC_IND9]], [[BROADCAST_SPLAT]]
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; VEC2_INTERL1_PRED_STORE: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/induction-step.ll b/llvm/test/Transforms/LoopVectorize/induction-step.ll
index 29c8d1f36ea59..80a4754a0b63a 100644
--- a/llvm/test/Transforms/LoopVectorize/induction-step.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction-step.ll
@@ -350,22 +350,22 @@ define void @wide_add_induction_step_live_in(ptr %dst, i64 %N, i16 %off) {
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = mul <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, [[DOTSPLAT]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i16> zeroinitializer, [[TMP1]]
-; CHECK-NEXT: [[TMP2:%.*]] = mul i16 [[O_1]], 8
-; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i16> poison, i16 [[O_1]], i64 0
; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT1]], <8 x i16> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[O_1]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = mul i16 [[O_1]], 8
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i16> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = add <8 x i16> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP4:%.*]] = add <8 x i16> [[VEC_IND]], [[DOTSPLAT2]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0
; CHECK-NEXT: store <8 x i16> [[TMP4]], ptr [[TMP6]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i16> [[VEC_IND]], [[DOTSPLAT2]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i16> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -422,22 +422,22 @@ define void @wide_sub_induction_step_live_in(ptr %dst, i64 %N, i16 %off) {
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, [[DOTSPLAT]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i16> zeroinitializer, [[TMP2]]
-; CHECK-NEXT: [[TMP3:%.*]] = mul i16 [[TMP0]], 8
-; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP3]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i16> poison, i16 [[O_1]], i64 0
; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT1]], <8 x i16> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[O_1]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = mul i16 [[TMP0]], 8
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP3]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i16> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = sub <8 x i16> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP5:%.*]] = sub <8 x i16> [[VEC_IND]], [[DOTSPLAT2]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 0
; CHECK-NEXT: store <8 x i16> [[TMP5]], ptr [[TMP7]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i16> [[VEC_IND]], [[DOTSPLAT2]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i16> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll
index f6ca9ff2e7b2e..d2d123af4b8f7 100644
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll
@@ -6367,12 +6367,12 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; UNROLL-NO-IC-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32
; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = mul i32 [[DOTCAST]], [[STEP]]
-; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0
-; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
-; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = mul <2 x i32> splat (i32 2), [[BROADCAST_SPLAT]]
; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0
; UNROLL-NO-IC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
-; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = mul <2 x i32> <i32 0, i32 1>, [[DOTSPLAT]]
+; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = mul <2 x i32> splat (i32 2), [[DOTSPLAT]]
+; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0
+; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
+; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = mul <2 x i32> <i32 0, i32 1>, [[BROADCAST_SPLAT]]
; UNROLL-NO-IC-NEXT: [[INDUCTION:%.*]] = add <2 x i32> zeroinitializer, [[TMP18]]
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll
index 8e132ed8399cd..2fb840af63542 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll
@@ -116,7 +116,7 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) {
; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
-; CHECK: pred.load.if3:
+; CHECK: pred.load.if2:
; CHECK-NEXT: [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
@@ -125,12 +125,12 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) {
; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]]
-; CHECK: pred.load.continue4:
+; CHECK: pred.load.continue3:
; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
-; CHECK: pred.load.if5:
+; CHECK: pred.load.if4:
; CHECK-NEXT: [[TMP21:%.*]] = or disjoint i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]]
; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
@@ -139,12 +139,12 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) {
; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
-; CHECK: pred.load.continue6:
+; CHECK: pred.load.continue5:
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP27]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8]]
-; CHECK: pred.load.if7:
+; CHECK: pred.load.if6:
; CHECK-NEXT: [[TMP31:%.*]] = or disjoint i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP31]]
; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
@@ -153,7 +153,7 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) {
; CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]]
-; CHECK: pred.load.continue8:
+; CHECK: pred.load.continue7:
; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP34]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP37]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_IND1]], <4 x i32> zeroinitializer
@@ -321,7 +321,7 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) {
; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
-; CHECK: pred.load.if3:
+; CHECK: pred.load.if2:
; CHECK-NEXT: [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
@@ -330,12 +330,12 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) {
; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]]
-; CHECK: pred.load.continue4:
+; CHECK: pred.load.continue3:
; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
-; CHECK: pred.load.if5:
+; CHECK: pred.load.if4:
; CHECK-NEXT: [[TMP21:%.*]] = or disjoint i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]]
; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
@@ -344,12 +344,12 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) {
; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
-; CHECK: pred.load.continue6:
+; CHECK: pred.load.continue5:
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP27]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8]]
-; CHECK: pred.load.if7:
+; CHECK: pred.load.if6:
; CHECK-NEXT: [[TMP31:%.*]] = or disjoint i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP31]]
; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
@@ -358,7 +358,7 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) {
; CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]]
-; CHECK: pred.load.continue8:
+; CHECK: pred.load.continue7:
; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP34]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP37]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_IND1]], <4 x i32> splat (i32 1)
@@ -436,7 +436,7 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) {
; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
-; CHECK: pred.load.if3:
+; CHECK: pred.load.if2:
; CHECK-NEXT: [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
@@ -445,12 +445,12 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) {
; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]]
-; CHECK: pred.load.continue4:
+; CHECK: pred.load.continue3:
; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
-; CHECK: pred.load.if5:
+; CHECK: pred.load.if4:
; CHECK-NEXT: [[TMP21:%.*]] = or disjoint i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]]
; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
@@ -459,12 +459,12 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) {
; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
-; CHECK: pred.load.continue6:
+; CHECK: pred.load.continue5:
; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP27]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8]]
-; CHECK: pred.load.if7:
+; CHECK: pred.load.if6:
; CHECK-NEXT: [[TMP31:%.*]] = or disjoint i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP31]]
; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
@@ -473,7 +473,7 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) {
; CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]]
-; CHECK: pred.load.continue8:
+; CHECK: pred.load.continue7:
; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP34]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP37]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT: [[TMP40:%.*]] = mul nsw <4 x i32> [[TMP39]], [[TMP38]]
diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
index e24c6090b704b..561dd2cf7b11e 100644
--- a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
+++ b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
@@ -29,8 +29,8 @@ define i64 @same_exit_block_pre_inc_use1() {
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
@@ -114,8 +114,8 @@ define i64 @same_exit_block_pre_inc1_use_inv_cond(i1 %cond) {
; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true)
; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]])
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP8]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
@@ -199,8 +199,8 @@ define i64 @same_exit_block_pre_inc_use1_gep_two_indices() {
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
@@ -282,8 +282,8 @@ define i64 @same_exit_block_pre_inc_use1_alloca_diff_type() {
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
@@ -365,8 +365,8 @@ define i64 @same_exit_block_pre_inc_use2() {
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3
@@ -447,8 +447,8 @@ define i64 @same_exit_block_pre_inc_use3() {
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3
@@ -529,8 +529,8 @@ define i64 @same_exit_block_pre_inc_use4() {
; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]])
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP5]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
@@ -615,8 +615,8 @@ define i64 @same_exit_block_post_inc_use() {
; CHECK-NEXT: [[TMP13:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true)
; CHECK-NEXT: [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP13]])
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP14]], [[TMP15]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP14]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
@@ -709,8 +709,8 @@ define i64 @same_exit_block_post_inc_use2() {
; CHECK-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP16]], splat (i1 true)
; CHECK-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP17]])
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP20:%.*]] = or i1 [[TMP18]], [[TMP19]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3
@@ -793,8 +793,8 @@ define i64 @diff_exit_block_pre_inc_use1() {
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
@@ -883,8 +883,8 @@ define i64 @diff_exit_block_pre_inc_use2() {
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
@@ -972,8 +972,8 @@ define i64 @diff_exit_block_pre_inc_use3() {
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT4]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
@@ -1068,8 +1068,8 @@ define i64 @diff_exit_block_post_inc_use1() {
; CHECK-NEXT: [[TMP13:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true)
; CHECK-NEXT: [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP13]])
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP14]], [[TMP15]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP14]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
@@ -1169,8 +1169,8 @@ define i64 @diff_exit_block_post_inc_use2() {
; CHECK-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP16]], splat (i1 true)
; CHECK-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP17]])
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP20:%.*]] = or i1 [[TMP18]], [[TMP19]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP18]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
@@ -1258,8 +1258,8 @@ define i64 @loop_contains_safe_call() {
; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP6]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
@@ -1337,8 +1337,8 @@ define i64 @loop_contains_safe_div() {
; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP6]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
@@ -1417,8 +1417,8 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align(
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[WIDE_LOAD2]], i32 3
@@ -1503,8 +1503,8 @@ define i64 @same_exit_block_pre_inc_use1_reverse() {
; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true)
; CHECK-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP8]])
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT4]], 1020
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4)
; CHECK-NEXT: [[TMP11:%.*]] = or i1 [[TMP9]], [[TMP10]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4)
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP9]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
@@ -1583,8 +1583,8 @@ define i64 @same_exit_block_pre_inc_use1_deref_ptrs(ptr dereferenceable(1024) %p
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
>From 6fc4a7c59bf4199906ddae52ac8a3bde0290191f Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Fri, 13 Dec 2024 01:06:37 +0800
Subject: [PATCH 02/16] Remove initial and backedge recipes, expand out to
regular VPInstructions + splat and step vector recipes
---
.../Vectorize/LoopVectorizationPlanner.h | 8 +
.../Transforms/Vectorize/LoopVectorize.cpp | 3 +-
llvm/lib/Transforms/Vectorize/VPlan.h | 182 +++++++----------
.../Transforms/Vectorize/VPlanAnalysis.cpp | 3 +-
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 183 ++++--------------
.../Transforms/Vectorize/VPlanTransforms.cpp | 101 +++++++---
.../Transforms/Vectorize/VPlanTransforms.h | 2 +-
llvm/lib/Transforms/Vectorize/VPlanValue.h | 4 +-
.../AArch64/clamped-trip-count.ll | 8 +-
.../AArch64/divs-with-scalable-vfs.ll | 14 +-
.../AArch64/scalable-avoid-scalarization.ll | 4 +-
.../AArch64/sve-interleaved-accesses.ll | 48 ++---
.../sve-interleaved-masked-accesses.ll | 32 +--
.../RISCV/blocks-with-dead-instructions.ll | 36 ++--
.../LoopVectorize/RISCV/dead-ops-cost.ll | 4 +-
.../LoopVectorize/RISCV/induction-costs.ll | 4 +-
.../LoopVectorize/RISCV/mask-index-type.ll | 4 +-
...ruction-or-drop-poison-generating-flags.ll | 6 +-
.../LoopVectorize/RISCV/strided-accesses.ll | 10 +-
.../LoopVectorize/RISCV/uniform-load-store.ll | 58 +++---
...rize-force-tail-with-evl-cond-reduction.ll | 4 +-
.../X86/epilog-vectorization-inductions.ll | 18 +-
.../LoopVectorize/X86/induction-costs.ll | 12 +-
.../LoopVectorize/create-induction-resume.ll | 10 +-
.../LoopVectorize/float-induction.ll | 92 ++++-----
.../LoopVectorize/induction-step.ll | 34 ++--
.../LoopVectorize/iv_outside_user.ll | 12 +-
.../optimal-epilog-vectorization.ll | 16 +-
28 files changed, 404 insertions(+), 508 deletions(-)
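
The widened IV no longer goes through dedicated start/backedge recipes; the expansion now emits plain step-vector, splat, mul and add recipes in the preheader plus a single add for the backedge value. Roughly, for an integer IV with start %start and step %step at a fixed VF of 4, the generated IR looks like the sketch below (illustrative value names only, not taken verbatim from the updated tests):

vector.ph:
  %step.ins    = insertelement <4 x i64> poison, i64 %step, i64 0
  %step.splat  = shufflevector <4 x i64> %step.ins, <4 x i64> poison, <4 x i32> zeroinitializer
  %scaled      = mul <4 x i64> <i64 0, i64 1, i64 2, i64 3>, %step.splat
  %start.ins   = insertelement <4 x i64> poison, i64 %start, i64 0
  %start.splat = shufflevector <4 x i64> %start.ins, <4 x i64> poison, <4 x i32> zeroinitializer
  %induction   = add <4 x i64> %start.splat, %scaled
  ; increment for one vector iteration: VF * step, splatted
  %vf.step     = mul i64 %step, 4
  %inc.ins     = insertelement <4 x i64> poison, i64 %vf.step, i64 0
  %vec.step    = shufflevector <4 x i64> %inc.ins, <4 x i64> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:
  %vec.ind      = phi <4 x i64> [ %induction, %vector.ph ], [ %vec.ind.next, %vector.body ]
  ...
  %vec.ind.next = add <4 x i64> %vec.ind, %vec.step
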
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index ed3e45dd2c6c8..7f1ec987ad038 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -265,6 +265,14 @@ class VPBuilder {
FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags()));
}
+ VPSplatRecipe *createSplat(VPValue *Val) {
+ return tryInsertInstruction(new VPSplatRecipe(Val));
+ }
+
+ VPStepVectorRecipe *createStepVector(Type *Ty) {
+ return tryInsertInstruction(new VPStepVectorRecipe(Ty));
+ }
+
//===--------------------------------------------------------------------===//
// RAII helpers.
//===--------------------------------------------------------------------===//
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 226fc23888f02..b8b958ddaa9ae 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7705,7 +7705,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
VPlanTransforms::simplifyRecipes(BestVPlan, *Legal->getWidestInductionType());
VPlanTransforms::removeDeadRecipes(BestVPlan);
- VPlanTransforms::convertToConcreteRecipes(BestVPlan);
+ VPlanTransforms::convertToConcreteRecipes(BestVPlan,
+ Legal->getWidestInductionType());
// Perform the actual loop transformation.
VPTransformState State(&TTI, BestVF, BestUF, LI, DT, ILV.Builder, &ILV,
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index e41ee6ce18656..bd860c177e6ec 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1236,6 +1236,74 @@ class VPScalarCastRecipe : public VPSingleDefRecipe {
}
};
+/// A recipe for splatting a scalar value to a vector.
+class VPSplatRecipe : public VPSingleDefRecipe {
+public:
+ VPSplatRecipe(VPValue *Op) : VPSingleDefRecipe(VPDef::VPSplatSC, {Op}) {}
+
+ ~VPSplatRecipe() override = default;
+
+ VPSplatRecipe *clone() override { return new VPSplatRecipe(getOperand(0)); }
+
+ VP_CLASSOF_IMPL(VPDef::VPSplatSC)
+
+ void execute(VPTransformState &State) override;
+
+ /// Return the cost of this VPSplatRecipe.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override {
+ // TODO: Compute accurate cost after retiring the legacy cost model.
+ return 0;
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return true;
+ }
+};
+
+/// A recipe for generating a step vector.
+class VPStepVectorRecipe : public VPSingleDefRecipe {
+ /// Scalar return type of the intrinsic.
+ Type *ScalarTy;
+
+public:
+ VPStepVectorRecipe(Type *Ty)
+ : VPSingleDefRecipe(VPDef::VPStepVectorSC, {}), ScalarTy(Ty) {}
+
+ ~VPStepVectorRecipe() override = default;
+
+ VPStepVectorRecipe *clone() override {
+ return new VPStepVectorRecipe(ScalarTy);
+ }
+
+ VP_CLASSOF_IMPL(VPDef::VPStepVectorSC)
+
+ void execute(VPTransformState &State) override;
+
+ /// Return the cost of this VPStepVectorRecipe.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override {
+ // TODO: Compute accurate cost after retiring the legacy cost model.
+ return 0;
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+
+ /// Return the scalar element type of the generated step vector.
+ Type *getScalarType() const { return ScalarTy; }
+};
+
/// A recipe for widening vector intrinsics.
class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags {
/// ID of the vector intrinsic to widen.
@@ -1820,59 +1888,10 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
}
/// Returns the VPValue representing the value of this induction at
- /// the last unrolled part, if it exists. Returns nullptr if unrolling did not
+ /// the last unrolled part, if it exists. Returns itself if unrolling did not
/// take place.
VPValue *getLastUnrolledPartOperand() {
- return getNumOperands() == 5 ? getOperand(4) : nullptr;
- }
-};
-
-/// A recipe to compute the initial value for a widened IV, expanded from
-/// VPWidenIntOrFpInductionRecipe.
-class VPWidenIntOrFpInductionInitialRecipe : public VPSingleDefRecipe {
- Instruction *IV;
- const InductionDescriptor &ID;
-
-public:
- VPWidenIntOrFpInductionInitialRecipe(Instruction *IV, VPValue *Start,
- VPValue *Step,
- const InductionDescriptor &ID)
- : VPSingleDefRecipe(VPDef::VPWidenIntOrFpInductionStartSC, {Start, Step}),
- IV(IV), ID(ID) {
- assert((isa<PHINode>(IV) || isa<TruncInst>(IV)) &&
- "Expected either an induction phi-node or a truncate of it!");
- }
-
- ~VPWidenIntOrFpInductionInitialRecipe() override = default;
-
- VPWidenIntOrFpInductionInitialRecipe *clone() override {
- return new VPWidenIntOrFpInductionInitialRecipe(IV, getOperand(0),
- getOperand(1), ID);
- }
-
- VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionStartSC)
-
- void execute(VPTransformState &State) override;
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- /// Print the recipe.
- void print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const override;
-#endif
-
- VPValue *getStartValue() { return getOperand(0); }
- const VPValue *getStartValue() const { return getOperand(0); }
-
- VPValue *getStepValue() { return getOperand(1); }
- const VPValue *getStepValue() const { return getOperand(1); }
-
- /// Returns the scalar type of the induction.
- Type *getScalarType() const { return IV->getType(); }
-
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
- assert(is_contained(operands(), Op) &&
- "Op must be an operand of the recipe");
- return true;
+ return getNumOperands() == 5 ? getOperand(4) : this;
}
};
@@ -1908,67 +1927,6 @@ class VPWidenIntOrFpInductionPHIRecipe : public VPHeaderPHIRecipe {
#endif
};
-/// A recipe to compute the backedge value for a widened IV, expanded from
-/// VPWidenIntOrFpInductionRecipe.
-class VPWidenIntOrFpInductionBackedgeRecipe : public VPSingleDefRecipe {
- Instruction *IV;
- const InductionDescriptor &ID;
-
-public:
- VPWidenIntOrFpInductionBackedgeRecipe(Instruction *IV, VPValue *Step,
- VPValue *VF, VPValue *Prev,
- VPValue *SplatVF,
- const InductionDescriptor &ID)
- : VPSingleDefRecipe(VPDef::VPWidenIntOrFpInductionSC, {Step, VF, Prev}),
- IV(IV), ID(ID) {
- assert((isa<PHINode>(IV) || isa<TruncInst>(IV)) &&
- "Expected either an induction phi-node or a truncate of it!");
- if (SplatVF)
- addOperand(SplatVF);
- }
-
- ~VPWidenIntOrFpInductionBackedgeRecipe() override = default;
-
- VPWidenIntOrFpInductionBackedgeRecipe *clone() override {
- return new VPWidenIntOrFpInductionBackedgeRecipe(
- IV, getOperand(0), getOperand(1), getOperand(2), getOperand(3), ID);
- }
-
- VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionIncSC)
-
- void execute(VPTransformState &State) override;
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- /// Print the recipe.
- void print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const override;
-#endif
-
- VPValue *getStepValue() { return getOperand(0); }
- const VPValue *getStepValue() const { return getOperand(0); }
-
- VPValue *getVFValue() { return getOperand(1); }
- const VPValue *getVFValue() const { return getOperand(1); }
-
- VPValue *getPrevValue() { return getOperand(2); }
- const VPValue *getPrevValue() const { return getOperand(2); }
-
- VPValue *getSplatVFValue() {
- // If the recipe has been unrolled (4 operands), return the VPValue for the
- // induction increment.
- return getNumOperands() == 4 ? getOperand(3) : nullptr;
- }
-
- /// Returns the scalar type of the induction.
- Type *getScalarType() const { return IV->getType(); }
-
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
- assert(is_contained(operands(), Op) &&
- "Op must be an operand of the recipe");
- return Op == getOperand(0) || Op == getOperand(1);
- }
-};
-
class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe,
public VPUnrollPartAccessor<3> {
bool IsScalarAfterVectorization;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index ef0f12524b5a7..6108f2f4e4513 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -245,8 +245,7 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
return inferScalarType(R->getStartValue());
})
.Case<VPWidenIntOrFpInductionRecipe, VPDerivedIVRecipe,
- VPWidenIntOrFpInductionInitialRecipe,
- VPWidenIntOrFpInductionBackedgeRecipe>(
+ VPStepVectorRecipe>(
[](const auto *R) { return R->getScalarType(); })
.Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe,
VPScalarIVStepsRecipe, VPWidenGEPRecipe, VPVectorPointerRecipe,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 82cfe1774f678..1056e3d68f191 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1696,54 +1696,6 @@ InstructionCost VPHeaderPHIRecipe::computeCost(ElementCount VF,
return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
}
-/// This function adds
-/// (0 * Step, 1 * Step, 2 * Step, ...)
-/// to each vector element of Val.
-/// \p Opcode is relevant for FP induction variable.
-static Value *getStepVector(Value *Val, Value *Step,
- Instruction::BinaryOps BinOp, ElementCount VF,
- IRBuilderBase &Builder) {
- assert(VF.isVector() && "only vector VFs are supported");
-
- // Create and check the types.
- auto *ValVTy = cast<VectorType>(Val->getType());
- ElementCount VLen = ValVTy->getElementCount();
-
- Type *STy = Val->getType()->getScalarType();
- assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
- "Induction Step must be an integer or FP");
- assert(Step->getType() == STy && "Step has wrong type");
-
- SmallVector<Constant *, 8> Indices;
-
- // Create a vector of consecutive numbers from zero to VF.
- VectorType *InitVecValVTy = ValVTy;
- if (STy->isFloatingPointTy()) {
- Type *InitVecValSTy =
- IntegerType::get(STy->getContext(), STy->getScalarSizeInBits());
- InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
- }
- Value *InitVec = Builder.CreateStepVector(InitVecValVTy);
-
- if (STy->isIntegerTy()) {
- Step = Builder.CreateVectorSplat(VLen, Step);
- assert(Step->getType() == Val->getType() && "Invalid step vec");
- // FIXME: The newly created binary instructions should contain nsw/nuw
- // flags, which can be found from the original scalar operations.
- Step = Builder.CreateMul(InitVec, Step);
- return Builder.CreateAdd(Val, Step, "induction");
- }
-
- // Floating point induction.
- assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
- "Binary Opcode should be specified for FP induction");
- InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
-
- Step = Builder.CreateVectorSplat(VLen, Step);
- Value *MulOp = Builder.CreateFMul(InitVec, Step);
- return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
-}
-
/// A helper function that returns an integer or floating-point constant with
/// value C.
static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
@@ -1751,38 +1703,6 @@ static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
: ConstantFP::get(Ty, C);
}
-void VPWidenIntOrFpInductionInitialRecipe::execute(VPTransformState &State) {
- assert(!State.Lane && "Int or FP induction being replicated.");
-
- Value *Start = State.get(getStartValue(), true);
- IRBuilderBase &Builder = State.Builder;
- assert(State.VF.isVector() && "must have vector VF");
-
- // Fast-math-flags propagate from the original induction instruction.
- IRBuilder<>::FastMathFlagGuard FMFG(Builder);
- if (isa_and_nonnull<FPMathOperator>(ID.getInductionBinOp()))
- Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());
-
- // Now do the actual transformations, and start with fetching the step value.
- Value *Step = State.get(getStepValue(), VPLane(0));
-
- // Construct the initial value of the vector IV
- Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
- Value *SteppedStart = getStepVector(SplatStart, Step, ID.getInductionOpcode(),
- State.VF, State.Builder);
- State.set(this, SteppedStart);
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void VPWidenIntOrFpInductionInitialRecipe::print(
- raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const {
- O << Indent;
- printAsOperand(O, SlotTracker);
- O << " = WIDEN-INDUCTION-START ";
- printOperands(O, SlotTracker);
-}
-#endif
-
void VPWidenIntOrFpInductionPHIRecipe::execute(VPTransformState &State) {
BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
@@ -1804,74 +1724,6 @@ void VPWidenIntOrFpInductionPHIRecipe::print(raw_ostream &O,
}
#endif
-void VPWidenIntOrFpInductionBackedgeRecipe::execute(VPTransformState &State) {
- IRBuilderBase &Builder = State.Builder;
-
- // Fast-math-flags propagate from the original induction instruction.
- IRBuilder<>::FastMathFlagGuard FMFG(Builder);
- if (isa_and_nonnull<FPMathOperator>(ID.getInductionBinOp()))
- Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());
-
- Value *Step = State.get(getStepValue(), VPLane(0));
-
- auto CurrIP = Builder.saveIP();
- BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
- Builder.SetInsertPoint(VectorPH->getTerminator());
-
- // We create vector phi nodes for both integer and floating-point induction
- // variables. Here, we determine the kind of arithmetic we will perform.
- Instruction::BinaryOps AddOp;
- Instruction::BinaryOps MulOp;
- if (Step->getType()->isIntegerTy()) {
- AddOp = Instruction::Add;
- MulOp = Instruction::Mul;
- } else {
- AddOp = ID.getInductionOpcode();
- MulOp = Instruction::FMul;
- }
-
- Value *SplatVF;
- if (VPValue *SplatVFOperand = getSplatVFValue()) {
- // The recipe has been unrolled. In that case, fetch the splat value for the
- // induction increment.
- SplatVF = State.get(SplatVFOperand);
- } else {
- // Multiply the vectorization factor by the step using integer or
- // floating-point arithmetic as appropriate.
- Type *StepType = Step->getType();
- Value *RuntimeVF = State.get(getVFValue(), VPLane(0));
- if (Step->getType()->isFloatingPointTy())
- RuntimeVF = Builder.CreateUIToFP(RuntimeVF, StepType);
- else
- RuntimeVF = Builder.CreateZExtOrTrunc(RuntimeVF, StepType);
- Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);
-
- // Create a vector splat to use in the induction update.
- SplatVF = Builder.CreateVectorSplat(State.VF, Mul);
- }
-
- Builder.restoreIP(CurrIP);
- Value *PrevVal = State.get(getPrevValue());
-
- Instruction *LastInduction = cast<Instruction>(
- Builder.CreateBinOp(AddOp, PrevVal, SplatVF, "vec.ind.next"));
- if (isa<TruncInst>(IV))
- State.addMetadata(LastInduction, IV);
- LastInduction->setDebugLoc(IV->getDebugLoc());
-
- State.set(this, LastInduction);
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void VPWidenIntOrFpInductionBackedgeRecipe::print(
- raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const {
- O << Indent;
- printAsOperand(O, SlotTracker);
- O << " = WIDEN-INDUCTION-INC ";
- printOperands(O, SlotTracker);
-}
-#endif
-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
@@ -2456,8 +2308,9 @@ Value *VPScalarCastRecipe ::generate(VPTransformState &State) {
switch (Opcode) {
case Instruction::SExt:
case Instruction::ZExt:
- case Instruction::Trunc: {
- // Note: SExt/ZExt not used yet.
+ case Instruction::Trunc:
+ case Instruction::UIToFP: {
+ // Note: SExt not used yet.
Value *Op = State.get(getOperand(0), VPLane(0));
return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);
}
@@ -2481,6 +2334,36 @@ void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent,
}
#endif
+void VPSplatRecipe::execute(VPTransformState &State) {
+ Value *Splat =
+ State.Builder.CreateVectorSplat(State.VF, State.get(getOperand(0), true));
+ State.set(this, Splat);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPSplatRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent;
+ printAsOperand(O, SlotTracker);
+ O << " = SPLAT ";
+ printOperands(O, SlotTracker);
+}
+#endif
+
+void VPStepVectorRecipe::execute(VPTransformState &State) {
+ VectorType *Ty = VectorType::get(ScalarTy, State.VF);
+ State.set(this, State.Builder.CreateStepVector(Ty));
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPStepVectorRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent;
+ printAsOperand(O, SlotTracker);
+ O << " = STEP-VECTOR";
+}
+#endif
+
void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
assert(State.Lane && "Branch on Mask works only on single instance.");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 950eb5a2a01b3..4945288e058fb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2013,7 +2013,7 @@ void VPlanTransforms::createInterleaveGroups(
}
}
-/// Expand a VPWidenIntOrFpInduction into separate recipes for the initial
+/// Expand a VPWidenIntOrFpInduction into executable recipes for the initial
/// value, phi and backedge value. In the followng example:
///
/// vector.ph:
@@ -2021,79 +2021,126 @@ void VPlanTransforms::createInterleaveGroups(
///
/// <x1> vector loop: {
/// vector.body:
-/// WIDEN-INDUCTION %i = phi %bc.resume.val, %i.next, ir<1>, ir<%5>
+/// WIDEN-INDUCTION %i = phi %start, %step, %vf
/// ...
-/// EMIT branch-on-count vp<%index.next>, ir<%n.vec>
+/// EMIT branch-on-count ...
/// No successors
/// }
///
/// WIDEN-INDUCTION will get expanded to:
///
/// vector.ph:
-/// vp<%0> = WIDEN-INDUCTION-START ir<0>, ir<1>
+/// ...
+/// vp<%induction> = ...
+/// vp<%inc> = ...
+///
/// Successor(s): vector loop
///
/// <x1> vector loop: {
/// vector.body:
-/// ir<%i> = WIDEN-INDUCTION-PHI vp<%0>, vp<%4>
+/// ir<%i> = WIDEN-INDUCTION-PHI vp<%induction>, vp<%vec.ind.next>
/// ...
-/// vp<%4> = WIDEN-INDUCTION-INC ir<1>, ir<%5>, ir<%i>
-/// EMIT branch-on-count vp<%index.next>, ir<%n.vec>
+/// vp<%vec.ind.next> = add ir<%i>, vp<%inc>
+/// EMIT branch-on-count ...
/// No successors
/// }
static void
-expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR) {
+expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR,
+ VPTypeAnalysis &TypeInfo) {
VPlan *Plan = WidenIVR->getParent()->getPlan();
VPValue *Start = WidenIVR->getStartValue();
VPValue *Step = WidenIVR->getStepValue();
VPValue *VF = WidenIVR->getVFValue();
- const InductionDescriptor &ID = WidenIVR->getInductionDescriptor();
TruncInst *Trunc = WidenIVR->getTruncInst();
DebugLoc DL = WidenIVR->getDebugLoc();
// The value from the original loop to which we are mapping the new induction
// variable.
Instruction *IV = Trunc ? cast<Instruction>(Trunc) : WidenIVR->getPHINode();
+ Type *Ty = IV->getType();
+
+ const InductionDescriptor &ID = WidenIVR->getInductionDescriptor();
+ Instruction::BinaryOps AddOp;
+ Instruction::BinaryOps MulOp;
+ std::optional<FastMathFlags> FMFs;
+ if (ID.getKind() == InductionDescriptor::IK_IntInduction) {
+ AddOp = Instruction::Add;
+ MulOp = Instruction::Mul;
+ } else {
+ AddOp = ID.getInductionOpcode();
+ MulOp = Instruction::FMul;
+ FMFs = ID.getInductionBinOp()->getFastMathFlags();
+ }
// If the phi is truncated, truncate the start and step values.
VPBuilder Builder(Plan->getVectorPreheader());
if (isa<TruncInst>(IV)) {
assert(Start->getUnderlyingValue()->getType()->isIntegerTy() &&
"Truncation requires an integer type");
- auto *TruncType = cast<IntegerType>(IV->getType());
- Step = Builder.createScalarCast(Instruction::Trunc, Step, TruncType, DL);
- Start = Builder.createScalarCast(Instruction::Trunc, Start, TruncType, DL);
+ Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty, DL);
+ Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty, DL);
}
// Construct the initial value of the vector IV in the vector loop preheader.
- auto *StartR = new VPWidenIntOrFpInductionInitialRecipe(IV, Start, Step, ID);
- Plan->getVectorPreheader()->insert(StartR, Builder.getInsertPoint());
+ Type *IVIntTy = IntegerType::get(IV->getContext(), Ty->getScalarSizeInBits());
+ VPValue *Init = Builder.createStepVector(IVIntTy);
+ if (Ty->isFloatingPointTy())
+ Init = Builder.createWidenCast(Instruction::UIToFP, Init, Ty);
+
+ // FIXME: The newly created binary instructions should contain nsw/nuw
+ // flags, which can be found from the original scalar operations.
+ Init = Builder.createNaryOp(MulOp, {Init, Builder.createSplat(Step)}, FMFs);
+ Init = Builder.createNaryOp(AddOp, {Builder.createSplat(Start), Init}, FMFs,
+ {}, "induction");
// Create the widened phi of the vector IV.
- auto *PhiR = new VPWidenIntOrFpInductionPHIRecipe(IV, StartR);
- PhiR->insertBefore(WidenIVR);
+ auto *WidePHI = new VPWidenIntOrFpInductionPHIRecipe(IV, Init);
+ WidePHI->insertBefore(WidenIVR);
// Create the backedge value for the vector IV.
- VPValue *Prev = PhiR;
- // If unrolled, use the last unrolled part in the increment.
- if (auto *UnrolledPart = WidenIVR->getLastUnrolledPartOperand())
- Prev = UnrolledPart;
- auto *IncR = new VPWidenIntOrFpInductionBackedgeRecipe(
- IV, Step, VF, Prev, WidenIVR->getSplatVFValue(), ID);
+ VPValue *Inc;
+ VPValue *Prev;
+ // If unrolled, use the increment and prev value from the operands.
+ if (WidenIVR->getNumOperands() == 5) {
+ Inc = WidenIVR->getSplatVFValue();
+ Prev = WidenIVR->getLastUnrolledPartOperand();
+ assert(Inc && Prev);
+ } else {
+ unsigned VFTySize = TypeInfo.inferScalarType(VF)->getScalarSizeInBits();
+
+ // Multiply the vectorization factor by the step using integer or
+ // floating-point arithmetic as appropriate.
+ if (Ty->isFloatingPointTy())
+ VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, Ty, DL);
+ else if (VFTySize < Ty->getScalarSizeInBits())
+ VF = Builder.createScalarCast(Instruction::CastOps::ZExt, VF, Ty, DL);
+ else if (VFTySize > Ty->getScalarSizeInBits())
+ VF = Builder.createScalarCast(Instruction::CastOps::Trunc, VF, Ty, DL);
+
+ Inc = Builder.createSplat(Builder.createNaryOp(MulOp, {Step, VF}, FMFs));
+ Prev = WidePHI;
+ }
+
VPBasicBlock *ExitingBB = Plan->getVectorLoopRegion()->getExitingBasicBlock();
- ExitingBB->insert(IncR, ExitingBB->getTerminator()->getIterator());
- PhiR->addOperand(IncR);
+ Builder.setInsertPoint(ExitingBB, ExitingBB->getTerminator()->getIterator());
+ auto *Next = Builder.createNaryOp(AddOp, {Prev, Inc}, FMFs, IV->getDebugLoc(),
+ "vec.ind.next");
+
+ WidePHI->addOperand(Next);
- WidenIVR->replaceAllUsesWith(PhiR);
+ WidenIVR->replaceAllUsesWith(WidePHI);
WidenIVR->eraseFromParent();
}
-void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) {
+void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan,
+ Type *CanonicalIVTy) {
+ VPTypeAnalysis TypeInfo(CanonicalIVTy);
+
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_deep(Plan.getEntry()))) {
for (VPRecipeBase &R : make_early_inc_range(VPBB->phis())) {
if (auto *WidenIVR = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) {
- expandVPWidenIntOrFpInduction(WidenIVR);
+ expandVPWidenIntOrFpInduction(WidenIVR, TypeInfo);
continue;
}
if (!isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(&R))
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 3dd476a8526d6..f77a3e1e370dc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -161,7 +161,7 @@ struct VPlanTransforms {
VPRecipeBuilder &RecipeBuilder);
/// Lower abstract recipes to concrete ones, that can be codegen'd.
- static void convertToConcreteRecipes(VPlan &Plan);
+ static void convertToConcreteRecipes(VPlan &Plan, Type *CanonicalIVTy);
/// Perform instcombine-like simplifications on recipes in \p Plan. Use \p
/// CanonicalIVTy as type for all un-typed live-ins in VPTypeAnalysis.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 8e5148b1616bd..622ebf211e8e6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -354,8 +354,8 @@ class VPDef {
VPWidenSelectSC,
VPBlendSC,
VPHistogramSC,
- VPWidenIntOrFpInductionStartSC,
- VPWidenIntOrFpInductionIncSC,
+ VPSplatSC,
+ VPStepVectorSC,
// START: Phi-like recipes. Need to be kept together.
VPWidenPHISC,
VPPredInstPHISC,
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
index 44148aac54407..8a372d134eebb 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
@@ -20,11 +20,11 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -106,11 +106,11 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
index dfcb1e561551f..96283637b34bf 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
@@ -127,11 +127,11 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP15]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[M]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP20]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[M]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -251,10 +251,10 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP15]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[MUL_2_I]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP18:%.*]] = mul i64 1, [[TMP9]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP18]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP18]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[MUL_2_I]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
@@ -262,7 +262,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP23:%.*]] = udiv <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP23:%.*]] = udiv <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT4]]
; CHECK-NEXT: [[TMP24:%.*]] = urem i64 [[TMP21]], [[MUL_2_I]]
; CHECK-NEXT: [[TMP25:%.*]] = udiv i64 [[TMP24]], [[MUL_1_I]]
; CHECK-NEXT: [[TMP26:%.*]] = urem i64 [[TMP24]], [[MUL_1_I]]
@@ -283,7 +283,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP14]])
; CHECK-NEXT: [[TMP47:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT4]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP48:%.*]] = extractelement <vscale x 2 x i1> [[TMP47]], i32 0
; CHECK-NEXT: br i1 [[TMP48]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll
index a83c62b04afc7..4074210a95b59 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll
@@ -26,10 +26,10 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], 2
; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[IDX]], [[N_VEC]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[IDX]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[DOTSPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 2 x i32> [[TMP8]], splat (i32 1)
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[IDX]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[DOTSPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i32> [[DOTSPLAT]], [[TMP10]]
; CHECK-NEXT: [[TMP13:%.*]] = mul i32 1, [[TMP7]]
; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP13]], i64 0
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
index 19b1e869a914a..08146a737eb1b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
@@ -108,13 +108,13 @@ define void @test_array_load2_i16_store2(i32 %C, i32 %D) #1 {
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP3:%.*]] = shl <vscale x 4 x i64> [[TMP2]], splat (i64 1)
+; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP0]], 3
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP4]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[C:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[D:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT2]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP0]], 3
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP4]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -197,13 +197,13 @@ define void @test_array_load2_store2_i16(i32 noundef %C, i32 noundef %D) #1 {
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP3:%.*]] = shl <vscale x 4 x i64> [[TMP2]], splat (i64 1)
+; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP0]], 3
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP4]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[C:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[D:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP0]], 3
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP4]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -637,16 +637,16 @@ define void @load_gap_reverse(ptr noalias nocapture readonly %P1, ptr noalias no
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[INDUCTION:%.*]] = sub <vscale x 4 x i64> splat (i64 1023), [[TMP2]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[DOTNEG:%.*]], i64 0
+; CHECK-NEXT: [[DOTNEG:%.*]] = sub nsw i64 0, [[TMP1]]
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[DOTNEG]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT: [[X:%.*]] = sub nsw i64 0, [[TMP1]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[X]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[X:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = add nsw <vscale x 4 x i64> [[DOTSPLAT]], [[VEC_IND]]
+; CHECK-NEXT: [[TMP4:%.*]] = add nsw <vscale x 4 x i64> [[BROADCAST_SPLAT]], [[VEC_IND]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P1:%.*]], <vscale x 4 x i64> [[VEC_IND]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P2:%.*]], <vscale x 4 x i64> [[VEC_IND]], i32 1
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> [[TMP6]], i32 8, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i64> poison)
@@ -654,7 +654,7 @@ define void @load_gap_reverse(ptr noalias nocapture readonly %P1, ptr noalias no
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> [[TMP4]], <vscale x 4 x ptr> [[TMP5]], i32 8, <vscale x 4 x i1> splat (i1 true))
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> [[TMP7]], <vscale x 4 x ptr> [[TMP6]], i32 8, <vscale x 4 x i1> splat (i1 true))
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK: middle.block:
@@ -885,10 +885,10 @@ define void @PR27626_0(ptr %p, i32 %z, i64 %n) #1 {
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Z:%.*]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP8]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Z:%.*]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1063,10 +1063,10 @@ define void @PR27626_2(ptr %p, i64 %n, i32 %z) #1 {
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Z:%.*]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP8]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Z:%.*]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1253,15 +1253,15 @@ define void @PR27626_4(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 {
; CHECK-NEXT: [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP9:%.*]] = shl <vscale x 4 x i64> [[TMP8]], splat (i64 1)
+; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP6]], 3
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP11]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Y:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Z:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT3]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP6]], 3
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP11]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1346,15 +1346,15 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 {
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 4 x i64> [[TMP9]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> [[TMP10]], splat (i64 3)
+; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i64 [[TMP7]], 3
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP12]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Y:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Z:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT3]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i64 [[TMP7]], 3
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP12]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1454,13 +1454,13 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 {
; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1
; CHECK-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP15:%.*]] = shl <vscale x 4 x i64> [[TMP14]], splat (i64 1)
+; CHECK-NEXT: [[TMP16:%.*]] = shl nuw nsw i64 [[TMP9]], 3
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP16]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i32 [[TMP11]], 2
; CHECK-NEXT: [[TMP13:%.*]] = add nsw i32 [[TMP12]], -1
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 [[DOTPRE]], i32 [[TMP13]]
-; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i64 [[TMP9]], 3
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP17]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll
index 94e57c7ca023f..a2da67c2cbb81 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll
@@ -38,10 +38,10 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no
; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4
; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
-; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
-; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP5]], i64 0
; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[DOTSPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
+; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALAR_TAIL_FOLDING: vector.body:
; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -114,10 +114,10 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no
; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP3]])
; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024)
; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
-; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
-; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP1]], i64 0
; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[DOTSPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
+; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]]
; PREDICATED_TAIL_FOLDING: vector.body:
; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -222,10 +222,10 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no
; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4
; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
-; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
-; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP5]], i64 0
; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[DOTSPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
+; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALAR_TAIL_FOLDING: vector.body:
; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -283,10 +283,10 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no
; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP3]])
; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024)
; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
-; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
-; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP1]], i64 0
; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[DOTSPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
+; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]]
; PREDICATED_TAIL_FOLDING: vector.body:
; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -380,12 +380,12 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no
; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4
; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
+; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP5]], i64 0
+; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[DOTSPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV3]], i64 0
; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
-; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP5]], i64 0
-; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[DOTSPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALAR_TAIL_FOLDING: vector.body:
; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -450,12 +450,12 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no
; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP3]])
; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024)
; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
+; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP1]], i64 0
+; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[DOTSPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV3]], i64 0
; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
-; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP1]], i64 0
-; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[DOTSPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]]
; PREDICATED_TAIL_FOLDING: vector.body:
; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -567,10 +567,10 @@ define dso_local void @masked_strided_factor4(ptr noalias nocapture readonly %p,
; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4
; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
-; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
-; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP5]], i64 0
; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[DOTSPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
+; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALAR_TAIL_FOLDING: vector.body:
; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -673,10 +673,10 @@ define dso_local void @masked_strided_factor4(ptr noalias nocapture readonly %p,
; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP3]])
; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024)
; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
-; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
-; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP1]], i64 0
; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[DOTSPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
+; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]]
; PREDICATED_TAIL_FOLDING: vector.body:
; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll
index 6e6944b904ae0..109de3e14da0b 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll
@@ -28,13 +28,13 @@ define void @block_with_dead_inst_1(ptr %src, i64 %N) #0 {
; CHECK-NEXT: [[TMP11:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
; CHECK-NEXT: [[TMP13:%.*]] = mul <vscale x 8 x i64> [[TMP11]], splat (i64 3)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP13]]
+; CHECK-NEXT: [[TMP14:%.*]] = mul i64 3, [[TMP10]]
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP14]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], 8
; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 1, i32 [[TMP19]]
-; CHECK-NEXT: [[TMP26:%.*]] = mul i64 3, [[TMP10]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP26]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -122,13 +122,13 @@ define void @block_with_dead_inst_2(ptr %src) #0 {
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 4 x i64> [[TMP8]], splat (i64 3)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP10]]
+; CHECK-NEXT: [[TMP11:%.*]] = mul i64 3, [[TMP7]]
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP11]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 4
; CHECK-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 0, i32 [[TMP16]]
-; CHECK-NEXT: [[TMP22:%.*]] = mul i64 3, [[TMP7]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP22]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -216,13 +216,13 @@ define void @multiple_blocks_with_dead_insts_3(ptr %src) #0 {
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 4 x i64> [[TMP8]], splat (i64 3)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP10]]
+; CHECK-NEXT: [[TMP11:%.*]] = mul i64 3, [[TMP7]]
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP11]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 4
; CHECK-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 0, i32 [[TMP16]]
-; CHECK-NEXT: [[TMP22:%.*]] = mul i64 3, [[TMP7]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP22]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -320,13 +320,13 @@ define void @multiple_blocks_with_dead_insts_4(ptr %src, i64 %N) #0 {
; CHECK-NEXT: [[TMP11:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
; CHECK-NEXT: [[TMP13:%.*]] = mul <vscale x 8 x i64> [[TMP11]], splat (i64 3)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP13]]
+; CHECK-NEXT: [[TMP14:%.*]] = mul i64 3, [[TMP10]]
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP14]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], 8
; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 1, i32 [[TMP19]]
-; CHECK-NEXT: [[TMP25:%.*]] = mul i64 3, [[TMP10]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP25]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -426,13 +426,13 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_5(ptr %src) #0 {
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 4 x i64> [[TMP8]], splat (i64 3)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP10]]
+; CHECK-NEXT: [[TMP11:%.*]] = mul i64 3, [[TMP7]]
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP11]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 4
; CHECK-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 1, i32 [[TMP16]]
-; CHECK-NEXT: [[TMP22:%.*]] = mul i64 3, [[TMP7]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP22]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -540,13 +540,13 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 %
; CHECK-NEXT: [[TMP11:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
; CHECK-NEXT: [[TMP13:%.*]] = mul <vscale x 8 x i64> [[TMP11]], splat (i64 3)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP13]]
+; CHECK-NEXT: [[TMP14:%.*]] = mul i64 3, [[TMP10]]
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP14]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], 8
; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 1, i32 [[TMP19]]
-; CHECK-NEXT: [[TMP25:%.*]] = mul i64 3, [[TMP10]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP25]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
index 3839b367ae08c..8475004091f3f 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
@@ -33,10 +33,10 @@ define void @dead_load(ptr %p, i16 %start) {
; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 8
; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[N_VEC]], 3
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[START_EXT]], [[TMP18]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[START_EXT]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 8 x i64> [[TMP15]], splat (i64 3)
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[START_EXT]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> [[DOTSPLAT]], [[TMP17]]
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 3, [[TMP14]]
; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP20]], i64 0
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll
index 2582882baba00..396b54995fe2a 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll
@@ -70,10 +70,10 @@ define void @skip_free_iv_truncate(i16 %x, ptr %A) #0 {
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32
; CHECK-NEXT: [[TMP50:%.*]] = mul i32 [[DOTCAST]], 3
; CHECK-NEXT: [[IND_END22:%.*]] = add i32 [[X_I32]], [[TMP50]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[X_I64]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP53:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
; CHECK-NEXT: [[TMP55:%.*]] = mul <vscale x 8 x i64> [[TMP53]], splat (i64 3)
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[X_I64]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> [[DOTSPLAT]], [[TMP55]]
; CHECK-NEXT: [[TMP58:%.*]] = mul i64 3, [[TMP52]]
; CHECK-NEXT: [[DOTSPLATINSERT24:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP58]], i64 0
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll
index 7baba88061cd4..c00868c3e0be2 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll
@@ -25,11 +25,11 @@ define void @test(ptr noalias nocapture %a, ptr noalias nocapture %b, i32 %v) {
; VLENUNK-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; VLENUNK-NEXT: [[TMP8:%.*]] = mul <vscale x 4 x i64> [[TMP6]], splat (i64 1)
; VLENUNK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP8]]
-; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[V:%.*]], i64 0
-; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; VLENUNK-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP5]]
; VLENUNK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP11]], i64 0
; VLENUNK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[V:%.*]], i64 0
+; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]]
; VLENUNK: vector.body:
; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll
index 44af6c77a573c..ae6e833da01cf 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll
@@ -23,15 +23,15 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 8 x i64> [[TMP6]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP8]]
+; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]]
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP9]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[A]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[B]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[C]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT3]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP9]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
index 9602d4cbf5df3..6a5a0eb3e54b0 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
@@ -560,23 +560,23 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
; STRIDED-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; STRIDED-NEXT: [[TMP14:%.*]] = mul <vscale x 4 x i64> [[TMP12]], splat (i64 1)
; STRIDED-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP14]]
-; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[STRIDE]], i64 0
-; STRIDED-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; STRIDED-NEXT: [[TMP15:%.*]] = mul i64 1, [[TMP11]]
-; STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP15]], i64 0
+; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP15]], i64 0
+; STRIDED-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[STRIDE]], i64 0
; STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; STRIDED: vector.body:
; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; STRIDED-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; STRIDED-NEXT: [[TMP18:%.*]] = mul nuw nsw <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; STRIDED-NEXT: [[TMP18:%.*]] = mul nuw nsw <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; STRIDED-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[P]], <vscale x 4 x i64> [[TMP18]]
; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP19]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison), !alias.scope [[META8:![0-9]+]]
; STRIDED-NEXT: [[TMP20:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], splat (i32 1)
; STRIDED-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[P2]], <vscale x 4 x i64> [[TMP18]]
; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP20]], <vscale x 4 x ptr> [[TMP21]], i32 4, <vscale x 4 x i1> splat (i1 true)), !alias.scope [[META11:![0-9]+]], !noalias [[META8]]
; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
-; STRIDED-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; STRIDED-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
; STRIDED-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; STRIDED-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; STRIDED: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
index e3f0700cf352e..219e1fe05ebf7 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
@@ -325,11 +325,11 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
; SCALABLE-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; SCALABLE-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i64> [[TMP6]], splat (i64 1)
; SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP8]]
-; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[B:%.*]], i64 0
-; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]]
; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i64 0
; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[B:%.*]], i64 0
+; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -432,11 +432,11 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul <vscale x 2 x i64> [[TMP5]], splat (i64 1)
; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP7]]
-; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[B:%.*]], i64 0
-; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP4]]
; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8]], i64 0
; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[B:%.*]], i64 0
+; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; TF-SCALABLE: vector.body:
; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -995,12 +995,12 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias
; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul <vscale x 2 x i64> [[TMP5]], splat (i64 1)
; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP7]]
+; TF-SCALABLE-NEXT: [[V:%.*]] = mul i64 1, [[TMP4]]
+; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V]], i64 0
+; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[B:%.*]], i64 0
; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
-; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
-; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP4]]
-; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8]], i64 0
+; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8:%.*]], i64 0
; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; TF-SCALABLE: vector.body:
@@ -1011,9 +1011,9 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias
; TF-SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[VEC_IND]], <vscale x 2 x ptr> [[BROADCAST_SPLAT]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; TF-SCALABLE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP9]]
; TF-SCALABLE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0
-; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP11]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
+; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[DOTSPLAT]], ptr [[TMP11]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
-; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT2]]
; TF-SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; TF-SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; TF-SCALABLE: middle.block:
@@ -1025,7 +1025,7 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias
; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; TF-SCALABLE-NEXT: store i64 [[IV]], ptr [[B]], align 8
; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
-; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
+; TF-SCALABLE-NEXT: store i64 [[TMP8]], ptr [[ARRAYIDX]], align 8
; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
@@ -1125,25 +1125,25 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
; SCALABLE-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; SCALABLE-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i64> [[TMP6]], splat (i64 1)
; SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP8]]
-; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
+; SCALABLE-NEXT: [[V:%.*]] = mul i64 1, [[TMP5]]
+; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V]], i64 0
; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9:%.*]], i64 0
+; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[B:%.*]], i64 0
; SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
-; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]]
-; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i64 0
-; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SCALABLE-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; SCALABLE-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0
; SCALABLE-NEXT: [[TMP11:%.*]] = icmp ugt <vscale x 2 x i64> [[VEC_IND]], splat (i64 10)
-; SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x ptr> [[BROADCAST_SPLAT2]], i32 8, <vscale x 2 x i1> [[TMP11]])
+; SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[DOTSPLAT]], <vscale x 2 x ptr> [[BROADCAST_SPLAT2]], i32 8, <vscale x 2 x i1> [[TMP11]])
; SCALABLE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP10]]
; SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0
-; SCALABLE-NEXT: store <vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP13]], align 8
+; SCALABLE-NEXT: store <vscale x 2 x i64> [[DOTSPLAT]], ptr [[TMP13]], align 8
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; SCALABLE: middle.block:
@@ -1157,11 +1157,11 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
; SCALABLE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10
; SCALABLE-NEXT: br i1 [[CMP]], label [[DO_STORE:%.*]], label [[LATCH]]
; SCALABLE: do_store:
-; SCALABLE-NEXT: store i64 [[V]], ptr [[B]], align 8
+; SCALABLE-NEXT: store i64 [[TMP9]], ptr [[B]], align 8
; SCALABLE-NEXT: br label [[LATCH]]
; SCALABLE: latch:
; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
-; SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
+; SCALABLE-NEXT: store i64 [[TMP9]], ptr [[ARRAYIDX]], align 8
; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
@@ -1231,13 +1231,13 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul <vscale x 2 x i64> [[TMP5]], splat (i64 1)
; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP7]]
-; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
+; TF-SCALABLE-NEXT: [[V:%.*]] = mul i64 1, [[TMP4]]
+; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V]], i64 0
; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8:%.*]], i64 0
+; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[B:%.*]], i64 0
; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
-; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP4]]
-; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8]], i64 0
-; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; TF-SCALABLE: vector.body:
; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1246,12 +1246,12 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP9]], i64 1025)
; TF-SCALABLE-NEXT: [[TMP10:%.*]] = icmp ugt <vscale x 2 x i64> [[VEC_IND]], splat (i64 10)
; TF-SCALABLE-NEXT: [[TMP11:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP10]], <vscale x 2 x i1> zeroinitializer
-; TF-SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x ptr> [[BROADCAST_SPLAT2]], i32 8, <vscale x 2 x i1> [[TMP11]])
+; TF-SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[DOTSPLAT]], <vscale x 2 x ptr> [[BROADCAST_SPLAT2]], i32 8, <vscale x 2 x i1> [[TMP11]])
; TF-SCALABLE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP9]]
; TF-SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0
-; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP13]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
+; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[DOTSPLAT]], ptr [[TMP13]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
-; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; TF-SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; TF-SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; TF-SCALABLE: middle.block:
@@ -1264,11 +1264,11 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
; TF-SCALABLE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10
; TF-SCALABLE-NEXT: br i1 [[CMP]], label [[DO_STORE:%.*]], label [[LATCH]]
; TF-SCALABLE: do_store:
-; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[B]], align 8
+; TF-SCALABLE-NEXT: store i64 [[TMP8]], ptr [[B]], align 8
; TF-SCALABLE-NEXT: br label [[LATCH]]
; TF-SCALABLE: latch:
; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
-; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
+; TF-SCALABLE-NEXT: store i64 [[TMP8]], ptr [[ARRAYIDX]], align 8
; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll
index 7045b4e03ed8c..8893956bb3384 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll
@@ -604,11 +604,11 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul <vscale x 4 x i32> [[TMP12]], splat (i32 1)
; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP14]]
-; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP10]] to i32
; NO-VP-OUTLOOP-NEXT: [[TMP17:%.*]] = mul i32 1, [[TMP16]]
; NO-VP-OUTLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP17]], i64 0
; NO-VP-OUTLOOP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
; NO-VP-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
; NO-VP-OUTLOOP: vector.body:
; NO-VP-OUTLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -796,11 +796,11 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul <vscale x 4 x i32> [[TMP12]], splat (i32 1)
; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP14]]
-; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP10]] to i32
; NO-VP-OUTLOOP-NEXT: [[TMP17:%.*]] = mul i32 1, [[TMP16]]
; NO-VP-OUTLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP17]], i64 0
; NO-VP-OUTLOOP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
; NO-VP-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
; NO-VP-OUTLOOP: vector.body:
; NO-VP-OUTLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
index 3d47d8217b5f5..5c1cb1da120c0 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
@@ -197,28 +197,28 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l,
; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[L]], [[N_MOD_VF4]]
; CHECK-NEXT: [[DOTCAST7:%.*]] = trunc i64 [[N_VEC5]] to i16
; CHECK-NEXT: [[IND_END8:%.*]] = mul i16 [[DOTCAST7]], [[TMP0]]
-; CHECK-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement <8 x i16> poison, i16 [[BC_RESUME_VAL]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i64 0
; CHECK-NEXT: [[DOTSPLAT14:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT13]], <8 x i16> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT: [[DOTSPLATINSERT15:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i64 0
+; CHECK-NEXT: [[TMP14:%.*]] = mul <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, [[DOTSPLAT14]]
+; CHECK-NEXT: [[DOTSPLATINSERT15:%.*]] = insertelement <8 x i16> poison, i16 [[BC_RESUME_VAL]], i64 0
; CHECK-NEXT: [[DOTSPLAT16:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT15]], <8 x i16> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = mul <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, [[DOTSPLAT16]]
-; CHECK-NEXT: [[INDUCTION17:%.*]] = add <8 x i16> [[DOTSPLAT14]], [[TMP14]]
-; CHECK-NEXT: [[DOTSPLATINSERT18:%.*]] = insertelement <8 x i16> poison, i16 [[OFF]], i64 0
-; CHECK-NEXT: [[DOTSPLAT19:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT18]], <8 x i16> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[INDUCTION17:%.*]] = add <8 x i16> [[DOTSPLAT16]], [[TMP14]]
; CHECK-NEXT: [[TMP15:%.*]] = mul i16 [[TMP0]], 8
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT22:%.*]] = insertelement <8 x i16> poison, i16 [[TMP15]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT18:%.*]] = insertelement <8 x i16> poison, i16 [[TMP15]], i64 0
+; CHECK-NEXT: [[DOTSPLAT19:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT18]], <8 x i16> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT22:%.*]] = insertelement <8 x i16> poison, i16 [[OFF]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT23:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT22]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX12:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT24:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND20:%.*]] = phi <8 x i16> [ [[INDUCTION17]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT21:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX12]], 0
-; CHECK-NEXT: [[TMP17:%.*]] = sub <8 x i16> [[VEC_IND20]], [[DOTSPLAT19]]
+; CHECK-NEXT: [[TMP17:%.*]] = sub <8 x i16> [[VEC_IND20]], [[BROADCAST_SPLAT23]]
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[K]], i64 [[TMP16]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[TMP18]], i32 0
; CHECK-NEXT: store <8 x i16> [[TMP17]], ptr [[TMP19]], align 2
; CHECK-NEXT: [[INDEX_NEXT24]] = add nuw i64 [[INDEX12]], 8
-; CHECK-NEXT: [[VEC_IND_NEXT21]] = add <8 x i16> [[VEC_IND20]], [[BROADCAST_SPLAT23]]
+; CHECK-NEXT: [[VEC_IND_NEXT21]] = add <8 x i16> [[VEC_IND20]], [[DOTSPLAT19]]
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT24]], [[N_VEC5]]
; CHECK-NEXT: br i1 [[TMP20]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: vec.epilog.middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
index 769d1e3569cda..45d1385fd6fa9 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
@@ -713,11 +713,11 @@ define void @wombat(i32 %arg, ptr %dst) #1 {
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = mul i32 56, [[ARG]]
; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[MUL]], [[TMP0]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[MUL]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[ARG]], i64 0
; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = mul <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[DOTSPLAT2]]
+; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 [[MUL]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[ARG]], 8
; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <8 x i32> poison, i32 [[TMP2]], i64 0
@@ -787,11 +787,11 @@ define void @wombat2(i32 %arg, ptr %dst) #1 {
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = mul i32 56, [[ARG]]
; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[MUL]], [[TMP0]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[MUL]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[ARG]], i64 0
; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = mul <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[DOTSPLAT2]]
+; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 [[MUL]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[ARG]], 8
; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <8 x i32> poison, i32 [[TMP2]], i64 0
@@ -864,11 +864,11 @@ define void @with_dead_use(i32 %arg, ptr %dst) #1 {
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = mul i32 56, [[ARG]]
; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[MUL]], [[TMP0]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[MUL]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[ARG]], i64 0
; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = mul <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[DOTSPLAT2]]
+; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 [[MUL]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[ARG]], 8
; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <8 x i32> poison, i32 [[TMP2]], i64 0
diff --git a/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll b/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll
index e10b801844985..c745b4f74786c 100644
--- a/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll
+++ b/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll
@@ -50,10 +50,10 @@ define void @test(i32 %arg, i32 %L1.limit, i32 %L2.switch, i1 %c, ptr %dst) {
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> splat (i32 1), [[TMP4]]
-; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <4 x i32> poison, i32 [[L1_EXIT_VAL]], i64 0
-; CHECK-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT3]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[INDUCTION_IV_LCSSA1]], 4
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i64 0
+; CHECK-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT3]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[L1_EXIT_VAL]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
@@ -61,13 +61,13 @@ define void @test(i32 %arg, i32 %L1.limit, i32 %L2.switch, i1 %c, ptr %dst) {
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = sub <4 x i32> [[VEC_IND]], [[DOTSPLAT4]]
+; CHECK-NEXT: [[TMP7:%.*]] = sub <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i32> [[TMP7]] to <4 x i64>
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST:%.*]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP9]], i32 0
; CHECK-NEXT: store <4 x i64> [[TMP8]], ptr [[TMP10]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], [[DOTSPLAT4]]
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 12
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll
index b94520ce5abfe..9aff603ef5fce 100644
--- a/llvm/test/Transforms/LoopVectorize/float-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll
@@ -32,12 +32,12 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N)
; VEC4_INTERL1-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float
; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]]
; VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP1]]
-; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0
+; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
-; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0
+; VEC4_INTERL1-NEXT: [[TMP6:%.*]] = fmul fast <4 x float> [[DOTSPLAT]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
+; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer
-; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
-; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fsub fast <4 x float> [[DOTSPLAT]], [[TMP2]]
+; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fsub fast <4 x float> [[DOTSPLAT3]], [[TMP6]]
; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], 4.000000e+00
; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT4]], <4 x float> poison, <4 x i32> zeroinitializer
@@ -90,12 +90,12 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N)
; VEC4_INTERL2-NEXT: [[FPINC_INS:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0
; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[FPINC_INS]], <float 4.000000e+00, float poison, float poison, float poison>
; VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <4 x i32> zeroinitializer
-; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0
+; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0
; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
-; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0
+; VEC4_INTERL2-NEXT: [[TMP7:%.*]] = fmul fast <4 x float> [[DOTSPLAT]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
+; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0
; VEC4_INTERL2-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer
-; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
-; VEC4_INTERL2-NEXT: [[INDUCTION:%.*]] = fsub fast <4 x float> [[DOTSPLAT]], [[TMP2]]
+; VEC4_INTERL2-NEXT: [[INDUCTION:%.*]] = fsub fast <4 x float> [[DOTSPLAT3]], [[TMP7]]
; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]]
; VEC4_INTERL2: vector.body:
; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -196,12 +196,12 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N)
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]]
; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP1]]
-; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0
+; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
-; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0
+; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = fmul fast <2 x float> [[DOTSPLAT]], <float 0.000000e+00, float 1.000000e+00>
+; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer
-; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00>
-; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fsub fast <2 x float> [[DOTSPLAT]], [[TMP2]]
+; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fsub fast <2 x float> [[DOTSPLAT3]], [[TMP6]]
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], 2.000000e+00
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT4]], <2 x float> poison, <2 x i32> zeroinitializer
@@ -279,12 +279,12 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32
; VEC4_INTERL1-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float
; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = fmul reassoc float [[FPINC]], [[DOTCAST]]
; VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP1]]
-; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0
+; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
-; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0
+; VEC4_INTERL1-NEXT: [[TMP6:%.*]] = fmul reassoc <4 x float> [[DOTSPLAT]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
+; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer
-; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = fmul reassoc <4 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
-; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fsub reassoc <4 x float> [[DOTSPLAT]], [[TMP2]]
+; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fsub reassoc <4 x float> [[DOTSPLAT3]], [[TMP6]]
; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul reassoc float [[FPINC]], 4.000000e+00
; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT4]], <4 x float> poison, <4 x i32> zeroinitializer
@@ -337,12 +337,12 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32
; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0
; VEC4_INTERL2-NEXT: [[MUL:%.*]] = fmul reassoc <4 x float> [[DOTSPLATINSERT2]], <float 4.000000e+00, float poison, float poison, float poison>
; VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[MUL]], <4 x float> poison, <4 x i32> zeroinitializer
-; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0
+; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0
; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
-; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0
+; VEC4_INTERL2-NEXT: [[TMP7:%.*]] = fmul reassoc <4 x float> [[DOTSPLAT]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
+; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0
; VEC4_INTERL2-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT1]], <4 x float> poison, <4 x i32> zeroinitializer
-; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = fmul reassoc <4 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
-; VEC4_INTERL2-NEXT: [[INDUCTION:%.*]] = fsub reassoc <4 x float> [[DOTSPLAT]], [[TMP2]]
+; VEC4_INTERL2-NEXT: [[INDUCTION:%.*]] = fsub reassoc <4 x float> [[DOTSPLAT3]], [[TMP7]]
; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]]
; VEC4_INTERL2: vector.body:
; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -445,12 +445,12 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = fmul reassoc float [[FPINC]], [[DOTCAST]]
; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP1]]
-; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0
+; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
-; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0
+; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = fmul reassoc <2 x float> [[DOTSPLAT]], <float 0.000000e+00, float 1.000000e+00>
+; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer
-; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = fmul reassoc <2 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00>
-; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fsub reassoc <2 x float> [[DOTSPLAT]], [[TMP2]]
+; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fsub reassoc <2 x float> [[DOTSPLAT3]], [[TMP6]]
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul reassoc float [[FPINC]], 2.000000e+00
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT4]], <2 x float> poison, <2 x i32> zeroinitializer
@@ -764,16 +764,16 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca
; VEC4_INTERL1-NEXT: [[DOTCAST2:%.*]] = uitofp nneg i64 [[N_VEC]] to float
; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP0]], [[DOTCAST2]]
; VEC4_INTERL1-NEXT: [[IND_END3:%.*]] = fadd fast float [[INIT:%.*]], [[TMP3]]
-; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0
+; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
-; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0
+; VEC4_INTERL1-NEXT: [[TMP13:%.*]] = fmul fast <4 x float> [[DOTSPLAT]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
+; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT5]], <4 x float> poison, <4 x i32> zeroinitializer
-; VEC4_INTERL1-NEXT: [[TMP4:%.*]] = fmul fast <4 x float> [[DOTSPLAT6]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
-; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fadd fast <4 x float> [[DOTSPLAT]], [[TMP4]]
-; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT7:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0
+; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fadd fast <4 x float> [[DOTSPLAT6]], [[TMP13]]
+; VEC4_INTERL1-NEXT: [[TMP14:%.*]] = fmul fast float [[TMP0]], 4.000000e+00
+; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT7:%.*]] = insertelement <4 x float> poison, float [[TMP14]], i64 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT8:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT7]], <4 x float> poison, <4 x i32> zeroinitializer
-; VEC4_INTERL1-NEXT: [[TMP13:%.*]] = fmul fast float [[TMP0]], 4.000000e+00
-; VEC4_INTERL1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP13]], i64 0
+; VEC4_INTERL1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0
; VEC4_INTERL1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]]
; VEC4_INTERL1: vector.body:
@@ -782,7 +782,7 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca
; VEC4_INTERL1-NEXT: [[VEC_IND9:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT10:%.*]], [[VECTOR_BODY]] ]
; VEC4_INTERL1-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
; VEC4_INTERL1-NEXT: store <4 x float> [[VEC_IND9]], ptr [[TMP6]], align 4
-; VEC4_INTERL1-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[VEC_IND9]], [[DOTSPLAT8]]
+; VEC4_INTERL1-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[VEC_IND9]], [[BROADCAST_SPLAT]]
; VEC4_INTERL1-NEXT: [[TMP8:%.*]] = fadd fast <4 x float> [[VEC_IND]], splat (float -5.000000e-01)
; VEC4_INTERL1-NEXT: [[TMP9:%.*]] = fadd fast <4 x float> [[TMP8]], [[TMP7]]
; VEC4_INTERL1-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
@@ -791,7 +791,7 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca
; VEC4_INTERL1-NEXT: store <4 x float> [[TMP8]], ptr [[TMP11]], align 4
; VEC4_INTERL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], splat (float -2.000000e+00)
-; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT10]] = fadd fast <4 x float> [[VEC_IND9]], [[BROADCAST_SPLAT]]
+; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT10]] = fadd fast <4 x float> [[VEC_IND9]], [[DOTSPLAT8]]
; VEC4_INTERL1-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VEC4_INTERL1-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; VEC4_INTERL1: middle.block:
@@ -844,12 +844,12 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca
; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0
; VEC4_INTERL2-NEXT: [[TMP19:%.*]] = fmul fast <4 x float> [[DOTSPLATINSERT2]], <float 4.000000e+00, float poison, float poison, float poison>
; VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[TMP19]], <4 x float> poison, <4 x i32> zeroinitializer
-; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0
+; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0
; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
-; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT6:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0
+; VEC4_INTERL2-NEXT: [[TMP20:%.*]] = fmul fast <4 x float> [[DOTSPLAT]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
+; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT6:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0
; VEC4_INTERL2-NEXT: [[DOTSPLAT7:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT6]], <4 x float> poison, <4 x i32> zeroinitializer
-; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = fmul fast <4 x float> [[DOTSPLAT7]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
-; VEC4_INTERL2-NEXT: [[INDUCTION:%.*]] = fadd fast <4 x float> [[DOTSPLAT]], [[TMP4]]
+; VEC4_INTERL2-NEXT: [[INDUCTION:%.*]] = fadd fast <4 x float> [[DOTSPLAT7]], [[TMP20]]
; VEC4_INTERL2-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0
; VEC4_INTERL2-NEXT: [[BROADCAST:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT7]], <4 x float> poison, <4 x i32> zeroinitializer
; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -1006,16 +1006,16 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST2:%.*]] = uitofp nneg i64 [[N_VEC]] to float
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP0]], [[DOTCAST2]]
; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END3:%.*]] = fadd fast float [[INIT:%.*]], [[TMP3]]
-; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0
+; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
-; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0
+; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP13:%.*]] = fmul fast <2 x float> [[DOTSPLAT]], <float 0.000000e+00, float 1.000000e+00>
+; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT5]], <2 x float> poison, <2 x i32> zeroinitializer
-; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[DOTSPLAT6]], <float 0.000000e+00, float 1.000000e+00>
-; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fadd fast <2 x float> [[DOTSPLAT]], [[TMP4]]
-; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT7:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0
+; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fadd fast <2 x float> [[DOTSPLAT6]], [[TMP13]]
+; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP14:%.*]] = fmul fast float [[TMP0]], 2.000000e+00
+; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT7:%.*]] = insertelement <2 x float> poison, float [[TMP14]], i64 0
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT8:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT7]], <2 x float> poison, <2 x i32> zeroinitializer
-; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP13:%.*]] = fmul fast float [[TMP0]], 2.000000e+00
-; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP13]], i64 0
+; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0
; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]]
; VEC2_INTERL1_PRED_STORE: vector.body:
@@ -1024,7 +1024,7 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca
; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND9:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT10:%.*]], [[VECTOR_BODY]] ]
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND9]], ptr [[TMP6]], align 4
-; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = fadd fast <2 x float> [[VEC_IND9]], [[DOTSPLAT8]]
+; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = fadd fast <2 x float> [[VEC_IND9]], [[BROADCAST_SPLAT]]
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP8:%.*]] = fadd fast <2 x float> [[VEC_IND]], splat (float -5.000000e-01)
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP9:%.*]] = fadd fast <2 x float> [[TMP8]], [[TMP7]]
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
@@ -1033,7 +1033,7 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca
; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[TMP8]], ptr [[TMP11]], align 4
; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], splat (float -1.000000e+00)
-; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT10]] = fadd fast <2 x float> [[VEC_IND9]], [[BROADCAST_SPLAT]]
+; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT10]] = fadd fast <2 x float> [[VEC_IND9]], [[DOTSPLAT8]]
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; VEC2_INTERL1_PRED_STORE: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/induction-step.ll b/llvm/test/Transforms/LoopVectorize/induction-step.ll
index 80a4754a0b63a..343c5ee538790 100644
--- a/llvm/test/Transforms/LoopVectorize/induction-step.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction-step.ll
@@ -33,11 +33,11 @@ define void @induction_with_global(i32 %init, ptr noalias nocapture %A, i32 %N)
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[DOTCAST]], [[TMP0]]
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INIT]], [[TMP3]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[INIT]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 [[TMP0]], i64 0
; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = mul <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[DOTSPLAT3]]
+; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[INIT]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION4:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP6]]
; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP0]], 8
; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <8 x i32> poison, i32 [[TMP7]], i64 0
@@ -124,19 +124,19 @@ define i32 @induction_with_loop_inv(i32 %init, ptr noalias nocapture %A, i32 %N,
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32
; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[DOTCAST]], [[J_012]]
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[X_011]], [[TMP1]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[X_011]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[J_012]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 [[J_012]], i64 0
+; CHECK-NEXT: [[TMP12:%.*]] = mul <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[DOTSPLAT]]
+; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 [[X_011]], i64 0
; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = mul <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[DOTSPLAT3]]
-; CHECK-NEXT: [[INDUCTION4:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP4]]
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i32> [[DOTSPLAT3]], [[TMP12]]
; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[J_012]], 8
; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <8 x i32> poison, i32 [[TMP5]], i64 0
; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT5]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ [[INDUCTION4]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
@@ -350,22 +350,22 @@ define void @wide_add_induction_step_live_in(ptr %dst, i64 %N, i16 %off) {
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = mul <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, [[DOTSPLAT]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i16> zeroinitializer, [[TMP1]]
-; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i16> poison, i16 [[O_1]], i64 0
-; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT1]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = mul i16 [[O_1]], 8
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i64 0
+; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT1]], <8 x i16> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[O_1]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i16> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = add <8 x i16> [[VEC_IND]], [[DOTSPLAT2]]
+; CHECK-NEXT: [[TMP4:%.*]] = add <8 x i16> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0
; CHECK-NEXT: store <8 x i16> [[TMP4]], ptr [[TMP6]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i16> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i16> [[VEC_IND]], [[DOTSPLAT2]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -422,22 +422,22 @@ define void @wide_sub_induction_step_live_in(ptr %dst, i64 %N, i16 %off) {
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, [[DOTSPLAT]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i16> zeroinitializer, [[TMP2]]
-; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i16> poison, i16 [[O_1]], i64 0
-; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT1]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = mul i16 [[TMP0]], 8
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP3]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP3]], i64 0
+; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT1]], <8 x i16> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[O_1]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i16> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = sub <8 x i16> [[VEC_IND]], [[DOTSPLAT2]]
+; CHECK-NEXT: [[TMP5:%.*]] = sub <8 x i16> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 0
; CHECK-NEXT: store <8 x i16> [[TMP5]], ptr [[TMP7]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i16> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i16> [[VEC_IND]], [[DOTSPLAT2]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
index 848831eab8165..82a17297f6072 100644
--- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
@@ -752,11 +752,11 @@ define float @fp_postinc_use_fadd(float %init, ptr noalias nocapture %A, i64 %N,
; VEC-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float
; VEC-NEXT: [[TMP0:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]]
; VEC-NEXT: [[TMP1:%.*]] = fadd fast float [[INIT]], [[TMP0]]
-; VEC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0
-; VEC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
; VEC-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0
; VEC-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT1]], <2 x float> poison, <2 x i32> zeroinitializer
; VEC-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> <float 0.000000e+00, float 1.000000e+00>, [[DOTSPLAT2]]
+; VEC-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0
+; VEC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer
; VEC-NEXT: [[INDUCTION:%.*]] = fadd fast <2 x float> [[DOTSPLAT]], [[TMP2]]
; VEC-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], 2.000000e+00
; VEC-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0
@@ -872,11 +872,11 @@ define float @fp_postinc_use_fadd_ops_swapped(float %init, ptr noalias nocapture
; VEC-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float
; VEC-NEXT: [[TMP0:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]]
; VEC-NEXT: [[TMP1:%.*]] = fadd fast float [[INIT]], [[TMP0]]
-; VEC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0
-; VEC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
; VEC-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0
; VEC-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT1]], <2 x float> poison, <2 x i32> zeroinitializer
; VEC-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> <float 0.000000e+00, float 1.000000e+00>, [[DOTSPLAT2]]
+; VEC-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0
+; VEC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer
; VEC-NEXT: [[INDUCTION:%.*]] = fadd fast <2 x float> [[DOTSPLAT]], [[TMP2]]
; VEC-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], 2.000000e+00
; VEC-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0
@@ -992,11 +992,11 @@ define float @fp_postinc_use_fsub(float %init, ptr noalias nocapture %A, i64 %N,
; VEC-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float
; VEC-NEXT: [[TMP0:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]]
; VEC-NEXT: [[TMP1:%.*]] = fsub fast float [[INIT]], [[TMP0]]
-; VEC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0
-; VEC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
; VEC-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0
; VEC-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT1]], <2 x float> poison, <2 x i32> zeroinitializer
; VEC-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> <float 0.000000e+00, float 1.000000e+00>, [[DOTSPLAT2]]
+; VEC-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0
+; VEC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer
; VEC-NEXT: [[INDUCTION:%.*]] = fsub fast <2 x float> [[DOTSPLAT]], [[TMP2]]
; VEC-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], 2.000000e+00
; VEC-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0
diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
index 4db0153fe8491..6411a3031d1af 100644
--- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
@@ -503,12 +503,12 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 84, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[IND_END4:%.*]] = mul i8 84, [[INDUCTION_IV]]
-; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <4 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <4 x i8> poison, i8 [[INDUCTION_IV]], i64 0
; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT8]], <4 x i8> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[DOTSPLATINSERT10:%.*]] = insertelement <4 x i8> poison, i8 [[INDUCTION_IV]], i64 0
+; CHECK-NEXT: [[TMP14:%.*]] = mul <4 x i8> <i8 0, i8 1, i8 2, i8 3>, [[DOTSPLAT9]]
+; CHECK-NEXT: [[DOTSPLATINSERT10:%.*]] = insertelement <4 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0
; CHECK-NEXT: [[DOTSPLAT11:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT10]], <4 x i8> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i8> <i8 0, i8 1, i8 2, i8 3>, [[DOTSPLAT11]]
-; CHECK-NEXT: [[INDUCTION12:%.*]] = add <4 x i8> [[DOTSPLAT9]], [[TMP8]]
+; CHECK-NEXT: [[INDUCTION12:%.*]] = add <4 x i8> [[DOTSPLAT11]], [[TMP14]]
; CHECK-NEXT: [[TMP9:%.*]] = mul i8 [[INDUCTION_IV]], 4
; CHECK-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement <4 x i8> poison, i8 [[TMP9]], i64 0
; CHECK-NEXT: [[DOTSPLAT14:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT13]], <4 x i8> poison, <4 x i32> zeroinitializer
@@ -593,12 +593,12 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 84, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[IND_END4:%.*]] = mul i8 84, [[INDUCTION_IV]]
-; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <2 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0
+; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <2 x i8> poison, i8 [[INDUCTION_IV]], i64 0
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT8]], <2 x i8> poison, <2 x i32> zeroinitializer
-; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT10:%.*]] = insertelement <2 x i8> poison, i8 [[INDUCTION_IV]], i64 0
+; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP14:%.*]] = mul <2 x i8> <i8 0, i8 1>, [[DOTSPLAT9]]
+; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT10:%.*]] = insertelement <2 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT11:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT10]], <2 x i8> poison, <2 x i32> zeroinitializer
-; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP8:%.*]] = mul <2 x i8> <i8 0, i8 1>, [[DOTSPLAT11]]
-; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDUCTION12:%.*]] = add <2 x i8> [[DOTSPLAT9]], [[TMP8]]
+; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDUCTION12:%.*]] = add <2 x i8> [[DOTSPLAT11]], [[TMP14]]
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP9:%.*]] = mul i8 [[INDUCTION_IV]], 2
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement <2 x i8> poison, i8 [[TMP9]], i64 0
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT14:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT13]], <2 x i8> poison, <2 x i32> zeroinitializer
>From e7f5a4eb653687585c765e9e8488313dfd0d548c Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Fri, 13 Dec 2024 01:43:23 +0800
Subject: [PATCH 03/16] Only truncate the IV step, don't widen it
---
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 4945288e058fb..3438b547a7a15 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2106,15 +2106,11 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR,
Prev = WidenIVR->getLastUnrolledPartOperand();
assert(Inc && Prev);
} else {
- unsigned VFTySize = TypeInfo.inferScalarType(VF)->getScalarSizeInBits();
-
// Multiply the vectorization factor by the step using integer or
// floating-point arithmetic as appropriate.
if (Ty->isFloatingPointTy())
VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, Ty, DL);
- else if (VFTySize < Ty->getScalarSizeInBits())
- VF = Builder.createScalarCast(Instruction::CastOps::ZExt, VF, Ty, DL);
- else if (VFTySize > Ty->getScalarSizeInBits())
+ else if (Ty != TypeInfo.inferScalarType(VF))
VF = Builder.createScalarCast(Instruction::CastOps::Trunc, VF, Ty, DL);
Inc = Builder.createSplat(Builder.createNaryOp(MulOp, {Step, VF}, FMFs));
>From 31bdc7c794ebb8d8a0d895cc782b7338b7ac080a Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Mon, 16 Dec 2024 12:23:19 +0800
Subject: [PATCH 04/16] Use recipe debugloc after
734a204fbd4b790048c57f79351ad8beeb1000ce
---
llvm/lib/Transforms/Vectorize/VPlan.h | 7 ++++---
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 2 +-
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 7 ++++---
3 files changed, 9 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index bd860c177e6ec..52459dcac2943 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1901,8 +1901,8 @@ class VPWidenIntOrFpInductionPHIRecipe : public VPHeaderPHIRecipe {
Instruction *IV;
public:
- VPWidenIntOrFpInductionPHIRecipe(Instruction *IV, VPValue *Start)
- : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionPHISC, IV, Start),
+ VPWidenIntOrFpInductionPHIRecipe(Instruction *IV, VPValue *Start, DebugLoc DL)
+ : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionPHISC, IV, Start, DL),
IV(IV) {
assert((isa<PHINode>(IV) || isa<TruncInst>(IV)) &&
"Expected either an induction phi-node or a truncate of it!");
@@ -1911,7 +1911,8 @@ class VPWidenIntOrFpInductionPHIRecipe : public VPHeaderPHIRecipe {
~VPWidenIntOrFpInductionPHIRecipe() override = default;
VPWidenIntOrFpInductionPHIRecipe *clone() override {
- auto *R = new VPWidenIntOrFpInductionPHIRecipe(IV, getOperand(0));
+ auto *R =
+ new VPWidenIntOrFpInductionPHIRecipe(IV, getOperand(0), getDebugLoc());
R->addOperand(getBackedgeValue());
return R;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 1056e3d68f191..31607c11df988 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1709,7 +1709,7 @@ void VPWidenIntOrFpInductionPHIRecipe::execute(VPTransformState &State) {
Value *Start = State.get(getOperand(0));
PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, "vec.ind");
Phi->addIncoming(Start, VectorPH);
- Phi->setDebugLoc(IV->getDebugLoc());
+ Phi->setDebugLoc(getDebugLoc());
State.set(this, Phi);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 3438b547a7a15..23970b4b8764b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2094,7 +2094,8 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR,
{}, "induction");
// Create the widened phi of the vector IV.
- auto *WidePHI = new VPWidenIntOrFpInductionPHIRecipe(IV, Init);
+ auto *WidePHI =
+ new VPWidenIntOrFpInductionPHIRecipe(IV, Init, WidenIVR->getDebugLoc());
WidePHI->insertBefore(WidenIVR);
// Create the backedge value for the vector IV.
@@ -2119,8 +2120,8 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR,
VPBasicBlock *ExitingBB = Plan->getVectorLoopRegion()->getExitingBasicBlock();
Builder.setInsertPoint(ExitingBB, ExitingBB->getTerminator()->getIterator());
- auto *Next = Builder.createNaryOp(AddOp, {Prev, Inc}, FMFs, IV->getDebugLoc(),
- "vec.ind.next");
+ auto *Next = Builder.createNaryOp(AddOp, {Prev, Inc}, FMFs,
+ WidenIVR->getDebugLoc(), "vec.ind.next");
WidePHI->addOperand(Next);
>From 42dc912da4a94e9c27b16b7f31879b49ac1080c0 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Mon, 16 Dec 2024 18:01:42 +0800
Subject: [PATCH 05/16] Mark splat and step vector recipes as free from side
effects/reads/writes
---
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 31607c11df988..394d6ee8a5d14 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -73,6 +73,8 @@ bool VPRecipeBase::mayWriteToMemory() const {
case VPBranchOnMaskSC:
case VPScalarIVStepsSC:
case VPPredInstPHISC:
+ case VPSplatSC:
+ case VPStepVectorSC:
return false;
case VPBlendSC:
case VPReductionEVLSC:
@@ -120,6 +122,8 @@ bool VPRecipeBase::mayReadFromMemory() const {
case VPScalarIVStepsSC:
case VPWidenStoreEVLSC:
case VPWidenStoreSC:
+ case VPSplatSC:
+ case VPStepVectorSC:
return false;
case VPBlendSC:
case VPReductionEVLSC:
@@ -150,6 +154,8 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPPredInstPHISC:
case VPScalarCastSC:
case VPReverseVectorPointerSC:
+ case VPSplatSC:
+ case VPStepVectorSC:
return false;
case VPInstructionSC:
return mayWriteToMemory();
>From 6839647322dd8e3d8b2955e8f9c563aa23dfc1e1 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Tue, 17 Dec 2024 12:16:29 +0800
Subject: [PATCH 06/16] Fix crash when pointer IV comes after int or FP IV
---
.../Transforms/Vectorize/VPlanTransforms.cpp | 14 ++
.../LoopVectorize/pointer-induction.ll | 139 ++++++++++++++++++
2 files changed, 153 insertions(+)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 23970b4b8764b..64e2d7b43463a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2135,6 +2135,20 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan,
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_deep(Plan.getEntry()))) {
+
+ // Move VPWidenPointerInductionRecipes to the back of the phis,
+ // since they may insert non-phi instructions in place, which will
+ // interfere with other header phis if they come after.
+ //
+ // TODO: Expand out VPWidenPointerInductionRecipe into multiple
+ // recipes here and remove this
+ SmallVector<VPRecipeBase *> PointerIVs;
+ for (VPRecipeBase &R : VPBB->phis())
+ if (isa<VPWidenPointerInductionRecipe>(R))
+ PointerIVs.push_back(&R);
+ for (VPRecipeBase *R : PointerIVs)
+ R->moveBefore(*VPBB, VPBB->getFirstNonPhi());
+
for (VPRecipeBase &R : make_early_inc_range(VPBB->phis())) {
if (auto *WidenIVR = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) {
expandVPWidenIntOrFpInduction(WidenIVR, TypeInfo);
diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
index 531164a2c5dd0..6a297fb21a58b 100644
--- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
@@ -284,3 +284,142 @@ for.cond: ; preds = %for.body, %entry
for.end: ; preds = %for.cond
ret void
}
+
+; Test that WidenPointerInductionRecipes are ordered after the other header phis
+define void @outside_lattice(ptr noalias %p, ptr noalias %q, i32 %n) {
+; DEFAULT-LABEL: @outside_lattice(
+; DEFAULT-NEXT: entry:
+; DEFAULT-NEXT: [[TMP0:%.*]] = zext i32 [[N:%.*]] to i64
+; DEFAULT-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP0]], i64 1)
+; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 4
+; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+; DEFAULT: vector.scevcheck:
+; DEFAULT-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1)
+; DEFAULT-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1
+; DEFAULT-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
+; DEFAULT-NEXT: br i1 [[TMP2]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; DEFAULT: vector.ph:
+; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 4
+; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]]
+; DEFAULT-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 4
+; DEFAULT-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr null, i64 [[TMP3]]
+; DEFAULT-NEXT: [[IND_END2:%.*]] = trunc i64 [[N_VEC]] to i32
+; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]]
+; DEFAULT: vector.body:
+; DEFAULT-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ null, [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
+; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; DEFAULT-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> <i64 0, i64 4, i64 8, i64 12>
+; DEFAULT-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32
+; DEFAULT-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], 0
+; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[P:%.*]], i32 [[TMP4]]
+; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i32 0
+; DEFAULT-NEXT: store <4 x ptr> [[VECTOR_GEP]], ptr [[TMP6]], align 8
+; DEFAULT-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[Q:%.*]], i32 [[TMP4]]
+; DEFAULT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
+; DEFAULT-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP8]], align 4
+; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; DEFAULT-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 16
+; DEFAULT-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; DEFAULT-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; DEFAULT: middle.block:
+; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]]
+; DEFAULT-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; DEFAULT: scalar.ph:
+; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ null, [[VECTOR_SCEVCHECK]] ], [ null, [[ENTRY:%.*]] ]
+; DEFAULT-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ]
+; DEFAULT-NEXT: br label [[FOR_BODY:%.*]]
+; DEFAULT: for.body:
+; DEFAULT-NEXT: [[IV_PTR:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_PTR_NEXT:%.*]], [[FOR_BODY]] ]
+; DEFAULT-NEXT: [[IV_INT:%.*]] = phi i32 [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ], [ [[IV_INT_NEXT:%.*]], [[FOR_BODY]] ]
+; DEFAULT-NEXT: [[P_GEP:%.*]] = getelementptr inbounds ptr, ptr [[P]], i32 [[IV_INT]]
+; DEFAULT-NEXT: store ptr [[IV_PTR]], ptr [[P_GEP]], align 8
+; DEFAULT-NEXT: [[Q_GEP:%.*]] = getelementptr inbounds i32, ptr [[Q]], i32 [[IV_INT]]
+; DEFAULT-NEXT: store i32 [[IV_INT]], ptr [[Q_GEP]], align 4
+; DEFAULT-NEXT: [[IV_INT_NEXT]] = add i32 [[IV_INT]], 1
+; DEFAULT-NEXT: [[IV_PTR_NEXT]] = getelementptr inbounds i32, ptr [[IV_PTR]], i32 1
+; DEFAULT-NEXT: [[DONE:%.*]] = icmp ult i32 [[IV_INT_NEXT]], [[N]]
+; DEFAULT-NEXT: br i1 [[DONE]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP7:![0-9]+]]
+; DEFAULT: for.end:
+; DEFAULT-NEXT: ret void
+;
+; STRIDED-LABEL: @outside_lattice(
+; STRIDED-NEXT: entry:
+; STRIDED-NEXT: [[TMP0:%.*]] = zext i32 [[N:%.*]] to i64
+; STRIDED-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP0]], i64 1)
+; STRIDED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 4
+; STRIDED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+; STRIDED: vector.scevcheck:
+; STRIDED-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1)
+; STRIDED-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1
+; STRIDED-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
+; STRIDED-NEXT: br i1 [[TMP2]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; STRIDED: vector.ph:
+; STRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 4
+; STRIDED-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]]
+; STRIDED-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 4
+; STRIDED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr null, i64 [[TMP3]]
+; STRIDED-NEXT: [[IND_END2:%.*]] = trunc i64 [[N_VEC]] to i32
+; STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
+; STRIDED: vector.body:
+; STRIDED-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ null, [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
+; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; STRIDED-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> <i64 0, i64 4, i64 8, i64 12>
+; STRIDED-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32
+; STRIDED-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], 0
+; STRIDED-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[P:%.*]], i32 [[TMP4]]
+; STRIDED-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i32 0
+; STRIDED-NEXT: store <4 x ptr> [[VECTOR_GEP]], ptr [[TMP6]], align 8
+; STRIDED-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[Q:%.*]], i32 [[TMP4]]
+; STRIDED-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
+; STRIDED-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP8]], align 4
+; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; STRIDED-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; STRIDED-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 16
+; STRIDED-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; STRIDED-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; STRIDED: middle.block:
+; STRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]]
+; STRIDED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; STRIDED: scalar.ph:
+; STRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ null, [[VECTOR_SCEVCHECK]] ], [ null, [[ENTRY:%.*]] ]
+; STRIDED-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ]
+; STRIDED-NEXT: br label [[FOR_BODY:%.*]]
+; STRIDED: for.body:
+; STRIDED-NEXT: [[IV_PTR:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_PTR_NEXT:%.*]], [[FOR_BODY]] ]
+; STRIDED-NEXT: [[IV_INT:%.*]] = phi i32 [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ], [ [[IV_INT_NEXT:%.*]], [[FOR_BODY]] ]
+; STRIDED-NEXT: [[P_GEP:%.*]] = getelementptr inbounds ptr, ptr [[P]], i32 [[IV_INT]]
+; STRIDED-NEXT: store ptr [[IV_PTR]], ptr [[P_GEP]], align 8
+; STRIDED-NEXT: [[Q_GEP:%.*]] = getelementptr inbounds i32, ptr [[Q]], i32 [[IV_INT]]
+; STRIDED-NEXT: store i32 [[IV_INT]], ptr [[Q_GEP]], align 4
+; STRIDED-NEXT: [[IV_INT_NEXT]] = add i32 [[IV_INT]], 1
+; STRIDED-NEXT: [[IV_PTR_NEXT]] = getelementptr inbounds i32, ptr [[IV_PTR]], i32 1
+; STRIDED-NEXT: [[DONE:%.*]] = icmp ult i32 [[IV_INT_NEXT]], [[N]]
+; STRIDED-NEXT: br i1 [[DONE]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP9:![0-9]+]]
+; STRIDED: for.end:
+; STRIDED-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %iv.ptr = phi ptr [ null, %entry ], [ %iv.ptr.next, %for.body ]
+ %iv.int = phi i32 [ 0, %entry ], [ %iv.int.next, %for.body ]
+
+ %p.gep = getelementptr inbounds ptr, ptr %p, i32 %iv.int
+ store ptr %iv.ptr, ptr %p.gep
+
+ %q.gep = getelementptr inbounds i32, ptr %q, i32 %iv.int
+ store i32 %iv.int, ptr %q.gep
+
+ %iv.int.next = add i32 %iv.int, 1
+ %iv.ptr.next = getelementptr inbounds i32, ptr %iv.ptr, i32 1
+
+ %done = icmp ult i32 %iv.int.next, %n
+ br i1 %done, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
>From 6c659cacc793b0cb1cc57b56c1de4b5d16e8c66b Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 5 Dec 2024 00:09:46 +0800
Subject: [PATCH 07/16] Remove VPWidenIntOrFpInductionPHIRecipe, use
VPWidenPHIRecipe instead
---
.../Transforms/Vectorize/LoopVectorize.cpp | 3 +-
llvm/lib/Transforms/Vectorize/VPlan.h | 44 ++++---------------
.../Transforms/Vectorize/VPlanAnalysis.cpp | 15 +++----
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 28 +-----------
.../Transforms/Vectorize/VPlanTransforms.cpp | 3 +-
llvm/lib/Transforms/Vectorize/VPlanValue.h | 1 -
.../LoopVectorize/pointer-induction.ll | 8 ++--
7 files changed, 24 insertions(+), 78 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b8b958ddaa9ae..3d69226a4c453 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2932,8 +2932,7 @@ LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
// Fix widened non-induction PHIs by setting up the PHI operands.
- if (EnableVPlanNativePath)
- fixNonInductionPHIs(State);
+ fixNonInductionPHIs(State);
// After vectorization, the exit blocks of the original loop will have
// additional predecessors. Invalidate SCEVs for the exit phis in case SE
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 52459dcac2943..f8420e0a87948 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1895,39 +1895,6 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
}
};
-/// A recipe to generate the PHI of a widened IV, expanded from
-/// VPWidenIntOrFpInductionRecipe.
-class VPWidenIntOrFpInductionPHIRecipe : public VPHeaderPHIRecipe {
- Instruction *IV;
-
-public:
- VPWidenIntOrFpInductionPHIRecipe(Instruction *IV, VPValue *Start, DebugLoc DL)
- : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionPHISC, IV, Start, DL),
- IV(IV) {
- assert((isa<PHINode>(IV) || isa<TruncInst>(IV)) &&
- "Expected either an induction phi-node or a truncate of it!");
- }
-
- ~VPWidenIntOrFpInductionPHIRecipe() override = default;
-
- VPWidenIntOrFpInductionPHIRecipe *clone() override {
- auto *R =
- new VPWidenIntOrFpInductionPHIRecipe(IV, getOperand(0), getDebugLoc());
- R->addOperand(getBackedgeValue());
- return R;
- }
-
- VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionPHISC)
-
- void execute(VPTransformState &State) override;
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- /// Print the recipe.
- void print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const override;
-#endif
-};
-
class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe,
public VPUnrollPartAccessor<3> {
bool IsScalarAfterVectorization;
@@ -2017,11 +1984,16 @@ class VPScalarPHIRecipe : public VPHeaderPHIRecipe {
/// exactly 2 incoming values, the first from the predecessor of the region and
/// the second from the exiting block of the region.
class VPWidenPHIRecipe : public VPSingleDefRecipe {
+ /// Name to use for the generated IR instruction for the widened phi.
+ std::string Name;
+
public:
/// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start and
- /// debug location \p DL.
- VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr, DebugLoc DL = {})
- : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi, DL) {
+ /// debug location \p DL.
+ VPWidenPHIRecipe(Instruction *Phi, VPValue *Start = nullptr, DebugLoc DL = {},
+ const Twine &Name = "vec.phi")
+ : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi, DL),
+ Name(Name.str()) {
if (Start)
addOperand(Start);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 6108f2f4e4513..8414b36f3ae22 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -236,14 +236,13 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
.Case<VPActiveLaneMaskPHIRecipe, VPCanonicalIVPHIRecipe,
VPFirstOrderRecurrencePHIRecipe, VPReductionPHIRecipe,
VPWidenPointerInductionRecipe, VPEVLBasedIVPHIRecipe,
- VPScalarPHIRecipe, VPWidenIntOrFpInductionPHIRecipe>(
- [this](const auto *R) {
- // Handle header phi recipes, except VPWidenIntOrFpInduction
- // which needs special handling due it being possibly truncated.
- // TODO: consider inferring/caching type of siblings, e.g.,
- // backedge value, here and in cases below.
- return inferScalarType(R->getStartValue());
- })
+ VPScalarPHIRecipe>([this](const auto *R) {
+ // Handle header phi recipes, except VPWidenIntOrFpInduction
+ // which needs special handling due to it being possibly truncated.
+ // TODO: consider inferring/caching type of siblings, e.g.,
+ // backedge value, here and in cases below.
+ return inferScalarType(R->getStartValue());
+ })
.Case<VPWidenIntOrFpInductionRecipe, VPDerivedIVRecipe,
VPStepVectorRecipe>(
[](const auto *R) { return R->getScalarType(); })
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 394d6ee8a5d14..c2bb975d538cf 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1709,27 +1709,6 @@ static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
: ConstantFP::get(Ty, C);
}
-void VPWidenIntOrFpInductionPHIRecipe::execute(VPTransformState &State) {
- BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
-
- Value *Start = State.get(getOperand(0));
- PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, "vec.ind");
- Phi->addIncoming(Start, VectorPH);
- Phi->setDebugLoc(getDebugLoc());
- State.set(this, Phi);
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void VPWidenIntOrFpInductionPHIRecipe::print(raw_ostream &O,
- const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent;
- printAsOperand(O, SlotTracker);
- O << " = WIDEN-INDUCTION-PHI ";
- printOperands(O, SlotTracker);
-}
-#endif
-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
@@ -3507,13 +3486,10 @@ VPBasicBlock *VPWidenPHIRecipe::getIncomingBlock(unsigned I) {
}
void VPWidenPHIRecipe::execute(VPTransformState &State) {
- assert(EnableVPlanNativePath &&
- "Non-native vplans are not expected to have VPWidenPHIRecipes.");
-
State.setDebugLocFrom(getDebugLoc());
Value *Op0 = State.get(getOperand(0));
Type *VecTy = Op0->getType();
- Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
+ Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, Name);
State.set(this, VecPhi);
}
@@ -3522,7 +3498,7 @@ void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN-PHI ";
- auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
+ auto *OriginalPhi = cast<Instruction>(getUnderlyingValue());
// Unless all incoming values are modeled in VPlan print the original PHI
// directly.
// TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 64e2d7b43463a..6e3cce6cac627 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2095,7 +2095,8 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR,
// Create the widened phi of the vector IV.
auto *WidePHI =
- new VPWidenIntOrFpInductionPHIRecipe(IV, Init, WidenIVR->getDebugLoc());
+ new VPWidenPHIRecipe(IV, nullptr, WidenIVR->getDebugLoc(), "vec.ind");
+ WidePHI->addOperand(Init);
WidePHI->insertBefore(WidenIVR);
// Create the backedge value for the vector IV.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 622ebf211e8e6..cb0ae78c0877e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -366,7 +366,6 @@ class VPDef {
VPEVLBasedIVPHISC,
VPFirstOrderRecurrencePHISC,
VPWidenIntOrFpInductionSC,
- VPWidenIntOrFpInductionPHISC,
VPWidenPointerInductionSC,
VPScalarPHISC,
VPReductionPHISC,
diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
index 6a297fb21a58b..eab3a1a904371 100644
--- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
@@ -327,8 +327,8 @@ define void @outside_lattice(ptr noalias %p, ptr noalias %q, i32 %n) {
; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]]
; DEFAULT-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; DEFAULT: scalar.ph:
-; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ null, [[VECTOR_SCEVCHECK]] ], [ null, [[ENTRY:%.*]] ]
-; DEFAULT-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ]
+; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ null, [[ENTRY:%.*]] ], [ null, [[VECTOR_SCEVCHECK]] ]
+; DEFAULT-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
; DEFAULT-NEXT: br label [[FOR_BODY:%.*]]
; DEFAULT: for.body:
; DEFAULT-NEXT: [[IV_PTR:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_PTR_NEXT:%.*]], [[FOR_BODY]] ]
@@ -384,8 +384,8 @@ define void @outside_lattice(ptr noalias %p, ptr noalias %q, i32 %n) {
; STRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]]
; STRIDED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; STRIDED: scalar.ph:
-; STRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ null, [[VECTOR_SCEVCHECK]] ], [ null, [[ENTRY:%.*]] ]
-; STRIDED-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ]
+; STRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ null, [[ENTRY:%.*]] ], [ null, [[VECTOR_SCEVCHECK]] ]
+; STRIDED-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
; STRIDED-NEXT: br label [[FOR_BODY:%.*]]
; STRIDED: for.body:
; STRIDED-NEXT: [[IV_PTR:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_PTR_NEXT:%.*]], [[FOR_BODY]] ]
>From 1cd6c81f78a7171156026167de34b62ea287988b Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Mon, 6 Jan 2025 19:17:08 +0800
Subject: [PATCH 08/16] Use dyn_cast
---
llvm/lib/Transforms/Vectorize/VPlan.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index d2c732b7230e3..5931ca2df9efe 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1050,8 +1050,7 @@ void VPlan::execute(VPTransformState *State) {
if (isa<VPWidenPHIRecipe>(&R))
continue;
- if (isa<VPWidenPointerInductionRecipe>(&R)) {
- auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
+ if (auto *WidenPhi = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
"recipe generating only scalars should have been replaced");
auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
>From dcc6640e92611f48187f8bdc798ea205a0601d27 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Mon, 6 Jan 2025 23:42:01 +0800
Subject: [PATCH 09/16] Remove VPSplatRecipe, replace with VPInstruction::Splat
---
.../Vectorize/LoopVectorizationPlanner.h | 4 +--
llvm/lib/Transforms/Vectorize/VPlan.h | 35 ++-----------------
.../Transforms/Vectorize/VPlanAnalysis.cpp | 2 ++
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 28 +++++----------
llvm/lib/Transforms/Vectorize/VPlanValue.h | 1 -
5 files changed, 15 insertions(+), 55 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 7f1ec987ad038..ae3ca6b0df914 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -265,8 +265,8 @@ class VPBuilder {
FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags()));
}
- VPSplatRecipe *createSplat(VPValue *Val) {
- return tryInsertInstruction(new VPSplatRecipe(Val));
+ VPInstruction *createSplat(VPValue *Val) {
+ return tryInsertInstruction(new VPInstruction(VPInstruction::Splat, {Val}));
}
VPStepVectorRecipe *createStepVector(Type *Ty) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index f8420e0a87948..c6390a69edd1f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -880,6 +880,8 @@ class VPInstruction : public VPRecipeWithIRFlags,
// Extracts the first active lane of a vector, where the first operand is
// the predicate, and the second operand is the vector to extract.
ExtractFirstActive,
+ // Splats a scalar value across all lanes.
+ Splat,
};
private:
@@ -1236,39 +1238,6 @@ class VPScalarCastRecipe : public VPSingleDefRecipe {
}
};
-/// A for splatting a scalar value to a vector.
-class VPSplatRecipe : public VPSingleDefRecipe {
-public:
- VPSplatRecipe(VPValue *Op) : VPSingleDefRecipe(VPDef::VPSplatSC, {Op}) {}
-
- ~VPSplatRecipe() override = default;
-
- VPSplatRecipe *clone() override { return new VPSplatRecipe(getOperand(0)); }
-
- VP_CLASSOF_IMPL(VPDef::VPSplatSC)
-
- void execute(VPTransformState &State) override;
-
- /// Return the cost of this VPSplatRecipe.
- InstructionCost computeCost(ElementCount VF,
- VPCostContext &Ctx) const override {
- // TODO: Compute accurate cost after retiring the legacy cost model.
- return 0;
- }
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- void print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const override;
-#endif
-
- /// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
- assert(is_contained(operands(), Op) &&
- "Op must be an operand of the recipe");
- return true;
- }
-};
-
/// A recipe for generating a step vector.
class VPStepVectorRecipe : public VPSingleDefRecipe {
/// Scalar return type of the intrinsic.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 8414b36f3ae22..ec97774ce37f2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -96,6 +96,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
case VPInstruction::BranchOnCond:
case VPInstruction::BranchOnCount:
return Type::getVoidTy(Ctx);
+ case VPInstruction::Splat:
+ return inferScalarType(R->getOperand(0));
default:
break;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index c2bb975d538cf..8ddd0e048e3ed 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -73,7 +73,6 @@ bool VPRecipeBase::mayWriteToMemory() const {
case VPBranchOnMaskSC:
case VPScalarIVStepsSC:
case VPPredInstPHISC:
- case VPSplatSC:
case VPStepVectorSC:
return false;
case VPBlendSC:
@@ -122,7 +121,6 @@ bool VPRecipeBase::mayReadFromMemory() const {
case VPScalarIVStepsSC:
case VPWidenStoreEVLSC:
case VPWidenStoreSC:
- case VPSplatSC:
case VPStepVectorSC:
return false;
case VPBlendSC:
@@ -154,7 +152,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPPredInstPHISC:
case VPScalarCastSC:
case VPReverseVectorPointerSC:
- case VPSplatSC:
case VPStepVectorSC:
return false;
case VPInstructionSC:
@@ -715,6 +712,10 @@ Value *VPInstruction::generate(VPTransformState &State) {
Builder.getInt64Ty(), Mask, true, "first.active.lane");
return Builder.CreateExtractElement(Vec, Ctz, "early.exit.value");
}
+ case VPInstruction::Splat:
+ return State.Builder.CreateVectorSplat(State.VF,
+ State.get(getOperand(0), true));
+
default:
llvm_unreachable("Unsupported opcode for instruction");
}
@@ -826,6 +827,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
case VPInstruction::LogicalAnd:
case VPInstruction::Not:
case VPInstruction::PtrAdd:
+ case VPInstruction::Splat:
return false;
default:
return true;
@@ -853,6 +855,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
case VPInstruction::BranchOnCount:
case VPInstruction::BranchOnCond:
case VPInstruction::ResumePhi:
+ case VPInstruction::Splat:
return true;
};
llvm_unreachable("switch should return");
@@ -944,6 +947,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::ExtractFirstActive:
O << "extract-first-active";
break;
+ case VPInstruction::Splat:
+ O << "splat";
+ break;
default:
O << Instruction::getOpcodeName(getOpcode());
}
@@ -2319,22 +2325,6 @@ void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent,
}
#endif
-void VPSplatRecipe::execute(VPTransformState &State) {
- Value *Splat =
- State.Builder.CreateVectorSplat(State.VF, State.get(getOperand(0), true));
- State.set(this, Splat);
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void VPSplatRecipe::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent;
- printAsOperand(O, SlotTracker);
- O << " = SPLAT ";
- printOperands(O, SlotTracker);
-}
-#endif
-
void VPStepVectorRecipe::execute(VPTransformState &State) {
VectorType *Ty = VectorType::get(ScalarTy, State.VF);
State.set(this, State.Builder.CreateStepVector(Ty));
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index cb0ae78c0877e..61422bfe6c0b0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -354,7 +354,6 @@ class VPDef {
VPWidenSelectSC,
VPBlendSC,
VPHistogramSC,
- VPSplatSC,
VPStepVectorSC,
// START: Phi-like recipes. Need to be kept together.
VPWidenPHISC,
>From ca81015f3f2c0a561f585b2da6ee50e37379fb5d Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Mon, 6 Jan 2025 23:42:26 +0800
Subject: [PATCH 10/16] Update comment
---
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 6e3cce6cac627..8861f499d7e2f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2038,7 +2038,7 @@ void VPlanTransforms::createInterleaveGroups(
///
/// <x1> vector loop: {
/// vector.body:
-/// ir<%i> = WIDEN-INDUCTION-PHI vp<%induction>, vp<%vec.ind.next>
+/// ir<%i> = WIDEN-PHI vp<%induction>, vp<%vec.ind.next>
/// ...
/// vp<%vec.ind.next> = add ir<%i>, vp<%inc>
/// EMIT branch-on-count ...
>From 8b77af442a5ccd9b1e1f5e98d2b51c7d0cf5a12c Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Wed, 8 Jan 2025 15:49:24 +0800
Subject: [PATCH 11/16] Pass Name when generating VPInstruction::Splat
---
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 8ddd0e048e3ed..3fc424a111dd9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -713,8 +713,8 @@ Value *VPInstruction::generate(VPTransformState &State) {
return Builder.CreateExtractElement(Vec, Ctz, "early.exit.value");
}
case VPInstruction::Splat:
- return State.Builder.CreateVectorSplat(State.VF,
- State.get(getOperand(0), true));
+ return State.Builder.CreateVectorSplat(
+ State.VF, State.get(getOperand(0), true), Name);
default:
llvm_unreachable("Unsupported opcode for instruction");
>From 5ad4cc1a2b8f5ca45b7ba265cb1da1b728d2fe4a Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Mon, 3 Feb 2025 17:55:55 +0800
Subject: [PATCH 12/16] Allow splatting VPScalarCastRecipe directly, remove
 VPInstruction::Splat
---
.../Vectorize/LoopVectorizationPlanner.h | 4 -
llvm/lib/Transforms/Vectorize/VPlan.cpp | 20 +-
llvm/lib/Transforms/Vectorize/VPlan.h | 2 -
.../Transforms/Vectorize/VPlanAnalysis.cpp | 2 -
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 10 -
.../Transforms/Vectorize/VPlanTransforms.cpp | 7 +-
.../AArch64/clamped-trip-count.ll | 12 +-
.../AArch64/divs-with-scalable-vfs.ll | 12 +-
.../AArch64/outer_loop_prefer_scalable.ll | 8 +-
.../AArch64/scalable-avoid-scalarization.ll | 6 +-
.../AArch64/simple_early_exit.ll | 12 +-
.../AArch64/sve-interleaved-accesses.ll | 47 ++--
.../LoopVectorize/AArch64/sve-tail-folding.ll | 6 +-
.../RISCV/blocks-with-dead-instructions.ll | 42 ++--
.../LoopVectorize/RISCV/dead-ops-cost.ll | 24 +-
.../LoopVectorize/RISCV/induction-costs.ll | 6 +-
.../LoopVectorize/RISCV/mask-index-type.ll | 6 +-
.../RISCV/masked_gather_scatter.ll | 12 +-
...ruction-or-drop-poison-generating-flags.ll | 6 +-
.../LoopVectorize/RISCV/strided-accesses.ll | 18 +-
.../LoopVectorize/RISCV/uniform-load-store.ll | 30 +--
...rize-force-tail-with-evl-cond-reduction.ll | 24 +-
.../X86/epilog-vectorization-inductions.ll | 8 +-
.../LoopVectorize/X86/induction-costs.ll | 12 +-
.../LoopVectorize/X86/induction-step.ll | 8 +-
.../LoopVectorize/X86/scatter_crash.ll | 16 +-
.../LoopVectorize/create-induction-resume.ll | 4 +-
.../LoopVectorize/float-induction.ll | 36 +--
.../LoopVectorize/induction-step.ll | 42 ++--
.../Transforms/LoopVectorize/induction.ll | 238 +++++++++---------
.../LoopVectorize/iv_outside_user.ll | 24 +-
.../optimal-epilog-vectorization.ll | 16 +-
.../LoopVectorize/outer_loop_scalable.ll | 6 +-
.../pr30654-phiscev-sext-trunc.ll | 12 +-
.../LoopVectorize/scalable-inductions.ll | 12 +-
35 files changed, 327 insertions(+), 423 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index ae3ca6b0df914..f7c068eaa4d9d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -265,10 +265,6 @@ class VPBuilder {
FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags()));
}
- VPInstruction *createSplat(VPValue *Val) {
- return tryInsertInstruction(new VPInstruction(VPInstruction::Splat, {Val}));
- }
-
VPStepVectorRecipe *createStepVector(Type *Ty) {
return tryInsertInstruction(new VPStepVectorRecipe(Ty));
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 5931ca2df9efe..59bd90b5c2f5f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -313,15 +313,17 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
LastLane = 0;
}
- auto *LastInst = cast<Instruction>(get(Def, LastLane));
- // Set the insert point after the last scalarized instruction or after the
- // last PHI, if LastInst is a PHI. This ensures the insertelement sequence
- // will directly follow the scalar definitions.
auto OldIP = Builder.saveIP();
- auto NewIP = isa<PHINode>(LastInst)
- ? LastInst->getParent()->getFirstNonPHIIt()
- : std::next(BasicBlock::iterator(LastInst));
- Builder.SetInsertPoint(&*NewIP);
+ auto *LastVal = get(Def, LastLane);
+ if (auto *LastInst = dyn_cast<Instruction>(LastVal)) {
+ // Set the insert point after the last scalarized instruction or after the
+ // last PHI, if LastInst is a PHI. This ensures the insertelement sequence
+ // will directly follow the scalar definitions.
+ auto NewIP = isa<PHINode>(LastInst)
+ ? LastInst->getParent()->getFirstNonPHIIt()
+ : std::next(BasicBlock::iterator(LastInst));
+ Builder.SetInsertPoint(&*NewIP);
+ }
// However, if we are vectorizing, we need to construct the vector values.
// If the value is known to be uniform after vectorization, we can just
@@ -336,7 +338,7 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
} else {
// Initialize packing with insertelements to start from undef.
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
- Value *Undef = PoisonValue::get(toVectorizedTy(LastInst->getType(), VF));
+ Value *Undef = PoisonValue::get(toVectorizedTy(LastVal->getType(), VF));
set(Def, Undef);
for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane)
packScalarIntoVectorizedValue(Def, Lane);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index c6390a69edd1f..e1e86fe980cc7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -880,8 +880,6 @@ class VPInstruction : public VPRecipeWithIRFlags,
// Extracts the first active lane of a vector, where the first operand is
// the predicate, and the second operand is the vector to extract.
ExtractFirstActive,
- // Splats a scalar value across all lanes.
- Splat,
};
private:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index ec97774ce37f2..8414b36f3ae22 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -96,8 +96,6 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
case VPInstruction::BranchOnCond:
case VPInstruction::BranchOnCount:
return Type::getVoidTy(Ctx);
- case VPInstruction::Splat:
- return inferScalarType(R->getOperand(0));
default:
break;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 3fc424a111dd9..a18a112048f1e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -712,9 +712,6 @@ Value *VPInstruction::generate(VPTransformState &State) {
Builder.getInt64Ty(), Mask, true, "first.active.lane");
return Builder.CreateExtractElement(Vec, Ctz, "early.exit.value");
}
- case VPInstruction::Splat:
- return State.Builder.CreateVectorSplat(
- State.VF, State.get(getOperand(0), true), Name);
default:
llvm_unreachable("Unsupported opcode for instruction");
@@ -827,7 +824,6 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
case VPInstruction::LogicalAnd:
case VPInstruction::Not:
case VPInstruction::PtrAdd:
- case VPInstruction::Splat:
return false;
default:
return true;
@@ -855,7 +851,6 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
case VPInstruction::BranchOnCount:
case VPInstruction::BranchOnCond:
case VPInstruction::ResumePhi:
- case VPInstruction::Splat:
return true;
};
llvm_unreachable("switch should return");
@@ -947,9 +942,6 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::ExtractFirstActive:
O << "extract-first-active";
break;
- case VPInstruction::Splat:
- O << "splat";
- break;
default:
O << Instruction::getOpcodeName(getOpcode());
}
@@ -2294,8 +2286,6 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
Value *VPScalarCastRecipe ::generate(VPTransformState &State) {
State.setDebugLocFrom(getDebugLoc());
- assert(vputils::onlyFirstLaneUsed(this) &&
- "Codegen only implemented for first lane.");
switch (Opcode) {
case Instruction::SExt:
case Instruction::ZExt:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 8861f499d7e2f..cb9dc7da2a656 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2089,9 +2089,8 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR,
// FIXME: The newly created binary instructions should contain nsw/nuw
// flags, which can be found from the original scalar operations.
- Init = Builder.createNaryOp(MulOp, {Init, Builder.createSplat(Step)}, FMFs);
- Init = Builder.createNaryOp(AddOp, {Builder.createSplat(Start), Init}, FMFs,
- {}, "induction");
+ Init = Builder.createNaryOp(MulOp, {Init, Step}, FMFs);
+ Init = Builder.createNaryOp(AddOp, {Start, Init}, FMFs, {}, "induction");
// Create the widened phi of the vector IV.
auto *WidePHI =
@@ -2115,7 +2114,7 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR,
else if (Ty != TypeInfo.inferScalarType(VF))
VF = Builder.createScalarCast(Instruction::CastOps::Trunc, VF, Ty, DL);
- Inc = Builder.createSplat(Builder.createNaryOp(MulOp, {Step, VF}, FMFs));
+ Inc = Builder.createNaryOp(MulOp, {Step, VF}, FMFs);
Prev = WidePHI;
}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
index 8a372d134eebb..aa27330e47b5c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
@@ -20,9 +20,9 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
-; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP6]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = mul <vscale x 8 x i64> splat (i64 1), [[DOTSPLAT]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -39,7 +39,7 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP14]], ptr [[TMP17]], i32 1, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 8)
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[TMP9]]
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
@@ -106,9 +106,9 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
-; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP6]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = mul <vscale x 8 x i64> splat (i64 1), [[DOTSPLAT]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -125,7 +125,7 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP14]], ptr [[TMP17]], i32 1, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 [[WIDE_TRIP_COUNT]])
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[TMP9]]
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
index 96283637b34bf..40719fa47393f 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
@@ -127,9 +127,9 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP15]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]]
-; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP20]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP18:%.*]] = mul <vscale x 2 x i64> splat (i64 1), [[DOTSPLAT]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[M]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -156,7 +156,7 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP14]])
; CHECK-NEXT: [[TMP36:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[TMP18]]
; CHECK-NEXT: [[TMP37:%.*]] = extractelement <vscale x 2 x i1> [[TMP36]], i32 0
; CHECK-NEXT: br i1 [[TMP37]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -251,9 +251,9 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP15]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]]
-; CHECK-NEXT: [[TMP18:%.*]] = mul i64 1, [[TMP9]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP18]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP18:%.*]] = mul <vscale x 2 x i64> splat (i64 1), [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[MUL_2_I]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -283,7 +283,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP14]])
; CHECK-NEXT: [[TMP47:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[TMP18]]
; CHECK-NEXT: [[TMP48:%.*]] = extractelement <vscale x 2 x i1> [[TMP47]], i32 0
; CHECK-NEXT: br i1 [[TMP48]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll
index 3302103873bd3..71e07f01651cd 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll
@@ -23,9 +23,9 @@ define void @foo() {
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 4 x i64> [[TMP6]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP7]]
-; CHECK-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP5]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP8]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP5]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 4 x i64> splat (i64 1), [[BROADCAST_SPLAT]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_LATCH:%.*]] ]
@@ -47,7 +47,7 @@ define void @foo() {
; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <vscale x 4 x float> [ [[TMP11]], [[INNER_LOOP1]] ]
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> [[VEC_PHI4]], <vscale x 4 x ptr> [[TMP9]], i32 4, <vscale x 4 x i1> splat (i1 true))
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[TMP8]]
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll
index 4074210a95b59..bd4eaad9fb099 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll
@@ -31,9 +31,9 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[IDX]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[DOTSPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i32> [[DOTSPLAT]], [[TMP10]]
-; CHECK-NEXT: [[TMP13:%.*]] = mul i32 1, [[TMP7]]
-; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP13]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP7]], i64 0
; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <vscale x 2 x i32> [[DOTSPLATINSERT1]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = mul <vscale x 2 x i32> splat (i32 1), [[DOTSPLAT2]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -48,7 +48,7 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr double, ptr [[TMP18]], i32 0
; CHECK-NEXT: store <vscale x 2 x double> [[WIDE_LOAD]], ptr [[TMP19]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP7]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[VEC_IND]], [[DOTSPLAT2]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[VEC_IND]], [[TMP11]]
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
index 42600a132c2c3..9d08613a06d4a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
@@ -28,9 +28,9 @@ define i64 @same_exit_block_pre_inc_use1() #1 {
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 16 x i64> @llvm.stepvector.nxv16i64()
; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 16 x i64> [[TMP7]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 16 x i64> splat (i64 3), [[TMP8]]
-; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i64> poison, i64 [[TMP9]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i64> poison, i64 [[TMP5]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i64> [[DOTSPLATINSERT]], <vscale x 16 x i64> poison, <vscale x 16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = mul <vscale x 16 x i64> splat (i64 1), [[DOTSPLAT]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ]
@@ -49,7 +49,7 @@ define i64 @same_exit_block_pre_inc_use1() #1 {
; CHECK-NEXT: [[TMP17:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP16]])
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT3]], [[N_VEC]]
; CHECK-NEXT: [[TMP19:%.*]] = or i1 [[TMP17]], [[TMP18]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i64> [[VEC_IND]], [[TMP9]]
; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP17]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
@@ -286,9 +286,9 @@ define i64 @loop_contains_safe_div() #1 {
; CHECK-NEXT: [[TMP16:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 4 x i64> [[TMP16]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> splat (i64 3), [[TMP17]]
-; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP9]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP5]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = mul <vscale x 4 x i64> splat (i64 1), [[DOTSPLAT]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
@@ -305,7 +305,7 @@ define i64 @loop_contains_safe_div() #1 {
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP15]])
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[INDEX1]]
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[TMP9]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP6]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
index 08146a737eb1b..da1782be43af5 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
@@ -108,9 +108,9 @@ define void @test_array_load2_i16_store2(i32 %C, i32 %D) #1 {
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP3:%.*]] = shl <vscale x 4 x i64> [[TMP2]], splat (i64 1)
-; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP0]], 3
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP4]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP1]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = shl <vscale x 4 x i64> [[DOTSPLAT]], splat (i64 1)
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[C:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[D:%.*]], i64 0
@@ -133,7 +133,7 @@ define void @test_array_load2_i16_store2(i32 %C, i32 %D) #1 {
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[TMP10]], <vscale x 4 x i32> [[TMP12]])
; CHECK-NEXT: store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP14]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[TMP4]]
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
@@ -197,9 +197,9 @@ define void @test_array_load2_store2_i16(i32 noundef %C, i32 noundef %D) #1 {
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP3:%.*]] = shl <vscale x 4 x i64> [[TMP2]], splat (i64 1)
-; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP0]], 3
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP4]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP1]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = shl <vscale x 4 x i64> [[DOTSPLAT]], splat (i64 1)
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[C:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[D:%.*]], i64 0
@@ -224,7 +224,7 @@ define void @test_array_load2_store2_i16(i32 noundef %C, i32 noundef %D) #1 {
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x i16], ptr @CD_i16, i64 0, <vscale x 4 x i64> [[TMP9]]
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> [[TMP14]], <vscale x 4 x ptr> [[TMP15]], i32 2, <vscale x 4 x i1> splat (i1 true))
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[TMP4]]
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
@@ -379,8 +379,7 @@ define void @test_reversed_load2_store2(ptr noalias nocapture readonly %A, ptr n
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
; CHECK-NEXT: [[INDUCTION:%.*]] = sub <vscale x 4 x i32> splat (i32 1023), [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = trunc nuw nsw i64 [[TMP1]] to i32
-; CHECK-NEXT: [[DOTNEG:%.*]] = sub nsw i32 0, [[TMP3]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[DOTNEG]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP3]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
@@ -412,7 +411,7 @@ define void @test_reversed_load2_store2(ptr noalias nocapture readonly %A, ptr n
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[REVERSE2]], <vscale x 4 x i32> [[REVERSE3]])
; CHECK-NEXT: store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP19]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = sub <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: middle.block:
@@ -637,8 +636,7 @@ define void @load_gap_reverse(ptr noalias nocapture readonly %P1, ptr noalias no
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[INDUCTION:%.*]] = sub <vscale x 4 x i64> splat (i64 1023), [[TMP2]]
-; CHECK-NEXT: [[DOTNEG:%.*]] = sub nsw i64 0, [[TMP1]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[DOTNEG]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP1]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[X:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
@@ -654,7 +652,7 @@ define void @load_gap_reverse(ptr noalias nocapture readonly %P1, ptr noalias no
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> [[TMP4]], <vscale x 4 x ptr> [[TMP5]], i32 8, <vscale x 4 x i1> splat (i1 true))
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> [[TMP7]], <vscale x 4 x ptr> [[TMP6]], i32 8, <vscale x 4 x i1> splat (i1 true))
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = sub <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK: middle.block:
@@ -1253,9 +1251,9 @@ define void @PR27626_4(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 {
; CHECK-NEXT: [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP9:%.*]] = shl <vscale x 4 x i64> [[TMP8]], splat (i64 1)
-; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP6]], 3
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP11]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP7]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = shl <vscale x 4 x i64> [[DOTSPLAT]], splat (i64 1)
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Y:%.*]], i64 0
@@ -1272,7 +1270,7 @@ define void @PR27626_4(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 {
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[BROADCAST_SPLAT2]], <vscale x 4 x i32> [[BROADCAST_SPLAT4]])
; CHECK-NEXT: store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[P]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[TMP11]]
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
; CHECK: middle.block:
@@ -1346,9 +1344,9 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 {
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 4 x i64> [[TMP9]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> [[TMP10]], splat (i64 3)
-; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i64 [[TMP7]], 3
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP12]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP8]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = shl <vscale x 4 x i64> [[DOTSPLAT]], splat (i64 1)
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Y:%.*]], i64 0
@@ -1368,7 +1366,7 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 {
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT2]], <vscale x 4 x ptr> [[TMP17]], i32 4, <vscale x 4 x i1> splat (i1 true))
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT4]], <vscale x 4 x ptr> [[TMP15]], i32 4, <vscale x 4 x i1> splat (i1 true))
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[TMP12]]
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
; CHECK: middle.block:
@@ -1454,9 +1452,9 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 {
; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1
; CHECK-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP15:%.*]] = shl <vscale x 4 x i64> [[TMP14]], splat (i64 1)
-; CHECK-NEXT: [[TMP16:%.*]] = shl nuw nsw i64 [[TMP9]], 3
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP16]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP10]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = shl <vscale x 4 x i64> [[DOTSPLAT]], splat (i64 1)
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i32 [[TMP11]], 2
; CHECK-NEXT: [[TMP13:%.*]] = add nsw i32 [[TMP12]], -1
@@ -1466,7 +1464,7 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_MASKED_GATHER4:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[TMP15]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP18:%.*]] = add nuw nsw <vscale x 4 x i64> [[VEC_IND]], splat (i64 1)
+; CHECK-NEXT: [[TMP18:%.*]] = or disjoint <vscale x 4 x i64> [[VEC_IND]], splat (i64 1)
; CHECK-NEXT: [[TMP19:%.*]] = add nuw nsw <vscale x 4 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i16, ptr [[A]], <vscale x 4 x i64> [[TMP18]]
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> [[TMP20]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i16> poison), !alias.scope [[META34:![0-9]+]]
@@ -1481,7 +1479,7 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 {
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: store <vscale x 4 x i32> [[TMP27]], ptr [[TMP28]], align 4, !alias.scope [[META37:![0-9]+]], !noalias [[META34]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[TMP16]]
; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP39:![0-9]+]]
; CHECK: middle.block:
@@ -1715,8 +1713,7 @@ define void @interleave_deinterleave_reverse(ptr noalias nocapture readonly %A,
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
; CHECK-NEXT: [[INDUCTION:%.*]] = sub <vscale x 4 x i32> splat (i32 1023), [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = trunc nuw nsw i64 [[TMP1]] to i32
-; CHECK-NEXT: [[TMP4:%.*]] = sub nsw i32 0, [[TMP3]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP4]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP3]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
@@ -1762,7 +1759,7 @@ define void @interleave_deinterleave_reverse(ptr noalias nocapture readonly %A,
; CHECK-NEXT: [[INTERLEAVED_VEC11:%.*]] = call <vscale x 16 x i32> @llvm.vector.interleave2.nxv16i32(<vscale x 8 x i32> [[INTERLEAVED_VEC]], <vscale x 8 x i32> [[INTERLEAVED_VEC10]])
; CHECK-NEXT: store <vscale x 16 x i32> [[INTERLEAVED_VEC11]], ptr [[TMP26]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = sub <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
index 75b2df93c9350..a4a57a186ebed 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
@@ -227,9 +227,9 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 {
; CHECK-NEXT: [[TMP13:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP15:%.*]] = mul <vscale x 4 x i64> [[TMP13]], splat (i64 4)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP15]]
-; CHECK-NEXT: [[TMP18:%.*]] = mul i64 4, [[TMP7]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP18]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP7]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = mul <vscale x 4 x i64> splat (i64 4), [[DOTSPLAT]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
@@ -242,7 +242,7 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 {
; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP7]]
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP12]])
; CHECK-NEXT: [[TMP21:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[TMP16]]
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <vscale x 4 x i1> [[TMP21]], i32 0
; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll
index 109de3e14da0b..bfe42fa9d02e1 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll
@@ -28,9 +28,9 @@ define void @block_with_dead_inst_1(ptr %src, i64 %N) #0 {
; CHECK-NEXT: [[TMP11:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
; CHECK-NEXT: [[TMP13:%.*]] = mul <vscale x 8 x i64> [[TMP11]], splat (i64 3)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = mul i64 3, [[TMP10]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP14]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP10]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = mul <vscale x 8 x i64> splat (i64 3), [[DOTSPLAT]]
; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], 8
; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1
@@ -43,7 +43,7 @@ define void @block_with_dead_inst_1(ptr %src, i64 %N) #0 {
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[SRC]], <vscale x 8 x i64> [[VEC_IND]]
; CHECK-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> [[TMP20]], i32 2, <vscale x 8 x i1> splat (i1 true))
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[TMP14]]
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -122,9 +122,9 @@ define void @block_with_dead_inst_2(ptr %src) #0 {
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 4 x i64> [[TMP8]], splat (i64 3)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP10]]
-; CHECK-NEXT: [[TMP11:%.*]] = mul i64 3, [[TMP7]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP11]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP7]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = mul <vscale x 4 x i64> splat (i64 3), [[DOTSPLAT]]
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 4
; CHECK-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1
@@ -137,7 +137,7 @@ define void @block_with_dead_inst_2(ptr %src) #0 {
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i16, ptr [[SRC]], <vscale x 4 x i64> [[VEC_IND]]
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x ptr> [[TMP17]], i32 2, <vscale x 4 x i1> splat (i1 true))
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[TMP11]]
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -216,9 +216,9 @@ define void @multiple_blocks_with_dead_insts_3(ptr %src) #0 {
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 4 x i64> [[TMP8]], splat (i64 3)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP10]]
-; CHECK-NEXT: [[TMP11:%.*]] = mul i64 3, [[TMP7]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP11]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP7]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = mul <vscale x 4 x i64> splat (i64 3), [[DOTSPLAT]]
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 4
; CHECK-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1
@@ -231,7 +231,7 @@ define void @multiple_blocks_with_dead_insts_3(ptr %src) #0 {
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i16, ptr [[SRC]], <vscale x 4 x i64> [[VEC_IND]]
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x ptr> [[TMP17]], i32 2, <vscale x 4 x i1> splat (i1 true))
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[TMP11]]
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -320,9 +320,9 @@ define void @multiple_blocks_with_dead_insts_4(ptr %src, i64 %N) #0 {
; CHECK-NEXT: [[TMP11:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
; CHECK-NEXT: [[TMP13:%.*]] = mul <vscale x 8 x i64> [[TMP11]], splat (i64 3)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = mul i64 3, [[TMP10]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP14]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP10]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = mul <vscale x 8 x i64> splat (i64 3), [[DOTSPLAT]]
; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], 8
; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1
@@ -335,7 +335,7 @@ define void @multiple_blocks_with_dead_insts_4(ptr %src, i64 %N) #0 {
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[SRC]], <vscale x 8 x i64> [[VEC_IND]]
; CHECK-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> [[TMP20]], i32 2, <vscale x 8 x i1> splat (i1 true))
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[TMP14]]
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -426,9 +426,9 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_5(ptr %src) #0 {
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 4 x i64> [[TMP8]], splat (i64 3)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP10]]
-; CHECK-NEXT: [[TMP11:%.*]] = mul i64 3, [[TMP7]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP11]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP7]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = mul <vscale x 4 x i64> splat (i64 3), [[DOTSPLAT]]
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 4
; CHECK-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1
@@ -441,7 +441,7 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_5(ptr %src) #0 {
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i16, ptr [[SRC]], <vscale x 4 x i64> [[VEC_IND]]
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x ptr> [[TMP17]], i32 2, <vscale x 4 x i1> splat (i1 true))
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[TMP11]]
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -540,9 +540,9 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 %
; CHECK-NEXT: [[TMP11:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
; CHECK-NEXT: [[TMP13:%.*]] = mul <vscale x 8 x i64> [[TMP11]], splat (i64 3)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = mul i64 3, [[TMP10]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP14]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP10]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = mul <vscale x 8 x i64> splat (i64 3), [[DOTSPLAT]]
; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], 8
; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1
@@ -555,7 +555,7 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 %
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[SRC]], <vscale x 8 x i64> [[VEC_IND]]
; CHECK-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> [[TMP20]], i32 2, <vscale x 8 x i1> splat (i1 true))
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[TMP14]]
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -857,9 +857,9 @@ define void @dead_load_in_block(ptr %dst, ptr %src, i8 %N, i64 %x) #0 {
; CHECK-NEXT: [[TMP18:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT: [[TMP20:%.*]] = mul <vscale x 2 x i64> [[TMP18]], splat (i64 3)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP20]]
-; CHECK-NEXT: [[TMP23:%.*]] = mul i64 3, [[TMP17]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP23]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP17]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP21:%.*]] = mul <vscale x 2 x i64> splat (i64 3), [[DOTSPLAT]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -867,7 +867,7 @@ define void @dead_load_in_block(ptr %dst, ptr %src, i8 %N, i64 %x) #0 {
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[DST]], <vscale x 2 x i64> [[VEC_IND]]
; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> zeroinitializer, <vscale x 2 x ptr> [[TMP24]], i32 4, <vscale x 2 x i1> splat (i1 true)), !alias.scope [[META18:![0-9]+]], !noalias [[META21:![0-9]+]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP17]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[TMP21]]
; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
index 8475004091f3f..eefe570d68056 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
@@ -38,9 +38,9 @@ define void @dead_load(ptr %p, i16 %start) {
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[START_EXT]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> [[DOTSPLAT]], [[TMP17]]
-; CHECK-NEXT: [[TMP20:%.*]] = mul i64 3, [[TMP14]]
-; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP20]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP14]], i64 0
; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT1]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP19:%.*]] = mul <vscale x 8 x i64> splat (i64 3), [[DOTSPLAT2]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -48,7 +48,7 @@ define void @dead_load(ptr %p, i16 %start) {
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[P]], <vscale x 8 x i64> [[VEC_IND]]
; CHECK-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> [[TMP21]], i32 2, <vscale x 8 x i1> splat (i1 true))
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP14]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT2]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[TMP19]]
; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -117,9 +117,9 @@ define i8 @dead_live_out_due_to_scalar_epilogue_required(ptr %src, ptr %dst) {
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
; CHECK-NEXT: [[TMP11:%.*]] = mul <vscale x 4 x i32> [[TMP9]], splat (i32 4)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP11]]
-; CHECK-NEXT: [[TMP14:%.*]] = mul i32 4, [[TMP8]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP14]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP8]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = mul <vscale x 4 x i32> splat (i32 4), [[DOTSPLAT]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -128,7 +128,7 @@ define i8 @dead_live_out_due_to_scalar_epilogue_required(ptr %src, ptr %dst) {
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[DST]], <vscale x 4 x i64> [[TMP15]]
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> zeroinitializer, <vscale x 4 x ptr> [[TMP16]], i32 1, <vscale x 4 x i1> splat (i1 true)), !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP8]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[TMP12]]
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -335,9 +335,9 @@ define void @test_phi_in_latch_redundant(ptr %dst, i32 %a) {
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i64> [[TMP6]], splat (i64 9)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP8]]
-; CHECK-NEXT: [[TMP9:%.*]] = mul i64 9, [[TMP5]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP5]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = mul <vscale x 2 x i64> splat (i64 9), [[DOTSPLAT]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -345,7 +345,7 @@ define void @test_phi_in_latch_redundant(ptr %dst, i32 %a) {
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[DST]], <vscale x 2 x i64> [[VEC_IND]]
; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> [[TMP10]], <vscale x 2 x ptr> [[TMP11]], i32 4, <vscale x 2 x i1> splat (i1 true))
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[TMP13]]
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -417,9 +417,9 @@ define void @gather_interleave_group_with_dead_insert_pos(i64 %N, ptr noalias %s
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP11:%.*]] = mul <vscale x 4 x i64> [[TMP9]], splat (i64 2)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP11]]
-; CHECK-NEXT: [[TMP12:%.*]] = mul i64 2, [[TMP8]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP12]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP8]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = mul <vscale x 4 x i64> splat (i64 2), [[DOTSPLAT]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -435,7 +435,7 @@ define void @gather_interleave_group_with_dead_insert_pos(i64 %N, ptr noalias %s
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[DST]], <vscale x 4 x i64> [[VEC_IND]]
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP18]], <vscale x 4 x ptr> [[TMP19]], i32 4, <vscale x 4 x i1> splat (i1 true))
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[TMP12]]
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll
index 396b54995fe2a..59463191b5ada 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll
@@ -75,9 +75,9 @@ define void @skip_free_iv_truncate(i16 %x, ptr %A) #0 {
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[X_I64]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> [[DOTSPLAT]], [[TMP55]]
-; CHECK-NEXT: [[TMP58:%.*]] = mul i64 3, [[TMP52]]
-; CHECK-NEXT: [[DOTSPLATINSERT24:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP58]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT24:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP52]], i64 0
; CHECK-NEXT: [[DOTSPLAT25:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT24]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP54:%.*]] = mul <vscale x 8 x i64> splat (i64 3), [[DOTSPLAT25]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -85,7 +85,7 @@ define void @skip_free_iv_truncate(i16 %x, ptr %A) #0 {
; CHECK-NEXT: [[TMP59:%.*]] = getelementptr i16, ptr [[A]], <vscale x 8 x i64> [[VEC_IND]]
; CHECK-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> [[TMP59]], i32 2, <vscale x 8 x i1> splat (i1 true)), !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP52]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT25]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[TMP54]]
; CHECK-NEXT: [[TMP60:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP60]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll
index c00868c3e0be2..cea04be41a5d1 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll
@@ -25,9 +25,9 @@ define void @test(ptr noalias nocapture %a, ptr noalias nocapture %b, i32 %v) {
; VLENUNK-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; VLENUNK-NEXT: [[TMP8:%.*]] = mul <vscale x 4 x i64> [[TMP6]], splat (i64 1)
; VLENUNK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP8]]
-; VLENUNK-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP5]]
-; VLENUNK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP11]], i64 0
+; VLENUNK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP5]], i64 0
; VLENUNK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; VLENUNK-NEXT: [[TMP9:%.*]] = mul <vscale x 4 x i64> splat (i64 1), [[DOTSPLAT]]
; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[V:%.*]], i64 0
; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -46,7 +46,7 @@ define void @test(ptr noalias nocapture %a, ptr noalias nocapture %b, i32 %v) {
; VLENUNK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 0
; VLENUNK-NEXT: store <vscale x 4 x i32> [[TMP17]], ptr [[TMP19]], align 4
; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; VLENUNK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; VLENUNK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[TMP9]]
; VLENUNK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLENUNK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; VLENUNK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll
index 2276b592aac8a..8b01a4b4cd904 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll
@@ -44,9 +44,9 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
; RV32-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; RV32-NEXT: [[TMP9:%.*]] = mul <vscale x 2 x i64> [[TMP7]], splat (i64 16)
; RV32-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP9]]
-; RV32-NEXT: [[TMP12:%.*]] = mul i64 16, [[TMP6]]
-; RV32-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP12]], i64 0
+; RV32-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP6]], i64 0
; RV32-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; RV32-NEXT: [[TMP10:%.*]] = mul <vscale x 2 x i64> splat (i64 16), [[DOTSPLAT]]
; RV32-NEXT: br label [[VECTOR_BODY:%.*]]
; RV32: vector.body:
; RV32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -62,7 +62,7 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
; RV32-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[A]], <vscale x 2 x i64> [[VEC_IND]]
; RV32-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> [[TMP18]], <vscale x 2 x ptr> [[TMP19]], i32 8, <vscale x 2 x i1> [[TMP14]]), !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]]
; RV32-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
-; RV32-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; RV32-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[TMP10]]
; RV32-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; RV32-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; RV32: middle.block:
@@ -123,9 +123,9 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
; RV64-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; RV64-NEXT: [[TMP9:%.*]] = mul <vscale x 2 x i64> [[TMP7]], splat (i64 16)
; RV64-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP9]]
-; RV64-NEXT: [[TMP12:%.*]] = mul i64 16, [[TMP6]]
-; RV64-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP12]], i64 0
+; RV64-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP6]], i64 0
; RV64-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; RV64-NEXT: [[TMP10:%.*]] = mul <vscale x 2 x i64> splat (i64 16), [[DOTSPLAT]]
; RV64-NEXT: br label [[VECTOR_BODY:%.*]]
; RV64: vector.body:
; RV64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -141,7 +141,7 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
; RV64-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[A]], <vscale x 2 x i64> [[VEC_IND]]
; RV64-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> [[TMP18]], <vscale x 2 x ptr> [[TMP19]], i32 8, <vscale x 2 x i1> [[TMP14]]), !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]]
; RV64-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
-; RV64-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; RV64-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[TMP10]]
; RV64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; RV64-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; RV64: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll
index ae6e833da01cf..49b36f9845735 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll
@@ -23,9 +23,9 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 8 x i64> [[TMP6]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP8]]
-; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP9]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP5]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = mul <vscale x 8 x i64> splat (i64 1), [[DOTSPLAT]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[A]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[B]], i64 0
@@ -53,7 +53,7 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i16, ptr [[TMP24]], i32 0
; CHECK-NEXT: call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> zeroinitializer, ptr [[TMP25]], i32 2, <vscale x 8 x i1> [[TMP22]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[TMP9]]
; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
index 6a5a0eb3e54b0..a656ce1f06537 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
@@ -22,9 +22,9 @@ define void @single_constant_stride_int_scaled(ptr %p) {
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 4 x i64> [[TMP8]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP10]]
-; CHECK-NEXT: [[TMP13:%.*]] = mul i64 1, [[TMP7]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP13]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP7]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = mul <vscale x 4 x i64> splat (i64 1), [[DOTSPLAT]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -35,7 +35,7 @@ define void @single_constant_stride_int_scaled(ptr %p) {
; CHECK-NEXT: [[TMP16:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], splat (i32 1)
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP16]], <vscale x 4 x ptr> [[TMP15]], i32 4, <vscale x 4 x i1> splat (i1 true))
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[TMP11]]
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
@@ -92,9 +92,9 @@ define void @single_constant_stride_int_iv(ptr %p) {
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 4 x i64> [[TMP6]], splat (i64 64)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP8]]
-; CHECK-NEXT: [[TMP11:%.*]] = mul i64 64, [[TMP5]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP11]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP5]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = mul <vscale x 4 x i64> splat (i64 64), [[DOTSPLAT]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -104,7 +104,7 @@ define void @single_constant_stride_int_iv(ptr %p) {
; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], splat (i32 1)
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP13]], <vscale x 4 x ptr> [[TMP12]], i32 4, <vscale x 4 x i1> splat (i1 true))
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[TMP9]]
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
@@ -560,9 +560,9 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
; STRIDED-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; STRIDED-NEXT: [[TMP14:%.*]] = mul <vscale x 4 x i64> [[TMP12]], splat (i64 1)
; STRIDED-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP14]]
-; STRIDED-NEXT: [[TMP15:%.*]] = mul i64 1, [[TMP11]]
-; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP15]], i64 0
+; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP11]], i64 0
; STRIDED-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; STRIDED-NEXT: [[TMP15:%.*]] = mul <vscale x 4 x i64> splat (i64 1), [[DOTSPLAT]]
; STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[STRIDE]], i64 0
; STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -576,7 +576,7 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
; STRIDED-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[P2]], <vscale x 4 x i64> [[TMP18]]
; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP20]], <vscale x 4 x ptr> [[TMP21]], i32 4, <vscale x 4 x i1> splat (i1 true)), !alias.scope [[META11:![0-9]+]], !noalias [[META8]]
; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
-; STRIDED-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; STRIDED-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[TMP15]]
; STRIDED-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; STRIDED-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; STRIDED: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
index 219e1fe05ebf7..2e1afb0c9dd28 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
@@ -325,9 +325,9 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
; SCALABLE-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; SCALABLE-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i64> [[TMP6]], splat (i64 1)
; SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP8]]
-; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]]
-; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i64 0
+; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP5]], i64 0
; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; SCALABLE-NEXT: [[TMP9:%.*]] = mul <vscale x 2 x i64> splat (i64 1), [[DOTSPLAT]]
; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[B:%.*]], i64 0
; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -342,7 +342,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
; SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0
; SCALABLE-NEXT: store <vscale x 2 x i64> [[PREDPHI]], ptr [[TMP13]], align 8
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[TMP9]]
; SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; SCALABLE: middle.block:
@@ -432,9 +432,9 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul <vscale x 2 x i64> [[TMP5]], splat (i64 1)
; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP7]]
-; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP4]]
-; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8]], i64 0
+; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP4]], i64 0
; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i64> splat (i64 1), [[DOTSPLAT]]
; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[B:%.*]], i64 0
; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -451,7 +451,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
; TF-SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0
; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP13]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
-; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[TMP8]]
; TF-SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; TF-SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; TF-SCALABLE: middle.block:
@@ -995,9 +995,9 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias
; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul <vscale x 2 x i64> [[TMP5]], splat (i64 1)
; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP7]]
-; TF-SCALABLE-NEXT: [[V:%.*]] = mul i64 1, [[TMP4]]
-; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V]], i64 0
+; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP4]], i64 0
; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; TF-SCALABLE-NEXT: [[TMP13:%.*]] = mul <vscale x 2 x i64> splat (i64 1), [[BROADCAST_SPLAT2]]
; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[B:%.*]], i64 0
; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8:%.*]], i64 0
@@ -1013,7 +1013,7 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias
; TF-SCALABLE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0
; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[DOTSPLAT]], ptr [[TMP11]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
-; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT2]]
+; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[TMP13]]
; TF-SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; TF-SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; TF-SCALABLE: middle.block:
@@ -1125,9 +1125,9 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
; SCALABLE-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; SCALABLE-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i64> [[TMP6]], splat (i64 1)
; SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP8]]
-; SCALABLE-NEXT: [[V:%.*]] = mul i64 1, [[TMP5]]
-; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V]], i64 0
+; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP5]], i64 0
; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; SCALABLE-NEXT: [[TMP15:%.*]] = mul <vscale x 2 x i64> splat (i64 1), [[BROADCAST_SPLAT]]
; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9:%.*]], i64 0
; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[B:%.*]], i64 0
@@ -1143,7 +1143,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
; SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0
; SCALABLE-NEXT: store <vscale x 2 x i64> [[DOTSPLAT]], ptr [[TMP13]], align 8
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[TMP15]]
; SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; SCALABLE: middle.block:
@@ -1231,9 +1231,9 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul <vscale x 2 x i64> [[TMP5]], splat (i64 1)
; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP7]]
-; TF-SCALABLE-NEXT: [[V:%.*]] = mul i64 1, [[TMP4]]
-; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V]], i64 0
+; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP4]], i64 0
; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; TF-SCALABLE-NEXT: [[TMP15:%.*]] = mul <vscale x 2 x i64> splat (i64 1), [[BROADCAST_SPLAT]]
; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8:%.*]], i64 0
; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[B:%.*]], i64 0
@@ -1251,7 +1251,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
; TF-SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0
; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[DOTSPLAT]], ptr [[TMP13]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
-; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[TMP15]]
; TF-SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; TF-SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; TF-SCALABLE: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll
index 8893956bb3384..1851fd1c6e11b 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll
@@ -605,9 +605,9 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul <vscale x 4 x i32> [[TMP12]], splat (i32 1)
; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP14]]
; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP10]] to i32
-; NO-VP-OUTLOOP-NEXT: [[TMP17:%.*]] = mul i32 1, [[TMP16]]
-; NO-VP-OUTLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP17]], i64 0
+; NO-VP-OUTLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP16]], i64 0
; NO-VP-OUTLOOP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; NO-VP-OUTLOOP-NEXT: [[TMP13:%.*]] = mul <vscale x 4 x i32> splat (i32 1), [[DOTSPLAT]]
; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
; NO-VP-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
; NO-VP-OUTLOOP: vector.body:
@@ -622,7 +622,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
; NO-VP-OUTLOOP-NEXT: [[TMP22:%.*]] = select <vscale x 4 x i1> [[TMP27]], <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], <vscale x 4 x i32> zeroinitializer
; NO-VP-OUTLOOP-NEXT: [[TMP23]] = add <vscale x 4 x i32> [[TMP22]], [[VEC_PHI]]
; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
-; NO-VP-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]]
+; NO-VP-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[TMP13]]
; NO-VP-OUTLOOP-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-OUTLOOP-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; NO-VP-OUTLOOP: middle.block:
@@ -667,9 +667,9 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
; NO-VP-INLOOP-NEXT: [[TMP8:%.*]] = mul <vscale x 4 x i32> [[TMP6]], splat (i32 1)
; NO-VP-INLOOP-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP8]]
; NO-VP-INLOOP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP5]] to i32
-; NO-VP-INLOOP-NEXT: [[TMP11:%.*]] = mul i32 1, [[TMP10]]
-; NO-VP-INLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP11]], i64 0
+; NO-VP-INLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP10]], i64 0
; NO-VP-INLOOP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; NO-VP-INLOOP-NEXT: [[TMP9:%.*]] = mul <vscale x 4 x i32> splat (i32 1), [[DOTSPLAT]]
; NO-VP-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
; NO-VP-INLOOP: vector.body:
; NO-VP-INLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -684,7 +684,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
; NO-VP-INLOOP-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP16]])
; NO-VP-INLOOP-NEXT: [[TMP18]] = add i32 [[TMP17]], [[VEC_PHI]]
; NO-VP-INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; NO-VP-INLOOP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]]
+; NO-VP-INLOOP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[TMP9]]
; NO-VP-INLOOP-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-INLOOP-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; NO-VP-INLOOP: middle.block:
@@ -797,9 +797,9 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul <vscale x 4 x i32> [[TMP12]], splat (i32 1)
; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP14]]
; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP10]] to i32
-; NO-VP-OUTLOOP-NEXT: [[TMP17:%.*]] = mul i32 1, [[TMP16]]
-; NO-VP-OUTLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP17]], i64 0
+; NO-VP-OUTLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP16]], i64 0
; NO-VP-OUTLOOP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; NO-VP-OUTLOOP-NEXT: [[TMP13:%.*]] = mul <vscale x 4 x i32> splat (i32 1), [[DOTSPLAT]]
; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
; NO-VP-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
; NO-VP-OUTLOOP: vector.body:
@@ -815,7 +815,7 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
; NO-VP-OUTLOOP-NEXT: [[TMP23:%.*]] = xor <vscale x 4 x i1> [[TMP28]], splat (i1 true)
; NO-VP-OUTLOOP-NEXT: [[PREDPHI]] = select <vscale x 4 x i1> [[TMP23]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[TMP22]]
; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
-; NO-VP-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]]
+; NO-VP-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[TMP13]]
; NO-VP-OUTLOOP-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-OUTLOOP-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; NO-VP-OUTLOOP: middle.block:
@@ -864,9 +864,9 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
; NO-VP-INLOOP-NEXT: [[TMP8:%.*]] = mul <vscale x 4 x i32> [[TMP6]], splat (i32 1)
; NO-VP-INLOOP-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP8]]
; NO-VP-INLOOP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP5]] to i32
-; NO-VP-INLOOP-NEXT: [[TMP11:%.*]] = mul i32 1, [[TMP10]]
-; NO-VP-INLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP11]], i64 0
+; NO-VP-INLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP10]], i64 0
; NO-VP-INLOOP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; NO-VP-INLOOP-NEXT: [[TMP9:%.*]] = mul <vscale x 4 x i32> splat (i32 1), [[DOTSPLAT]]
; NO-VP-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
; NO-VP-INLOOP: vector.body:
; NO-VP-INLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -881,7 +881,7 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
; NO-VP-INLOOP-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP16]])
; NO-VP-INLOOP-NEXT: [[TMP18]] = add i32 [[TMP17]], [[VEC_PHI]]
; NO-VP-INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; NO-VP-INLOOP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]]
+; NO-VP-INLOOP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[TMP9]]
; NO-VP-INLOOP-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-INLOOP-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; NO-VP-INLOOP: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
index 5c1cb1da120c0..47136d0f73a0a 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
@@ -150,9 +150,7 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l,
; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[DOTSPLATINSERT2]], <16 x i16> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = mul <16 x i16> splat (i16 16), [[TMP2]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i16> [[DOTSPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP11:%.*]] = mul <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, [[DOTSPLAT]]
+; CHECK-NEXT: [[TMP11:%.*]] = mul <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, [[TMP2]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add <16 x i16> zeroinitializer, [[TMP11]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[OFF]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
@@ -203,9 +201,7 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l,
; CHECK-NEXT: [[DOTSPLATINSERT15:%.*]] = insertelement <8 x i16> poison, i16 [[BC_RESUME_VAL]], i64 0
; CHECK-NEXT: [[DOTSPLAT16:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT15]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION17:%.*]] = add <8 x i16> [[DOTSPLAT16]], [[TMP14]]
-; CHECK-NEXT: [[TMP15:%.*]] = mul i16 [[TMP0]], 8
-; CHECK-NEXT: [[DOTSPLATINSERT18:%.*]] = insertelement <8 x i16> poison, i16 [[TMP15]], i64 0
-; CHECK-NEXT: [[DOTSPLAT19:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT18]], <8 x i16> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[DOTSPLAT19:%.*]] = mul <8 x i16> [[DOTSPLAT14]], splat (i16 8)
; CHECK-NEXT: [[BROADCAST_SPLATINSERT22:%.*]] = insertelement <8 x i16> poison, i16 [[OFF]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT23:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT22]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
index 45d1385fd6fa9..2f6276172f296 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
@@ -719,9 +719,7 @@ define void @wombat(i32 %arg, ptr %dst) #1 {
; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 [[MUL]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP1]]
-; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[ARG]], 8
-; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <8 x i32> poison, i32 [[TMP2]], i64 0
-; CHECK-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT3]], <8 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[DOTSPLAT4:%.*]] = mul <8 x i32> [[DOTSPLAT2]], splat (i32 8)
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -793,9 +791,7 @@ define void @wombat2(i32 %arg, ptr %dst) #1 {
; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 [[MUL]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP1]]
-; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[ARG]], 8
-; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <8 x i32> poison, i32 [[TMP2]], i64 0
-; CHECK-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT3]], <8 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[DOTSPLAT4:%.*]] = mul <8 x i32> [[DOTSPLAT2]], splat (i32 8)
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -870,9 +866,7 @@ define void @with_dead_use(i32 %arg, ptr %dst) #1 {
; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 [[MUL]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP1]]
-; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[ARG]], 8
-; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <8 x i32> poison, i32 [[TMP2]], i64 0
-; CHECK-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT3]], <8 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[DOTSPLAT4:%.*]] = mul <8 x i32> [[DOTSPLAT2]], splat (i32 8)
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll
index 1dd2692ba6822..c78cf3c586788 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll
@@ -17,9 +17,7 @@ define i16 @wide_add_induction_step_live_in(ptr %dst, i64 %N, i16 %off) {
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[O_1]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i16> splat (i16 4), [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[O_1]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i16> [[DOTSPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i16> <i16 0, i16 1, i16 2, i16 3>, [[DOTSPLAT]]
+; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i16> <i16 0, i16 1, i16 2, i16 3>, [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i16> zeroinitializer, [[TMP2]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
@@ -91,9 +89,7 @@ define i16 @wide_sub_induction_step_live_in(ptr %dst, i64 %N, i16 %off) {
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i16> splat (i16 4), [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i16> [[DOTSPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i16> <i16 0, i16 1, i16 2, i16 3>, [[DOTSPLAT]]
+; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i16> <i16 0, i16 1, i16 2, i16 3>, [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i16> zeroinitializer, [[TMP3]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i16> poison, i16 [[O_1]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT1]], <4 x i16> poison, <4 x i32> zeroinitializer
diff --git a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
index 312125608c87e..6cac99c0cbc50 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
@@ -131,7 +131,7 @@ define void @_Z3fn1v() #0 {
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i1> [[BROADCAST_SPLATINSERT]], <16 x i1> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[TMP34:%.*]] = xor <16 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
; CHECK-NEXT: br label [[VECTOR_BODY29:%.*]]
-; CHECK: vector.body28:
+; CHECK: vector.body30:
; CHECK-NEXT: [[INDEX34:%.*]] = phi i64 [ 0, [[VECTOR_PH25]] ], [ [[INDEX_NEXT39:%.*]], [[VECTOR_BODY29]] ]
; CHECK-NEXT: [[VEC_IND35:%.*]] = phi <16 x i64> [ <i64 8, i64 10, i64 12, i64 14, i64 16, i64 18, i64 20, i64 22, i64 24, i64 26, i64 28, i64 30, i64 32, i64 34, i64 36, i64 38>, [[VECTOR_PH25]] ], [ [[VEC_IND_NEXT36:%.*]], [[VECTOR_BODY29]] ]
; CHECK-NEXT: [[VEC_IND37:%.*]] = phi <16 x i64> [ <i64 0, i64 2, i64 4, i64 6, i64 8, i64 10, i64 12, i64 14, i64 16, i64 18, i64 20, i64 22, i64 24, i64 26, i64 28, i64 30>, [[VECTOR_PH25]] ], [ [[VEC_IND_NEXT38:%.*]], [[VECTOR_BODY29]] ]
@@ -153,18 +153,18 @@ define void @_Z3fn1v() #0 {
; CHECK-NEXT: [[VEC_IND_NEXT36]] = add <16 x i64> [[VEC_IND35]], splat (i64 32)
; CHECK-NEXT: [[VEC_IND_NEXT38]] = add <16 x i64> [[VEC_IND37]], splat (i64 32)
; CHECK-NEXT: [[TMP41:%.*]] = icmp eq i64 [[INDEX_NEXT39]], [[N_VEC32]]
-; CHECK-NEXT: br i1 [[TMP41]], label [[MIDDLE_BLOCK35:%.*]], label [[VECTOR_BODY29]], !llvm.loop [[LOOP4:![0-9]+]]
-; CHECK: middle.block35:
+; CHECK-NEXT: br i1 [[TMP41]], label [[MIDDLE_BLOCK37:%.*]], label [[VECTOR_BODY29]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: middle.block37:
; CHECK-NEXT: [[CMP_N40:%.*]] = icmp eq i64 [[TMP28]], [[N_VEC32]]
; CHECK-NEXT: br i1 [[CMP_N40]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK43:%.*]]
-; CHECK: vec.epilog.iter.check42:
+; CHECK: vec.epilog.iter.check44:
; CHECK-NEXT: [[TMP42:%.*]] = mul i64 [[N_VEC32]], 2
; CHECK-NEXT: [[IND_END55:%.*]] = add i64 8, [[TMP42]]
; CHECK-NEXT: [[IND_END58:%.*]] = mul i64 [[N_VEC32]], 2
; CHECK-NEXT: [[N_VEC_REMAINING49:%.*]] = sub i64 [[TMP28]], [[N_VEC32]]
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK50:%.*]] = icmp ult i64 [[N_VEC_REMAINING49]], 8
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK50]], label [[VEC_EPILOG_SCALAR_PH40]], label [[VEC_EPILOG_PH42]]
-; CHECK: vec.epilog.ph41:
+; CHECK: vec.epilog.ph43:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL51:%.*]] = phi i64 [ [[N_VEC32]], [[VEC_EPILOG_ITER_CHECK43]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK24]] ]
; CHECK-NEXT: [[BC_RESUME_VAL42:%.*]] = phi i64 [ [[IND_END41]], [[VEC_EPILOG_ITER_CHECK43]] ], [ 8, [[VECTOR_MAIN_LOOP_ITER_CHECK24]] ]
; CHECK-NEXT: [[BC_RESUME_VAL44:%.*]] = phi i64 [ [[IND_END43]], [[VEC_EPILOG_ITER_CHECK43]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK24]] ]
@@ -183,7 +183,7 @@ define void @_Z3fn1v() #0 {
; CHECK-NEXT: [[DOTSPLAT68:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT67]], <8 x i64> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION69:%.*]] = add <8 x i64> [[DOTSPLAT68]], <i64 0, i64 2, i64 4, i64 6, i64 8, i64 10, i64 12, i64 14>
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY51:%.*]]
-; CHECK: vec.epilog.vector.body57:
+; CHECK: vec.epilog.vector.body59:
; CHECK-NEXT: [[INDEX61:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL51]], [[VEC_EPILOG_PH42]] ], [ [[INDEX_NEXT74:%.*]], [[VEC_EPILOG_VECTOR_BODY51]] ]
; CHECK-NEXT: [[VEC_IND65:%.*]] = phi <8 x i64> [ [[INDUCTION64]], [[VEC_EPILOG_PH42]] ], [ [[VEC_IND_NEXT66:%.*]], [[VEC_EPILOG_VECTOR_BODY51]] ]
; CHECK-NEXT: [[VEC_IND70:%.*]] = phi <8 x i64> [ [[INDUCTION69]], [[VEC_EPILOG_PH42]] ], [ [[VEC_IND_NEXT71:%.*]], [[VEC_EPILOG_VECTOR_BODY51]] ]
@@ -206,10 +206,10 @@ define void @_Z3fn1v() #0 {
; CHECK-NEXT: [[VEC_IND_NEXT71]] = add <8 x i64> [[VEC_IND70]], splat (i64 16)
; CHECK-NEXT: [[TMP55:%.*]] = icmp eq i64 [[INDEX_NEXT74]], [[N_VEC53]]
; CHECK-NEXT: br i1 [[TMP55]], label [[VEC_EPILOG_MIDDLE_BLOCK40:%.*]], label [[VEC_EPILOG_VECTOR_BODY51]], !llvm.loop [[LOOP5:![0-9]+]]
-; CHECK: vec.epilog.middle.block64:
+; CHECK: vec.epilog.middle.block66:
; CHECK-NEXT: [[CMP_N65:%.*]] = icmp eq i64 [[TMP28]], [[N_VEC53]]
; CHECK-NEXT: br i1 [[CMP_N65]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH40]]
-; CHECK: vec.epilog.scalar.ph40:
+; CHECK: vec.epilog.scalar.ph42:
; CHECK-NEXT: [[BC_RESUME_VAL56:%.*]] = phi i64 [ [[IND_END54]], [[VEC_EPILOG_MIDDLE_BLOCK40]] ], [ [[IND_END55]], [[VEC_EPILOG_ITER_CHECK43]] ], [ 8, [[ITER_CHECK22]] ]
; CHECK-NEXT: [[BC_RESUME_VAL67:%.*]] = phi i64 [ [[IND_END57]], [[VEC_EPILOG_MIDDLE_BLOCK40]] ], [ [[IND_END58]], [[VEC_EPILOG_ITER_CHECK43]] ], [ 0, [[ITER_CHECK22]] ]
; CHECK-NEXT: br label [[FOR_BODY_US:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll b/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll
index c745b4f74786c..3f49fd9334615 100644
--- a/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll
+++ b/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll
@@ -50,9 +50,7 @@ define void @test(i32 %arg, i32 %L1.limit, i32 %L2.switch, i1 %c, ptr %dst) {
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> splat (i32 1), [[TMP4]]
-; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[INDUCTION_IV_LCSSA1]], 4
-; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i64 0
-; CHECK-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT3]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[DOTSPLAT4:%.*]] = mul <4 x i32> [[DOTSPLAT]], splat (i32 4)
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[L1_EXIT_VAL]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll
index 9aff603ef5fce..c505340a4dfda 100644
--- a/llvm/test/Transforms/LoopVectorize/float-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll
@@ -38,9 +38,7 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N)
; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer
; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fsub fast <4 x float> [[DOTSPLAT3]], [[TMP6]]
-; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], 4.000000e+00
-; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0
-; VEC4_INTERL1-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT4]], <4 x float> poison, <4 x i32> zeroinitializer
+; VEC4_INTERL1-NEXT: [[DOTSPLAT5:%.*]] = fmul fast <4 x float> [[DOTSPLAT]], splat (float 4.000000e+00)
; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]]
; VEC4_INTERL1: vector.body:
; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -87,11 +85,9 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N)
; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float
; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]]
; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP1]]
-; VEC4_INTERL2-NEXT: [[FPINC_INS:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0
-; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[FPINC_INS]], <float 4.000000e+00, float poison, float poison, float poison>
-; VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <4 x i32> zeroinitializer
; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0
; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+; VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = fmul fast <4 x float> [[DOTSPLAT]], splat (float 4.000000e+00)
; VEC4_INTERL2-NEXT: [[TMP7:%.*]] = fmul fast <4 x float> [[DOTSPLAT]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0
; VEC4_INTERL2-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer
@@ -202,9 +198,7 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N)
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer
; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fsub fast <2 x float> [[DOTSPLAT3]], [[TMP6]]
-; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], 2.000000e+00
-; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0
-; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT4]], <2 x float> poison, <2 x i32> zeroinitializer
+; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT5:%.*]] = fmul fast <2 x float> [[DOTSPLAT]], splat (float 2.000000e+00)
; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]]
; VEC2_INTERL1_PRED_STORE: vector.body:
; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -285,9 +279,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32
; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer
; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fsub reassoc <4 x float> [[DOTSPLAT3]], [[TMP6]]
-; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul reassoc float [[FPINC]], 4.000000e+00
-; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0
-; VEC4_INTERL1-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT4]], <4 x float> poison, <4 x i32> zeroinitializer
+; VEC4_INTERL1-NEXT: [[DOTSPLAT5:%.*]] = fmul reassoc <4 x float> [[DOTSPLAT]], splat (float 4.000000e+00)
; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]]
; VEC4_INTERL1: vector.body:
; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -334,11 +326,9 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32
; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float
; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = fmul reassoc float [[FPINC]], [[DOTCAST]]
; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP1]]
-; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0
-; VEC4_INTERL2-NEXT: [[MUL:%.*]] = fmul reassoc <4 x float> [[DOTSPLATINSERT2]], <float 4.000000e+00, float poison, float poison, float poison>
-; VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[MUL]], <4 x float> poison, <4 x i32> zeroinitializer
; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0
; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+; VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = fmul reassoc <4 x float> [[DOTSPLAT]], splat (float 4.000000e+00)
; VEC4_INTERL2-NEXT: [[TMP7:%.*]] = fmul reassoc <4 x float> [[DOTSPLAT]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0
; VEC4_INTERL2-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT1]], <4 x float> poison, <4 x i32> zeroinitializer
@@ -451,9 +441,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer
; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fsub reassoc <2 x float> [[DOTSPLAT3]], [[TMP6]]
-; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul reassoc float [[FPINC]], 2.000000e+00
-; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0
-; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT4]], <2 x float> poison, <2 x i32> zeroinitializer
+; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT5:%.*]] = fmul reassoc <2 x float> [[DOTSPLAT]], splat (float 2.000000e+00)
; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]]
; VEC2_INTERL1_PRED_STORE: vector.body:
; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -770,9 +758,7 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca
; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT5]], <4 x float> poison, <4 x i32> zeroinitializer
; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fadd fast <4 x float> [[DOTSPLAT6]], [[TMP13]]
-; VEC4_INTERL1-NEXT: [[TMP14:%.*]] = fmul fast float [[TMP0]], 4.000000e+00
-; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT7:%.*]] = insertelement <4 x float> poison, float [[TMP14]], i64 0
-; VEC4_INTERL1-NEXT: [[DOTSPLAT8:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT7]], <4 x float> poison, <4 x i32> zeroinitializer
+; VEC4_INTERL1-NEXT: [[DOTSPLAT8:%.*]] = fmul fast <4 x float> [[DOTSPLAT]], splat (float 4.000000e+00)
; VEC4_INTERL1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0
; VEC4_INTERL1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -841,11 +827,9 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca
; VEC4_INTERL2-NEXT: [[DOTCAST2:%.*]] = uitofp nneg i64 [[N_VEC]] to float
; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP0]], [[DOTCAST2]]
; VEC4_INTERL2-NEXT: [[IND_END3:%.*]] = fadd fast float [[INIT:%.*]], [[TMP3]]
-; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0
-; VEC4_INTERL2-NEXT: [[TMP19:%.*]] = fmul fast <4 x float> [[DOTSPLATINSERT2]], <float 4.000000e+00, float poison, float poison, float poison>
-; VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[TMP19]], <4 x float> poison, <4 x i32> zeroinitializer
; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0
; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+; VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = fmul fast <4 x float> [[DOTSPLAT]], splat (float 4.000000e+00)
; VEC4_INTERL2-NEXT: [[TMP20:%.*]] = fmul fast <4 x float> [[DOTSPLAT]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT6:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0
; VEC4_INTERL2-NEXT: [[DOTSPLAT7:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT6]], <4 x float> poison, <4 x i32> zeroinitializer
@@ -1012,9 +996,7 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT5]], <2 x float> poison, <2 x i32> zeroinitializer
; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fadd fast <2 x float> [[DOTSPLAT6]], [[TMP13]]
-; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP14:%.*]] = fmul fast float [[TMP0]], 2.000000e+00
-; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT7:%.*]] = insertelement <2 x float> poison, float [[TMP14]], i64 0
-; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT8:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT7]], <2 x float> poison, <2 x i32> zeroinitializer
+; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT8:%.*]] = fmul fast <2 x float> [[DOTSPLAT]], splat (float 2.000000e+00)
; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0
; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/induction-step.ll b/llvm/test/Transforms/LoopVectorize/induction-step.ll
index 343c5ee538790..6b57ed7bd6d47 100644
--- a/llvm/test/Transforms/LoopVectorize/induction-step.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction-step.ll
@@ -39,9 +39,7 @@ define void @induction_with_global(i32 %init, ptr noalias nocapture %A, i32 %N)
; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[INIT]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION4:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP6]]
-; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP0]], 8
-; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <8 x i32> poison, i32 [[TMP7]], i64 0
-; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT5]], <8 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[DOTSPLAT6:%.*]] = mul <8 x i32> [[DOTSPLAT3]], splat (i32 8)
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -59,11 +57,11 @@ define void @induction_with_global(i32 %init, ptr noalias nocapture %A, i32 %N)
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ [[INIT]], %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ [[INIT]], %[[ENTRY]] ]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
-; CHECK-NEXT: [[X_05:%.*]] = phi i32 [ [[BC_RESUME_VAL5]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[X_05:%.*]] = phi i32 [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i32 [[X_05]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP0]], [[X_05]]
@@ -130,9 +128,7 @@ define i32 @induction_with_loop_inv(i32 %init, ptr noalias nocapture %A, i32 %N,
; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 [[X_011]], i64 0
; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i32> [[DOTSPLAT3]], [[TMP12]]
-; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[J_012]], 8
-; CHECK-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <8 x i32> poison, i32 [[TMP5]], i64 0
-; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT5]], <8 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[DOTSPLAT6:%.*]] = mul <8 x i32> [[DOTSPLAT]], splat (i32 8)
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -150,11 +146,11 @@ define i32 @induction_with_loop_inv(i32 %init, ptr noalias nocapture %A, i32 %N,
; CHECK-NEXT: br i1 [[CMP_N]], label %[[INNER_EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[OUTER_HEADER]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[X_011]], %[[OUTER_HEADER]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[X_011]], %[[OUTER_HEADER]] ]
; CHECK-NEXT: br label %[[INNER:.*]]
; CHECK: [[INNER]]:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[INNER]] ]
-; CHECK-NEXT: [[X_18:%.*]] = phi i32 [ [[BC_RESUME_VAL5]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[INNER]] ]
+; CHECK-NEXT: [[X_18:%.*]] = phi i32 [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[INNER]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i32 [[X_18]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ADD]] = add nsw i32 [[X_18]], [[J_012]]
@@ -225,9 +221,7 @@ define void @non_primary_iv_loop_inv_trunc(ptr %a, i64 %n, i64 %step) {
; CHECK-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT5]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = mul <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[DOTSPLAT6]]
; CHECK-NEXT: [[INDUCTION7:%.*]] = add <8 x i32> zeroinitializer, [[TMP4]]
-; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP3]], 8
-; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <8 x i32> poison, i32 [[TMP5]], i64 0
-; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT8]], <8 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[DOTSPLAT9:%.*]] = mul <8 x i32> [[DOTSPLAT6]], splat (i32 8)
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -245,11 +239,11 @@ define void @non_primary_iv_loop_inv_trunc(ptr %a, i64 %n, i64 %step) {
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
-; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ]
+; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I]]
; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[J]] to i32
; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP0]], align 4
@@ -350,22 +344,18 @@ define void @wide_add_induction_step_live_in(ptr %dst, i64 %N, i16 %off) {
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = mul <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, [[DOTSPLAT]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i16> zeroinitializer, [[TMP1]]
-; CHECK-NEXT: [[TMP2:%.*]] = mul i16 [[O_1]], 8
-; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i64 0
-; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT1]], <8 x i16> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[O_1]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = mul <8 x i16> [[DOTSPLAT]], splat (i16 8)
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i16> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = add <8 x i16> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP4:%.*]] = add <8 x i16> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0
; CHECK-NEXT: store <8 x i16> [[TMP4]], ptr [[TMP6]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i16> [[VEC_IND]], [[DOTSPLAT2]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i16> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -373,11 +363,11 @@ define void @wide_add_induction_step_live_in(ptr %dst, i64 %N, i16 %off) {
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i16 [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i16 [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[IV_2:%.*]] = phi i16 [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV_2:%.*]] = phi i16 [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[ADD]] = add i16 [[IV_2]], [[O_1]]
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT: store i16 [[ADD]], ptr [[GEP_DST]], align 2
@@ -422,9 +412,7 @@ define void @wide_sub_induction_step_live_in(ptr %dst, i64 %N, i16 %off) {
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, [[DOTSPLAT]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i16> zeroinitializer, [[TMP2]]
-; CHECK-NEXT: [[TMP3:%.*]] = mul i16 [[TMP0]], 8
-; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP3]], i64 0
-; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT1]], <8 x i16> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[DOTSPLAT2:%.*]] = mul <8 x i16> [[DOTSPLAT]], splat (i16 8)
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[O_1]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll
index d2d123af4b8f7..e58f9ee58c53c 100644
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll
@@ -3138,7 +3138,7 @@ define i32 @testoverflowcheck() {
; CHECK-NEXT: [[TMP5]] = and <2 x i32> [[BROADCAST_SPLAT]], [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[TMP5]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP3]], [[N_VEC]]
@@ -3153,7 +3153,7 @@ define i32 @testoverflowcheck() {
; CHECK-NEXT: [[AND_I]] = and i32 [[TMP0]], [[AND3_I]]
; CHECK-NEXT: [[INC_I]] = add i8 [[INC4_I]], 1
; CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp eq i8 [[INC_I]], 0
-; CHECK-NEXT: br i1 [[TOBOOL_I]], label [[LOOPEXIT]], label [[COND_END_I]], !llvm.loop [[LOOP35:![0-9]+]]
+; CHECK-NEXT: br i1 [[TOBOOL_I]], label [[LOOPEXIT]], label [[COND_END_I]], !llvm.loop [[LOOP33:![0-9]+]]
; CHECK: loopexit:
; CHECK-NEXT: [[AND_I_LCSSA:%.*]] = phi i32 [ [[AND_I]], [[COND_END_I]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[AND_I_LCSSA]]
@@ -3180,7 +3180,7 @@ define i32 @testoverflowcheck() {
; IND-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; IND-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; IND-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
+; IND-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
; IND: middle.block:
; IND-NEXT: [[TMP6:%.*]] = and <2 x i32> [[TMP4]], [[BROADCAST_SPLAT]]
; IND-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[TMP6]])
@@ -3195,7 +3195,7 @@ define i32 @testoverflowcheck() {
; IND-NEXT: [[AND_I:%.*]] = and i32 [[BC_MERGE_RDX]], [[TMP0]]
; IND-NEXT: [[INC_I]] = add i8 [[INC4_I]], 1
; IND-NEXT: [[TOBOOL_I:%.*]] = icmp eq i8 [[INC_I]], 0
-; IND-NEXT: br i1 [[TOBOOL_I]], label [[LOOPEXIT]], label [[COND_END_I]], !llvm.loop [[LOOP35:![0-9]+]]
+; IND-NEXT: br i1 [[TOBOOL_I]], label [[LOOPEXIT]], label [[COND_END_I]], !llvm.loop [[LOOP33:![0-9]+]]
; IND: loopexit:
; IND-NEXT: [[AND_I_LCSSA:%.*]] = phi i32 [ [[AND_I]], [[COND_END_I]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
; IND-NEXT: ret i32 [[AND_I_LCSSA]]
@@ -3222,7 +3222,7 @@ define i32 @testoverflowcheck() {
; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; UNROLL-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
+; UNROLL-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
; UNROLL: middle.block:
; UNROLL-NEXT: [[TMP6:%.*]] = and <2 x i32> [[TMP4]], [[BROADCAST_SPLAT]]
; UNROLL-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[TMP6]])
@@ -3237,7 +3237,7 @@ define i32 @testoverflowcheck() {
; UNROLL-NEXT: [[AND_I:%.*]] = and i32 [[BC_MERGE_RDX]], [[TMP0]]
; UNROLL-NEXT: [[INC_I]] = add i8 [[INC4_I]], 1
; UNROLL-NEXT: [[TOBOOL_I:%.*]] = icmp eq i8 [[INC_I]], 0
-; UNROLL-NEXT: br i1 [[TOBOOL_I]], label [[LOOPEXIT]], label [[COND_END_I]], !llvm.loop [[LOOP35:![0-9]+]]
+; UNROLL-NEXT: br i1 [[TOBOOL_I]], label [[LOOPEXIT]], label [[COND_END_I]], !llvm.loop [[LOOP33:![0-9]+]]
; UNROLL: loopexit:
; UNROLL-NEXT: [[AND_I_LCSSA:%.*]] = phi i32 [ [[AND_I]], [[COND_END_I]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
; UNROLL-NEXT: ret i32 [[AND_I_LCSSA]]
@@ -3269,7 +3269,7 @@ define i32 @testoverflowcheck() {
; UNROLL-NO-IC-NEXT: [[TMP6]] = and <2 x i32> [[BROADCAST_SPLAT]], [[VEC_PHI1]]
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NO-IC-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
+; UNROLL-NO-IC-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = and <2 x i32> [[TMP6]], [[TMP5]]
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[BIN_RDX]])
@@ -3285,7 +3285,7 @@ define i32 @testoverflowcheck() {
; UNROLL-NO-IC-NEXT: [[AND_I]] = and i32 [[TMP0]], [[AND3_I]]
; UNROLL-NO-IC-NEXT: [[INC_I]] = add i8 [[INC4_I]], 1
; UNROLL-NO-IC-NEXT: [[TOBOOL_I:%.*]] = icmp eq i8 [[INC_I]], 0
-; UNROLL-NO-IC-NEXT: br i1 [[TOBOOL_I]], label [[LOOPEXIT]], label [[COND_END_I]], !llvm.loop [[LOOP35:![0-9]+]]
+; UNROLL-NO-IC-NEXT: br i1 [[TOBOOL_I]], label [[LOOPEXIT]], label [[COND_END_I]], !llvm.loop [[LOOP33:![0-9]+]]
; UNROLL-NO-IC: loopexit:
; UNROLL-NO-IC-NEXT: [[AND_I_LCSSA:%.*]] = phi i32 [ [[AND_I]], [[COND_END_I]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-IC-NEXT: ret i32 [[AND_I_LCSSA]]
@@ -3312,7 +3312,7 @@ define i32 @testoverflowcheck() {
; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; INTERLEAVE-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; INTERLEAVE-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
+; INTERLEAVE-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
; INTERLEAVE: middle.block:
; INTERLEAVE-NEXT: [[TMP6:%.*]] = and <4 x i32> [[TMP4]], [[BROADCAST_SPLAT]]
; INTERLEAVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP6]])
@@ -3327,7 +3327,7 @@ define i32 @testoverflowcheck() {
; INTERLEAVE-NEXT: [[AND_I:%.*]] = and i32 [[BC_MERGE_RDX]], [[TMP0]]
; INTERLEAVE-NEXT: [[INC_I]] = add i8 [[INC4_I]], 1
; INTERLEAVE-NEXT: [[TOBOOL_I:%.*]] = icmp eq i8 [[INC_I]], 0
-; INTERLEAVE-NEXT: br i1 [[TOBOOL_I]], label [[LOOPEXIT]], label [[COND_END_I]], !llvm.loop [[LOOP35:![0-9]+]]
+; INTERLEAVE-NEXT: br i1 [[TOBOOL_I]], label [[LOOPEXIT]], label [[COND_END_I]], !llvm.loop [[LOOP33:![0-9]+]]
; INTERLEAVE: loopexit:
; INTERLEAVE-NEXT: [[AND_I_LCSSA:%.*]] = phi i32 [ [[AND_I]], [[COND_END_I]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
; INTERLEAVE-NEXT: ret i32 [[AND_I_LCSSA]]
@@ -3401,7 +3401,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) {
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -3420,7 +3420,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) {
; CHECK-NEXT: [[IDX_INC_EXT]] = zext i8 [[IDX_INC]] to i32
; CHECK-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1
; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]]
-; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP37:![0-9]+]]
+; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP35:![0-9]+]]
; CHECK: exit.loopexit:
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
@@ -3486,7 +3486,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) {
; IND-NEXT: [[IDX_INC_EXT]] = zext i8 [[IDX_INC]] to i32
; IND-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1
; IND-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]]
-; IND-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP37:![0-9]+]]
+; IND-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP35:![0-9]+]]
; IND: exit.loopexit:
; IND-NEXT: br label [[EXIT]]
; IND: exit:
@@ -3535,7 +3535,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) {
; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4)
; UNROLL-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
+; UNROLL-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
; UNROLL: middle.block:
; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -3555,7 +3555,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) {
; UNROLL-NEXT: [[IDX_INC_EXT]] = zext i8 [[IDX_INC]] to i32
; UNROLL-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1
; UNROLL-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]]
-; UNROLL-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP37:![0-9]+]]
+; UNROLL-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP35:![0-9]+]]
; UNROLL: exit.loopexit:
; UNROLL-NEXT: br label [[EXIT]]
; UNROLL: exit:
@@ -3609,7 +3609,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) {
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2)
; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NO-IC-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
+; UNROLL-NO-IC-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -3628,7 +3628,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) {
; UNROLL-NO-IC-NEXT: [[IDX_INC_EXT]] = zext i8 [[IDX_INC]] to i32
; UNROLL-NO-IC-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1
; UNROLL-NO-IC-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]]
-; UNROLL-NO-IC-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP37:![0-9]+]]
+; UNROLL-NO-IC-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP35:![0-9]+]]
; UNROLL-NO-IC: exit.loopexit:
; UNROLL-NO-IC-NEXT: br label [[EXIT]]
; UNROLL-NO-IC: exit:
@@ -3677,7 +3677,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) {
; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8)
; INTERLEAVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; INTERLEAVE-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
+; INTERLEAVE-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
; INTERLEAVE: middle.block:
; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -3697,7 +3697,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) {
; INTERLEAVE-NEXT: [[IDX_INC_EXT]] = zext i8 [[IDX_INC]] to i32
; INTERLEAVE-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1
; INTERLEAVE-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]]
-; INTERLEAVE-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP37:![0-9]+]]
+; INTERLEAVE-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP35:![0-9]+]]
; INTERLEAVE: exit.loopexit:
; INTERLEAVE-NEXT: br label [[EXIT]]
; INTERLEAVE: exit:
@@ -3781,7 +3781,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 8)
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -3801,7 +3801,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
; CHECK-NEXT: [[MUL]] = mul i32 [[IDX_INC_EXT]], 4
; CHECK-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1
; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]]
-; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP39:![0-9]+]]
+; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP37:![0-9]+]]
; CHECK: exit.loopexit:
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
@@ -3849,7 +3849,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 8)
; IND-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; IND-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
+; IND-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
; IND: middle.block:
; IND-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; IND-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -3870,7 +3870,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
; IND-NEXT: [[MUL]] = shl nuw nsw i32 [[IDX_INC_EXT]], 2
; IND-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1
; IND-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]]
-; IND-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP39:![0-9]+]]
+; IND-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP37:![0-9]+]]
; IND: exit.loopexit:
; IND-NEXT: br label [[EXIT]]
; IND: exit:
@@ -3921,7 +3921,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 16)
; UNROLL-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
+; UNROLL-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
; UNROLL: middle.block:
; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -3942,7 +3942,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
; UNROLL-NEXT: [[MUL]] = shl nuw nsw i32 [[IDX_INC_EXT]], 2
; UNROLL-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1
; UNROLL-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]]
-; UNROLL-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP39:![0-9]+]]
+; UNROLL-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP37:![0-9]+]]
; UNROLL: exit.loopexit:
; UNROLL-NEXT: br label [[EXIT]]
; UNROLL: exit:
@@ -3998,7 +3998,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 8)
; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NO-IC-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
+; UNROLL-NO-IC-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -4018,7 +4018,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
; UNROLL-NO-IC-NEXT: [[MUL]] = mul i32 [[IDX_INC_EXT]], 4
; UNROLL-NO-IC-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1
; UNROLL-NO-IC-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]]
-; UNROLL-NO-IC-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP39:![0-9]+]]
+; UNROLL-NO-IC-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP37:![0-9]+]]
; UNROLL-NO-IC: exit.loopexit:
; UNROLL-NO-IC-NEXT: br label [[EXIT]]
; UNROLL-NO-IC: exit:
@@ -4069,7 +4069,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 32)
; INTERLEAVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; INTERLEAVE-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
+; INTERLEAVE-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
; INTERLEAVE: middle.block:
; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -4090,7 +4090,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
; INTERLEAVE-NEXT: [[MUL]] = shl nuw nsw i32 [[IDX_INC_EXT]], 2
; INTERLEAVE-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1
; INTERLEAVE-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]]
-; INTERLEAVE-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP39:![0-9]+]]
+; INTERLEAVE-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP37:![0-9]+]]
; INTERLEAVE: exit.loopexit:
; INTERLEAVE-NEXT: br label [[EXIT]]
; INTERLEAVE: exit:
@@ -4147,7 +4147,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) {
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[K]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -4160,7 +4160,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) {
; CHECK-NEXT: store i32 [[INDVARS_IV]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP39:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
@@ -4180,7 +4180,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) {
; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
; IND-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; IND-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]]
+; IND-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
; IND: middle.block:
; IND-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[K]], [[N_VEC]]
; IND-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -4194,7 +4194,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) {
; IND-NEXT: store i32 [[INDVARS_IV]], ptr [[ARRAYIDX]], align 4
; IND-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
; IND-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]]
-; IND-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
+; IND-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP39:![0-9]+]]
; IND: exit:
; IND-NEXT: ret void
;
@@ -4217,7 +4217,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) {
; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4)
; UNROLL-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]]
+; UNROLL-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
; UNROLL: middle.block:
; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[K]], [[N_VEC]]
; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -4231,7 +4231,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) {
; UNROLL-NEXT: store i32 [[INDVARS_IV]], ptr [[ARRAYIDX]], align 4
; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]]
-; UNROLL-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
+; UNROLL-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP39:![0-9]+]]
; UNROLL: exit:
; UNROLL-NEXT: ret void
;
@@ -4256,7 +4256,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) {
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2)
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NO-IC-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]]
+; UNROLL-NO-IC-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[K]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -4269,7 +4269,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) {
; UNROLL-NO-IC-NEXT: store i32 [[INDVARS_IV]], ptr [[ARRAYIDX]], align 4
; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]]
-; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
+; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP39:![0-9]+]]
; UNROLL-NO-IC: exit:
; UNROLL-NO-IC-NEXT: ret void
;
@@ -4292,7 +4292,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) {
; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8)
; INTERLEAVE-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; INTERLEAVE-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]]
+; INTERLEAVE-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
; INTERLEAVE: middle.block:
; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[K]], [[N_VEC]]
; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -4306,7 +4306,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) {
; INTERLEAVE-NEXT: store i32 [[INDVARS_IV]], ptr [[ARRAYIDX]], align 4
; INTERLEAVE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]]
-; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
+; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP39:![0-9]+]]
; INTERLEAVE: exit:
; INTERLEAVE-NEXT: ret void
;
@@ -4352,7 +4352,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -4366,7 +4366,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
; CHECK-NEXT: store i32 [[TRUNC_IV]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
@@ -4390,7 +4390,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
; IND-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IND-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
+; IND-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]]
; IND: middle.block:
; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]]
; IND-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -4406,7 +4406,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
; IND-NEXT: store i32 [[TRUNC_IV]], ptr [[ARRAYIDX]], align 4
; IND-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; IND-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]]
-; IND-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
+; IND-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
; IND: exit:
; IND-NEXT: ret void
;
@@ -4433,7 +4433,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4)
; UNROLL-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
+; UNROLL-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]]
; UNROLL: middle.block:
; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]]
; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -4449,7 +4449,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
; UNROLL-NEXT: store i32 [[TRUNC_IV]], ptr [[ARRAYIDX]], align 4
; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]]
-; UNROLL-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
+; UNROLL-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
; UNROLL: exit:
; UNROLL-NEXT: ret void
;
@@ -4482,7 +4482,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2)
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NO-IC-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
+; UNROLL-NO-IC-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -4496,7 +4496,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
; UNROLL-NO-IC-NEXT: store i32 [[TRUNC_IV]], ptr [[ARRAYIDX]], align 4
; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]]
-; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
+; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
; UNROLL-NO-IC: exit:
; UNROLL-NO-IC-NEXT: ret void
;
@@ -4523,7 +4523,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8)
; INTERLEAVE-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; INTERLEAVE-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
+; INTERLEAVE-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]]
; INTERLEAVE: middle.block:
; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]]
; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -4539,7 +4539,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
; INTERLEAVE-NEXT: store i32 [[TRUNC_IV]], ptr [[ARRAYIDX]], align 4
; INTERLEAVE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]]
-; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
+; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
; INTERLEAVE: exit:
; INTERLEAVE-NEXT: ret void
;
@@ -4586,7 +4586,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) {
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -4599,7 +4599,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) {
; CHECK-NEXT: store i32 [[INDVARS_IV]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
@@ -4625,7 +4625,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) {
; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
; IND-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; IND-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]]
+; IND-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
; IND: middle.block:
; IND-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; IND-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -4639,7 +4639,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) {
; IND-NEXT: store i32 [[INDVARS_IV]], ptr [[ARRAYIDX]], align 4
; IND-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
; IND-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]]
-; IND-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]]
+; IND-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
; IND: exit:
; IND-NEXT: ret void
;
@@ -4668,7 +4668,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) {
; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4)
; UNROLL-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]]
+; UNROLL-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
; UNROLL: middle.block:
; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -4682,7 +4682,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) {
; UNROLL-NEXT: store i32 [[INDVARS_IV]], ptr [[ARRAYIDX]], align 4
; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]]
-; UNROLL-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]]
+; UNROLL-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
; UNROLL: exit:
; UNROLL-NEXT: ret void
;
@@ -4713,7 +4713,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) {
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2)
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NO-IC-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]]
+; UNROLL-NO-IC-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -4726,7 +4726,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) {
; UNROLL-NO-IC-NEXT: store i32 [[INDVARS_IV]], ptr [[ARRAYIDX]], align 4
; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]]
-; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]]
+; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
; UNROLL-NO-IC: exit:
; UNROLL-NO-IC-NEXT: ret void
;
@@ -4755,7 +4755,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) {
; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8)
; INTERLEAVE-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; INTERLEAVE-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]]
+; INTERLEAVE-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
; INTERLEAVE: middle.block:
; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -4769,7 +4769,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) {
; INTERLEAVE-NEXT: store i32 [[INDVARS_IV]], ptr [[ARRAYIDX]], align 4
; INTERLEAVE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]]
-; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]]
+; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
; INTERLEAVE: exit:
; INTERLEAVE-NEXT: ret void
;
@@ -4809,7 +4809,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) {
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -4826,7 +4826,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) {
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT: [[J_NEXT]] = add nuw nsw i64 [[J]], 2
; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP47:![0-9]+]]
+; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP45:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
@@ -4847,7 +4847,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) {
; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4)
; IND-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IND-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]]
+; IND-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]]
; IND: middle.block:
; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
; IND-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -4864,7 +4864,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) {
; IND-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; IND-NEXT: [[J_NEXT]] = add nuw nsw i64 [[J]], 2
; IND-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
-; IND-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP47:![0-9]+]]
+; IND-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP45:![0-9]+]]
; IND: for.end:
; IND-NEXT: ret void
;
@@ -4888,7 +4888,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) {
; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 8)
; UNROLL-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]]
+; UNROLL-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]]
; UNROLL: middle.block:
; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -4905,7 +4905,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) {
; UNROLL-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; UNROLL-NEXT: [[J_NEXT]] = add nuw nsw i64 [[J]], 2
; UNROLL-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
-; UNROLL-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP47:![0-9]+]]
+; UNROLL-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP45:![0-9]+]]
; UNROLL: for.end:
; UNROLL-NEXT: ret void
;
@@ -4932,7 +4932,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) {
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 4)
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NO-IC-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]]
+; UNROLL-NO-IC-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -4949,7 +4949,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) {
; UNROLL-NO-IC-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; UNROLL-NO-IC-NEXT: [[J_NEXT]] = add nuw nsw i64 [[J]], 2
; UNROLL-NO-IC-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
-; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP47:![0-9]+]]
+; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP45:![0-9]+]]
; UNROLL-NO-IC: for.end:
; UNROLL-NO-IC-NEXT: ret void
;
@@ -4973,7 +4973,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) {
; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 16)
; INTERLEAVE-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; INTERLEAVE-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]]
+; INTERLEAVE-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]]
; INTERLEAVE: middle.block:
; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -4990,7 +4990,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) {
; INTERLEAVE-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; INTERLEAVE-NEXT: [[J_NEXT]] = add nuw nsw i64 [[J]], 2
; INTERLEAVE-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
-; INTERLEAVE-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP47:![0-9]+]]
+; INTERLEAVE-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP45:![0-9]+]]
; INTERLEAVE: for.end:
; INTERLEAVE-NEXT: ret void
;
@@ -5057,7 +5057,7 @@ define i32 @PR32419(i32 %a, i16 %b) {
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20
-; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP15]])
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -5080,7 +5080,7 @@ define i32 @PR32419(i32 %a, i16 %b) {
; CHECK-NEXT: [[VAR6]] = or i32 [[VAR0]], [[VAR5]]
; CHECK-NEXT: [[I_NEXT]] = add nsw i32 [[I]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[I_NEXT]], 0
-; CHECK-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]]
+; CHECK-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP47:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: [[VAR7:%.*]] = phi i32 [ [[VAR6]], [[FOR_INC]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[VAR7]]
@@ -5121,7 +5121,7 @@ define i32 @PR32419(i32 %a, i16 %b) {
; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
; IND-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20
-; IND-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
+; IND-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]]
; IND: middle.block:
; IND-NEXT: [[TMP16:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP14]])
; IND-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -5132,7 +5132,7 @@ define i32 @PR32419(i32 %a, i16 %b) {
; IND: for.cond:
; IND-NEXT: br label [[FOR_INC]]
; IND: for.inc:
-; IND-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]]
+; IND-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP47:![0-9]+]]
; IND: for.end:
; IND-NEXT: [[VAR7:%.*]] = phi i32 [ poison, [[FOR_INC]] ], [ [[TMP16]], [[MIDDLE_BLOCK]] ]
; IND-NEXT: ret i32 [[VAR7]]
@@ -5196,7 +5196,7 @@ define i32 @PR32419(i32 %a, i16 %b) {
; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 4)
; UNROLL-NEXT: [[TMP28:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20
-; UNROLL-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
+; UNROLL-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]]
; UNROLL: middle.block:
; UNROLL-NEXT: [[BIN_RDX:%.*]] = or <2 x i32> [[TMP27]], [[TMP26]]
; UNROLL-NEXT: [[TMP29:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[BIN_RDX]])
@@ -5208,7 +5208,7 @@ define i32 @PR32419(i32 %a, i16 %b) {
; UNROLL: for.cond:
; UNROLL-NEXT: br label [[FOR_INC]]
; UNROLL: for.inc:
-; UNROLL-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]]
+; UNROLL-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP47:![0-9]+]]
; UNROLL: for.end:
; UNROLL-NEXT: [[VAR7:%.*]] = phi i32 [ poison, [[FOR_INC]] ], [ [[TMP29]], [[MIDDLE_BLOCK]] ]
; UNROLL-NEXT: ret i32 [[VAR7]]
@@ -5276,7 +5276,7 @@ define i32 @PR32419(i32 %a, i16 %b) {
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[STEP_ADD]], splat (i16 2)
; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20
-; UNROLL-NO-IC-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
+; UNROLL-NO-IC-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = or <2 x i32> [[TMP29]], [[TMP28]]
; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[BIN_RDX]])
@@ -5300,7 +5300,7 @@ define i32 @PR32419(i32 %a, i16 %b) {
; UNROLL-NO-IC-NEXT: [[VAR6]] = or i32 [[VAR0]], [[VAR5]]
; UNROLL-NO-IC-NEXT: [[I_NEXT]] = add nsw i32 [[I]], 1
; UNROLL-NO-IC-NEXT: [[COND:%.*]] = icmp eq i32 [[I_NEXT]], 0
-; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]]
+; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP47:![0-9]+]]
; UNROLL-NO-IC: for.end:
; UNROLL-NO-IC-NEXT: [[VAR7:%.*]] = phi i32 [ [[VAR6]], [[FOR_INC]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-IC-NEXT: ret i32 [[VAR7]]
@@ -5400,7 +5400,7 @@ define i32 @PR32419(i32 %a, i16 %b) {
; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 8)
; INTERLEAVE-NEXT: [[TMP48:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
-; INTERLEAVE-NEXT: br i1 [[TMP48]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
+; INTERLEAVE-NEXT: br i1 [[TMP48]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]]
; INTERLEAVE: middle.block:
; INTERLEAVE-NEXT: [[BIN_RDX:%.*]] = or <4 x i32> [[TMP47]], [[TMP46]]
; INTERLEAVE-NEXT: [[TMP49:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[BIN_RDX]])
@@ -5423,7 +5423,7 @@ define i32 @PR32419(i32 %a, i16 %b) {
; INTERLEAVE-NEXT: [[VAR6]] = or i32 [[VAR0]], [[VAR4]]
; INTERLEAVE-NEXT: [[I_NEXT]] = add nsw i32 [[I]], 1
; INTERLEAVE-NEXT: [[COND:%.*]] = icmp eq i32 [[I_NEXT]], 0
-; INTERLEAVE-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]]
+; INTERLEAVE-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP47:![0-9]+]]
; INTERLEAVE: for.end:
; INTERLEAVE-NEXT: [[VAR7:%.*]] = phi i32 [ [[VAR6]], [[FOR_INC]] ], [ poison, [[MIDDLE_BLOCK]] ]
; INTERLEAVE-NEXT: ret i32 [[VAR7]]
@@ -5489,7 +5489,7 @@ define i64 @trunc_with_first_order_recurrence() {
; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], splat (i32 2)
; CHECK-NEXT: [[VEC_IND_NEXT5]] = add <2 x i32> [[VEC_IND4]], splat (i32 2)
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112
-; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[TMP10]])
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[VEC_IND2]], i32 1
@@ -5523,7 +5523,7 @@ define i64 @trunc_with_first_order_recurrence() {
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[C24]] = add nuw nsw i32 [[X]], 1
; CHECK-NEXT: [[EXITCOND_I:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 114
-; CHECK-NEXT: br i1 [[EXITCOND_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP51:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP49:![0-9]+]]
;
; IND-LABEL: @trunc_with_first_order_recurrence(
; IND-NEXT: entry:
@@ -5553,7 +5553,7 @@ define i64 @trunc_with_first_order_recurrence() {
; IND-NEXT: [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], splat (i32 2)
; IND-NEXT: [[VEC_IND_NEXT5]] = add <2 x i32> [[VEC_IND4]], splat (i32 2)
; IND-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112
-; IND-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
+; IND-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
; IND: middle.block:
; IND-NEXT: [[TMP12:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[TMP10]])
; IND-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[VEC_IND2]], i64 1
@@ -5585,7 +5585,7 @@ define i64 @trunc_with_first_order_recurrence() {
; IND-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; IND-NEXT: [[C24]] = add nuw nsw i32 [[X]], 1
; IND-NEXT: [[EXITCOND_I:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 114
-; IND-NEXT: br i1 [[EXITCOND_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP51:![0-9]+]]
+; IND-NEXT: br i1 [[EXITCOND_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP49:![0-9]+]]
;
; UNROLL-LABEL: @trunc_with_first_order_recurrence(
; UNROLL-NEXT: entry:
@@ -5630,7 +5630,7 @@ define i64 @trunc_with_first_order_recurrence() {
; UNROLL-NEXT: [[VEC_IND_NEXT4]] = add <2 x i32> [[VEC_IND3]], splat (i32 4)
; UNROLL-NEXT: [[VEC_IND_NEXT6]] = add <2 x i32> [[VEC_IND5]], splat (i32 4)
; UNROLL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112
-; UNROLL-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
+; UNROLL-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
; UNROLL: middle.block:
; UNROLL-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP21]], [[TMP20]]
; UNROLL-NEXT: [[TMP23:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]])
@@ -5663,7 +5663,7 @@ define i64 @trunc_with_first_order_recurrence() {
; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-NEXT: [[C24]] = add nuw nsw i32 [[X]], 1
; UNROLL-NEXT: [[EXITCOND_I:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 114
-; UNROLL-NEXT: br i1 [[EXITCOND_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP51:![0-9]+]]
+; UNROLL-NEXT: br i1 [[EXITCOND_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP49:![0-9]+]]
;
; UNROLL-NO-IC-LABEL: @trunc_with_first_order_recurrence(
; UNROLL-NO-IC-NEXT: entry:
@@ -5708,7 +5708,7 @@ define i64 @trunc_with_first_order_recurrence() {
; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT4]] = add <2 x i32> [[STEP_ADD7]], splat (i32 2)
; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT6]] = add <2 x i32> [[STEP_ADD8]], splat (i32 2)
; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112
-; UNROLL-NO-IC-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
+; UNROLL-NO-IC-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP21]], [[TMP20]]
; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]])
@@ -5743,7 +5743,7 @@ define i64 @trunc_with_first_order_recurrence() {
; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-NO-IC-NEXT: [[C24]] = add nuw nsw i32 [[X]], 1
; UNROLL-NO-IC-NEXT: [[EXITCOND_I:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 114
-; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP51:![0-9]+]]
+; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP49:![0-9]+]]
;
; INTERLEAVE-LABEL: @trunc_with_first_order_recurrence(
; INTERLEAVE-NEXT: entry:
@@ -5788,7 +5788,7 @@ define i64 @trunc_with_first_order_recurrence() {
; INTERLEAVE-NEXT: [[VEC_IND_NEXT4]] = add <4 x i32> [[VEC_IND3]], splat (i32 8)
; INTERLEAVE-NEXT: [[VEC_IND_NEXT6]] = add <4 x i32> [[VEC_IND5]], splat (i32 8)
; INTERLEAVE-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112
-; INTERLEAVE-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
+; INTERLEAVE-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
; INTERLEAVE: middle.block:
; INTERLEAVE-NEXT: [[BIN_RDX:%.*]] = add <4 x i64> [[TMP21]], [[TMP20]]
; INTERLEAVE-NEXT: [[TMP23:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[BIN_RDX]])
@@ -5821,7 +5821,7 @@ define i64 @trunc_with_first_order_recurrence() {
; INTERLEAVE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; INTERLEAVE-NEXT: [[C24]] = add nuw nsw i32 [[X]], 1
; INTERLEAVE-NEXT: [[EXITCOND_I:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 114
-; INTERLEAVE-NEXT: br i1 [[EXITCOND_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP51:![0-9]+]]
+; INTERLEAVE-NEXT: br i1 [[EXITCOND_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP49:![0-9]+]]
;
entry:
br label %loop
@@ -5880,7 +5880,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr %
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
-; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[VEC_IND]], i32 1
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -5902,7 +5902,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr %
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[IV_TRUNC]], [[MUL]]
; CHECK-NEXT: store i32 [[ADD]], ptr [[DST_GEP]], align 4
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TRUNC_IV_NEXT]], 100
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP53:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP51:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
@@ -5928,13 +5928,13 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr %
; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
; IND-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
-; IND-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]]
+; IND-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
; IND: middle.block:
; IND-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; IND: scalar.ph:
; IND-NEXT: br label [[LOOP:%.*]]
; IND: loop:
-; IND-NEXT: br i1 poison, label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP53:![0-9]+]]
+; IND-NEXT: br i1 poison, label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP51:![0-9]+]]
; IND: exit:
; IND-NEXT: ret void
;
@@ -5966,13 +5966,13 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr %
; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4)
; UNROLL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
-; UNROLL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]]
+; UNROLL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
; UNROLL: middle.block:
; UNROLL-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; UNROLL: scalar.ph:
; UNROLL-NEXT: br label [[LOOP:%.*]]
; UNROLL: loop:
-; UNROLL-NEXT: br i1 poison, label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP53:![0-9]+]]
+; UNROLL-NEXT: br i1 poison, label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP51:![0-9]+]]
; UNROLL: exit:
; UNROLL-NEXT: ret void
;
@@ -6005,7 +6005,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr %
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2)
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
-; UNROLL-NO-IC-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]]
+; UNROLL-NO-IC-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[STEP_ADD]], i32 1
; UNROLL-NO-IC-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -6027,7 +6027,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr %
; UNROLL-NO-IC-NEXT: [[ADD:%.*]] = add i32 [[IV_TRUNC]], [[MUL]]
; UNROLL-NO-IC-NEXT: store i32 [[ADD]], ptr [[DST_GEP]], align 4
; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TRUNC_IV_NEXT]], 100
-; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP53:![0-9]+]]
+; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP51:![0-9]+]]
; UNROLL-NO-IC: exit:
; UNROLL-NO-IC-NEXT: ret void
;
@@ -6059,7 +6059,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr %
; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8)
; INTERLEAVE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
-; INTERLEAVE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]]
+; INTERLEAVE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
; INTERLEAVE: middle.block:
; INTERLEAVE-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i64 3
; INTERLEAVE-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -6081,7 +6081,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr %
; INTERLEAVE-NEXT: [[ADD:%.*]] = add i32 [[MUL]], [[IV_TRUNC]]
; INTERLEAVE-NEXT: store i32 [[ADD]], ptr [[DST_GEP]], align 4
; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TRUNC_IV_NEXT]], 100
-; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP53:![0-9]+]]
+; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP51:![0-9]+]]
; INTERLEAVE: exit:
; INTERLEAVE-NEXT: ret void
;
@@ -6148,9 +6148,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = mul <2 x i32> <i32 0, i32 1>, [[DOTSPLAT]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> zeroinitializer, [[TMP17]]
-; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[STEP]], 2
-; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i64 0
-; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT2]], <2 x i32> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[DOTSPLAT3:%.*]] = mul <2 x i32> [[DOTSPLAT]], splat (i32 2)
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -6164,7 +6162,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], [[DOTSPLAT3]]
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[VEC_IND]], i32 1
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
@@ -6185,7 +6183,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
; CHECK-NEXT: [[IV_2_NEXT]] = add nsw i32 [[IV_2_CONV]], [[STEP]]
; CHECK-NEXT: [[IV_1_NEXT]] = add nuw nsw i64 [[IV_1]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_1_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP55:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP53:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
@@ -6221,9 +6219,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
; IND-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0
; IND-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; IND-NEXT: [[TMP15:%.*]] = mul nuw <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
-; IND-NEXT: [[TMP16:%.*]] = shl i32 [[STEP]], 1
-; IND-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[TMP16]], i64 0
-; IND-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT2]], <2 x i32> poison, <2 x i32> zeroinitializer
+; IND-NEXT: [[DOTSPLAT3:%.*]] = shl <2 x i32> [[DOTSPLAT]], splat (i32 1)
; IND-NEXT: br label [[VECTOR_BODY:%.*]]
; IND: vector.body:
; IND-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -6235,7 +6231,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], [[DOTSPLAT3]]
; IND-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IND-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]]
+; IND-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]]
; IND: middle.block:
; IND-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[VEC_IND]], i64 1
; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
@@ -6256,7 +6252,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
; IND-NEXT: [[IV_2_NEXT]] = add nsw i32 [[IV_2_CONV]], [[STEP]]
; IND-NEXT: [[IV_1_NEXT]] = add nuw nsw i64 [[IV_1]], 1
; IND-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_1_NEXT]], [[N]]
-; IND-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP55:![0-9]+]]
+; IND-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP53:![0-9]+]]
; IND: exit:
; IND-NEXT: ret void
;
@@ -6289,11 +6285,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -4
; UNROLL-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32
; UNROLL-NEXT: [[IND_END:%.*]] = mul i32 [[STEP]], [[DOTCAST]]
-; UNROLL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0
-; UNROLL-NEXT: [[TMP15:%.*]] = shl <2 x i32> [[BROADCAST_SPLATINSERT]], <i32 1, i32 0>
-; UNROLL-NEXT: [[TMP16:%.*]] = shufflevector <2 x i32> [[TMP15]], <2 x i32> poison, <2 x i32> zeroinitializer
; UNROLL-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0
; UNROLL-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
+; UNROLL-NEXT: [[TMP16:%.*]] = shl <2 x i32> [[DOTSPLAT]], splat (i32 1)
; UNROLL-NEXT: [[TMP17:%.*]] = mul nuw <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL: vector.body:
@@ -6310,7 +6304,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], [[TMP16]]
; UNROLL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]]
+; UNROLL-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]]
; UNROLL: middle.block:
; UNROLL-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[STEP_ADD]], i64 1
; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
@@ -6331,7 +6325,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
; UNROLL-NEXT: [[IV_2_NEXT]] = add nsw i32 [[IV_2_CONV]], [[STEP]]
; UNROLL-NEXT: [[IV_1_NEXT]] = add nuw nsw i64 [[IV_1]], 1
; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_1_NEXT]], [[N]]
-; UNROLL-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP55:![0-9]+]]
+; UNROLL-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP53:![0-9]+]]
; UNROLL: exit:
; UNROLL-NEXT: ret void
;
@@ -6370,9 +6364,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0
; UNROLL-NO-IC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = mul <2 x i32> splat (i32 2), [[DOTSPLAT]]
-; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0
-; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
-; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = mul <2 x i32> <i32 0, i32 1>, [[BROADCAST_SPLAT]]
+; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = mul <2 x i32> <i32 0, i32 1>, [[DOTSPLAT]]
; UNROLL-NO-IC-NEXT: [[INDUCTION:%.*]] = add <2 x i32> zeroinitializer, [[TMP18]]
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
@@ -6391,7 +6383,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], [[TMP17]]
; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NO-IC-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]]
+; UNROLL-NO-IC-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[STEP_ADD]], i32 1
; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
@@ -6412,7 +6404,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
; UNROLL-NO-IC-NEXT: [[IV_2_NEXT]] = add nsw i32 [[IV_2_CONV]], [[STEP]]
; UNROLL-NO-IC-NEXT: [[IV_1_NEXT]] = add nuw nsw i64 [[IV_1]], 1
; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_1_NEXT]], [[N]]
-; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP55:![0-9]+]]
+; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP53:![0-9]+]]
; UNROLL-NO-IC: exit:
; UNROLL-NO-IC-NEXT: ret void
;
@@ -6445,11 +6437,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -8
; INTERLEAVE-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32
; INTERLEAVE-NEXT: [[IND_END:%.*]] = mul i32 [[STEP]], [[DOTCAST]]
-; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[STEP]], i64 0
-; INTERLEAVE-NEXT: [[TMP15:%.*]] = shl <4 x i32> [[BROADCAST_SPLATINSERT]], <i32 2, i32 0, i32 0, i32 0>
-; INTERLEAVE-NEXT: [[TMP16:%.*]] = shufflevector <4 x i32> [[TMP15]], <4 x i32> poison, <4 x i32> zeroinitializer
; INTERLEAVE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[STEP]], i64 0
; INTERLEAVE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; INTERLEAVE-NEXT: [[TMP16:%.*]] = shl <4 x i32> [[DOTSPLAT]], splat (i32 2)
; INTERLEAVE-NEXT: [[TMP17:%.*]] = mul <4 x i32> [[DOTSPLAT]], <i32 0, i32 1, i32 2, i32 3>
; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
; INTERLEAVE: vector.body:
@@ -6466,7 +6456,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], [[TMP16]]
; INTERLEAVE-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; INTERLEAVE-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]]
+; INTERLEAVE-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]]
; INTERLEAVE: middle.block:
; INTERLEAVE-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i64 3
; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
@@ -6487,7 +6477,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
; INTERLEAVE-NEXT: [[IV_2_NEXT]] = add nsw i32 [[IV_2_CONV]], [[STEP]]
; INTERLEAVE-NEXT: [[IV_1_NEXT]] = add nuw nsw i64 [[IV_1]], 1
; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_1_NEXT]], [[N]]
-; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP55:![0-9]+]]
+; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP53:![0-9]+]]
; INTERLEAVE: exit:
; INTERLEAVE-NEXT: ret void
;
diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
index 82a17297f6072..2d83ed25cb185 100644
--- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
@@ -758,9 +758,7 @@ define float @fp_postinc_use_fadd(float %init, ptr noalias nocapture %A, i64 %N,
; VEC-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0
; VEC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer
; VEC-NEXT: [[INDUCTION:%.*]] = fadd fast <2 x float> [[DOTSPLAT]], [[TMP2]]
-; VEC-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], 2.000000e+00
-; VEC-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0
-; VEC-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT3]], <2 x float> poison, <2 x i32> zeroinitializer
+; VEC-NEXT: [[DOTSPLAT4:%.*]] = fmul fast <2 x float> [[DOTSPLAT2]], splat (float 2.000000e+00)
; VEC-NEXT: br label %[[VECTOR_BODY:.*]]
; VEC: [[VECTOR_BODY]]:
; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -778,11 +776,11 @@ define float @fp_postinc_use_fadd(float %init, ptr noalias nocapture %A, i64 %N,
; VEC-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; VEC: [[SCALAR_PH]]:
; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; VEC-NEXT: [[BC_RESUME_VAL5:%.*]] = phi float [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[INIT]], %[[ENTRY]] ]
+; VEC-NEXT: [[BC_RESUME_VAL3:%.*]] = phi float [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[INIT]], %[[ENTRY]] ]
; VEC-NEXT: br label %[[LOOP:.*]]
; VEC: [[LOOP]]:
; VEC-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; VEC-NEXT: [[FP_IV:%.*]] = phi float [ [[BC_RESUME_VAL5]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ]
+; VEC-NEXT: [[FP_IV:%.*]] = phi float [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ]
; VEC-NEXT: [[GEP_A:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; VEC-NEXT: store float [[FP_IV]], ptr [[GEP_A]], align 4
; VEC-NEXT: [[ADD]] = fadd fast float [[FP_IV]], [[FPINC]]
@@ -878,9 +876,7 @@ define float @fp_postinc_use_fadd_ops_swapped(float %init, ptr noalias nocapture
; VEC-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0
; VEC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer
; VEC-NEXT: [[INDUCTION:%.*]] = fadd fast <2 x float> [[DOTSPLAT]], [[TMP2]]
-; VEC-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], 2.000000e+00
-; VEC-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0
-; VEC-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT3]], <2 x float> poison, <2 x i32> zeroinitializer
+; VEC-NEXT: [[DOTSPLAT4:%.*]] = fmul fast <2 x float> [[DOTSPLAT2]], splat (float 2.000000e+00)
; VEC-NEXT: br label %[[VECTOR_BODY:.*]]
; VEC: [[VECTOR_BODY]]:
; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -898,11 +894,11 @@ define float @fp_postinc_use_fadd_ops_swapped(float %init, ptr noalias nocapture
; VEC-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; VEC: [[SCALAR_PH]]:
; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; VEC-NEXT: [[BC_RESUME_VAL5:%.*]] = phi float [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[INIT]], %[[ENTRY]] ]
+; VEC-NEXT: [[BC_RESUME_VAL3:%.*]] = phi float [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[INIT]], %[[ENTRY]] ]
; VEC-NEXT: br label %[[LOOP:.*]]
; VEC: [[LOOP]]:
; VEC-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; VEC-NEXT: [[FP_IV:%.*]] = phi float [ [[BC_RESUME_VAL5]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ]
+; VEC-NEXT: [[FP_IV:%.*]] = phi float [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ]
; VEC-NEXT: [[GEP_A:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; VEC-NEXT: store float [[FP_IV]], ptr [[GEP_A]], align 4
; VEC-NEXT: [[ADD]] = fadd fast float [[FPINC]], [[FP_IV]]
@@ -998,9 +994,7 @@ define float @fp_postinc_use_fsub(float %init, ptr noalias nocapture %A, i64 %N,
; VEC-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0
; VEC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer
; VEC-NEXT: [[INDUCTION:%.*]] = fsub fast <2 x float> [[DOTSPLAT]], [[TMP2]]
-; VEC-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], 2.000000e+00
-; VEC-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0
-; VEC-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT3]], <2 x float> poison, <2 x i32> zeroinitializer
+; VEC-NEXT: [[DOTSPLAT4:%.*]] = fmul fast <2 x float> [[DOTSPLAT2]], splat (float 2.000000e+00)
; VEC-NEXT: br label %[[VECTOR_BODY:.*]]
; VEC: [[VECTOR_BODY]]:
; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -1018,11 +1012,11 @@ define float @fp_postinc_use_fsub(float %init, ptr noalias nocapture %A, i64 %N,
; VEC-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; VEC: [[SCALAR_PH]]:
; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; VEC-NEXT: [[BC_RESUME_VAL5:%.*]] = phi float [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[INIT]], %[[ENTRY]] ]
+; VEC-NEXT: [[BC_RESUME_VAL3:%.*]] = phi float [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[INIT]], %[[ENTRY]] ]
; VEC-NEXT: br label %[[LOOP:.*]]
; VEC: [[LOOP]]:
; VEC-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; VEC-NEXT: [[FP_IV:%.*]] = phi float [ [[BC_RESUME_VAL5]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ]
+; VEC-NEXT: [[FP_IV:%.*]] = phi float [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ]
; VEC-NEXT: [[GEP_A:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; VEC-NEXT: store float [[FP_IV]], ptr [[GEP_A]], align 4
; VEC-NEXT: [[ADD]] = fsub fast float [[FP_IV]], [[FPINC]]
diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
index 6411a3031d1af..b695910a38b39 100644
--- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
@@ -478,9 +478,7 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i8> <i8 0, i8 1, i8 2, i8 3>, [[DOTSPLAT]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i8> zeroinitializer, [[TMP2]]
-; CHECK-NEXT: [[TMP3:%.*]] = mul i8 [[INDUCTION_IV]], 4
-; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[TMP3]], i64 0
-; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[DOTSPLAT2:%.*]] = mul <4 x i8> [[DOTSPLAT]], splat (i8 4)
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -509,9 +507,7 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst
; CHECK-NEXT: [[DOTSPLATINSERT10:%.*]] = insertelement <4 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0
; CHECK-NEXT: [[DOTSPLAT11:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT10]], <4 x i8> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION12:%.*]] = add <4 x i8> [[DOTSPLAT11]], [[TMP14]]
-; CHECK-NEXT: [[TMP9:%.*]] = mul i8 [[INDUCTION_IV]], 4
-; CHECK-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement <4 x i8> poison, i8 [[TMP9]], i64 0
-; CHECK-NEXT: [[DOTSPLAT14:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT13]], <4 x i8> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[DOTSPLAT14:%.*]] = mul <4 x i8> [[DOTSPLAT9]], splat (i8 4)
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT18:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
@@ -568,9 +564,7 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP2:%.*]] = mul <4 x i8> <i8 0, i8 1, i8 2, i8 3>, [[DOTSPLAT]]
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDUCTION:%.*]] = add <4 x i8> zeroinitializer, [[TMP2]]
-; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP3:%.*]] = mul i8 [[INDUCTION_IV]], 4
-; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[TMP3]], i64 0
-; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer
+; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT2:%.*]] = mul <4 x i8> [[DOTSPLAT]], splat (i8 4)
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-PROFITABLE-BY-DEFAULT: vector.body:
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -599,9 +593,7 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT10:%.*]] = insertelement <2 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT11:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT10]], <2 x i8> poison, <2 x i32> zeroinitializer
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDUCTION12:%.*]] = add <2 x i8> [[DOTSPLAT11]], [[TMP14]]
-; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP9:%.*]] = mul i8 [[INDUCTION_IV]], 2
-; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement <2 x i8> poison, i8 [[TMP9]], i64 0
-; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT14:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT13]], <2 x i8> poison, <2 x i32> zeroinitializer
+; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT14:%.*]] = mul <2 x i8> [[DOTSPLAT9]], splat (i8 2)
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK-PROFITABLE-BY-DEFAULT: vec.epilog.vector.body:
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT18:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll
index 2135fc6367918..91d17c45c8979 100644
--- a/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll
+++ b/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll
@@ -29,9 +29,9 @@ define void @foo() {
; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP6:%.*]] = mul <vscale x 4 x i64> [[TMP4]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP6]]
-; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP19]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP9]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP19]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 4 x i64> splat (i64 1), [[DOTSPLAT]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[OUTER_LOOP_LATCH4:%.*]] ]
@@ -53,7 +53,7 @@ define void @foo() {
; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <vscale x 4 x float> [ [[TMP12]], [[INNER_LOOP1]] ]
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> [[VEC_PHI5]], <vscale x 4 x ptr> [[TMP10]], i32 4, <vscale x 4 x i1> splat (i1 true))
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[TMP8]]
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll b/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll
index 0cda697e0337a..fc12f76d3cfcb 100644
--- a/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll
@@ -73,9 +73,7 @@ define void @doit1(i32 %n, i32 %step) local_unnamed_addr {
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = mul <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> zeroinitializer, [[TMP17]]
-; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[STEP]], 4
-; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP18]], i64 0
-; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[DOTSPLAT3:%.*]] = mul <4 x i32> [[DOTSPLAT]], splat (i32 4)
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -196,9 +194,7 @@ define void @doit2(i32 %n, i32 %step) local_unnamed_addr {
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP16:%.*]] = mul <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> zeroinitializer, [[TMP16]]
-; CHECK-NEXT: [[TMP17:%.*]] = mul i32 [[STEP]], 4
-; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP17]], i64 0
-; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[DOTSPLAT3:%.*]] = mul <4 x i32> [[DOTSPLAT]], splat (i32 4)
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -391,9 +387,7 @@ define void @doit4(i32 %n, i8 signext %cstep) local_unnamed_addr {
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = mul <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> zeroinitializer, [[TMP14]]
-; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[CONV]], 4
-; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP15]], i64 0
-; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[DOTSPLAT3:%.*]] = mul <4 x i32> [[DOTSPLAT]], splat (i32 4)
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll
index f136b0e2e0b31..a6be32aee779d 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll
@@ -192,9 +192,9 @@ define void @add_unique_ind32(ptr noalias nocapture %a, i64 %n) {
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
; CHECK-NEXT: [[TMP7:%.*]] = shl <vscale x 4 x i32> [[TMP6]], splat (i32 1)
; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP5]] to i32
-; CHECK-NEXT: [[TMP9:%.*]] = shl i32 [[TMP8]], 1
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP9]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP8]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = shl <vscale x 4 x i32> [[DOTSPLAT]], splat (i32 1)
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -202,7 +202,7 @@ define void @add_unique_ind32(ptr noalias nocapture %a, i64 %n) {
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: store <vscale x 4 x i32> [[VEC_IND]], ptr [[TMP10]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[TMP12]]
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: middle.block:
@@ -272,9 +272,9 @@ define void @add_unique_indf32(ptr noalias nocapture %a, i64 %n) {
; CHECK-NEXT: [[TMP9:%.*]] = fmul <vscale x 4 x float> [[TMP8]], splat (float 2.000000e+00)
; CHECK-NEXT: [[INDUCTION:%.*]] = fadd <vscale x 4 x float> [[TMP9]], zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = uitofp i64 [[TMP6]] to float
-; CHECK-NEXT: [[TMP13:%.*]] = fmul float [[TMP12]], 2.000000e+00
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[TMP13]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[TMP12]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = fmul <vscale x 4 x float> [[DOTSPLAT]], splat (float 2.000000e+00)
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -282,7 +282,7 @@ define void @add_unique_indf32(ptr noalias nocapture %a, i64 %n) {
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: store <vscale x 4 x float> [[VEC_IND]], ptr [[TMP14]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = fadd <vscale x 4 x float> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = fadd <vscale x 4 x float> [[VEC_IND]], [[TMP13]]
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: middle.block:
>From d183c91430311acaeff7c1a173fa3402093fccba Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Mon, 3 Feb 2025 18:17:37 +0800
Subject: [PATCH 13/16] Remove VPStepVectorRecipe, replace with VPInstruction
---
.../Vectorize/LoopVectorizationPlanner.h | 6 ++-
llvm/lib/Transforms/Vectorize/VPlan.h | 38 ++-----------------
.../Transforms/Vectorize/VPlanAnalysis.cpp | 4 +-
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 28 +++++---------
llvm/lib/Transforms/Vectorize/VPlanValue.h | 1 -
5 files changed, 19 insertions(+), 58 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index f7c068eaa4d9d..15ebd37d664da 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -265,8 +265,10 @@ class VPBuilder {
FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags()));
}
- VPStepVectorRecipe *createStepVector(Type *Ty) {
- return tryInsertInstruction(new VPStepVectorRecipe(Ty));
+ VPInstruction *createStepVector(Type *Ty) {
+ VPValue *TyVal = BB->getPlan()->getOrAddLiveIn(Constant::getNullValue(Ty));
+ return tryInsertInstruction(
+ new VPInstruction(VPInstruction::StepVector, {TyVal}));
}
//===--------------------------------------------------------------------===//
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index e1e86fe980cc7..58ba194cca6c5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -880,6 +880,9 @@ class VPInstruction : public VPRecipeWithIRFlags,
// Extracts the first active lane of a vector, where the first operand is
// the predicate, and the second operand is the vector to extract.
ExtractFirstActive,
+ // Creates a step vector starting from 0 with a step of 1. The first operand
+ // is a dummy constant that should be used to specify the element type.
+ StepVector,
};
private:
@@ -1236,41 +1239,6 @@ class VPScalarCastRecipe : public VPSingleDefRecipe {
}
};
-/// A recipe for generating a step vector.
-class VPStepVectorRecipe : public VPSingleDefRecipe {
- /// Scalar return type of the intrinsic.
- Type *ScalarTy;
-
-public:
- VPStepVectorRecipe(Type *Ty)
- : VPSingleDefRecipe(VPDef::VPStepVectorSC, {}), ScalarTy(Ty) {}
-
- ~VPStepVectorRecipe() override = default;
-
- VPStepVectorRecipe *clone() override {
- return new VPStepVectorRecipe(ScalarTy);
- }
-
- VP_CLASSOF_IMPL(VPDef::VPStepVectorSC)
-
- void execute(VPTransformState &State) override;
-
- /// Return the cost of this VPStepVectorRecipe.
- InstructionCost computeCost(ElementCount VF,
- VPCostContext &Ctx) const override {
- // TODO: Compute accurate cost after retiring the legacy cost model.
- return 0;
- }
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- void print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const override;
-#endif
-
- /// Return the scalar return type of the intrinsic.
- Type *getScalarType() const { return ScalarTy; }
-};
-
/// A recipe for widening vector intrinsics.
class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags {
/// ID of the vector intrinsic to widen.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 8414b36f3ae22..4e4892005a7be 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -77,6 +77,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::AnyOf:
+ case VPInstruction::StepVector:
return SetResultTyFromOp();
case VPInstruction::ExtractFirstActive:
case VPInstruction::ExtractFromEnd: {
@@ -243,8 +244,7 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
// backedge value, here and in cases below.
return inferScalarType(R->getStartValue());
})
- .Case<VPWidenIntOrFpInductionRecipe, VPDerivedIVRecipe,
- VPStepVectorRecipe>(
+ .Case<VPWidenIntOrFpInductionRecipe, VPDerivedIVRecipe>(
[](const auto *R) { return R->getScalarType(); })
.Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe,
VPScalarIVStepsRecipe, VPWidenGEPRecipe, VPVectorPointerRecipe,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index a18a112048f1e..d4e59e44d70c9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -73,7 +73,6 @@ bool VPRecipeBase::mayWriteToMemory() const {
case VPBranchOnMaskSC:
case VPScalarIVStepsSC:
case VPPredInstPHISC:
- case VPStepVectorSC:
return false;
case VPBlendSC:
case VPReductionEVLSC:
@@ -121,7 +120,6 @@ bool VPRecipeBase::mayReadFromMemory() const {
case VPScalarIVStepsSC:
case VPWidenStoreEVLSC:
case VPWidenStoreSC:
- case VPStepVectorSC:
return false;
case VPBlendSC:
case VPReductionEVLSC:
@@ -152,7 +150,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPPredInstPHISC:
case VPScalarCastSC:
case VPReverseVectorPointerSC:
- case VPStepVectorSC:
return false;
case VPInstructionSC:
return mayWriteToMemory();
@@ -712,6 +709,10 @@ Value *VPInstruction::generate(VPTransformState &State) {
Builder.getInt64Ty(), Mask, true, "first.active.lane");
return Builder.CreateExtractElement(Vec, Ctz, "early.exit.value");
}
+ case VPInstruction::StepVector: {
+ Type *EltTy = State.get(getOperand(0), true)->getType();
+ return State.Builder.CreateStepVector(VectorType::get(EltTy, State.VF));
+ }
default:
llvm_unreachable("Unsupported opcode for instruction");
@@ -824,6 +825,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
case VPInstruction::LogicalAnd:
case VPInstruction::Not:
case VPInstruction::PtrAdd:
+ case VPInstruction::StepVector:
return false;
default:
return true;
@@ -851,6 +853,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
case VPInstruction::BranchOnCount:
case VPInstruction::BranchOnCond:
case VPInstruction::ResumePhi:
+ case VPInstruction::StepVector:
return true;
};
llvm_unreachable("switch should return");
@@ -942,6 +945,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::ExtractFirstActive:
O << "extract-first-active";
break;
+ case VPInstruction::StepVector:
+ O << "step-vector";
+ break;
default:
O << Instruction::getOpcodeName(getOpcode());
}
@@ -2291,7 +2297,7 @@ Value *VPScalarCastRecipe ::generate(VPTransformState &State) {
case Instruction::ZExt:
case Instruction::Trunc:
case Instruction::UIToFP: {
- // Note: SExt not used yet.
+ // Note: SExt/ZExt not used yet.
Value *Op = State.get(getOperand(0), VPLane(0));
return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);
}
@@ -2315,20 +2321,6 @@ void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent,
}
#endif
-void VPStepVectorRecipe::execute(VPTransformState &State) {
- VectorType *Ty = VectorType::get(ScalarTy, State.VF);
- State.set(this, State.Builder.CreateStepVector(Ty));
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void VPStepVectorRecipe::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent;
- printAsOperand(O, SlotTracker);
- O << " = STEP-VECTOR";
-}
-#endif
-
void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
assert(State.Lane && "Branch on Mask works only on single instance.");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 61422bfe6c0b0..a058b2a121d59 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -354,7 +354,6 @@ class VPDef {
VPWidenSelectSC,
VPBlendSC,
VPHistogramSC,
- VPStepVectorSC,
// START: Phi-like recipes. Need to be kept together.
VPWidenPHISC,
VPPredInstPHISC,
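The dummy constant operand on the new StepVector opcode exists so that VPTypeAnalysis can recover the result element type from operand 0 (the SetResultTyFromOp case added above); the actual lowering is a single IRBuilder call. A minimal sketch of that lowering outside of VPlan, with a hypothetical helper name:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Builds <0, 1, ..., VF-1> with element type EltTy, mirroring the StepVector
// case in VPInstruction::generate. For a scalable VF this emits the
// llvm.stepvector intrinsic; for a fixed VF it folds to a constant vector.
static Value *buildStepVector(IRBuilderBase &B, Type *EltTy, ElementCount VF) {
  return B.CreateStepVector(VectorType::get(EltTy, VF), "step.vec");
}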
>From 79b9700d6f5ce3833ad5bee74b1c0e76a8d5da43 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Tue, 18 Feb 2025 18:24:30 +0800
Subject: [PATCH 14/16] Add VPlan debug output test
---
.../LoopVectorize/vplan-iv-transforms.ll | 56 ++++++++++++++++++-
1 file changed, 55 insertions(+), 1 deletion(-)
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll
index be1eb78cab607..7068a795cbb5d 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll
@@ -29,7 +29,7 @@ define void @iv_no_binary_op_in_descriptor(i1 %c, ptr %dst) {
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
-; CHECK-NEXT: middle.block:
+ ; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1000>, vp<[[VEC_TC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
@@ -65,3 +65,57 @@ loop.latch:
exit:
ret void
}
+
+; Check that VPWidenIntOrFpInductionRecipe is expanded into smaller recipes in
+; the final VPlan.
+define void @iv_expand(ptr %p, i64 %n) {
+; CHECK-LABEL: LV: Checking a loop in 'iv_expand'
+; CHECK: VPlan 'Initial VPlan for VF={8},UF>=1' {
+; CHECK: <x1> vector loop: {
+; CHECK-NEXT: vector.body:
+; CHECK-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
+; CHECK-NEXT: ir<%i> = WIDEN-INDUCTION ir<0>, ir<1>, vp<%0>
+; CHECK-NEXT: vp<%4> = SCALAR-STEPS vp<%3>, ir<1>
+; CHECK-NEXT: CLONE ir<%q> = getelementptr ir<%p>, vp<%4>
+; CHECK-NEXT: vp<%5> = vector-pointer ir<%q>
+; CHECK-NEXT: WIDEN ir<%x> = load vp<%5>
+; CHECK-NEXT: WIDEN ir<%y> = add ir<%x>, ir<%i>
+; CHECK-NEXT: vp<%6> = vector-pointer ir<%q>
+; CHECK-NEXT: WIDEN store vp<%6>, ir<%y>
+; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%3>, vp<%1>
+; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%2>
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+; CHECK-NEXT: Successor(s): middle.block
+; CHECK: VPlan 'Final VPlan for VF={8},UF={1}'
+; CHECK: <x1> vector loop: {
+; CHECK-NEXT: vector.body:
+; CHECK-NEXT: SCALAR-PHI vp<%3> = phi ir<0>, vp<%index.next>
+; CHECK-NEXT: WIDEN-PHI ir<%i> = phi vp<%induction>, vp<%vec.ind.next>
+; CHECK-NEXT: vp<%4> = SCALAR-STEPS vp<%3>, ir<1>
+; CHECK-NEXT: CLONE ir<%q> = getelementptr ir<%p>, vp<%4>
+; CHECK-NEXT: vp<%5> = vector-pointer ir<%q>
+; CHECK-NEXT: WIDEN ir<%x> = load vp<%5>
+; CHECK-NEXT: WIDEN ir<%y> = add ir<%x>, ir<%i>
+; CHECK-NEXT: vp<%6> = vector-pointer ir<%q>
+; CHECK-NEXT: WIDEN store vp<%6>, ir<%y>
+; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%3>, ir<8>
+; CHECK-NEXT: EMIT vp<%vec.ind.next> = add ir<%i>, vp<%2>
+; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, ir<%n.vec>
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+; CHECK-NEXT: Successor(s): middle.block
+entry:
+ br label %loop
+loop:
+ %i = phi i64 [0, %entry], [%i.next, %loop]
+ %q = getelementptr i64, ptr %p, i64 %i
+ %x = load i64, ptr %q
+ %y = add i64 %x, %i
+ store i64 %y, ptr %q
+ %i.next = add i64 %i, 1
+ %done = icmp eq i64 %i.next, %n
+ br i1 %done, label %exit, label %loop
+exit:
+ ret void
+}
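The expansion the checks above describe can be pictured with plain IRBuilder calls: the vector IV starts at splat(start) + step-vector * splat(step), and each iteration of the vector loop adds splat(VF * step), which is why the preheaders in the updated tests multiply the splatted step by a splat constant instead of splatting a scalar multiply. A rough standalone sketch, assuming a fixed VF and an integer IV; expandWideIV is a hypothetical name, not a VPlan API:

#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include <utility>

using namespace llvm;

// Returns {initial vector IV, back-edge increment} for an IV with scalar
// start Start and scalar step Step, widened to VF lanes.
static std::pair<Value *, Value *>
expandWideIV(IRBuilderBase &B, Value *Start, Value *Step, ElementCount VF) {
  Type *EltTy = Start->getType();
  Value *StepVec = B.CreateStepVector(VectorType::get(EltTy, VF)); // <0,1,...>
  Value *SplatStep = B.CreateVectorSplat(VF, Step);
  // First-iteration value of the vector IV: splat(start) + <0,1,...> * step.
  Value *Init = B.CreateAdd(B.CreateVectorSplat(VF, Start),
                            B.CreateMul(StepVec, SplatStep), "induction");
  // Each vector iteration advances every lane by VF * step, formed as a
  // vector multiply of the splatted step.
  Value *Inc = B.CreateMul(
      SplatStep,
      B.CreateVectorSplat(VF, ConstantInt::get(EltTy, VF.getKnownMinValue())),
      "vec.ind.step");
  return {Init, Inc};
}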
>From d3d85f6d39486bbc6e62d7a3d449e3a36b636099 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Tue, 18 Feb 2025 18:25:17 +0800
Subject: [PATCH 15/16] Undo stray whitespace change
---
llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll
index 7068a795cbb5d..a3558ba82f812 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll
@@ -29,7 +29,7 @@ define void @iv_no_binary_op_in_descriptor(i1 %c, ptr %dst) {
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
- ; CHECK-NEXT: middle.block:
+; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1000>, vp<[[VEC_TC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
>From 8f9461019a153f562b6bcc35a205a13dd90c8b97 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Tue, 18 Feb 2025 20:19:36 +0800
Subject: [PATCH 16/16] Fix ARM test
---
.../LoopVectorize/ARM/mve-gather-scatter-tailpred.ll | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll
index fda9ef2cf6c2f..8d9781f605134 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll
@@ -516,9 +516,7 @@ define void @test_stride_noninvar3_4i32(ptr readonly %data, ptr noalias nocaptur
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> splat (i32 3), [[TMP1]]
-; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[X]], 4
-; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 0
-; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[DOTSPLAT3:%.*]] = mul <4 x i32> [[DOTSPLAT]], splat (i32 4)
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]