[llvm] [VPlan] Introduce explicit broadcasts for live-ins. (PR #124644)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 27 14:48:42 PST 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/124644
>From 2aaf89c469c594887a226fb6673c865016096b27 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 27 Jan 2025 12:11:33 +0000
Subject: [PATCH] [VPlan] Introduce explicit broadcasts for live-ins.
Add a new VPInstruction::Broadcast opcode and use it to materialize
explicit broadcasts of live-ins. The initial patch only materlizes the
broadcasts if the vector preheader dominates all uses that need it.
Later patches will pick the best valid insert point, thus retiring
implicit hoisting of broadcasts from VPTransformsState::get().
---
.../Transforms/Vectorize/LoopVectorize.cpp | 2 +-
llvm/lib/Transforms/Vectorize/VPlan.h | 45 ++++++++
.../Transforms/Vectorize/VPlanAnalysis.cpp | 1 +
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 11 +-
.../Transforms/Vectorize/VPlanTransforms.cpp | 31 ++++++
.../Transforms/Vectorize/VPlanTransforms.h | 3 +
.../AArch64/clamped-trip-count.ll | 8 +-
.../AArch64/divs-with-scalable-vfs.ll | 14 +--
.../AArch64/sve-interleaved-accesses.ll | 56 +++++-----
.../sve-interleaved-masked-accesses.ll | 28 ++---
.../AArch64/tail-folding-styles.ll | 4 +-
.../LoopVectorize/RISCV/mask-index-type.ll | 4 +-
...ruction-or-drop-poison-generating-flags.ll | 12 +--
.../LoopVectorize/RISCV/strided-accesses.ll | 10 +-
.../truncate-to-minimal-bitwidth-cost.ll | 8 +-
.../LoopVectorize/RISCV/uniform-load-store.ll | 54 +++++-----
.../X86/divs-with-tail-folding.ll | 4 +-
.../X86/epilog-vectorization-inductions.ll | 34 +++---
.../LoopVectorize/X86/induction-costs.ll | 6 +-
.../LoopVectorize/X86/induction-step.ll | 12 +--
.../LoopVectorize/X86/masked-store-cost.ll | 6 +-
.../Transforms/LoopVectorize/X86/pr54634.ll | 6 +-
.../x86-interleaved-accesses-masked-group.ll | 48 ++++-----
.../LoopVectorize/X86/x86-predication.ll | 100 +++++++++---------
.../LoopVectorize/create-induction-resume.ll | 14 +--
.../epilog-vectorization-any-of-reductions.ll | 8 +-
.../first-order-recurrence-complex.ll | 6 +-
.../LoopVectorize/first-order-recurrence.ll | 4 +-
.../LoopVectorize/induction-step.ll | 14 +--
.../Transforms/LoopVectorize/induction.ll | 10 +-
.../LoopVectorize/no_outside_user.ll | 4 +-
.../outer_loop_hcfg_construction.ll | 24 ++---
.../scalable-first-order-recurrence.ll | 4 +-
.../LoopVectorize/scalable-iv-outside-user.ll | 12 +--
.../vplan-widen-select-instruction.ll | 24 ++---
35 files changed, 360 insertions(+), 271 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index f592e5557c17d5..a5fa304b7631e6 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7648,7 +7648,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
((VectorizingEpilogue && ExpandedSCEVs) ||
(!VectorizingEpilogue && !ExpandedSCEVs)) &&
"expanded SCEVs to reuse can only be used during epilogue vectorization");
-
+ VPlanTransforms::materializeBroadcasts(BestVPlan);
// TODO: Move to VPlan transform stage once the transition to the VPlan-based
// cost model is complete for better cost estimates.
VPlanTransforms::unrollByUF(BestVPlan, BestUF,
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 253f22d299b623..2b3319250673de 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1210,6 +1210,7 @@ class VPInstruction : public VPRecipeWithIRFlags,
CanonicalIVIncrementForPart,
BranchOnCount,
BranchOnCond,
+ Broadcast,
ComputeReductionResult,
// Takes the VPValue to extract from as first operand and the lane or part
// to extract as second operand, counting from the end starting with 1 for
@@ -1855,6 +1856,13 @@ struct VPWidenSelectRecipe : public VPRecipeWithIRFlags {
bool isInvariantCond() const {
return getCond()->isDefinedOutsideLoopRegions();
}
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return Op == getCond() && isInvariantCond();
+ }
};
/// A recipe for handling GEP instructions.
@@ -1902,6 +1910,13 @@ class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return Op == getOperand(0);
+ }
};
/// A recipe to compute the pointers for widened memory accesses of IndexTy
@@ -2217,6 +2232,13 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
VPValue *getLastUnrolledPartOperand() {
return getNumOperands() == 5 ? getOperand(4) : this;
}
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return Op == getStartValue();
+ }
};
class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe,
@@ -2249,6 +2271,13 @@ class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe,
/// Returns true if only scalar values will be generated.
bool onlyScalarsGenerated(bool IsScalable);
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return Op == getOperand(0);
+ }
+
/// Returns the VPValue representing the value of this induction at
/// the first unrolled part, if it exists. Returns itself if unrolling did not
/// take place.
@@ -2377,6 +2406,13 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return Op == getStartValue();
+ }
};
/// A recipe for handling reduction phis. The start value is the first operand
@@ -2443,6 +2479,13 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
/// Returns true, if the phi is part of an in-loop reduction.
bool isInLoop() const { return IsInLoop; }
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return Op == getStartValue();
+ }
};
/// A recipe for forming partial reductions. In the loop, an accumulator and
@@ -4054,6 +4097,8 @@ class VPlan {
/// Return the live-in VPValue for \p V, if there is one or nullptr otherwise.
VPValue *getLiveIn(Value *V) const { return Value2VPValue.lookup(V); }
+ ArrayRef<VPValue *> getLiveIns() const { return VPLiveInsToFree; }
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the live-ins of this VPlan to \p O.
void printLiveIns(raw_ostream &O) const;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 27357ff04b5f2a..1408f10b23e1e3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -89,6 +89,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
inferScalarType(R->getOperand(1))->isIntegerTy(1) &&
"LogicalAnd operands should be bool");
return IntegerType::get(Ctx, 1);
+ case VPInstruction::Broadcast:
case VPInstruction::PtrAdd:
// Return the type based on the pointer argument (i.e. first operand).
return inferScalarType(R->getOperand(0));
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 2679ed6b26b5d1..ec442ea201e6d3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -575,6 +575,10 @@ Value *VPInstruction::generate(VPTransformState &State) {
Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
return CondBr;
}
+ case VPInstruction::Broadcast: {
+ return Builder.CreateVectorSplat(
+ State.VF, State.get(getOperand(0), /*IsScalar*/ true), "broadcast");
+ }
case VPInstruction::ComputeReductionResult: {
// FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
// and will be removed by breaking up the recipe further.
@@ -790,7 +794,6 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
case Instruction::ICmp:
case Instruction::Select:
case Instruction::Or:
- case VPInstruction::PtrAdd:
// TODO: Cover additional opcodes.
return vputils::onlyFirstLaneUsed(this);
case VPInstruction::ActiveLaneMask:
@@ -801,6 +804,8 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
case VPInstruction::BranchOnCond:
case VPInstruction::ResumePhi:
return true;
+ case VPInstruction::PtrAdd:
+ return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
};
llvm_unreachable("switch should return");
}
@@ -873,6 +878,10 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::BranchOnCount:
O << "branch-on-count";
break;
+ case VPInstruction::Broadcast:
+ O << "broadcast";
+ break;
+
case VPInstruction::ExtractFromEnd:
O << "extract-from-end";
break;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 714250a56ff576..d5d809a276bb92 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2146,3 +2146,34 @@ bool VPlanTransforms::handleUncountableEarlyExit(
LatchExitingBranch->eraseFromParent();
return true;
}
+
+void VPlanTransforms::materializeBroadcasts(VPlan &Plan) {
+ VPDominatorTree VPDT;
+ VPDT.recalculate(Plan);
+ auto *VectorPreheader = Plan.getVectorPreheader();
+ VPBuilder Builder(VectorPreheader);
+ for (VPValue *LiveIn : Plan.getLiveIns()) {
+ if (all_of(LiveIn->users(),
+ [LiveIn](VPUser *U) {
+ return cast<VPRecipeBase>(U)->usesScalars(LiveIn);
+ }) ||
+ !LiveIn->getLiveInIRValue() ||
+ isa<Constant>(LiveIn->getLiveInIRValue()))
+ continue;
+
+ // Add explicit broadcast if the vector preheader dominates all users.
+ // TODO: Find valid inert point for all users.
+ if (all_of(LiveIn->users(), [&VPDT, VectorPreheader](VPUser *U) {
+ return VectorPreheader != cast<VPRecipeBase>(U)->getParent() &&
+ VPDT.dominates(VectorPreheader,
+ cast<VPRecipeBase>(U)->getParent());
+ })) {
+ auto *Broadcast =
+ Builder.createNaryOp(VPInstruction::Broadcast, {LiveIn});
+ LiveIn->replaceUsesWithIf(Broadcast, [LiveIn, Broadcast](VPUser &U,
+ unsigned Idx) {
+ return Broadcast != &U && !cast<VPRecipeBase>(&U)->usesScalars(LiveIn);
+ });
+ }
+ }
+}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index b31fef5d62456b..f749e2f0a81b86 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -144,6 +144,9 @@ struct VPlanTransforms {
static void
optimizeInductionExitUsers(VPlan &Plan,
DenseMap<VPValue *, VPValue *> &EndValues);
+
+ /// Add explicit broadcasts for live-ins used as vectors.
+ static void materializeBroadcasts(VPlan &Plan);
};
} // namespace llvm
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
index 5b77ced73bce0a..10300982bd9f4c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
@@ -17,14 +17,14 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 8)
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -103,14 +103,14 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]])
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
index 2c37593be78612..6f3d49d442f1e6 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
@@ -124,14 +124,14 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[N]], [[TMP11]]
; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]])
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[M]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP15]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]]
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP20]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[M]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -248,13 +248,13 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[TMP0]], [[TMP11]]
; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[TMP0]])
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[MUL_2_I]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP15]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]]
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP20]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[MUL_2_I]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP20]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
@@ -262,7 +262,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP23:%.*]] = udiv <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT4]]
+; CHECK-NEXT: [[TMP23:%.*]] = udiv <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP24:%.*]] = urem i64 [[TMP21]], [[MUL_2_I]]
; CHECK-NEXT: [[TMP25:%.*]] = udiv i64 [[TMP24]], [[MUL_1_I]]
; CHECK-NEXT: [[TMP26:%.*]] = urem i64 [[TMP24]], [[MUL_1_I]]
@@ -283,7 +283,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP14]])
; CHECK-NEXT: [[TMP47:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT4]]
; CHECK-NEXT: [[TMP48:%.*]] = extractelement <vscale x 2 x i1> [[TMP47]], i32 0
; CHECK-NEXT: br i1 [[TMP48]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
index bf956227334616..350ad5b3e54330 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
@@ -106,15 +106,15 @@ define void @test_array_load2_i16_store2(i32 %C, i32 %D) #1 {
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[C:%.*]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[D:%.*]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP3:%.*]] = shl <vscale x 4 x i64> [[TMP2]], splat (i64 1)
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP0]], 3
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP5]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[C:%.*]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[D:%.*]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT2]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -195,15 +195,15 @@ define void @test_array_load2_store2_i16(i32 noundef %C, i32 noundef %D) #1 {
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[C:%.*]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[D:%.*]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP3:%.*]] = shl <vscale x 4 x i64> [[TMP2]], splat (i64 1)
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP0]], 3
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP5]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[C:%.*]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[D:%.*]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -635,18 +635,18 @@ define void @load_gap_reverse(ptr noalias nocapture readonly %P1, ptr noalias no
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[X:%.*]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT1:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[INDUCTION:%.*]] = sub <vscale x 4 x i64> splat (i64 1023), [[TMP2]]
; CHECK-NEXT: [[DOTNEG:%.*]] = sub nsw i64 0, [[TMP1]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[DOTNEG]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[X:%.*]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[DOTNEG]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = add nsw <vscale x 4 x i64> [[BROADCAST_SPLAT]], [[VEC_IND]]
+; CHECK-NEXT: [[TMP4:%.*]] = add nsw <vscale x 4 x i64> [[BROADCAST_SPLAT1]], [[VEC_IND]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P1:%.*]], <vscale x 4 x i64> [[VEC_IND]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P2:%.*]], <vscale x 4 x i64> [[VEC_IND]], i32 1
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> [[TMP6]], i32 8, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i64> poison)
@@ -654,7 +654,7 @@ define void @load_gap_reverse(ptr noalias nocapture readonly %P1, ptr noalias no
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> [[TMP4]], <vscale x 4 x ptr> [[TMP5]], i32 8, <vscale x 4 x i1> splat (i1 true))
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> [[TMP7]], <vscale x 4 x ptr> [[TMP6]], i32 8, <vscale x 4 x i1> splat (i1 true))
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK: middle.block:
@@ -884,11 +884,11 @@ define void @PR27626_0(ptr %p, i32 %z, i64 %n) #1 {
; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP6]]
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Z:%.*]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP8]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Z:%.*]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1062,11 +1062,11 @@ define void @PR27626_2(ptr %p, i64 %n, i32 %z) #1 {
; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP6]]
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Z:%.*]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP8]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Z:%.*]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1251,17 +1251,17 @@ define void @PR27626_4(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 {
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 2
; CHECK-NEXT: [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1
-; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
-; CHECK-NEXT: [[TMP9:%.*]] = shl <vscale x 4 x i64> [[TMP8]], splat (i64 1)
-; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP6]], 3
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP11]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Y:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Z:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT3]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
+; CHECK-NEXT: [[TMP9:%.*]] = shl <vscale x 4 x i64> [[TMP10]], splat (i64 1)
+; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP6]], 3
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP11]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1343,18 +1343,18 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 {
; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2
; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[N_VEC]], 1
; CHECK-NEXT: [[IND_END:%.*]] = or disjoint i64 [[TMP11]], 3
-; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
-; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 4 x i64> [[TMP9]], splat (i64 1)
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> [[TMP10]], splat (i64 3)
-; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i64 [[TMP7]], 3
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP12]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Y:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Z:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT3]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
+; CHECK-NEXT: [[TMP21:%.*]] = shl <vscale x 4 x i64> [[TMP10]], splat (i64 1)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> [[TMP21]], splat (i64 3)
+; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i64 [[TMP7]], 3
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP12]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll
index 1a281fe7c6f7fa..45dee0705ff435 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll
@@ -37,11 +37,11 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no
; SCALAR_TAIL_FOLDING-NEXT: [[N_VEC:%.*]] = sub nuw nsw i32 1024, [[N_MOD_VF]]
; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4
+; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
+; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP5]], i64 0
; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[DOTSPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
-; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
-; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALAR_TAIL_FOLDING: vector.body:
; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -113,11 +113,11 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no
; PREDICATED_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 4
; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP3]])
; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024)
+; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
+; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP1]], i64 0
; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[DOTSPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
-; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
-; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]]
; PREDICATED_TAIL_FOLDING: vector.body:
; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -221,11 +221,11 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no
; SCALAR_TAIL_FOLDING-NEXT: [[N_VEC:%.*]] = sub nuw nsw i32 1024, [[N_MOD_VF]]
; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4
+; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
+; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP5]], i64 0
; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[DOTSPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
-; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
-; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALAR_TAIL_FOLDING: vector.body:
; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -282,11 +282,11 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no
; PREDICATED_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 4
; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP3]])
; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024)
+; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
+; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP1]], i64 0
; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[DOTSPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
-; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
-; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]]
; PREDICATED_TAIL_FOLDING: vector.body:
; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -379,13 +379,13 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no
; SCALAR_TAIL_FOLDING-NEXT: [[N_VEC:%.*]] = sub nuw nsw i32 1024, [[N_MOD_VF]]
; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
-; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP5]], i64 0
-; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[DOTSPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV3]], i64 0
; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
+; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP5]], i64 0
+; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[DOTSPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALAR_TAIL_FOLDING: vector.body:
; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -449,13 +449,13 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no
; PREDICATED_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 4
; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP3]])
; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024)
-; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
-; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP1]], i64 0
-; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[DOTSPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV3]], i64 0
; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
+; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP1]], i64 0
+; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[DOTSPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]]
; PREDICATED_TAIL_FOLDING: vector.body:
; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll
index 0cfb2d5edcc632..7325230b84e813 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll
@@ -115,10 +115,10 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features
; DATA_NO_LANEMASK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[UMAX]], 1
; DATA_NO_LANEMASK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; DATA_NO_LANEMASK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4
-; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
-; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i64 0
; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT4]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
+; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT5]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; DATA_NO_LANEMASK-NEXT: br label [[VECTOR_BODY:%.*]]
; DATA_NO_LANEMASK: vector.body:
; DATA_NO_LANEMASK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll
index c00868c3e0be23..fbff00ea5a2965 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll
@@ -22,14 +22,14 @@ define void @test(ptr noalias nocapture %a, ptr noalias nocapture %b, i32 %v) {
; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLENUNK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
+; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[V:%.*]], i64 0
+; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; VLENUNK-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; VLENUNK-NEXT: [[TMP8:%.*]] = mul <vscale x 4 x i64> [[TMP6]], splat (i64 1)
; VLENUNK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP8]]
; VLENUNK-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP5]]
; VLENUNK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP11]], i64 0
; VLENUNK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[V:%.*]], i64 0
-; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]]
; VLENUNK: vector.body:
; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll
index 09e8a760a355c0..ad6a9a858a1cd6 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll
@@ -20,18 +20,18 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1001, [[N_MOD_VF]]
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8
-; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
-; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 8 x i64> [[TMP6]], splat (i64 1)
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP8]]
-; CHECK-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP5]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP11]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[A]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[B]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[C]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT3]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
+; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP6]], splat (i64 1)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
+; CHECK-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP5]]
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP8]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
index 30cb33e64eccf6..8e106d857825e2 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
@@ -528,26 +528,26 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
; STRIDED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; STRIDED-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; STRIDED-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4
+; STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[STRIDE]], i64 0
+; STRIDED-NEXT: [[BROADCAST_SPLAT1:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; STRIDED-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; STRIDED-NEXT: [[TMP14:%.*]] = mul <vscale x 4 x i64> [[TMP12]], splat (i64 1)
; STRIDED-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP14]]
; STRIDED-NEXT: [[TMP17:%.*]] = mul i64 1, [[TMP11]]
-; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP17]], i64 0
-; STRIDED-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[STRIDE]], i64 0
+; STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP17]], i64 0
; STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; STRIDED: vector.body:
; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; STRIDED-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; STRIDED-NEXT: [[TMP18:%.*]] = mul nuw nsw <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; STRIDED-NEXT: [[TMP18:%.*]] = mul nuw nsw <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT1]]
; STRIDED-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[P]], <vscale x 4 x i64> [[TMP18]]
; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP19]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison), !alias.scope [[META8:![0-9]+]]
; STRIDED-NEXT: [[TMP20:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], splat (i32 1)
; STRIDED-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[P2]], <vscale x 4 x i64> [[TMP18]]
; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP20]], <vscale x 4 x ptr> [[TMP21]], i32 4, <vscale x 4 x i1> splat (i1 true)), !alias.scope [[META11:![0-9]+]], !noalias [[META8]]
; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
-; STRIDED-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; STRIDED-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; STRIDED-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; STRIDED-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; STRIDED: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll
index f38aa11b5af873..000c1570ef92dc 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll
@@ -163,12 +163,12 @@ define void @truncate_to_i1_used_by_branch(i8 %x, ptr %dst) #0 {
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i8> [[BROADCAST_SPLATINSERT]], <2 x i8> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP0:%.*]] = trunc <2 x i8> [[BROADCAST_SPLAT]] to <2 x i1>
; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i1> splat (i1 true), [[TMP0]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x ptr> poison, ptr [[DST]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT1]], <2 x ptr> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 0, i32 2)
; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[ACTIVE_LANE_MASK]], <2 x i1> [[TMP2]], <2 x i1> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x ptr> poison, ptr [[DST]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT1]], <2 x ptr> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> zeroinitializer, <2 x ptr> [[BROADCAST_SPLAT4]], i32 1, <2 x i1> [[TMP3]])
; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -276,10 +276,10 @@ define void @icmp_only_first_op_truncated(ptr noalias %dst, i32 %x, i64 %N, i64
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <vscale x 2 x i32> [[TMP7]], [[BROADCAST_SPLAT2]]
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[X]] to i64
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[SRC]], i64 [[TMP9]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[TMP10]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[DST]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT5]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[TMP10]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT6]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
index 95496bb51a1bc9..8cba459d1e6726 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
@@ -322,14 +322,14 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
+; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[B:%.*]], i64 0
+; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; SCALABLE-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; SCALABLE-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i64> [[TMP6]], splat (i64 1)
; SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP8]]
; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]]
; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i64 0
; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[B:%.*]], i64 0
-; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -429,14 +429,14 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
+; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[B:%.*]], i64 0
+; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul <vscale x 2 x i64> [[TMP5]], splat (i64 1)
; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP7]]
; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP4]]
; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8]], i64 0
; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[B:%.*]], i64 0
-; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; TF-SCALABLE: vector.body:
; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -992,15 +992,15 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias
; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
+; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[B:%.*]], i64 0
+; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
+; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
+; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT2]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul <vscale x 2 x i64> [[TMP5]], splat (i64 1)
; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP7]]
; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP4]]
-; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8]], i64 0
-; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[B:%.*]], i64 0
-; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
-; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
+; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8]], i64 0
; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; TF-SCALABLE: vector.body:
@@ -1011,9 +1011,9 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias
; TF-SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[VEC_IND]], <vscale x 2 x ptr> [[BROADCAST_SPLAT]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; TF-SCALABLE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP9]]
; TF-SCALABLE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0
-; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP11]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
+; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT3]], ptr [[TMP11]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
-; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT2]]
; TF-SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; TF-SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; TF-SCALABLE: middle.block:
@@ -1122,28 +1122,28 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
+; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
+; SCALABLE-NEXT: [[BROADCAST_SPLAT1:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT2]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[B:%.*]], i64 0
+; SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; SCALABLE-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; SCALABLE-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i64> [[TMP6]], splat (i64 1)
; SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP8]]
; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]]
-; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i64 0
-; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
+; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i64 0
; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[B:%.*]], i64 0
-; SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; SCALABLE-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; SCALABLE-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0
; SCALABLE-NEXT: [[TMP11:%.*]] = icmp ugt <vscale x 2 x i64> [[VEC_IND]], splat (i64 10)
-; SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x ptr> [[BROADCAST_SPLAT2]], i32 8, <vscale x 2 x i1> [[TMP11]])
+; SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[BROADCAST_SPLAT1]], <vscale x 2 x ptr> [[BROADCAST_SPLAT2]], i32 8, <vscale x 2 x i1> [[TMP11]])
; SCALABLE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP10]]
; SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0
-; SCALABLE-NEXT: store <vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP13]], align 8
+; SCALABLE-NEXT: store <vscale x 2 x i64> [[BROADCAST_SPLAT1]], ptr [[TMP13]], align 8
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; SCALABLE: middle.block:
@@ -1228,16 +1228,16 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
+; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
+; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT1:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT2]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[B:%.*]], i64 0
+; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul <vscale x 2 x i64> [[TMP5]], splat (i64 1)
; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP7]]
; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP4]]
-; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8]], i64 0
-; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
+; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8]], i64 0
; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[B:%.*]], i64 0
-; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; TF-SCALABLE: vector.body:
; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1246,12 +1246,12 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP9]], i64 1025)
; TF-SCALABLE-NEXT: [[TMP10:%.*]] = icmp ugt <vscale x 2 x i64> [[VEC_IND]], splat (i64 10)
; TF-SCALABLE-NEXT: [[TMP11:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP10]], <vscale x 2 x i1> zeroinitializer
-; TF-SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x ptr> [[BROADCAST_SPLAT2]], i32 8, <vscale x 2 x i1> [[TMP11]])
+; TF-SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[BROADCAST_SPLAT1]], <vscale x 2 x ptr> [[BROADCAST_SPLAT2]], i32 8, <vscale x 2 x i1> [[TMP11]])
; TF-SCALABLE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP9]]
; TF-SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0
-; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP13]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
+; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT1]], ptr [[TMP13]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
-; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; TF-SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; TF-SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; TF-SCALABLE: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll
index f14422e0a60690..0b1f52d2e277c9 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll
@@ -108,10 +108,10 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[M]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT2]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_SDIV_CONTINUE8:.*]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
index c14ddca6c913d8..dcaf9f08a6a902 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
@@ -147,15 +147,15 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l,
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[L]], [[N_MOD_VF]]
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i16
; CHECK-NEXT: [[IND_END:%.*]] = mul i16 [[DOTCAST]], [[TMP0]]
-; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <16 x i16> poison, i16 [[OFF]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[DOTSPLATINSERT2]], <16 x i16> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP1:%.*]] = mul <16 x i16> splat (i16 16), [[TMP2]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT2]], <16 x i16> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = mul <16 x i16> splat (i16 16), [[BROADCAST_SPLAT3]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i16> [[DOTSPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = mul <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, [[DOTSPLAT]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add <16 x i16> zeroinitializer, [[TMP11]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[OFF]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -164,10 +164,10 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l,
; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <16 x i16> [[STEP_ADD]], [[TMP1]]
; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <16 x i16> [[STEP_ADD_2]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = sub <16 x i16> [[VEC_IND]], [[BROADCAST_SPLAT3]]
-; CHECK-NEXT: [[TMP5:%.*]] = sub <16 x i16> [[STEP_ADD]], [[BROADCAST_SPLAT3]]
-; CHECK-NEXT: [[TMP6:%.*]] = sub <16 x i16> [[STEP_ADD_2]], [[BROADCAST_SPLAT3]]
-; CHECK-NEXT: [[TMP7:%.*]] = sub <16 x i16> [[STEP_ADD_3]], [[BROADCAST_SPLAT3]]
+; CHECK-NEXT: [[TMP4:%.*]] = sub <16 x i16> [[VEC_IND]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = sub <16 x i16> [[STEP_ADD]], [[TMP2]]
+; CHECK-NEXT: [[TMP6:%.*]] = sub <16 x i16> [[STEP_ADD_2]], [[TMP2]]
+; CHECK-NEXT: [[TMP7:%.*]] = sub <16 x i16> [[STEP_ADD_3]], [[TMP2]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[K:%.*]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 16
@@ -197,28 +197,28 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l,
; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[L]], [[N_MOD_VF4]]
; CHECK-NEXT: [[DOTCAST7:%.*]] = trunc i64 [[N_VEC5]] to i16
; CHECK-NEXT: [[IND_END8:%.*]] = mul i16 [[DOTCAST7]], [[TMP0]]
-; CHECK-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement <8 x i16> poison, i16 [[BC_RESUME_VAL]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement <8 x i16> poison, i16 [[OFF]], i64 0
; CHECK-NEXT: [[DOTSPLAT14:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT13]], <8 x i16> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT: [[DOTSPLATINSERT15:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT15:%.*]] = insertelement <8 x i16> poison, i16 [[BC_RESUME_VAL]], i64 0
; CHECK-NEXT: [[DOTSPLAT16:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT15]], <8 x i16> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = mul <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, [[DOTSPLAT16]]
-; CHECK-NEXT: [[INDUCTION17:%.*]] = add <8 x i16> [[DOTSPLAT14]], [[TMP14]]
+; CHECK-NEXT: [[DOTSPLATINSERT14:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i64 0
+; CHECK-NEXT: [[DOTSPLAT15:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT14]], <8 x i16> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = mul <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, [[DOTSPLAT15]]
+; CHECK-NEXT: [[INDUCTION17:%.*]] = add <8 x i16> [[DOTSPLAT16]], [[TMP22]]
; CHECK-NEXT: [[TMP15:%.*]] = mul i16 [[TMP0]], 8
-; CHECK-NEXT: [[DOTSPLATINSERT18:%.*]] = insertelement <8 x i16> poison, i16 [[TMP15]], i64 0
-; CHECK-NEXT: [[DOTSPLAT19:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT18]], <8 x i16> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT22:%.*]] = insertelement <8 x i16> poison, i16 [[OFF]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT22:%.*]] = insertelement <8 x i16> poison, i16 [[TMP15]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT23:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT22]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX12:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT24:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND20:%.*]] = phi <8 x i16> [ [[INDUCTION17]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT21:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX12]], 0
-; CHECK-NEXT: [[TMP17:%.*]] = sub <8 x i16> [[VEC_IND20]], [[BROADCAST_SPLAT23]]
+; CHECK-NEXT: [[TMP17:%.*]] = sub <8 x i16> [[VEC_IND20]], [[DOTSPLAT14]]
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[K]], i64 [[TMP16]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[TMP18]], i32 0
; CHECK-NEXT: store <8 x i16> [[TMP17]], ptr [[TMP19]], align 2
; CHECK-NEXT: [[INDEX_NEXT24]] = add nuw i64 [[INDEX12]], 8
-; CHECK-NEXT: [[VEC_IND_NEXT21]] = add <8 x i16> [[VEC_IND20]], [[DOTSPLAT19]]
+; CHECK-NEXT: [[VEC_IND_NEXT21]] = add <8 x i16> [[VEC_IND20]], [[BROADCAST_SPLAT23]]
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT24]], [[N_VEC5]]
; CHECK-NEXT: br i1 [[TMP20]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: vec.epilog.middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
index 68cbfad91c541b..578a7f367c9071 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
@@ -515,13 +515,13 @@ define i32 @test_scalar_predicated_cost(i64 %x, i64 %y, ptr %A) #0 {
; CHECK-NEXT: br i1 false, label [[SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 96, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_PH]] ]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[VEC_EPILOG_RESUME_VAL]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[DOTSPLAT]], <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x i64> poison, i64 [[Y]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT7]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <4 x i64> poison, i64 [[X]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT10:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT9]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[VEC_EPILOG_RESUME_VAL]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[DOTSPLAT]], <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT11:%.*]], [[LOOP_HEADER]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll
index 1dd2692ba6822d..4512fb14b54312 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll
@@ -88,23 +88,23 @@ define i16 @wide_sub_induction_step_live_in(ptr %dst, i64 %N, i16 %off) {
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i16
; CHECK-NEXT: [[TMP1:%.*]] = mul i16 [[DOTCAST]], [[TMP0]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[O_1]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i16> splat (i16 4), [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT1]], <4 x i16> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i16> splat (i16 4), [[BROADCAST_SPLAT2]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i16> [[DOTSPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i16> <i16 0, i16 1, i16 2, i16 3>, [[DOTSPLAT]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i16> zeroinitializer, [[TMP3]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i16> poison, i16 [[O_1]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT1]], <4 x i16> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = sub <4 x i16> [[VEC_IND]], [[BROADCAST_SPLAT2]]
-; CHECK-NEXT: [[TMP10:%.*]] = sub <4 x i16> [[STEP_ADD]], [[BROADCAST_SPLAT2]]
+; CHECK-NEXT: [[TMP5:%.*]] = sub <4 x i16> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP10:%.*]] = sub <4 x i16> [[STEP_ADD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[DST:%.*]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 0
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 4
diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll
index f0a1e5c47d06a0..1638ec2f5dc85f 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll
@@ -58,13 +58,13 @@ define i32 @test_scalar_predicated_cost(i64 %x, i64 %y, ptr %A) #0 {
; CHECK-NEXT: br i1 false, label [[SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 96, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_PH]] ]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[VEC_EPILOG_RESUME_VAL]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[DOTSPLAT]], <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x i64> poison, i64 [[Y]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT7]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <4 x i64> poison, i64 [[X]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT10:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT9]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[VEC_EPILOG_RESUME_VAL]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[DOTSPLAT]], <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT11:%.*]], [[LOOP_HEADER]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll b/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll
index 1cee80f88ec62f..07e887d373086c 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll
@@ -69,13 +69,13 @@ define ptr addrspace(10) @japi1_vect_42283(ptr nocapture readonly %0, i32 %1) lo
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[TOP]] ]
; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[TMP8]], 4
; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[TMP8]], [[N_MOD_VF4]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[VEC_EPILOG_RESUME_VAL]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[DOTSPLAT]], <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <4 x ptr addrspace(10)> poison, ptr addrspace(10) [[DOTUNPACK]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT11:%.*]] = shufflevector <4 x ptr addrspace(10)> [[BROADCAST_SPLATINSERT10]], <4 x ptr addrspace(10)> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT12:%.*]] = insertelement <4 x i64> poison, i64 [[DOTUNPACK2]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT13:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT12]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[VEC_EPILOG_RESUME_VAL]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[DOTSPLAT]], <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT: br label [[L26:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDEX_NEXT14:%.*]], [[L26]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll
index f29428c51c636b..08ba799b6afea5 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll
@@ -391,16 +391,16 @@ define dso_local void @masked_strided1_optsize_unknown_tc(ptr noalias nocapture
; DISABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add nuw i32 [[N]], 7
; DISABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
; DISABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add nsw i32 [[N]], -1
-; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
+; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i64 0
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
-; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i64 0
+; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer
; DISABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; DISABLED_MASKED_STRIDED: vector.body:
; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE16:%.*]] ]
; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE16]] ]
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP0]], <8 x i1> [[TMP1]], <8 x i1> zeroinitializer
; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1)
; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i64 0
@@ -501,16 +501,16 @@ define dso_local void @masked_strided1_optsize_unknown_tc(ptr noalias nocapture
; ENABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add nuw i32 [[N]], 7
; ENABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
; ENABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add nsw i32 [[N]], -1
-; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
+; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i64 0
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
-; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i64 0
+; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer
; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; ENABLED_MASKED_STRIDED: vector.body:
; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
+; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
+; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP0]], <8 x i1> [[TMP1]], <8 x i1> zeroinitializer
; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl i32 [[INDEX]], 1
; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 [[TMP2]]
@@ -595,16 +595,16 @@ define dso_local void @masked_strided3_optsize_unknown_tc(ptr noalias nocapture
; DISABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add nuw i32 [[N]], 7
; DISABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
; DISABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add nsw i32 [[N]], -1
-; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
+; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i64 0
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
-; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i64 0
+; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer
; DISABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; DISABLED_MASKED_STRIDED: vector.body:
; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE16:%.*]] ]
; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE16]] ]
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP0]], <8 x i1> [[TMP1]], <8 x i1> zeroinitializer
; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = mul nsw <8 x i32> [[VEC_IND]], splat (i32 3)
; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i64 0
@@ -705,16 +705,16 @@ define dso_local void @masked_strided3_optsize_unknown_tc(ptr noalias nocapture
; ENABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add nuw i32 [[N]], 7
; ENABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
; ENABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add nsw i32 [[N]], -1
-; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
+; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i64 0
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
-; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i64 0
+; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer
; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; ENABLED_MASKED_STRIDED: vector.body:
; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
+; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
+; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP0]], <8 x i1> [[TMP1]], <8 x i1> zeroinitializer
; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = mul i32 [[INDEX]], 3
; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 [[TMP2]]
@@ -2202,16 +2202,16 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
; DISABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add nuw i32 [[N]], 7
; DISABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
; DISABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add nsw i32 [[N]], -1
-; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
+; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[GUARD:%.*]], i64 0
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
-; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[GUARD:%.*]], i64 0
+; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer
; DISABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; DISABLED_MASKED_STRIDED: vector.body:
; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE62:%.*]] ]
; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE62]] ]
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp sgt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
+; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp sgt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP0]], <8 x i1> [[TMP1]], <8 x i1> zeroinitializer
; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1)
; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i64 0
@@ -2536,16 +2536,16 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
; ENABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add nuw i32 [[N]], 7
; ENABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
; ENABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add nsw i32 [[N]], -1
-; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
+; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[GUARD:%.*]], i64 0
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
-; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[GUARD:%.*]], i64 0
+; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer
; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; ENABLED_MASKED_STRIDED: vector.body:
; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp sgt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
+; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
+; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp sgt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP0]], <8 x i1> [[TMP1]], <8 x i1> zeroinitializer
; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl i32 [[INDEX]], 1
; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 [[TMP2]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll
index 2ac30337a4fb9c..142bd3e17fd215 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll
@@ -208,15 +208,15 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) {
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i32> poison, i32 [[X:%.*]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT3]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <2 x i1> poison, i1 [[TMP1:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT4]], <2 x i1> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i32> poison, i32 [[X:%.*]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT1]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE2:%.*]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UDIV_CONTINUE2]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[PRED_UDIV_CONTINUE2]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE4:%.*]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UDIV_CONTINUE4]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[PRED_UDIV_CONTINUE4]] ]
; CHECK-NEXT: [[TMP0:%.*]] = mul <2 x i64> [[VEC_IND]], splat (i64 777)
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
; CHECK: pred.udiv.if:
@@ -228,16 +228,16 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) {
; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE]]
; CHECK: pred.udiv.continue:
; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_UDIV_IF]] ]
-; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF1:%.*]], label [[PRED_UDIV_CONTINUE2]]
-; CHECK: pred.udiv.if1:
+; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF3:%.*]], label [[PRED_UDIV_CONTINUE4]]
+; CHECK: pred.udiv.if3:
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
; CHECK-NEXT: [[TMP13:%.*]] = udiv i32 [[TMP12]], [[X]]
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[TMP13]], i32 1
-; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE2]]
-; CHECK: pred.udiv.continue2:
-; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x i32> [ [[TMP8]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP14]], [[PRED_UDIV_IF1]] ]
+; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE4]]
+; CHECK: pred.udiv.continue4:
+; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x i32> [ [[TMP8]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP14]], [[PRED_UDIV_IF3]] ]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i32> [[TMP16]], <2 x i32> [[BROADCAST_SPLAT4]]
; CHECK-NEXT: [[TMP18]] = add <2 x i32> [[VEC_PHI]], [[PREDPHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
@@ -280,15 +280,15 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) {
; SINK-GATHER: vector.ph:
; SINK-GATHER-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 8
; SINK-GATHER-NEXT: [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]]
-; SINK-GATHER-NEXT: [[BROADCAST_SPLATINSERT15:%.*]] = insertelement <8 x i32> poison, i32 [[X:%.*]], i64 0
-; SINK-GATHER-NEXT: [[BROADCAST_SPLAT16:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT15]], <8 x i32> poison, <8 x i32> zeroinitializer
; SINK-GATHER-NEXT: [[BROADCAST_SPLATINSERT16:%.*]] = insertelement <8 x i1> poison, i1 [[TMP1:%.*]], i64 0
; SINK-GATHER-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i1> [[BROADCAST_SPLATINSERT16]], <8 x i1> poison, <8 x i32> zeroinitializer
+; SINK-GATHER-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[X:%.*]], i64 0
+; SINK-GATHER-NEXT: [[BROADCAST_SPLAT16:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer
; SINK-GATHER-NEXT: br label [[VECTOR_BODY:%.*]]
; SINK-GATHER: vector.body:
-; SINK-GATHER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE14:%.*]] ]
-; SINK-GATHER-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UDIV_CONTINUE14]] ]
-; SINK-GATHER-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP66:%.*]], [[PRED_UDIV_CONTINUE14]] ]
+; SINK-GATHER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE16:%.*]] ]
+; SINK-GATHER-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UDIV_CONTINUE16]] ]
+; SINK-GATHER-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP66:%.*]], [[PRED_UDIV_CONTINUE16]] ]
; SINK-GATHER-NEXT: [[TMP0:%.*]] = mul <8 x i64> [[VEC_IND]], splat (i64 777)
; SINK-GATHER-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
; SINK-GATHER: pred.udiv.if:
@@ -300,76 +300,76 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) {
; SINK-GATHER-NEXT: br label [[PRED_UDIV_CONTINUE]]
; SINK-GATHER: pred.udiv.continue:
; SINK-GATHER-NEXT: [[TMP8:%.*]] = phi <8 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_UDIV_IF]] ]
-; SINK-GATHER-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF1:%.*]], label [[PRED_UDIV_CONTINUE2:%.*]]
-; SINK-GATHER: pred.udiv.if1:
+; SINK-GATHER-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF5:%.*]], label [[PRED_UDIV_CONTINUE4:%.*]]
+; SINK-GATHER: pred.udiv.if3:
; SINK-GATHER-NEXT: [[TMP10:%.*]] = extractelement <8 x i64> [[TMP0]], i32 1
; SINK-GATHER-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
; SINK-GATHER-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
; SINK-GATHER-NEXT: [[TMP13:%.*]] = udiv i32 [[TMP12]], [[X]]
; SINK-GATHER-NEXT: [[TMP14:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[TMP13]], i32 1
-; SINK-GATHER-NEXT: br label [[PRED_UDIV_CONTINUE2]]
-; SINK-GATHER: pred.udiv.continue2:
-; SINK-GATHER-NEXT: [[TMP16:%.*]] = phi <8 x i32> [ [[TMP8]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP14]], [[PRED_UDIV_IF1]] ]
-; SINK-GATHER-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF3:%.*]], label [[PRED_UDIV_CONTINUE4:%.*]]
-; SINK-GATHER: pred.udiv.if3:
+; SINK-GATHER-NEXT: br label [[PRED_UDIV_CONTINUE4]]
+; SINK-GATHER: pred.udiv.continue4:
+; SINK-GATHER-NEXT: [[TMP16:%.*]] = phi <8 x i32> [ [[TMP8]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP14]], [[PRED_UDIV_IF5]] ]
+; SINK-GATHER-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF6:%.*]], label [[PRED_UDIV_CONTINUE6:%.*]]
+; SINK-GATHER: pred.udiv.if5:
; SINK-GATHER-NEXT: [[TMP18:%.*]] = extractelement <8 x i64> [[TMP0]], i32 2
; SINK-GATHER-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP18]]
; SINK-GATHER-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
; SINK-GATHER-NEXT: [[TMP21:%.*]] = udiv i32 [[TMP20]], [[X]]
; SINK-GATHER-NEXT: [[TMP22:%.*]] = insertelement <8 x i32> [[TMP16]], i32 [[TMP21]], i32 2
-; SINK-GATHER-NEXT: br label [[PRED_UDIV_CONTINUE4]]
-; SINK-GATHER: pred.udiv.continue4:
-; SINK-GATHER-NEXT: [[TMP24:%.*]] = phi <8 x i32> [ [[TMP16]], [[PRED_UDIV_CONTINUE2]] ], [ [[TMP22]], [[PRED_UDIV_IF3]] ]
-; SINK-GATHER-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF5:%.*]], label [[PRED_UDIV_CONTINUE6:%.*]]
-; SINK-GATHER: pred.udiv.if5:
+; SINK-GATHER-NEXT: br label [[PRED_UDIV_CONTINUE6]]
+; SINK-GATHER: pred.udiv.continue6:
+; SINK-GATHER-NEXT: [[TMP24:%.*]] = phi <8 x i32> [ [[TMP16]], [[PRED_UDIV_CONTINUE4]] ], [ [[TMP22]], [[PRED_UDIV_IF6]] ]
+; SINK-GATHER-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8:%.*]]
+; SINK-GATHER: pred.udiv.if7:
; SINK-GATHER-NEXT: [[TMP26:%.*]] = extractelement <8 x i64> [[TMP0]], i32 3
; SINK-GATHER-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP26]]
; SINK-GATHER-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
; SINK-GATHER-NEXT: [[TMP29:%.*]] = udiv i32 [[TMP28]], [[X]]
; SINK-GATHER-NEXT: [[TMP30:%.*]] = insertelement <8 x i32> [[TMP24]], i32 [[TMP29]], i32 3
-; SINK-GATHER-NEXT: br label [[PRED_UDIV_CONTINUE6]]
-; SINK-GATHER: pred.udiv.continue6:
-; SINK-GATHER-NEXT: [[TMP32:%.*]] = phi <8 x i32> [ [[TMP24]], [[PRED_UDIV_CONTINUE4]] ], [ [[TMP30]], [[PRED_UDIV_IF5]] ]
-; SINK-GATHER-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8:%.*]]
-; SINK-GATHER: pred.udiv.if7:
+; SINK-GATHER-NEXT: br label [[PRED_UDIV_CONTINUE8]]
+; SINK-GATHER: pred.udiv.continue8:
+; SINK-GATHER-NEXT: [[TMP32:%.*]] = phi <8 x i32> [ [[TMP24]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP30]], [[PRED_UDIV_IF7]] ]
+; SINK-GATHER-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF9:%.*]], label [[PRED_UDIV_CONTINUE10:%.*]]
+; SINK-GATHER: pred.udiv.if9:
; SINK-GATHER-NEXT: [[TMP34:%.*]] = extractelement <8 x i64> [[TMP0]], i32 4
; SINK-GATHER-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP34]]
; SINK-GATHER-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
; SINK-GATHER-NEXT: [[TMP37:%.*]] = udiv i32 [[TMP36]], [[X]]
; SINK-GATHER-NEXT: [[TMP38:%.*]] = insertelement <8 x i32> [[TMP32]], i32 [[TMP37]], i32 4
-; SINK-GATHER-NEXT: br label [[PRED_UDIV_CONTINUE8]]
-; SINK-GATHER: pred.udiv.continue8:
-; SINK-GATHER-NEXT: [[TMP40:%.*]] = phi <8 x i32> [ [[TMP32]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP38]], [[PRED_UDIV_IF7]] ]
-; SINK-GATHER-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF9:%.*]], label [[PRED_UDIV_CONTINUE10:%.*]]
-; SINK-GATHER: pred.udiv.if9:
+; SINK-GATHER-NEXT: br label [[PRED_UDIV_CONTINUE10]]
+; SINK-GATHER: pred.udiv.continue10:
+; SINK-GATHER-NEXT: [[TMP40:%.*]] = phi <8 x i32> [ [[TMP32]], [[PRED_UDIV_CONTINUE8]] ], [ [[TMP38]], [[PRED_UDIV_IF9]] ]
+; SINK-GATHER-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF11:%.*]], label [[PRED_UDIV_CONTINUE12:%.*]]
+; SINK-GATHER: pred.udiv.if11:
; SINK-GATHER-NEXT: [[TMP42:%.*]] = extractelement <8 x i64> [[TMP0]], i32 5
; SINK-GATHER-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP42]]
; SINK-GATHER-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4
; SINK-GATHER-NEXT: [[TMP45:%.*]] = udiv i32 [[TMP44]], [[X]]
; SINK-GATHER-NEXT: [[TMP46:%.*]] = insertelement <8 x i32> [[TMP40]], i32 [[TMP45]], i32 5
-; SINK-GATHER-NEXT: br label [[PRED_UDIV_CONTINUE10]]
-; SINK-GATHER: pred.udiv.continue10:
-; SINK-GATHER-NEXT: [[TMP48:%.*]] = phi <8 x i32> [ [[TMP40]], [[PRED_UDIV_CONTINUE8]] ], [ [[TMP46]], [[PRED_UDIV_IF9]] ]
-; SINK-GATHER-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF11:%.*]], label [[PRED_UDIV_CONTINUE12:%.*]]
-; SINK-GATHER: pred.udiv.if11:
+; SINK-GATHER-NEXT: br label [[PRED_UDIV_CONTINUE12]]
+; SINK-GATHER: pred.udiv.continue12:
+; SINK-GATHER-NEXT: [[TMP48:%.*]] = phi <8 x i32> [ [[TMP40]], [[PRED_UDIV_CONTINUE10]] ], [ [[TMP46]], [[PRED_UDIV_IF11]] ]
+; SINK-GATHER-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF13:%.*]], label [[PRED_UDIV_CONTINUE14:%.*]]
+; SINK-GATHER: pred.udiv.if13:
; SINK-GATHER-NEXT: [[TMP50:%.*]] = extractelement <8 x i64> [[TMP0]], i32 6
; SINK-GATHER-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP50]]
; SINK-GATHER-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4
; SINK-GATHER-NEXT: [[TMP53:%.*]] = udiv i32 [[TMP52]], [[X]]
; SINK-GATHER-NEXT: [[TMP54:%.*]] = insertelement <8 x i32> [[TMP48]], i32 [[TMP53]], i32 6
-; SINK-GATHER-NEXT: br label [[PRED_UDIV_CONTINUE12]]
-; SINK-GATHER: pred.udiv.continue12:
-; SINK-GATHER-NEXT: [[TMP56:%.*]] = phi <8 x i32> [ [[TMP48]], [[PRED_UDIV_CONTINUE10]] ], [ [[TMP54]], [[PRED_UDIV_IF11]] ]
-; SINK-GATHER-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF13:%.*]], label [[PRED_UDIV_CONTINUE14]]
-; SINK-GATHER: pred.udiv.if13:
+; SINK-GATHER-NEXT: br label [[PRED_UDIV_CONTINUE14]]
+; SINK-GATHER: pred.udiv.continue14:
+; SINK-GATHER-NEXT: [[TMP56:%.*]] = phi <8 x i32> [ [[TMP48]], [[PRED_UDIV_CONTINUE12]] ], [ [[TMP54]], [[PRED_UDIV_IF13]] ]
+; SINK-GATHER-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF15:%.*]], label [[PRED_UDIV_CONTINUE16]]
+; SINK-GATHER: pred.udiv.if15:
; SINK-GATHER-NEXT: [[TMP58:%.*]] = extractelement <8 x i64> [[TMP0]], i32 7
; SINK-GATHER-NEXT: [[TMP59:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP58]]
; SINK-GATHER-NEXT: [[TMP60:%.*]] = load i32, ptr [[TMP59]], align 4
; SINK-GATHER-NEXT: [[TMP61:%.*]] = udiv i32 [[TMP60]], [[X]]
; SINK-GATHER-NEXT: [[TMP62:%.*]] = insertelement <8 x i32> [[TMP56]], i32 [[TMP61]], i32 7
-; SINK-GATHER-NEXT: br label [[PRED_UDIV_CONTINUE14]]
-; SINK-GATHER: pred.udiv.continue14:
-; SINK-GATHER-NEXT: [[TMP64:%.*]] = phi <8 x i32> [ [[TMP56]], [[PRED_UDIV_CONTINUE12]] ], [ [[TMP62]], [[PRED_UDIV_IF13]] ]
+; SINK-GATHER-NEXT: br label [[PRED_UDIV_CONTINUE16]]
+; SINK-GATHER: pred.udiv.continue16:
+; SINK-GATHER-NEXT: [[TMP64:%.*]] = phi <8 x i32> [ [[TMP56]], [[PRED_UDIV_CONTINUE14]] ], [ [[TMP62]], [[PRED_UDIV_IF15]] ]
; SINK-GATHER-NEXT: [[PREDPHI:%.*]] = select <8 x i1> [[BROADCAST_SPLAT]], <8 x i32> [[TMP64]], <8 x i32> [[BROADCAST_SPLAT16]]
; SINK-GATHER-NEXT: [[TMP66]] = add <8 x i32> [[VEC_PHI]], [[PREDPHI]]
; SINK-GATHER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
diff --git a/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll b/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll
index c745b4f74786c9..9b47531f2bd017 100644
--- a/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll
+++ b/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll
@@ -46,14 +46,14 @@ define void @test(i32 %arg, i32 %L1.limit, i32 %L2.switch, i1 %c, ptr %dst) {
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP3:%.*]] = mul i32 12, [[INDUCTION_IV_LCSSA1]]
; CHECK-NEXT: [[IND_END:%.*]] = add i32 1, [[TMP3]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDUCTION_IV_LCSSA1]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[L1_EXIT_VAL]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT]]
+; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[INDUCTION_IV_LCSSA1]], i64 0
+; CHECK-NEXT: [[DOTSPLAT1:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT1]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> splat (i32 1), [[TMP4]]
; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[INDUCTION_IV_LCSSA1]], 4
-; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i64 0
-; CHECK-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT3]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[L1_EXIT_VAL]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
@@ -61,13 +61,13 @@ define void @test(i32 %arg, i32 %L1.limit, i32 %L2.switch, i1 %c, ptr %dst) {
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = sub <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP7:%.*]] = sub <4 x i32> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i32> [[TMP7]] to <4 x i64>
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST:%.*]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP9]], i32 0
; CHECK-NEXT: store <4 x i64> [[TMP8]], ptr [[TMP10]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], [[DOTSPLAT4]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 12
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll
index 94593a7d9a81de..0e842c819fd8be 100644
--- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll
@@ -248,13 +248,13 @@ define i1 @any_of_reduction_i1_epilog(i64 %N, i32 %a) {
; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP0]], 4
; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF2]]
; CHECK-NEXT: [[IND_END5:%.*]] = trunc i64 [[N_VEC3]] to i32
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT14:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP7]], i64 0
; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i1> [[MINMAX_IDENT_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[BC_RESUME_VAL]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT13:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT14:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT13]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX9:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT15:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
@@ -384,10 +384,10 @@ define i1 @any_of_reduction_i1_epilog2(ptr %start, ptr %end, i64 %x) {
; CHECK-NEXT: [[N_VEC8:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF7]]
; CHECK-NEXT: [[TMP25:%.*]] = mul i64 [[N_VEC8]], 16
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP25]]
-; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP48]], i64 0
-; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i1> [[MINMAX_IDENT_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT18:%.*]] = insertelement <4 x i64> poison, i64 [[X]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT19:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT18]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT13:%.*]] = insertelement <4 x i1> poison, i1 [[TMP48]], i64 0
+; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT13]], <4 x i1> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX11:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT20:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll
index 253ecaca75be87..3feda0d6b3c53b 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll
@@ -16,9 +16,9 @@ define void @can_sink_after_store(i32 %x, ptr %ptr, i64 %tc) local_unnamed_addr
; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, ptr [[IDX_PHI_TRANS]], align 4
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -96,9 +96,9 @@ define void @sink_sdiv(i32 %x, ptr %ptr, i64 %tc) local_unnamed_addr #0 {
; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, ptr [[IDX_PHI_TRANS]], align 4
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -175,9 +175,9 @@ define void @can_sink_with_additional_user(i32 %x, ptr %ptr, i64 %tc) {
; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, ptr [[IDX_PHI_TRANS]], align 4
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
index 509b8f99e4b61f..764e6fe1fc333e 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
@@ -488,9 +488,9 @@ define void @recurrence_3(ptr readonly noalias %a, ptr noalias %b, i32 %n, float
; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8
; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = add i64 1, [[N_VEC]]
-; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i32 3
; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[CONV1]], i64 0
; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
+; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i32 3
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -641,9 +641,9 @@ define void @recurrence_3(ptr readonly noalias %a, ptr noalias %b, i32 %n, float
; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; SINK-AFTER-NEXT: [[IND_END:%.*]] = add i64 1, [[N_VEC]]
-; SINK-AFTER-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i32 3
; SINK-AFTER-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[CONV1]], i64 0
; SINK-AFTER-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
+; SINK-AFTER-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i32 3
; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
; SINK-AFTER: vector.body:
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/induction-step.ll b/llvm/test/Transforms/LoopVectorize/induction-step.ll
index f553864b5fc386..d72dd190ad4b49 100644
--- a/llvm/test/Transforms/LoopVectorize/induction-step.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction-step.ll
@@ -418,26 +418,26 @@ define void @wide_sub_induction_step_live_in(ptr %dst, i64 %N, i16 %off) {
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i16
; CHECK-NEXT: [[TMP1:%.*]] = mul i16 [[DOTCAST]], [[TMP0]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[O_1]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, [[DOTSPLAT]]
+; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i64 0
+; CHECK-NEXT: [[DOTSPLAT1:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT1]], <8 x i16> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, [[DOTSPLAT1]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i16> zeroinitializer, [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = mul i16 [[TMP0]], 8
-; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP3]], i64 0
-; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT1]], <8 x i16> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[O_1]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP3]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i16> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = sub <8 x i16> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP5:%.*]] = sub <8 x i16> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 0
; CHECK-NEXT: store <8 x i16> [[TMP5]], ptr [[TMP7]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i16> [[VEC_IND]], [[DOTSPLAT2]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i16> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll
index 4bbf1e8fcc76ae..b78c9c8206bc7f 100644
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll
@@ -3128,9 +3128,9 @@ define i32 @testoverflowcheck() {
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP3]], [[N_MOD_VF]]
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8
; CHECK-NEXT: [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[DOTCAST]]
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> splat (i32 -1), i32 [[C_PROMOTED_I]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> splat (i32 -1), i32 [[C_PROMOTED_I]], i32 0
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -3172,9 +3172,9 @@ define i32 @testoverflowcheck() {
; IND-NEXT: [[N_VEC:%.*]] = and i32 [[TMP3]], 510
; IND-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8
; IND-NEXT: [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[DOTCAST]]
-; IND-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> <i32 poison, i32 -1>, i32 [[C_PROMOTED_I]], i64 0
; IND-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0
; IND-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
+; IND-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> <i32 poison, i32 -1>, i32 [[C_PROMOTED_I]], i64 0
; IND-NEXT: br label [[VECTOR_BODY:%.*]]
; IND: vector.body:
; IND-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -3214,9 +3214,9 @@ define i32 @testoverflowcheck() {
; UNROLL-NEXT: [[N_VEC:%.*]] = and i32 [[TMP3]], 508
; UNROLL-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8
; UNROLL-NEXT: [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[DOTCAST]]
-; UNROLL-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> <i32 poison, i32 -1>, i32 [[C_PROMOTED_I]], i64 0
; UNROLL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0
; UNROLL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
+; UNROLL-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> <i32 poison, i32 -1>, i32 [[C_PROMOTED_I]], i64 0
; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL: vector.body:
; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -3257,9 +3257,9 @@ define i32 @testoverflowcheck() {
; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP3]], [[N_MOD_VF]]
; UNROLL-NO-IC-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8
; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[DOTCAST]]
-; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> splat (i32 -1), i32 [[C_PROMOTED_I]], i32 0
; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0
; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
+; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> splat (i32 -1), i32 [[C_PROMOTED_I]], i32 0
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -3304,9 +3304,9 @@ define i32 @testoverflowcheck() {
; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i32 [[TMP3]], 504
; INTERLEAVE-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8
; INTERLEAVE-NEXT: [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[DOTCAST]]
-; INTERLEAVE-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> <i32 poison, i32 -1, i32 -1, i32 -1>, i32 [[C_PROMOTED_I]], i64 0
; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0
; INTERLEAVE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; INTERLEAVE-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> <i32 poison, i32 -1, i32 -1, i32 -1>, i32 [[C_PROMOTED_I]], i64 0
; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
; INTERLEAVE: vector.body:
; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/no_outside_user.ll b/llvm/test/Transforms/LoopVectorize/no_outside_user.ll
index 81912b078b3b78..7b59f1b5dbe9cf 100644
--- a/llvm/test/Transforms/LoopVectorize/no_outside_user.ll
+++ b/llvm/test/Transforms/LoopVectorize/no_outside_user.ll
@@ -174,11 +174,11 @@ define i32 @test3(i32 %N) {
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[B_PROMOTED]], [[N_VEC]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[N]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[B_PROMOTED]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[N]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/outer_loop_hcfg_construction.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_hcfg_construction.ll
index ae8f0f61231cc5..4e5fc3126435db 100644
--- a/llvm/test/Transforms/LoopVectorize/outer_loop_hcfg_construction.ll
+++ b/llvm/test/Transforms/LoopVectorize/outer_loop_hcfg_construction.ll
@@ -32,25 +32,25 @@ define void @non_outermost_loop_hcfg_construction(i64 %n, ptr %a) {
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i64> poison, i64 [[N]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT2]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[MIDDLE_LOOP_LATCH4:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[MIDDLE_LOOP_LATCH4]] ]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[A]], <4 x i64> [[VEC_IND]]
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x ptr> @llvm.masked.gather.v4p0.v4p0(<4 x ptr> [[TMP3]], i32 8, <4 x i1> splat (i1 true), <4 x ptr> poison)
-; CHECK-NEXT: br label [[INNERMOST_LOOP1:%.*]]
-; CHECK: innermost.loop1:
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP5:%.*]], [[INNERMOST_LOOP1]] ]
+; CHECK-NEXT: br label [[INNERMOST_LOOP3:%.*]]
+; CHECK: innermost.loop3:
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP5:%.*]], [[INNERMOST_LOOP3]] ]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, <4 x ptr> [[WIDE_MASKED_GATHER]], <4 x i64> [[VEC_PHI]]
; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[BROADCAST_SPLAT]], <4 x ptr> [[TMP4]], i32 4, <4 x i1> splat (i1 true))
; CHECK-NEXT: [[TMP5]] = add nuw nsw <4 x i64> [[VEC_PHI]], splat (i64 1)
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i64> [[TMP5]], [[BROADCAST_SPLAT3]]
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
-; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_LOOP_LATCH4]], label [[INNERMOST_LOOP1]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_LOOP_LATCH4]], label [[INNERMOST_LOOP3]]
; CHECK: vector.latch:
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
@@ -173,17 +173,17 @@ define void @non_outermost_loop_hcfg_construction_other_loops_at_same_level(i64
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[ADD]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i64> poison, i64 [[N]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT2]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[ADD]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[MIDDLE_LOOP_J0_CLEANUP4:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[MIDDLE_LOOP_J0_CLEANUP4]] ]
-; CHECK-NEXT: br label [[INNERMOST_LOOP1:%.*]]
-; CHECK: innermost.loop1:
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP5:%.*]], [[INNERMOST_LOOP1]] ]
+; CHECK-NEXT: br label [[INNERMOST_LOOP3:%.*]]
+; CHECK: innermost.loop3:
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP5:%.*]], [[INNERMOST_LOOP3]] ]
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw <4 x i64> [[BROADCAST_SPLAT]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr ptr, ptr [[INVARIANT_GEP]], <4 x i64> [[VEC_PHI]]
@@ -193,7 +193,7 @@ define void @non_outermost_loop_hcfg_construction_other_loops_at_same_level(i64
; CHECK-NEXT: [[TMP5]] = add nuw nsw <4 x i64> [[VEC_PHI]], splat (i64 1)
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i64> [[TMP5]], [[BROADCAST_SPLAT3]]
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
-; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_LOOP_J0_CLEANUP4]], label [[INNERMOST_LOOP1]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_LOOP_J0_CLEANUP4]], label [[INNERMOST_LOOP3]]
; CHECK: vector.latch:
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
index f469776f889c70..59727aeb8249a9 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
@@ -197,12 +197,12 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) {
; CHECK-VF4UF2: vector.ph
; CHECK-VF4UF2: call i32 @llvm.vscale.i32()
; CHECK-VF4UF2: call i32 @llvm.vscale.i32()
+; CHECK-VF4UF2: %[[SPLAT_INS1:.*]] = insertelement <vscale x 4 x i32> poison, i32 %x, i64 0
+; CHECK-VF4UF2: %[[SPLAT1:.*]] = shufflevector <vscale x 4 x i32> %[[SPLAT_INS1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-VF4UF2: %[[VSCALE1:.*]] = call i32 @llvm.vscale.i32()
; CHECK-VF4UF2: %[[MUL1:.*]] = mul i32 %[[VSCALE1]], 4
; CHECK-VF4UF2: %[[SUB1:.*]] = sub i32 %[[MUL1]], 1
; CHECK-VF4UF2: %[[VEC_RECUR_INIT:.*]] = insertelement <vscale x 4 x i32> poison, i32 0, i32 %[[SUB1]]
-; CHECK-VF4UF2: %[[SPLAT_INS1:.*]] = insertelement <vscale x 4 x i32> poison, i32 %x, i64 0
-; CHECK-VF4UF2: %[[SPLAT1:.*]] = shufflevector <vscale x 4 x i32> %[[SPLAT_INS1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; ; CHECK-VF4UF2: vector.body
; CHECK-VF4UF2: %[[VEC_RECUR:.*]] = phi <vscale x 4 x i32> [ %[[VEC_RECUR_INIT]], %vector.ph ], [ %[[ADD2:.*]], %vector.body ]
; CHECK-VF4UF2: %[[ADD1:.*]] = add <vscale x 4 x i32> %{{.*}}, %[[SPLAT1]]
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll b/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll
index d88b0583ffc042..f826ce814ca5b5 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll
@@ -19,18 +19,18 @@ define i32 @iv_live_out_wide(ptr %dst) {
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], 2
; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], 2
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[STEP_2]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i32> [[TMP7]], splat (i32 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i32> zeroinitializer, [[TMP8]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP5]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[STEP_2]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP5]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <vscale x 2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add <vscale x 2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 0
@@ -39,9 +39,9 @@ define i32 @iv_live_out_wide(ptr %dst) {
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP13]]
; CHECK-NEXT: store <vscale x 2 x i16> zeroinitializer, ptr [[TMP11]], align 2
; CHECK-NEXT: store <vscale x 2 x i16> zeroinitializer, ptr [[TMP14]], align 2
-; CHECK-NEXT: [[TMP15:%.*]] = add <vscale x 2 x i32> [[BROADCAST_SPLAT2]], [[STEP_ADD]]
+; CHECK-NEXT: [[TMP15:%.*]] = add <vscale x 2 x i32> [[BROADCAST_SPLAT]], [[STEP_ADD]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP6]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[STEP_ADD]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[STEP_ADD]], [[BROADCAST_SPLAT2]]
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-widen-select-instruction.ll b/llvm/test/Transforms/LoopVectorize/vplan-widen-select-instruction.ll
index 8fb68d7413d0e4..2d07b33eaced4c 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-widen-select-instruction.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-widen-select-instruction.ll
@@ -23,9 +23,9 @@ define void @loop_invariant_select(ptr noalias nocapture %out, i1 %select, doubl
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR1_LATCH4:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR1_LATCH4]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds double, ptr [[OUT:%.*]], <4 x i64> [[VEC_IND]]
-; CHECK-NEXT: br label [[FOR2_HEADER1:%.*]]
-; CHECK: for2.header1:
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP2:%.*]], [[FOR2_HEADER1]] ]
+; CHECK-NEXT: br label [[FOR2_HEADER3:%.*]]
+; CHECK: for2.header3:
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP2:%.*]], [[FOR2_HEADER3]] ]
; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[SELECT:%.*]], <4 x double> [[BROADCAST_SPLAT]], <4 x double> [[BROADCAST_SPLAT3]]
; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP1]], <4 x ptr> [[TMP0]], i32 8, <4 x i1> splat (i1 true))
entry:
@@ -68,9 +68,9 @@ define void @outer_loop_dependant_select(ptr noalias nocapture %out, double %a,
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR1_LATCH4:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR1_LATCH4]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds double, ptr [[OUT:%.*]], <4 x i64> [[VEC_IND]]
-; CHECK-NEXT: br label [[FOR2_HEADER1:%.*]]
-; CHECK: for2.header1:
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP3:%.*]], [[FOR2_HEADER1]] ]
+; CHECK-NEXT: br label [[FOR2_HEADER3:%.*]]
+; CHECK: for2.header3:
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP3:%.*]], [[FOR2_HEADER3]] ]
; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i64> [[VEC_IND]] to <4 x i1>
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x double> [[BROADCAST_SPLAT]], <4 x double> [[BROADCAST_SPLAT3]]
; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP2]], <4 x ptr> [[TMP0]], i32 8, <4 x i1> splat (i1 true))
@@ -115,9 +115,9 @@ define void @inner_loop_dependant_select(ptr noalias nocapture %out, double %a,
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR1_LATCH4:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR1_LATCH4]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds double, ptr [[OUT:%.*]], <4 x i64> [[VEC_IND]]
-; CHECK-NEXT: br label [[FOR2_HEADER1:%.*]]
-; CHECK: for2.header1:
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP3:%.*]], [[FOR2_HEADER1]] ]
+; CHECK-NEXT: br label [[FOR2_HEADER3:%.*]]
+; CHECK: for2.header3:
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP3:%.*]], [[FOR2_HEADER3]] ]
; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i64> [[VEC_PHI]] to <4 x i1>
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x double> [[BROADCAST_SPLAT]], <4 x double> [[BROADCAST_SPLAT3]]
; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP2]], <4 x ptr> [[TMP0]], i32 8, <4 x i1> splat (i1 true))
@@ -162,9 +162,9 @@ define void @outer_and_inner_loop_dependant_select(ptr noalias nocapture %out, d
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR1_LATCH4:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR1_LATCH4]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds double, ptr [[OUT:%.*]], <4 x i64> [[VEC_IND]]
-; CHECK-NEXT: br label [[FOR2_HEADER1:%.*]]
-; CHECK: for2.header1:
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP4:%.*]], [[FOR2_HEADER1]] ]
+; CHECK-NEXT: br label [[FOR2_HEADER3:%.*]]
+; CHECK: for2.header3:
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP4:%.*]], [[FOR2_HEADER3]] ]
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i1>
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x double> [[BROADCAST_SPLAT]], <4 x double> [[BROADCAST_SPLAT3]]
More information about the llvm-commits
mailing list