[llvm] [VPlan] Convert EVL loops to variable-length stepping after dissolution (PR #147222)
Shih-Po Hung via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 28 00:31:14 PDT 2025
https://github.com/arcbbb updated https://github.com/llvm/llvm-project/pull/147222
From 69be6cc82ddbace11dad30f45e1f2b3855d1899c Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Sat, 5 Jul 2025 19:24:57 -0700
Subject: [PATCH 1/9] [VPlan] Convert EVL loops to variable-length stepping
after dissolution
Loop regions require a fixed-length step and a rounded-up trip count, but
once dissolution has replaced the regions with explicit control flow, EVL
loops can step by the variable vector length and compare against the
original trip count.

This patch adds a post-dissolution transform that converts EVL loops from
fixed-length to variable-length stepping.
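For illustration, here is a minimal sketch of the loop shape this produces,
modeled on the updated CHECK lines below; the value names are illustrative
and it assumes a non-zero trip count. The loop advances by the EVL computed
from the remaining AVL and exits once the EVL-based IV reaches the original
trip count, with no separate canonical IV or rounded-up vector trip count:

  define void @evl_stepping_sketch(i64 %n) {
  entry:
    br label %vector.body

  vector.body:
    %evl.based.iv = phi i64 [ 0, %entry ], [ %index.evl.next, %vector.body ]
    %avl = sub i64 %n, %evl.based.iv
    ; EVL for this iteration, capped by the remaining AVL (VF = 2, scalable).
    %evl = call i32 @llvm.experimental.get.vector.length.i64(i64 %avl, i32 2, i1 true)
    ; ... vp.* memory operations predicated on %evl would go here ...
    %evl.zext = zext i32 %evl to i64
    ; Step by this iteration's EVL and compare against the original trip
    ; count %n instead of a rounded-up vector trip count.
    %index.evl.next = add i64 %evl.zext, %evl.based.iv
    %done = icmp eq i64 %index.evl.next, %n
    br i1 %done, label %exit, label %vector.body

  exit:
    ret void
  }

  declare i32 @llvm.experimental.get.vector.length.i64(i64, i32, i1)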
---
.../Transforms/Vectorize/LoopVectorize.cpp | 2 +
.../Transforms/Vectorize/VPlanTransforms.cpp | 61 ++++++--
.../Transforms/Vectorize/VPlanTransforms.h | 11 ++
.../Transforms/Vectorize/VPlanVerifier.cpp | 4 +-
.../RISCV/evl-compatible-loops.ll | 4 +-
.../LoopVectorize/RISCV/inloop-reduction.ll | 18 +--
.../RISCV/interleaved-masked-access.ll | 138 ++++++++----------
.../truncate-to-minimal-bitwidth-evl-crash.ll | 6 +-
.../RISCV/type-info-cache-evl-crash.ll | 4 +-
...-force-tail-with-evl-bin-unary-ops-args.ll | 108 +++++---------
...ize-force-tail-with-evl-call-intrinsics.ll | 44 ++----
...ize-force-tail-with-evl-cast-intrinsics.ll | 44 ++----
...rize-force-tail-with-evl-cond-reduction.ll | 42 ++----
.../vectorize-force-tail-with-evl-div.ll | 16 +-
...ce-tail-with-evl-fixed-order-recurrence.ll | 18 +--
...rize-force-tail-with-evl-gather-scatter.ll | 4 +-
...ze-force-tail-with-evl-inloop-reduction.ll | 82 ++++-------
...ectorize-force-tail-with-evl-interleave.ll | 9 --
...-force-tail-with-evl-intermediate-store.ll | 12 +-
.../vectorize-force-tail-with-evl-iv32.ll | 6 +-
...e-force-tail-with-evl-known-no-overflow.ll | 16 +-
...ze-force-tail-with-evl-masked-loadstore.ll | 6 +-
...e-force-tail-with-evl-ordered-reduction.ll | 6 +-
...vectorize-force-tail-with-evl-reduction.ll | 60 ++------
...-force-tail-with-evl-reverse-load-store.ll | 16 +-
...e-force-tail-with-evl-safe-dep-distance.ll | 12 +-
...orize-force-tail-with-evl-uniform-store.ll | 4 +-
.../RISCV/vectorize-vp-intrinsics.ll | 4 +-
28 files changed, 288 insertions(+), 469 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 6616e61f9bb84..a67a66f172ec5 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7290,6 +7290,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
// Regions are dissolved after optimizing for VF and UF, which completely
// removes unneeded loop regions first.
VPlanTransforms::dissolveLoopRegions(BestVPlan);
+ // Enable variable-length stepping for EVL loops after regions are dissolved.
+ VPlanTransforms::simplifyEVLIVs(BestVPlan);
// Perform the actual loop transformation.
VPTransformState State(&TTI, BestVF, LI, DT, ILV.AC, ILV.Builder, &BestVPlan,
OrigLoop->getParentLoop(),
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 935a4e41bab06..603bad90ff816 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2369,6 +2369,58 @@ bool VPlanTransforms::tryAddExplicitVectorLength(
return true;
}
+void VPlanTransforms::simplifyEVLIVs(VPlan &Plan) {
+ auto ConvertEVLPhi = [](VPlan &Plan, VPBasicBlock *Entry,
+ VPEVLBasedIVPHIRecipe *EVLPhi) {
+ using namespace llvm::VPlanPatternMatch;
+ VPValue *EVLIncrement = EVLPhi->getBackedgeValue();
+
+ // Convert EVLPhi to concrete recipe.
+ auto *ScalarR = VPBuilder(EVLPhi).createScalarPhi(
+ {EVLPhi->getStartValue(), EVLIncrement}, EVLPhi->getDebugLoc(),
+ "evl.based.iv");
+ EVLPhi->replaceAllUsesWith(ScalarR);
+ EVLPhi->eraseFromParent();
+
+ // Find the latch-exiting block and convert to variable-length stepping.
+ // Before: (branch-on-cond CanonicalIVInc, VectorTripCount)
+ // After: (branch-on-cond EVLIVInc, TripCount)
+ auto FindLatchExiting = [](VPBasicBlock *Entry) {
+ auto Range =
+ VPBlockUtils::blocksOnly<VPBasicBlock>(vp_depth_first_shallow(Entry));
+ auto It = find_if(Range, [&](VPBasicBlock *VPBB) {
+ return any_of(VPBB->successors(),
+ [&](VPBlockBase *Succ) { return Succ == Entry; });
+ });
+ return It != Range.end() ? *It : nullptr;
+ };
+ VPBasicBlock *LatchExiting = FindLatchExiting(Entry);
+ assert(LatchExiting && "LatchExiting is not found");
+ auto *LatchExitingBr = cast<VPInstruction>(LatchExiting->getTerminator());
+ VPValue *ScalarIVInc;
+ if (!LatchExitingBr ||
+ !match(LatchExitingBr,
+ m_BranchOnCount(m_VPValue(ScalarIVInc),
+ m_Specific(&Plan.getVectorTripCount()))))
+ return;
+ LatchExitingBr->setOperand(1, Plan.getTripCount());
+ ScalarIVInc->replaceAllUsesWith(EVLIncrement);
+ VPRecipeBase *IVIncR = ScalarIVInc->getDefiningRecipe();
+ VPRecipeBase *ScalarIV = IVIncR->getOperand(0)->getDefiningRecipe();
+ IVIncR->eraseFromParent();
+ ScalarIV->eraseFromParent();
+ };
+
+ // Find EVL loop entries by locating VPEVLBasedIVPHIRecipe
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+ vp_depth_first_shallow(Plan.getEntry())))
+ for (VPRecipeBase &R : VPBB->phis())
+ if (auto *PhiR = dyn_cast<VPEVLBasedIVPHIRecipe>(&R)) {
+ ConvertEVLPhi(Plan, VPBB, PhiR);
+ break;
+ }
+}
+
void VPlanTransforms::dropPoisonGeneratingRecipes(
VPlan &Plan,
const std::function<bool(BasicBlock *)> &BlockNeedsPredication) {
@@ -2700,15 +2752,6 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan,
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_deep(Plan.getEntry()))) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
- if (auto *PhiR = dyn_cast<VPEVLBasedIVPHIRecipe>(&R)) {
- auto *ScalarR = VPBuilder(PhiR).createScalarPhi(
- {PhiR->getStartValue(), PhiR->getBackedgeValue()},
- PhiR->getDebugLoc(), "evl.based.iv");
- PhiR->replaceAllUsesWith(ScalarR);
- ToRemove.push_back(PhiR);
- continue;
- }
-
if (auto *WidenIVR = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) {
expandVPWidenIntOrFpInduction(WidenIVR, TypeInfo);
ToRemove.push_back(WidenIVR);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index d5af6cd73a4a0..a9b491aae4b45 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -209,6 +209,17 @@ struct VPlanTransforms {
/// Replace loop regions with explicit CFG.
static void dissolveLoopRegions(VPlan &Plan);
+ /// Transform EVL loops to use variable-length stepping after region
+ /// dissolution.
+ ///
+ /// Once loop regions are replaced with explicit CFG, EVL loops can step with
+ /// variable vector lengths instead of fixed lengths. This transformation:
+ /// * Converts EVL-based IV phis into concrete scalar phis.
+ /// * Replaces fixed-length stepping (branch-on-cond CanonicalIVInc,
+ /// VectorTripCount) with variable-length stepping (branch-on-cond
+ /// EVLIVInc, TripCount).
+ static void simplifyEVLIVs(VPlan &Plan);
+
/// Lower abstract recipes to concrete ones, that can be codegen'd. Use \p
/// CanonicalIVTy as type for all un-typed live-ins in VPTypeAnalysis.
static void convertToConcreteRecipes(VPlan &Plan, Type &CanonicalIVTy);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 38ada33d7ee19..7c160c3e0d597 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -192,7 +192,9 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
errs() << "EVL used by unexpected VPInstruction\n";
return false;
}
- if (I->getNumUsers() != 1) {
+ // The EVL increment is only used by the EVL-based IV and BranchOnCount;
+ // more than two users is unexpected.
+ if (I->getNumUsers() > 2) {
errs() << "EVL is used in VPInstruction with multiple users\n";
return false;
}
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll b/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll
index 32d17b919e24f..23c1fde1a7711 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll
@@ -23,7 +23,6 @@ define void @test_wide_integer_induction(ptr noalias %a, i64 %N) {
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP10]]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_EVL_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -36,9 +35,8 @@ define void @test_wide_integer_induction(ptr noalias %a, i64 %N) {
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VEC_IND]], ptr align 8 [[TMP14]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP11]])
; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP11]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
-; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], [[TMP8]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]]
+; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
index f44cbf245c9a5..b1d04805a5f8b 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
@@ -130,7 +130,6 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
; IF-EVL-OUTLOOP-NEXT: [[TMP4:%.*]] = mul nuw i32 [[TMP3]], 4
; IF-EVL-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL-OUTLOOP: vector.body:
-; IF-EVL-OUTLOOP-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[AVL:%.*]] = sub i32 [[N]], [[EVL_BASED_IV]]
@@ -141,8 +140,7 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
; IF-EVL-OUTLOOP-NEXT: [[VP_OP:%.*]] = add <vscale x 4 x i32> [[VEC_PHI]], [[TMP9]]
; IF-EVL-OUTLOOP-NEXT: [[TMP10]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP5]])
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP5]], [[EVL_BASED_IV]]
-; IF-EVL-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP4]]
-; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL-OUTLOOP: middle.block:
; IF-EVL-OUTLOOP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP10]])
@@ -185,7 +183,6 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
; IF-EVL-INLOOP-NEXT: [[TMP4:%.*]] = mul nuw i32 [[TMP3]], 8
; IF-EVL-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL-INLOOP: vector.body:
-; IF-EVL-INLOOP-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[TMP5:%.*]] = sub i32 [[N]], [[EVL_BASED_IV]]
@@ -196,8 +193,7 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = call i32 @llvm.vp.reduce.add.nxv8i32(i32 0, <vscale x 8 x i32> [[TMP14]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-INLOOP-NEXT: [[TMP11]] = add i32 [[TMP10]], [[VEC_PHI]]
; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP6]], [[EVL_BASED_IV]]
-; IF-EVL-INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP4]]
-; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-INLOOP-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL-INLOOP: middle.block:
; IF-EVL-INLOOP-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
@@ -352,7 +348,6 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; IF-EVL-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL-OUTLOOP: vector.body:
-; IF-EVL-OUTLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -364,8 +359,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP-NEXT: [[TMP15]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP14]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP9]])
; IF-EVL-OUTLOOP-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
-; IF-EVL-OUTLOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-OUTLOOP-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-OUTLOOP-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; IF-EVL-OUTLOOP: middle.block:
; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> [[TMP15]])
@@ -402,7 +396,6 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL-INLOOP: vector.body:
-; IF-EVL-INLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -413,9 +406,8 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: [[RDX_MINMAX]] = call i32 @llvm.smin.i32(i32 [[TMP13]], i32 [[VEC_PHI]])
; IF-EVL-INLOOP-NEXT: [[TMP14:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP14]], [[EVL_BASED_IV]]
-; IF-EVL-INLOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-INLOOP-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-INLOOP-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
+; IF-EVL-INLOOP-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; IF-EVL-INLOOP: middle.block:
; IF-EVL-INLOOP-NEXT: br label [[FOR_END:%.*]]
; IF-EVL-INLOOP: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll
index dbe6f27d61749..9a19b89a2bd7a 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll
@@ -110,47 +110,38 @@ define void @masked_strided_factor2(ptr noalias nocapture readonly %p, ptr noali
; PREDICATED_DATA-WITH-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; PREDICATED_DATA-WITH-EVL: vector.ph:
; PREDICATED_DATA-WITH-EVL-NEXT: [[CONV:%.*]] = zext i8 [[GUARD]] to i32
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP1:%.*]] = shl nuw i32 [[TMP0]], 4
-; PREDICATED_DATA-WITH-EVL-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 1023
-; PREDICATED_DATA-WITH-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP1]]
-; PREDICATED_DATA-WITH-EVL-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 4
; PREDICATED_DATA-WITH-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
; PREDICATED_DATA-WITH-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP4:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
; PREDICATED_DATA-WITH-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; PREDICATED_DATA-WITH-EVL: vector.body:
-; PREDICATED_DATA-WITH-EVL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; PREDICATED_DATA-WITH-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
-; PREDICATED_DATA-WITH-EVL-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP4]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; PREDICATED_DATA-WITH-EVL-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP0]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; PREDICATED_DATA-WITH-EVL-NEXT: [[AVL:%.*]] = sub i32 1024, [[EVL_BASED_IV]]
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[AVL]], i32 16, i1 true)
-; PREDICATED_DATA-WITH-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP5]], i64 0
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP1:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[AVL]], i32 16, i1 true)
+; PREDICATED_DATA-WITH-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP1]], i64 0
; PREDICATED_DATA-WITH-EVL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP6:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP7:%.*]] = shl nuw nsw <vscale x 16 x i32> [[VEC_IND]], splat (i32 1)
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP8:%.*]] = zext nneg <vscale x 16 x i32> [[TMP7]] to <vscale x 16 x i64>
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P]], <vscale x 16 x i64> [[TMP8]]
-; PREDICATED_DATA-WITH-EVL-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 16 x i8> @llvm.vp.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> align 1 [[TMP9]], <vscale x 16 x i1> [[TMP6]], i32 [[TMP5]])
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP10:%.*]] = or disjoint <vscale x 16 x i32> [[TMP7]], splat (i32 1)
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP11:%.*]] = zext nneg <vscale x 16 x i32> [[TMP10]] to <vscale x 16 x i64>
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[P]], <vscale x 16 x i64> [[TMP11]]
-; PREDICATED_DATA-WITH-EVL-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call <vscale x 16 x i8> @llvm.vp.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> align 1 [[TMP12]], <vscale x 16 x i1> [[TMP6]], i32 [[TMP5]])
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP13:%.*]] = call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> [[WIDE_MASKED_GATHER]], <vscale x 16 x i8> [[WIDE_MASKED_GATHER3]])
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP14:%.*]] = zext nneg <vscale x 16 x i32> [[TMP7]] to <vscale x 16 x i64>
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP14]]
-; PREDICATED_DATA-WITH-EVL-NEXT: call void @llvm.vp.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> [[TMP13]], <vscale x 16 x ptr> align 1 [[TMP15]], <vscale x 16 x i1> [[TMP6]], i32 [[TMP5]])
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP16:%.*]] = sub <vscale x 16 x i8> zeroinitializer, [[TMP13]]
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP17:%.*]] = zext nneg <vscale x 16 x i32> [[TMP10]] to <vscale x 16 x i64>
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP17]]
-; PREDICATED_DATA-WITH-EVL-NEXT: call void @llvm.vp.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> [[TMP16]], <vscale x 16 x ptr> align 1 [[TMP18]], <vscale x 16 x i1> [[TMP6]], i32 [[TMP5]])
-; PREDICATED_DATA-WITH-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP5]], [[EVL_BASED_IV]]
-; PREDICATED_DATA-WITH-EVL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP3]]
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP2:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP3:%.*]] = shl nuw nsw <vscale x 16 x i32> [[VEC_IND]], splat (i32 1)
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP4:%.*]] = zext nneg <vscale x 16 x i32> [[TMP3]] to <vscale x 16 x i64>
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[P]], <vscale x 16 x i64> [[TMP4]]
+; PREDICATED_DATA-WITH-EVL-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 16 x i8> @llvm.vp.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> align 1 [[TMP5]], <vscale x 16 x i1> [[TMP2]], i32 [[TMP1]])
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP6:%.*]] = or disjoint <vscale x 16 x i32> [[TMP3]], splat (i32 1)
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP7:%.*]] = zext nneg <vscale x 16 x i32> [[TMP6]] to <vscale x 16 x i64>
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[P]], <vscale x 16 x i64> [[TMP7]]
+; PREDICATED_DATA-WITH-EVL-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call <vscale x 16 x i8> @llvm.vp.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> align 1 [[TMP8]], <vscale x 16 x i1> [[TMP2]], i32 [[TMP1]])
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP9:%.*]] = call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> [[WIDE_MASKED_GATHER]], <vscale x 16 x i8> [[WIDE_MASKED_GATHER3]])
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP10:%.*]] = zext nneg <vscale x 16 x i32> [[TMP3]] to <vscale x 16 x i64>
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP10]]
+; PREDICATED_DATA-WITH-EVL-NEXT: call void @llvm.vp.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> [[TMP9]], <vscale x 16 x ptr> align 1 [[TMP11]], <vscale x 16 x i1> [[TMP2]], i32 [[TMP1]])
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP12:%.*]] = sub <vscale x 16 x i8> zeroinitializer, [[TMP9]]
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP13:%.*]] = zext nneg <vscale x 16 x i32> [[TMP6]] to <vscale x 16 x i64>
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP13]]
+; PREDICATED_DATA-WITH-EVL-NEXT: call void @llvm.vp.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> [[TMP12]], <vscale x 16 x ptr> align 1 [[TMP14]], <vscale x 16 x i1> [[TMP2]], i32 [[TMP1]])
+; PREDICATED_DATA-WITH-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP1]], [[EVL_BASED_IV]]
; PREDICATED_DATA-WITH-EVL-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP19:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; PREDICATED_DATA-WITH-EVL-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], 1024
+; PREDICATED_DATA-WITH-EVL-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; PREDICATED_DATA-WITH-EVL: middle.block:
; PREDICATED_DATA-WITH-EVL-NEXT: br label [[FOR_END:%.*]]
; PREDICATED_DATA-WITH-EVL: scalar.ph:
@@ -327,63 +318,54 @@ define void @masked_strided_factor4(ptr noalias nocapture readonly %p, ptr noali
; PREDICATED_DATA-WITH-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; PREDICATED_DATA-WITH-EVL: vector.ph:
; PREDICATED_DATA-WITH-EVL-NEXT: [[CONV:%.*]] = zext i8 [[GUARD]] to i32
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP1:%.*]] = shl nuw i32 [[TMP0]], 4
-; PREDICATED_DATA-WITH-EVL-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 1023
-; PREDICATED_DATA-WITH-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP1]]
-; PREDICATED_DATA-WITH-EVL-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 4
; PREDICATED_DATA-WITH-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[CONV]], i64 0
; PREDICATED_DATA-WITH-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP4:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
; PREDICATED_DATA-WITH-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; PREDICATED_DATA-WITH-EVL: vector.body:
-; PREDICATED_DATA-WITH-EVL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; PREDICATED_DATA-WITH-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
-; PREDICATED_DATA-WITH-EVL-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP4]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; PREDICATED_DATA-WITH-EVL-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP0]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; PREDICATED_DATA-WITH-EVL-NEXT: [[AVL:%.*]] = sub i32 1024, [[EVL_BASED_IV]]
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[AVL]], i32 16, i1 true)
-; PREDICATED_DATA-WITH-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP5]], i64 0
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP1:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[AVL]], i32 16, i1 true)
+; PREDICATED_DATA-WITH-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP1]], i64 0
; PREDICATED_DATA-WITH-EVL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP6:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP7:%.*]] = shl nuw nsw <vscale x 16 x i32> [[VEC_IND]], splat (i32 2)
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP8:%.*]] = or disjoint <vscale x 16 x i32> [[TMP7]], splat (i32 1)
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP9:%.*]] = or disjoint <vscale x 16 x i32> [[TMP7]], splat (i32 2)
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP10:%.*]] = or disjoint <vscale x 16 x i32> [[TMP7]], splat (i32 3)
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP11:%.*]] = zext nneg <vscale x 16 x i32> [[TMP7]] to <vscale x 16 x i64>
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP2:%.*]] = icmp ugt <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP3:%.*]] = shl nuw nsw <vscale x 16 x i32> [[VEC_IND]], splat (i32 2)
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP4:%.*]] = or disjoint <vscale x 16 x i32> [[TMP3]], splat (i32 1)
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP5:%.*]] = or disjoint <vscale x 16 x i32> [[TMP3]], splat (i32 2)
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP6:%.*]] = or disjoint <vscale x 16 x i32> [[TMP3]], splat (i32 3)
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP7:%.*]] = zext nneg <vscale x 16 x i32> [[TMP3]] to <vscale x 16 x i64>
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[P]], <vscale x 16 x i64> [[TMP7]]
+; PREDICATED_DATA-WITH-EVL-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 16 x i8> @llvm.vp.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> align 1 [[TMP8]], <vscale x 16 x i1> [[TMP2]], i32 [[TMP1]])
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP9:%.*]] = zext nneg <vscale x 16 x i32> [[TMP4]] to <vscale x 16 x i64>
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], <vscale x 16 x i64> [[TMP9]]
+; PREDICATED_DATA-WITH-EVL-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call <vscale x 16 x i8> @llvm.vp.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> align 1 [[TMP10]], <vscale x 16 x i1> [[TMP2]], i32 [[TMP1]])
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP11:%.*]] = zext nneg <vscale x 16 x i32> [[TMP5]] to <vscale x 16 x i64>
; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[P]], <vscale x 16 x i64> [[TMP11]]
-; PREDICATED_DATA-WITH-EVL-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 16 x i8> @llvm.vp.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> align 1 [[TMP12]], <vscale x 16 x i1> [[TMP6]], i32 [[TMP5]])
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP13:%.*]] = zext nneg <vscale x 16 x i32> [[TMP8]] to <vscale x 16 x i64>
+; PREDICATED_DATA-WITH-EVL-NEXT: [[WIDE_MASKED_GATHER4:%.*]] = call <vscale x 16 x i8> @llvm.vp.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> align 1 [[TMP12]], <vscale x 16 x i1> [[TMP2]], i32 [[TMP1]])
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP13:%.*]] = zext nneg <vscale x 16 x i32> [[TMP6]] to <vscale x 16 x i64>
; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[P]], <vscale x 16 x i64> [[TMP13]]
-; PREDICATED_DATA-WITH-EVL-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call <vscale x 16 x i8> @llvm.vp.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> align 1 [[TMP14]], <vscale x 16 x i1> [[TMP6]], i32 [[TMP5]])
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP15:%.*]] = zext nneg <vscale x 16 x i32> [[TMP9]] to <vscale x 16 x i64>
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[P]], <vscale x 16 x i64> [[TMP15]]
-; PREDICATED_DATA-WITH-EVL-NEXT: [[WIDE_MASKED_GATHER4:%.*]] = call <vscale x 16 x i8> @llvm.vp.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> align 1 [[TMP16]], <vscale x 16 x i1> [[TMP6]], i32 [[TMP5]])
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP17:%.*]] = zext nneg <vscale x 16 x i32> [[TMP10]] to <vscale x 16 x i64>
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[P]], <vscale x 16 x i64> [[TMP17]]
-; PREDICATED_DATA-WITH-EVL-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <vscale x 16 x i8> @llvm.vp.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> align 1 [[TMP18]], <vscale x 16 x i1> [[TMP6]], i32 [[TMP5]])
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP19:%.*]] = call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> [[WIDE_MASKED_GATHER]], <vscale x 16 x i8> [[WIDE_MASKED_GATHER3]])
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP20:%.*]] = sub <vscale x 16 x i8> zeroinitializer, [[TMP19]]
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP21:%.*]] = call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> [[WIDE_MASKED_GATHER4]], <vscale x 16 x i8> [[WIDE_MASKED_GATHER5]])
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP22:%.*]] = sub <vscale x 16 x i8> zeroinitializer, [[TMP21]]
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP23:%.*]] = zext nneg <vscale x 16 x i32> [[TMP7]] to <vscale x 16 x i64>
+; PREDICATED_DATA-WITH-EVL-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <vscale x 16 x i8> @llvm.vp.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> align 1 [[TMP14]], <vscale x 16 x i1> [[TMP2]], i32 [[TMP1]])
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP15:%.*]] = call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> [[WIDE_MASKED_GATHER]], <vscale x 16 x i8> [[WIDE_MASKED_GATHER3]])
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP16:%.*]] = sub <vscale x 16 x i8> zeroinitializer, [[TMP15]]
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP17:%.*]] = call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> [[WIDE_MASKED_GATHER4]], <vscale x 16 x i8> [[WIDE_MASKED_GATHER5]])
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP18:%.*]] = sub <vscale x 16 x i8> zeroinitializer, [[TMP17]]
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP19:%.*]] = zext nneg <vscale x 16 x i32> [[TMP3]] to <vscale x 16 x i64>
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP19]]
+; PREDICATED_DATA-WITH-EVL-NEXT: call void @llvm.vp.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> [[TMP15]], <vscale x 16 x ptr> align 1 [[TMP20]], <vscale x 16 x i1> [[TMP2]], i32 [[TMP1]])
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP21:%.*]] = zext nneg <vscale x 16 x i32> [[TMP4]] to <vscale x 16 x i64>
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP21]]
+; PREDICATED_DATA-WITH-EVL-NEXT: call void @llvm.vp.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> [[TMP16]], <vscale x 16 x ptr> align 1 [[TMP22]], <vscale x 16 x i1> [[TMP2]], i32 [[TMP1]])
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP23:%.*]] = zext nneg <vscale x 16 x i32> [[TMP5]] to <vscale x 16 x i64>
; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP23]]
-; PREDICATED_DATA-WITH-EVL-NEXT: call void @llvm.vp.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> [[TMP19]], <vscale x 16 x ptr> align 1 [[TMP24]], <vscale x 16 x i1> [[TMP6]], i32 [[TMP5]])
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP25:%.*]] = zext nneg <vscale x 16 x i32> [[TMP8]] to <vscale x 16 x i64>
+; PREDICATED_DATA-WITH-EVL-NEXT: call void @llvm.vp.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> [[TMP17]], <vscale x 16 x ptr> align 1 [[TMP24]], <vscale x 16 x i1> [[TMP2]], i32 [[TMP1]])
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP25:%.*]] = zext nneg <vscale x 16 x i32> [[TMP6]] to <vscale x 16 x i64>
; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP25]]
-; PREDICATED_DATA-WITH-EVL-NEXT: call void @llvm.vp.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> [[TMP20]], <vscale x 16 x ptr> align 1 [[TMP26]], <vscale x 16 x i1> [[TMP6]], i32 [[TMP5]])
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP27:%.*]] = zext nneg <vscale x 16 x i32> [[TMP9]] to <vscale x 16 x i64>
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP27]]
-; PREDICATED_DATA-WITH-EVL-NEXT: call void @llvm.vp.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> [[TMP21]], <vscale x 16 x ptr> align 1 [[TMP28]], <vscale x 16 x i1> [[TMP6]], i32 [[TMP5]])
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP29:%.*]] = zext nneg <vscale x 16 x i32> [[TMP10]] to <vscale x 16 x i64>
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP29]]
-; PREDICATED_DATA-WITH-EVL-NEXT: call void @llvm.vp.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> [[TMP22]], <vscale x 16 x ptr> align 1 [[TMP30]], <vscale x 16 x i1> [[TMP6]], i32 [[TMP5]])
-; PREDICATED_DATA-WITH-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP5]], [[EVL_BASED_IV]]
-; PREDICATED_DATA-WITH-EVL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP3]]
+; PREDICATED_DATA-WITH-EVL-NEXT: call void @llvm.vp.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> [[TMP18]], <vscale x 16 x ptr> align 1 [[TMP26]], <vscale x 16 x i1> [[TMP2]], i32 [[TMP1]])
+; PREDICATED_DATA-WITH-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP1]], [[EVL_BASED_IV]]
; PREDICATED_DATA-WITH-EVL-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP31:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; PREDICATED_DATA-WITH-EVL-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP27:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], 1024
+; PREDICATED_DATA-WITH-EVL-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; PREDICATED_DATA-WITH-EVL: middle.block:
; PREDICATED_DATA-WITH-EVL-NEXT: br label [[FOR_END:%.*]]
; PREDICATED_DATA-WITH-EVL: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll
index 1e26d18059836..5b4155cfdad36 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll
@@ -21,7 +21,6 @@ define void @truncate_to_minimal_bitwidths_widen_cast_recipe(ptr %src) {
; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[AVL:%.*]] = sub i64 9, [[EVL_BASED_IV]]
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
@@ -34,9 +33,8 @@ define void @truncate_to_minimal_bitwidths_widen_cast_recipe(ptr %src) {
; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> [[TMP14]], <vscale x 8 x ptr> align 1 zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP7]])
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP9]], [[EVL_BASED_IV]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
-; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 9
+; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[SCALAR_PH]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll b/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll
index 0e1ec57dac3d5..49f34e9967e38 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll
@@ -32,7 +32,6 @@ define void @type_info_cache_clobber(ptr %dstv, ptr %src, i64 %wide.trip.count)
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 8 x ptr> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[AVL:%.*]] = sub i64 [[TMP0]], [[EVL_BASED_IV]]
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
@@ -50,8 +49,7 @@ define void @type_info_cache_clobber(ptr %dstv, ptr %src, i64 %wide.trip.count)
; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> [[TMP19]], <vscale x 8 x ptr> align 2 zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
; CHECK-NEXT: [[TMP20:%.*]] = zext i32 [[TMP11]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]]
-; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TMP0]]
; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-bin-unary-ops-args.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-bin-unary-ops-args.ll
index f90c450c0bf14..b7b9b519bca31 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-bin-unary-ops-args.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-bin-unary-ops-args.ll
@@ -34,7 +34,6 @@ define void @test_and(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
@@ -45,9 +44,8 @@ define void @test_and(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP16]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
-; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100
+; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]]
; IF-EVL: [[SCALAR_PH]]:
@@ -125,7 +123,6 @@ define void @test_or(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
@@ -136,9 +133,8 @@ define void @test_or(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP16]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
-; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100
+; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]]
; IF-EVL: [[SCALAR_PH]]:
@@ -216,7 +212,6 @@ define void @test_xor(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
@@ -227,9 +222,8 @@ define void @test_xor(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP16]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
-; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100
+; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]]
; IF-EVL: [[SCALAR_PH]]:
@@ -307,7 +301,6 @@ define void @test_shl(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
@@ -318,9 +311,8 @@ define void @test_shl(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP16]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
-; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100
+; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]]
; IF-EVL: [[SCALAR_PH]]:
@@ -398,7 +390,6 @@ define void @test_lshr(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
@@ -409,9 +400,8 @@ define void @test_lshr(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP16]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
-; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100
+; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]]
; IF-EVL: [[SCALAR_PH]]:
@@ -489,7 +479,6 @@ define void @test_ashr(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
@@ -500,9 +489,8 @@ define void @test_ashr(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP16]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
-; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100
+; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]]
; IF-EVL: [[SCALAR_PH]]:
@@ -580,7 +568,6 @@ define void @test_add(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
@@ -591,9 +578,8 @@ define void @test_add(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP16]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
-; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
+; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100
+; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]]
; IF-EVL: [[SCALAR_PH]]:
@@ -671,7 +657,6 @@ define void @test_sub(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
@@ -682,9 +667,8 @@ define void @test_sub(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP16]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
-; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
+; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100
+; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]]
; IF-EVL: [[SCALAR_PH]]:
@@ -762,7 +746,6 @@ define void @test_mul(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
@@ -773,9 +756,8 @@ define void @test_mul(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP16]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
-; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
+; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100
+; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]]
; IF-EVL: [[SCALAR_PH]]:
@@ -853,7 +835,6 @@ define void @test_sdiv(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
@@ -864,9 +845,8 @@ define void @test_sdiv(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP16]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
-; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
+; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100
+; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]]
; IF-EVL: [[SCALAR_PH]]:
@@ -944,7 +924,6 @@ define void @test_udiv(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
@@ -955,9 +934,8 @@ define void @test_udiv(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP16]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
-; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
+; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100
+; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]]
; IF-EVL: [[SCALAR_PH]]:
@@ -1035,7 +1013,6 @@ define void @test_srem(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
@@ -1046,9 +1023,8 @@ define void @test_srem(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP16]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
-; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
+; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100
+; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]]
; IF-EVL: [[SCALAR_PH]]:
@@ -1126,7 +1102,6 @@ define void @test_urem(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 16
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
@@ -1137,9 +1112,8 @@ define void @test_urem(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP16]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
-; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
+; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100
+; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]]
; IF-EVL: [[SCALAR_PH]]:
@@ -1220,7 +1194,6 @@ define void @test_fadd(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
@@ -1231,9 +1204,8 @@ define void @test_fadd(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_OP]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP19]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
-; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]]
+; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100
+; IF-EVL-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]]
; IF-EVL: [[SCALAR_PH]]:
@@ -1312,7 +1284,6 @@ define void @test_fsub(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
@@ -1323,9 +1294,8 @@ define void @test_fsub(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_OP]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP19]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
-; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
+; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100
+; IF-EVL-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]]
; IF-EVL: [[SCALAR_PH]]:
@@ -1404,7 +1374,6 @@ define void @test_fmul(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
@@ -1415,9 +1384,8 @@ define void @test_fmul(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_OP]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP19]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
-; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]]
+; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100
+; IF-EVL-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]]
; IF-EVL: [[SCALAR_PH]]:
@@ -1496,7 +1464,6 @@ define void @test_fdiv(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
@@ -1507,9 +1474,8 @@ define void @test_fdiv(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_OP]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP19]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
-; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]]
+; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100
+; IF-EVL-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]]
; IF-EVL: [[SCALAR_PH]]:
@@ -1641,7 +1607,6 @@ define void @test_fneg(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
@@ -1652,9 +1617,8 @@ define void @test_fneg(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_OP]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP19]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
-; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]]
+; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100
+; IF-EVL-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]]
; IF-EVL: [[SCALAR_PH]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-call-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-call-intrinsics.ll
index dfa01a5d71e33..66d7ae72445c7 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-call-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-call-intrinsics.ll
@@ -39,7 +39,6 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
@@ -52,9 +51,8 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP29]], ptr align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
+; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[EXIT:.*]]
; IF-EVL: [[SCALAR_PH]]:
@@ -145,7 +143,6 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
@@ -158,9 +155,8 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP29]], ptr align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
+; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[EXIT:.*]]
; IF-EVL: [[SCALAR_PH]]:
@@ -251,7 +247,6 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
@@ -264,9 +259,8 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP29]], ptr align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
+; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[EXIT:.*]]
; IF-EVL: [[SCALAR_PH]]:
@@ -357,7 +351,6 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
@@ -370,9 +363,8 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP29]], ptr align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
+; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[EXIT:.*]]
; IF-EVL: [[SCALAR_PH]]:
@@ -459,7 +451,6 @@ define void @vp_ctlz(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
@@ -470,8 +461,7 @@ define void @vp_ctlz(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP24]], ptr align 4 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[EXIT:.*]]
@@ -552,7 +542,6 @@ define void @vp_cttz(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
@@ -563,8 +552,7 @@ define void @vp_cttz(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP17]], ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP13]])
; IF-EVL-NEXT: [[TMP20:%.*]] = zext i32 [[TMP13]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP12]]
-; IF-EVL-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[EXIT:.*]]
@@ -645,7 +633,6 @@ define void @vp_lrint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
@@ -658,8 +645,7 @@ define void @vp_lrint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP15]], ptr align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[EXIT:.*]]
@@ -746,7 +732,6 @@ define void @vp_llrint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
@@ -759,8 +744,7 @@ define void @vp_llrint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP15]], ptr align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[EXIT:.*]]
@@ -847,7 +831,6 @@ define void @vp_abs(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
@@ -858,8 +841,7 @@ define void @vp_abs(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP24]], ptr align 4 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[EXIT:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cast-intrinsics.ll
index 71391704c653c..e053d6883f4aa 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cast-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cast-intrinsics.ll
@@ -34,7 +34,6 @@ define void @vp_sext(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 2
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
@@ -45,8 +44,7 @@ define void @vp_sext(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP16]], ptr align 8 [[TMP17]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP11]]
-; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[EXIT:.*]]
@@ -126,7 +124,6 @@ define void @vp_zext(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 2
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
@@ -137,8 +134,7 @@ define void @vp_zext(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP16]], ptr align 8 [[TMP17]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META13:![0-9]+]], !noalias [[META10]]
; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP11]]
-; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[EXIT:.*]]
@@ -218,7 +214,6 @@ define void @vp_trunc(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 2
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
@@ -229,8 +224,7 @@ define void @vp_trunc(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i32.p0(<vscale x 2 x i32> [[TMP16]], ptr align 4 [[TMP17]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META20:![0-9]+]], !noalias [[META17]]
; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP11]]
-; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[EXIT:.*]]
@@ -310,7 +304,6 @@ define void @vp_fpext(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 2
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
@@ -321,8 +314,7 @@ define void @vp_fpext(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2f64.p0(<vscale x 2 x double> [[TMP16]], ptr align 8 [[TMP17]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META27:![0-9]+]], !noalias [[META24]]
; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP11]]
-; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[EXIT:.*]]
@@ -402,7 +394,6 @@ define void @vp_fptrunc(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 2
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
@@ -413,8 +404,7 @@ define void @vp_fptrunc(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2f32.p0(<vscale x 2 x float> [[TMP16]], ptr align 4 [[TMP17]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META34:![0-9]+]], !noalias [[META31]]
; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP11]]
-; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[EXIT:.*]]
@@ -494,7 +484,6 @@ define void @vp_sitofp(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], 4
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
@@ -505,8 +494,7 @@ define void @vp_sitofp(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[TMP18]], ptr align 4 [[TMP19]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP14]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP13]]
-; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[EXIT:.*]]
@@ -586,7 +574,6 @@ define void @vp_uitofp(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], 4
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
@@ -597,8 +584,7 @@ define void @vp_uitofp(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[TMP18]], ptr align 4 [[TMP19]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP14]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP13]]
-; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[EXIT:.*]]
@@ -678,7 +664,6 @@ define void @vp_fptosi(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], 4
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
@@ -689,8 +674,7 @@ define void @vp_fptosi(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP18]], ptr align 4 [[TMP19]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP14]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP13]]
-; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[EXIT:.*]]
@@ -770,7 +754,6 @@ define void @vp_fptoui(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], 4
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
@@ -781,8 +764,7 @@ define void @vp_fptoui(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP18]], ptr align 4 [[TMP19]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP14]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP13]]
-; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[EXIT:.*]]
@@ -862,7 +844,6 @@ define void @vp_inttoptr(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], 2
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
@@ -873,8 +854,7 @@ define void @vp_inttoptr(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2p0.p0(<vscale x 2 x ptr> [[TMP18]], ptr align 8 [[TMP19]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP14]])
; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP14]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP13]]
-; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[EXIT:.*]]
@@ -948,7 +928,6 @@ define void @vp_ptrtoint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP10]]
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -963,9 +942,8 @@ define void @vp_ptrtoint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP15]], ptr align 8 [[TMP16]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[EXIT:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll
index f8e8435b0eb4c..d0d63dfdc3595 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll
@@ -38,7 +38,6 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
; IF-EVL-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL-OUTLOOP: vector.body:
-; IF-EVL-OUTLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[EVL_BASED_IV1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT1:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[TMP10:%.*]] = sub i64 [[N]], [[EVL_BASED_IV1]]
@@ -51,9 +50,8 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP-NEXT: [[TMP20]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP11]])
; IF-EVL-OUTLOOP-NEXT: [[TMP22:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT1]] = add i64 [[TMP22]], [[EVL_BASED_IV1]]
-; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[EVL_BASED_IV]], [[TMP8]]
-; IF-EVL-OUTLOOP-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N_VEC]]
-; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; IF-EVL-OUTLOOP-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT1]], [[N]]
+; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL-OUTLOOP: middle.block:
; IF-EVL-OUTLOOP-NEXT: [[TMP24:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP20]])
; IF-EVL-OUTLOOP-NEXT: br label [[FOR_END:%.*]]
@@ -91,7 +89,6 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
; IF-EVL-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL-INLOOP: vector.body:
-; IF-EVL-INLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START]], [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[TMP11:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -104,9 +101,8 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: [[TMP22]] = add i32 [[TMP21]], [[VEC_PHI]]
; IF-EVL-INLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]]
-; IF-EVL-INLOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]]
-; IF-EVL-INLOOP-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-INLOOP-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
+; IF-EVL-INLOOP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL-INLOOP: middle.block:
; IF-EVL-INLOOP-NEXT: br label [[FOR_END:%.*]]
; IF-EVL-INLOOP: scalar.ph:
@@ -266,7 +262,6 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; IF-EVL-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL-OUTLOOP: vector.body:
-; IF-EVL-OUTLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP9]], [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[TMP10:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -286,9 +281,8 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP-NEXT: [[PREDPHI]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[PREDPHI1]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP11]])
; IF-EVL-OUTLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]]
-; IF-EVL-OUTLOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-OUTLOOP-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; IF-EVL-OUTLOOP-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
+; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; IF-EVL-OUTLOOP: middle.block:
; IF-EVL-OUTLOOP-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[PREDPHI]])
; IF-EVL-OUTLOOP-NEXT: br label [[FOR_END:%.*]]
@@ -330,7 +324,6 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
; IF-EVL-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL-INLOOP: vector.body:
-; IF-EVL-INLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START]], [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[TMP11:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -342,9 +335,8 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: [[TMP22]] = add i32 [[TMP21]], [[VEC_PHI]]
; IF-EVL-INLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]]
-; IF-EVL-INLOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]]
-; IF-EVL-INLOOP-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-INLOOP-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
+; IF-EVL-INLOOP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; IF-EVL-INLOOP: middle.block:
; IF-EVL-INLOOP-NEXT: br label [[FOR_END:%.*]]
; IF-EVL-INLOOP: scalar.ph:
@@ -522,7 +514,6 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP11]]
; IF-EVL-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL-OUTLOOP: vector.body:
-; IF-EVL-OUTLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[EVL_BASED_IV1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT1:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -539,9 +530,8 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP-NEXT: [[TMP19]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP18]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP12]])
; IF-EVL-OUTLOOP-NEXT: [[TMP20:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT1]] = add i64 [[TMP20]], [[EVL_BASED_IV1]]
-; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[EVL_BASED_IV]], [[TMP8]]
; IF-EVL-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; IF-EVL-OUTLOOP-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N_VEC]]
+; IF-EVL-OUTLOOP-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT1]], [[N]]
; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; IF-EVL-OUTLOOP: middle.block:
; IF-EVL-OUTLOOP-NEXT: [[TMP22:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP19]])
@@ -584,7 +574,6 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP10]]
; IF-EVL-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL-INLOOP: vector.body:
-; IF-EVL-INLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[EVL_BASED_IV1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT1:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], [[VECTOR_PH]] ], [ [[ADD:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -601,10 +590,9 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: [[ADD]] = add i32 [[TMP17]], [[RDX]]
; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT1]] = add i64 [[TMP19]], [[EVL_BASED_IV1]]
-; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[EVL_BASED_IV]], [[TMP8]]
; IF-EVL-INLOOP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; IF-EVL-INLOOP-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N_VEC]]
-; IF-EVL-INLOOP-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; IF-EVL-INLOOP-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT1]], [[N]]
+; IF-EVL-INLOOP-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; IF-EVL-INLOOP: middle.block:
; IF-EVL-INLOOP-NEXT: br label [[FOR_END:%.*]]
; IF-EVL-INLOOP: scalar.ph:
@@ -792,7 +780,6 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP-NEXT: [[INDUCTION1:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP13]]
; IF-EVL-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL-OUTLOOP: vector.body:
-; IF-EVL-OUTLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[VECTOR_BODY]] ]
@@ -816,10 +803,9 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP-NEXT: [[TMP24]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[PREDPHI]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP14]])
; IF-EVL-OUTLOOP-NEXT: [[TMP25:%.*]] = zext i32 [[TMP14]] to i64
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP25]], [[IV]]
-; IF-EVL-OUTLOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; IF-EVL-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT6]]
; IF-EVL-OUTLOOP-NEXT: [[VEC_IND_NEXT7]] = add <vscale x 4 x i32> [[VEC_IND2]], [[BROADCAST_SPLAT4]]
-; IF-EVL-OUTLOOP-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-OUTLOOP-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK1:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; IF-EVL-OUTLOOP: middle.block:
; IF-EVL-OUTLOOP-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP24]])
@@ -866,7 +852,6 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP10]]
; IF-EVL-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL-INLOOP: vector.body:
-; IF-EVL-INLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START]], [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -882,9 +867,8 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: [[TMP17]] = add i32 [[TMP16]], [[VEC_PHI]]
; IF-EVL-INLOOP-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[IV]]
-; IF-EVL-INLOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; IF-EVL-INLOOP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-INLOOP-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK1:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; IF-EVL-INLOOP: middle.block:
; IF-EVL-INLOOP-NEXT: br label [[FOR_END:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-div.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-div.ll
index 0a66ce8fd5896..160dbd61ffbfe 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-div.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-div.ll
@@ -25,7 +25,6 @@ define void @test_sdiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; IF-EVL-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
@@ -39,8 +38,7 @@ define void @test_sdiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP12]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP14]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
-; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1024
; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[EXIT:.*]]
@@ -118,7 +116,6 @@ define void @test_udiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; IF-EVL-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
@@ -132,8 +129,7 @@ define void @test_udiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP12]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP14]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
-; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1024
; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[EXIT:.*]]
@@ -210,7 +206,6 @@ define void @test_srem(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; IF-EVL-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
@@ -224,8 +219,7 @@ define void @test_srem(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP12]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP14]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
-; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1024
; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[EXIT:.*]]
@@ -302,7 +296,6 @@ define void @test_urem(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; IF-EVL-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
@@ -316,8 +309,7 @@ define void @test_urem(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP12]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP14]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
-; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1024
; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[EXIT:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll
index 7c8f111a0d52e..46c36ff4cecfa 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-fixed-order-recurrence.ll
@@ -32,7 +32,6 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; IF-EVL-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 33, i32 [[TMP11]]
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[VP_OP_LOAD:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[PREV_EVL:%.*]] = phi i32 [ [[TMP25]], %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ]
@@ -46,8 +45,7 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_OP]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TC]]
; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[FOR_END:.*]]
@@ -169,7 +167,6 @@ define void @second_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; IF-EVL-NEXT: [[VECTOR_RECUR_INIT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 22, i32 [[TMP14]]
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[VP_OP_LOAD:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VECTOR_RECUR2:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT1]], %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
@@ -185,8 +182,7 @@ define void @second_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_OP]], ptr align 4 [[TMP21]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP15]])
; IF-EVL-NEXT: [[TMP23:%.*]] = zext i32 [[TMP15]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TC]]
; IF-EVL-NEXT: br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[FOR_END:.*]]
@@ -327,7 +323,6 @@ define void @third_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; IF-EVL-NEXT: [[VECTOR_RECUR_INIT3:%.*]] = insertelement <vscale x 4 x i32> poison, i32 11, i32 [[TMP17]]
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[VP_OP_LOAD:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VECTOR_RECUR2:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT1]], %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ]
@@ -346,8 +341,7 @@ define void @third_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_OP5]], ptr align 4 [[TMP25]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP18]])
; IF-EVL-NEXT: [[TMP27:%.*]] = zext i32 [[TMP18]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP27]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TC]]
; IF-EVL-NEXT: br i1 [[TMP28]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[FOR_END:.*]]
@@ -602,7 +596,6 @@ define void @first_order_recurrence_indvar(ptr noalias %A, i64 %TC) {
; IF-EVL-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 33, i32 [[TMP10]]
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 2 x i64> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ]
@@ -619,10 +612,9 @@ define void @first_order_recurrence_indvar(ptr noalias %A, i64 %TC) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP15]], ptr align 8 [[TMP9]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TC]]
+; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[FOR_END:.*]]
; IF-EVL: [[SCALAR_PH]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-gather-scatter.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-gather-scatter.ll
index 2b7a9fbdfd801..0f8dc47c25517 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-gather-scatter.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-gather-scatter.ll
@@ -27,7 +27,6 @@ define void @gather_scatter(ptr noalias %in, ptr noalias %out, ptr noalias %inde
; IF-EVL-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP10]]
; IF-EVL-NEXT: br label [[FOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_EVL_NEXT:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -44,9 +43,8 @@ define void @gather_scatter(ptr noalias %in, ptr noalias %out, ptr noalias %inde
; IF-EVL-NEXT: call void @llvm.vp.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> [[WIDE_MASKED_GATHER2]], <vscale x 2 x ptr> align 4 [[TMP16]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[TMP17:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP17]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], [[TMP8]]
; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; IF-EVL-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: br label [[FOR_END:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-inloop-reduction.ll
index a7955a55b7956..4162058d64d86 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-inloop-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-inloop-reduction.ll
@@ -26,7 +26,6 @@ define i32 @add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -37,9 +36,8 @@ define i32 @add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP15]] = add i32 [[TMP14]], [[VEC_PHI]]
; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
+; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: br label [[FOR_END:%.*]]
; IF-EVL: scalar.ph:
@@ -243,7 +241,6 @@ define i32 @or(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -254,9 +251,8 @@ define i32 @or(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP15]] = or i32 [[TMP14]], [[VEC_PHI]]
; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
+; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: br label [[FOR_END:%.*]]
; IF-EVL: scalar.ph:
@@ -352,7 +348,6 @@ define i32 @and(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -363,9 +358,8 @@ define i32 @and(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP15]] = and i32 [[TMP14]], [[VEC_PHI]]
; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
+; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: br label [[FOR_END:%.*]]
; IF-EVL: scalar.ph:
@@ -461,7 +455,6 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -472,9 +465,8 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP15]] = xor i32 [[TMP14]], [[VEC_PHI]]
; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
+; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: br label [[FOR_END:%.*]]
; IF-EVL: scalar.ph:
@@ -570,7 +562,6 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -581,9 +572,8 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[RDX_MINMAX]] = call i32 @llvm.smin.i32(i32 [[TMP14]], i32 [[VEC_PHI]])
; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
+; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: br label [[FOR_END:%.*]]
; IF-EVL: scalar.ph:
@@ -682,7 +672,6 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -693,9 +682,8 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[RDX_MINMAX]] = call i32 @llvm.smax.i32(i32 [[TMP14]], i32 [[VEC_PHI]])
; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
+; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
+; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: br label [[FOR_END:%.*]]
; IF-EVL: scalar.ph:
@@ -794,7 +782,6 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -805,9 +792,8 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[RDX_MINMAX]] = call i32 @llvm.umin.i32(i32 [[TMP14]], i32 [[VEC_PHI]])
; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
+; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
+; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: br label [[FOR_END:%.*]]
; IF-EVL: scalar.ph:
@@ -906,7 +892,6 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -917,9 +902,8 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[RDX_MINMAX]] = call i32 @llvm.umax.i32(i32 [[TMP14]], i32 [[VEC_PHI]])
; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
+; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
+; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: br label [[FOR_END:%.*]]
; IF-EVL: scalar.ph:
@@ -1018,7 +1002,6 @@ define float @fadd(ptr %a, i64 %n, float %start) {
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -1029,9 +1012,8 @@ define float @fadd(ptr %a, i64 %n, float %start) {
; IF-EVL-NEXT: [[TMP15]] = fadd reassoc float [[TMP14]], [[VEC_PHI]]
; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
+; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
+; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: br label [[FOR_END:%.*]]
; IF-EVL: scalar.ph:
@@ -1235,7 +1217,6 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 {
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX_SELECT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -1247,9 +1228,8 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 {
; IF-EVL-NEXT: [[RDX_MINMAX_SELECT]] = select fast i1 [[RDX_MINMAX_CMP]], float [[TMP14]], float [[VEC_PHI]]
; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
+; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
+; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: br label [[FOR_END:%.*]]
; IF-EVL: scalar.ph:
@@ -1349,7 +1329,6 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 {
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX_SELECT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -1361,9 +1340,8 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 {
; IF-EVL-NEXT: [[RDX_MINMAX_SELECT]] = select fast i1 [[RDX_MINMAX_CMP]], float [[TMP14]], float [[VEC_PHI]]
; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
+; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
+; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: br label [[FOR_END:%.*]]
; IF-EVL: scalar.ph:
@@ -1683,7 +1661,6 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) {
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -1697,8 +1674,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) {
; IF-EVL-NEXT: [[TMP18]] = fadd reassoc float [[TMP17]], [[VEC_PHI]]
; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: br label [[FOR_END:%.*]]
@@ -1804,7 +1780,6 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -1815,9 +1790,8 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
; IF-EVL-NEXT: [[TMP16]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP10]])
; IF-EVL-NEXT: [[TMP17:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP17]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]]
+; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
+; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP16]])
; IF-EVL-NEXT: [[TMP20:%.*]] = freeze i1 [[TMP19]]
@@ -1922,7 +1896,6 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -1933,9 +1906,8 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
; IF-EVL-NEXT: [[TMP16]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP10]])
; IF-EVL-NEXT: [[TMP17:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP17]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]]
+; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
+; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP16]])
; IF-EVL-NEXT: [[TMP20:%.*]] = freeze i1 [[TMP19]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll
index b2ebe6f8e28e6..1af3d22e22987 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll
@@ -28,7 +28,6 @@ define void @interleave(ptr noalias %a, ptr noalias %b, i64 %N) {
; IF-EVL-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP12]]
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -42,14 +41,6 @@ define void @interleave(ptr noalias %a, ptr noalias %b, i64 %N) {
; IF-EVL-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], <vscale x 4 x i64> [[VEC_IND]], i32 1
; IF-EVL-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <vscale x 4 x i32> @llvm.vp.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> align 4 [[TMP23]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[TMP26:%.*]] = add nsw <vscale x 4 x i32> [[WIDE_MASKED_GATHER5]], [[WIDE_MASKED_GATHER3]]
-; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
-; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP26]], ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
-; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP11]] to i64
-; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP14]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT2]]
-; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: br label [[FOR_COND_CLEANUP:%.*]]
; IF-EVL: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-intermediate-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-intermediate-store.ll
index 314c2013cc8a3..9e3c9a3bb113d 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-intermediate-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-intermediate-store.ll
@@ -46,7 +46,6 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr)
; IF-EVL-OUTLOOP-NEXT: [[TMP10:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
; IF-EVL-OUTLOOP-NEXT: br label [[FOR_BODY:%.*]]
; IF-EVL-OUTLOOP: vector.body:
-; IF-EVL-OUTLOOP-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_EVL_NEXT:%.*]], [[FOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP10]], [[ENTRY]] ], [ [[TMP19:%.*]], [[FOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -57,9 +56,8 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr)
; IF-EVL-OUTLOOP-NEXT: [[TMP19]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP12]])
; IF-EVL-OUTLOOP-NEXT: [[TMP21:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]
-; IF-EVL-OUTLOOP-NEXT: [[IV_NEXT]] = add i64 [[IV]], [[TMP9]]
-; IF-EVL-OUTLOOP-NEXT: [[TMP22:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]]
-; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; IF-EVL-OUTLOOP-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
+; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; IF-EVL-OUTLOOP: middle.block:
; IF-EVL-OUTLOOP-NEXT: [[TMP23:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP19]])
; IF-EVL-OUTLOOP-NEXT: store i32 [[TMP23]], ptr [[ADDR]], align 4, !alias.scope [[META7:![0-9]+]], !noalias [[META0]]
@@ -104,7 +102,6 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr)
; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4
; IF-EVL-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL-INLOOP: vector.body:
-; IF-EVL-INLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START]], [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -115,9 +112,8 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr)
; IF-EVL-INLOOP-NEXT: [[TMP22]] = add i32 [[TMP21]], [[VEC_PHI]]
; IF-EVL-INLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP14]] to i64
; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]]
-; IF-EVL-INLOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP12]]
-; IF-EVL-INLOOP-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-INLOOP-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; IF-EVL-INLOOP-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
+; IF-EVL-INLOOP-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; IF-EVL-INLOOP: middle.block:
; IF-EVL-INLOOP-NEXT: store i32 [[TMP22]], ptr [[ADDR]], align 4, !alias.scope [[META7:![0-9]+]], !noalias [[META0]]
; IF-EVL-INLOOP-NEXT: br label [[FOR_END:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-iv32.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-iv32.ll
index 2c265c9a2a5ea..a7cf248cbebd8 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-iv32.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-iv32.ll
@@ -24,7 +24,6 @@ define void @iv32(ptr noalias %a, ptr noalias %b, i32 %N) {
; IF-EVL-NEXT: [[TMP10:%.*]] = mul nuw i32 [[TMP9]], 4
; IF-EVL-NEXT: br label [[FOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_EVL_NEXT:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = sub i32 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[TMP11]], i32 4, i1 true)
@@ -33,9 +32,8 @@ define void @iv32(ptr noalias %a, ptr noalias %b, i32 %N) {
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[EVL_BASED_IV]]
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_OP_LOAD]], ptr align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i32 [[TMP12]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[IV_NEXT]] = add i32 [[IV]], [[TMP10]]
-; IF-EVL-NEXT: [[TMP18:%.*]] = icmp eq i32 [[IV_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], [[N]]
+; IF-EVL-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: br label [[FOR_COND_CLEANUP:%.*]]
; IF-EVL: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-known-no-overflow.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-known-no-overflow.ll
index 2c77d783ecfac..332e2cfe5ae7e 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-known-no-overflow.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-known-no-overflow.ll
@@ -27,7 +27,6 @@ define void @trip_count_max_1024(ptr %p, i64 %tc) vscale_range(2, 1024) {
; CHECK-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[AVL:%.*]] = sub i64 [[UMAX]], [[EVL_BASED_IV]]
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
@@ -37,9 +36,8 @@ define void @trip_count_max_1024(ptr %p, i64 %tc) vscale_range(2, 1024) {
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP11]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP9]])
; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP9]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP13]], [[EVL_BASED_IV]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[UMAX]]
+; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT_LOOPEXIT:.*]]
; CHECK: [[SCALAR_PH]]:
@@ -97,7 +95,6 @@ define void @overflow_at_0(ptr %p, i64 %tc) vscale_range(2, 1024) {
; CHECK-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[AVL:%.*]] = sub i64 [[TC]], [[EVL_BASED_IV]]
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
@@ -107,9 +104,8 @@ define void @overflow_at_0(ptr %p, i64 %tc) vscale_range(2, 1024) {
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP11]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP9]])
; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP9]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP13]], [[EVL_BASED_IV]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TC]]
+; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT_LOOPEXIT:.*]]
; CHECK: [[SCALAR_PH]]:
@@ -167,7 +163,6 @@ define void @no_overflow_at_0(ptr %p, i64 %tc) vscale_range(2, 1024) {
; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[AVL:%.*]] = sub i64 [[TC_ADD]], [[EVL_BASED_IV]]
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
@@ -177,8 +172,7 @@ define void @no_overflow_at_0(ptr %p, i64 %tc) vscale_range(2, 1024) {
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP5]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP9]], [[EVL_BASED_IV]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
-; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TC_ADD]]
; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT_LOOPEXIT:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll
index 1efd53919c939..f35985df6e6bb 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll
@@ -24,7 +24,6 @@ define void @masked_loadstore(ptr noalias %a, ptr noalias %b, i64 %n) {
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true)
@@ -37,9 +36,8 @@ define void @masked_loadstore(ptr noalias %a, ptr noalias %b, i64 %n) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_OP]], ptr align 4 [[TMP19]], <vscale x 4 x i1> [[TMP17]], i32 [[TMP10]])
; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
+; IF-EVL-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: br label [[EXIT:%.*]]
; IF-EVL: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-ordered-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-ordered-reduction.ll
index 81e0f75b18c31..7b02092a3a703 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-ordered-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-ordered-reduction.ll
@@ -26,7 +26,6 @@ define float @fadd(ptr noalias nocapture readonly %a, i64 %n) {
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -36,9 +35,8 @@ define float @fadd(ptr noalias nocapture readonly %a, i64 %n) {
; IF-EVL-NEXT: [[TMP14]] = call float @llvm.vp.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
+; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: br label [[FOR_END:%.*]]
; IF-EVL: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll
index 73fd3895f0be6..247fa48350ce1 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll
@@ -25,7 +25,6 @@ define i32 @add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START:%.*]], i32 0
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -36,8 +35,7 @@ define i32 @add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP14]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP10]])
; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP14]])
@@ -246,7 +244,6 @@ define i32 @or(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START:%.*]], i32 0
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -257,8 +254,7 @@ define i32 @or(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP14]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP10]])
; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> [[TMP14]])
@@ -358,7 +354,6 @@ define i32 @and(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x i32> splat (i32 -1), i32 [[START:%.*]], i32 0
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -369,8 +364,7 @@ define i32 @and(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP14]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP10]])
; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> [[TMP14]])
@@ -470,7 +464,6 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START:%.*]], i32 0
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -481,8 +474,7 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP14]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP10]])
; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> [[TMP14]])
@@ -583,7 +575,6 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -595,8 +586,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP14]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> [[TMP15]])
@@ -702,7 +692,6 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -714,8 +703,7 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP14]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> [[TMP15]])
@@ -821,7 +809,6 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -833,8 +820,7 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP14]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> [[TMP15]])
@@ -940,7 +926,6 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -952,8 +937,7 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP14]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> [[TMP15]])
@@ -1058,7 +1042,6 @@ define float @fadd(ptr %a, i64 %n, float %start) {
; IF-EVL-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x float> splat (float -0.000000e+00), float [[START:%.*]], i32 0
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -1069,8 +1052,7 @@ define float @fadd(ptr %a, i64 %n, float %start) {
; IF-EVL-NEXT: [[TMP14]] = call <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[VP_OP]], <vscale x 4 x float> [[VEC_PHI]], i32 [[TMP10]])
; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: [[TMP17:%.*]] = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[TMP14]])
@@ -1280,7 +1262,6 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 {
; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x float> [[BROADCAST_SPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -1292,8 +1273,7 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 {
; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[TMP14]], <vscale x 4 x float> [[VEC_PHI]], i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: [[TMP18:%.*]] = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> [[TMP15]])
@@ -1399,7 +1379,6 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 {
; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x float> [[BROADCAST_SPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -1411,8 +1390,7 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 {
; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[TMP14]], <vscale x 4 x float> [[VEC_PHI]], i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: [[TMP18:%.*]] = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> [[TMP15]])
@@ -1737,7 +1715,6 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) {
; IF-EVL-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x float> splat (float -0.000000e+00), float [[START:%.*]], i32 0
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -1750,8 +1727,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) {
; IF-EVL-NEXT: [[TMP17]] = call <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[TMP16]], <vscale x 4 x float> [[VEC_PHI]], i32 [[TMP10]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: [[TMP20:%.*]] = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[TMP17]])
@@ -1858,7 +1834,6 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -1869,9 +1844,8 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> [[TMP13]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]]
+; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
+; IF-EVL-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP15]])
; IF-EVL-NEXT: [[TMP19:%.*]] = freeze i1 [[TMP18]]
@@ -1976,7 +1950,6 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
@@ -1987,9 +1960,8 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> [[TMP13]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]]
+; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
+; IF-EVL-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP15]])
; IF-EVL-NEXT: [[TMP19:%.*]] = freeze i1 [[TMP18]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll
index 01033c88edf2a..ad301696d5b08 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll
@@ -24,7 +24,6 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt
; IF-EVL-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
@@ -50,9 +49,8 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE3]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP20:%.*]] = zext i32 [[TMP5]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP20]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
-; IF-EVL-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; IF-EVL-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1024
+; IF-EVL-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: br label [[LOOPEND:%.*]]
; IF-EVL: scalar.ph:
@@ -124,7 +122,6 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal
; IF-EVL-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
@@ -156,8 +153,7 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE5]], ptr align 4 [[TMP25]], <vscale x 4 x i1> [[VP_REVERSE_MASK6]], i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP28:%.*]] = zext i32 [[TMP5]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP28]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
-; IF-EVL-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1024
; IF-EVL-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: br label [[LOOPEND:%.*]]
@@ -256,7 +252,6 @@ define void @multiple_reverse_vector_pointer(ptr noalias %a, ptr noalias %b, ptr
; IF-EVL-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 16
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 1025, [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true)
@@ -292,9 +287,8 @@ define void @multiple_reverse_vector_pointer(ptr noalias %a, ptr noalias %b, ptr
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_REVERSE2]], ptr align 1 [[TMP26]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-NEXT: [[TMP27:%.*]] = zext i32 [[TMP6]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP27]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
-; IF-EVL-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; IF-EVL-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025
+; IF-EVL-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: br label [[EXIT:%.*]]
; IF-EVL: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-safe-dep-distance.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-safe-dep-distance.ll
index 2dd017c63cd30..1517ab5ce1381 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-safe-dep-distance.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-safe-dep-distance.ll
@@ -27,7 +27,6 @@ define void @test(ptr %p) {
; IF-EVL-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP5:%.*]] = sub i64 200, [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP5]], i32 2, i1 true)
@@ -38,8 +37,7 @@ define void @test(ptr %p) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP_LOAD]], ptr align 8 [[TMP11]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-NEXT: [[TMP13:%.*]] = zext i32 [[TMP6]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP13]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
-; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 200
; IF-EVL-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: br label [[EXIT:%.*]]
@@ -332,7 +330,6 @@ define void @trivial_due_max_vscale(ptr %p) {
; IF-EVL-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP5:%.*]] = sub i64 200, [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP5]], i32 2, i1 true)
@@ -343,8 +340,7 @@ define void @trivial_due_max_vscale(ptr %p) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP_LOAD]], ptr align 32 [[TMP11]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-NEXT: [[TMP13:%.*]] = zext i32 [[TMP6]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP13]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
-; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 200
; IF-EVL-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: br label [[EXIT:%.*]]
@@ -412,7 +408,6 @@ define void @no_high_lmul_or_interleave(ptr %p) {
; IF-EVL-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 3002, [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP9:%.*]] = icmp ult i64 [[AVL]], 1024
@@ -425,8 +420,7 @@ define void @no_high_lmul_or_interleave(ptr %p) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv1i64.p0(<vscale x 1 x i64> [[VP_OP_LOAD]], ptr align 32 [[TMP5]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP11]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 3002
; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: br label [[EXIT:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll
index 984b64c55ce16..a453050d7adb7 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll
@@ -25,7 +25,6 @@ define void @lshift_significand(i32 %n, ptr nocapture writeonly %dst) {
; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 2
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[AVL:%.*]] = sub i64 [[TMP0]], [[EVL_BASED_IV]]
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
@@ -42,8 +41,7 @@ define void @lshift_significand(i32 %n, ptr nocapture writeonly %dst) {
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_REVERSE]], ptr align 8 [[TMP19]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP11]])
; CHECK-NEXT: [[TMP20:%.*]] = zext i32 [[TMP11]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
-; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TMP0]]
; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll
index 74eb0949b5eda..a0f5db7119e5e 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll
@@ -24,7 +24,6 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
; IF-EVL-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
-; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
@@ -37,8 +36,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_OP]], ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[TMP20:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]]
-; IF-EVL-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: br label [[FOR_COND_CLEANUP:%.*]]
>From 20a125f3713b1ef2f2c4e829cbb4d053696299ee Mon Sep 17 00:00:00 2001
From: Shih-Po Hung <arcbbb at gmail.com>
Date: Sat, 12 Jul 2025 06:12:13 +0800
Subject: [PATCH 2/9] Update llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Co-authored-by: Florian Hahn <flo at fhahn.com>
---
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 603bad90ff816..fa4f2051b49b1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2383,8 +2383,8 @@ void VPlanTransforms::simplifyEVLIVs(VPlan &Plan) {
EVLPhi->eraseFromParent();
// Find the latch-exiting block and convert to variable-length stepping.
- // Before: (branch-on-cond CanonicalIVInc, VectorTripCount)
- // After: (branch-on-cond EVLIVInc, TripCount)
+ // Before: (branch-on-count CanonicalIVInc, VectorTripCount)
+ // After: (branch-on-count EVLIVInc, TripCount)
auto FindLatchExiting = [](VPBasicBlock *Entry) {
auto Range =
VPBlockUtils::blocksOnly<VPBasicBlock>(vp_depth_first_shallow(Entry));
>From a4c920627cd53cc78460632980e3f0c49f5f4729 Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Thu, 17 Jul 2025 06:22:54 -0700
Subject: [PATCH 3/9] Address comments
---
.../Transforms/Vectorize/VPlanTransforms.cpp | 29 +++++++++----------
1 file changed, 13 insertions(+), 16 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index fa4f2051b49b1..a49d3c6dff6cc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2385,24 +2385,21 @@ void VPlanTransforms::simplifyEVLIVs(VPlan &Plan) {
// Find the latch-exiting block and convert to variable-length stepping.
// Before: (branch-on-count CanonicalIVInc, VectorTripCount)
// After: (branch-on-count EVLIVInc, TripCount)
- auto FindLatchExiting = [](VPBasicBlock *Entry) {
- auto Range =
- VPBlockUtils::blocksOnly<VPBasicBlock>(vp_depth_first_shallow(Entry));
- auto It = find_if(Range, [&](VPBasicBlock *VPBB) {
- return any_of(VPBB->successors(),
- [&](VPBlockBase *Succ) { return Succ == Entry; });
- });
- return It != Range.end() ? *It : nullptr;
- };
- VPBasicBlock *LatchExiting = FindLatchExiting(Entry);
- assert(LatchExiting && "LatchExiting is not found");
+ auto Range =
+ VPBlockUtils::blocksOnly<VPBasicBlock>(vp_depth_first_shallow(Entry));
+ auto It = find_if(Range, [&](VPBasicBlock *VPBB) {
+ return any_of(VPBB->successors(),
+ [&](VPBlockBase *Succ) { return Succ == Entry; });
+ });
+ assert((It != Range.end()) && "LatchExiting is not found");
+ VPBasicBlock *LatchExiting = *It;
auto *LatchExitingBr = cast<VPInstruction>(LatchExiting->getTerminator());
VPValue *ScalarIVInc;
- if (!LatchExitingBr ||
- !match(LatchExitingBr,
- m_BranchOnCount(m_VPValue(ScalarIVInc),
- m_Specific(&Plan.getVectorTripCount()))))
- return;
+ assert(LatchExitingBr &&
+ match(LatchExitingBr,
+ m_BranchOnCount(m_VPValue(ScalarIVInc),
+ m_Specific(&Plan.getVectorTripCount()))) &&
+ "Unexpected terminator in EVL loop");
LatchExitingBr->setOperand(1, Plan.getTripCount());
ScalarIVInc->replaceAllUsesWith(EVLIncrement);
VPRecipeBase *IVIncR = ScalarIVInc->getDefiningRecipe();
>From 016194c517ef31a81ba981dd9b2e262e41da9385 Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Thu, 17 Jul 2025 08:27:11 -0700
Subject: [PATCH 4/9] Add assertion on multiple evl-phi
---
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index a49d3c6dff6cc..053207c555f33 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2409,13 +2409,22 @@ void VPlanTransforms::simplifyEVLIVs(VPlan &Plan) {
};
// Find EVL loop entries by locating VPEVLBasedIVPHIRecipe
+ // There should be only one EVL PHI in the entire plan
+ VPEVLBasedIVPHIRecipe *EVLPhi = nullptr;
+ VPBasicBlock *EVLPhiBlock = nullptr;
+
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(Plan.getEntry())))
for (VPRecipeBase &R : VPBB->phis())
if (auto *PhiR = dyn_cast<VPEVLBasedIVPHIRecipe>(&R)) {
- ConvertEVLPhi(Plan, VPBB, PhiR);
- break;
+ assert(!EVLPhi && "Found multiple EVL PHIs - only one expected");
+ EVLPhi = PhiR;
+ EVLPhiBlock = VPBB;
}
+
+ // Process the single EVL PHI if found
+ if (EVLPhi)
+ ConvertEVLPhi(Plan, EVLPhiBlock, EVLPhi);
}
void VPlanTransforms::dropPoisonGeneratingRecipes(
>From d274ae3a83d00f255f29ee410323c1975891e1f2 Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Thu, 17 Jul 2025 15:33:36 -0700
Subject: [PATCH 5/9] early return and remove lambda
---
.../Transforms/Vectorize/VPlanTransforms.cpp | 84 +++++++++----------
1 file changed, 40 insertions(+), 44 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 053207c555f33..7063be6b1a927 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2370,61 +2370,57 @@ bool VPlanTransforms::tryAddExplicitVectorLength(
}
void VPlanTransforms::simplifyEVLIVs(VPlan &Plan) {
- auto ConvertEVLPhi = [](VPlan &Plan, VPBasicBlock *Entry,
- VPEVLBasedIVPHIRecipe *EVLPhi) {
- using namespace llvm::VPlanPatternMatch;
- VPValue *EVLIncrement = EVLPhi->getBackedgeValue();
-
- // Convert EVLPhi to concrete recipe.
- auto *ScalarR = VPBuilder(EVLPhi).createScalarPhi(
- {EVLPhi->getStartValue(), EVLIncrement}, EVLPhi->getDebugLoc(),
- "evl.based.iv");
- EVLPhi->replaceAllUsesWith(ScalarR);
- EVLPhi->eraseFromParent();
-
- // Find the latch-exiting block and convert to variable-length stepping.
- // Before: (branch-on-count CanonicalIVInc, VectorTripCount)
- // After: (branch-on-count EVLIVInc, TripCount)
- auto Range =
- VPBlockUtils::blocksOnly<VPBasicBlock>(vp_depth_first_shallow(Entry));
- auto It = find_if(Range, [&](VPBasicBlock *VPBB) {
- return any_of(VPBB->successors(),
- [&](VPBlockBase *Succ) { return Succ == Entry; });
- });
- assert((It != Range.end()) && "LatchExiting is not found");
- VPBasicBlock *LatchExiting = *It;
- auto *LatchExitingBr = cast<VPInstruction>(LatchExiting->getTerminator());
- VPValue *ScalarIVInc;
- assert(LatchExitingBr &&
- match(LatchExitingBr,
- m_BranchOnCount(m_VPValue(ScalarIVInc),
- m_Specific(&Plan.getVectorTripCount()))) &&
- "Unexpected terminator in EVL loop");
- LatchExitingBr->setOperand(1, Plan.getTripCount());
- ScalarIVInc->replaceAllUsesWith(EVLIncrement);
- VPRecipeBase *IVIncR = ScalarIVInc->getDefiningRecipe();
- VPRecipeBase *ScalarIV = IVIncR->getOperand(0)->getDefiningRecipe();
- IVIncR->eraseFromParent();
- ScalarIV->eraseFromParent();
- };
-
+ using namespace llvm::VPlanPatternMatch;
// Find EVL loop entries by locating VPEVLBasedIVPHIRecipe
// There should be only one EVL PHI in the entire plan
VPEVLBasedIVPHIRecipe *EVLPhi = nullptr;
- VPBasicBlock *EVLPhiBlock = nullptr;
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(Plan.getEntry())))
for (VPRecipeBase &R : VPBB->phis())
if (auto *PhiR = dyn_cast<VPEVLBasedIVPHIRecipe>(&R)) {
- assert(!EVLPhi && "Found multiple EVL PHIs - only one expected");
+ assert(!EVLPhi && "Found multiple EVL PHIs. Only one expected");
EVLPhi = PhiR;
- EVLPhiBlock = VPBB;
}
- // Process the single EVL PHI if found
- if (EVLPhi)
- ConvertEVLPhi(Plan, EVLPhiBlock, EVLPhi);
+ // Early return if no EVL PHI is found
+ if (!EVLPhi)
+ return;
+
+ VPBasicBlock *Entry = EVLPhi->getParent();
+ VPValue *EVLIncrement = EVLPhi->getBackedgeValue();
+
+ // Convert EVLPhi to concrete recipe.
+ auto *ScalarR =
+ VPBuilder(EVLPhi).createScalarPhi({EVLPhi->getStartValue(), EVLIncrement},
+ EVLPhi->getDebugLoc(), "evl.based.iv");
+ EVLPhi->replaceAllUsesWith(ScalarR);
+ EVLPhi->eraseFromParent();
+
+ // Find the latch-exiting block and convert to variable-length stepping.
+ // Before: (branch-on-count CanonicalIVInc, VectorTripCount)
+ // After: (branch-on-count EVLIVInc, TripCount)
+ auto Range =
+ VPBlockUtils::blocksOnly<VPBasicBlock>(vp_depth_first_shallow(Entry));
+ auto It = find_if(Range, [&Entry](VPBasicBlock *VPBB) {
+ return any_of(VPBB->successors(),
+ [&Entry](VPBlockBase *Succ) { return Succ == Entry; });
+ });
+ assert((It != Range.end()) && "LatchExiting is not found");
+ VPBasicBlock *LatchExiting = *It;
+ auto *LatchExitingBr = cast<VPInstruction>(LatchExiting->getTerminator());
+ VPValue *ScalarIVInc;
+ assert(LatchExitingBr &&
+ match(LatchExitingBr,
+ m_BranchOnCount(m_VPValue(ScalarIVInc),
+ m_Specific(&Plan.getVectorTripCount()))) &&
+ "Unexpected terminator in EVL loop");
+ LatchExitingBr->setOperand(1, Plan.getTripCount());
+ ScalarIVInc->replaceAllUsesWith(EVLIncrement);
+ VPRecipeBase *IVIncR = ScalarIVInc->getDefiningRecipe();
+ VPRecipeBase *ScalarIV = IVIncR->getOperand(0)->getDefiningRecipe();
+ IVIncR->eraseFromParent();
+ ScalarIV->eraseFromParent();
}
void VPlanTransforms::dropPoisonGeneratingRecipes(
>From f635d4dbca4c325b08e27a5e756eb624dd9b935b Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Mon, 21 Jul 2025 01:39:33 -0700
Subject: [PATCH 6/9] Fix for single-iteration EVL loop
---
.../Transforms/Vectorize/VPlanTransforms.cpp | 29 +++++---
.../LoopVectorize/simplify-evl-phi.ll | 68 +++++++++++++++++++
2 files changed, 88 insertions(+), 9 deletions(-)
create mode 100644 llvm/test/Transforms/LoopVectorize/simplify-evl-phi.ll
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 7063be6b1a927..fa5d16c8eca50 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2397,8 +2397,19 @@ void VPlanTransforms::simplifyEVLIVs(VPlan &Plan) {
EVLPhi->replaceAllUsesWith(ScalarR);
EVLPhi->eraseFromParent();
- // Find the latch-exiting block and convert to variable-length stepping.
- // Before: (branch-on-count CanonicalIVInc, VectorTripCount)
+ // Replace CanonicalIVInc with EVL-PHI increment
+ VPRecipeBase *CanonicalIV = &*Entry->begin();
+ assert(dyn_cast<VPPhi>(CanonicalIV) && "Unexpected canonical iv");
+ VPValue *Backedge = CanonicalIV->getOperand(1);
+ Backedge->replaceAllUsesWith(EVLIncrement);
+
+ // Remove unused phi
+ VPRecipeBase *CanonicalIVIncrement = Backedge->getDefiningRecipe();
+ CanonicalIVIncrement->eraseFromParent();
+ CanonicalIV->eraseFromParent();
+
+ // Find the latch-exiting block and replace use of VectorTripCount
+ // Before: (branch-on-count EVLIVInc, VectorTripCount)
// After: (branch-on-count EVLIVInc, TripCount)
auto Range =
VPBlockUtils::blocksOnly<VPBasicBlock>(vp_depth_first_shallow(Entry));
@@ -2407,20 +2418,20 @@ void VPlanTransforms::simplifyEVLIVs(VPlan &Plan) {
[&Entry](VPBlockBase *Succ) { return Succ == Entry; });
});
assert((It != Range.end()) && "LatchExiting is not found");
+
VPBasicBlock *LatchExiting = *It;
+
auto *LatchExitingBr = cast<VPInstruction>(LatchExiting->getTerminator());
- VPValue *ScalarIVInc;
+
+ // Skip single-iteration loop region
+ if (match(LatchExitingBr, m_BranchOnCond(m_True())))
+ return;
assert(LatchExitingBr &&
match(LatchExitingBr,
- m_BranchOnCount(m_VPValue(ScalarIVInc),
+ m_BranchOnCount(m_VPValue(EVLIncrement),
m_Specific(&Plan.getVectorTripCount()))) &&
"Unexpected terminator in EVL loop");
LatchExitingBr->setOperand(1, Plan.getTripCount());
- ScalarIVInc->replaceAllUsesWith(EVLIncrement);
- VPRecipeBase *IVIncR = ScalarIVInc->getDefiningRecipe();
- VPRecipeBase *ScalarIV = IVIncR->getOperand(0)->getDefiningRecipe();
- IVIncR->eraseFromParent();
- ScalarIV->eraseFromParent();
}
void VPlanTransforms::dropPoisonGeneratingRecipes(
diff --git a/llvm/test/Transforms/LoopVectorize/simplify-evl-phi.ll b/llvm/test/Transforms/LoopVectorize/simplify-evl-phi.ll
new file mode 100644
index 0000000000000..36e8071b8874c
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/simplify-evl-phi.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt %s -S -mtriple riscv64 -passes=loop-vectorize --prefer-predicate-over-epilogue=predicate-dont-vectorize -force-tail-folding-style=data-with-evl -riscv-v-min-trip-count=0 -force-target-instruction-cost=1 -mattr=+v | FileCheck %s
+
+; Check canonical-iv is removed in single-iteration loop
+define void @foo(ptr %arg) #0 {
+; CHECK-LABEL: define void @foo(
+; CHECK-SAME: ptr [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
+; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 3, [[TMP2]]
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[AVL:%.*]] = sub i64 3, [[EVL_BASED_IV]]
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr [3 x i64], ptr [[ARG]], i64 0, i64 [[EVL_BASED_IV]]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP6]], i32 0
+; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> zeroinitializer, ptr align 8 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
+; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP5]] to i64
+; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP8]], [[EVL_BASED_IV]]
+; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr [3 x i64], ptr [[ARG]], i64 0, i64 [[IV]]
+; CHECK-NEXT: store i64 0, ptr [[GEP]], align 8
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 3
+; CHECK-NEXT: br i1 [[COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %gep = getelementptr [3 x i64], ptr %arg, i64 0, i64 %iv
+ store i64 0, ptr %gep, align 8
+ %iv.next = add i64 %iv, 1
+ %cond = icmp eq i64 %iv.next, 3
+ br i1 %cond, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+attributes #0 = { vscale_range(2,2) }
+
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]], [[META3:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.isvectorized.tailfoldingstyle", !"evl"}
+; CHECK: [[META3]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META3]], [[META1]]}
+;.
>From 8f0a379123fa423fa15e4c6f1549544a838fa927 Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Mon, 21 Jul 2025 19:15:33 -0700
Subject: [PATCH 7/9] Address comments
---
.../Transforms/Vectorize/LoopVectorize.cpp | 4 +-
.../Transforms/Vectorize/VPlanTransforms.cpp | 37 +++++++++----------
.../Transforms/Vectorize/VPlanTransforms.h | 5 ++-
.../Transforms/Vectorize/VPlanVerifier.cpp | 9 ++++-
...tor-loop-backedge-elimination-with-evl.ll} | 0
5 files changed, 29 insertions(+), 26 deletions(-)
rename llvm/test/Transforms/LoopVectorize/{simplify-evl-phi.ll => RISCV/vector-loop-backedge-elimination-with-evl.ll} (100%)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index a67a66f172ec5..710ff73d7bd50 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7290,8 +7290,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
// Regions are dissolved after optimizing for VF and UF, which completely
// removes unneeded loop regions first.
VPlanTransforms::dissolveLoopRegions(BestVPlan);
- // Enable variable-length stepping for EVL loops after regions are dissolved
- VPlanTransforms::simplifyEVLIVs(BestVPlan);
+ // Canonicalize EVL loops after regions are dissolved.
+ VPlanTransforms::canonicalizeEVLLoops(BestVPlan);
// Perform the actual loop transformation.
VPTransformState State(&TTI, BestVF, LI, DT, ILV.AC, ILV.Builder, &BestVPlan,
OrigLoop->getParentLoop(),
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index fa5d16c8eca50..c21ae598fe2c2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2369,10 +2369,10 @@ bool VPlanTransforms::tryAddExplicitVectorLength(
return true;
}
-void VPlanTransforms::simplifyEVLIVs(VPlan &Plan) {
+void VPlanTransforms::canonicalizeEVLLoops(VPlan &Plan) {
using namespace llvm::VPlanPatternMatch;
- // Find EVL loop entries by locating VPEVLBasedIVPHIRecipe
- // There should be only one EVL PHI in the entire plan
+ // Find EVL loop entries by locating VPEVLBasedIVPHIRecipe.
+ // There should be only one EVL PHI in the entire plan.
VPEVLBasedIVPHIRecipe *EVLPhi = nullptr;
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
@@ -2383,11 +2383,11 @@ void VPlanTransforms::simplifyEVLIVs(VPlan &Plan) {
EVLPhi = PhiR;
}
- // Early return if no EVL PHI is found
+ // Early return if no EVL PHI is found.
if (!EVLPhi)
return;
- VPBasicBlock *Entry = EVLPhi->getParent();
+ VPBasicBlock *HeaderVPBB = EVLPhi->getParent();
VPValue *EVLIncrement = EVLPhi->getBackedgeValue();
// Convert EVLPhi to concrete recipe.
@@ -2397,32 +2397,29 @@ void VPlanTransforms::simplifyEVLIVs(VPlan &Plan) {
EVLPhi->replaceAllUsesWith(ScalarR);
EVLPhi->eraseFromParent();
- // Replace CanonicalIVInc with EVL-PHI increment
- VPRecipeBase *CanonicalIV = &*Entry->begin();
- assert(dyn_cast<VPPhi>(CanonicalIV) && "Unexpected canonical iv");
+ // Replace CanonicalIVInc with EVL-PHI increment.
+ VPRecipeBase *CanonicalIV = &*HeaderVPBB->begin();
+ assert(
+ isa<VPPhi>(CanonicalIV) &&
+ match(CanonicalIV->getOperand(1),
+ m_c_Binary<Instruction::Add>(m_Specific(cast<VPPhi>(CanonicalIV)),
+ m_Specific(&Plan.getVFxUF()))) &&
+ "Unexpected canoincal iv");
VPValue *Backedge = CanonicalIV->getOperand(1);
Backedge->replaceAllUsesWith(EVLIncrement);
- // Remove unused phi
+ // Remove unused phi and increment.
VPRecipeBase *CanonicalIVIncrement = Backedge->getDefiningRecipe();
CanonicalIVIncrement->eraseFromParent();
CanonicalIV->eraseFromParent();
- // Find the latch-exiting block and replace use of VectorTripCount
+ // Replace the use of VectorTripCount in the latch-exiting block.
// Before: (branch-on-count EVLIVInc, VectorTripCount)
// After: (branch-on-count EVLIVInc, TripCount)
- auto Range =
- VPBlockUtils::blocksOnly<VPBasicBlock>(vp_depth_first_shallow(Entry));
- auto It = find_if(Range, [&Entry](VPBasicBlock *VPBB) {
- return any_of(VPBB->successors(),
- [&Entry](VPBlockBase *Succ) { return Succ == Entry; });
- });
- assert((It != Range.end()) && "LatchExiting is not found");
-
- VPBasicBlock *LatchExiting = *It;
+ VPBasicBlock *LatchExiting =
+ HeaderVPBB->getPredecessors()[1]->getEntryBasicBlock();
auto *LatchExitingBr = cast<VPInstruction>(LatchExiting->getTerminator());
-
// Skip single-iteration loop region
if (match(LatchExitingBr, m_BranchOnCond(m_True())))
return;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index a9b491aae4b45..1c629acf0a18b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -214,11 +214,12 @@ struct VPlanTransforms {
///
/// Once loop regions are replaced with explicit CFG, EVL loops can step with
/// variable vector lengths instead of fixed lengths. This transformation:
- /// * EVL-Phi concretization (makes them concrete)
+ /// * Make EVL-Phi concrete.
+ /// * Remove CanonicalIV and increment.
/// * Replaces fixed-length stepping (branch-on-cond CanonicalIVInc,
/// VectorTripCount) with variable-length stepping (branch-on-cond
/// EVLIVInc, TripCount).
- static void simplifyEVLIVs(VPlan &Plan);
+ static void canonicalizeEVLLoops(VPlan &Plan);
/// Lower abstract recipes to concrete ones, that can be codegen'd. Use \p
/// CanonicalIVTy as type for all un-typed live-ins in VPTypeAnalysis.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 7c160c3e0d597..095c7e0db8f3e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -17,6 +17,7 @@
#include "VPlanCFG.h"
#include "VPlanDominatorTree.h"
#include "VPlanHelpers.h"
+#include "VPlanPatternMatch.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/TypeSwitch.h"
@@ -193,8 +194,12 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
return false;
}
// EVLIVIncrement is only used by EVLIV & BranchOnCount.
- // More than two is unexpected.
- if (I->getNumUsers() > 2) {
+ // Having more than two users is unexpected.
+ if ((I->getNumUsers() != 1) &&
+ (I->getNumUsers() != 2 || !any_of(I->users(), [&I](VPUser *U) {
+ using namespace llvm::VPlanPatternMatch;
+ return match(U, m_BranchOnCount(m_Specific(I), m_VPValue()));
+ }))) {
errs() << "EVL is used in VPInstruction with multiple users\n";
return false;
}
diff --git a/llvm/test/Transforms/LoopVectorize/simplify-evl-phi.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll
similarity index 100%
rename from llvm/test/Transforms/LoopVectorize/simplify-evl-phi.ll
rename to llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll
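To summarize the verifier relaxation in this patch (a reading of the new condition above, not text from the patch): once the canonical IV is removed, the EVL increment can legitimately feed both the EVL-based IV phi and the BranchOnCount terminator. The check therefore now accepts either exactly one user, or exactly two users where one of them is a branch-on-count of the increment; any other set of users still trips the existing "multiple users" diagnostic.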
>From 2c98bb1b17ef10cfe7ca7f558b5cb849888ea69e Mon Sep 17 00:00:00 2001
From: Shih-Po Hung <arcbbb at gmail.com>
Date: Fri, 25 Jul 2025 22:01:01 +0800
Subject: [PATCH 8/9] Update llvm/lib/Transforms/Vectorize/VPlanTransforms.h
Co-authored-by: Luke Lau <luke_lau at icloud.com>
---
llvm/lib/Transforms/Vectorize/VPlanTransforms.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 1c629acf0a18b..880159f760922 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -214,8 +214,8 @@ struct VPlanTransforms {
///
/// Once loop regions are replaced with explicit CFG, EVL loops can step with
/// variable vector lengths instead of fixed lengths. This transformation:
- /// * Make EVL-Phi concrete.
- /// * Remove CanonicalIV and increment.
+ /// * Makes EVL-Phi concrete.
+ /// * Removes CanonicalIV and increment.
/// * Replaces fixed-length stepping (branch-on-cond CanonicalIVInc,
/// VectorTripCount) with variable-length stepping (branch-on-cond
/// EVLIVInc, TripCount).
>From b52dcb1ac102c1d109a877edd64589d48c610a1d Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Thu, 24 Jul 2025 20:04:28 -0700
Subject: [PATCH 9/9] Update tests after rebasing
---
.../Transforms/LoopVectorize/RISCV/pr88802.ll | 4 +-
.../LoopVectorize/RISCV/scalable-tailfold.ll | 58 ++++++++-----------
.../truncate-to-minimal-bitwidth-cost.ll | 6 +-
.../LoopVectorize/RISCV/uniform-load-store.ll | 32 +++-------
...ctor-loop-backedge-elimination-with-evl.ll | 22 +++----
...ectorize-force-tail-with-evl-interleave.ll | 7 +++
6 files changed, 49 insertions(+), 80 deletions(-)
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
index 02288112f3389..9a0f59b65d058 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
@@ -30,7 +30,6 @@ define void @test(ptr %p, i64 %a, i8 %b) {
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i32> zeroinitializer, [[TMP10]]
; CHECK-NEXT: br label [[FOR_COND:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_COND]] ]
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[FOR_COND]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR_COND]] ]
; CHECK-NEXT: [[AVL:%.*]] = sub i32 9, [[EVL_BASED_IV]]
@@ -46,9 +45,8 @@ define void @test(ptr %p, i64 %a, i8 %b) {
; CHECK-NEXT: [[TMP17:%.*]] = trunc <vscale x 2 x i32> [[TMP16]] to <vscale x 2 x i8>
; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> [[TMP17]], <vscale x 2 x ptr> align 1 [[BROADCAST_SPLAT4]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP11]])
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP11]], [[EVL_BASED_IV]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP4]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT6]]
-; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], 9
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_COND]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[EXIT1:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll
index 8d8ea5a0a2380..dbb2b811af126 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll
@@ -23,19 +23,17 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) {
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[AVL:%.*]] = sub i64 1025, [[EVL_BASED_IV]]
-; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
-; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP10]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP9]])
+; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
+; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP11]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP10]])
; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i64> [[VP_OP_LOAD]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP7]], ptr align 8 [[TMP10]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP9]])
-; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP9]] to i64
+; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP7]], ptr align 8 [[TMP11]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP10]])
+; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP10]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP8]], [[EVL_BASED_IV]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
-; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025
+; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[FOR_END:%.*]]
; CHECK: scalar.ph:
@@ -89,7 +87,6 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[AVL:%.*]] = sub i64 1025, [[EVL_BASED_IV]]
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
@@ -99,8 +96,7 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x ptr> align 8 [[TMP10]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP7]])
; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP7]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP12]], [[EVL_BASED_IV]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
-; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[FOR_END:%.*]]
@@ -151,7 +147,6 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64
; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[AVL:%.*]] = sub i64 1025, [[EVL_BASED_IV]]
@@ -164,8 +159,7 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64
; CHECK-NEXT: [[TMP11]] = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP12]], <vscale x 2 x i64> [[VEC_PHI]], i32 [[TMP7]])
; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP7]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP15]], [[EVL_BASED_IV]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
-; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> [[TMP11]])
@@ -225,17 +219,15 @@ define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) {
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[AVL:%.*]] = sub i64 1025, [[EVL_BASED_IV]]
-; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
-; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr align 8 [[TMP9]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP8]])
-; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP8]] to i64
+; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
+; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr align 8 [[TMP10]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP9]])
+; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP9]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP7]], [[EVL_BASED_IV]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
-; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025
+; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[FOR_END:%.*]]
; CHECK: scalar.ph:
@@ -283,7 +275,6 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[AVL:%.*]] = sub i64 1025, [[EVL_BASED_IV]]
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
@@ -292,8 +283,7 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr align 8 [[TMP8]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP7]])
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP9]], [[EVL_BASED_IV]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
-; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[FOR_END:%.*]]
@@ -377,19 +367,17 @@ define void @vector_add_trip1024(ptr noalias nocapture %a, i64 %v, i64 %n) {
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]]
-; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
-; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP10]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP9]])
+; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
+; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP11]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP10]])
; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i64> [[VP_OP_LOAD]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP7]], ptr align 8 [[TMP10]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP9]])
-; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP9]] to i64
+; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP7]], ptr align 8 [[TMP11]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP10]])
+; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP10]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP8]], [[EVL_BASED_IV]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
-; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1024
+; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[FOR_END:%.*]]
; CHECK: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll
index 79ec73aa58c87..c21847fe0db89 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll
@@ -165,15 +165,13 @@ define void @truncate_to_i1_used_by_branch(i8 %x, ptr %dst) #0 {
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[AVL:%.*]] = sub i32 9, [[EVL_BASED_IV]]
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[AVL]], i32 4, i1 true)
; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> zeroinitializer, <vscale x 4 x ptr> align 1 [[BROADCAST_SPLAT2]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP6]])
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP6]], [[EVL_BASED_IV]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]]
-; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], 9
+; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[SCALAR_PH]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
index 9062542e42d60..1fcf27acb1af0 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
@@ -98,7 +98,6 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6
; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
; TF-SCALABLE-NEXT: br label %[[VECTOR_BODY:.*]]
; TF-SCALABLE: [[VECTOR_BODY]]:
-; TF-SCALABLE-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[AVL:%.*]] = sub i64 1025, [[INDEX]]
; TF-SCALABLE-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
@@ -109,8 +108,7 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6
; TF-SCALABLE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr align 8 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP6]])
; TF-SCALABLE-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP8]], [[INDEX]]
-; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], [[TMP4]]
-; TF-SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; TF-SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025
; TF-SCALABLE-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; TF-SCALABLE: [[MIDDLE_BLOCK]]:
; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]]
@@ -383,7 +381,6 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP6]]
; TF-SCALABLE-NEXT: br label %[[VECTOR_BODY:.*]]
; TF-SCALABLE: [[VECTOR_BODY]]:
-; TF-SCALABLE-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[AVL:%.*]] = sub i64 1025, [[INDEX]]
@@ -401,10 +398,9 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
; TF-SCALABLE-NEXT: call void @llvm.vp.store.nxv4i64.p0(<vscale x 4 x i64> [[PREDPHI]], ptr align 8 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP7]])
; TF-SCALABLE-NEXT: [[TMP15:%.*]] = zext i32 [[TMP7]] to i64
; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP15]], [[INDEX]]
-; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], [[TMP4]]
; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
-; TF-SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; TF-SCALABLE-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; TF-SCALABLE-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025
+; TF-SCALABLE-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; TF-SCALABLE: [[MIDDLE_BLOCK]]:
; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]]
; TF-SCALABLE: [[SCALAR_PH]]:
@@ -542,7 +538,6 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt
; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
; TF-SCALABLE-NEXT: br label %[[VECTOR_BODY:.*]]
; TF-SCALABLE: [[VECTOR_BODY]]:
-; TF-SCALABLE-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[AVL:%.*]] = sub i64 1025, [[INDEX]]
; TF-SCALABLE-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
@@ -553,8 +548,7 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt
; TF-SCALABLE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr align 8 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP6]])
; TF-SCALABLE-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP8]], [[INDEX]]
-; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], [[TMP4]]
-; TF-SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; TF-SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025
; TF-SCALABLE-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; TF-SCALABLE: [[MIDDLE_BLOCK]]:
; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]]
@@ -682,7 +676,6 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: br label %[[VECTOR_BODY:.*]]
; TF-SCALABLE: [[VECTOR_BODY]]:
-; TF-SCALABLE-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[AVL:%.*]] = sub i64 1025, [[INDEX]]
; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
@@ -691,8 +684,7 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
; TF-SCALABLE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr align 8 [[TMP6]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; TF-SCALABLE-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64
; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP7]], [[INDEX]]
-; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], [[TMP4]]
-; TF-SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; TF-SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025
; TF-SCALABLE-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; TF-SCALABLE: [[MIDDLE_BLOCK]]:
; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]]
@@ -841,7 +833,6 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias
; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP7]]
; TF-SCALABLE-NEXT: br label %[[VECTOR_BODY:.*]]
; TF-SCALABLE: [[VECTOR_BODY]]:
-; TF-SCALABLE-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[AVL:%.*]] = sub i64 1025, [[INDEX]]
@@ -855,10 +846,9 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias
; TF-SCALABLE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT3]], ptr align 8 [[TMP10]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP9]])
; TF-SCALABLE-NEXT: [[TMP11:%.*]] = zext i32 [[TMP9]] to i64
; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP11]], [[INDEX]]
-; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], [[TMP4]]
; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT2]]
-; TF-SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; TF-SCALABLE-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; TF-SCALABLE-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025
+; TF-SCALABLE-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; TF-SCALABLE: [[MIDDLE_BLOCK]]:
; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]]
; TF-SCALABLE: [[SCALAR_PH]]:
@@ -1019,7 +1009,6 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP7]]
; TF-SCALABLE-NEXT: br label %[[VECTOR_BODY:.*]]
; TF-SCALABLE: [[VECTOR_BODY]]:
-; TF-SCALABLE-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[AVL:%.*]] = sub i64 1025, [[INDEX]]
@@ -1034,9 +1023,8 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
; TF-SCALABLE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT1]], ptr align 8 [[TMP12]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP9]])
; TF-SCALABLE-NEXT: [[TMP14:%.*]] = zext i32 [[TMP9]] to i64
; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP14]], [[INDEX]]
-; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], [[TMP4]]
; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; TF-SCALABLE-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; TF-SCALABLE-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025
; TF-SCALABLE-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; TF-SCALABLE: [[MIDDLE_BLOCK]]:
; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]]
@@ -1175,7 +1163,6 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap
; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; TF-SCALABLE-NEXT: br label %[[VECTOR_BODY:.*]]
; TF-SCALABLE: [[VECTOR_BODY]]:
-; TF-SCALABLE-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[AVL:%.*]] = sub i64 1025, [[INDEX]]
; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
@@ -1184,8 +1171,7 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap
; TF-SCALABLE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr align 8 [[TMP6]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; TF-SCALABLE-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64
; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP7]], [[INDEX]]
-; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], [[TMP4]]
-; TF-SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; TF-SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025
; TF-SCALABLE-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; TF-SCALABLE: [[MIDDLE_BLOCK]]:
; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll
index 36e8071b8874c..7c1ec9ab6c5fd 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll
@@ -18,15 +18,9 @@ define void @foo(ptr %arg) #0 {
; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[AVL:%.*]] = sub i64 3, [[EVL_BASED_IV]]
-; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr [3 x i64], ptr [[ARG]], i64 0, i64 [[EVL_BASED_IV]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP6]], i32 0
-; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> zeroinitializer, ptr align 8 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
-; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP5]] to i64
-; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP8]], [[EVL_BASED_IV]]
-; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 3, i32 2, i1 true)
+; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> zeroinitializer, ptr align 8 [[ARG]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
+; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[SCALAR_PH]]:
@@ -38,7 +32,7 @@ define void @foo(ptr %arg) #0 {
; CHECK-NEXT: store i64 0, ptr [[GEP]], align 8
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 3
-; CHECK-NEXT: br i1 [[COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: br i1 [[COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
@@ -60,9 +54,7 @@ exit:
attributes #0 = { vscale_range(2,2) }
;.
-; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]], [[META3:![0-9]+]]}
-; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
-; CHECK: [[META2]] = !{!"llvm.loop.isvectorized.tailfoldingstyle", !"evl"}
-; CHECK: [[META3]] = !{!"llvm.loop.unroll.runtime.disable"}
-; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META3]], [[META1]]}
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[META2]] = !{!"llvm.loop.isvectorized", i32 1}
;.
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll
index 1af3d22e22987..98554f1807329 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll
@@ -41,6 +41,13 @@ define void @interleave(ptr noalias %a, ptr noalias %b, i64 %N) {
; IF-EVL-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], <vscale x 4 x i64> [[VEC_IND]], i32 1
; IF-EVL-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <vscale x 4 x i32> @llvm.vp.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> align 4 [[TMP23]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[TMP26:%.*]] = add nsw <vscale x 4 x i32> [[WIDE_MASKED_GATHER5]], [[WIDE_MASKED_GATHER3]]
+; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
+; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP26]], ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
+; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP11]] to i64
+; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP14]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT2]]
+; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
+; IF-EVL-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: br label [[FOR_COND_CLEANUP:%.*]]
; IF-EVL: scalar.ph: