[llvm] 7250b66 - [VPlan] Create AVL as a phi from TC -> 0 with EVL tail folding (#151481)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 31 20:00:09 PDT 2025
Author: Luke Lau
Date: 2025-08-01T11:00:05+08:00
New Revision: 7250b66240f18ba16c7a511602dd559d224b37c0
URL: https://github.com/llvm/llvm-project/commit/7250b66240f18ba16c7a511602dd559d224b37c0
DIFF: https://github.com/llvm/llvm-project/commit/7250b66240f18ba16c7a511602dd559d224b37c0.diff
LOG: [VPlan] Create AVL as a phi from TC -> 0 with EVL tail folding (#151481)
This implements the first half of #151459, by changing the AVL so it's
no longer computed as `trip-count - EVL-based IV`, but instead a
separate scalar phi that is decremented by EVL each iteration.
This shortens the dependency chain for computing the AVL and should
eventually allow us to convert the branch condition to `branch-count
avl-next, 0`.
`simplifyBranchConditionForVFAndUF` had to be updated to prevent a
regression because this introduces a VPPhi in the header block.
Added:
Modified:
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll
llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll
llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll
llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll
llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll
llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll
llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll
llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll
llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll
llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-gather-scatter.ll
llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll
llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll
llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-intermediate-store.ll
llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-iv32.ll
llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-known-no-overflow.ll
llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll
llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll
llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll
llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll
llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll
llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll
llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll
llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll
llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll
llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll
llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll
llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll
llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index a1d12a3a01e5e..18d331b22a60d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1483,11 +1483,11 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF,
auto *CanIVTy = Plan.getCanonicalIV()->getScalarType();
if (all_of(Header->phis(),
IsaPred<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe,
- VPFirstOrderRecurrencePHIRecipe>)) {
+ VPFirstOrderRecurrencePHIRecipe, VPPhi>)) {
for (VPRecipeBase &HeaderR : make_early_inc_range(Header->phis())) {
- auto *HeaderPhiR = cast<VPHeaderPHIRecipe>(&HeaderR);
- HeaderPhiR->replaceAllUsesWith(HeaderPhiR->getStartValue());
- HeaderPhiR->eraseFromParent();
+ auto *Phi = cast<VPPhiAccessors>(&HeaderR);
+ HeaderR.getVPSingleValue()->replaceAllUsesWith(Phi->getIncomingValue(0));
+ HeaderR.eraseFromParent();
}
VPBlockBase *Preheader = VectorRegion->getSinglePredecessor();
@@ -2292,10 +2292,12 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
/// ...
/// %EVLPhi = EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI [ %StartV, %vector.ph ],
/// [ %NextEVLIV, %vector.body ]
-/// %AVL = sub original TC, %EVLPhi
+/// %AVL = phi [ trip-count, %vector.ph ], [ %NextAVL, %vector.body ]
/// %VPEVL = EXPLICIT-VECTOR-LENGTH %AVL
/// ...
-/// %NextEVLIV = add IVSize (cast i32 %VPEVVL to IVSize), %EVLPhi
+/// %OpEVL = cast i32 %VPEVL to IVSize
+/// %NextEVLIV = add IVSize %OpEVL, %EVLPhi
+/// %NextAVL = sub IVSize nuw %AVL, %OpEVL
/// ...
///
/// If MaxSafeElements is provided, the function adds the following recipes:
@@ -2306,12 +2308,14 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
/// ...
/// %EVLPhi = EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI [ %StartV, %vector.ph ],
/// [ %NextEVLIV, %vector.body ]
-/// %AVL = sub original TC, %EVLPhi
+/// %AVL = phi [ trip-count, %vector.ph ], [ %NextAVL, %vector.body ]
/// %cmp = cmp ult %AVL, MaxSafeElements
/// %SAFE_AVL = select %cmp, %AVL, MaxSafeElements
/// %VPEVL = EXPLICIT-VECTOR-LENGTH %SAFE_AVL
/// ...
-/// %NextEVLIV = add IVSize (cast i32 %VPEVL to IVSize), %EVLPhi
+/// %OpEVL = cast i32 %VPEVL to IVSize
+/// %NextEVLIV = add IVSize %OpEVL, %EVLPhi
+/// %NextAVL = sub IVSize nuw %AVL, %OpEVL
/// ...
///
bool VPlanTransforms::tryAddExplicitVectorLength(
@@ -2333,9 +2337,12 @@ bool VPlanTransforms::tryAddExplicitVectorLength(
auto *EVLPhi = new VPEVLBasedIVPHIRecipe(StartV, DebugLoc());
EVLPhi->insertAfter(CanonicalIVPHI);
VPBuilder Builder(Header, Header->getFirstNonPhi());
- // Compute original TC - IV as the AVL (application vector length).
- VPValue *AVL = Builder.createNaryOp(
- Instruction::Sub, {Plan.getTripCount(), EVLPhi}, DebugLoc(), "avl");
+ // Create the AVL (application vector length), starting from TC -> 0 in steps
+ // of EVL.
+ VPPhi *AVLPhi = Builder.createScalarPhi(
+ {Plan.getTripCount()}, DebugLoc::getCompilerGenerated(), "avl");
+ VPValue *AVL = AVLPhi;
+
if (MaxSafeElements) {
// Support for MaxSafeDist for correct loop emission.
VPValue *AVLSafe =
@@ -2362,6 +2369,11 @@ bool VPlanTransforms::tryAddExplicitVectorLength(
CanonicalIVIncrement->getDebugLoc(), "index.evl.next");
EVLPhi->addOperand(NextEVLIV);
+ VPValue *NextAVL = Builder.createOverflowingOp(
+ Instruction::Sub, {AVLPhi, OpVPEVL}, {/*hasNUW=*/true, /*hasNSW=*/false},
+ DebugLoc::getCompilerGenerated(), "avl.next");
+ AVLPhi->addOperand(NextAVL);
+
transformRecipestoEVLRecipes(Plan, *VPEVL);
// Replace all uses of VPCanonicalIVPHIRecipe by
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 5fbff69d60749..14ae4f2204310 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -172,7 +172,8 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
[&](const VPInstructionWithType *S) { return VerifyEVLUse(*S, 0); })
.Case<VPInstruction>([&](const VPInstruction *I) {
if (I->getOpcode() == Instruction::PHI ||
- I->getOpcode() == Instruction::ICmp)
+ I->getOpcode() == Instruction::ICmp ||
+ I->getOpcode() == Instruction::Sub)
return VerifyEVLUse(*I, 1);
switch (I->getOpcode()) {
case Instruction::Add:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll b/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll
index f654238a1f291..5f13089ff17fd 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll
@@ -25,7 +25,7 @@ define void @test_wide_integer_induction(ptr noalias %a, i64 %N) {
; CHECK: vector.body:
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_EVL_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[ENTRY]] ], [ [[AVL_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64
; CHECK-NEXT: [[TMP13:%.*]] = mul i64 1, [[TMP12]]
@@ -35,6 +35,7 @@ define void @test_wide_integer_induction(ptr noalias %a, i64 %N) {
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VEC_IND]], ptr align 8 [[TMP14]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP11]])
; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP11]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
+; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
index 6ae99824fa644..6e2434aefce9d 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
@@ -132,7 +132,7 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
; IF-EVL-OUTLOOP: vector.body:
; IF-EVL-OUTLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-OUTLOOP-NEXT: [[AVL:%.*]] = sub i32 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-OUTLOOP-NEXT: [[AVL:%.*]] = phi i32 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[AVL]], i32 4, i1 true)
; IF-EVL-OUTLOOP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[EVL_BASED_IV]]
; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i16> @llvm.vp.load.nxv4i16.p0(ptr align 2 [[TMP7]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
@@ -140,6 +140,7 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
; IF-EVL-OUTLOOP-NEXT: [[VP_OP:%.*]] = add <vscale x 4 x i32> [[VEC_PHI]], [[TMP9]]
; IF-EVL-OUTLOOP-NEXT: [[TMP10]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP5]])
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP5]], [[EVL_BASED_IV]]
+; IF-EVL-OUTLOOP-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP5]]
; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL-OUTLOOP: middle.block:
@@ -185,7 +186,7 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
; IF-EVL-INLOOP: vector.body:
; IF-EVL-INLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-INLOOP-NEXT: [[TMP5:%.*]] = sub i32 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-INLOOP-NEXT: [[TMP5:%.*]] = phi i32 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[TMP5]], i32 8, i1 true)
; IF-EVL-INLOOP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[EVL_BASED_IV]]
; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 8 x i16> @llvm.vp.load.nxv8i16.p0(ptr align 2 [[TMP8]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
@@ -193,6 +194,7 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = call i32 @llvm.vp.reduce.add.nxv8i32(i32 0, <vscale x 8 x i32> [[TMP14]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-INLOOP-NEXT: [[TMP11]] = add i32 [[TMP10]], [[VEC_PHI]]
; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP6]], [[EVL_BASED_IV]]
+; IF-EVL-INLOOP-NEXT: [[AVL_NEXT]] = sub nuw i32 [[TMP5]], [[TMP6]]
; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-INLOOP-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL-INLOOP: middle.block:
@@ -350,7 +352,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP: vector.body:
; IF-EVL-OUTLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-OUTLOOP-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-OUTLOOP-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
@@ -359,6 +361,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP-NEXT: [[TMP15]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP14]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP9]])
; IF-EVL-OUTLOOP-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
+; IF-EVL-OUTLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
; IF-EVL-OUTLOOP-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; IF-EVL-OUTLOOP: middle.block:
@@ -398,7 +401,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP: vector.body:
; IF-EVL-INLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-INLOOP-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-INLOOP-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-INLOOP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
@@ -406,6 +409,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: [[RDX_MINMAX]] = call i32 @llvm.smin.i32(i32 [[TMP13]], i32 [[VEC_PHI]])
; IF-EVL-INLOOP-NEXT: [[TMP14:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP14]], [[EVL_BASED_IV]]
+; IF-EVL-INLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]]
; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-INLOOP-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; IF-EVL-INLOOP: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll
index 02eee7a4e0576..6f20376e08d85 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll
@@ -117,7 +117,7 @@ define void @masked_strided_factor2(ptr noalias nocapture readonly %p, ptr noali
; PREDICATED_DATA-WITH-EVL: vector.body:
; PREDICATED_DATA-WITH-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; PREDICATED_DATA-WITH-EVL-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP0]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; PREDICATED_DATA-WITH-EVL-NEXT: [[AVL:%.*]] = sub i32 1024, [[EVL_BASED_IV]]
+; PREDICATED_DATA-WITH-EVL-NEXT: [[AVL:%.*]] = phi i32 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP1:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[AVL]], i32 16, i1 true)
; PREDICATED_DATA-WITH-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP1]], i64 0
; PREDICATED_DATA-WITH-EVL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
@@ -139,6 +139,7 @@ define void @masked_strided_factor2(ptr noalias nocapture readonly %p, ptr noali
; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP13]]
; PREDICATED_DATA-WITH-EVL-NEXT: call void @llvm.vp.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> [[TMP12]], <vscale x 16 x ptr> align 1 [[TMP14]], <vscale x 16 x i1> [[TMP2]], i32 [[TMP1]])
; PREDICATED_DATA-WITH-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP1]], [[EVL_BASED_IV]]
+; PREDICATED_DATA-WITH-EVL-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP1]]
; PREDICATED_DATA-WITH-EVL-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], 1024
; PREDICATED_DATA-WITH-EVL-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -325,7 +326,7 @@ define void @masked_strided_factor4(ptr noalias nocapture readonly %p, ptr noali
; PREDICATED_DATA-WITH-EVL: vector.body:
; PREDICATED_DATA-WITH-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; PREDICATED_DATA-WITH-EVL-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i32> [ [[TMP0]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; PREDICATED_DATA-WITH-EVL-NEXT: [[AVL:%.*]] = sub i32 1024, [[EVL_BASED_IV]]
+; PREDICATED_DATA-WITH-EVL-NEXT: [[AVL:%.*]] = phi i32 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP1:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[AVL]], i32 16, i1 true)
; PREDICATED_DATA-WITH-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP1]], i64 0
; PREDICATED_DATA-WITH-EVL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
@@ -363,6 +364,7 @@ define void @masked_strided_factor4(ptr noalias nocapture readonly %p, ptr noali
; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[Q]], <vscale x 16 x i64> [[TMP25]]
; PREDICATED_DATA-WITH-EVL-NEXT: call void @llvm.vp.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> [[TMP18]], <vscale x 16 x ptr> align 1 [[TMP26]], <vscale x 16 x i1> [[TMP2]], i32 [[TMP1]])
; PREDICATED_DATA-WITH-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP1]], [[EVL_BASED_IV]]
+; PREDICATED_DATA-WITH-EVL-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP1]]
; PREDICATED_DATA-WITH-EVL-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP27:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], 1024
; PREDICATED_DATA-WITH-EVL-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
index b82b7f3fb33b4..01df43618aad0 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
@@ -32,7 +32,7 @@ define void @test(ptr %p, i64 %a, i8 %b) {
; CHECK: vector.body:
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[FOR_COND]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR_COND]] ]
-; CHECK-NEXT: [[AVL:%.*]] = sub i32 9, [[EVL_BASED_IV]]
+; CHECK-NEXT: [[AVL:%.*]] = phi i32 [ 9, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[FOR_COND]] ]
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[AVL]], i32 2, i1 true)
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP11]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT5]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
@@ -48,6 +48,7 @@ define void @test(ptr %p, i64 %a, i8 %b) {
; CHECK-NEXT: [[TMP17:%.*]] = trunc <vscale x 2 x i32> [[TMP16]] to <vscale x 2 x i8>
; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> [[TMP17]], <vscale x 2 x ptr> align 1 [[BROADCAST_SPLAT4]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP11]])
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP11]], [[EVL_BASED_IV]]
+; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP11]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT8]]
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], 9
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_COND]], !llvm.loop [[LOOP0:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll
index 60f3181f5256e..ed507961ef825 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll
@@ -24,7 +24,7 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[AVL:%.*]] = sub i64 1025, [[EVL_BASED_IV]]
+; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1025, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP11]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP10]])
@@ -32,6 +32,7 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) {
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP7]], ptr align 8 [[TMP11]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP10]])
; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP10]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP8]], [[EVL_BASED_IV]]
+; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]]
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
@@ -88,7 +89,7 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[AVL:%.*]] = sub i64 1025, [[EVL_BASED_IV]]
+; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1025, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[EVL_BASED_IV]]
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP8]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP7]])
@@ -96,6 +97,7 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x ptr> align 8 [[TMP10]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP7]])
; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP7]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP12]], [[EVL_BASED_IV]]
+; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP12]]
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: middle.block:
@@ -149,7 +151,7 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64
; CHECK: vector.body:
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[AVL:%.*]] = sub i64 1025, [[EVL_BASED_IV]]
+; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1025, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[EVL_BASED_IV]]
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP8]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP7]])
@@ -159,6 +161,7 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64
; CHECK-NEXT: [[TMP11]] = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP12]], <vscale x 2 x i64> [[VEC_PHI]], i32 [[TMP7]])
; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP7]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP15]], [[EVL_BASED_IV]]
+; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]]
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: middle.block:
@@ -220,12 +223,13 @@ define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[AVL:%.*]] = sub i64 1025, [[EVL_BASED_IV]]
+; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1025, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr align 8 [[TMP10]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP9]])
; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP9]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP7]], [[EVL_BASED_IV]]
+; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: middle.block:
@@ -276,13 +280,14 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[AVL:%.*]] = sub i64 1025, [[EVL_BASED_IV]]
+; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1025, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; CHECK-NEXT: store i64 [[V]], ptr [[B:%.*]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr align 8 [[TMP8]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP7]])
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP9]], [[EVL_BASED_IV]]
+; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK: middle.block:
@@ -395,7 +400,7 @@ define void @vector_add_trip1024(ptr noalias nocapture %a, i64 %v, i64 %n) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]]
+; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP11]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP10]])
@@ -403,6 +408,7 @@ define void @vector_add_trip1024(ptr noalias nocapture %a, i64 %v, i64 %n) {
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP7]], ptr align 8 [[TMP11]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP10]])
; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP10]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP8]], [[EVL_BASED_IV]]
+; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]]
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll
index 9377854e67ecc..38e78321a4f36 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll
@@ -33,7 +33,7 @@ define void @test_and(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP10:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP13]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
@@ -42,6 +42,7 @@ define void @test_and(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP16]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]]
; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100
; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -158,7 +159,7 @@ define void @test_or(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP10:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP13]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
@@ -167,6 +168,7 @@ define void @test_or(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP16]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]]
; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100
; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -283,7 +285,7 @@ define void @test_xor(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP10:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP13]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
@@ -292,6 +294,7 @@ define void @test_xor(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP16]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]]
; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100
; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -408,7 +411,7 @@ define void @test_shl(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP10:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP13]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
@@ -417,6 +420,7 @@ define void @test_shl(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP16]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]]
; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100
; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -533,7 +537,7 @@ define void @test_lshr(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP10:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP13]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
@@ -542,6 +546,7 @@ define void @test_lshr(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP16]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]]
; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100
; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -658,7 +663,7 @@ define void @test_ashr(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP10:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP13]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
@@ -667,6 +672,7 @@ define void @test_ashr(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP16]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]]
; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100
; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -783,7 +789,7 @@ define void @test_add(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP10:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP13]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
@@ -792,6 +798,7 @@ define void @test_add(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP16]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]]
; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100
; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -908,7 +915,7 @@ define void @test_sub(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP10:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP13]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
@@ -917,6 +924,7 @@ define void @test_sub(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP16]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]]
; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100
; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -1033,7 +1041,7 @@ define void @test_mul(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP10:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP13]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
@@ -1042,6 +1050,7 @@ define void @test_mul(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP16]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]]
; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100
; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -1158,7 +1167,7 @@ define void @test_sdiv(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP10:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP13]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
@@ -1167,6 +1176,7 @@ define void @test_sdiv(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP16]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]]
; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100
; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -1283,7 +1293,7 @@ define void @test_udiv(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP10:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP13]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
@@ -1292,6 +1302,7 @@ define void @test_udiv(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP16]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]]
; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100
; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -1408,7 +1419,7 @@ define void @test_srem(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP10:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP13]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
@@ -1417,6 +1428,7 @@ define void @test_srem(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP16]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]]
; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100
; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -1533,7 +1545,7 @@ define void @test_urem(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP10:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP13]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
@@ -1542,6 +1554,7 @@ define void @test_urem(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP16]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]]
; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100
; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -1661,7 +1674,7 @@ define void @test_fadd(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[TMP11:%.*]] = sub i64 100, [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP11:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
@@ -1670,6 +1683,7 @@ define void @test_fadd(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_OP]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP19]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP11]], [[TMP19]]
; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100
; IF-EVL-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -1788,7 +1802,7 @@ define void @test_fsub(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[TMP11:%.*]] = sub i64 100, [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP11:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
@@ -1797,6 +1811,7 @@ define void @test_fsub(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_OP]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP19]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP11]], [[TMP19]]
; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100
; IF-EVL-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -1915,7 +1930,7 @@ define void @test_fmul(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[TMP11:%.*]] = sub i64 100, [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP11:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
@@ -1924,6 +1939,7 @@ define void @test_fmul(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_OP]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP19]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP11]], [[TMP19]]
; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100
; IF-EVL-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -2042,7 +2058,7 @@ define void @test_fdiv(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[TMP11:%.*]] = sub i64 100, [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP11:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
@@ -2051,6 +2067,7 @@ define void @test_fdiv(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_OP]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP19]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP11]], [[TMP19]]
; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100
; IF-EVL-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -2222,7 +2239,7 @@ define void @test_fneg(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[TMP11:%.*]] = sub i64 100, [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP11:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
@@ -2231,6 +2248,7 @@ define void @test_fneg(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_OP]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP19]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP11]], [[TMP19]]
; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100
; IF-EVL-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll
index f94f62df987eb..f60474534ff3c 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll
@@ -38,7 +38,7 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
@@ -49,6 +49,7 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP29]], ptr align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP18]]
; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -186,7 +187,7 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
@@ -197,6 +198,7 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP29]], ptr align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP18]]
; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -334,7 +336,7 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
@@ -345,6 +347,7 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP29]], ptr align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP18]]
; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -482,7 +485,7 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
@@ -493,6 +496,7 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP29]], ptr align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP18]]
; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -626,7 +630,7 @@ define void @vp_ctlz(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
@@ -635,6 +639,7 @@ define void @vp_ctlz(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP24]], ptr align 4 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -754,7 +759,7 @@ define void @vp_cttz(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP13]])
@@ -763,6 +768,7 @@ define void @vp_cttz(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP17]], ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP13]])
; IF-EVL-NEXT: [[TMP20:%.*]] = zext i32 [[TMP13]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP20]]
; IF-EVL-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -882,7 +888,7 @@ define void @vp_lrint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
@@ -893,6 +899,7 @@ define void @vp_lrint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP15]], ptr align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP18]]
; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -1020,7 +1027,7 @@ define void @vp_llrint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
@@ -1031,6 +1038,7 @@ define void @vp_llrint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP15]], ptr align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP18]]
; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -1158,7 +1166,7 @@ define void @vp_abs(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
@@ -1167,6 +1175,7 @@ define void @vp_abs(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP24]], ptr align 4 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll
index 2253724cc501f..ce2b790cdbd4f 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll
@@ -33,7 +33,7 @@ define void @vp_sext(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr align 4 [[TMP14]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META0:![0-9]+]]
@@ -42,6 +42,7 @@ define void @vp_sext(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP16]], ptr align 8 [[TMP17]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]]
; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -160,7 +161,7 @@ define void @vp_zext(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr align 4 [[TMP14]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META10:![0-9]+]]
@@ -169,6 +170,7 @@ define void @vp_zext(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP16]], ptr align 8 [[TMP17]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META13:![0-9]+]], !noalias [[META10]]
; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]]
; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -287,7 +289,7 @@ define void @vp_trunc(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP14]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META17:![0-9]+]]
@@ -296,6 +298,7 @@ define void @vp_trunc(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i32.p0(<vscale x 2 x i32> [[TMP16]], ptr align 4 [[TMP17]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META20:![0-9]+]], !noalias [[META17]]
; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]]
; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -414,7 +417,7 @@ define void @vp_fpext(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x float> @llvm.vp.load.nxv2f32.p0(ptr align 4 [[TMP14]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META24:![0-9]+]]
@@ -423,6 +426,7 @@ define void @vp_fpext(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2f64.p0(<vscale x 2 x double> [[TMP16]], ptr align 8 [[TMP17]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META27:![0-9]+]], !noalias [[META24]]
; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]]
; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -541,7 +545,7 @@ define void @vp_fptrunc(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x double> @llvm.vp.load.nxv2f64.p0(ptr align 8 [[TMP14]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META31:![0-9]+]]
@@ -550,6 +554,7 @@ define void @vp_fptrunc(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2f32.p0(<vscale x 2 x float> [[TMP16]], ptr align 4 [[TMP17]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META34:![0-9]+]], !noalias [[META31]]
; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]]
; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -668,7 +673,7 @@ define void @vp_sitofp(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
@@ -677,6 +682,7 @@ define void @vp_sitofp(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[TMP18]], ptr align 4 [[TMP19]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP14]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP21]]
; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -795,7 +801,7 @@ define void @vp_uitofp(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
@@ -804,6 +810,7 @@ define void @vp_uitofp(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[TMP18]], ptr align 4 [[TMP19]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP14]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP21]]
; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -922,7 +929,7 @@ define void @vp_fptosi(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
@@ -931,6 +938,7 @@ define void @vp_fptosi(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP18]], ptr align 4 [[TMP19]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP14]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP21]]
; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -1049,7 +1057,7 @@ define void @vp_fptoui(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
@@ -1058,6 +1066,7 @@ define void @vp_fptoui(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP18]], ptr align 4 [[TMP19]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP14]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP21]]
; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -1176,7 +1185,7 @@ define void @vp_inttoptr(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP16]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP14]])
@@ -1185,6 +1194,7 @@ define void @vp_inttoptr(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2p0.p0(<vscale x 2 x ptr> [[TMP18]], ptr align 8 [[TMP19]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP14]])
; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP14]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP21]]
; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -1298,7 +1308,7 @@ define void @vp_ptrtoint(ptr %a, ptr %b, i64 %N) {
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; IF-EVL-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[TMP13:%.*]] = mul i64 1, [[TMP12]]
@@ -1310,6 +1320,7 @@ define void @vp_ptrtoint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP15]], ptr align 8 [[TMP16]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP18]]
; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll
index 540c46ff3c771..d02d53b8e1207 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll
@@ -36,7 +36,7 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP: vector.body:
; IF-EVL-OUTLOOP-NEXT: [[EVL_BASED_IV1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT1:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-OUTLOOP-NEXT: [[TMP10:%.*]] = sub i64 [[N]], [[EVL_BASED_IV1]]
+; IF-EVL-OUTLOOP-NEXT: [[TMP10:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 4, i1 true)
; IF-EVL-OUTLOOP-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[EVL_BASED_IV1]]
; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
@@ -46,6 +46,7 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP-NEXT: [[TMP20]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP11]])
; IF-EVL-OUTLOOP-NEXT: [[TMP22:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT1]] = add i64 [[TMP22]], [[EVL_BASED_IV1]]
+; IF-EVL-OUTLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP22]]
; IF-EVL-OUTLOOP-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT1]], [[N]]
; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL-OUTLOOP: middle.block:
@@ -87,7 +88,7 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP: vector.body:
; IF-EVL-INLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START]], [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-INLOOP-NEXT: [[TMP11:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-INLOOP-NEXT: [[TMP11:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
; IF-EVL-INLOOP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[EVL_BASED_IV]]
; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
@@ -97,6 +98,7 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: [[TMP22]] = add i32 [[TMP21]], [[VEC_PHI]]
; IF-EVL-INLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]]
+; IF-EVL-INLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP11]], [[TMP23]]
; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-INLOOP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL-INLOOP: middle.block:
@@ -257,7 +259,7 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP: vector.body:
; IF-EVL-OUTLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP9]], [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-OUTLOOP-NEXT: [[TMP10:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-OUTLOOP-NEXT: [[TMP10:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 4, i1 true)
; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP11]], i64 0
; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
@@ -272,6 +274,7 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP-NEXT: [[PREDPHI]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[PREDPHI1]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP11]])
; IF-EVL-OUTLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]]
+; IF-EVL-OUTLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP23]]
; IF-EVL-OUTLOOP-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; IF-EVL-OUTLOOP: middle.block:
@@ -317,7 +320,7 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP: vector.body:
; IF-EVL-INLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START]], [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-INLOOP-NEXT: [[TMP11:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-INLOOP-NEXT: [[TMP11:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
; IF-EVL-INLOOP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[EVL_BASED_IV]]
; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
@@ -326,6 +329,7 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: [[TMP22]] = add i32 [[TMP21]], [[VEC_PHI]]
; IF-EVL-INLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]]
+; IF-EVL-INLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP11]], [[TMP23]]
; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-INLOOP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; IF-EVL-INLOOP: middle.block:
@@ -508,7 +512,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP-NEXT: [[EVL_BASED_IV1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT1:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-OUTLOOP-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV1]]
+; IF-EVL-OUTLOOP-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-OUTLOOP-NEXT: [[TMP13:%.*]] = mul i32 1, [[TMP12]]
; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP13]], i64 0
@@ -521,6 +525,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP-NEXT: [[TMP19]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP18]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP12]])
; IF-EVL-OUTLOOP-NEXT: [[TMP20:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT1]] = add i64 [[TMP20]], [[EVL_BASED_IV1]]
+; IF-EVL-OUTLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP20]]
; IF-EVL-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; IF-EVL-OUTLOOP-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT1]], [[N]]
; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
@@ -568,7 +573,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: [[EVL_BASED_IV1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT1:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], [[VECTOR_PH]] ], [ [[ADD:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-INLOOP-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV1]]
+; IF-EVL-INLOOP-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = mul i32 1, [[TMP11]]
; IF-EVL-INLOOP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP12]], i64 0
@@ -581,6 +586,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: [[ADD]] = add i32 [[TMP17]], [[RDX]]
; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT1]] = add i64 [[TMP19]], [[EVL_BASED_IV1]]
+; IF-EVL-INLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]]
; IF-EVL-INLOOP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; IF-EVL-INLOOP-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT1]], [[N]]
; IF-EVL-INLOOP-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
@@ -768,7 +774,7 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP-NEXT: [[IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[VEC_IND2:%.*]] = phi <vscale x 4 x i32> [ [[INDUCTION1]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT7:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-OUTLOOP-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[IV]]
+; IF-EVL-OUTLOOP-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP14]], i64 0
; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT3]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
@@ -786,6 +792,7 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP-NEXT: [[TMP24]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[PREDPHI]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP14]])
; IF-EVL-OUTLOOP-NEXT: [[TMP25:%.*]] = zext i32 [[TMP14]] to i64
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP25]], [[IV]]
+; IF-EVL-OUTLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP25]]
; IF-EVL-OUTLOOP-NEXT: [[VEC_IND_NEXT7]] = add <vscale x 4 x i32> [[VEC_IND2]], [[BROADCAST_SPLAT2]]
; IF-EVL-OUTLOOP-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK1:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
@@ -837,7 +844,7 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: [[IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START]], [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-INLOOP-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[IV]]
+; IF-EVL-INLOOP-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = mul i32 1, [[TMP11]]
; IF-EVL-INLOOP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP12]], i64 0
@@ -849,6 +856,7 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: [[TMP17]] = add i32 [[TMP16]], [[VEC_PHI]]
; IF-EVL-INLOOP-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[IV]]
+; IF-EVL-INLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP18]]
; IF-EVL-INLOOP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-INLOOP-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK1:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll
index 9299866fa195f..ae047f5f63476 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll
@@ -24,7 +24,7 @@ define void @test_sdiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ 1024, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; IF-EVL-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[A]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
@@ -36,6 +36,7 @@ define void @test_sdiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP12]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP14]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]]
; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1024
; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -144,7 +145,7 @@ define void @test_udiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ 1024, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; IF-EVL-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[A]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
@@ -156,6 +157,7 @@ define void @test_udiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP12]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP14]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]]
; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1024
; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -263,7 +265,7 @@ define void @test_srem(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ 1024, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; IF-EVL-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[A]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
@@ -275,6 +277,7 @@ define void @test_srem(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP12]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP14]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]]
; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1024
; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -382,7 +385,7 @@ define void @test_urem(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ 1024, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; IF-EVL-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[A]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
@@ -394,6 +397,7 @@ define void @test_urem(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP12]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP14]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]]
; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1024
; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll
index f8f3532ae547c..987f9460c2172 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll
@@ -32,8 +32,8 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[VP_OP_LOAD:%.*]], %[[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[TC]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[PREV_EVL:%.*]] = phi i32 [ [[TMP25]], %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[TC]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP12]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
@@ -43,6 +43,7 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_OP]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]]
; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TC]]
; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -168,8 +169,8 @@ define void @second_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[VP_OP_LOAD:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VECTOR_RECUR2:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT1]], %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[TC]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[PREV_EVL:%.*]] = phi i32 [ [[TMP32]], %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[TC]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP15]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP15]])
@@ -180,6 +181,7 @@ define void @second_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_OP]], ptr align 4 [[TMP21]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP15]])
; IF-EVL-NEXT: [[TMP23:%.*]] = zext i32 [[TMP15]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP23]]
; IF-EVL-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TC]]
; IF-EVL-NEXT: br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -325,8 +327,8 @@ define void @third_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; IF-EVL-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[VP_OP_LOAD:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VECTOR_RECUR2:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT1]], %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VECTOR_RECUR4:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT3]], %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[TC]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[PREV_EVL:%.*]] = phi i32 [ [[TMP39]], %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[TC]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP18]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP20]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP18]])
@@ -339,6 +341,7 @@ define void @third_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_OP5]], ptr align 4 [[TMP25]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP18]])
; IF-EVL-NEXT: [[TMP27:%.*]] = zext i32 [[TMP18]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP27]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP27]]
; IF-EVL-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TC]]
; IF-EVL-NEXT: br i1 [[TMP28]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
@@ -639,8 +642,8 @@ define void @first_order_recurrence_indvar(ptr noalias %A, i64 %TC) {
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 2 x i64> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[TC]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[PREV_EVL:%.*]] = phi i32 [ [[TMP5]], %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[TC]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP11]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; IF-EVL-NEXT: [[TMP7:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP7]]
@@ -652,6 +655,7 @@ define void @first_order_recurrence_indvar(ptr noalias %A, i64 %TC) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP15]], ptr align 8 [[TMP9]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP21]]
; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TC]]
; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-gather-scatter.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-gather-scatter.ll
index 0eab77d6c8d91..2aeb1d0b25b5d 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-gather-scatter.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-gather-scatter.ll
@@ -27,7 +27,7 @@ define void @gather_scatter(ptr noalias %in, ptr noalias %out, ptr noalias %inde
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_EVL_NEXT:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[ENTRY]] ], [ [[AVL_NEXT:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; IF-EVL-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[TMP13:%.*]] = mul i64 1, [[TMP12]]
@@ -41,6 +41,7 @@ define void @gather_scatter(ptr noalias %in, ptr noalias %out, ptr noalias %inde
; IF-EVL-NEXT: call void @llvm.vp.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> [[WIDE_MASKED_GATHER2]], <vscale x 2 x ptr> align 4 [[TMP16]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[TMP17:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP17]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP17]]
; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; IF-EVL-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll
index a5ebfe6470d13..3e23df78e0b68 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll
@@ -26,7 +26,7 @@ define i32 @add(ptr %a, i64 %n, i32 %start) {
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
@@ -34,6 +34,7 @@ define i32 @add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP15]] = add i32 [[TMP14]], [[VEC_PHI]]
; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL: middle.block:
@@ -240,7 +241,7 @@ define i32 @or(ptr %a, i64 %n, i32 %start) {
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
@@ -248,6 +249,7 @@ define i32 @or(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP15]] = or i32 [[TMP14]], [[VEC_PHI]]
; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; IF-EVL: middle.block:
@@ -347,7 +349,7 @@ define i32 @and(ptr %a, i64 %n, i32 %start) {
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
@@ -355,6 +357,7 @@ define i32 @and(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP15]] = and i32 [[TMP14]], [[VEC_PHI]]
; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; IF-EVL: middle.block:
@@ -454,7 +457,7 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) {
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
@@ -462,6 +465,7 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP15]] = xor i32 [[TMP14]], [[VEC_PHI]]
; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; IF-EVL: middle.block:
@@ -561,7 +565,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
@@ -569,6 +573,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[RDX_MINMAX]] = call i32 @llvm.smin.i32(i32 [[TMP14]], i32 [[VEC_PHI]])
; IF-EVL-NEXT: [[TMP8:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]]
; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; IF-EVL: middle.block:
@@ -671,7 +676,7 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) {
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
@@ -679,6 +684,7 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[RDX_MINMAX]] = call i32 @llvm.smax.i32(i32 [[TMP14]], i32 [[VEC_PHI]])
; IF-EVL-NEXT: [[TMP8:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]]
; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; IF-EVL: middle.block:
@@ -781,7 +787,7 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) {
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
@@ -789,6 +795,7 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[RDX_MINMAX]] = call i32 @llvm.umin.i32(i32 [[TMP14]], i32 [[VEC_PHI]])
; IF-EVL-NEXT: [[TMP8:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]]
; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; IF-EVL: middle.block:
@@ -891,7 +898,7 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) {
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
@@ -899,6 +906,7 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[RDX_MINMAX]] = call i32 @llvm.umax.i32(i32 [[TMP14]], i32 [[VEC_PHI]])
; IF-EVL-NEXT: [[TMP8:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]]
; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; IF-EVL: middle.block:
@@ -1001,7 +1009,7 @@ define float @fadd(ptr %a, i64 %n, float %start) {
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
@@ -1009,6 +1017,7 @@ define float @fadd(ptr %a, i64 %n, float %start) {
; IF-EVL-NEXT: [[TMP15]] = fadd reassoc float [[TMP14]], [[VEC_PHI]]
; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; IF-EVL: middle.block:
@@ -1215,7 +1224,7 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 {
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX_SELECT:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
@@ -1224,6 +1233,7 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 {
; IF-EVL-NEXT: [[RDX_MINMAX_SELECT]] = select fast i1 [[RDX_MINMAX_CMP]], float [[TMP14]], float [[VEC_PHI]]
; IF-EVL-NEXT: [[TMP8:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]]
; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
; IF-EVL: middle.block:
@@ -1327,7 +1337,7 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 {
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX_SELECT:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
@@ -1336,6 +1346,7 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 {
; IF-EVL-NEXT: [[RDX_MINMAX_SELECT]] = select fast i1 [[RDX_MINMAX_CMP]], float [[TMP14]], float [[VEC_PHI]]
; IF-EVL-NEXT: [[TMP8:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]]
; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
; IF-EVL: middle.block:
@@ -1655,7 +1666,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) {
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP10]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
@@ -1666,6 +1677,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) {
; IF-EVL-NEXT: [[TMP18]] = fadd reassoc float [[TMP17]], [[VEC_PHI]]
; IF-EVL-NEXT: [[TMP11:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP11]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]]
; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]]
; IF-EVL: middle.block:
@@ -1774,7 +1786,7 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
@@ -1782,6 +1794,7 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
; IF-EVL-NEXT: [[TMP16]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> [[TMP13]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP11]])
; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]]
; IF-EVL: middle.block:
@@ -1890,7 +1903,7 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
@@ -1898,6 +1911,7 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
; IF-EVL-NEXT: [[TMP16]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> [[TMP13]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP11]])
; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]]
; IF-EVL: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll
index 2678989731634..1f7c51800f3e0 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll
@@ -28,7 +28,7 @@ define void @interleave(ptr noalias %a, ptr noalias %b, i64 %N) {
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP13:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP13]]
@@ -43,6 +43,7 @@ define void @interleave(ptr noalias %a, ptr noalias %b, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP26]], ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP14]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]]
; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT2]]
; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-intermediate-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-intermediate-store.ll
index 51224ba7aecfa..be6ae1d4a0f6b 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-intermediate-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-intermediate-store.ll
@@ -44,7 +44,7 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr)
; IF-EVL-OUTLOOP: vector.body:
; IF-EVL-OUTLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_EVL_NEXT:%.*]], [[FOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP10]], [[ENTRY]] ], [ [[TMP19:%.*]], [[FOR_BODY]] ]
-; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = phi i64 [ [[N]], [[ENTRY]] ], [ [[AVL_NEXT:%.*]], [[FOR_BODY]] ]
; IF-EVL-OUTLOOP-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
; IF-EVL-OUTLOOP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[EVL_BASED_IV]]
; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META0:![0-9]+]]
@@ -52,6 +52,7 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr)
; IF-EVL-OUTLOOP-NEXT: [[TMP19]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP12]])
; IF-EVL-OUTLOOP-NEXT: [[TMP21:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]
+; IF-EVL-OUTLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP11]], [[TMP21]]
; IF-EVL-OUTLOOP-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; IF-EVL-OUTLOOP: middle.block:
@@ -100,7 +101,7 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr)
; IF-EVL-INLOOP: vector.body:
; IF-EVL-INLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START]], [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP13]], i32 4, i1 true)
; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[EVL_BASED_IV]]
; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP19]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]]), !alias.scope [[META0:![0-9]+]]
@@ -108,6 +109,7 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr)
; IF-EVL-INLOOP-NEXT: [[TMP22]] = add i32 [[TMP21]], [[VEC_PHI]]
; IF-EVL-INLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP14]] to i64
; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]]
+; IF-EVL-INLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP13]], [[TMP23]]
; IF-EVL-INLOOP-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-INLOOP-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; IF-EVL-INLOOP: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-iv32.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-iv32.ll
index 7082c12db7ea1..d474a03b90ee9 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-iv32.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-iv32.ll
@@ -23,13 +23,14 @@ define void @iv32(ptr noalias %a, ptr noalias %b, i32 %N) {
; IF-EVL-NEXT: br label [[FOR_BODY:%.*]]
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_EVL_NEXT:%.*]], [[FOR_BODY]] ]
-; IF-EVL-NEXT: [[TMP11:%.*]] = sub i32 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP11:%.*]] = phi i32 [ [[N]], [[ENTRY]] ], [ [[AVL_NEXT:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[TMP11]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[EVL_BASED_IV]]
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_OP_LOAD]], ptr align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i32 [[TMP12]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i32 [[TMP11]], [[TMP12]]
; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-known-no-overflow.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-known-no-overflow.ll
index 3b3b798511d4e..06c6bfe64dd2f 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-known-no-overflow.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-known-no-overflow.ll
@@ -27,7 +27,7 @@ define void @trip_count_max_1024(ptr %p, i64 %tc) vscale_range(2, 1024) {
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[AVL:%.*]] = sub i64 [[UMAX]], [[EVL_BASED_IV]]
+; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[UMAX]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[P]], i64 [[EVL_BASED_IV]]
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP11]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP9]])
@@ -35,6 +35,7 @@ define void @trip_count_max_1024(ptr %p, i64 %tc) vscale_range(2, 1024) {
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP11]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP9]])
; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP9]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP13]], [[EVL_BASED_IV]]
+; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP13]]
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[UMAX]]
; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -95,7 +96,7 @@ define void @overflow_at_0(ptr %p, i64 %tc) vscale_range(2, 1024) {
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[AVL:%.*]] = sub i64 [[TC]], [[EVL_BASED_IV]]
+; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TC]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[P]], i64 [[EVL_BASED_IV]]
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP11]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP9]])
@@ -103,6 +104,7 @@ define void @overflow_at_0(ptr %p, i64 %tc) vscale_range(2, 1024) {
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP11]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP9]])
; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP9]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP13]], [[EVL_BASED_IV]]
+; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP13]]
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TC]]
; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -163,7 +165,7 @@ define void @no_overflow_at_0(ptr %p, i64 %tc) vscale_range(2, 1024) {
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[AVL:%.*]] = sub i64 [[TC_ADD]], [[EVL_BASED_IV]]
+; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TC_ADD]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[P]], i64 [[EVL_BASED_IV]]
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
@@ -171,6 +173,7 @@ define void @no_overflow_at_0(ptr %p, i64 %tc) vscale_range(2, 1024) {
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP5]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP9]], [[EVL_BASED_IV]]
+; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TC_ADD]]
; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll
index b6f2a50c46774..5f407fcca259d 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll
@@ -23,7 +23,7 @@ define void @masked_loadstore(ptr noalias %a, ptr noalias %b, i64 %n) {
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP9:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
@@ -34,6 +34,7 @@ define void @masked_loadstore(ptr noalias %a, ptr noalias %b, i64 %n) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_OP]], ptr align 4 [[TMP19]], <vscale x 4 x i1> [[TMP17]], i32 [[TMP10]])
; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP9]], [[TMP21]]
; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll
index 1a6e60d801ea1..59d137011d288 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll
@@ -26,13 +26,14 @@ define float @fadd(ptr noalias nocapture readonly %a, i64 %n) {
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP9:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT: [[TMP14]] = call float @llvm.vp.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP9]], [[TMP15]]
; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll
index ba2ee84f7dc66..2d5718b24cb30 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll
@@ -25,7 +25,7 @@ define i32 @add(ptr %a, i64 %n, i32 %start) {
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
@@ -33,6 +33,7 @@ define i32 @add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP14]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP10]])
; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]]
; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL: middle.block:
@@ -242,7 +243,7 @@ define i32 @or(ptr %a, i64 %n, i32 %start) {
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
@@ -250,6 +251,7 @@ define i32 @or(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP14]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP10]])
; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]]
; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; IF-EVL: middle.block:
@@ -352,7 +354,7 @@ define i32 @and(ptr %a, i64 %n, i32 %start) {
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
@@ -360,6 +362,7 @@ define i32 @and(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP14]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP10]])
; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]]
; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; IF-EVL: middle.block:
@@ -462,7 +465,7 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) {
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
@@ -470,6 +473,7 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP14]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP10]])
; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]]
; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; IF-EVL: middle.block:
@@ -573,7 +577,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
@@ -582,6 +586,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP14]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; IF-EVL: middle.block:
@@ -690,7 +695,7 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) {
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
@@ -699,6 +704,7 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP14]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; IF-EVL: middle.block:
@@ -807,7 +813,7 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) {
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
@@ -816,6 +822,7 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP14]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; IF-EVL: middle.block:
@@ -924,7 +931,7 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) {
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
@@ -933,6 +940,7 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP14]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; IF-EVL: middle.block:
@@ -1040,7 +1048,7 @@ define float @fadd(ptr %a, i64 %n, float %start) {
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
@@ -1048,6 +1056,7 @@ define float @fadd(ptr %a, i64 %n, float %start) {
; IF-EVL-NEXT: [[TMP14]] = call <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[VP_OP]], <vscale x 4 x float> [[VEC_PHI]], i32 [[TMP10]])
; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]]
; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; IF-EVL: middle.block:
@@ -1258,7 +1267,7 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 {
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
@@ -1267,6 +1276,7 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 {
; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[TMP14]], <vscale x 4 x float> [[VEC_PHI]], i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
; IF-EVL: middle.block:
@@ -1375,7 +1385,7 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 {
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
@@ -1384,6 +1394,7 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 {
; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[TMP14]], <vscale x 4 x float> [[VEC_PHI]], i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
; IF-EVL: middle.block:
@@ -1707,7 +1718,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) {
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
@@ -1717,6 +1728,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) {
; IF-EVL-NEXT: [[TMP17]] = call <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[TMP16]], <vscale x 4 x float> [[VEC_PHI]], i32 [[TMP10]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP18]]
; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]]
; IF-EVL: middle.block:
@@ -1826,7 +1838,7 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
@@ -1834,6 +1846,7 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> [[TMP13]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]]
; IF-EVL: middle.block:
@@ -1942,7 +1955,7 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
@@ -1950,6 +1963,7 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> [[TMP13]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]]
; IF-EVL: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll
index 297a41008e415..e2db28d54ac58 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll
@@ -23,7 +23,7 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL:%.*]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], -1
@@ -47,6 +47,7 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE3]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP20:%.*]] = zext i32 [[TMP5]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP20]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP20]]
; IF-EVL-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1024
; IF-EVL-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL: middle.block:
@@ -164,7 +165,7 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL:%.*]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[OFFSET_IDX3:%.*]] = trunc i64 [[EVL_BASED_IV]] to i32
@@ -194,6 +195,7 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE5]], ptr align 4 [[TMP25]], <vscale x 4 x i1> [[VP_REVERSE_MASK6]], i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP28:%.*]] = zext i32 [[TMP5]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP28]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP28]]
; IF-EVL-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1024
; IF-EVL-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; IF-EVL: middle.block:
@@ -343,7 +345,7 @@ define void @multiple_reverse_vector_pointer(ptr noalias %a, ptr noalias %b, ptr
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 1025, [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ 1025, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true)
; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1024, [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[OFFSET_IDX]]
@@ -377,6 +379,7 @@ define void @multiple_reverse_vector_pointer(ptr noalias %a, ptr noalias %b, ptr
; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_REVERSE2]], ptr align 1 [[TMP26]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-NEXT: [[TMP27:%.*]] = zext i32 [[TMP6]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP27]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP27]]
; IF-EVL-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025
; IF-EVL-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; IF-EVL: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll
index 47f1cfb4589c3..1c78b25e114c4 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll
@@ -26,7 +26,7 @@ define void @test(ptr %p) {
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[TMP5:%.*]] = sub i64 200, [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP5:%.*]] = phi i64 [ 200, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP5]], i32 2, i1 true)
; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP8]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP6]])
@@ -35,6 +35,7 @@ define void @test(ptr %p) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP_LOAD]], ptr align 8 [[TMP11]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-NEXT: [[TMP13:%.*]] = zext i32 [[TMP6]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP13]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP5]], [[TMP13]]
; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 200
; IF-EVL-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL: middle.block:
@@ -356,7 +357,7 @@ define void @trivial_due_max_vscale(ptr %p) {
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[TMP5:%.*]] = sub i64 200, [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP5:%.*]] = phi i64 [ 200, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP5]], i32 2, i1 true)
; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 32 [[TMP8]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP6]])
@@ -365,6 +366,7 @@ define void @trivial_due_max_vscale(ptr %p) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP_LOAD]], ptr align 32 [[TMP11]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-NEXT: [[TMP13:%.*]] = zext i32 [[TMP6]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP13]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP5]], [[TMP13]]
; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 200
; IF-EVL-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; IF-EVL: middle.block:
@@ -461,7 +463,7 @@ define void @no_high_lmul_or_interleave(ptr %p) {
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 3002, [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ 3002, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP9:%.*]] = icmp ult i64 [[AVL]], 1024
; IF-EVL-NEXT: [[SAFE_AVL:%.*]] = select i1 [[TMP9]], i64 [[AVL]], i64 1024
; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[SAFE_AVL]], i32 1, i1 true)
@@ -472,6 +474,7 @@ define void @no_high_lmul_or_interleave(ptr %p) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv1i64.p0(<vscale x 1 x i64> [[VP_OP_LOAD]], ptr align 32 [[TMP5]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP11]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]]
; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 3002
; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; IF-EVL: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll
index 4ce2da76150a4..687a2e7bf9312 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll
@@ -26,7 +26,7 @@ define void @lshift_significand(i32 %n, ptr nocapture writeonly %dst) {
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[AVL:%.*]] = sub i64 [[TMP0]], [[EVL_BASED_IV]]
+; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP0]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[SPEC_SELECT]], [[EVL_BASED_IV]]
; CHECK-NEXT: [[TMP12:%.*]] = sub nuw nsw i64 1, [[OFFSET_IDX]]
@@ -41,6 +41,7 @@ define void @lshift_significand(i32 %n, ptr nocapture writeonly %dst) {
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_REVERSE]], ptr align 8 [[TMP19]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP11]])
; CHECK-NEXT: [[TMP20:%.*]] = zext i32 [[TMP11]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]]
+; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP20]]
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TMP0]]
; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll
index c21847fe0db89..24649729f43ba 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll
@@ -166,10 +166,11 @@ define void @truncate_to_i1_used_by_branch(i8 %x, ptr %dst) #0 {
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[AVL:%.*]] = sub i32 9, [[EVL_BASED_IV]]
+; CHECK-NEXT: [[AVL:%.*]] = phi i32 [ 9, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[AVL]], i32 4, i1 true)
; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> zeroinitializer, <vscale x 4 x ptr> align 1 [[BROADCAST_SPLAT2]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP6]])
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP6]], [[EVL_BASED_IV]]
+; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP6]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], 9
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll
index b40d98077805d..dfdc893570810 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll
@@ -22,11 +22,12 @@ define void @truncate_to_minimal_bitwidths_widen_cast_recipe(ptr %src) {
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[AVL:%.*]] = sub i64 9, [[EVL_BASED_IV]]
+; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 9, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> zeroinitializer, <vscale x 8 x ptr> align 1 zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP7]])
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP9]], [[EVL_BASED_IV]]
+; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 9
; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll b/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll
index 46ecb19336751..6476373c08401 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll
@@ -33,7 +33,7 @@ define void @type_info_cache_clobber(ptr %dstv, ptr %src, i64 %wide.trip.count)
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[AVL:%.*]] = sub i64 [[TMP0]], [[EVL_BASED_IV]]
+; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP0]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[EVL_BASED_IV]]
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr align 1 [[TMP13]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]]), !alias.scope [[META0:![0-9]+]]
@@ -47,6 +47,7 @@ define void @type_info_cache_clobber(ptr %dstv, ptr %src, i64 %wide.trip.count)
; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> align 2 zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
; CHECK-NEXT: [[TMP20:%.*]] = zext i32 [[TMP11]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]]
+; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP20]]
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TMP0]]
; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
index 41b96365af59d..568aa953de511 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
@@ -99,7 +99,7 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6
; TF-SCALABLE-NEXT: br label %[[VECTOR_BODY:.*]]
; TF-SCALABLE: [[VECTOR_BODY]]:
; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; TF-SCALABLE-NEXT: [[AVL:%.*]] = sub i64 1025, [[INDEX]]
+; TF-SCALABLE-NEXT: [[AVL:%.*]] = phi i64 [ 1025, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; TF-SCALABLE-NEXT: [[TMP5:%.*]] = load i64, ptr [[B]], align 8
; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP5]], i64 0
@@ -108,6 +108,7 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6
; TF-SCALABLE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr align 8 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP6]])
; TF-SCALABLE-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP8]], [[INDEX]]
+; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]]
; TF-SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025
; TF-SCALABLE-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; TF-SCALABLE: [[MIDDLE_BLOCK]]:
@@ -410,7 +411,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
; TF-SCALABLE: [[VECTOR_BODY]]:
; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; TF-SCALABLE-NEXT: [[AVL:%.*]] = sub i64 1025, [[INDEX]]
+; TF-SCALABLE-NEXT: [[AVL:%.*]] = phi i64 [ 1025, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP7]], i64 0
; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT3]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
@@ -428,9 +429,10 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
; TF-SCALABLE-NEXT: call void @llvm.vp.store.nxv4i64.p0(<vscale x 4 x i64> [[PREDPHI]], ptr align 8 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP7]])
; TF-SCALABLE-NEXT: [[TMP15:%.*]] = zext i32 [[TMP7]] to i64
; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP15]], [[INDEX]]
+; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]]
; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
-; TF-SCALABLE-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025
-; TF-SCALABLE-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; TF-SCALABLE-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025
+; TF-SCALABLE-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; TF-SCALABLE: [[MIDDLE_BLOCK]]:
; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]]
; TF-SCALABLE: [[SCALAR_PH]]:
@@ -569,7 +571,7 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt
; TF-SCALABLE-NEXT: br label %[[VECTOR_BODY:.*]]
; TF-SCALABLE: [[VECTOR_BODY]]:
; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; TF-SCALABLE-NEXT: [[AVL:%.*]] = sub i64 1025, [[INDEX]]
+; TF-SCALABLE-NEXT: [[AVL:%.*]] = phi i64 [ 1025, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; TF-SCALABLE-NEXT: [[TMP5:%.*]] = load i64, ptr [[B]], align 1
; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP5]], i64 0
@@ -578,6 +580,7 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt
; TF-SCALABLE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr align 8 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP6]])
; TF-SCALABLE-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP8]], [[INDEX]]
+; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]]
; TF-SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025
; TF-SCALABLE-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; TF-SCALABLE: [[MIDDLE_BLOCK]]:
@@ -707,13 +710,14 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
; TF-SCALABLE-NEXT: br label %[[VECTOR_BODY:.*]]
; TF-SCALABLE: [[VECTOR_BODY]]:
; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; TF-SCALABLE-NEXT: [[AVL:%.*]] = sub i64 1025, [[INDEX]]
+; TF-SCALABLE-NEXT: [[AVL:%.*]] = phi i64 [ 1025, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[B]], align 8
; TF-SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
; TF-SCALABLE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr align 8 [[TMP6]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; TF-SCALABLE-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64
; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP7]], [[INDEX]]
+; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]]
; TF-SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025
; TF-SCALABLE-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; TF-SCALABLE: [[MIDDLE_BLOCK]]:
@@ -865,7 +869,7 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias
; TF-SCALABLE: [[VECTOR_BODY]]:
; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; TF-SCALABLE-NEXT: [[AVL:%.*]] = sub i64 1025, [[INDEX]]
+; TF-SCALABLE-NEXT: [[AVL:%.*]] = phi i64 [ 1025, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; TF-SCALABLE-NEXT: [[TMP13:%.*]] = zext i32 [[TMP9]] to i64
; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP13]]
@@ -876,6 +880,7 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias
; TF-SCALABLE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT3]], ptr align 8 [[TMP10]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP9]])
; TF-SCALABLE-NEXT: [[TMP11:%.*]] = zext i32 [[TMP9]] to i64
; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP11]], [[INDEX]]
+; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]]
; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT2]]
; TF-SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025
; TF-SCALABLE-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
@@ -1041,7 +1046,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
; TF-SCALABLE: [[VECTOR_BODY]]:
; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; TF-SCALABLE-NEXT: [[AVL:%.*]] = sub i64 1025, [[INDEX]]
+; TF-SCALABLE-NEXT: [[AVL:%.*]] = phi i64 [ 1025, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; TF-SCALABLE-NEXT: [[TMP11:%.*]] = zext i32 [[TMP9]] to i64
; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP11]]
@@ -1053,6 +1058,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
; TF-SCALABLE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT1]], ptr align 8 [[TMP12]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP9]])
; TF-SCALABLE-NEXT: [[TMP14:%.*]] = zext i32 [[TMP9]] to i64
; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP14]], [[INDEX]]
+; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]]
; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; TF-SCALABLE-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025
; TF-SCALABLE-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
@@ -1194,13 +1200,14 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap
; TF-SCALABLE-NEXT: br label %[[VECTOR_BODY:.*]]
; TF-SCALABLE: [[VECTOR_BODY]]:
; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; TF-SCALABLE-NEXT: [[AVL:%.*]] = sub i64 1025, [[INDEX]]
+; TF-SCALABLE-NEXT: [[AVL:%.*]] = phi i64 [ 1025, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[B]], align 1
; TF-SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
; TF-SCALABLE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr align 8 [[TMP6]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; TF-SCALABLE-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64
; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP7]], [[INDEX]]
+; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]]
; TF-SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025
; TF-SCALABLE-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; TF-SCALABLE: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll
index 65fc18af7169a..85116feab6a33 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll
@@ -23,7 +23,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[TMP11:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP11:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
@@ -34,6 +34,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_OP]], ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[TMP20:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP11]], [[TMP20]]
; IF-EVL-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; IF-EVL-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll
index 7f1066c64c88e..c05878995f474 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll
@@ -20,7 +20,7 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: vector.body:
; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
@@ -35,6 +35,7 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[SMAX]]>, vp<[[EVL]]>
; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT: No successors
@@ -76,7 +77,7 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: vector.body:
; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
@@ -91,6 +92,7 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[SMIN]]>, vp<[[EVL]]>
; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT: No successors
@@ -132,7 +134,7 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: vector.body:
; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
@@ -147,6 +149,7 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[UMAX]]>, vp<[[EVL]]>
; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT: No successors
@@ -188,7 +191,7 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: vector.body:
; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
@@ -203,6 +206,7 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[UMIN]]>, vp<[[EVL]]>
; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT: No successors
@@ -244,7 +248,7 @@ define void @vp_ctlz(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: vector.body:
; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
@@ -256,6 +260,7 @@ define void @vp_ctlz(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[CTLZ]]>, vp<[[EVL]]>
; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT: No successors
@@ -295,7 +300,7 @@ define void @vp_cttz(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: vector.body:
; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
@@ -307,6 +312,7 @@ define void @vp_cttz(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[CTTZ]]>, vp<[[EVL]]>
; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT: No successors
@@ -346,7 +352,7 @@ define void @vp_lrint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: vector.body:
; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
@@ -360,6 +366,7 @@ define void @vp_lrint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[TRUNC]]>, vp<[[EVL]]>
; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT: No successors
@@ -401,7 +408,7 @@ define void @vp_llrint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: vector.body:
; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
@@ -415,6 +422,7 @@ define void @vp_llrint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[TRUNC]]>, vp<[[EVL]]>
; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT: No successors
@@ -456,7 +464,7 @@ define void @vp_abs(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: vector.body:
; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
@@ -468,6 +476,7 @@ define void @vp_abs(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[ABS]]>, vp<[[EVL]]>
; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT: No successors
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
index c1b656aec9517..8d3fe484e6468 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
@@ -19,7 +19,7 @@ define void @vp_sext(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: vector.body:
; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
@@ -31,6 +31,7 @@ define void @vp_sext(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[SEXT]]>, vp<[[EVL]]>
; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT: No successors
@@ -72,7 +73,7 @@ define void @vp_zext(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: vector.body:
; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
@@ -84,6 +85,7 @@ define void @vp_zext(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[ZEXT]]>, vp<[[EVL]]>
; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT: No successors
@@ -123,7 +125,7 @@ define void @vp_trunc(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: vector.body:
; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
@@ -135,6 +137,7 @@ define void @vp_trunc(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[TRUNC]]>, vp<[[EVL]]>
; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT: No successors
@@ -174,7 +177,7 @@ define void @vp_fpext(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: vector.body:
; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
@@ -186,6 +189,7 @@ define void @vp_fpext(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[FPEXT]]>, vp<[[EVL]]>
; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT: No successors
@@ -225,7 +229,7 @@ define void @vp_fptrunc(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: vector.body:
; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
@@ -237,6 +241,7 @@ define void @vp_fptrunc(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[FPTRUNC]]>, vp<[[EVL]]>
; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT: No successors
@@ -276,7 +281,7 @@ define void @vp_sitofp(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: vector.body:
; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
@@ -288,6 +293,7 @@ define void @vp_sitofp(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[SITOFP]]>, vp<[[EVL]]>
; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT: No successors
@@ -327,7 +333,7 @@ define void @vp_uitofp(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: vector.body:
; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
@@ -339,6 +345,7 @@ define void @vp_uitofp(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[UITOFP]]>, vp<[[EVL]]>
; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT: No successors
@@ -378,7 +385,7 @@ define void @vp_fptosi(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: vector.body:
; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
@@ -390,6 +397,7 @@ define void @vp_fptosi(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[FPTOSI]]>, vp<[[EVL]]>
; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT: No successors
@@ -429,7 +437,7 @@ define void @vp_fptoui(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: vector.body:
; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
@@ -441,6 +449,7 @@ define void @vp_fptoui(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[FPTOUI]]>, vp<[[EVL]]>
; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT: No successors
@@ -480,7 +489,7 @@ define void @vp_inttoptr(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: vector.body:
; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
@@ -492,6 +501,7 @@ define void @vp_inttoptr(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[INTTOPTR]]>, vp<[[EVL]]>
; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT: No successors
@@ -532,7 +542,7 @@ define void @vp_ptrtoint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: EMIT vp<[[INDEX:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[INDEX_NEXT:%.+]]>
; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[INDEX_EVL:%.+]]> = phi ir<0>, vp<[[INDEX_EVL_NEXT:%.+]]>
; IF-EVL-NEXT: ir<[[IV:%.+]]> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[EVL]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<[[N]]>, vp<[[INDEX_EVL]]>
+; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
; IF-EVL-NEXT: vp<[[SCALAR_STEPS:%.+]]> = SCALAR-STEPS vp<[[INDEX_EVL]]>, ir<1>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN-GEP Inv[Var] ir<[[GEP:%.+]]> = getelementptr inbounds ir<%b>, ir<[[IV]]>
@@ -542,6 +552,7 @@ define void @vp_ptrtoint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: WIDEN vp.store vp<[[VECTOR_PTR]]>, ir<[[PTRTOINT]]>, vp<[[EVL]]>
; IF-EVL-NEXT: EMIT-SCALAR vp<[[ZEXT:%.+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[INDEX_EVL_NEXT]]> = add vp<[[ZEXT]]>, vp<[[INDEX_EVL]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[ZEXT]]>
; IF-EVL-NEXT: EMIT vp<[[INDEX_NEXT]]> = add vp<[[INDEX]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[INDEX_NEXT]]>, vp<[[VTC]]>
; IF-EVL-NEXT: No successors
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll
index 990060265c452..ab4bb90b55b70 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll
@@ -26,8 +26,8 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
; IF-EVL-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<[[FOR_PHI:%.+]]> = phi ir<33>, ir<[[LD:%.+]]>
+; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%TC>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
; IF-EVL-NEXT: EMIT-SCALAR vp<[[PREV_EVL:%.+]]> = phi [ vp<[[VF32]]>, vector.ph ], [ vp<[[EVL:%.+]]>, vector.body ]
-; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%TC>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds nuw ir<%A>, vp<[[ST]]
@@ -40,6 +40,7 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[ADD]]>, vp<[[EVL]]>
; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT: No successors
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
index 1c9554da688cc..dff4971ffdfa1 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
@@ -40,7 +40,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
; IF-EVL-OUTLOOP-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
; IF-EVL-OUTLOOP-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX_PHI:%.+]]> = phi vp<[[RDX_START]]>, vp<[[RDX_SELECT:%.+]]>
-; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%n>, vp<[[EVL_PHI]]>
+; IF-EVL-OUTLOOP-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%n>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
; IF-EVL-OUTLOOP-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
; IF-EVL-OUTLOOP-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
; IF-EVL-OUTLOOP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
@@ -50,6 +50,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP-NEXT: WIDEN-INTRINSIC vp<[[RDX_SELECT]]> = call llvm.vp.merge(ir<true>, ir<[[ADD]]>, ir<[[RDX_PHI]]>, vp<[[EVL]]>)
; IF-EVL-OUTLOOP-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-OUTLOOP-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-OUTLOOP-NEXT: No successors
@@ -79,7 +80,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
; IF-EVL-INLOOP-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
; IF-EVL-INLOOP-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX_PHI:%.+]]> = phi vp<[[RDX_START]]>, ir<[[RDX_NEXT:%.+]]>
-; IF-EVL-INLOOP-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%n>, vp<[[EVL_PHI]]>
+; IF-EVL-INLOOP-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%n>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
; IF-EVL-INLOOP-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
; IF-EVL-INLOOP-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
; IF-EVL-INLOOP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
@@ -88,6 +89,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: REDUCE ir<[[ADD:%.+]]> = ir<[[RDX_PHI]]> + vp.reduce.add (ir<[[LD1]]>, vp<[[EVL]]>)
; IF-EVL-INLOOP-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-INLOOP-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-INLOOP-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-INLOOP-NEXT: No successors
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll
index 42a846a52261c..b3a611eac72fc 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll
@@ -24,7 +24,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
; IF-EVL-NEXT: vector.body:
; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
-; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
@@ -39,6 +39,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[ADD]]>, vp<[[EVL]]>
; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT: No successors
More information about the llvm-commits
mailing list