[llvm] [VPlan] Replace EVL branch condition with (branch-on-count AVLNext, 0) (PR #152167)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 5 08:58:29 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
@llvm/pr-subscribers-llvm-transforms
Author: Luke Lau (lukel97)
Changes:
This changes the branch condition to use the AVL's backedge value instead of the EVL-based IV.
This allows us to emit bnez on RISC-V and removes a use of the trip count, which should reduce register pressure.
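Concretely, the latch exit test goes from comparing the incremented EVL-based IV against the trip count to comparing the AVL's backedge value against zero. The rewrite in canonicalizeEVLLoops (shown in full in the diff below) boils down to:

```cpp
// Before: (branch-on-count EVLIVInc, VectorTripCount)
// After:  (branch-on-count AVLNext, 0)
Type *AVLTy = VPTypeAnalysis(Plan).inferScalarType(AVLNext);
LatchExitingBr->setOperand(0, AVLNext);
LatchExitingBr->setOperand(
    1, Plan.getOrAddLiveIn(ConstantInt::getNullValue(AVLTy)));
```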
To help match the AVL's backedge value, I've added some new pattern matchers.
The m_Phi matcher is variadic in the number of operands it accepts, so I added a new template argument to Recipe_match to relax the assertion that the number of operands must exactly match the template operand types.
I've also used m_Sub in a couple of other places that were already pattern matching on subs; happy to split that out if reviewers prefer.
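For illustration, here is a condensed sketch of how the new matchers compose in canonicalizeEVLLoops to recognize the AVL phi and its backedge (distilled from the VPlanTransforms.cpp hunk in the diff; names follow the patch):

```cpp
// Match an AVL phi of the form
//   AVL = phi [ TripCount, entry ], [ AVLNext, latch ]
// whose backedge is AVLNext = sub AVL, zext(EVL), where the EVL is
// computed from the AVL, possibly capped to a safe distance via a select.
VPValue *Backedge;
if (match(PhiR, m_Phi(m_Specific(Plan.getTripCount()), m_VPValue(Backedge))) &&
    match(Backedge,
          m_Sub(m_Specific(PhiR),
                m_ZExtOrSelf(m_ExplicitVectorLength(m_CombineOr(
                    m_Specific(PhiR),
                    m_Select(m_VPValue(), m_Specific(PhiR), m_VPValue())))))))
  AVLNext = Backedge;
```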
Fixes #151459
---
Patch is 111.07 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/152167.diff
32 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h (+53-14)
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+1-2)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+30-7)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.h (+4-3)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll (+5-5)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll (+7-7)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll (+36-36)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll (+18-18)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll (+22-22)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll (+12-12)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll (+8-8)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll (+7-7)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-gather-scatter.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll (+14-14)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-intermediate-store.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-iv32.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-known-no-overflow.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll (+28-28)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll (+5-5)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll (+7-7)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll (+2-2)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index d133610ef4f75..ed8f33d23f038 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -195,7 +195,7 @@ inline bind_ty<VPValue> m_VPValue(VPValue *&V) { return V; }
/// Match a VPInstruction, capturing if we match.
inline bind_ty<VPInstruction> m_VPInstruction(VPInstruction *&V) { return V; }
-template <typename Ops_t, unsigned Opcode, bool Commutative,
+template <typename Ops_t, unsigned Opcode, bool Commutative, bool Variadic,
typename... RecipeTys>
struct Recipe_match {
Ops_t Ops;
@@ -231,9 +231,12 @@ struct Recipe_match {
if ((!matchRecipeAndOpcode<RecipeTys>(R) && ...))
return false;
- assert(R->getNumOperands() == std::tuple_size<Ops_t>::value &&
- "recipe with matched opcode does not have the expected number of "
- "operands");
+ if (R->getNumOperands() != std::tuple_size<Ops_t>::value) {
+ assert(Variadic && "non-variadic recipe with matched opcode does not "
+ "have the expected number of "
+ "operands");
+ return false;
+ }
auto IdxSeq = std::make_index_sequence<std::tuple_size<Ops_t>::value>();
if (all_of_tuple_elements(IdxSeq, [R](auto Op, unsigned Idx) {
@@ -256,7 +259,9 @@ struct Recipe_match {
std::is_same<RecipeTy, VPCanonicalIVPHIRecipe>::value ||
std::is_same<RecipeTy, VPWidenSelectRecipe>::value ||
std::is_same<RecipeTy, VPDerivedIVRecipe>::value ||
- std::is_same<RecipeTy, VPWidenGEPRecipe>::value)
+ std::is_same<RecipeTy, VPWidenGEPRecipe>::value ||
+ std::is_same<RecipeTy, VPWidenPHIRecipe>::value ||
+ std::is_same<RecipeTy, VPHeaderPHIRecipe>::value)
return DefR;
else
return DefR && DefR->getOpcode() == Opcode;
@@ -272,11 +277,11 @@ struct Recipe_match {
template <unsigned Opcode, typename... RecipeTys>
using ZeroOpRecipe_match =
- Recipe_match<std::tuple<>, Opcode, false, RecipeTys...>;
+ Recipe_match<std::tuple<>, Opcode, false, false, RecipeTys...>;
template <typename Op0_t, unsigned Opcode, typename... RecipeTys>
using UnaryRecipe_match =
- Recipe_match<std::tuple<Op0_t>, Opcode, false, RecipeTys...>;
+ Recipe_match<std::tuple<Op0_t>, Opcode, false, false, RecipeTys...>;
template <typename Op0_t, unsigned Opcode>
using UnaryVPInstruction_match =
@@ -293,7 +298,8 @@ using AllUnaryRecipe_match =
template <typename Op0_t, typename Op1_t, unsigned Opcode, bool Commutative,
typename... RecipeTys>
using BinaryRecipe_match =
- Recipe_match<std::tuple<Op0_t, Op1_t>, Opcode, Commutative, RecipeTys...>;
+ Recipe_match<std::tuple<Op0_t, Op1_t>, Opcode, Commutative,
+ /*Variadic*/ false, RecipeTys...>;
template <typename Op0_t, typename Op1_t, unsigned Opcode>
using BinaryVPInstruction_match =
@@ -302,8 +308,9 @@ using BinaryVPInstruction_match =
template <typename Op0_t, typename Op1_t, typename Op2_t, unsigned Opcode,
bool Commutative, typename... RecipeTys>
-using TernaryRecipe_match = Recipe_match<std::tuple<Op0_t, Op1_t, Op2_t>,
- Opcode, Commutative, RecipeTys...>;
+using TernaryRecipe_match =
+ Recipe_match<std::tuple<Op0_t, Op1_t, Op2_t>, Opcode, Commutative,
+ /*Variadic*/ false, RecipeTys...>;
template <typename Op0_t, typename Op1_t, typename Op2_t, unsigned Opcode>
using TernaryVPInstruction_match =
@@ -343,8 +350,9 @@ m_VPInstruction(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) {
template <typename Op0_t, typename Op1_t, typename Op2_t, typename Op3_t,
unsigned Opcode, bool Commutative, typename... RecipeTys>
-using Recipe4Op_match = Recipe_match<std::tuple<Op0_t, Op1_t, Op2_t, Op3_t>,
- Opcode, Commutative, RecipeTys...>;
+using Recipe4Op_match =
+ Recipe_match<std::tuple<Op0_t, Op1_t, Op2_t, Op3_t>, Opcode, Commutative,
+ /*Variadic*/ false, RecipeTys...>;
template <typename Op0_t, typename Op1_t, typename Op2_t, typename Op3_t,
unsigned Opcode>
@@ -378,6 +386,12 @@ m_Broadcast(const Op0_t &Op0) {
return m_VPInstruction<VPInstruction::Broadcast>(Op0);
}
+template <typename Op0_t>
+inline UnaryVPInstruction_match<Op0_t, VPInstruction::ExplicitVectorLength>
+m_ExplicitVectorLength(const Op0_t &Op0) {
+ return m_VPInstruction<VPInstruction::ExplicitVectorLength>(Op0);
+}
+
template <typename Op0_t, typename Op1_t>
inline BinaryVPInstruction_match<Op0_t, Op1_t, VPInstruction::ActiveLaneMask>
m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1) {
@@ -418,6 +432,12 @@ m_ZExtOrSExt(const Op0_t &Op0) {
return m_CombineOr(m_ZExt(Op0), m_SExt(Op0));
}
+template <typename Op0_t>
+inline match_combine_or<AllUnaryRecipe_match<Op0_t, Instruction::ZExt>, Op0_t>
+m_ZExtOrSelf(const Op0_t &Op0) {
+ return m_CombineOr(m_ZExt(Op0), Op0);
+}
+
template <unsigned Opcode, typename Op0_t, typename Op1_t,
bool Commutative = false>
inline AllBinaryRecipe_match<Op0_t, Op1_t, Opcode, Commutative>
@@ -431,6 +451,12 @@ m_c_Binary(const Op0_t &Op0, const Op1_t &Op1) {
return AllBinaryRecipe_match<Op0_t, Op1_t, Opcode, true>(Op0, Op1);
}
+template <typename Op0_t, typename Op1_t>
+inline AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::Sub>
+m_Sub(const Op0_t &Op0, const Op1_t &Op1) {
+ return m_Binary<Instruction::Sub, Op0_t, Op1_t>(Op0, Op1);
+}
+
template <typename Op0_t, typename Op1_t>
inline AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::Mul>
m_Mul(const Op0_t &Op0, const Op1_t &Op1) {
@@ -476,7 +502,8 @@ inline GEPLikeRecipe_match<Op0_t, Op1_t> m_GetElementPtr(const Op0_t &Op0,
template <typename Op0_t, typename Op1_t, typename Op2_t, unsigned Opcode>
using AllTernaryRecipe_match =
Recipe_match<std::tuple<Op0_t, Op1_t, Op2_t>, Opcode, false,
- VPReplicateRecipe, VPInstruction, VPWidenSelectRecipe>;
+ /*Variadic*/ false, VPReplicateRecipe, VPInstruction,
+ VPWidenSelectRecipe>;
template <typename Op0_t, typename Op1_t, typename Op2_t>
inline AllTernaryRecipe_match<Op0_t, Op1_t, Op2_t, Instruction::Select>
@@ -524,7 +551,8 @@ m_ScalarIVSteps(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) {
template <typename Op0_t, typename Op1_t, typename Op2_t>
using VPDerivedIV_match =
- Recipe_match<std::tuple<Op0_t, Op1_t, Op2_t>, 0, false, VPDerivedIVRecipe>;
+ Recipe_match<std::tuple<Op0_t, Op1_t, Op2_t>, 0, false, /*Variadic*/ false,
+ VPDerivedIVRecipe>;
template <typename Op0_t, typename Op1_t, typename Op2_t>
inline VPDerivedIV_match<Op0_t, Op1_t, Op2_t>
@@ -532,6 +560,17 @@ m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) {
return VPDerivedIV_match<Op0_t, Op1_t, Op2_t>({Op0, Op1, Op2});
}
+template <typename... OpTys>
+using PhiLikeRecipe_match =
+ Recipe_match<std::tuple<OpTys...>, Instruction::PHI, false, true,
+ VPWidenPHIRecipe, VPHeaderPHIRecipe, VPInstruction>;
+
+template <typename Op0_t, typename Op1_t, typename... OpTys>
+inline PhiLikeRecipe_match<Op0_t, Op1_t, OpTys...>
+m_Phi(const Op0_t &Op0, const Op1_t &Op1, const OpTys &...Ops) {
+ return PhiLikeRecipe_match<Op0_t, Op1_t, OpTys...>(Op0, Op1, Ops...);
+}
+
/// Match a call argument at a given argument index.
template <typename Opnd_t> struct Argument_match {
/// Call argument index to match.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 47a807794eb3d..f64ca6c6a449d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -328,8 +328,7 @@ VPPartialReductionRecipe::computeCost(ElementCount VF,
// Pick out opcode, type/ext information and use sub side effects from a widen
// recipe.
auto HandleWiden = [&](VPWidenRecipe *Widen) {
- if (match(Widen,
- m_Binary<Instruction::Sub>(m_SpecificInt(0), m_VPValue(Op)))) {
+ if (match(Widen, m_Sub(m_SpecificInt(0), m_VPValue(Op)))) {
Widen = dyn_cast<VPWidenRecipe>(Op->getDefiningRecipe());
}
Opcode = Widen->getOpcode();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index a7965a053e6e3..ace5b35522516 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -740,8 +740,7 @@ static VPWidenInductionRecipe *getOptimizableIVOf(VPValue *VPV) {
// IVStep will be the negated step of the subtraction. Check if Step == -1
// * IVStep.
VPValue *Step;
- if (!match(VPV,
- m_Binary<Instruction::Sub>(m_VPValue(), m_VPValue(Step))) ||
+ if (!match(VPV, m_Sub(m_VPValue(), m_VPValue(Step))) ||
!Step->isLiveIn() || !IVStep->isLiveIn())
return false;
auto *StepCI = dyn_cast<ConstantInt>(Step->getLiveInIRValue());
@@ -2386,19 +2385,38 @@ void VPlanTransforms::canonicalizeEVLLoops(VPlan &Plan) {
// Find EVL loop entries by locating VPEVLBasedIVPHIRecipe.
// There should be only one EVL PHI in the entire plan.
VPEVLBasedIVPHIRecipe *EVLPhi = nullptr;
+ VPValue *AVLNext = nullptr;
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(Plan.getEntry())))
- for (VPRecipeBase &R : VPBB->phis())
- if (auto *PhiR = dyn_cast<VPEVLBasedIVPHIRecipe>(&R)) {
+ for (VPRecipeBase &R : VPBB->phis()) {
+ auto *PhiR = dyn_cast<VPSingleDefRecipe>(&R);
+ if (!PhiR)
+ continue;
+ VPValue *Backedge;
+ if (auto *EVL = dyn_cast<VPEVLBasedIVPHIRecipe>(PhiR)) {
assert(!EVLPhi && "Found multiple EVL PHIs. Only one expected");
- EVLPhi = PhiR;
+ EVLPhi = EVL;
+ continue;
}
+ if (match(PhiR,
+ m_Phi(m_Specific(Plan.getTripCount()), m_VPValue(Backedge))) &&
+ match(Backedge, m_Sub(m_Specific(PhiR),
+ m_ZExtOrSelf(m_ExplicitVectorLength(m_CombineOr(
+ m_Specific(PhiR),
+ // The AVL may be capped to a safe distance.
+ m_Select(m_VPValue(), m_Specific(PhiR),
+ m_VPValue()))))))) {
+ AVLNext = Backedge;
+ }
+ }
// Early return if no EVL PHI is found.
if (!EVLPhi)
return;
+ assert(AVLNext && "Didn't find AVL backedge?");
+
VPBasicBlock *HeaderVPBB = EVLPhi->getParent();
VPValue *EVLIncrement = EVLPhi->getBackedgeValue();
@@ -2425,7 +2443,7 @@ void VPlanTransforms::canonicalizeEVLLoops(VPlan &Plan) {
// Replace the use of VectorTripCount in the latch-exiting block.
// Before: (branch-on-count EVLIVInc, VectorTripCount)
- // After: (branch-on-count EVLIVInc, TripCount)
+ // After: (branch-on-count AVLNext, 0)
VPBasicBlock *LatchExiting =
HeaderVPBB->getPredecessors()[1]->getEntryBasicBlock();
@@ -2438,7 +2456,12 @@ void VPlanTransforms::canonicalizeEVLLoops(VPlan &Plan) {
m_BranchOnCount(m_VPValue(EVLIncrement),
m_Specific(&Plan.getVectorTripCount()))) &&
"Unexpected terminator in EVL loop");
- LatchExitingBr->setOperand(1, Plan.getTripCount());
+
+ Type *AVLTy = VPTypeAnalysis(Plan).inferScalarType(AVLNext);
+
+ LatchExitingBr->setOperand(0, AVLNext);
+ LatchExitingBr->setOperand(
+ 1, Plan.getOrAddLiveIn(ConstantInt::getNullValue(AVLTy)));
}
void VPlanTransforms::dropPoisonGeneratingRecipes(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 5943684e17a76..f42850a719ed2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -216,9 +216,10 @@ struct VPlanTransforms {
/// variable vector lengths instead of fixed lengths. This transformation:
/// * Makes EVL-Phi concrete.
// * Removes CanonicalIV and increment.
- /// * Replaces fixed-length stepping (branch-on-cond CanonicalIVInc,
- /// VectorTripCount) with variable-length stepping (branch-on-cond
- /// EVLIVInc, TripCount).
+ /// * Replaces the exit condition from
+ /// (branch-on-cond CanonicalIVInc, VectorTripCount)
+ /// to
+ /// (branch-on-cond AVLNext, 0)
static void canonicalizeEVLLoops(VPlan &Plan);
/// Lower abstract recipes to concrete ones, that can be codegen'd. Use \p
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll b/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll
index 5f13089ff17fd..3f2c4d9f94a07 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll
@@ -37,8 +37,8 @@ define void @test_wide_integer_induction(ptr noalias %a, i64 %N) {
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
+; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]]
; CHECK: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
index 6e2434aefce9d..d31ea53cde213 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
@@ -141,7 +141,7 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
; IF-EVL-OUTLOOP-NEXT: [[TMP10]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP5]])
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP5]], [[EVL_BASED_IV]]
; IF-EVL-OUTLOOP-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP5]]
-; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], [[N]]
+; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = icmp eq i32 [[AVL_NEXT]], 0
; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL-OUTLOOP: middle.block:
; IF-EVL-OUTLOOP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP10]])
@@ -195,7 +195,7 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
; IF-EVL-INLOOP-NEXT: [[TMP11]] = add i32 [[TMP10]], [[VEC_PHI]]
; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP6]], [[EVL_BASED_IV]]
; IF-EVL-INLOOP-NEXT: [[AVL_NEXT]] = sub nuw i32 [[TMP5]], [[TMP6]]
-; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], [[N]]
+; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = icmp eq i32 [[AVL_NEXT]], 0
; IF-EVL-INLOOP-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL-INLOOP: middle.block:
; IF-EVL-INLOOP-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
@@ -362,8 +362,8 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
; IF-EVL-OUTLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
-; IF-EVL-OUTLOOP-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
-; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; IF-EVL-OUTLOOP-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
+; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; IF-EVL-OUTLOOP: middle.block:
; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> [[TMP15]])
; IF-EVL-OUTLOOP-NEXT: br label [[FOR_END:%.*]]
@@ -410,7 +410,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: [[TMP14:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP14]], [[EVL_BASED_IV]]
; IF-EVL-INLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]]
-; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
+; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
; IF-EVL-INLOOP-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; IF-EVL-INLOOP: middle.block:
; IF-EVL-INLOOP-NEXT: br label [[FOR_END:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll
index 976ce77d2ba29..73d1b9c307ff4 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll
@@ -143,7 +143,7 @@ define void @masked_strided_factor2(ptr noalias nocapture readonly %p, ptr noali
; PREDICATED_DATA-WITH-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP1]], [[EVL_BASED_IV]]
; PREDICATED_DATA-WITH-EVL-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP1]]
; PREDICATED_DATA-WITH-EVL-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], 1024
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP15:%.*]] = icmp eq i32 [[AVL_NEXT]], 0
; PREDICATED_DATA-WITH-EVL-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; PREDICATED_DATA-WITH-EVL: middle.block:
; PREDICATED_DATA-WITH-EVL-NEXT: br label [[FOR_END:%.*]]
@@ -334,7 +334,7 @@ define void @masked_strided_factor4(ptr noalias nocapture readonly %p, ptr noali
; PREDICATED_DATA-WITH-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP1]], [[EVL_BASED_IV]]
; PREDICATED_DATA-WITH-EVL-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP1]]
; PREDICATED_DATA-WITH-EVL-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
-; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP19:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], 1024
+; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP19:%.*]] = icmp eq i32 [[AVL_NEXT]], 0
; PREDICATED_DATA-WITH-EVL-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; PREDICATED_DATA-WITH-EVL: middle.block:
; PREDICATED_DATA-WITH-EVL-NEXT: br label [[FOR_END:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
index 01df43618aad0..b4a164603b353 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
@@ -50,8 +50,8 @@ define void @test(ptr %p, i64 %a, i8 %b) {
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP11]], [[EVL_BASED_IV]]
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP11]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT8]]
-; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], 9
-; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_COND]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i32 [[AVL_NEXT]], 0
+; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_COND]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[EXIT1:%.*]]
; CHECK: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll
index ed507961ef825....
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/152167