[llvm] [VPlan] Simplify CanonicalIVIncrement unrolling (PR #185739)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 11 02:56:40 PDT 2026
https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/185739
From f097a78f1d18bd1899e24044bc06eafd7850baae Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <artagnon at tenstorrent.com>
Date: Wed, 11 Mar 2026 08:45:33 +0000
Subject: [PATCH] [VPlan] Simplify CanonicalIVIncrement unrolling
The mechanics of unrolling CanonicalIVIncrement can be made identical to
those of VectorPointerRecipe, simplifying some code and fixing some
missing no-wrap flags along the way.
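As an illustration of the no-wrap fix, here is a reduced sketch of the
entry-block part increments at UF=4, drawn from the updated CHECK lines below
(value names are illustrative, not the actual FileCheck variables):

  ; Per-part offsets, now emitted up front and carrying nuw/nsw:
  %off2 = shl nuw nsw i64 %vf, 1
  %off3 = mul nuw nsw i64 %vf, 3
  ; Part increments, unchanged in form (add Start, VF * Part with Start = 0):
  %index.part.next  = add i64 0, %vf
  %index.part.next1 = add i64 0, %off2
  %index.part.next2 = add i64 0, %off3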
---
llvm/lib/Transforms/Vectorize/VPlan.h | 5 ++-
.../Transforms/Vectorize/VPlanPatternMatch.h | 10 ++++++
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 2 ++
.../Transforms/Vectorize/VPlanTransforms.cpp | 36 ++++++++++---------
llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 31 +++++-----------
.../AArch64/scalable-strict-fadd.ll | 12 +++----
.../AArch64/sve-tail-folding-unroll.ll | 8 ++---
.../VPlan/AArch64/sve-tail-folding-forced.ll | 2 +-
.../VPlan/AArch64/vplan-printing.ll | 2 +-
9 files changed, 57 insertions(+), 51 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index da2f6f8c7cd03..c18b13e39f847 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1209,7 +1209,10 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
ActiveLaneMask,
ExplicitVectorLength,
CalculateTripCountMinusVF,
- // Increment the canonical IV separately for each unrolled part.
+ // Increment the canonical IV separately for each unrolled part. Unrolling
+ // adds an extra offset operand for unrolled parts > 0 and it produces `add
+ // Start, Offset`, where Start is the first operand. The offset for unrolled
+ // part 0 is 0.
CanonicalIVIncrementForPart,
// Abstract instruction that compares two values and branches. This is
// lowered to ICmp + BranchOnCond during VPlan to VPlan transformation.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index 1205f04fb5c29..614866a42fbd5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -866,6 +866,16 @@ inline auto m_c_LogicalOr(const Op0_t &Op0, const Op1_t &Op1) {
return m_c_Select(Op0, m_True(), Op1);
}
+inline auto m_CanonicalIVIncrement() {
+ return VPInstruction_match<VPInstruction::CanonicalIVIncrementForPart>();
+}
+
+template <typename Op0_t, typename Op1_t>
+inline auto m_CanonicalIVIncrement(const Op0_t &Op0, const Op1_t &Op1) {
+ return VPInstruction_match<VPInstruction::CanonicalIVIncrementForPart, Op0_t,
+ Op1_t>(Op0, Op1);
+}
+
inline auto m_CanonicalIV() { return class_match<VPCanonicalIVPHIRecipe>(); }
template <typename Op0_t, typename Op1_t, typename Op2_t>
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index d149723e11fb6..f72999daf1fcd 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -656,6 +656,8 @@ Value *VPInstruction::generate(VPTransformState &State) {
return EVL;
}
case VPInstruction::CanonicalIVIncrementForPart: {
+ assert(getNumOperands() == 2 &&
+ "Expected prior simplification of recipe without offset");
auto *IV = State.get(getOperand(0), VPLane(0));
auto *VFxPart = State.get(getOperand(1), VPLane(0));
// The canonical IV is incremented by the vectorization factor (num of
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 33e5b25bd9322..bddf7cd97dcd7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1627,6 +1627,11 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
if (!VPR->getOffset() || match(VPR->getOffset(), m_ZeroInt()))
return VPR->replaceAllUsesWith(VPR->getOperand(0));
+ // Similarly, simplify unrolled CanonicalIVIncrement.
+ if (match(Def, m_CanonicalIVIncrement()))
+ if (Def->getNumOperands() == 1 || match(Def->getOperand(1), m_ZeroInt()))
+ return Def->replaceAllUsesWith(Def->getOperand(0));
+
// VPScalarIVSteps after unrolling can be replaced by their start value, if
// the start index is zero and only the first lane 0 is demanded.
if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(Def)) {
@@ -2065,18 +2070,18 @@ static bool tryToReplaceALMWithWideALM(VPlan &Plan, ElementCount VF,
m_ActiveLaneMask(m_VPValue(Index), m_VPValue(), m_VPValue()));
assert(Index && "Expected index from ActiveLaneMask instruction");
- uint64_t Part;
- if (match(Index,
- m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>(
- m_VPValue(), m_Mul(m_VPValue(), m_ConstantInt(Part)))))
- Phis[Part] = Phi;
- else {
- // Anything other than a CanonicalIVIncrementForPart is part 0
- assert(!match(
- Index,
- m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>()));
+ assert(match(Index, m_CanonicalIVIncrement()) &&
+ "Expected Index to be a CanonicalIVIncrement");
+ if (cast<VPInstruction>(Index)->getNumOperands() == 1) {
+ // Part 0 CanonicalIVIncrement.
Phis[0] = Phi;
+ continue;
}
+ uint64_t Part;
+ assert(match(Index, m_CanonicalIVIncrement(
+ m_VPValue(), m_c_Mul(m_Specific(&Plan.getVF()),
+ m_ConstantInt(Part)))));
+ Phis[Part] = Phi;
}
assert(all_of(Phis, [](VPActiveLaneMaskPHIRecipe *Phi) { return Phi; }) &&
@@ -2925,11 +2930,10 @@ addVPLaneMaskPhiAndUpdateExitBranch(VPlan &Plan) {
// Create the ActiveLaneMask instruction using the correct start values.
VPValue *TC = Plan.getTripCount();
- VPValue *VF = &Plan.getVF();
auto *EntryIncrement = Builder.createOverflowingOp(
- VPInstruction::CanonicalIVIncrementForPart, {StartV, VF}, {false, false},
- DL, "index.part.next");
+ VPInstruction::CanonicalIVIncrementForPart, StartV, {false, false}, DL,
+ "index.part.next");
// Create the active lane mask instruction in the VPlan preheader.
VPValue *ALMMultiplier =
@@ -2948,9 +2952,9 @@ addVPLaneMaskPhiAndUpdateExitBranch(VPlan &Plan) {
// original terminator.
VPRecipeBase *OriginalTerminator = EB->getTerminator();
Builder.setInsertPoint(OriginalTerminator);
- auto *InLoopIncrement = Builder.createOverflowingOp(
- VPInstruction::CanonicalIVIncrementForPart,
- {CanonicalIVIncrement, &Plan.getVF()}, {false, false}, DL);
+ auto *InLoopIncrement =
+ Builder.createOverflowingOp(VPInstruction::CanonicalIVIncrementForPart,
+ CanonicalIVIncrement, {false, false}, DL);
auto *ALM = Builder.createNaryOp(VPInstruction::ActiveLaneMask,
{InLoopIncrement, TC, ALMMultiplier}, DL,
"active.lane.mask.next");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 2d961808d3bcd..05af7210cdd8e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -351,6 +351,15 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
Copy->addOperand(VFxPart);
continue;
}
+ if (match(&R, m_CanonicalIVIncrement())) {
+ VPBuilder Builder(&R);
+ VPValue *VFxPart = Builder.createOverflowingOp(
+ Instruction::Mul, {&Plan.getVF(), getConstantInt(Part)},
+ {true, true});
+ Copy->setOperand(0, R.getOperand(0));
+ Copy->addOperand(VFxPart);
+ continue;
+ }
if (auto *Red = dyn_cast<VPReductionRecipe>(&R)) {
auto *Phi = dyn_cast<VPReductionPHIRecipe>(R.getOperand(0));
if (Phi && Phi->isOrdered()) {
@@ -380,14 +389,6 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
// requiring it.
if (isa<VPWidenCanonicalIVRecipe>(Copy))
Copy->addOperand(getConstantInt(Part));
-
- if (match(Copy,
- m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>())) {
- VPBuilder Builder(Copy);
- VPValue *ScaledByPart = Builder.createOverflowingOp(
- Instruction::Mul, {Copy->getOperand(1), getConstantInt(Part)});
- Copy->setOperand(1, ScaledByPart);
- }
}
if (auto *VEPR = dyn_cast<VPVectorEndPointerRecipe>(&R)) {
// Materialize Part0 offset for VectorEndPointer.
@@ -489,20 +490,6 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF) {
assert(UF > 0 && "Unroll factor must be positive");
Plan.setUF(UF);
llvm::scope_exit Cleanup([&Plan, UF]() {
- auto Iter = vp_depth_first_deep(Plan.getEntry());
- // Remove recipes that are redundant after unrolling.
- for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
- for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
- auto *VPI = dyn_cast<VPInstruction>(&R);
- if (VPI &&
- VPI->getOpcode() == VPInstruction::CanonicalIVIncrementForPart &&
- VPI->getOperand(1) == &Plan.getVF()) {
- VPI->replaceAllUsesWith(VPI->getOperand(0));
- VPI->eraseFromParent();
- }
- }
- }
-
Type *TCTy = VPTypeAnalysis(Plan).inferScalarType(Plan.getTripCount());
Plan.getUF().replaceAllUsesWith(Plan.getConstantInt(TCTy, UF));
});
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
index b897982700400..b33f2c35b85cd 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
@@ -304,10 +304,10 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 {
; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 2
+; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP1]], 1
+; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = mul nuw nsw i64 [[TMP1]], 3
; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP1]]
-; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = shl i64 [[TMP1]], 1
; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP3]]
-; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = mul i64 [[TMP1]], 3
; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP4]]
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[N]])
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[N]])
@@ -1320,10 +1320,10 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 {
; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 2
+; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP1]], 1
+; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = mul nuw nsw i64 [[TMP1]], 3
; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP1]]
-; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = shl i64 [[TMP1]], 1
; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP3]]
-; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = mul i64 [[TMP1]], 3
; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP4]]
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[N]])
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[N]])
@@ -1572,10 +1572,10 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 {
; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 2
+; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP1]], 1
+; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = mul nuw nsw i64 [[TMP1]], 3
; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP1]]
-; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = shl i64 [[TMP1]], 1
; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP3]]
-; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = mul i64 [[TMP1]], 3
; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP4]]
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[N]])
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[N]])
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll
index 396f9b5a93ddb..6e4655d6fe773 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll
@@ -13,10 +13,10 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 {
; CHECK-NEXT: [[TMP61:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP61]], 2
; CHECK-NEXT: [[TMP62:%.*]] = shl nuw i64 [[TMP1]], 2
+; CHECK-NEXT: [[TMP28:%.*]] = shl nuw nsw i64 [[TMP1]], 1
+; CHECK-NEXT: [[TMP30:%.*]] = mul nuw nsw i64 [[TMP1]], 3
; CHECK-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP1]]
-; CHECK-NEXT: [[TMP28:%.*]] = shl i64 [[TMP1]], 1
; CHECK-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP28]]
-; CHECK-NEXT: [[TMP30:%.*]] = mul i64 [[TMP1]], 3
; CHECK-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP30]]
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]])
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[UMAX]])
@@ -79,10 +79,10 @@ define void @cond_memset(i32 %val, ptr noalias readonly %cond_ptr, ptr noalias %
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP5]], 2
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP1]], 2
+; CHECK-NEXT: [[TMP28:%.*]] = shl nuw nsw i64 [[TMP1]], 1
+; CHECK-NEXT: [[TMP30:%.*]] = mul nuw nsw i64 [[TMP1]], 3
; CHECK-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP1]]
-; CHECK-NEXT: [[TMP28:%.*]] = shl i64 [[TMP1]], 1
; CHECK-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP28]]
-; CHECK-NEXT: [[TMP30:%.*]] = mul i64 [[TMP1]], 3
; CHECK-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP30]]
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]])
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[UMAX]])
diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/AArch64/sve-tail-folding-forced.ll b/llvm/test/Transforms/LoopVectorize/VPlan/AArch64/sve-tail-folding-forced.ll
index 56f6c430bd574..cdd5eeb8c9887 100644
--- a/llvm/test/Transforms/LoopVectorize/VPlan/AArch64/sve-tail-folding-forced.ll
+++ b/llvm/test/Transforms/LoopVectorize/VPlan/AArch64/sve-tail-folding-forced.ll
@@ -31,7 +31,7 @@ target triple = "aarch64-unknown-linux-gnu"
; VPLANS-NEXT: vp<[[VEC_PTR:%[0-9]+]]> = vector-pointer ir<%gep>
; VPLANS-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%val>, vp<[[LANEMASK_PHI]]>
; VPLANS-NEXT: EMIT vp<[[INDV_UPDATE:%.+]]> = add vp<[[INDV]]>, vp<[[VFxUF]]>
-; VPLANS-NEXT: EMIT vp<[[INC:%[0-9]+]]> = VF * Part + vp<[[INDV_UPDATE]]>, vp<[[VF]]>
+; VPLANS-NEXT: EMIT vp<[[INC:%[0-9]+]]> = VF * Part + vp<[[INDV_UPDATE]]>
; VPLANS-NEXT: EMIT vp<[[LANEMASK_LOOP]]> = active lane mask vp<[[INC]]>, vp<[[TC]]>
; VPLANS-NEXT: EMIT vp<[[NOT:%[0-9]+]]> = not vp<[[LANEMASK_LOOP]]>
; VPLANS-NEXT: EMIT branch-on-cond vp<[[NOT]]>
diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/AArch64/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/VPlan/AArch64/vplan-printing.ll
index 587531cda95c3..8fd73457db663 100644
--- a/llvm/test/Transforms/LoopVectorize/VPlan/AArch64/vplan-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/VPlan/AArch64/vplan-printing.ll
@@ -154,7 +154,7 @@ define i32 @print_partial_reduction_predication(ptr %a, ptr %b, i64 %N) "target-
; CHECK-NEXT: WIDEN ir<%load.b> = load vp<[[PTR_B]]>, vp<[[MASK]]>
; CHECK-NEXT: EXPRESSION vp<[[REDUCE]]> = vp<[[MASK]]> + partial.reduce.add (mul (ir<%load.b> zext to i32), (ir<%load.a> zext to i32), <badref>)
; CHECK-NEXT: EMIT vp<%index.next> = add vp<[[CAN_IV]]>, vp<[[VFxUF]]>
-; CHECK-NEXT: EMIT vp<[[PART_IDX:%[0-9]+]]> = VF * Part + vp<%index.next>, vp<[[VF]]>
+; CHECK-NEXT: EMIT vp<[[PART_IDX:%[0-9]+]]> = VF * Part + vp<%index.next>
; CHECK-NEXT: EMIT vp<%active.lane.mask.next> = active lane mask vp<[[PART_IDX]]>, ir<%N>, ir<1>
; CHECK-NEXT: EMIT vp<[[NOT_MASK:%[0-9]+]]> = not vp<%active.lane.mask.next>
; CHECK-NEXT: EMIT branch-on-cond vp<[[NOT_MASK]]>