[llvm] [VPlan] Replace UnrollPart for VPScalarIVSteps with start index op (NFC) (PR #170906)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 19 05:20:48 PST 2026
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/170906
>From 395377be8445531f536aeb3274f1b12b1733666d Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 5 Dec 2025 16:56:15 +0000
Subject: [PATCH 1/3] [VPlan] Replace UnrollPart for VPScalarIVSteps with start
index op (NFC)
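Replace the unroll part operand of VPScalarIVStepsRecipe with the start
index. The start index (VF * Part, converted to the IV's scalar type) is now
materialized during unrolling instead of in VPScalarIVStepsRecipe::execute.
This simplifies https://github.com/llvm/llvm-project/pull/170053
and is also a first step towards breaking the recipe down into its components.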
---
.../Vectorize/LoopVectorizationPlanner.h | 11 +++++
llvm/lib/Transforms/Vectorize/VPlan.h | 10 ++---
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 20 ++-------
.../Transforms/Vectorize/VPlanTransforms.cpp | 4 +-
llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 41 ++++++++++++++++---
5 files changed, 57 insertions(+), 29 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 430d9469c7698..26d4e80749a91 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -322,6 +322,17 @@ class VPBuilder {
return createScalarCast(CastOp, Op, ResultTy, DL);
}
+ VPValue *createScalarSExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy,
+ DebugLoc DL) {
+ if (ResultTy == SrcTy)
+ return Op;
+ Instruction::CastOps CastOp =
+ ResultTy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits()
+ ? Instruction::Trunc
+ : Instruction::SExt;
+ return createScalarCast(CastOp, Op, ResultTy, DL);
+ }
+
VPWidenCastRecipe *createWidenCast(Instruction::CastOps Opcode, VPValue *Op,
Type *ResultTy) {
VPIRFlags Flags;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index e2dfc4678c6d0..46261f38af2c5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3805,9 +3805,9 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {
};
/// A recipe for handling phi nodes of integer and floating-point inductions,
-/// producing their scalar values.
-class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags,
- public VPUnrollPartAccessor<3> {
+/// producing their scalar values. Before unrolling the recipe has 3 operands:
+/// IV, step and VF. Unrolling adds an extra StartIndex operand for parts > 0.
+class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags {
Instruction::BinaryOps InductionOpcode;
public:
@@ -3837,10 +3837,6 @@ class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags,
getDebugLoc());
}
- /// Return true if this VPScalarIVStepsRecipe corresponds to part 0. Note that
- /// this is only accurate after the VPlan has been unrolled.
- bool isPart0() const { return getUnrollPart(*this) == 0; }
-
VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
/// Generate the scalarized versions of the phi node as needed by their users.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 991e32a24fe2d..13133e225ac3e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2407,8 +2407,6 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
// iteration.
bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
// Compute the scalar steps and save the results in State.
- Type *IntStepTy =
- IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
unsigned StartLane = 0;
unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
@@ -2417,20 +2415,10 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
EndLane = StartLane + 1;
}
Value *StartIdx0;
- if (getUnrollPart(*this) == 0)
- StartIdx0 = ConstantInt::get(IntStepTy, 0);
- else {
- StartIdx0 = State.get(getOperand(2), true);
- if (getUnrollPart(*this) != 1) {
- StartIdx0 =
- Builder.CreateMul(StartIdx0, ConstantInt::get(StartIdx0->getType(),
- getUnrollPart(*this)));
- }
- StartIdx0 = Builder.CreateSExtOrTrunc(StartIdx0, IntStepTy);
- }
-
- if (BaseIVTy->isFloatingPointTy())
- StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);
+ if (getNumOperands() == 3) {
+ StartIdx0 = getSignedIntOrFpConstant(BaseIVTy, 0);
+ } else
+ StartIdx0 = State.get(getOperand(3), true);
for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
Value *StartIdx = Builder.CreateBinOp(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 8f0cf7d5beeed..d157e9d9ca14b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1526,7 +1526,9 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
// VPScalarIVSteps for part 0 can be replaced by their start value, if only
// the first lane is demanded.
if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(Def)) {
- if (Steps->isPart0() && vputils::onlyFirstLaneUsed(Steps)) {
+ if ((Steps->getNumOperands() == 3 ||
+ match(Steps->getOperand(3), m_ZeroInt())) &&
+ vputils::onlyFirstLaneUsed(Steps)) {
Steps->replaceAllUsesWith(Steps->getOperand(0));
return;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 8198945764936..965b02a07c40b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -53,6 +53,9 @@ class UnrollState {
/// Unroll replicate region \p VPR by cloning the region UF - 1 times.
void unrollReplicateRegionByUF(VPRegionBlock *VPR);
+ /// Add a start index operand to \p Steps for \p Part.
+ void addStartIndexForScalarSteps(VPScalarIVStepsRecipe *Steps, unsigned Part);
+
/// Unroll recipe \p R by cloning it UF - 1 times, unless it is uniform across
/// all parts.
void unrollRecipeByUF(VPRecipeBase &R);
@@ -123,6 +126,33 @@ class UnrollState {
};
} // namespace
+void UnrollState::addStartIndexForScalarSteps(VPScalarIVStepsRecipe *Steps,
+ unsigned Part) {
+ if (Part == 0) {
+ Steps->addOperand(getConstantInt(Part));
+ return;
+ }
+
+ VPBuilder Builder(Steps);
+ Type *BaseIVTy = TypeInfo.inferScalarType(Steps->getOperand(0));
+ Type *IntStepTy =
+ IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
+ VPValue *StartIdx0 = Steps->getOperand(2);
+ StartIdx0 = Builder.createOverflowingOp(
+ Instruction::Mul,
+ {StartIdx0,
+ Plan.getConstantInt(TypeInfo.inferScalarType(StartIdx0), Part)});
+ StartIdx0 = Builder.createScalarSExtOrTrunc(
+ StartIdx0, IntStepTy, TypeInfo.inferScalarType(StartIdx0),
+ DebugLoc::getUnknown());
+
+ if (BaseIVTy->isFloatingPointTy())
+ StartIdx0 = Builder.createScalarCast(Instruction::SIToFP, StartIdx0,
+ BaseIVTy, DebugLoc::getUnknown());
+
+ Steps->addOperand(StartIdx0);
+}
+
void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
VPBlockBase *InsertPt = VPR->getSingleSuccessor();
for (unsigned Part = 1; Part != UF; ++Part) {
@@ -136,9 +166,8 @@ void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
VPBlockUtils::blocksOnly<VPBasicBlock>(Part0))) {
for (const auto &[PartIR, Part0R] : zip(*PartIVPBB, *Part0VPBB)) {
remapOperands(&PartIR, Part);
- if (auto *ScalarIVSteps = dyn_cast<VPScalarIVStepsRecipe>(&PartIR)) {
- ScalarIVSteps->addOperand(getConstantInt(Part));
- }
+ if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(&PartIR))
+ addStartIndexForScalarSteps(Steps, Part);
addRecipeForPart(&Part0R, &PartIR, Part);
}
@@ -327,10 +356,12 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
}
remapOperands(Copy, Part);
+ if (auto *ScalarIVSteps = dyn_cast<VPScalarIVStepsRecipe>(Copy))
+ addStartIndexForScalarSteps(ScalarIVSteps, Part);
+
// Add operand indicating the part to generate code for, to recipes still
// requiring it.
- if (isa<VPScalarIVStepsRecipe, VPWidenCanonicalIVRecipe,
- VPVectorEndPointerRecipe>(Copy) ||
+ if (isa<VPWidenCanonicalIVRecipe, VPVectorEndPointerRecipe>(Copy) ||
match(Copy,
m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>()))
Copy->addOperand(getConstantInt(Part));
>From f764e0dcef5fcd6644eae374adcab8945e2e21c2 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 8 Jan 2026 22:20:48 +0000
Subject: [PATCH 2/3] !fixup address comments, thanks
---
llvm/lib/Transforms/Vectorize/VPlan.h | 2 ++
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 4 ++--
llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 22 +++++++++----------
3 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index f208cf82d8e03..0f5c83e1cb992 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3812,6 +3812,8 @@ class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags {
VPValue *getStepValue() const { return getOperand(1); }
+ VPValue *getVFValue() const { return getOperand(2); }
+
/// Returns true if the recipe only uses the first lane of operand \p Op.
bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index ac7e61805c18d..9956379cd6cbb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2434,9 +2434,9 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
EndLane = StartLane + 1;
}
Value *StartIdx0;
- if (getNumOperands() == 3) {
+ if (getNumOperands() == 3)
StartIdx0 = getSignedIntOrFpConstant(BaseIVTy, 0);
- } else
+ else
StartIdx0 = State.get(getOperand(3), true);
for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 152c0d5158c6e..c957019e8a45d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -128,29 +128,27 @@ class UnrollState {
void UnrollState::addStartIndexForScalarSteps(VPScalarIVStepsRecipe *Steps,
unsigned Part) {
- if (Part == 0) {
- Steps->addOperand(getConstantInt(Part));
+ if (Part == 0)
return;
- }
VPBuilder Builder(Steps);
Type *BaseIVTy = TypeInfo.inferScalarType(Steps->getOperand(0));
Type *IntStepTy =
IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
- VPValue *StartIdx0 = Steps->getOperand(2);
- StartIdx0 = Builder.createOverflowingOp(
+ VPValue *StartIndex = Steps->getVFValue();
+ StartIndex = Builder.createOverflowingOp(
Instruction::Mul,
- {StartIdx0,
- Plan.getConstantInt(TypeInfo.inferScalarType(StartIdx0), Part)});
- StartIdx0 = Builder.createScalarSExtOrTrunc(
- StartIdx0, IntStepTy, TypeInfo.inferScalarType(StartIdx0),
+ {StartIndex,
+ Plan.getConstantInt(TypeInfo.inferScalarType(StartIndex), Part)});
+ StartIndex = Builder.createScalarSExtOrTrunc(
+ StartIndex, IntStepTy, TypeInfo.inferScalarType(StartIndex),
DebugLoc::getUnknown());
if (BaseIVTy->isFloatingPointTy())
- StartIdx0 = Builder.createScalarCast(Instruction::SIToFP, StartIdx0,
- BaseIVTy, DebugLoc::getUnknown());
+ StartIndex = Builder.createScalarCast(Instruction::SIToFP, StartIndex,
+ BaseIVTy, DebugLoc::getUnknown());
- Steps->addOperand(StartIdx0);
+ Steps->addOperand(StartIndex);
}
void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
>From 994914348a77797f55cd81c3de1404d8f323306b Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 19 Jan 2026 13:11:42 +0000
Subject: [PATCH 3/3] !fixup address latest comments, thanks
---
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 9 ++-------
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 3 +--
llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 2 ++
.../LoopVectorize/iv-select-cmp-trunc.ll | 15 +++++++++++++++
4 files changed, 20 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 025aab356933f..a1517e95a29b8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2491,8 +2491,7 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
MulOp = Instruction::FMul;
}
- // Determine the number of scalars we need to generate for each unroll
- // iteration.
+ // Determine the number of scalars we need to generate.
bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
// Compute the scalar steps and save the results in State.
@@ -2502,11 +2501,7 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
StartLane = State.Lane->getKnownLane();
EndLane = StartLane + 1;
}
- Value *StartIdx0;
- if (getNumOperands() == 3)
- StartIdx0 = Constant::getNullValue(BaseIVTy);
- else
- StartIdx0 = State.get(getOperand(3), true);
+ Value *StartIdx0 = getNumOperands() == 3 ? Constant::getNullValue(BaseIVTy) : State.get(getOperand(3), true);
for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
// It is okay if the induction variable type cannot hold the lane number,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 04b84a53438e3..ea87f8fda2567 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1550,8 +1550,7 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
// VPScalarIVSteps for part 0 can be replaced by their start value, if only
// the first lane is demanded.
if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(Def)) {
- if ((Steps->getNumOperands() == 3 ||
- match(Steps->getOperand(3), m_ZeroInt())) &&
+ if (Steps->getNumOperands() == 3 &&
vputils::onlyFirstLaneUsed(Steps)) {
Steps->replaceAllUsesWith(Steps->getOperand(0));
return;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 77db67da64301..f3943405f46b7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -136,10 +136,12 @@ void UnrollState::addStartIndexForScalarSteps(VPScalarIVStepsRecipe *Steps,
Type *IntStepTy =
IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
VPValue *StartIndex = Steps->getVFValue();
+ if (Part > 1) {
StartIndex = Builder.createOverflowingOp(
Instruction::Mul,
{StartIndex,
Plan.getConstantInt(TypeInfo.inferScalarType(StartIndex), Part)});
+ }
StartIndex = Builder.createScalarSExtOrTrunc(
StartIndex, IntStepTy, TypeInfo.inferScalarType(StartIndex),
DebugLoc::getUnknown());
diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll
index 04ed8ae68db99..ceb06f2e3971e 100644
--- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll
@@ -1714,6 +1714,21 @@ define i1 @select_with_trunc_i1_iv(i64 %n, i64 %start) {
; CHECK-VF1IC4-LABEL: define i1 @select_with_trunc_i1_iv(
; CHECK-VF1IC4-SAME: i64 [[N:%.*]], i64 [[START:%.*]]) {
; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]:
+; CHECK-VF1IC4-NEXT: br label %[[LOOP:.*]]
+; CHECK-VF1IC4: [[LOOP]]:
+; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[START]], %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-VF1IC4-NEXT: [[CTR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[CTR_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-VF1IC4-NEXT: [[ACCUM:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ [[SEL:%.*]], %[[LOOP]] ]
+; CHECK-VF1IC4-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV]], 0
+; CHECK-VF1IC4-NEXT: [[TRUNC:%.*]] = trunc i64 [[CTR]] to i1
+; CHECK-VF1IC4-NEXT: [[SEL]] = select i1 [[CMP]], i1 [[ACCUM]], i1 [[TRUNC]]
+; CHECK-VF1IC4-NEXT: [[CTR_NEXT]] = add i64 [[CTR]], 1
+; CHECK-VF1IC4-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-VF1IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[N]], [[CTR]]
+; CHECK-VF1IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK-VF1IC4: [[EXIT]]:
+; CHECK-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i1 [ [[SEL]], %[[LOOP]] ]
+; CHECK-VF1IC4-NEXT: ret i1 [[SEL_LCSSA]]
;
entry:
br label %loop
More information about the llvm-commits mailing list