[llvm] [VPlan] Replace UnrollPart for VPScalarIVSteps with start index op (NFC) (PR #170906)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 8 14:21:18 PST 2026
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/170906
>From 395377be8445531f536aeb3274f1b12b1733666d Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 5 Dec 2025 16:56:15 +0000
Subject: [PATCH 1/2] [VPlan] Replace UnrollPart for VPScalarIVSteps with start
index op (NFC)
Replace the unroll part operand for VPScalarIVStepsRecipe with the start
index. This simplifies https://github.com/llvm/llvm-project/pull/170053
and is also a first step to break down the recipe into its components.
---
.../Vectorize/LoopVectorizationPlanner.h | 11 +++++
llvm/lib/Transforms/Vectorize/VPlan.h | 10 ++---
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 20 ++-------
.../Transforms/Vectorize/VPlanTransforms.cpp | 4 +-
llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 41 ++++++++++++++++---
5 files changed, 57 insertions(+), 29 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 430d9469c7698..26d4e80749a91 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -322,6 +322,17 @@ class VPBuilder {
return createScalarCast(CastOp, Op, ResultTy, DL);
}
+ VPValue *createScalarSExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy,
+ DebugLoc DL) {
+ if (ResultTy == SrcTy)
+ return Op;
+ Instruction::CastOps CastOp =
+ ResultTy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits()
+ ? Instruction::Trunc
+ : Instruction::SExt;
+ return createScalarCast(CastOp, Op, ResultTy, DL);
+ }
+
VPWidenCastRecipe *createWidenCast(Instruction::CastOps Opcode, VPValue *Op,
Type *ResultTy) {
VPIRFlags Flags;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index e2dfc4678c6d0..46261f38af2c5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3805,9 +3805,9 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {
};
/// A recipe for handling phi nodes of integer and floating-point inductions,
-/// producing their scalar values.
-class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags,
- public VPUnrollPartAccessor<3> {
+/// producing their scalar values. Before unrolling the recipe has 3 operands:
+/// IV, step and VF. Unrolling adds a StartIndex operand for parts other than 0.
+class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags {
Instruction::BinaryOps InductionOpcode;
public:
@@ -3837,10 +3837,6 @@ class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags,
getDebugLoc());
}
- /// Return true if this VPScalarIVStepsRecipe corresponds to part 0. Note that
- /// this is only accurate after the VPlan has been unrolled.
- bool isPart0() const { return getUnrollPart(*this) == 0; }
-
VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
/// Generate the scalarized versions of the phi node as needed by their users.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 991e32a24fe2d..13133e225ac3e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2407,8 +2407,6 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
// iteration.
bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
// Compute the scalar steps and save the results in State.
- Type *IntStepTy =
- IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
unsigned StartLane = 0;
unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
@@ -2417,20 +2415,10 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
EndLane = StartLane + 1;
}
Value *StartIdx0;
- if (getUnrollPart(*this) == 0)
- StartIdx0 = ConstantInt::get(IntStepTy, 0);
- else {
- StartIdx0 = State.get(getOperand(2), true);
- if (getUnrollPart(*this) != 1) {
- StartIdx0 =
- Builder.CreateMul(StartIdx0, ConstantInt::get(StartIdx0->getType(),
- getUnrollPart(*this)));
- }
- StartIdx0 = Builder.CreateSExtOrTrunc(StartIdx0, IntStepTy);
- }
-
- if (BaseIVTy->isFloatingPointTy())
- StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);
+ if (getNumOperands() == 3) {
+ StartIdx0 = getSignedIntOrFpConstant(BaseIVTy, 0);
+ } else
+ StartIdx0 = State.get(getOperand(3), true);
for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
Value *StartIdx = Builder.CreateBinOp(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 8f0cf7d5beeed..d157e9d9ca14b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1526,7 +1526,9 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
// VPScalarIVSteps for part 0 can be replaced by their start value, if only
// the first lane is demanded.
if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(Def)) {
- if (Steps->isPart0() && vputils::onlyFirstLaneUsed(Steps)) {
+ if ((Steps->getNumOperands() == 3 ||
+ match(Steps->getOperand(3), m_ZeroInt())) &&
+ vputils::onlyFirstLaneUsed(Steps)) {
Steps->replaceAllUsesWith(Steps->getOperand(0));
return;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 8198945764936..965b02a07c40b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -53,6 +53,9 @@ class UnrollState {
/// Unroll replicate region \p VPR by cloning the region UF - 1 times.
void unrollReplicateRegionByUF(VPRegionBlock *VPR);
+ /// Add a start index operand to \p Steps for \p Part.
+ void addStartIndexForScalarSteps(VPScalarIVStepsRecipe *Steps, unsigned Part);
+
/// Unroll recipe \p R by cloning it UF - 1 times, unless it is uniform across
/// all parts.
void unrollRecipeByUF(VPRecipeBase &R);
@@ -123,6 +126,33 @@ class UnrollState {
};
} // namespace
+void UnrollState::addStartIndexForScalarSteps(VPScalarIVStepsRecipe *Steps,
+ unsigned Part) {
+ if (Part == 0) {
+ Steps->addOperand(getConstantInt(Part));
+ return;
+ }
+
+ VPBuilder Builder(Steps);
+ Type *BaseIVTy = TypeInfo.inferScalarType(Steps->getOperand(0));
+ Type *IntStepTy =
+ IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
+ VPValue *StartIdx0 = Steps->getOperand(2);
+ StartIdx0 = Builder.createOverflowingOp(
+ Instruction::Mul,
+ {StartIdx0,
+ Plan.getConstantInt(TypeInfo.inferScalarType(StartIdx0), Part)});
+ StartIdx0 = Builder.createScalarSExtOrTrunc(
+ StartIdx0, IntStepTy, TypeInfo.inferScalarType(StartIdx0),
+ DebugLoc::getUnknown());
+
+ if (BaseIVTy->isFloatingPointTy())
+ StartIdx0 = Builder.createScalarCast(Instruction::SIToFP, StartIdx0,
+ BaseIVTy, DebugLoc::getUnknown());
+
+ Steps->addOperand(StartIdx0);
+}
+
void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
VPBlockBase *InsertPt = VPR->getSingleSuccessor();
for (unsigned Part = 1; Part != UF; ++Part) {
@@ -136,9 +166,8 @@ void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
VPBlockUtils::blocksOnly<VPBasicBlock>(Part0))) {
for (const auto &[PartIR, Part0R] : zip(*PartIVPBB, *Part0VPBB)) {
remapOperands(&PartIR, Part);
- if (auto *ScalarIVSteps = dyn_cast<VPScalarIVStepsRecipe>(&PartIR)) {
- ScalarIVSteps->addOperand(getConstantInt(Part));
- }
+ if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(&PartIR))
+ addStartIndexForScalarSteps(Steps, Part);
addRecipeForPart(&Part0R, &PartIR, Part);
}
@@ -327,10 +356,12 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
}
remapOperands(Copy, Part);
+ if (auto *ScalarIVSteps = dyn_cast<VPScalarIVStepsRecipe>(Copy))
+ addStartIndexForScalarSteps(ScalarIVSteps, Part);
+
// Add operand indicating the part to generate code for, to recipes still
// requiring it.
- if (isa<VPScalarIVStepsRecipe, VPWidenCanonicalIVRecipe,
- VPVectorEndPointerRecipe>(Copy) ||
+ if (isa<VPWidenCanonicalIVRecipe, VPVectorEndPointerRecipe>(Copy) ||
match(Copy,
m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>()))
Copy->addOperand(getConstantInt(Part));
>From f764e0dcef5fcd6644eae374adcab8945e2e21c2 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 8 Jan 2026 22:20:48 +0000
Subject: [PATCH 2/2] !fixup address comments, thanks
---
llvm/lib/Transforms/Vectorize/VPlan.h | 2 ++
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 4 ++--
llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 22 +++++++++----------
3 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index f208cf82d8e03..0f5c83e1cb992 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3812,6 +3812,8 @@ class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags {
VPValue *getStepValue() const { return getOperand(1); }
+ VPValue *getVFValue() const { return getOperand(2); }
+
/// Returns true if the recipe only uses the first lane of operand \p Op.
bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index ac7e61805c18d..9956379cd6cbb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2434,9 +2434,9 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
EndLane = StartLane + 1;
}
Value *StartIdx0;
- if (getNumOperands() == 3) {
+ if (getNumOperands() == 3)
StartIdx0 = getSignedIntOrFpConstant(BaseIVTy, 0);
- } else
+ else
StartIdx0 = State.get(getOperand(3), true);
for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 152c0d5158c6e..c957019e8a45d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -128,29 +128,27 @@ class UnrollState {
void UnrollState::addStartIndexForScalarSteps(VPScalarIVStepsRecipe *Steps,
unsigned Part) {
- if (Part == 0) {
- Steps->addOperand(getConstantInt(Part));
+ if (Part == 0)
return;
- }
VPBuilder Builder(Steps);
Type *BaseIVTy = TypeInfo.inferScalarType(Steps->getOperand(0));
Type *IntStepTy =
IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
- VPValue *StartIdx0 = Steps->getOperand(2);
- StartIdx0 = Builder.createOverflowingOp(
+ VPValue *StartIndex = Steps->getVFValue();
+ StartIndex = Builder.createOverflowingOp(
Instruction::Mul,
- {StartIdx0,
- Plan.getConstantInt(TypeInfo.inferScalarType(StartIdx0), Part)});
- StartIdx0 = Builder.createScalarSExtOrTrunc(
- StartIdx0, IntStepTy, TypeInfo.inferScalarType(StartIdx0),
+ {StartIndex,
+ Plan.getConstantInt(TypeInfo.inferScalarType(StartIndex), Part)});
+ StartIndex = Builder.createScalarSExtOrTrunc(
+ StartIndex, IntStepTy, TypeInfo.inferScalarType(StartIndex),
DebugLoc::getUnknown());
if (BaseIVTy->isFloatingPointTy())
- StartIdx0 = Builder.createScalarCast(Instruction::SIToFP, StartIdx0,
- BaseIVTy, DebugLoc::getUnknown());
+ StartIndex = Builder.createScalarCast(Instruction::SIToFP, StartIndex,
+ BaseIVTy, DebugLoc::getUnknown());
- Steps->addOperand(StartIdx0);
+ Steps->addOperand(StartIndex);
}
void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
More information about the llvm-commits
mailing list