[llvm] [VPlan] Replace UnrollPart for VPScalarIVSteps with start index op (NFC) (PR #170906)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 5 11:28:11 PST 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/170906
>From b19080589f6b63229030832eb6d33632f59735e2 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 5 Dec 2025 16:56:15 +0000
Subject: [PATCH] [VPlan] Replace UnrollPart for VPScalarIVSteps with start
index op (NFC)
Replace the unroll part operand of VPScalarIVStepsRecipe with a start-index
operand. This simplifies https://github.com/llvm/llvm-project/pull/170053
and is also a first step towards breaking the recipe down into its components.
---
.../Vectorize/LoopVectorizationPlanner.h | 11 +++++
llvm/lib/Transforms/Vectorize/VPlan.h | 10 ++---
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 20 ++-------
.../Transforms/Vectorize/VPlanTransforms.cpp | 4 +-
llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 42 ++++++++++++++++---
5 files changed, 58 insertions(+), 29 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 741392247c0d6..d89c05e22cc4c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -322,6 +322,17 @@ class VPBuilder {
return createScalarCast(CastOp, Op, ResultTy, DL);
}
+ VPValue *createScalarSExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy,
+ DebugLoc DL) {
+ if (ResultTy == SrcTy)
+ return Op;
+ Instruction::CastOps CastOp =
+ ResultTy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits()
+ ? Instruction::Trunc
+ : Instruction::SExt;
+ return createScalarCast(CastOp, Op, ResultTy, DL);
+ }
+
VPWidenCastRecipe *createWidenCast(Instruction::CastOps Opcode, VPValue *Op,
Type *ResultTy) {
VPIRFlags Flags;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index fe60e97d44997..24a1d75b4d7fb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3783,9 +3783,9 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {
};
/// A recipe for handling phi nodes of integer and floating-point inductions,
-/// producing their scalar values.
-class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags,
- public VPUnrollPartAccessor<3> {
+/// producing their scalar values. Before unrolling the recipe has 3 operands:
+/// IV, step and VF. Unrolling adds an extra operand StartIndex.
+class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags {
Instruction::BinaryOps InductionOpcode;
public:
@@ -3815,10 +3815,6 @@ class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags,
getDebugLoc());
}
- /// Return true if this VPScalarIVStepsRecipe corresponds to part 0. Note that
- /// this is only accurate after the VPlan has been unrolled.
- bool isPart0() const { return getUnrollPart(*this) == 0; }
-
VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
/// Generate the scalarized versions of the phi node as needed by their users.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 1b1308c78c76e..47baa0f54c8ff 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2380,8 +2380,6 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
// iteration.
bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
// Compute the scalar steps and save the results in State.
- Type *IntStepTy =
- IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
unsigned StartLane = 0;
unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
@@ -2390,20 +2388,10 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
EndLane = StartLane + 1;
}
Value *StartIdx0;
- if (getUnrollPart(*this) == 0)
- StartIdx0 = ConstantInt::get(IntStepTy, 0);
- else {
- StartIdx0 = State.get(getOperand(2), true);
- if (getUnrollPart(*this) != 1) {
- StartIdx0 =
- Builder.CreateMul(StartIdx0, ConstantInt::get(StartIdx0->getType(),
- getUnrollPart(*this)));
- }
- StartIdx0 = Builder.CreateSExtOrTrunc(StartIdx0, IntStepTy);
- }
-
- if (BaseIVTy->isFloatingPointTy())
- StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);
+ if (getNumOperands() == 3) {
+ StartIdx0 = getSignedIntOrFpConstant(BaseIVTy, 0);
+ } else
+ StartIdx0 = State.get(getOperand(3), true);
for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
Value *StartIdx = Builder.CreateBinOp(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index a59c8cf9ea1ef..8432cf210807d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1459,7 +1459,9 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
// VPScalarIVSteps for part 0 can be replaced by their start value, if only
// the first lane is demanded.
if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(Def)) {
- if (Steps->isPart0() && vputils::onlyFirstLaneUsed(Steps)) {
+ if ((Steps->getNumOperands() == 3 ||
+ match(Steps->getOperand(3), m_ZeroInt())) &&
+ vputils::onlyFirstLaneUsed(Steps)) {
Steps->replaceAllUsesWith(Steps->getOperand(0));
return;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index f215476b1e163..a1b6440a67c4e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -53,6 +53,9 @@ class UnrollState {
/// Unroll replicate region \p VPR by cloning the region UF - 1 times.
void unrollReplicateRegionByUF(VPRegionBlock *VPR);
+ /// Add a start index operand to \p Steps for \p Part.
+ void addStartIndexForScalarSteps(VPScalarIVStepsRecipe *Steps, unsigned Part);
+
/// Unroll recipe \p R by cloning it UF - 1 times, unless it is uniform across
/// all parts.
void unrollRecipeByUF(VPRecipeBase &R);
@@ -123,6 +126,33 @@ class UnrollState {
};
} // namespace
+void UnrollState::addStartIndexForScalarSteps(VPScalarIVStepsRecipe *Steps,
+ unsigned Part) {
+ if (Part == 0) {
+ Steps->addOperand(getConstantInt(Part));
+ return;
+ }
+
+ VPBuilder Builder(Steps);
+ Type *BaseIVTy = TypeInfo.inferScalarType(Steps->getOperand(0));
+ Type *IntStepTy =
+ IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
+ VPValue *StartIdx0 = Steps->getOperand(2);
+ StartIdx0 = Builder.createOverflowingOp(
+ Instruction::Mul,
+ {StartIdx0,
+ Plan.getConstantInt(TypeInfo.inferScalarType(StartIdx0), Part)});
+ StartIdx0 = Builder.createScalarSExtOrTrunc(
+ StartIdx0, IntStepTy, TypeInfo.inferScalarType(StartIdx0),
+ DebugLoc::getUnknown());
+
+ if (BaseIVTy->isFloatingPointTy())
+ StartIdx0 = Builder.createScalarCast(Instruction::SIToFP, StartIdx0,
+ BaseIVTy, DebugLoc::getUnknown());
+
+ Steps->addOperand(StartIdx0);
+}
+
void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
VPBlockBase *InsertPt = VPR->getSingleSuccessor();
for (unsigned Part = 1; Part != UF; ++Part) {
@@ -136,9 +166,8 @@ void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
VPBlockUtils::blocksOnly<VPBasicBlock>(Part0))) {
for (const auto &[PartIR, Part0R] : zip(*PartIVPBB, *Part0VPBB)) {
remapOperands(&PartIR, Part);
- if (auto *ScalarIVSteps = dyn_cast<VPScalarIVStepsRecipe>(&PartIR)) {
- ScalarIVSteps->addOperand(getConstantInt(Part));
- }
+ if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(&PartIR))
+ addStartIndexForScalarSteps(Steps, Part);
addRecipeForPart(&Part0R, &PartIR, Part);
}
@@ -311,10 +340,13 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
}
remapOperands(Copy, Part);
+ if (auto *ScalarIVSteps = dyn_cast<VPScalarIVStepsRecipe>(Copy))
+ addStartIndexForScalarSteps(ScalarIVSteps, Part);
+
// Add operand indicating the part to generate code for, to recipes still
// requiring it.
- if (isa<VPScalarIVStepsRecipe, VPWidenCanonicalIVRecipe,
- VPVectorPointerRecipe, VPVectorEndPointerRecipe>(Copy) ||
+ if (isa<VPWidenCanonicalIVRecipe, VPVectorPointerRecipe,
+ VPVectorEndPointerRecipe>(Copy) ||
match(Copy,
m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>()))
Copy->addOperand(getConstantInt(Part));
More information about the llvm-commits
mailing list