[llvm] [VPlan] Replace UnrollPart for VPScalarIVSteps with start index op (NFC) (PR #170906)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 8 14:21:18 PST 2026


https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/170906

>From 395377be8445531f536aeb3274f1b12b1733666d Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 5 Dec 2025 16:56:15 +0000
Subject: [PATCH 1/2] [VPlan] Replace UnrollPart for VPScalarIVSteps with start
 index op (NFC)

Replace the unroll part operand for VPScalarIVStepsRecipe with the start
index. This simplifies the changes in https://github.com/llvm/llvm-project/pull/170053
and is also a first step toward breaking the recipe down into its components.
---
 .../Vectorize/LoopVectorizationPlanner.h      | 11 +++++
 llvm/lib/Transforms/Vectorize/VPlan.h         | 10 ++---
 .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 20 ++-------
 .../Transforms/Vectorize/VPlanTransforms.cpp  |  4 +-
 llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 41 ++++++++++++++++---
 5 files changed, 57 insertions(+), 29 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 430d9469c7698..26d4e80749a91 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -322,6 +322,17 @@ class VPBuilder {
     return createScalarCast(CastOp, Op, ResultTy, DL);
   }
 
+  VPValue *createScalarSExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy,
+                                   DebugLoc DL) {
+    if (ResultTy == SrcTy)
+      return Op;
+    Instruction::CastOps CastOp =
+        ResultTy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits()
+            ? Instruction::Trunc
+            : Instruction::SExt;
+    return createScalarCast(CastOp, Op, ResultTy, DL);
+  }
+
   VPWidenCastRecipe *createWidenCast(Instruction::CastOps Opcode, VPValue *Op,
                                      Type *ResultTy) {
     VPIRFlags Flags;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index e2dfc4678c6d0..46261f38af2c5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3805,9 +3805,9 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {
 };
 
 /// A recipe for handling phi nodes of integer and floating-point inductions,
-/// producing their scalar values.
-class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags,
-                                                public VPUnrollPartAccessor<3> {
+/// producing their scalar values. Before unrolling the recipe has 3 operands:
+/// IV, step and VF. Unrolling adds an extra operand StartIndex.
+class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags {
   Instruction::BinaryOps InductionOpcode;
 
 public:
@@ -3837,10 +3837,6 @@ class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags,
         getDebugLoc());
   }
 
-  /// Return true if this VPScalarIVStepsRecipe corresponds to part 0. Note that
-  /// this is only accurate after the VPlan has been unrolled.
-  bool isPart0() const { return getUnrollPart(*this) == 0; }
-
   VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
 
   /// Generate the scalarized versions of the phi node as needed by their users.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 991e32a24fe2d..13133e225ac3e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2407,8 +2407,6 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
   // iteration.
   bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
   // Compute the scalar steps and save the results in State.
-  Type *IntStepTy =
-      IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
 
   unsigned StartLane = 0;
   unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
@@ -2417,20 +2415,10 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
     EndLane = StartLane + 1;
   }
   Value *StartIdx0;
-  if (getUnrollPart(*this) == 0)
-    StartIdx0 = ConstantInt::get(IntStepTy, 0);
-  else {
-    StartIdx0 = State.get(getOperand(2), true);
-    if (getUnrollPart(*this) != 1) {
-      StartIdx0 =
-          Builder.CreateMul(StartIdx0, ConstantInt::get(StartIdx0->getType(),
-                                                        getUnrollPart(*this)));
-    }
-    StartIdx0 = Builder.CreateSExtOrTrunc(StartIdx0, IntStepTy);
-  }
-
-  if (BaseIVTy->isFloatingPointTy())
-    StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);
+  if (getNumOperands() == 3) {
+    StartIdx0 = getSignedIntOrFpConstant(BaseIVTy, 0);
+  } else
+    StartIdx0 = State.get(getOperand(3), true);
 
   for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
     Value *StartIdx = Builder.CreateBinOp(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 8f0cf7d5beeed..d157e9d9ca14b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1526,7 +1526,9 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
   // VPScalarIVSteps for part 0 can be replaced by their start value, if only
   // the first lane is demanded.
   if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(Def)) {
-    if (Steps->isPart0() && vputils::onlyFirstLaneUsed(Steps)) {
+    if ((Steps->getNumOperands() == 3 ||
+         match(Steps->getOperand(3), m_ZeroInt())) &&
+        vputils::onlyFirstLaneUsed(Steps)) {
       Steps->replaceAllUsesWith(Steps->getOperand(0));
       return;
     }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 8198945764936..965b02a07c40b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -53,6 +53,9 @@ class UnrollState {
   /// Unroll replicate region \p VPR by cloning the region UF - 1 times.
   void unrollReplicateRegionByUF(VPRegionBlock *VPR);
 
+  /// Add a start index operand to \p Steps for \p Part.
+  void addStartIndexForScalarSteps(VPScalarIVStepsRecipe *Steps, unsigned Part);
+
   /// Unroll recipe \p R by cloning it UF - 1 times, unless it is uniform across
   /// all parts.
   void unrollRecipeByUF(VPRecipeBase &R);
@@ -123,6 +126,33 @@ class UnrollState {
 };
 } // namespace
 
+void UnrollState::addStartIndexForScalarSteps(VPScalarIVStepsRecipe *Steps,
+                                              unsigned Part) {
+  if (Part == 0) {
+    Steps->addOperand(getConstantInt(Part));
+    return;
+  }
+
+  VPBuilder Builder(Steps);
+  Type *BaseIVTy = TypeInfo.inferScalarType(Steps->getOperand(0));
+  Type *IntStepTy =
+      IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
+  VPValue *StartIdx0 = Steps->getOperand(2);
+  StartIdx0 = Builder.createOverflowingOp(
+      Instruction::Mul,
+      {StartIdx0,
+       Plan.getConstantInt(TypeInfo.inferScalarType(StartIdx0), Part)});
+  StartIdx0 = Builder.createScalarSExtOrTrunc(
+      StartIdx0, IntStepTy, TypeInfo.inferScalarType(StartIdx0),
+      DebugLoc::getUnknown());
+
+  if (BaseIVTy->isFloatingPointTy())
+    StartIdx0 = Builder.createScalarCast(Instruction::SIToFP, StartIdx0,
+                                         BaseIVTy, DebugLoc::getUnknown());
+
+  Steps->addOperand(StartIdx0);
+}
+
 void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
   VPBlockBase *InsertPt = VPR->getSingleSuccessor();
   for (unsigned Part = 1; Part != UF; ++Part) {
@@ -136,9 +166,8 @@ void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
              VPBlockUtils::blocksOnly<VPBasicBlock>(Part0))) {
       for (const auto &[PartIR, Part0R] : zip(*PartIVPBB, *Part0VPBB)) {
         remapOperands(&PartIR, Part);
-        if (auto *ScalarIVSteps = dyn_cast<VPScalarIVStepsRecipe>(&PartIR)) {
-          ScalarIVSteps->addOperand(getConstantInt(Part));
-        }
+        if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(&PartIR))
+          addStartIndexForScalarSteps(Steps, Part);
 
         addRecipeForPart(&Part0R, &PartIR, Part);
       }
@@ -327,10 +356,12 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
     }
     remapOperands(Copy, Part);
 
+    if (auto *ScalarIVSteps = dyn_cast<VPScalarIVStepsRecipe>(Copy))
+      addStartIndexForScalarSteps(ScalarIVSteps, Part);
+
     // Add operand indicating the part to generate code for, to recipes still
     // requiring it.
-    if (isa<VPScalarIVStepsRecipe, VPWidenCanonicalIVRecipe,
-            VPVectorEndPointerRecipe>(Copy) ||
+    if (isa<VPWidenCanonicalIVRecipe, VPVectorEndPointerRecipe>(Copy) ||
         match(Copy,
               m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>()))
       Copy->addOperand(getConstantInt(Part));

>From f764e0dcef5fcd6644eae374adcab8945e2e21c2 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 8 Jan 2026 22:20:48 +0000
Subject: [PATCH 2/2] !fixup address comments, thanks

---
 llvm/lib/Transforms/Vectorize/VPlan.h         |  2 ++
 .../lib/Transforms/Vectorize/VPlanRecipes.cpp |  4 ++--
 llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 22 +++++++++----------
 3 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index f208cf82d8e03..0f5c83e1cb992 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3812,6 +3812,8 @@ class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags {
 
   VPValue *getStepValue() const { return getOperand(1); }
 
+  VPValue *getVFValue() const { return getOperand(2); }
+
   /// Returns true if the recipe only uses the first lane of operand \p Op.
   bool usesFirstLaneOnly(const VPValue *Op) const override {
     assert(is_contained(operands(), Op) &&
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index ac7e61805c18d..9956379cd6cbb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2434,9 +2434,9 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
     EndLane = StartLane + 1;
   }
   Value *StartIdx0;
-  if (getNumOperands() == 3) {
+  if (getNumOperands() == 3)
     StartIdx0 = getSignedIntOrFpConstant(BaseIVTy, 0);
-  } else
+  else
     StartIdx0 = State.get(getOperand(3), true);
 
   for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 152c0d5158c6e..c957019e8a45d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -128,29 +128,27 @@ class UnrollState {
 
 void UnrollState::addStartIndexForScalarSteps(VPScalarIVStepsRecipe *Steps,
                                               unsigned Part) {
-  if (Part == 0) {
-    Steps->addOperand(getConstantInt(Part));
+  if (Part == 0)
     return;
-  }
 
   VPBuilder Builder(Steps);
   Type *BaseIVTy = TypeInfo.inferScalarType(Steps->getOperand(0));
   Type *IntStepTy =
       IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
-  VPValue *StartIdx0 = Steps->getOperand(2);
-  StartIdx0 = Builder.createOverflowingOp(
+  VPValue *StartIndex = Steps->getVFValue();
+  StartIndex = Builder.createOverflowingOp(
       Instruction::Mul,
-      {StartIdx0,
-       Plan.getConstantInt(TypeInfo.inferScalarType(StartIdx0), Part)});
-  StartIdx0 = Builder.createScalarSExtOrTrunc(
-      StartIdx0, IntStepTy, TypeInfo.inferScalarType(StartIdx0),
+      {StartIndex,
+       Plan.getConstantInt(TypeInfo.inferScalarType(StartIndex), Part)});
+  StartIndex = Builder.createScalarSExtOrTrunc(
+      StartIndex, IntStepTy, TypeInfo.inferScalarType(StartIndex),
       DebugLoc::getUnknown());
 
   if (BaseIVTy->isFloatingPointTy())
-    StartIdx0 = Builder.createScalarCast(Instruction::SIToFP, StartIdx0,
-                                         BaseIVTy, DebugLoc::getUnknown());
+    StartIndex = Builder.createScalarCast(Instruction::SIToFP, StartIndex,
+                                          BaseIVTy, DebugLoc::getUnknown());
 
-  Steps->addOperand(StartIdx0);
+  Steps->addOperand(StartIndex);
 }
 
 void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {



More information about the llvm-commits mailing list