[llvm] [VPlan] Make canonical IV part of the region (PR #156262)

via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 1 14:25:51 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-vectorizers

Author: Florian Hahn (fhahn)

<details>
<summary>Changes</summary>

The canonical IV is directly tied to a loop region. To directly ensure there's a single, unique canonical IV, directly define it by the region.

Depends on https://github.com/llvm/llvm-project/pull/161589 (include in the PR)

---

Patch is 179.72 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/156262.diff


45 Files Affected:

- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+17-17) 
- (modified) llvm/lib/Transforms/Vectorize/VPlan.cpp (+57-13) 
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+62-96) 
- (modified) llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp (+21-22) 
- (modified) llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp (+15-13) 
- (modified) llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h (+12-1) 
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+3-13) 
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+70-49) 
- (modified) llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp (+2-2) 
- (modified) llvm/lib/Transforms/Vectorize/VPlanUtils.cpp (+1-3) 
- (modified) llvm/lib/Transforms/Vectorize/VPlanValue.h (+21-4) 
- (modified) llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp (-6) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll (+6-6) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll (+2-8) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll (+4-4) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll (+2-1) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll (+8-8) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll (-3) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll (+3-3) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll (+1-7) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll (+1-4) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll (-1) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll (+2-6) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll (+1-2) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll (+41-10) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll (+6-6) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll (+3-3) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll (+5-5) 
- (modified) llvm/test/Transforms/LoopVectorize/ARM/mve-icmpcost.ll (+4-11) 
- (modified) llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll (-2) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/reduction-small-size.ll (+1-2) 
- (modified) llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll (+4-4) 
- (modified) llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll (+5-5) 
- (modified) llvm/test/Transforms/LoopVectorize/vplan-printing.ll (+41-27) 
- (modified) llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp (+1-4) 
- (modified) llvm/unittests/Transforms/Vectorize/VPlanPatternMatchTest.cpp (+14-15) 
- (modified) llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp (+5-22) 


``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index fa5be21dc2b8a..8e4d70aaf7538 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4030,7 +4030,6 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
       case VPDef::VPScalarIVStepsSC:
       case VPDef::VPReplicateSC:
       case VPDef::VPInstructionSC:
-      case VPDef::VPCanonicalIVPHISC:
       case VPDef::VPVectorPointerSC:
       case VPDef::VPVectorEndPointerSC:
       case VPDef::VPExpandSCEVSC:
@@ -8428,6 +8427,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
                             m_Specific(Plan->getCanonicalIV()), m_VPValue())) &&
            "Did not find the canonical IV increment");
     cast<VPRecipeWithIRFlags>(IVInc)->dropPoisonGeneratingFlags();
+    Plan->getCanonicalIVInfo().HasNUW = false;
   }
 
   // ---------------------------------------------------------------------------
@@ -8491,8 +8491,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
       // latter are added above for masking.
       // FIXME: Migrate code relying on the underlying instruction from VPlan0
       // to construct recipes below to not use the underlying instruction.
-      if (isa<VPCanonicalIVPHIRecipe, VPWidenCanonicalIVRecipe, VPBlendRecipe>(
-              &R) ||
+      if (isa<VPWidenCanonicalIVRecipe, VPBlendRecipe>(&R) ||
           (isa<VPInstruction>(&R) && !UnderlyingValue))
         continue;
 
@@ -8679,8 +8678,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
   VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
                                 Builder, BlockMaskCache, nullptr /*LVer*/);
   for (auto &R : Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
-    if (isa<VPCanonicalIVPHIRecipe>(&R))
-      continue;
     auto *HeaderR = cast<VPHeaderPHIRecipe>(&R);
     RecipeBuilder.setRecipe(HeaderR->getUnderlyingInstr(), HeaderR);
   }
@@ -9430,8 +9427,6 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
   SmallPtrSet<PHINode *, 2> EpiWidenedPhis;
   for (VPRecipeBase &R :
        EpiPlan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
-    if (isa<VPCanonicalIVPHIRecipe>(&R))
-      continue;
     EpiWidenedPhis.insert(
         cast<PHINode>(R.getVPSingleValue()->getUnderlyingValue()));
   }
@@ -9492,8 +9487,9 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
   VPPhi *ResumePhi = nullptr;
   if (ResumePhiIter == MainScalarPH->phis().end()) {
     VPBuilder ScalarPHBuilder(MainScalarPH, MainScalarPH->begin());
+    Type *Ty = VPTypeAnalysis(MainPlan).inferScalarType(VectorTC);
     ResumePhi = ScalarPHBuilder.createScalarPhi(
-        {VectorTC, MainPlan.getCanonicalIV()->getStartValue()}, {},
+        {VectorTC, MainPlan.getOrAddLiveIn(Constant::getNullValue(Ty))}, {},
         "vec.epilog.resume.val");
   } else {
     ResumePhi = cast<VPPhi>(&*ResumePhiIter);
@@ -9523,7 +9519,7 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
 
   // Ensure that the start values for all header phi recipes are updated before
   // vectorizing the epilogue loop.
-  VPCanonicalIVPHIRecipe *IV = Plan.getCanonicalIV();
+
   // When vectorizing the epilogue loop, the canonical induction start
   // value needs to be changed from zero to the value after the main
   // vector loop. Find the resume value created during execution of the main
@@ -9552,6 +9548,7 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
     EPI.VectorTripCount = EPResumeVal->getOperand(0);
   }
   VPValue *VPV = Plan.getOrAddLiveIn(EPResumeVal);
+  VPValue *IV = VectorLoop->getCanonicalIV();
   assert(all_of(IV->users(),
                 [](const VPUser *U) {
                   return isa<VPScalarIVStepsRecipe>(U) ||
@@ -9562,11 +9559,14 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
                 }) &&
          "the canonical IV should only be used by its increment or "
          "ScalarIVSteps when resetting the start value");
-  IV->setOperand(0, VPV);
+  VPBuilder Builder(Header, Header->getFirstNonPhi());
+  VPInstruction *Add = Builder.createNaryOp(Instruction::Add, {IV, VPV});
+  IV->replaceAllUsesWith(Add);
+  Add->setOperand(0, IV);
 
   DenseMap<Value *, Value *> ToFrozen;
   SmallVector<Instruction *> InstsToMove;
-  for (VPRecipeBase &R : drop_begin(Header->phis())) {
+  for (VPRecipeBase &R : Header->phis()) {
     Value *ResumeV = nullptr;
     // TODO: Move setting of resume values to prepareToExecute.
     if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R)) {
@@ -9596,12 +9596,12 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
         ToFrozen[StartV] = cast<PHINode>(ResumeV)->getIncomingValueForBlock(
             EPI.MainLoopIterationCountCheck);
 
-        // VPReductionPHIRecipe for FindFirstIV/FindLastIV reductions requires
-        // an adjustment to the resume value. The resume value is adjusted to
-        // the sentinel value when the final value from the main vector loop
-        // equals the start value. This ensures correctness when the start value
-        // might not be less than the minimum value of a monotonically
-        // increasing induction variable.
+        // VPReductionPHIRecipe for FindFirstIV/FindLastIV reductions
+        // requires an adjustment to the resume value. The resume value is
+        // adjusted to the sentinel value when the final value from the main
+        // vector loop equals the start value. This ensures correctness when
+        // the start value might not be less than the minimum value of a
+        // monotonically increasing induction variable.
         BasicBlock *ResumeBB = cast<Instruction>(ResumeV)->getParent();
         IRBuilder<> Builder(ResumeBB, ResumeBB->getFirstNonPHIIt());
         Value *Cmp = Builder.CreateICmpEQ(ResumeV, ToFrozen[StartV]);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 02eb6375aac41..a36656ede623e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -768,10 +768,17 @@ static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry) {
 
 VPRegionBlock *VPRegionBlock::clone() {
   const auto &[NewEntry, NewExiting] = cloneFrom(getEntry());
-  auto *NewRegion = getPlan()->createVPRegionBlock(NewEntry, NewExiting,
-                                                   getName(), isReplicator());
+  auto *NewRegion =
+      getPlan()->createVPRegionBlock(NewEntry, NewExiting, getName());
   for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry))
     Block->setParent(NewRegion);
+
+  if (CanIVInfo.CanIV) {
+    NewRegion->CanIVInfo.CanIV = new VPRegionValue();
+    NewRegion->CanIVInfo.HasNUW = CanIVInfo.HasNUW;
+    NewRegion->CanIVInfo.DL = CanIVInfo.DL;
+  }
+
   return NewRegion;
 }
 
@@ -856,6 +863,11 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
                           VPSlotTracker &SlotTracker) const {
   O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {";
   auto NewIndent = Indent + "  ";
+  if (auto *CanIV = getCanonicalIV()) {
+    O << '\n';
+    CanIV->print(O, SlotTracker);
+    O << '\n';
+  }
   for (auto *BlockBase : vp_depth_first_shallow(Entry)) {
     O << '\n';
     BlockBase->print(O, NewIndent, SlotTracker);
@@ -868,18 +880,37 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
 
 void VPRegionBlock::dissolveToCFGLoop() {
   auto *Header = cast<VPBasicBlock>(getEntry());
-  if (auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>(&Header->front())) {
-    assert(this == getPlan()->getVectorLoopRegion() &&
-           "Canonical IV must be in the entry of the top-level loop region");
-    auto *ScalarR = VPBuilder(CanIV).createScalarPhi(
-        {CanIV->getStartValue(), CanIV->getBackedgeValue()},
-        CanIV->getDebugLoc(), "index");
+  auto *ExitingLatch = cast<VPBasicBlock>(getExiting());
+  VPValue *CanIV = getCanonicalIV();
+  if (CanIV && CanIV->getNumUsers() > 0) {
+    auto *ExitingTerm = ExitingLatch->getTerminator();
+    VPInstruction *CanIVInc = nullptr;
+    // Check if there's a canonical IV increment via an existing terminator.
+    if (match(ExitingTerm,
+              m_BranchOnCount(m_VPInstruction(CanIVInc), m_VPValue()))) {
+      assert(match(CanIVInc,
+                   m_Add(m_CombineOr(m_Specific(CanIV),
+                                     m_Add(m_Specific(CanIV), m_LiveIn())),
+                         m_VPValue())) &&
+             "invalid existing IV increment");
+    }
+    VPlan &Plan = *getPlan();
+    if (!CanIVInc) {
+      CanIVInc = VPBuilder(ExitingTerm)
+                     .createOverflowingOp(
+                         Instruction::Add, {CanIV, &Plan.getVFxUF()},
+                         {CanIVInfo.HasNUW, false}, CanIVInfo.DL, "index.next");
+    }
+    Type *CanIVTy = VPTypeAnalysis(Plan).inferScalarType(CanIV);
+    auto *ScalarR =
+        VPBuilder(Header, Header->begin())
+            .createScalarPhi(
+                {Plan.getOrAddLiveIn(ConstantInt::get(CanIVTy, 0)), CanIVInc},
+                CanIVInfo.DL, "index");
     CanIV->replaceAllUsesWith(ScalarR);
-    CanIV->eraseFromParent();
   }
 
   VPBlockBase *Preheader = getSinglePredecessor();
-  auto *ExitingLatch = cast<VPBasicBlock>(getExiting());
   VPBlockBase *Middle = getSingleSuccessor();
   VPBlockUtils::disconnectBlocks(Preheader, this);
   VPBlockUtils::disconnectBlocks(this, Middle);
@@ -916,7 +947,10 @@ VPlan::~VPlan() {
         for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
           R.setOperand(I, &DummyValue);
       }
+    } else if (auto *CanIV = cast<VPRegionBlock>(VPB)->getCanonicalIV()) {
+      CanIV->replaceAllUsesWith(&DummyValue);
     }
+
     delete VPB;
   }
   for (VPValue *VPV : getLiveIns())
@@ -1224,6 +1258,11 @@ VPlan *VPlan::duplicate() {
   // else NewTripCount will be created and inserted into Old2NewVPValues when
   // TripCount is cloned. In any case NewPlan->TripCount is updated below.
 
+  if (auto *LoopRegion = getVectorLoopRegion()) {
+    Old2NewVPValues[LoopRegion->getCanonicalIV()] =
+        NewPlan->getVectorLoopRegion()->getCanonicalIV();
+  }
+
   remapOperands(Entry, NewEntry, Old2NewVPValues);
 
   // Initialize remaining fields of cloned VPlan.
@@ -1404,6 +1443,8 @@ void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
 /// Returns true if there is a vector loop region and \p VPV is defined in a
 /// loop region.
 static bool isDefinedInsideLoopRegions(const VPValue *VPV) {
+  if (isa<VPRegionValue>(VPV))
+    return true;
   const VPRecipeBase *DefR = VPV->getDefiningRecipe();
   return DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() ||
                   DefR->getParent()->getEnclosingLoopRegion());
@@ -1513,9 +1554,12 @@ void VPSlotTracker::assignNames(const VPlan &Plan) {
 
   ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>>
       RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));
-  for (const VPBasicBlock *VPBB :
-       VPBlockUtils::blocksOnly<const VPBasicBlock>(RPOT))
-    assignNames(VPBB);
+  for (const VPBlockBase *VPB : RPOT) {
+    if (auto *VPBB = dyn_cast<VPBasicBlock>(VPB)) {
+      assignNames(VPBB);
+    } else if (auto *CanIV = cast<VPRegionBlock>(VPB)->getCanonicalIV())
+      assignName(CanIV);
+  }
 }
 
 void VPSlotTracker::assignNames(const VPBasicBlock *VPBB) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index c167dd7f65fac..926bb88995348 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -551,7 +551,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
     case VPRecipeBase::VPWidenSelectSC:
     case VPRecipeBase::VPBlendSC:
     case VPRecipeBase::VPPredInstPHISC:
-    case VPRecipeBase::VPCanonicalIVPHISC:
     case VPRecipeBase::VPActiveLaneMaskPHISC:
     case VPRecipeBase::VPFirstOrderRecurrencePHISC:
     case VPRecipeBase::VPWidenPHISC:
@@ -1957,12 +1956,6 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
 /// the backedge is the second operand.
 ///
 /// Inductions are modeled using the following sub-classes:
-///  * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
-///    starting at a specified value (zero for the main vector loop, the resume
-///    value for the epilogue vector loop) and stepping by 1. The induction
-///    controls exiting of the vector loop by comparing against the vector trip
-///    count. Produces a single scalar PHI for the induction value per
-///    iteration.
 ///  * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
 ///    floating point inductions with arbitrary start and step values. Produces
 ///    a vector PHI per-part.
@@ -3435,63 +3428,6 @@ class VPExpandSCEVRecipe : public VPSingleDefRecipe {
   const SCEV *getSCEV() const { return Expr; }
 };
 
-/// Canonical scalar induction phi of the vector loop. Starting at the specified
-/// start value (either 0 or the resume value when vectorizing the epilogue
-/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
-/// canonical induction variable.
-class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
-public:
-  VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
-      : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}
-
-  ~VPCanonicalIVPHIRecipe() override = default;
-
-  VPCanonicalIVPHIRecipe *clone() override {
-    auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc());
-    R->addOperand(getBackedgeValue());
-    return R;
-  }
-
-  VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
-
-  void execute(VPTransformState &State) override {
-    llvm_unreachable("cannot execute this recipe, should be replaced by a "
-                     "scalar phi recipe");
-  }
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-  /// Print the recipe.
-  void print(raw_ostream &O, const Twine &Indent,
-             VPSlotTracker &SlotTracker) const override;
-#endif
-
-  /// Returns the scalar type of the induction.
-  Type *getScalarType() const {
-    return getStartValue()->getLiveInIRValue()->getType();
-  }
-
-  /// Returns true if the recipe only uses the first lane of operand \p Op.
-  bool onlyFirstLaneUsed(const VPValue *Op) const override {
-    assert(is_contained(operands(), Op) &&
-           "Op must be an operand of the recipe");
-    return true;
-  }
-
-  /// Returns true if the recipe only uses the first part of operand \p Op.
-  bool onlyFirstPartUsed(const VPValue *Op) const override {
-    assert(is_contained(operands(), Op) &&
-           "Op must be an operand of the recipe");
-    return true;
-  }
-
-  /// Return the cost of this VPCanonicalIVPHIRecipe.
-  InstructionCost computeCost(ElementCount VF,
-                              VPCostContext &Ctx) const override {
-    // For now, match the behavior of the legacy cost model.
-    return 0;
-  }
-};
-
 /// A recipe for generating the active lane mask for the vector loop that is
 /// used to predicate the vector operations.
 /// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
@@ -3570,14 +3506,13 @@ class VPEVLBasedIVPHIRecipe : public VPHeaderPHIRecipe {
 class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe,
                                  public VPUnrollPartAccessor<1> {
 public:
-  VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
+  VPWidenCanonicalIVRecipe(VPValue *CanonicalIV)
       : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}
 
   ~VPWidenCanonicalIVRecipe() override = default;
 
   VPWidenCanonicalIVRecipe *clone() override {
-    return new VPWidenCanonicalIVRecipe(
-        cast<VPCanonicalIVPHIRecipe>(getOperand(0)));
+    return new VPWidenCanonicalIVRecipe(getOperand(0));
   }
 
   VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
@@ -3616,8 +3551,7 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {
 
 public:
   VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start,
-                    VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
-                    const Twine &Name = "")
+                    VPValue *CanonicalIV, VPValue *Step, const Twine &Name = "")
       : VPDerivedIVRecipe(
             IndDesc.getKind(),
             dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
@@ -3963,6 +3897,23 @@ class VPIRBasicBlock : public VPBasicBlock {
   BasicBlock *getIRBasicBlock() const { return IRBB; }
 };
 
+/// Track information about the canonical IV value of a region.
+struct VPCanonicalIVInfo {
+  VPRegionValue *CanIV = nullptr;
+  bool HasNUW = true;
+  DebugLoc DL = DebugLoc::getUnknown();
+
+  VPCanonicalIVInfo(VPRegionValue *CanIV, bool HasNUW, DebugLoc DL)
+      : CanIV(CanIV), HasNUW(HasNUW), DL(DL) {}
+
+  VPCanonicalIVInfo() {}
+
+  ~VPCanonicalIVInfo() {
+    if (CanIV)
+      delete CanIV;
+  }
+};
+
 /// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
 /// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
 /// A VPRegionBlock may indicate that its contents are to be replicated several
@@ -3981,23 +3932,35 @@ class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
   /// VPRegionBlock.
   VPBlockBase *Exiting;
 
-  /// An indicator whether this region is to generate multiple replicated
-  /// instances of output IR corresponding to its VPBlockBases.
-  bool IsReplicator;
+  /// Canonical IV of the loop region. If CanIV is nullptr, the region is a
+  /// replicating region.
+  VPCanonicalIVInfo CanIVInfo;
 
   /// Use VPlan::createVPRegionBlock to create VPRegionBlocks.
   VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
-                const std::string &Name = "", bool IsReplicator = false)
+                const std::string &Name = "")
+      : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
+        CanIVInfo() {
+    assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
+    assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
+    Entry->setParent(this);
+    Exiting->setParent(this);
+  }
+
+  VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
+                const VPCanonicalIVInfo &CanIVInfo,
+                const std::string &Name = "")
       : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
-        IsReplicator(IsReplicator) {
+        CanIVInfo(CanIVInfo) {
     assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
     assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
     Entry->setParent(this);
     Exiting->setParent(this);
   }
-  VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
+
+  VPRegionBlock(DebugLoc DL, const std::string &Name = "")
       : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
-        IsReplicator(IsReplicator) {}
+        CanIVInfo(new VPRegionValue(), true, DL) {}
 
 public:
   ~VPRegionBlock() override {}
@@ -4039,7 +4002,7 @@ class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
 
   /// An indicator whether this region is to generate multiple replicated
   /// instances of output IR corresponding to its VPBlockBases.
-  bool isReplicator() const { return IsReplicator; }
+  bool isReplicator() const { return !getCanonicalIV(); }
 
   /// The method which generates the output IR instructions that correspond to
   /// this VPRegionBlock, thereby "executing" the VPlan.
@@ -4067,6 +4030,13 @@ class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
   /// Remove the current region from its VPlan, connecting its predecessor to
   /// its entry, and its exiting block to its successor.
   void dissolveToCFGLoop();
+
+  /// Return the canonical induction variable of the region, null for
+  /// replicating regions.
+  VPValue *getCanonicalIV() { return CanIVInfo.CanIV; }
+  const VPValue *getCanonicalIV() const { return CanIVInfo.CanIV; }
+
+  VPCanonicalIVInfo &getCanonicalIVInfo() { return CanIVInfo; }
 };
 
 /// VPlan models a candidate for vectorization, encoding various decisions take
@@ -4378,14 +4348,10 @@ class VPlan {
 ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/156262


More information about the llvm-commits mailing list