[llvm] [VPlan] Add VPIRInstruction, use for exit block live-outs. (PR #100735)

via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 26 06:02:32 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Florian Hahn (fhahn)

<details>
<summary>Changes</summary>

Add a new VPIRInstruction recipe to wrap existing IR instructions not to
be modified during execution, execept for PHIs. For PHIs, a single VPValue
operand is allowed, and it is used to add a new incoming value for the
single predecessor VPBB. Expect PHIs, VPIRInstructions cannot have any
operands.

Depends on https://github.com/llvm/llvm-project/pull/100658.

---

Patch is 41.20 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/100735.diff


16 Files Affected:

- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+147-33) 
- (modified) llvm/lib/Transforms/Vectorize/VPlan.cpp (+11-3) 
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+40) 
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+30-7) 
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (-94) 
- (modified) llvm/lib/Transforms/Vectorize/VPlanValue.h (+1) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll (+4) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll (+6-6) 
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll (+2-1) 
- (modified) llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll (+9-11) 
- (modified) llvm/test/Transforms/LoopVectorize/pr36983.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/pr45259.ll (+3-2) 
- (modified) llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll (+2) 
- (modified) llvm/test/Transforms/LoopVectorize/vplan-printing.ll (+9-7) 


``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index ea19af10a1474..fabddbbce139a 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8434,25 +8434,133 @@ static void addUsersInExitBlock(
   if (!ExitBB || !ExitBB->getSinglePredecessor() || !ExitingBB)
     return;
 
-  // Introduce VPUsers modeling the exit values.
-  for (PHINode &ExitPhi : ExitBB->phis()) {
-    Value *IncomingValue =
-        ExitPhi.getIncomingValueForBlock(ExitingBB);
-    VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue, Plan);
-    // Exit values for inductions are computed and updated outside of VPlan and
-    // independent of induction recipes.
-    // TODO: Compute induction exit values in VPlan, use VPLiveOuts to update
-    // live-outs.
-    if ((isa<VPWidenIntOrFpInductionRecipe>(V) &&
-         !cast<VPWidenIntOrFpInductionRecipe>(V)->getTruncInst()) ||
-        isa<VPWidenPointerInductionRecipe>(V) ||
-        (isa<Instruction>(IncomingValue) &&
-         any_of(IncomingValue->users(), [&Inductions](User *U) {
-           auto *P = dyn_cast<PHINode>(U);
-           return P && Inductions.contains(P);
-         })))
+  auto *MiddleBlock = Plan.getVectorLoopRegion()->getSingleSuccessor();
+  for (VPIRBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPIRBasicBlock>(
+           vp_depth_first_shallow(MiddleBlock))) {
+    if (VPBB->getIRBasicBlock() != ExitBB)
       continue;
-    Plan.addLiveOut(&ExitPhi, V);
+
+    for (auto &R : *VPBB) {
+      auto *IR = dyn_cast<VPIRInstruction>(&R);
+      if (!IR)
+        continue;
+      auto *ExitPhi = dyn_cast<PHINode>(&IR->getInstruction());
+      if (!ExitPhi)
+        break;
+      Value *IncomingValue = ExitPhi->getIncomingValueForBlock(ExitingBB);
+      VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue, Plan);
+      // Exit values for inductions are computed and updated outside of VPlan
+      // and independent of induction recipes.
+      // TODO: Compute induction exit values in VPlan, use VPLiveOuts to update
+      // live-outs.
+      if ((isa<VPWidenIntOrFpInductionRecipe>(V) &&
+           !cast<VPWidenIntOrFpInductionRecipe>(V)->getTruncInst()) ||
+          isa<VPWidenPointerInductionRecipe>(V) ||
+          (isa<Instruction>(IncomingValue) &&
+           any_of(IncomingValue->users(), [&Inductions](User *U) {
+             auto *P = dyn_cast<PHINode>(U);
+             return P && Inductions.contains(P);
+           })))
+        continue;
+
+      auto MiddleVPBB =
+          cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
+      VPBuilder B(MiddleVPBB);
+      if (auto *Terminator = MiddleVPBB->getTerminator()) {
+        auto *Condition = dyn_cast<VPInstruction>(Terminator->getOperand(0));
+        assert((!Condition || Condition->getParent() == MiddleVPBB) &&
+               "Condition expected in MiddleVPBB");
+        B.setInsertPoint(Condition ? Condition : Terminator);
+      }
+
+      VPValue *Ext;
+      if (auto *FOR = dyn_cast_or_null<VPFirstOrderRecurrencePHIRecipe>(
+              V->getDefiningRecipe())) {
+        // This is the second phase of vectorizing first-order recurrences. An
+        // overview of the transformation is described below. Suppose we have
+        // the following loop with some use after the loop of the last a[i-1],
+        //
+        //   for (int i = 0; i < n; ++i) {
+        //     t = a[i - 1];
+        //     b[i] = a[i] - t;
+        //   }
+        //   use t;
+        //
+        // There is a first-order recurrence on "a". For this loop, the
+        // shorthand scalar IR looks like:
+        //
+        //   scalar.ph:
+        //     s_init = a[-1]
+        //     br scalar.body
+        //
+        //   scalar.body:
+        //     i = phi [0, scalar.ph], [i+1, scalar.body]
+        //     s1 = phi [s_init, scalar.ph], [s2, scalar.body]
+        //     s2 = a[i]
+        //     b[i] = s2 - s1
+        //     br cond, scalar.body, exit.block
+        //
+        //   exit.block:
+        //     use = lcssa.phi [s1, scalar.body]
+        //
+        // In this example, s1 is a recurrence because it's value depends on the
+        // previous iteration. In the first phase of vectorization, we created a
+        // vector phi v1 for s1. We now complete the vectorization and produce
+        // the shorthand vector IR shown below (for VF = 4, UF = 1).
+        //
+        //   vector.ph:
+        //     v_init = vector(..., ..., ..., a[-1])
+        //     br vector.body
+        //
+        //   vector.body
+        //     i = phi [0, vector.ph], [i+4, vector.body]
+        //     v1 = phi [v_init, vector.ph], [v2, vector.body]
+        //     v2 = a[i, i+1, i+2, i+3];
+        //     v3 = vector(v1(3), v2(0, 1, 2))
+        //     b[i, i+1, i+2, i+3] = v2 - v3
+        //     br cond, vector.body, middle.block
+        //
+        //   middle.block:
+        //     s_penultimate = v2(2) = v3(3)
+        //     s_resume = v2(3)
+        //     br cond, scalar.ph, exit.block
+        //
+        //   scalar.ph:
+        //     s_init' = phi [s_resume, middle.block], [s_init, otherwise]
+        //     br scalar.body
+        //
+        //   scalar.body:
+        //     i = phi [0, scalar.ph], [i+1, scalar.body]
+        //     s1 = phi [s_init', scalar.ph], [s2, scalar.body]
+        //     s2 = a[i]
+        //     b[i] = s2 - s1
+        //     br cond, scalar.body, exit.block
+        //
+        //   exit.block:
+        //     lo = lcssa.phi [s1, scalar.body], [s.penultimate, middle.block]
+        //
+        // After execution completes the vector loop, we extract the next value
+        // of the recurrence (x) to use as the initial value in the scalar loop.
+        // This is modeled by ExtractFromEnd.
+        //
+        // Extract the penultimate value of the recurrence and update VPLiveOut
+        // users of the recurrence splice. Note that the extract of the final
+        // value used to resume in the scalar loop is created earlier during
+        // VPlan construction.
+        Ext = B.createNaryOp(
+            VPInstruction::ExtractFromEnd,
+            {FOR->getBackedgeValue(),
+             Plan.getOrAddLiveIn(ConstantInt::get(
+                 IntegerType::get(ExitBB->getContext(), 32), 2))},
+            {}, "vector.recur.extract.for.phi");
+      } else {
+        Ext = B.createNaryOp(
+            VPInstruction::ExtractFromEnd,
+            {V, Plan.getOrAddLiveIn(ConstantInt::get(
+                    IntegerType::get(ExitBB->getContext(), 32), 1))});
+      }
+      IR->addOperand(Ext);
+    }
   }
 }
 
@@ -8660,14 +8768,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
   // After here, VPBB should not be used.
   VPBB = nullptr;
 
-  if (CM.requiresScalarEpilogue(Range)) {
-    // No edge from the middle block to the unique exit block has been inserted
-    // and there is nothing to fix from vector loop; phis should have incoming
-    // from scalar loop only.
-  } else
-    addUsersInExitBlock(OrigLoop, RecipeBuilder, *Plan,
-                        Legal->getInductionVars());
-
   assert(isa<VPRegionBlock>(Plan->getVectorLoopRegion()) &&
          !Plan->getVectorLoopRegion()->getEntryBasicBlock()->empty() &&
          "entry block must be set to a VPRegionBlock having a non-empty entry "
@@ -8676,6 +8776,14 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
 
   addLiveOutsForFirstOrderRecurrences(*Plan);
 
+  if (CM.requiresScalarEpilogue(Range)) {
+    // No edge from the middle block to the unique exit block has been inserted
+    // and there is nothing to fix from vector loop; phis should have incoming
+    // from scalar loop only.
+  } else
+    addUsersInExitBlock(OrigLoop, RecipeBuilder, *Plan,
+                        Legal->getInductionVars());
+
   // ---------------------------------------------------------------------------
   // Transform initial VPlan: Apply previously taken decisions, in order, to
   // bring the VPlan to its final state.
@@ -8884,10 +8992,12 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
     for (unsigned I = 0; I != Worklist.size(); ++I) {
       VPSingleDefRecipe *Cur = Worklist[I];
       for (VPUser *U : Cur->users()) {
-        auto *UserRecipe = dyn_cast<VPSingleDefRecipe>(U);
-        if (!UserRecipe) {
-          assert(isa<VPLiveOut>(U) &&
-                 "U must either be a VPSingleDef or VPLiveOut");
+        auto *UserRecipe = cast<VPSingleDefRecipe>(U);
+        if (!UserRecipe->getParent()->getParent()) {
+          assert(cast<VPInstruction>(U) &&
+                 cast<VPInstruction>(U)->getOpcode() ==
+                     VPInstruction::ExtractFromEnd &&
+                 "U must be an ExtractFromEnd VPInstruction");
           continue;
         }
         Worklist.insert(UserRecipe);
@@ -9105,8 +9215,10 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
         VPInstruction::ComputeReductionResult, {PhiR, NewExitingVPV}, ExitDL);
     FinalReductionResult->insertBefore(*MiddleVPBB, IP);
     OrigExitingVPV->replaceUsesWithIf(
-        FinalReductionResult,
-        [](VPUser &User, unsigned) { return isa<VPLiveOut>(&User); });
+        FinalReductionResult, [](VPUser &User, unsigned) {
+          auto *R = dyn_cast<VPInstruction>(&User);
+          return R && R->getOpcode() == VPInstruction::ExtractFromEnd;
+        });
   }
 
   VPlanTransforms::clearReductionWrapFlags(*Plan);
@@ -10055,7 +10167,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
         // directly in VPlan.
         EpilogILV.setTripCount(MainILV.getTripCount());
         for (auto &R : make_early_inc_range(*BestEpiPlan.getPreheader())) {
-          auto *ExpandR = cast<VPExpandSCEVRecipe>(&R);
+          auto *ExpandR = dyn_cast<VPExpandSCEVRecipe>(&R);
+          if (!ExpandR)
+            continue;
           auto *ExpandedVal = BestEpiPlan.getOrAddLiveIn(
               ExpandedSCEVs.find(ExpandR->getSCEV())->second);
           ExpandR->replaceAllUsesWith(ExpandedVal);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 58de6256900f0..b16527386521b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -855,10 +855,18 @@ VPlan::~VPlan() {
     delete BackedgeTakenCount;
 }
 
+static VPIRBasicBlock *createVPIRBasicBlockFor(BasicBlock *BB) {
+  auto *VPIRBB = new VPIRBasicBlock(BB);
+  for (Instruction &I :
+       make_range(BB->begin(), BB->getTerminator()->getIterator()))
+    VPIRBB->appendRecipe(new VPIRInstruction(I));
+  return VPIRBB;
+}
+
 VPlanPtr VPlan::createInitialVPlan(const SCEV *TripCount, ScalarEvolution &SE,
                                    bool RequiresScalarEpilogueCheck,
                                    bool TailFolded, Loop *TheLoop) {
-  VPIRBasicBlock *Entry = new VPIRBasicBlock(TheLoop->getLoopPreheader());
+  VPIRBasicBlock *Entry = createVPIRBasicBlockFor(TheLoop->getLoopPreheader());
   VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph");
   auto Plan = std::make_unique<VPlan>(Entry, VecPreheader);
   Plan->TripCount =
@@ -890,7 +898,7 @@ VPlanPtr VPlan::createInitialVPlan(const SCEV *TripCount, ScalarEvolution &SE,
   //    we unconditionally branch to the scalar preheader.  Do nothing.
   // 3) Otherwise, construct a runtime check.
   BasicBlock *IRExitBlock = TheLoop->getUniqueExitBlock();
-  auto *VPExitBlock = new VPIRBasicBlock(IRExitBlock);
+  auto *VPExitBlock = createVPIRBasicBlockFor(IRExitBlock);
   // The connection order corresponds to the operands of the conditional branch.
   VPBlockUtils::insertBlockAfter(VPExitBlock, MiddleVPBB);
   VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
@@ -957,7 +965,7 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
 /// predecessor, which is rewired to the new VPIRBasicBlock. All successors of
 /// VPBB, if any, are rewired to the new VPIRBasicBlock.
 static void replaceVPBBWithIRVPBB(VPBasicBlock *VPBB, BasicBlock *IRBB) {
-  VPIRBasicBlock *IRMiddleVPBB = new VPIRBasicBlock(IRBB);
+  VPIRBasicBlock *IRMiddleVPBB = createVPIRBasicBlockFor(IRBB);
   for (auto &R : make_early_inc_range(*VPBB))
     R.moveBefore(*IRMiddleVPBB, IRMiddleVPBB->end());
   VPBlockBase *PredVPBB = VPBB->getSinglePredecessor();
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index c9da5e5d38a6b..2100734eead1b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -932,6 +932,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
     case VPRecipeBase::VPScalarCastSC:
       return true;
     case VPRecipeBase::VPInterleaveSC:
+    case VPRecipeBase::VPIRInstructionSC:
     case VPRecipeBase::VPBranchOnMaskSC:
     case VPRecipeBase::VPWidenLoadEVLSC:
     case VPRecipeBase::VPWidenLoadSC:
@@ -1399,6 +1400,45 @@ class VPInstruction : public VPRecipeWithIRFlags {
   bool isSingleScalar() const;
 };
 
+/// A recipe to wrap on original IR instruction not to be modified during
+/// execution, execept for PHIs. For PHIs, a single VPValue operand is allowed,
+/// and it is used to add a new incoming value for the single predecessor VPBB.
+/// Expect PHIs, VPIRInstructions cannot have any operands.
+class VPIRInstruction : public VPRecipeBase {
+  Instruction &I;
+
+public:
+  VPIRInstruction(Instruction &I)
+      : VPRecipeBase(VPDef::VPIRInstructionSC, ArrayRef<VPValue *>()), I(I) {}
+
+  ~VPIRInstruction() override = default;
+
+  VP_CLASSOF_IMPL(VPDef::VPIRInstructionSC)
+
+  VPIRInstruction *clone() override {
+    auto *R = new VPIRInstruction(I);
+    for (auto *Op : operands())
+      R->addOperand(Op);
+    return R;
+  }
+
+  void execute(VPTransformState &State) override;
+
+  Instruction &getInstruction() { return I; }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  /// Print the recipe.
+  void print(raw_ostream &O, const Twine &Indent,
+             VPSlotTracker &SlotTracker) const override;
+#endif
+
+  bool usesScalars(const VPValue *Op) const override {
+    assert(is_contained(operands(), Op) &&
+           "Op must be an operand of the recipe");
+    return true;
+  }
+};
+
 /// VPWidenRecipe is a recipe for producing a widened instruction using the
 /// opcode and operands of the recipe. This recipe covers most of the
 /// traditional vectorization cases where each recipe transforms into a
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 2d6d67a55c17d..780eeff5a2b7b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -194,9 +194,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
 
 void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
   VPValue *ExitValue = getOperand(0);
-  auto Lane = vputils::isUniformAfterVectorization(ExitValue)
-                  ? VPLane::getFirstLane()
-                  : VPLane::getLastLaneForVF(State.VF);
   VPBasicBlock *MiddleVPBB =
       cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
   VPRecipeBase *ExitingRecipe = ExitValue->getDefiningRecipe();
@@ -207,10 +204,7 @@ void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
                        ? MiddleVPBB
                        : ExitingVPBB;
   BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
-  // Set insertion point in PredBB in case an extract needs to be generated.
-  // TODO: Model extracts explicitly.
-  State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
-  Value *V = State.get(ExitValue, VPIteration(State.UF - 1, Lane));
+  Value *V = State.get(ExitValue, VPIteration(0, 0));
   if (Phi->getBasicBlockIndex(PredBB) != -1)
     Phi->setIncomingValueForBlock(PredBB, V);
   else
@@ -862,6 +856,35 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
 }
 #endif
 
+void VPIRInstruction::execute(VPTransformState &State) {
+  assert(isa<VPIRBasicBlock>(getParent()) &&
+         "VPIRInstructions can only be placed in VPIRBasicBlocks");
+
+  if (getNumOperands() == 1 && isa<PHINode>(&I)) {
+    VPValue *ExitValue = getOperand(0);
+    auto Lane = vputils::isUniformAfterVectorization(ExitValue)
+                    ? VPLane::getFirstLane()
+                    : VPLane::getLastLaneForVF(State.VF);
+    auto *PredVPBB = cast<VPBasicBlock>(getParent()->getSinglePredecessor());
+    BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
+    // Set insertion point in PredBB in case an extract needs to be generated.
+    // TODO: Model extracts explicitly.
+    State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
+    Value *V = State.get(ExitValue, VPIteration(State.UF - 1, Lane));
+    auto *Phi = cast<PHINode>(&I);
+    Phi->addIncoming(V, PredBB);
+  }
+
+  State.Builder.SetInsertPoint(I.getParent(), std::next(I.getIterator()));
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPIRInstruction::print(raw_ostream &O, const Twine &Indent,
+                            VPSlotTracker &SlotTracker) const {
+  O << Indent << "IR " << I;
+}
+#endif
+
 void VPWidenCallRecipe::execute(VPTransformState &State) {
   assert(State.VF.isVector() && "not widening");
   Function *CalledScalarFn = getCalledScalarFunction();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 045f6c356669f..a2496f067024c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -826,20 +826,6 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
     if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R))
       RecurrencePhis.push_back(FOR);
 
-  VPBasicBlock *MiddleVPBB =
-      cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
-  VPBuilder MiddleBuilder;
-  // Set insert point so new recipes are inserted before terminator and
-  // condition, if there is either the former or both.
-  if (auto *Term =
-          dyn_cast_or_null<VPInstruction>(MiddleVPBB->getTerminator())) {
-    if (auto *Cmp = dyn_cast<VPInstruction>(Term->getOperand(0)))
-      MiddleBuilder.setInsertPoint(Cmp);
-    else
-      MiddleBuilder.setInsertPoint(Term);
-  } else
-    MiddleBuilder.setInsertPoint(MiddleVPBB);
-
   for (VPFirstOrderRecurrencePHIRecipe *FOR : RecurrencePhis) {
     SmallPtrSet<VPFirstOrderRecurrencePHIRecipe *, 4> SeenPhis;
     VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
@@ -872,86 +858,6 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
     // Set the first operand of RecurSplice to FOR again, after replacing
     // all users.
     RecurSplice->setOperand(0, FOR);
-
-    // This is the second phase of vectorizing first-order recurrences. An
-    // overview of the transformation is described below. Suppose we have the
-    // following loop with some use after the loop of the last a[i-1],
-    //
-    //   for (int i = 0; i < n; ++i) {
-    //     t = a[i - 1];
-    //     b[i] = a[i] - t;
-    //   }
-    //   use t;
-    //
-    // There is a first-order recurrence on "a". For this loop, the shorthand
-    // scalar IR looks like:
-    //
-    //   scalar.ph:
-    //     s_init = a[-1]
-    //     br scalar.body
-    //
-    //   scalar.body:
-    //     i = phi [0, scalar.ph], [i+1, scalar.body]
-    //     s1 = phi [s_init, scalar.ph], [s2, scalar.body]
-    //     s2 = a[i]
-    //     b[i] = s2 - s1
-    //     br cond, scalar.body, exit.block
-    //
-    //   exit.block:
-    //     use = lcssa.phi [s1, scalar.body]
-    //
-    // In this example, s1 is a recurrence because it's value depends on the
-    // previous iteration. In the first phase of vectorization, we created a
-    // vector phi v1 for s1. We now complete the vectorization and produce the
-    // shorthand vector IR shown below (for VF = 4, UF = 1).
-    //
-    //   vector.ph:
-    //     v_init = vector(..., ..., ..., a[-1])
-    //     br vector.body
-    //
-    //   vector.body
-    //     i = phi [0, vector.ph], [i+4, vector.body]
-    //     v1 = phi [v_init, vector.ph], [v2, vector.body]
-    //     v2 = a[i, i+1, i+2, i+3];
-    //     v3 = vector(v1(3), v2(0, 1, 2))
-    //     b[i, i+1, i+2, i+3] = v2 - v3
-    ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/100735


More information about the llvm-commits mailing list