[llvm] [VPlan] Introduce ExitPhi VPInstruction, use to create phi for FOR. (PR #94760)

via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 7 07:33:52 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-systemz

Author: Florian Hahn (fhahn)

<details>
<summary>Changes</summary>

This patch introduces a new ExitPhi VPInstruction which creates a phi in
a leaf block of a VPlan. The first use is to create the phi node for
fixed-order recurrence resume values in the scalar preheader.

The VPInstruction takes 2 operands: 1) the incoming value from the
middle-block and a default value to be used for all other incoming
blocks.

In follow-up changes, it will also be used to create phis for reduction and
induction resume values.

Depends on https://github.com/llvm/llvm-project/pull/92651

---

Patch is 177.53 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/94760.diff


38 Files Affected:

- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+118-103) 
- (modified) llvm/lib/Transforms/Vectorize/VPlan.cpp (+91-12) 
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+26-4) 
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+56-16) 
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+19-12) 
- (modified) llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp (-4) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll (+4-4) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll (+4-4) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll (+4-4) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll (+4-4) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll (+3-3) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll (+48) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll (+16) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll (+18) 
- (modified) llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll (+6-6) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr72969.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/branch-weights.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll (+27-6) 
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll (+28-28) 
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll (+21-21) 
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll (+54-6) 
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll (+78-78) 
- (modified) llvm/test/Transforms/LoopVectorize/induction.ll (+21-21) 
- (modified) llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll (+18-1) 
- (modified) llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll (+3-3) 
- (modified) llvm/test/Transforms/LoopVectorize/pr36983.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/pr59319-loop-access-info-invalidation.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll (+8-8) 
- (modified) llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll (+9-1) 
- (modified) llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll (+17) 
- (modified) llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll (+10-1) 
- (modified) llvm/test/Transforms/LoopVectorize/vplan-printing.ll (+124-3) 
- (modified) llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll (+9-1) 
- (modified) llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll (+15) 
- (modified) llvm/unittests/Transforms/Vectorize/VPlanTestBase.h (+8-8) 


``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index c7c19ef456c7c..ae62df3aed207 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -607,10 +607,6 @@ class InnerLoopVectorizer {
                     BasicBlock *MiddleBlock, BasicBlock *VectorHeader,
                     VPlan &Plan, VPTransformState &State);
 
-  /// Create the phi node for the resume value of first order recurrences in the
-  /// scalar preheader and update the users in the scalar loop.
-  void fixFixedOrderRecurrence(VPLiveOut *LO, VPTransformState &State);
-
   /// Iteratively sink the scalarized operands of a predicated instruction into
   /// the block that was created for it.
   void sinkScalarOperands(Instruction *PredInst);
@@ -2972,22 +2968,7 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
       SplitBlock(LoopMiddleBlock, LoopMiddleBlock->getTerminator(), DT, LI,
                  nullptr, Twine(Prefix) + "scalar.ph");
 
-  auto *ScalarLatchTerm = OrigLoop->getLoopLatch()->getTerminator();
-
-  // Set up the middle block terminator.  Two cases:
-  // 1) If we know that we must execute the scalar epilogue, emit an
-  //    unconditional branch.
-  // 2) Otherwise, we must have a single unique exit block (due to how we
-  //    implement the multiple exit case).  In this case, set up a conditional
-  //    branch from the middle block to the loop scalar preheader, and the
-  //    exit block.  completeLoopSkeleton will update the condition to use an
-  //    iteration check, if required to decide whether to execute the remainder.
-  BranchInst *BrInst =
-      Cost->requiresScalarEpilogue(VF.isVector())
-          ? BranchInst::Create(LoopScalarPreHeader)
-          : BranchInst::Create(LoopExitBlock, LoopScalarPreHeader,
-                               Builder.getTrue());
-  BrInst->setDebugLoc(ScalarLatchTerm->getDebugLoc());
+  auto *BrInst = new UnreachableInst(LoopMiddleBlock->getContext());
   ReplaceInstWithInst(LoopMiddleBlock->getTerminator(), BrInst);
 
   // Update dominator for loop exit. During skeleton creation, only the vector
@@ -3094,51 +3075,6 @@ void InnerLoopVectorizer::createInductionResumeValues(
   }
 }
 
-BasicBlock *InnerLoopVectorizer::completeLoopSkeleton() {
-  // The trip counts should be cached by now.
-  Value *Count = getTripCount();
-  Value *VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader);
-
-  auto *ScalarLatchTerm = OrigLoop->getLoopLatch()->getTerminator();
-
-  // Add a check in the middle block to see if we have completed
-  // all of the iterations in the first vector loop.  Three cases:
-  // 1) If we require a scalar epilogue, there is no conditional branch as
-  //    we unconditionally branch to the scalar preheader.  Do nothing.
-  // 2) If (N - N%VF) == N, then we *don't* need to run the remainder.
-  //    Thus if tail is to be folded, we know we don't need to run the
-  //    remainder and we can use the previous value for the condition (true).
-  // 3) Otherwise, construct a runtime check.
-  if (!Cost->requiresScalarEpilogue(VF.isVector()) &&
-      !Cost->foldTailByMasking()) {
-    // Here we use the same DebugLoc as the scalar loop latch terminator instead
-    // of the corresponding compare because they may have ended up with
-    // different line numbers and we want to avoid awkward line stepping while
-    // debugging. Eg. if the compare has got a line number inside the loop.
-    // TODO: At the moment, CreateICmpEQ will simplify conditions with constant
-    // operands. Perform simplification directly on VPlan once the branch is
-    // modeled there.
-    IRBuilder<> B(LoopMiddleBlock->getTerminator());
-    B.SetCurrentDebugLocation(ScalarLatchTerm->getDebugLoc());
-    Value *CmpN = B.CreateICmpEQ(Count, VectorTripCount, "cmp.n");
-    BranchInst &BI = *cast<BranchInst>(LoopMiddleBlock->getTerminator());
-    BI.setCondition(CmpN);
-    if (hasBranchWeightMD(*ScalarLatchTerm)) {
-      // Assume that `Count % VectorTripCount` is equally distributed.
-      unsigned TripCount = UF * VF.getKnownMinValue();
-      assert(TripCount > 0 && "trip count should not be zero");
-      const uint32_t Weights[] = {1, TripCount - 1};
-      setBranchWeights(BI, Weights);
-    }
-  }
-
-#ifdef EXPENSIVE_CHECKS
-  assert(DT->verify(DominatorTree::VerificationLevel::Fast));
-#endif
-
-  return LoopVectorPreHeader;
-}
-
 std::pair<BasicBlock *, Value *>
 InnerLoopVectorizer::createVectorizedLoopSkeleton(
     const SCEV2ValueTy &ExpandedSCEVs) {
@@ -3198,7 +3134,7 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton(
   // Emit phis for the new starting index of the scalar loop.
   createInductionResumeValues(ExpandedSCEVs);
 
-  return {completeLoopSkeleton(), nullptr};
+  return {LoopVectorPreHeader, nullptr};
 }
 
 // Fix up external users of the induction variable. At this point, we are
@@ -3399,8 +3335,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
   for (const auto &[_, LO] : to_vector(Plan.getLiveOuts())) {
     if (!Legal->isFixedOrderRecurrence(LO->getPhi()))
       continue;
-    fixFixedOrderRecurrence(LO, State);
-    Plan.removeLiveOut(LO->getPhi());
   }
 
   // Forget the original basic block.
@@ -3470,31 +3404,16 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
                                VF.getKnownMinValue() * UF);
 }
 
-void InnerLoopVectorizer::fixFixedOrderRecurrence(VPLiveOut *LO,
-                                                  VPTransformState &State) {
-  // Extract the last vector element in the middle block. This will be the
-  // initial value for the recurrence when jumping to the scalar loop.
-  VPValue *VPExtract = LO->getOperand(0);
-  using namespace llvm::VPlanPatternMatch;
-  assert(match(VPExtract, m_VPInstruction<VPInstruction::ExtractFromEnd>(
-                              m_VPValue(), m_VPValue())) &&
-         "FOR LiveOut expects to use an extract from end.");
-  Value *ResumeScalarFOR = State.get(VPExtract, UF - 1, true);
-
-  // Fix the initial value of the original recurrence in the scalar loop.
-  PHINode *ScalarHeaderPhi = LO->getPhi();
-  auto *InitScalarFOR =
-      ScalarHeaderPhi->getIncomingValueForBlock(LoopScalarPreHeader);
-  Builder.SetInsertPoint(LoopScalarPreHeader, LoopScalarPreHeader->begin());
-  auto *ScalarPreheaderPhi =
-      Builder.CreatePHI(ScalarHeaderPhi->getType(), 2, "scalar.recur.init");
-  for (auto *BB : predecessors(LoopScalarPreHeader)) {
-    auto *Incoming = BB == LoopMiddleBlock ? ResumeScalarFOR : InitScalarFOR;
-    ScalarPreheaderPhi->addIncoming(Incoming, BB);
-  }
-  ScalarHeaderPhi->setIncomingValueForBlock(LoopScalarPreHeader,
-                                            ScalarPreheaderPhi);
-  ScalarHeaderPhi->setName("scalar.recur");
+// Helper to reorder blocks so they match the original order even after the
+// order of the predecessors changes. This is only used to avoid a number of
+// test changes due to reordering of incoming blocks in phi nodes and should be
+// removed soon, with the tests being updated.
+static void reorderIncomingBlocks(SmallVectorImpl<BasicBlock *> &Blocks,
+                                  BasicBlock *LoopMiddleBlock) {
+  if (Blocks.front() == LoopMiddleBlock)
+    std::swap(Blocks.front(), Blocks.back());
+  if (Blocks.size() == 3)
+    std::swap(Blocks[0], Blocks[1]);
 }
 
 void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) {
@@ -7388,7 +7307,9 @@ static void createAndCollectMergePhiForReduction(
   // If we are fixing reductions in the epilogue loop then we should already
   // have created a bc.merge.rdx Phi after the main vector body. Ensure that
   // we carry over the incoming values correctly.
-  for (auto *Incoming : predecessors(LoopScalarPreHeader)) {
+  SmallVector<BasicBlock *> Blocks(predecessors(LoopScalarPreHeader));
+  reorderIncomingBlocks(Blocks, LoopMiddleBlock);
+  for (auto *Incoming : Blocks) {
     if (Incoming == LoopMiddleBlock)
       BCBlockPhi->addIncoming(FinalValue, Incoming);
     else if (ResumePhi && is_contained(ResumePhi->blocks(), Incoming))
@@ -7459,6 +7380,21 @@ LoopVectorizationPlanner::executePlan(
   std::tie(State.CFG.PrevBB, CanonicalIVStartValue) =
       ILV.createVectorizedLoopSkeleton(ExpandedSCEVs ? *ExpandedSCEVs
                                                      : State.ExpandedSCEVs);
+#ifdef EXPENSIVE_CHECKS
+  assert(DT->verify(DominatorTree::VerificationLevel::Fast));
+#endif
+
+  VPBasicBlock *MiddleVPBB =
+      cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion()->getSingleSuccessor());
+
+  using namespace llvm::VPlanPatternMatch;
+  if (MiddleVPBB->begin() != MiddleVPBB->end() &&
+      match(&MiddleVPBB->back(), m_BranchOnCond(m_VPValue()))) {
+    cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors()[1])
+        ->resetBlock(OrigLoop->getLoopPreheader());
+  } else
+    cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors()[0])
+        ->resetBlock(OrigLoop->getLoopPreheader());
 
   // Only use noalias metadata when using memory checks guaranteeing no overlap
   // across all iterations.
@@ -7539,6 +7475,18 @@ LoopVectorizationPlanner::executePlan(
 
   ILV.printDebugTracesAtEnd();
 
+  // Adjust branch weight of the branch in the middle block.
+  auto *MiddleTerm =
+      cast<BranchInst>(State.CFG.VPBB2IRBB[ExitVPBB]->getTerminator());
+  if (MiddleTerm->isConditional() &&
+      hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) {
+    // Assume that `Count % VectorTripCount` is equally distributed.
+    unsigned TripCount = State.UF * State.VF.getKnownMinValue();
+    assert(TripCount > 0 && "trip count should not be zero");
+    const uint32_t Weights[] = {1, TripCount - 1};
+    setBranchWeights(*MiddleTerm, Weights);
+  }
+
   return {State.ExpandedSCEVs, ReductionResumeValues};
 }
 
@@ -7595,7 +7543,7 @@ EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton(
   // inductions in the epilogue loop are created before executing the plan for
   // the epilogue loop.
 
-  return {completeLoopSkeleton(), nullptr};
+  return {LoopVectorPreHeader, nullptr};
 }
 
 void EpilogueVectorizerMainLoop::printDebugTracesAtStart() {
@@ -7719,8 +7667,11 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
       VecEpilogueIterationCountCheck,
       VecEpilogueIterationCountCheck->getSinglePredecessor());
 
-  DT->changeImmediateDominator(LoopScalarPreHeader,
-                               EPI.EpilogueIterationCountCheck);
+  if (auto *N = DT->getNode(LoopScalarPreHeader))
+    DT->changeImmediateDominator(LoopScalarPreHeader,
+                                 EPI.EpilogueIterationCountCheck);
+  else
+    DT->addNewBlock(LoopScalarPreHeader, EPI.EpilogueIterationCountCheck);
   if (!Cost->requiresScalarEpilogue(EPI.EpilogueVF.isVector()))
     // If there is an epilogue which must run, there's no edge from the
     // middle block to exit blocks  and thus no need to update the immediate
@@ -7784,7 +7735,7 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
                               {VecEpilogueIterationCountCheck,
                                EPI.VectorTripCount} /* AdditionalBypass */);
 
-  return {completeLoopSkeleton(), EPResumeVal};
+  return {LoopVectorPreHeader, EPResumeVal};
 }
 
 BasicBlock *
@@ -8515,7 +8466,9 @@ static void addUsersInExitBlock(VPBasicBlock *HeaderVPBB, Loop *OrigLoop,
     Value *IncomingValue =
         ExitPhi.getIncomingValueForBlock(ExitingBB);
     VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue, Plan);
-    Plan.addLiveOut(&ExitPhi, V);
+    Plan.addLiveOut(
+        &ExitPhi, V,
+        cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor()));
   }
 }
 
@@ -8534,9 +8487,25 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
   // modified; a basic block for the vector pre-header, followed by a region for
   // the vector loop, followed by the middle basic block. The skeleton vector
   // loop region contains a header and latch basic blocks.
+
+  // Add a check in the middle block to see if we have completed
+  // all of the iterations in the first vector loop.  Three cases:
+  // 1) If we require a scalar epilogue, there is no conditional branch as
+  //    we unconditionally branch to the scalar preheader.  Do nothing.
+  // 2) If (N - N%VF) == N, then we *don't* need to run the remainder.
+  //    Thus if tail is to be folded, we know we don't need to run the
+  //    remainder and we can use the previous value for the condition (true).
+  // 3) Otherwise, construct a runtime check.
+  bool RequiresScalarEpilogueCheck =
+      LoopVectorizationPlanner::getDecisionAndClampRange(
+          [this](ElementCount VF) {
+            return !CM.requiresScalarEpilogue(VF.isVector());
+          },
+          Range);
   VPlanPtr Plan = VPlan::createInitialVPlan(
       createTripCountSCEV(Legal->getWidestInductionType(), PSE, OrigLoop),
-      *PSE.getSE(), OrigLoop->getLoopPreheader());
+      *PSE.getSE(), RequiresScalarEpilogueCheck, CM.foldTailByMasking(),
+      OrigLoop);
   VPBasicBlock *HeaderVPBB = new VPBasicBlock("vector.body");
   VPBasicBlock *LatchVPBB = new VPBasicBlock("vector.latch");
   VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB);
@@ -8679,6 +8648,49 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
          "VPBasicBlock");
   RecipeBuilder.fixHeaderPhis();
 
+  auto *MiddleVPBB =
+      cast<VPBasicBlock>(Plan->getVectorLoopRegion()->getSingleSuccessor());
+
+  VPBasicBlock *ScalarPH = nullptr;
+  for (VPBlockBase *Succ : MiddleVPBB->getSuccessors()) {
+    auto *VPIRBB = dyn_cast<VPIRBasicBlock>(Succ);
+    if (VPIRBB && VPIRBB->getIRBasicBlock() == OrigLoop->getHeader()) {
+      ScalarPH = VPIRBB;
+      break;
+    }
+  }
+
+  if (ScalarPH) {
+    for (auto &H : HeaderVPBB->phis()) {
+      auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&H);
+      if (!FOR)
+        continue;
+      VPBuilder B(ScalarPH);
+      VPBuilder MiddleBuilder;
+      // Set insert point so new recipes are inserted before terminator and
+      // condition, if there is either the former or both.
+      if (MiddleVPBB->getNumSuccessors() != 2)
+        MiddleBuilder.setInsertPoint(MiddleVPBB);
+      else if (isa<VPInstruction>(MiddleVPBB->getTerminator()->getOperand(0)))
+        MiddleBuilder.setInsertPoint(
+            &*std::prev(MiddleVPBB->getTerminator()->getIterator()));
+      else
+        MiddleBuilder.setInsertPoint(MiddleVPBB->getTerminator());
+
+      // Extract the resume value and create a new VPLiveOut for it.
+      auto *Resume = MiddleBuilder.createNaryOp(
+          VPInstruction::ExtractFromEnd,
+          {FOR->getBackedgeValue(),
+           Plan->getOrAddLiveIn(
+               ConstantInt::get(Plan->getCanonicalIV()->getScalarType(), 1))},
+          {}, "vector.recur.extract");
+      auto *R =
+          B.createNaryOp(VPInstruction::ExitPhi, {Resume, FOR->getStartValue()},
+                         {}, "scalar.recur.init");
+      Plan->addLiveOut(cast<PHINode>(FOR->getUnderlyingInstr()), R, ScalarPH);
+    }
+  }
+
   // ---------------------------------------------------------------------------
   // Transform initial VPlan: Apply previously taken decisions, in order, to
   // bring the VPlan to its final state.
@@ -8784,7 +8796,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
   // Create new empty VPlan
   auto Plan = VPlan::createInitialVPlan(
       createTripCountSCEV(Legal->getWidestInductionType(), PSE, OrigLoop),
-      *PSE.getSE(), OrigLoop->getLoopPreheader());
+      *PSE.getSE(), true, false, OrigLoop);
 
   // Build hierarchical CFG
   VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
@@ -8993,6 +9005,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
     }
   }
   Builder.setInsertPoint(&*LatchVPBB->begin());
+  VPBasicBlock *MiddleVPBB =
+      cast<VPBasicBlock>(VectorLoopRegion->getSingleSuccessor());
+  VPBasicBlock::iterator IP = MiddleVPBB->begin();
   for (VPRecipeBase &R :
        Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
     VPReductionPHIRecipe *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
@@ -9101,8 +9116,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
     // also modeled in VPlan.
     auto *FinalReductionResult = new VPInstruction(
         VPInstruction::ComputeReductionResult, {PhiR, NewExitingVPV}, ExitDL);
-    cast<VPBasicBlock>(VectorLoopRegion->getSingleSuccessor())
-        ->appendRecipe(FinalReductionResult);
+    FinalReductionResult->insertBefore(*MiddleVPBB, IP);
+    IP = std::next(FinalReductionResult->getIterator());
     OrigExitingVPV->replaceUsesWithIf(
         FinalReductionResult,
         [](VPUser &User, unsigned) { return isa<VPLiveOut>(&User); });
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index f17be451e6846..4138bee310ece 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -443,11 +443,29 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
 }
 
 void VPIRBasicBlock::execute(VPTransformState *State) {
-  assert(getHierarchicalPredecessors().empty() &&
-         "VPIRBasicBlock cannot have predecessors at the moment");
   assert(getHierarchicalSuccessors().empty() &&
          "VPIRBasicBlock cannot have successors at the moment");
 
+  for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
+    VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
+    auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
+    BasicBlock *PredBB = State->CFG.VPBB2IRBB[PredVPBB];
+
+    assert(PredBB && "Predecessor basic-block not found building successor.");
+    auto *PredBBTerminator = PredBB->getTerminator();
+    LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
+
+    auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
+    if (TermBr) {
+      // Set each forward successor here when it is created, excluding
+      // backedges. A backward successor is set when the branch is created.
+      unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
+      assert(!TermBr->getSuccessor(idx) &&
+             "Trying to reset an existing successor block.");
+      TermBr->setSuccessor(idx, IRBB);
+    }
+  }
+
   State->Builder.SetInsertPoint(getIRBasicBlock()->getTerminator());
   executeRecipes(State, getIRBasicBlock());
 }
@@ -479,6 +497,14 @@ void VPBasicBlock::execute(VPTransformState *State) {
     // The Exit block of a loop is always set to be successor 0 of the Exiting
     // block.
     cast<BranchInst>(ExitingBB->getTerminator())->setSuccessor(0, NewBB);
+    // Set the insert point for recipe execution in the block.
+    State->Builder.SetInsertPoint(NewBB->getTerminator());
+    if (getSuccessors().size() == 1) {
+      BranchInst *Br = State->Builder.CreateBr(NewBB);
+      Br->setSuccessor(0, nullptr);
+      NewBB->getTerminator()->eraseFromParent();
+      State->Builder.SetInsertPoint(NewBB->getTerminator());
+    }
     State->CFG.DTU.applyUpdates({{DominatorTree::Insert, ExitingBB, NewBB}});
   } else if (PrevVPBB && /* A */
              !((SingleHPred = getSingleHierarchicalPredecessor()) &&
@@ -639,6 +665,7 @@ void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
 
   printSuccessors(O, Indent);
 }
+
 #endif
 
 static std::pair<VPBlockBase *, VPBlockBase *> cloneSESE(VPBlockBase *Entry);
@@ -654,12 +681,23 @@ static std::pair<VPBlockBase *, VPBlockBase *> cloneSESE(VPBlockBase *Entry) {
       Entry);
   for (VPBlockBase *BB : RPOT) {
     VPBlockBase *NewBB = BB->clone();
-    for (VPBlockBase *Pred : BB->getPredecessors())
-      VPBlockUtils::connectBlocks(Old2NewVPBlocks[Pred], NewBB);
-
     Old2NewVPBlocks[BB] = NewBB;
   }
 
+  for (VPBlockBase *BB : RPOT) {
+    VPBlockBase *NewBB = Old2NewVPBlocks[BB];
+    SmallVector<VPBlockBase *> NewPreds;
+    for (VPBlockBase *Pred : BB->getPredecessors()) {
+      NewPreds.push_back(Old2NewVPBlocks[Pred]);
+    }
+    NewBB->setPredecessors(NewPreds);
+    SmallVector<VPBlockBase *> NewSuccs;
+    for (VPBlockBase *Succ : BB->successors()) {
+      NewSuccs.push_back(Old2NewVPBlocks[Succ]);
+    }
+    NewBB->setSuccessors(NewSuccs);
+  }
+
 #if !defined(NDEBUG)
   // Verif...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/94760


More information about the llvm-commits mailing list