[llvm] [VPlan] Introduce ExitPhi VPInstruction, use to create phi for FOR. (PR #94760)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 7 07:33:52 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Florian Hahn (fhahn)
<details>
<summary>Changes</summary>
This patch introduces a new ExitPhi VPInstruction which creates a phi in
a leaf block of a VPlan. The first use is to create the phi node for
fixed-order recurrence resume values in the scalar preheader.
The VPInstruction takes 2 operands: 1) the incoming value from the
middle block and 2) a default value to be used for all other incoming
blocks.
In follow-up changes, it will also be used to create phis for reduction and
induction resume values.
Depends on https://github.com/llvm/llvm-project/pull/92651
---
Patch is 177.53 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/94760.diff
38 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+118-103)
- (modified) llvm/lib/Transforms/Vectorize/VPlan.cpp (+91-12)
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+26-4)
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+56-16)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+19-12)
- (modified) llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp (-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll (+48)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll (+16)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll (+18)
- (modified) llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr72969.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/branch-weights.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll (+27-6)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll (+28-28)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll (+21-21)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll (+54-6)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll (+78-78)
- (modified) llvm/test/Transforms/LoopVectorize/induction.ll (+21-21)
- (modified) llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll (+18-1)
- (modified) llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/pr36983.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/pr59319-loop-access-info-invalidation.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll (+8-8)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll (+9-1)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll (+17)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll (+10-1)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-printing.ll (+124-3)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll (+9-1)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll (+15)
- (modified) llvm/unittests/Transforms/Vectorize/VPlanTestBase.h (+8-8)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index c7c19ef456c7c..ae62df3aed207 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -607,10 +607,6 @@ class InnerLoopVectorizer {
BasicBlock *MiddleBlock, BasicBlock *VectorHeader,
VPlan &Plan, VPTransformState &State);
- /// Create the phi node for the resume value of first order recurrences in the
- /// scalar preheader and update the users in the scalar loop.
- void fixFixedOrderRecurrence(VPLiveOut *LO, VPTransformState &State);
-
/// Iteratively sink the scalarized operands of a predicated instruction into
/// the block that was created for it.
void sinkScalarOperands(Instruction *PredInst);
@@ -2972,22 +2968,7 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
SplitBlock(LoopMiddleBlock, LoopMiddleBlock->getTerminator(), DT, LI,
nullptr, Twine(Prefix) + "scalar.ph");
- auto *ScalarLatchTerm = OrigLoop->getLoopLatch()->getTerminator();
-
- // Set up the middle block terminator. Two cases:
- // 1) If we know that we must execute the scalar epilogue, emit an
- // unconditional branch.
- // 2) Otherwise, we must have a single unique exit block (due to how we
- // implement the multiple exit case). In this case, set up a conditional
- // branch from the middle block to the loop scalar preheader, and the
- // exit block. completeLoopSkeleton will update the condition to use an
- // iteration check, if required to decide whether to execute the remainder.
- BranchInst *BrInst =
- Cost->requiresScalarEpilogue(VF.isVector())
- ? BranchInst::Create(LoopScalarPreHeader)
- : BranchInst::Create(LoopExitBlock, LoopScalarPreHeader,
- Builder.getTrue());
- BrInst->setDebugLoc(ScalarLatchTerm->getDebugLoc());
+ auto *BrInst = new UnreachableInst(LoopMiddleBlock->getContext());
ReplaceInstWithInst(LoopMiddleBlock->getTerminator(), BrInst);
// Update dominator for loop exit. During skeleton creation, only the vector
@@ -3094,51 +3075,6 @@ void InnerLoopVectorizer::createInductionResumeValues(
}
}
-BasicBlock *InnerLoopVectorizer::completeLoopSkeleton() {
- // The trip counts should be cached by now.
- Value *Count = getTripCount();
- Value *VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader);
-
- auto *ScalarLatchTerm = OrigLoop->getLoopLatch()->getTerminator();
-
- // Add a check in the middle block to see if we have completed
- // all of the iterations in the first vector loop. Three cases:
- // 1) If we require a scalar epilogue, there is no conditional branch as
- // we unconditionally branch to the scalar preheader. Do nothing.
- // 2) If (N - N%VF) == N, then we *don't* need to run the remainder.
- // Thus if tail is to be folded, we know we don't need to run the
- // remainder and we can use the previous value for the condition (true).
- // 3) Otherwise, construct a runtime check.
- if (!Cost->requiresScalarEpilogue(VF.isVector()) &&
- !Cost->foldTailByMasking()) {
- // Here we use the same DebugLoc as the scalar loop latch terminator instead
- // of the corresponding compare because they may have ended up with
- // different line numbers and we want to avoid awkward line stepping while
- // debugging. Eg. if the compare has got a line number inside the loop.
- // TODO: At the moment, CreateICmpEQ will simplify conditions with constant
- // operands. Perform simplification directly on VPlan once the branch is
- // modeled there.
- IRBuilder<> B(LoopMiddleBlock->getTerminator());
- B.SetCurrentDebugLocation(ScalarLatchTerm->getDebugLoc());
- Value *CmpN = B.CreateICmpEQ(Count, VectorTripCount, "cmp.n");
- BranchInst &BI = *cast<BranchInst>(LoopMiddleBlock->getTerminator());
- BI.setCondition(CmpN);
- if (hasBranchWeightMD(*ScalarLatchTerm)) {
- // Assume that `Count % VectorTripCount` is equally distributed.
- unsigned TripCount = UF * VF.getKnownMinValue();
- assert(TripCount > 0 && "trip count should not be zero");
- const uint32_t Weights[] = {1, TripCount - 1};
- setBranchWeights(BI, Weights);
- }
- }
-
-#ifdef EXPENSIVE_CHECKS
- assert(DT->verify(DominatorTree::VerificationLevel::Fast));
-#endif
-
- return LoopVectorPreHeader;
-}
-
std::pair<BasicBlock *, Value *>
InnerLoopVectorizer::createVectorizedLoopSkeleton(
const SCEV2ValueTy &ExpandedSCEVs) {
@@ -3198,7 +3134,7 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton(
// Emit phis for the new starting index of the scalar loop.
createInductionResumeValues(ExpandedSCEVs);
- return {completeLoopSkeleton(), nullptr};
+ return {LoopVectorPreHeader, nullptr};
}
// Fix up external users of the induction variable. At this point, we are
@@ -3399,8 +3335,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
for (const auto &[_, LO] : to_vector(Plan.getLiveOuts())) {
if (!Legal->isFixedOrderRecurrence(LO->getPhi()))
continue;
- fixFixedOrderRecurrence(LO, State);
- Plan.removeLiveOut(LO->getPhi());
}
// Forget the original basic block.
@@ -3470,31 +3404,16 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
VF.getKnownMinValue() * UF);
}
-void InnerLoopVectorizer::fixFixedOrderRecurrence(VPLiveOut *LO,
- VPTransformState &State) {
- // Extract the last vector element in the middle block. This will be the
- // initial value for the recurrence when jumping to the scalar loop.
- VPValue *VPExtract = LO->getOperand(0);
- using namespace llvm::VPlanPatternMatch;
- assert(match(VPExtract, m_VPInstruction<VPInstruction::ExtractFromEnd>(
- m_VPValue(), m_VPValue())) &&
- "FOR LiveOut expects to use an extract from end.");
- Value *ResumeScalarFOR = State.get(VPExtract, UF - 1, true);
-
- // Fix the initial value of the original recurrence in the scalar loop.
- PHINode *ScalarHeaderPhi = LO->getPhi();
- auto *InitScalarFOR =
- ScalarHeaderPhi->getIncomingValueForBlock(LoopScalarPreHeader);
- Builder.SetInsertPoint(LoopScalarPreHeader, LoopScalarPreHeader->begin());
- auto *ScalarPreheaderPhi =
- Builder.CreatePHI(ScalarHeaderPhi->getType(), 2, "scalar.recur.init");
- for (auto *BB : predecessors(LoopScalarPreHeader)) {
- auto *Incoming = BB == LoopMiddleBlock ? ResumeScalarFOR : InitScalarFOR;
- ScalarPreheaderPhi->addIncoming(Incoming, BB);
- }
- ScalarHeaderPhi->setIncomingValueForBlock(LoopScalarPreHeader,
- ScalarPreheaderPhi);
- ScalarHeaderPhi->setName("scalar.recur");
+// Helper to reorder blocks so they match the original order even after the
+// order of the predecessors changes. This is only used to avoid a number of
+// test changes due to reordering of incoming blocks in phi nodes and should be
+// removed soon, with the tests being updated.
+static void reorderIncomingBlocks(SmallVectorImpl<BasicBlock *> &Blocks,
+ BasicBlock *LoopMiddleBlock) {
+ if (Blocks.front() == LoopMiddleBlock)
+ std::swap(Blocks.front(), Blocks.back());
+ if (Blocks.size() == 3)
+ std::swap(Blocks[0], Blocks[1]);
}
void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) {
@@ -7388,7 +7307,9 @@ static void createAndCollectMergePhiForReduction(
// If we are fixing reductions in the epilogue loop then we should already
// have created a bc.merge.rdx Phi after the main vector body. Ensure that
// we carry over the incoming values correctly.
- for (auto *Incoming : predecessors(LoopScalarPreHeader)) {
+ SmallVector<BasicBlock *> Blocks(predecessors(LoopScalarPreHeader));
+ reorderIncomingBlocks(Blocks, LoopMiddleBlock);
+ for (auto *Incoming : Blocks) {
if (Incoming == LoopMiddleBlock)
BCBlockPhi->addIncoming(FinalValue, Incoming);
else if (ResumePhi && is_contained(ResumePhi->blocks(), Incoming))
@@ -7459,6 +7380,21 @@ LoopVectorizationPlanner::executePlan(
std::tie(State.CFG.PrevBB, CanonicalIVStartValue) =
ILV.createVectorizedLoopSkeleton(ExpandedSCEVs ? *ExpandedSCEVs
: State.ExpandedSCEVs);
+#ifdef EXPENSIVE_CHECKS
+ assert(DT->verify(DominatorTree::VerificationLevel::Fast));
+#endif
+
+ VPBasicBlock *MiddleVPBB =
+ cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion()->getSingleSuccessor());
+
+ using namespace llvm::VPlanPatternMatch;
+ if (MiddleVPBB->begin() != MiddleVPBB->end() &&
+ match(&MiddleVPBB->back(), m_BranchOnCond(m_VPValue()))) {
+ cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors()[1])
+ ->resetBlock(OrigLoop->getLoopPreheader());
+ } else
+ cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors()[0])
+ ->resetBlock(OrigLoop->getLoopPreheader());
// Only use noalias metadata when using memory checks guaranteeing no overlap
// across all iterations.
@@ -7539,6 +7475,18 @@ LoopVectorizationPlanner::executePlan(
ILV.printDebugTracesAtEnd();
+ // Adjust branch weight of the branch in the middle block.
+ auto *MiddleTerm =
+ cast<BranchInst>(State.CFG.VPBB2IRBB[ExitVPBB]->getTerminator());
+ if (MiddleTerm->isConditional() &&
+ hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) {
+ // Assume that `Count % VectorTripCount` is equally distributed.
+ unsigned TripCount = State.UF * State.VF.getKnownMinValue();
+ assert(TripCount > 0 && "trip count should not be zero");
+ const uint32_t Weights[] = {1, TripCount - 1};
+ setBranchWeights(*MiddleTerm, Weights);
+ }
+
return {State.ExpandedSCEVs, ReductionResumeValues};
}
@@ -7595,7 +7543,7 @@ EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton(
// inductions in the epilogue loop are created before executing the plan for
// the epilogue loop.
- return {completeLoopSkeleton(), nullptr};
+ return {LoopVectorPreHeader, nullptr};
}
void EpilogueVectorizerMainLoop::printDebugTracesAtStart() {
@@ -7719,8 +7667,11 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
VecEpilogueIterationCountCheck,
VecEpilogueIterationCountCheck->getSinglePredecessor());
- DT->changeImmediateDominator(LoopScalarPreHeader,
- EPI.EpilogueIterationCountCheck);
+ if (auto *N = DT->getNode(LoopScalarPreHeader))
+ DT->changeImmediateDominator(LoopScalarPreHeader,
+ EPI.EpilogueIterationCountCheck);
+ else
+ DT->addNewBlock(LoopScalarPreHeader, EPI.EpilogueIterationCountCheck);
if (!Cost->requiresScalarEpilogue(EPI.EpilogueVF.isVector()))
// If there is an epilogue which must run, there's no edge from the
// middle block to exit blocks and thus no need to update the immediate
@@ -7784,7 +7735,7 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
{VecEpilogueIterationCountCheck,
EPI.VectorTripCount} /* AdditionalBypass */);
- return {completeLoopSkeleton(), EPResumeVal};
+ return {LoopVectorPreHeader, EPResumeVal};
}
BasicBlock *
@@ -8515,7 +8466,9 @@ static void addUsersInExitBlock(VPBasicBlock *HeaderVPBB, Loop *OrigLoop,
Value *IncomingValue =
ExitPhi.getIncomingValueForBlock(ExitingBB);
VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue, Plan);
- Plan.addLiveOut(&ExitPhi, V);
+ Plan.addLiveOut(
+ &ExitPhi, V,
+ cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor()));
}
}
@@ -8534,9 +8487,25 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
// modified; a basic block for the vector pre-header, followed by a region for
// the vector loop, followed by the middle basic block. The skeleton vector
// loop region contains a header and latch basic blocks.
+
+ // Add a check in the middle block to see if we have completed
+ // all of the iterations in the first vector loop. Three cases:
+ // 1) If we require a scalar epilogue, there is no conditional branch as
+ // we unconditionally branch to the scalar preheader. Do nothing.
+ // 2) If (N - N%VF) == N, then we *don't* need to run the remainder.
+ // Thus if tail is to be folded, we know we don't need to run the
+ // remainder and we can use the previous value for the condition (true).
+ // 3) Otherwise, construct a runtime check.
+ bool RequiresScalarEpilogueCheck =
+ LoopVectorizationPlanner::getDecisionAndClampRange(
+ [this](ElementCount VF) {
+ return !CM.requiresScalarEpilogue(VF.isVector());
+ },
+ Range);
VPlanPtr Plan = VPlan::createInitialVPlan(
createTripCountSCEV(Legal->getWidestInductionType(), PSE, OrigLoop),
- *PSE.getSE(), OrigLoop->getLoopPreheader());
+ *PSE.getSE(), RequiresScalarEpilogueCheck, CM.foldTailByMasking(),
+ OrigLoop);
VPBasicBlock *HeaderVPBB = new VPBasicBlock("vector.body");
VPBasicBlock *LatchVPBB = new VPBasicBlock("vector.latch");
VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB);
@@ -8679,6 +8648,49 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
"VPBasicBlock");
RecipeBuilder.fixHeaderPhis();
+ auto *MiddleVPBB =
+ cast<VPBasicBlock>(Plan->getVectorLoopRegion()->getSingleSuccessor());
+
+ VPBasicBlock *ScalarPH = nullptr;
+ for (VPBlockBase *Succ : MiddleVPBB->getSuccessors()) {
+ auto *VPIRBB = dyn_cast<VPIRBasicBlock>(Succ);
+ if (VPIRBB && VPIRBB->getIRBasicBlock() == OrigLoop->getHeader()) {
+ ScalarPH = VPIRBB;
+ break;
+ }
+ }
+
+ if (ScalarPH) {
+ for (auto &H : HeaderVPBB->phis()) {
+ auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&H);
+ if (!FOR)
+ continue;
+ VPBuilder B(ScalarPH);
+ VPBuilder MiddleBuilder;
+ // Set insert point so new recipes are inserted before terminator and
+ // condition, if there is either the former or both.
+ if (MiddleVPBB->getNumSuccessors() != 2)
+ MiddleBuilder.setInsertPoint(MiddleVPBB);
+ else if (isa<VPInstruction>(MiddleVPBB->getTerminator()->getOperand(0)))
+ MiddleBuilder.setInsertPoint(
+ &*std::prev(MiddleVPBB->getTerminator()->getIterator()));
+ else
+ MiddleBuilder.setInsertPoint(MiddleVPBB->getTerminator());
+
+ // Extract the resume value and create a new VPLiveOut for it.
+ auto *Resume = MiddleBuilder.createNaryOp(
+ VPInstruction::ExtractFromEnd,
+ {FOR->getBackedgeValue(),
+ Plan->getOrAddLiveIn(
+ ConstantInt::get(Plan->getCanonicalIV()->getScalarType(), 1))},
+ {}, "vector.recur.extract");
+ auto *R =
+ B.createNaryOp(VPInstruction::ExitPhi, {Resume, FOR->getStartValue()},
+ {}, "scalar.recur.init");
+ Plan->addLiveOut(cast<PHINode>(FOR->getUnderlyingInstr()), R, ScalarPH);
+ }
+ }
+
// ---------------------------------------------------------------------------
// Transform initial VPlan: Apply previously taken decisions, in order, to
// bring the VPlan to its final state.
@@ -8784,7 +8796,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
// Create new empty VPlan
auto Plan = VPlan::createInitialVPlan(
createTripCountSCEV(Legal->getWidestInductionType(), PSE, OrigLoop),
- *PSE.getSE(), OrigLoop->getLoopPreheader());
+ *PSE.getSE(), true, false, OrigLoop);
// Build hierarchical CFG
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
@@ -8993,6 +9005,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
}
}
Builder.setInsertPoint(&*LatchVPBB->begin());
+ VPBasicBlock *MiddleVPBB =
+ cast<VPBasicBlock>(VectorLoopRegion->getSingleSuccessor());
+ VPBasicBlock::iterator IP = MiddleVPBB->begin();
for (VPRecipeBase &R :
Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
VPReductionPHIRecipe *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
@@ -9101,8 +9116,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
// also modeled in VPlan.
auto *FinalReductionResult = new VPInstruction(
VPInstruction::ComputeReductionResult, {PhiR, NewExitingVPV}, ExitDL);
- cast<VPBasicBlock>(VectorLoopRegion->getSingleSuccessor())
- ->appendRecipe(FinalReductionResult);
+ FinalReductionResult->insertBefore(*MiddleVPBB, IP);
+ IP = std::next(FinalReductionResult->getIterator());
OrigExitingVPV->replaceUsesWithIf(
FinalReductionResult,
[](VPUser &User, unsigned) { return isa<VPLiveOut>(&User); });
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index f17be451e6846..4138bee310ece 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -443,11 +443,29 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
}
void VPIRBasicBlock::execute(VPTransformState *State) {
- assert(getHierarchicalPredecessors().empty() &&
- "VPIRBasicBlock cannot have predecessors at the moment");
assert(getHierarchicalSuccessors().empty() &&
"VPIRBasicBlock cannot have successors at the moment");
+ for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
+ VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
+ auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
+ BasicBlock *PredBB = State->CFG.VPBB2IRBB[PredVPBB];
+
+ assert(PredBB && "Predecessor basic-block not found building successor.");
+ auto *PredBBTerminator = PredBB->getTerminator();
+ LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
+
+ auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
+ if (TermBr) {
+ // Set each forward successor here when it is created, excluding
+ // backedges. A backward successor is set when the branch is created.
+ unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
+ assert(!TermBr->getSuccessor(idx) &&
+ "Trying to reset an existing successor block.");
+ TermBr->setSuccessor(idx, IRBB);
+ }
+ }
+
State->Builder.SetInsertPoint(getIRBasicBlock()->getTerminator());
executeRecipes(State, getIRBasicBlock());
}
@@ -479,6 +497,14 @@ void VPBasicBlock::execute(VPTransformState *State) {
// The Exit block of a loop is always set to be successor 0 of the Exiting
// block.
cast<BranchInst>(ExitingBB->getTerminator())->setSuccessor(0, NewBB);
+ // Set the insert point for recipe execution in the block.
+ State->Builder.SetInsertPoint(NewBB->getTerminator());
+ if (getSuccessors().size() == 1) {
+ BranchInst *Br = State->Builder.CreateBr(NewBB);
+ Br->setSuccessor(0, nullptr);
+ NewBB->getTerminator()->eraseFromParent();
+ State->Builder.SetInsertPoint(NewBB->getTerminator());
+ }
State->CFG.DTU.applyUpdates({{DominatorTree::Insert, ExitingBB, NewBB}});
} else if (PrevVPBB && /* A */
!((SingleHPred = getSingleHierarchicalPredecessor()) &&
@@ -639,6 +665,7 @@ void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
printSuccessors(O, Indent);
}
+
#endif
static std::pair<VPBlockBase *, VPBlockBase *> cloneSESE(VPBlockBase *Entry);
@@ -654,12 +681,23 @@ static std::pair<VPBlockBase *, VPBlockBase *> cloneSESE(VPBlockBase *Entry) {
Entry);
for (VPBlockBase *BB : RPOT) {
VPBlockBase *NewBB = BB->clone();
- for (VPBlockBase *Pred : BB->getPredecessors())
- VPBlockUtils::connectBlocks(Old2NewVPBlocks[Pred], NewBB);
-
Old2NewVPBlocks[BB] = NewBB;
}
+ for (VPBlockBase *BB : RPOT) {
+ VPBlockBase *NewBB = Old2NewVPBlocks[BB];
+ SmallVector<VPBlockBase *> NewPreds;
+ for (VPBlockBase *Pred : BB->getPredecessors()) {
+ NewPreds.push_back(Old2NewVPBlocks[Pred]);
+ }
+ NewBB->setPredecessors(NewPreds);
+ SmallVector<VPBlockBase *> NewSuccs;
+ for (VPBlockBase *Succ : BB->successors()) {
+ NewSuccs.push_back(Old2NewVPBlocks[Succ]);
+ }
+ NewBB->setSuccessors(NewSuccs);
+ }
+
#if !defined(NDEBUG)
// Verif...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/94760
More information about the llvm-commits
mailing list