[llvm] [VPlan] Hook IR blocks into VPlan during skeleton creation (NFC) (PR #114292)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 7 13:41:21 PST 2024
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/114292
>From 1b89761c30d273ae665b6da4b2ede0675373f2b1 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 11 Oct 2024 10:04:40 +0100
Subject: [PATCH 1/3] [VPlan] Hook IR blocks into VPlan during skeleton
creation (NFC)
As a first step towards modeling the full skeleton in VPlan, start by
wrapping the IR blocks created during legacy skeleton creation in
VPIRBasicBlocks and hooking them into the VPlan. This means the skeleton
CFG is represented in VPlan just before execution, which allows gradually
moving parts of skeleton creation into recipes in the VPBBs.
Note that this also allows retiring some manual dominator tree (DT) updates,
as those updates are now handled automatically during VPlan execution.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 76 +++++++++++++------
llvm/lib/Transforms/Vectorize/VPlan.cpp | 67 +++++++++-------
llvm/lib/Transforms/Vectorize/VPlan.h | 51 ++++---------
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 16 ++--
.../Transforms/Vectorize/VPlanTransforms.cpp | 2 +
llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 2 -
llvm/lib/Transforms/Vectorize/VPlanUtils.cpp | 2 +-
.../Transforms/Vectorize/VPlanVerifier.cpp | 9 ---
.../Transforms/Vectorize/VPDomTreeTest.cpp | 9 ++-
.../Transforms/Vectorize/VPlanTest.cpp | 38 +++++++---
.../Vectorize/VPlanVerifierTest.cpp | 18 +++--
11 files changed, 167 insertions(+), 123 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1c64bd2982d764..2dbee4331e238f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2426,6 +2426,26 @@ InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) {
return VectorTripCount;
}
+static void connectScalarPreheaderInVPlan(VPlan &Plan) {
+ VPBlockBase *VectorPH = Plan.getVectorPreheader();
+ VPBlockBase *ScalarPH = Plan.getScalarPreheader();
+ VPBlockBase *PredVPB = VectorPH->getSinglePredecessor();
+ VPBlockUtils::disconnectBlocks(Plan.getEntry(), VectorPH);
+ VPBlockUtils::connectBlocks(PredVPB, ScalarPH);
+ VPBlockUtils::connectBlocks(PredVPB, VectorPH);
+}
+
+static void connectCheckBlockInVPlan(VPlan &Plan, BasicBlock *CheckIRBB) {
+ VPBlockBase *ScalarPH = Plan.getScalarPreheader();
+ VPBlockBase *VectorPH = Plan.getVectorPreheader();
+ VPBlockBase *PredVPB = VectorPH->getSinglePredecessor();
+ VPBlockUtils::disconnectBlocks(PredVPB, VectorPH);
+ VPIRBasicBlock *CheckVPIRBB = VPIRBasicBlock::fromBasicBlock(CheckIRBB);
+ VPBlockUtils::connectBlocks(PredVPB, CheckVPIRBB);
+ VPBlockUtils::connectBlocks(CheckVPIRBB, ScalarPH);
+ VPBlockUtils::connectBlocks(CheckVPIRBB, VectorPH);
+}
+
void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
Value *Count = getTripCount();
// Reuse existing vector loop preheader for TC checks.
@@ -2511,13 +2531,14 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
"TC check is expected to dominate Bypass");
// Update dominator for Bypass & LoopExit (if needed).
- DT->changeImmediateDominator(Bypass, TCCheckBlock);
BranchInst &BI =
*BranchInst::Create(Bypass, LoopVectorPreHeader, CheckMinIters);
if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator()))
setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false);
ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI);
LoopBypassBlocks.push_back(TCCheckBlock);
+
+ connectScalarPreheaderInVPlan(Plan);
}
BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) {
@@ -2534,6 +2555,8 @@ BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) {
"Should already be a bypass block due to iteration count check");
LoopBypassBlocks.push_back(SCEVCheckBlock);
AddedSafetyChecks = true;
+
+ connectCheckBlockInVPlan(Plan, SCEVCheckBlock);
return SCEVCheckBlock;
}
@@ -2570,6 +2593,7 @@ BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(BasicBlock *Bypass) {
AddedSafetyChecks = true;
+ connectCheckBlockInVPlan(Plan, MemCheckBlock);
return MemCheckBlock;
}
@@ -7648,10 +7672,10 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
// 0. Generate SCEV-dependent code into the preheader, including TripCount,
// before making any changes to the CFG.
- if (!BestVPlan.getPreheader()->empty()) {
+ if (!BestVPlan.getEntry()->empty()) {
State.CFG.PrevBB = OrigLoop->getLoopPreheader();
State.Builder.SetInsertPoint(OrigLoop->getLoopPreheader()->getTerminator());
- BestVPlan.getPreheader()->execute(&State);
+ BestVPlan.getEntry()->execute(&State);
}
if (!ILV.getTripCount())
ILV.setTripCount(State.get(BestVPlan.getTripCount(), VPLane(0)));
@@ -7859,8 +7883,6 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
DT->getNode(Bypass)->getIDom()) &&
"TC check is expected to dominate Bypass");
- // Update dominator for Bypass.
- DT->changeImmediateDominator(Bypass, TCCheckBlock);
LoopBypassBlocks.push_back(TCCheckBlock);
// Save the trip count so we don't have to regenerate it in the
@@ -7875,6 +7897,12 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false);
ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI);
+ VPBlockBase *VectorPH = Plan.getVectorPreheader();
+ VPBlockBase *PredVPB = VectorPH->getSinglePredecessor();
+ if (PredVPB->getNumSuccessors() == 1)
+ connectScalarPreheaderInVPlan(Plan);
+ else
+ connectCheckBlockInVPlan(Plan, TCCheckBlock);
return TCCheckBlock;
}
@@ -7905,32 +7933,19 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
EPI.MainLoopIterationCountCheck->getTerminator()->replaceUsesOfWith(
VecEpilogueIterationCountCheck, LoopVectorPreHeader);
- DT->changeImmediateDominator(LoopVectorPreHeader,
- EPI.MainLoopIterationCountCheck);
-
EPI.EpilogueIterationCountCheck->getTerminator()->replaceUsesOfWith(
VecEpilogueIterationCountCheck, LoopScalarPreHeader);
if (EPI.SCEVSafetyCheck)
EPI.SCEVSafetyCheck->getTerminator()->replaceUsesOfWith(
VecEpilogueIterationCountCheck, LoopScalarPreHeader);
- if (EPI.MemSafetyCheck)
+ if (EPI.MemSafetyCheck) {
EPI.MemSafetyCheck->getTerminator()->replaceUsesOfWith(
VecEpilogueIterationCountCheck, LoopScalarPreHeader);
-
- DT->changeImmediateDominator(
- VecEpilogueIterationCountCheck,
- VecEpilogueIterationCountCheck->getSinglePredecessor());
+ }
DT->changeImmediateDominator(LoopScalarPreHeader,
EPI.EpilogueIterationCountCheck);
- if (!Cost->requiresScalarEpilogue(EPI.EpilogueVF.isVector()))
- // If there is an epilogue which must run, there's no edge from the
- // middle block to exit blocks and thus no need to update the immediate
- // dominator of the exit blocks.
- DT->changeImmediateDominator(LoopExitBlock,
- EPI.EpilogueIterationCountCheck);
-
// Keep track of bypass blocks, as they feed start values to the induction and
// reduction phis in the scalar loop preheader.
if (EPI.SCEVSafetyCheck)
@@ -8033,6 +8048,20 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck(
}
ReplaceInstWithInst(Insert->getTerminator(), &BI);
LoopBypassBlocks.push_back(Insert);
+
+ // A new entry block has been created for the epilogue VPlan. Hook it in.
+ VPIRBasicBlock *NewEntry = VPIRBasicBlock::fromBasicBlock(Insert);
+ VPBasicBlock *OldEntry = Plan.getEntry();
+ VPBlockUtils::reassociateBlocks(OldEntry, NewEntry);
+ Plan.setEntry(NewEntry);
+ for (auto &R : make_early_inc_range(*NewEntry)) {
+ auto *VPIR = dyn_cast<VPIRInstruction>(&R);
+ if (!VPIR || !isa<PHINode>(VPIR->getInstruction()))
+ break;
+ VPIR->eraseFromParent();
+ }
+
+ connectScalarPreheaderInVPlan(Plan);
return Insert;
}
@@ -10256,7 +10285,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// should be removed once induction resume value creation is done
// directly in VPlan.
EpilogILV.setTripCount(MainILV.getTripCount());
- for (auto &R : make_early_inc_range(*BestEpiPlan.getPreheader())) {
+ for (auto &R : make_early_inc_range(*BestEpiPlan.getEntry())) {
auto *ExpandR = dyn_cast<VPExpandSCEVRecipe>(&R);
if (!ExpandR)
continue;
@@ -10316,8 +10345,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
cast<VPHeaderPHIRecipe>(&R)->setStartValue(StartVal);
}
- assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
- "DT not preserved correctly");
LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV,
DT, true, &ExpandedSCEVs);
++LoopsEpilogueVectorized;
@@ -10345,6 +10372,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
checkMixedPrecision(L, ORE);
}
+ assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
+ "DT not preserved correctly");
+
std::optional<MDNode *> RemainderLoopID =
makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
LLVMLoopVectorizeFollowupEpilogue});
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 54aae122877b98..7de8830bd866ce 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -170,9 +170,8 @@ VPBasicBlock *VPBlockBase::getEntryBasicBlock() {
}
void VPBlockBase::setPlan(VPlan *ParentPlan) {
- assert(
- (ParentPlan->getEntry() == this || ParentPlan->getPreheader() == this) &&
- "Can only set plan on its entry or preheader block.");
+ assert(ParentPlan->getEntry() == this &&
+ "Can only set plan on its entry or preheader block.");
Plan = ParentPlan;
}
@@ -463,7 +462,6 @@ void VPIRBasicBlock::execute(VPTransformState *State) {
(getNumSuccessors() == 0 || isa<BranchInst>(IRBB->getTerminator())) &&
"other blocks must be terminated by a branch");
}
-
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
BasicBlock *PredBB = State->CFG.VPBB2IRBB[PredVPBB];
@@ -851,9 +849,6 @@ VPlan::~VPlan() {
Block->dropAllReferences(&DummyValue);
VPBlockBase::deleteCFG(Entry);
-
- Preheader->dropAllReferences(&DummyValue);
- delete Preheader;
}
for (VPValue *VPV : VPLiveInsToFree)
delete VPV;
@@ -876,9 +871,10 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
VPIRBasicBlock *Entry =
VPIRBasicBlock::fromBasicBlock(TheLoop->getLoopPreheader());
VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph");
+ VPBlockUtils::connectBlocks(Entry, VecPreheader);
VPIRBasicBlock *ScalarHeader =
VPIRBasicBlock::fromBasicBlock(TheLoop->getHeader());
- auto Plan = std::make_unique<VPlan>(Entry, VecPreheader, ScalarHeader);
+ auto Plan = std::make_unique<VPlan>(Entry, ScalarHeader);
// Create SCEV and VPValue for the trip count.
@@ -1021,8 +1017,9 @@ void VPlan::execute(VPTransformState *State) {
BasicBlock *VectorPreHeader = State->CFG.PrevBB;
State->Builder.SetInsertPoint(VectorPreHeader->getTerminator());
- // Disconnect VectorPreHeader from ExitBB in both the CFG and DT.
- cast<BranchInst>(VectorPreHeader->getTerminator())->setSuccessor(0, nullptr);
+ replaceVPBBWithIRVPBB(
+ cast<VPBasicBlock>(getVectorLoopRegion()->getSinglePredecessor()),
+ VectorPreHeader);
State->CFG.DTU.applyUpdates(
{{DominatorTree::Delete, VectorPreHeader, State->CFG.ExitBB}});
@@ -1049,8 +1046,10 @@ void VPlan::execute(VPTransformState *State) {
State->CFG.DTU.applyUpdates(
{{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
+ ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
+ Entry);
// Generate code in the loop pre-header and body.
- for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
+ for (VPBlockBase *Block : make_range(RPOT.begin(), RPOT.end()))
Block->execute(State);
VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock();
@@ -1101,9 +1100,6 @@ void VPlan::execute(VPTransformState *State) {
}
State->CFG.DTU.flush();
- assert(State->CFG.DTU.getDomTree().verify(
- DominatorTree::VerificationLevel::Fast) &&
- "DT not preserved correctly");
}
InstructionCost VPlan::cost(ElementCount VF, VPCostContext &Ctx) {
@@ -1156,12 +1152,10 @@ void VPlan::print(raw_ostream &O) const {
printLiveIns(O);
- if (!getPreheader()->empty()) {
- O << "\n";
- getPreheader()->print(O, "", SlotTracker);
- }
+ ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<const VPBlockBase *>>
+ RPOT(getEntry());
- for (const VPBlockBase *Block : vp_depth_first_shallow(getEntry())) {
+ for (const VPBlockBase *Block : RPOT) {
O << '\n';
Block->print(O, "", SlotTracker);
}
@@ -1192,6 +1186,20 @@ std::string VPlan::getName() const {
return Out;
}
+VPRegionBlock *VPlan::getVectorLoopRegion() {
+ for (VPBlockBase *B : vp_depth_first_shallow(getEntry()))
+ if (auto *R = dyn_cast<VPRegionBlock>(B))
+ return R;
+ return nullptr;
+}
+
+const VPRegionBlock *VPlan::getVectorLoopRegion() const {
+ for (const VPBlockBase *B : vp_depth_first_shallow(getEntry()))
+ if (auto *R = dyn_cast<VPRegionBlock>(B))
+ return R;
+ return nullptr;
+}
+
LLVM_DUMP_METHOD
void VPlan::printDOT(raw_ostream &O) const {
VPlanPrinter Printer(O, *this);
@@ -1242,7 +1250,6 @@ static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
VPlan *VPlan::duplicate() {
// Clone blocks.
- VPBasicBlock *NewPreheader = Preheader->clone();
const auto &[NewEntry, __] = cloneFrom(Entry);
BasicBlock *ScalarHeaderIRBB = getScalarHeader()->getIRBasicBlock();
@@ -1252,8 +1259,7 @@ VPlan *VPlan::duplicate() {
return VPIRBB && VPIRBB->getIRBasicBlock() == ScalarHeaderIRBB;
}));
// Create VPlan, clone live-ins and remap operands in the cloned blocks.
- auto *NewPlan =
- new VPlan(NewPreheader, cast<VPBasicBlock>(NewEntry), NewScalarHeader);
+ auto *NewPlan = new VPlan(cast<VPBasicBlock>(NewEntry), NewScalarHeader);
DenseMap<VPValue *, VPValue *> Old2NewVPValues;
for (VPValue *OldLiveIn : VPLiveInsToFree) {
Old2NewVPValues[OldLiveIn] =
@@ -1273,7 +1279,6 @@ VPlan *VPlan::duplicate() {
// else NewTripCount will be created and inserted into Old2NewVPValues when
// TripCount is cloned. In any case NewPlan->TripCount is updated below.
- remapOperands(Preheader, NewPreheader, Old2NewVPValues);
remapOperands(Entry, NewEntry, Old2NewVPValues);
// Initialize remaining fields of cloned VPlan.
@@ -1287,6 +1292,19 @@ VPlan *VPlan::duplicate() {
return NewPlan;
}
+VPBasicBlock *VPlan::getScalarPreheader() {
+ auto *MiddleVPBB =
+ cast<VPBasicBlock>(getVectorLoopRegion()->getSingleSuccessor());
+ if (MiddleVPBB->getNumSuccessors() == 2) {
+ // Order is strict: first is the exit block, second is the scalar preheader.
+ return cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
+ }
+ if (auto *IRVPBB = dyn_cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor()))
+ return IRVPBB;
+
+ return nullptr;
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
@@ -1325,8 +1343,6 @@ void VPlanPrinter::dump() {
OS << "edge [fontname=Courier, fontsize=30]\n";
OS << "compound=true\n";
- dumpBlock(Plan.getPreheader());
-
for (const VPBlockBase *Block : vp_depth_first_shallow(Plan.getEntry()))
dumpBlock(Block);
@@ -1587,7 +1603,6 @@ void VPSlotTracker::assignNames(const VPlan &Plan) {
assignName(Plan.BackedgeTakenCount);
for (VPValue *LI : Plan.VPLiveInsToFree)
assignName(LI);
- assignNames(Plan.getPreheader());
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>>
RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 035b9e66fd062a..c9393958521295 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3703,11 +3703,6 @@ class VPlan {
/// preheader of the vector loop.
VPBasicBlock *Entry;
- /// VPBasicBlock corresponding to the original preheader. Used to place
- /// VPExpandSCEV recipes for expressions used during skeleton creation and the
- /// rest of VPlan execution.
- VPBasicBlock *Preheader;
-
/// VPIRBasicBlock wrapping the header of the original scalar loop.
VPIRBasicBlock *ScalarHeader;
@@ -3752,36 +3747,27 @@ class VPlan {
DenseMap<const SCEV *, VPValue *> SCEVToExpansion;
public:
- /// Construct a VPlan with original preheader \p Preheader, trip count \p TC,
- /// \p Entry to the plan and with \p ScalarHeader wrapping the original header
- /// of the scalar loop. At the moment, \p Preheader and \p Entry need to be
- /// disconnected, as the bypass blocks between them are not yet modeled in
- /// VPlan.
- VPlan(VPBasicBlock *Preheader, VPValue *TC, VPBasicBlock *Entry,
- VPIRBasicBlock *ScalarHeader)
- : VPlan(Preheader, Entry, ScalarHeader) {
+ /// Construct a VPlan with \p Entry entering the plan and trip count \p TC.
+ VPlan(VPBasicBlock *Entry, VPValue *TC, VPIRBasicBlock *ScalarHeader)
+ : VPlan(Entry, ScalarHeader) {
TripCount = TC;
}
- /// Construct a VPlan with original preheader \p Preheader, \p Entry to
- /// the plan and with \p ScalarHeader wrapping the original header of the
- /// scalar loop. At the moment, \p Preheader and \p Entry need to be
- /// disconnected, as the bypass blocks between them are not yet modeled in
- /// VPlan.
- VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry,
- VPIRBasicBlock *ScalarHeader)
- : Entry(Entry), Preheader(Preheader), ScalarHeader(ScalarHeader) {
+ /// Construct a VPlan with \p Entry to the plan.
+ VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
+ : Entry(Entry), ScalarHeader(ScalarHeader) {
Entry->setPlan(this);
- Preheader->setPlan(this);
- assert(Preheader->getNumSuccessors() == 0 &&
- Preheader->getNumPredecessors() == 0 &&
- "preheader must be disconnected");
assert(ScalarHeader->getNumSuccessors() == 0 &&
"scalar header must be a leaf node");
}
~VPlan();
+ void setEntry(VPBasicBlock *VPBB) {
+ Entry = VPBB;
+ VPBB->setPlan(this);
+ }
+
/// Create initial VPlan, having an "entry" VPBasicBlock (wrapping
/// original scalar pre-header ) which contains SCEV expansions that need
/// to happen before the CFG is modified; a VPBasicBlock for the vector
@@ -3932,12 +3918,9 @@ class VPlan {
#endif
/// Returns the VPRegionBlock of the vector loop.
- VPRegionBlock *getVectorLoopRegion() {
- return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
- }
- const VPRegionBlock *getVectorLoopRegion() const {
- return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
- }
+ VPRegionBlock *getVectorLoopRegion();
+
+ const VPRegionBlock *getVectorLoopRegion() const;
/// Returns the preheader of the vector loop region.
VPBasicBlock *getVectorPreheader() {
@@ -3963,13 +3946,11 @@ class VPlan {
SCEVToExpansion[S] = V;
}
- /// \return The block corresponding to the original preheader.
- VPBasicBlock *getPreheader() { return Preheader; }
- const VPBasicBlock *getPreheader() const { return Preheader; }
-
/// Clone the current VPlan, update all VPValues of the new VPlan and cloned
/// recipes to refer to the clones, and return it.
VPlan *duplicate();
+
+ VPBasicBlock *getScalarPreheader();
};
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 6254ea15191819..39e06736cfb320 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -627,11 +627,11 @@ Value *VPInstruction::generate(VPTransformState &State) {
Builder.CreatePHI(IncomingFromOtherPreds->getType(), 2, Name);
BasicBlock *VPlanPred =
State.CFG
- .VPBB2IRBB[cast<VPBasicBlock>(getParent()->getSinglePredecessor())];
+ .VPBB2IRBB[cast<VPBasicBlock>(getParent()->getPredecessors()[0])];
NewPhi->addIncoming(IncomingFromVPlanPred, VPlanPred);
for (auto *OtherPred : predecessors(Builder.GetInsertBlock())) {
- assert(OtherPred != VPlanPred &&
- "VPlan predecessors should not be connected yet");
+ if (OtherPred == VPlanPred)
+ continue;
NewPhi->addIncoming(IncomingFromOtherPreds, OtherPred);
}
return NewPhi;
@@ -3235,13 +3235,17 @@ void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
void VPExpandSCEVRecipe::execute(VPTransformState &State) {
assert(!State.Lane && "cannot be used in per-lane");
+ if (State.ExpandedSCEVs.contains(Expr)) {
+ State.Builder.SetInsertPoint(State.CFG.VPBB2IRBB[getParent()]);
+ return;
+ }
+
const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
SCEVExpander Exp(SE, DL, "induction");
-
Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
&*State.Builder.GetInsertPoint());
- assert(!State.ExpandedSCEVs.contains(Expr) &&
- "Same SCEV expanded multiple times");
+ /* assert(!State.ExpandedSCEVs.contains(Expr) &&*/
+ /*"Same SCEV expanded multiple times");*/
State.ExpandedSCEVs[Expr] = Res;
State.set(this, Res, VPLane(0));
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index a4f0df17f5832f..33ef10dbefc507 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -381,6 +381,8 @@ static bool mergeBlocksIntoPredecessors(VPlan &Plan) {
dyn_cast_or_null<VPBasicBlock>(VPBB->getSinglePredecessor());
if (!PredVPBB || PredVPBB->getNumSuccessors() != 1)
continue;
+ if (isa<VPIRBasicBlock>(PredVPBB))
+ continue;
WorkList.push_back(VPBB);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index dd005682203b75..ae53774bff98db 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -412,8 +412,6 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx) {
UnrollState Unroller(Plan, UF, Ctx);
- Unroller.unrollBlock(Plan.getPreheader());
-
// Iterate over all blocks in the plan starting from Entry, and unroll
// recipes inside them. This includes the vector preheader and middle blocks,
// which may set up or post-process per-part values.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 4621c28b051298..e40af3e2e3d30a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -34,7 +34,7 @@ VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
Expanded = Plan.getOrAddLiveIn(E->getValue());
else {
Expanded = new VPExpandSCEVRecipe(Expr, SE);
- Plan.getPreheader()->appendRecipe(Expanded->getDefiningRecipe());
+ Plan.getEntry()->appendRecipe(Expanded->getDefiningRecipe());
}
Plan.addSCEVExpansion(Expr, Expanded);
return Expanded;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 8bdb3133243582..fc6b61491f8b5e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -195,15 +195,6 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) {
RecipeNumbering[&R] = Cnt++;
for (const VPRecipeBase &R : *VPBB) {
- if (isa<VPIRInstruction>(&R) ^ isa<VPIRBasicBlock>(VPBB)) {
- errs() << "VPIRInstructions ";
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- R.dump();
- errs() << " ";
-#endif
- errs() << "not in a VPIRBasicBlock!\n";
- return false;
- }
for (const VPValue *V : R.definedValues()) {
for (const VPUser *U : V->users()) {
auto *UI = dyn_cast<VPRecipeBase>(U);
diff --git a/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp b/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp
index 86182ccae0b55b..604f87b634bba6 100644
--- a/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp
@@ -44,8 +44,9 @@ TEST(VPDominatorTreeTest, DominanceNoRegionsTest) {
LLVMContext C;
auto *ScalarHeader = BasicBlock::Create(C, "");
VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
+ VPBlockUtils::connectBlocks(VPPH, VPBB0);
VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB);
- VPlan Plan(VPPH, &*TC, VPBB0, ScalarHeaderVPBB);
+ VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
VPDominatorTree VPDT;
VPDT.recalculate(Plan);
@@ -124,8 +125,9 @@ TEST(VPDominatorTreeTest, DominanceRegionsTest) {
auto TC = std::make_unique<VPValue>();
VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
+ VPBlockUtils::connectBlocks(VPPH, VPBB0);
VPBlockUtils::connectBlocks(R2, ScalarHeaderVPBB);
- VPlan Plan(VPPH, &*TC, VPBB0, ScalarHeaderVPBB);
+ VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
VPDominatorTree VPDT;
VPDT.recalculate(Plan);
@@ -206,8 +208,9 @@ TEST(VPDominatorTreeTest, DominanceRegionsTest) {
auto TC = std::make_unique<VPValue>();
VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
+ VPBlockUtils::connectBlocks(VPPH, VPBB1);
VPBlockUtils::connectBlocks(VPBB2, ScalarHeaderVPBB);
- VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
+ VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
VPDominatorTree VPDT;
VPDT.recalculate(Plan);
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
index 4a8615cc086b08..81f1707aaf7fba 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
@@ -259,8 +259,9 @@ TEST(VPBasicBlockTest, getPlan) {
auto TC = std::make_unique<VPValue>();
VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
+ VPBlockUtils::connectBlocks(VPPH, VPBB1);
VPBlockUtils::connectBlocks(VPBB4, ScalarHeaderVPBB);
- VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
+ VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
EXPECT_EQ(&Plan, VPBB1->getPlan());
EXPECT_EQ(&Plan, VPBB2->getPlan());
@@ -281,8 +282,9 @@ TEST(VPBasicBlockTest, getPlan) {
auto TC = std::make_unique<VPValue>();
VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
+ VPBlockUtils::connectBlocks(VPPH, VPBB1);
VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB);
- VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
+ VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
EXPECT_EQ(&Plan, VPBB1->getPlan());
EXPECT_EQ(&Plan, R1->getPlan());
@@ -313,8 +315,9 @@ TEST(VPBasicBlockTest, getPlan) {
auto TC = std::make_unique<VPValue>();
VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
+ VPBlockUtils::connectBlocks(VPPH, VPBB1);
VPBlockUtils::connectBlocks(R2, ScalarHeaderVPBB);
- VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
+ VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
EXPECT_EQ(&Plan, VPBB1->getPlan());
EXPECT_EQ(&Plan, R1->getPlan());
@@ -359,8 +362,9 @@ TEST(VPBasicBlockTest, TraversingIteratorTest) {
// Use Plan to properly clean up created blocks.
auto TC = std::make_unique<VPValue>();
VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
+ VPBlockUtils::connectBlocks(VPPH, VPBB1);
VPBlockUtils::connectBlocks(VPBB4, ScalarHeaderVPBB);
- VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
+ VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
}
{
@@ -461,8 +465,9 @@ TEST(VPBasicBlockTest, TraversingIteratorTest) {
// Use Plan to properly clean up created blocks.
auto TC = std::make_unique<VPValue>();
VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
+ VPBlockUtils::connectBlocks(VPPH, VPBB0);
VPBlockUtils::connectBlocks(R2, ScalarHeaderVPBB);
- VPlan Plan(VPPH, &*TC, VPBB0, ScalarHeaderVPBB);
+ VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
}
{
@@ -546,8 +551,9 @@ TEST(VPBasicBlockTest, TraversingIteratorTest) {
// Use Plan to properly clean up created blocks.
auto TC = std::make_unique<VPValue>();
VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
+ VPBlockUtils::connectBlocks(VPPH, VPBB1);
VPBlockUtils::connectBlocks(VPBB2, ScalarHeaderVPBB);
- VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
+ VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
}
{
@@ -596,8 +602,9 @@ TEST(VPBasicBlockTest, TraversingIteratorTest) {
// Use Plan to properly clean up created blocks.
auto TC = std::make_unique<VPValue>();
VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
+ VPBlockUtils::connectBlocks(VPPH, VPBB1);
VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB);
- VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
+ VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
}
{
@@ -690,8 +697,9 @@ TEST(VPBasicBlockTest, TraversingIteratorTest) {
// Use Plan to properly clean up created blocks.
auto TC = std::make_unique<VPValue>();
VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
+ VPBlockUtils::connectBlocks(VPPH, VPBB1);
VPBlockUtils::connectBlocks(VPBB2, ScalarHeaderVPBB);
- VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
+ VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
}
delete ScalarHeader;
}
@@ -734,7 +742,8 @@ TEST(VPBasicBlockTest, print) {
auto *ScalarHeader = BasicBlock::Create(C, "");
// FIXME: This looks wrong.
auto ScalarHeaderVPBB = std::make_unique<VPIRBasicBlock>(ScalarHeader);
- VPlan Plan(VPBB0, TC, VPBB1, ScalarHeaderVPBB.get());
+ VPBlockUtils::connectBlocks(VPBB0, VPBB1);
+ VPlan Plan(VPBB0, TC, ScalarHeaderVPBB.get());
std::string FullDump;
raw_string_ostream OS(FullDump);
Plan.printDOT(OS);
@@ -820,8 +829,9 @@ TEST(VPBasicBlockTest, printPlanWithVFsAndUFs) {
LLVMContext C;
auto *ScalarHeader = BasicBlock::Create(C, "");
VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
+ VPBlockUtils::connectBlocks(VPBB0, VPBB1);
VPBlockUtils::connectBlocks(VPBB1, ScalarHeaderVPBB);
- VPlan Plan(VPBB0, TC, VPBB1, ScalarHeaderVPBB);
+ VPlan Plan(VPBB0, TC, ScalarHeaderVPBB);
Plan.setName("TestPlan");
Plan.addVF(ElementCount::getFixed(4));
@@ -1291,11 +1301,13 @@ TEST(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) {
TEST(VPRecipeTest, dumpRecipeInPlan) {
VPBasicBlock *VPBB0 = new VPBasicBlock("preheader");
VPBasicBlock *VPBB1 = new VPBasicBlock();
+ VPBlockUtils::connectBlocks(VPBB0, VPBB1);
+
LLVMContext C;
auto *ScalarHeader = BasicBlock::Create(C, "");
VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
VPBlockUtils::connectBlocks(VPBB1, ScalarHeaderVPBB);
- VPlan Plan(VPBB0, VPBB1, ScalarHeaderVPBB);
+ VPlan Plan(VPBB0, ScalarHeaderVPBB);
IntegerType *Int32 = IntegerType::get(C, 32);
auto *AI = BinaryOperator::CreateAdd(PoisonValue::get(Int32),
@@ -1363,11 +1375,13 @@ TEST(VPRecipeTest, dumpRecipeInPlan) {
TEST(VPRecipeTest, dumpRecipeUnnamedVPValuesInPlan) {
VPBasicBlock *VPBB0 = new VPBasicBlock("preheader");
VPBasicBlock *VPBB1 = new VPBasicBlock();
+ VPBlockUtils::connectBlocks(VPBB0, VPBB1);
+
LLVMContext C;
auto *ScalarHeader = BasicBlock::Create(C, "");
VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
VPBlockUtils::connectBlocks(VPBB1, ScalarHeaderVPBB);
- VPlan Plan(VPBB0, VPBB1, ScalarHeaderVPBB);
+ VPlan Plan(VPBB0, ScalarHeaderVPBB);
IntegerType *Int32 = IntegerType::get(C, 32);
auto *AI = BinaryOperator::CreateAdd(PoisonValue::get(Int32),
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
index edb3f8a2952942..2c7566c973c4e1 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
@@ -32,8 +32,9 @@ TEST(VPVerifierTest, VPInstructionUseBeforeDefSameBB) {
LLVMContext C;
auto *ScalarHeader = BasicBlock::Create(C, "");
VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
+ VPBlockUtils::connectBlocks(VPPH, VPBB1);
VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB);
- VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
+ VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
#if GTEST_HAS_STREAM_REDIRECTION
::testing::internal::CaptureStderr();
@@ -69,8 +70,9 @@ TEST(VPVerifierTest, VPInstructionUseBeforeDefDifferentBB) {
LLVMContext C;
auto *ScalarHeader = BasicBlock::Create(C, "");
VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
+ VPBlockUtils::connectBlocks(VPPH, VPBB1);
VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB);
- VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
+ VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
#if GTEST_HAS_STREAM_REDIRECTION
::testing::internal::CaptureStderr();
@@ -116,8 +118,9 @@ TEST(VPVerifierTest, VPBlendUseBeforeDefDifferentBB) {
auto TC = std::make_unique<VPValue>();
auto *ScalarHeader = BasicBlock::Create(C, "");
VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
+ VPBlockUtils::connectBlocks(VPPH, VPBB1);
VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB);
- VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
+ VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
#if GTEST_HAS_STREAM_REDIRECTION
::testing::internal::CaptureStderr();
@@ -157,8 +160,9 @@ TEST(VPVerifierTest, DuplicateSuccessorsOutsideRegion) {
LLVMContext C;
auto *ScalarHeader = BasicBlock::Create(C, "");
VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
+ VPBlockUtils::connectBlocks(VPPH, VPBB1);
VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB);
- VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
+ VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
#if GTEST_HAS_STREAM_REDIRECTION
::testing::internal::CaptureStderr();
@@ -199,8 +203,9 @@ TEST(VPVerifierTest, DuplicateSuccessorsInsideRegion) {
LLVMContext C;
auto *ScalarHeader = BasicBlock::Create(C, "");
VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
+ VPBlockUtils::connectBlocks(VPPH, VPBB1);
VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB);
- VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
+ VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
#if GTEST_HAS_STREAM_REDIRECTION
::testing::internal::CaptureStderr();
@@ -233,8 +238,9 @@ TEST(VPVerifierTest, BlockOutsideRegionWithParent) {
LLVMContext C;
auto *ScalarHeader = BasicBlock::Create(C, "");
VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
+ VPBlockUtils::connectBlocks(VPPH, R1);
VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB);
- VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
+ VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
#if GTEST_HAS_STREAM_REDIRECTION
::testing::internal::CaptureStderr();
>From b87cf14c957352373feda487be5523d6e0d729d5 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 7 Nov 2024 13:13:15 +0000
Subject: [PATCH 2/3] !fixup address comments, thanks!
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 65b11ed551fe2b..d6d8b28e96815c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2429,6 +2429,10 @@ InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) {
return VectorTripCount;
}
+/// Helper to connect both the vector and scalar preheaders to the vector
+/// preheader's predecessor. This is used when adjusting \p Plan during skeleton
+/// creation, i.e. adjusting the plan after introducing an initial runtime
+/// check.
static void connectScalarPreheaderInVPlan(VPlan &Plan) {
VPBlockBase *VectorPH = Plan.getVectorPreheader();
VPBlockBase *ScalarPH = Plan.getScalarPreheader();
>From 599e690f42637f6955f1d338b77229e869766d3f Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 7 Nov 2024 21:40:24 +0000
Subject: [PATCH 3/3] !fixup address latest comments, thanks!
---
.../Transforms/Vectorize/LoopVectorize.cpp | 34 +++++++++----------
llvm/lib/Transforms/Vectorize/VPlan.h | 18 +++++++---
2 files changed, 30 insertions(+), 22 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index d6d8b28e96815c..f1fbaf88ca6a50 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2429,28 +2429,29 @@ InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) {
return VectorTripCount;
}
-/// Helper to connect both the vector and scalar preheaders to the vector
-/// preheader's predecessor. This is used when adjusting \p Plan during skeleton
+/// Helper to connect both the vector and scalar preheaders to the Plan's
+/// entry. This is used when adjusting \p Plan during skeleton
/// creation, i.e. adjusting the plan after introducing an initial runtime
/// check.
static void connectScalarPreheaderInVPlan(VPlan &Plan) {
VPBlockBase *VectorPH = Plan.getVectorPreheader();
VPBlockBase *ScalarPH = Plan.getScalarPreheader();
- VPBlockBase *PredVPB = VectorPH->getSinglePredecessor();
- VPBlockUtils::disconnectBlocks(Plan.getEntry(), VectorPH);
- VPBlockUtils::connectBlocks(PredVPB, ScalarPH);
- VPBlockUtils::connectBlocks(PredVPB, VectorPH);
+ VPBlockBase *PredVPB = Plan.getEntry();
+ VPBlockUtils::connectBlocks(PredVPB, ScalarPH, -1, 0);
+ VPBlockUtils::connectBlocks(PredVPB, VectorPH, 0, -1);
}
-static void connectCheckBlockInVPlan(VPlan &Plan, BasicBlock *CheckIRBB) {
+/// Introduces a new VPIRBasicBlock for \p CheckIRBB to \p Plan between the
+/// vector preheader and its predecessor, also connecting to the scalar
+/// preheader.
+static void introduceCheckBlockInVPlan(VPlan &Plan, BasicBlock *CheckIRBB) {
VPBlockBase *ScalarPH = Plan.getScalarPreheader();
VPBlockBase *VectorPH = Plan.getVectorPreheader();
- VPBlockBase *PredVPB = VectorPH->getSinglePredecessor();
- VPBlockUtils::disconnectBlocks(PredVPB, VectorPH);
+ VPBlockBase *PreVectorPH = VectorPH->getSinglePredecessor();
VPIRBasicBlock *CheckVPIRBB = VPIRBasicBlock::fromBasicBlock(CheckIRBB);
- VPBlockUtils::connectBlocks(PredVPB, CheckVPIRBB);
+ VPBlockUtils::connectBlocks(PreVectorPH, CheckVPIRBB, -1, 1);
VPBlockUtils::connectBlocks(CheckVPIRBB, ScalarPH);
- VPBlockUtils::connectBlocks(CheckVPIRBB, VectorPH);
+ VPBlockUtils::connectBlocks(CheckVPIRBB, VectorPH, 0, -1);
}
void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
@@ -2537,7 +2538,6 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
DT->getNode(Bypass)->getIDom()) &&
"TC check is expected to dominate Bypass");
- // Update dominator for Bypass & LoopExit (if needed).
BranchInst &BI =
*BranchInst::Create(Bypass, LoopVectorPreHeader, CheckMinIters);
if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator()))
@@ -2545,6 +2545,7 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI);
LoopBypassBlocks.push_back(TCCheckBlock);
+ // TODO: Wrap LoopVectorPreHeader in VPIRBasicBlock here.
connectScalarPreheaderInVPlan(Plan);
}
@@ -2563,7 +2564,7 @@ BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) {
LoopBypassBlocks.push_back(SCEVCheckBlock);
AddedSafetyChecks = true;
- connectCheckBlockInVPlan(Plan, SCEVCheckBlock);
+ introduceCheckBlockInVPlan(Plan, SCEVCheckBlock);
return SCEVCheckBlock;
}
@@ -2600,7 +2601,7 @@ BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(BasicBlock *Bypass) {
AddedSafetyChecks = true;
- connectCheckBlockInVPlan(Plan, MemCheckBlock);
+ introduceCheckBlockInVPlan(Plan, MemCheckBlock);
return MemCheckBlock;
}
@@ -7909,7 +7910,7 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
if (PredVPB->getNumSuccessors() == 1)
connectScalarPreheaderInVPlan(Plan);
else
- connectCheckBlockInVPlan(Plan, TCCheckBlock);
+ introduceCheckBlockInVPlan(Plan, TCCheckBlock);
return TCCheckBlock;
}
@@ -7946,10 +7947,9 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
if (EPI.SCEVSafetyCheck)
EPI.SCEVSafetyCheck->getTerminator()->replaceUsesOfWith(
VecEpilogueIterationCountCheck, LoopScalarPreHeader);
- if (EPI.MemSafetyCheck) {
+ if (EPI.MemSafetyCheck)
EPI.MemSafetyCheck->getTerminator()->replaceUsesOfWith(
VecEpilogueIterationCountCheck, LoopScalarPreHeader);
- }
DT->changeImmediateDominator(LoopScalarPreHeader,
EPI.EpilogueIterationCountCheck);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 5414b30db595fa..46ba6328f0e1a4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1410,7 +1410,7 @@ class VPIRInstruction : public VPRecipeBase {
InstructionCost computeCost(ElementCount VF,
VPCostContext &Ctx) const override;
- Instruction &getInstruction() { return I; }
+ Instruction &getInstruction() const { return I; }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
@@ -4098,13 +4098,21 @@ class VPBlockUtils {
/// the successors of \p From and \p From to the predecessors of \p To. Both
/// VPBlockBases must have the same parent, which can be null. Both
/// VPBlockBases can be already connected to other VPBlockBases.
- static void connectBlocks(VPBlockBase *From, VPBlockBase *To) {
+ static void connectBlocks(VPBlockBase *From, VPBlockBase *To,
+ unsigned PredIdx = -1u, unsigned SuccIdx = -1u) {
assert((From->getParent() == To->getParent()) &&
"Can't connect two block with different parents");
- assert(From->getNumSuccessors() < 2 &&
+ assert((SuccIdx != -1u || From->getNumSuccessors() < 2) &&
"Blocks can't have more than two successors.");
- From->appendSuccessor(To);
- To->appendPredecessor(From);
+ if (SuccIdx == -1u)
+ From->appendSuccessor(To);
+ else
+ From->getSuccessors()[SuccIdx] = To;
+
+ if (PredIdx == -1u)
+ To->appendPredecessor(From);
+ else
+ To->getPredecessors()[PredIdx] = From;
}
/// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To
More information about the llvm-commits
mailing list