[llvm] bc03d6c - [VPlan] Introduce all loop regions as VPlan transform. (NFC) (#129402)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 16 04:30:49 PDT 2025
Author: Florian Hahn
Date: 2025-04-16T13:30:45+02:00
New Revision: bc03d6cce25712601423398350f56114e64e4e29
URL: https://github.com/llvm/llvm-project/commit/bc03d6cce25712601423398350f56114e64e4e29
DIFF: https://github.com/llvm/llvm-project/commit/bc03d6cce25712601423398350f56114e64e4e29.diff
LOG: [VPlan] Introduce all loop regions as VPlan transform. (NFC) (#129402)
Further simplify VPlan CFG builder by moving introduction of inner
regions to a VPlan transform, building on
https://github.com/llvm/llvm-project/pull/128419.
The HCFG builder now only constructs plain CFGs. I will move it to
VPlanConstruction as follow-up.
Depends on https://github.com/llvm/llvm-project/pull/128419.
PR: https://github.com/llvm/llvm-project/pull/129402
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/VPlan.h
llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
llvm/lib/Transforms/Vectorize/VPlanTransforms.h
llvm/lib/Transforms/Vectorize/VPlanValue.h
llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll
llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index af94dc01c8c5c..dd7f05465a50b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9544,14 +9544,14 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
Range);
auto Plan = std::make_unique<VPlan>(OrigLoop);
// Build hierarchical CFG.
- // Convert to VPlan-transform and consoliate all transforms for VPlan
+ // TODO: Convert to VPlan-transform and consolidate all transforms for VPlan
// creation.
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
- HCFGBuilder.buildHierarchicalCFG();
+ HCFGBuilder.buildPlainCFG();
- VPlanTransforms::introduceTopLevelVectorLoopRegion(
- *Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck,
- CM.foldTailByMasking(), OrigLoop);
+ VPlanTransforms::createLoopRegions(*Plan, Legal->getWidestInductionType(),
+ PSE, RequiresScalarEpilogueCheck,
+ CM.foldTailByMasking(), OrigLoop);
// Don't use getDecisionAndClampRange here, because we don't know the UF
// so this function is better to be conservative, rather than to split
@@ -9851,10 +9851,10 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
auto Plan = std::make_unique<VPlan>(OrigLoop);
// Build hierarchical CFG
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
- HCFGBuilder.buildHierarchicalCFG();
+ HCFGBuilder.buildPlainCFG();
- VPlanTransforms::introduceTopLevelVectorLoopRegion(
- *Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop);
+ VPlanTransforms::createLoopRegions(*Plan, Legal->getWidestInductionType(),
+ PSE, true, false, OrigLoop);
for (ElementCount VF : Range)
Plan->addVF(VF);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 94b5167c60089..7084676af6d5b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -57,6 +57,7 @@ class SCEV;
class Type;
class VPBasicBlock;
class VPBuilder;
+class VPDominatorTree;
class VPRegionBlock;
class VPlan;
class VPLane;
@@ -303,6 +304,13 @@ class VPBlockBase {
/// Remove all the successors of this block.
void clearSuccessors() { Successors.clear(); }
+ /// Swap predecessors of the block. The block must have exactly 2
+ /// predecessors.
+ void swapPredecessors() {
+ assert(Predecessors.size() == 2 && "must have 2 predecessors to swap");
+ std::swap(Predecessors[0], Predecessors[1]);
+ }
+
/// Swap successors of the block. The block must have exactly 2 successors.
// TODO: This should be part of introducing conditional branch recipes rather
// than being independent.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index f58f0290b5fa9..1e687d0879f18 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -14,26 +14,88 @@
#include "LoopVectorizationPlanner.h"
#include "VPlan.h"
#include "VPlanCFG.h"
+#include "VPlanDominatorTree.h"
#include "VPlanTransforms.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;
-void VPlanTransforms::introduceTopLevelVectorLoopRegion(
- VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE,
- bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop) {
- // TODO: Generalize to introduce all loop regions.
- auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor());
- VPBlockUtils::disconnectBlocks(Plan.getEntry(), HeaderVPBB);
+/// Checks if \p HeaderVPB is a loop header block in the plain CFG; that is, it
+/// has exactly 2 predecessors (preheader and latch), where the block
+/// dominates the latch and the preheader dominates the block. If it is a
+/// header block return true, making sure the preheader appears first and
+/// the latch second. Otherwise return false.
+static bool canonicalHeader(VPBlockBase *HeaderVPB,
+ const VPDominatorTree &VPDT) {
+ ArrayRef<VPBlockBase *> Preds = HeaderVPB->getPredecessors();
+ if (Preds.size() != 2)
+ return false;
- VPBasicBlock *OriginalLatch =
- cast<VPBasicBlock>(HeaderVPBB->getSinglePredecessor());
- VPBlockUtils::disconnectBlocks(OriginalLatch, HeaderVPBB);
- VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph");
- VPBlockUtils::connectBlocks(Plan.getEntry(), VecPreheader);
- assert(OriginalLatch->getNumSuccessors() == 0 &&
- "Plan should end at top level latch");
+ auto *PreheaderVPBB = Preds[0];
+ auto *LatchVPBB = Preds[1];
+ if (VPDT.dominates(PreheaderVPBB, HeaderVPB) &&
+ VPDT.dominates(HeaderVPB, LatchVPBB))
+ return true;
+
+ std::swap(PreheaderVPBB, LatchVPBB);
+
+ if (VPDT.dominates(PreheaderVPBB, HeaderVPB) &&
+ VPDT.dominates(HeaderVPB, LatchVPBB)) {
+ // Canonicalize predecessors of header so that preheader is first and latch
+ // second.
+ HeaderVPB->swapPredecessors();
+ for (VPRecipeBase &R : cast<VPBasicBlock>(HeaderVPB)->phis())
+ R.swapOperands();
+ return true;
+ }
+
+ return false;
+}
+
+/// Create a new VPRegionBlock for the loop starting at \p HeaderVPB.
+static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {
+ auto *PreheaderVPBB = HeaderVPB->getPredecessors()[0];
+ auto *LatchVPBB = HeaderVPB->getPredecessors()[1];
+
+ VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPB);
+ VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPB);
+ VPBlockBase *Succ = LatchVPBB->getSingleSuccessor();
+ assert(LatchVPBB->getNumSuccessors() <= 1 &&
+ "Latch has more than one successor");
+ if (Succ)
+ VPBlockUtils::disconnectBlocks(LatchVPBB, Succ);
+
+ auto *R = Plan.createVPRegionBlock(HeaderVPB, LatchVPBB, "",
+ false /*isReplicator*/);
+ R->setParent(HeaderVPB->getParent());
+ // All VPBB's reachable shallowly from HeaderVPB belong to the loop headed by
+ // HeaderVPB, because the latch was disconnected from its successors above.
+ for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPB))
+ VPBB->setParent(R);
+
+ VPBlockUtils::insertBlockAfter(R, PreheaderVPBB);
+ if (Succ)
+ VPBlockUtils::connectBlocks(R, Succ);
+}
+
+void VPlanTransforms::createLoopRegions(VPlan &Plan, Type *InductionTy,
+ PredicatedScalarEvolution &PSE,
+ bool RequiresScalarEpilogueCheck,
+ bool TailFolded, Loop *TheLoop) {
+ VPDominatorTree VPDT;
+ VPDT.recalculate(Plan);
+ for (VPBlockBase *HeaderVPB : vp_depth_first_shallow(Plan.getEntry()))
+ if (canonicalHeader(HeaderVPB, VPDT))
+ createLoopRegion(Plan, HeaderVPB);
+
+ VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
+ auto *OrigExiting = TopRegion->getExiting();
+ VPBasicBlock *LatchVPBB = Plan.createVPBasicBlock("vector.latch");
+ VPBlockUtils::insertBlockAfter(LatchVPBB, OrigExiting);
+ TopRegion->setExiting(LatchVPBB);
+ TopRegion->setName("vector loop");
+ TopRegion->getEntryBasicBlock()->setName("vector.body");
// Create SCEV and VPValue for the trip count.
// We use the symbolic max backedge-taken-count, which works also when
@@ -47,18 +109,9 @@ void VPlanTransforms::introduceTopLevelVectorLoopRegion(
Plan.setTripCount(
vputils::getOrCreateVPValueForSCEVExpr(Plan, TripCount, SE));
- // Create VPRegionBlock, with existing header and new empty latch block, to be
- // filled.
- VPBasicBlock *LatchVPBB = Plan.createVPBasicBlock("vector.latch");
- VPBlockUtils::insertBlockAfter(LatchVPBB, OriginalLatch);
- auto *TopRegion = Plan.createVPRegionBlock(
- HeaderVPBB, LatchVPBB, "vector loop", false /*isReplicator*/);
- // All VPBB's reachable shallowly from HeaderVPBB belong to top level loop,
- // because VPlan is expected to end at top level latch.
- for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPBB))
- VPBB->setParent(TopRegion);
-
- VPBlockUtils::insertBlockAfter(TopRegion, VecPreheader);
+ VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph");
+ VPBlockUtils::insertBlockAfter(VecPreheader, Plan.getEntry());
+
VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block");
VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
index 4b8a2420b3037..5bacd2d4e6d88 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
@@ -12,9 +12,7 @@
/// components and steps:
//
/// 1. PlainCFGBuilder class: builds a plain VPBasicBlock-based CFG that
-/// faithfully represents the CFG in the incoming IR. A VPRegionBlock (Top
-/// Region) is created to enclose and serve as parent of all the VPBasicBlocks
-/// in the plain CFG.
+/// faithfully represents the CFG in the incoming IR.
/// NOTE: At this point, there is a direct correspondence between all the
/// VPBasicBlocks created for the initial plain CFG and the incoming
/// BasicBlocks. However, this might change in the future.
@@ -57,12 +55,8 @@ class PlainCFGBuilder {
// Hold phi node's that need to be fixed once the plain CFG has been built.
SmallVector<PHINode *, 8> PhisToFix;
- /// Maps loops in the original IR to their corresponding region.
- DenseMap<Loop *, VPRegionBlock *> Loop2Region;
-
// Utility functions.
void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB);
- void setRegionPredsFromBB(VPRegionBlock *VPBB, BasicBlock *BB);
void fixHeaderPhis();
VPBasicBlock *getOrCreateVPBB(BasicBlock *BB);
#ifndef NDEBUG
@@ -83,25 +77,6 @@ class PlainCFGBuilder {
// Set predecessors of \p VPBB in the same order as they are in \p BB. \p VPBB
// must have no predecessors.
void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) {
- auto GetLatchOfExit = [this](BasicBlock *BB) -> BasicBlock * {
- auto *SinglePred = BB->getSinglePredecessor();
- Loop *LoopForBB = LI->getLoopFor(BB);
- if (!SinglePred || LI->getLoopFor(SinglePred) == LoopForBB)
- return nullptr;
- // The input IR must be in loop-simplify form, ensuring a single predecessor
- // for exit blocks.
- assert(SinglePred == LI->getLoopFor(SinglePred)->getLoopLatch() &&
- "SinglePred must be the only loop latch");
- return SinglePred;
- };
- if (auto *LatchBB = GetLatchOfExit(BB)) {
- auto *PredRegion = getOrCreateVPBB(LatchBB)->getParent();
- assert(VPBB == cast<VPBasicBlock>(PredRegion->getSingleSuccessor()) &&
- "successor must already be set for PredRegion; it must have VPBB "
- "as single successor");
- VPBB->setPredecessors({PredRegion});
- return;
- }
// Collect VPBB predecessors.
SmallVector<VPBlockBase *, 2> VPBBPreds;
for (BasicBlock *Pred : predecessors(BB))
@@ -113,13 +88,6 @@ static bool isHeaderBB(BasicBlock *BB, Loop *L) {
return L && BB == L->getHeader();
}
-void PlainCFGBuilder::setRegionPredsFromBB(VPRegionBlock *Region,
- BasicBlock *BB) {
- // BB is a loop header block. Connect the region to the loop preheader.
- Loop *LoopOfBB = LI->getLoopFor(BB);
- Region->setPredecessors({getOrCreateVPBB(LoopOfBB->getLoopPredecessor())});
-}
-
// Add operands to VPInstructions representing phi nodes from the input IR.
void PlainCFGBuilder::fixHeaderPhis() {
for (auto *Phi : PhisToFix) {
@@ -130,43 +98,18 @@ void PlainCFGBuilder::fixHeaderPhis() {
auto *VPPhi = cast<VPWidenPHIRecipe>(VPVal);
assert(VPPhi->getNumOperands() == 0 &&
"Expected VPInstruction with no operands.");
-
- Loop *L = LI->getLoopFor(Phi->getParent());
- assert(isHeaderBB(Phi->getParent(), L));
- // For header phis, make sure the incoming value from the loop
- // predecessor is the first operand of the recipe.
+ assert(isHeaderBB(Phi->getParent(), LI->getLoopFor(Phi->getParent())) &&
+ "Expected Phi in header block.");
assert(Phi->getNumOperands() == 2 &&
"header phi must have exactly 2 operands");
- BasicBlock *LoopPred = L->getLoopPredecessor();
- VPPhi->addOperand(
- getOrCreateVPOperand(Phi->getIncomingValueForBlock(LoopPred)));
- BasicBlock *LoopLatch = L->getLoopLatch();
- VPPhi->addOperand(
- getOrCreateVPOperand(Phi->getIncomingValueForBlock(LoopLatch)));
- }
-}
-
-static bool isHeaderVPBB(VPBasicBlock *VPBB) {
- return VPBB->getParent() && VPBB->getParent()->getEntry() == VPBB;
-}
-
-/// Return true of \p L loop is contained within \p OuterLoop.
-static bool doesContainLoop(const Loop *L, const Loop *OuterLoop) {
- if (L->getLoopDepth() < OuterLoop->getLoopDepth())
- return false;
- const Loop *P = L;
- while (P) {
- if (P == OuterLoop)
- return true;
- P = P->getParentLoop();
+ for (BasicBlock *Pred : predecessors(Phi->getParent()))
+ VPPhi->addOperand(
+ getOrCreateVPOperand(Phi->getIncomingValueForBlock(Pred)));
}
- return false;
}
-// Create a new empty VPBasicBlock for an incoming BasicBlock in the region
-// corresponding to the containing loop or retrieve an existing one if it was
-// already created. If no region exists yet for the loop containing \p BB, a new
-// one is created.
+// Create a new empty VPBasicBlock for an incoming BasicBlock or retrieve an
+// existing one if it was already created.
VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
if (auto *VPBB = BB2VPBB.lookup(BB)) {
// Retrieve existing VPBB.
@@ -174,32 +117,10 @@ VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
}
// Create new VPBB.
- StringRef Name = isHeaderBB(BB, TheLoop) ? "vector.body" : BB->getName();
+ StringRef Name = BB->getName();
LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n");
VPBasicBlock *VPBB = Plan.createVPBasicBlock(Name);
BB2VPBB[BB] = VPBB;
-
- // Get or create a region for the loop containing BB, except for the top
- // region of TheLoop which is created later.
- Loop *LoopOfBB = LI->getLoopFor(BB);
- if (!LoopOfBB || LoopOfBB == TheLoop || !doesContainLoop(LoopOfBB, TheLoop))
- return VPBB;
-
- auto *RegionOfVPBB = Loop2Region.lookup(LoopOfBB);
- if (!isHeaderBB(BB, LoopOfBB)) {
- assert(RegionOfVPBB &&
- "Region should have been created by visiting header earlier");
- VPBB->setParent(RegionOfVPBB);
- return VPBB;
- }
-
- assert(!RegionOfVPBB &&
- "First visit of a header basic block expects to register its region.");
- // Handle a header - take care of its Region.
- RegionOfVPBB = Plan.createVPRegionBlock(Name.str(), false /*isReplicator*/);
- RegionOfVPBB->setParent(Loop2Region[LoopOfBB->getParentLoop()]);
- RegionOfVPBB->setEntry(VPBB);
- Loop2Region[LoopOfBB] = RegionOfVPBB;
return VPBB;
}
@@ -351,6 +272,8 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
// Main interface to build the plain CFG.
void PlainCFGBuilder::buildPlainCFG(
DenseMap<VPBlockBase *, BasicBlock *> &VPB2IRBB) {
+ VPIRBasicBlock *Entry = cast<VPIRBasicBlock>(Plan.getEntry());
+ BB2VPBB[Entry->getIRBasicBlock()] = Entry;
// 1. Scan the body of the loop in a topological order to visit each basic
// block after having visited its predecessor basic blocks. Create a VPBB for
@@ -376,26 +299,13 @@ void PlainCFGBuilder::buildPlainCFG(
for (BasicBlock *BB : RPO) {
// Create or retrieve the VPBasicBlock for this BB.
VPBasicBlock *VPBB = getOrCreateVPBB(BB);
- VPRegionBlock *Region = VPBB->getParent();
Loop *LoopForBB = LI->getLoopFor(BB);
// Set VPBB predecessors in the same order as they are in the incoming BB.
- if (!isHeaderBB(BB, LoopForBB)) {
- setVPBBPredsFromBB(VPBB, BB);
- } else if (Region) {
- // BB is a loop header and there's a corresponding region, set the
- // predecessor for it.
- setRegionPredsFromBB(Region, BB);
- }
+ setVPBBPredsFromBB(VPBB, BB);
// Create VPInstructions for BB.
createVPInstructionsForVPBB(VPBB, BB);
- if (BB == TheLoop->getLoopLatch()) {
- VPBasicBlock *HeaderVPBB = getOrCreateVPBB(LoopForBB->getHeader());
- VPBlockUtils::connectBlocks(VPBB, HeaderVPBB);
- continue;
- }
-
// Set VPBB successors. We create empty VPBBs for successors if they don't
// exist already. Recipes will be created when the successor is visited
// during the RPO traversal.
@@ -410,10 +320,7 @@ void PlainCFGBuilder::buildPlainCFG(
auto *BI = cast<BranchInst>(BB->getTerminator());
unsigned NumSuccs = succ_size(BB);
if (NumSuccs == 1) {
- auto *Successor = getOrCreateVPBB(BB->getSingleSuccessor());
- VPBB->setOneSuccessor(isHeaderVPBB(Successor)
- ? Successor->getParent()
- : static_cast<VPBlockBase *>(Successor));
+ VPBB->setOneSuccessor(getOrCreateVPBB(BB->getSingleSuccessor()));
continue;
}
assert(BI->isConditional() && NumSuccs == 2 && BI->isConditional() &&
@@ -423,21 +330,11 @@ void PlainCFGBuilder::buildPlainCFG(
BasicBlock *IRSucc1 = BI->getSuccessor(1);
VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0);
VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1);
- if (BB == LoopForBB->getLoopLatch()) {
- // For a latch we need to set the successor of the region rather than that
- // of VPBB and it should be set to the exit, i.e., non-header successor,
- // except for the top region, which is handled elsewhere.
- assert(LoopForBB != TheLoop &&
- "Latch of the top region should have been handled earlier");
- Region->setOneSuccessor(isHeaderVPBB(Successor0) ? Successor1
- : Successor0);
- Region->setExiting(VPBB);
- continue;
- }
- // Don't connect any blocks outside the current loop except the latch for
- // now. The latch is handled above.
- if (LoopForBB) {
+ // Don't connect any blocks outside the current loop except the latches for
+ // inner loops.
+ // TODO: Also connect exit blocks during initial VPlan construction.
+ if (LoopForBB == TheLoop || BB != LoopForBB->getLoopLatch()) {
if (!LoopForBB->contains(IRSucc0)) {
VPBB->setOneSuccessor(Successor1);
continue;
@@ -456,21 +353,16 @@ void PlainCFGBuilder::buildPlainCFG(
// corresponding VPlan operands.
fixHeaderPhis();
- VPBlockUtils::connectBlocks(Plan.getEntry(),
- getOrCreateVPBB(TheLoop->getHeader()));
+ Plan.getEntry()->setOneSuccessor(getOrCreateVPBB(TheLoop->getHeader()));
+ Plan.getEntry()->setPlan(&Plan);
for (const auto &[IRBB, VPB] : BB2VPBB)
VPB2IRBB[VPB] = IRBB;
+
+ LLVM_DEBUG(Plan.setName("Plain CFG\n"); dbgs() << Plan);
}
void VPlanHCFGBuilder::buildPlainCFG() {
PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan);
PCFGBuilder.buildPlainCFG(VPB2IRBB);
}
-
-// Public interface to build a H-CFG.
-void VPlanHCFGBuilder::buildHierarchicalCFG() {
- // Build Top Region enclosing the plain CFG.
- buildPlainCFG();
- LLVM_DEBUG(Plan.setName("HCFGBuilder: Plain CFG\n"); dbgs() << Plan);
-}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
index f7f98ed7b1755..f2e90d3f4d9b3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
@@ -30,7 +30,6 @@ namespace llvm {
class Loop;
class LoopInfo;
-class VPRegionBlock;
class VPlan;
class VPlanTestIRBase;
class VPBlockBase;
@@ -54,15 +53,12 @@ class VPlanHCFGBuilder {
/// created for a input IR basic block.
DenseMap<VPBlockBase *, BasicBlock *> VPB2IRBB;
- /// Build plain CFG for TheLoop and connects it to Plan's entry.
- void buildPlainCFG();
-
public:
VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P)
: TheLoop(Lp), LI(LI), Plan(P) {}
- /// Build H-CFG for TheLoop and update Plan accordingly.
- void buildHierarchicalCFG();
+ /// Build plain CFG for TheLoop and connects it to Plan's entry.
+ void buildPlainCFG();
/// Return the input IR BasicBlock corresponding to \p VPB. Returns nullptr if
/// there is no such corresponding block.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index ee3642a8aff73..a9461b261ddb6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -52,20 +52,19 @@ struct VPlanTransforms {
verifyVPlanIsValid(Plan);
}
- /// Introduce the top-level VPRegionBlock for the main loop in \p Plan. Coming
- /// into this function, \p Plan's top-level loop is modeled using a plain CFG.
- /// This transform wraps the plain CFG of the top-level loop within a
- /// VPRegionBlock and creates a VPValue expression for the original trip
- /// count. It will also introduce a dedicated VPBasicBlock for the vector
- /// pre-header as well a VPBasicBlock as exit block of the region
- /// (middle.block). If a check is needed to guard executing the scalar
+ /// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turning \p Plan's
+ /// flat CFG into a hierarchical CFG. It also creates a VPValue expression for
+ /// the original trip count. It will also introduce a dedicated VPBasicBlock
+ /// for the vector pre-header as well as a VPBasicBlock as exit block of the
+ /// region (middle.block). If a check is needed to guard executing the scalar
/// epilogue loop, it will be added to the middle block, together with
/// VPBasicBlocks for the scalar preheader and exit blocks. \p InductionTy is
/// the type of the canonical induction and used for related values, like the
/// trip count expression.
- static void introduceTopLevelVectorLoopRegion(
- VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE,
- bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop);
+ static void createLoopRegions(VPlan &Plan, Type *InductionTy,
+ PredicatedScalarEvolution &PSE,
+ bool RequiresScalarEpilogueCheck,
+ bool TailFolded, Loop *TheLoop);
/// Replaces the VPInstructions in \p Plan with corresponding
/// widen recipes. Returns false if any VPInstructions could not be converted
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index ced60a30ad56e..638156eab7a84 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -246,6 +246,12 @@ class VPUser {
New->addUser(*this);
}
+ /// Swap operands of the VPUser. It must have exactly 2 operands.
+ void swapOperands() {
+ assert(Operands.size() == 2 && "must have 2 operands to swap");
+ std::swap(Operands[0], Operands[1]);
+ }
+
/// Replaces all uses of \p From in the VPUser with \p To.
void replaceUsesOfWith(VPValue *From, VPValue *To);
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
index 625a32c098f94..91a5ea6b7fe36 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
@@ -6,35 +6,32 @@
@arr = external global [8 x [8 x i64]], align 16
define void @foo(i64 %n) {
-; CHECK: VPlan 'HCFGBuilder: Plain CFG
+; CHECK: VPlan 'Plain CFG
; CHECK-NEXT: {
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
-; CHECK-NEXT: Successor(s): vector.body
+; CHECK-NEXT: Successor(s): outer.header
; CHECK-EMPTY:
-; CHECK-NEXT: vector.body:
-; CHECK-NEXT: WIDEN-PHI ir<%outer.iv> = phi ir<0>, ir<%outer.iv.next>
+; CHECK-NEXT: outer.header:
+; CHECK-NEXT: WIDEN-PHI ir<%outer.iv> = phi ir<%outer.iv.next>, ir<0>
; CHECK-NEXT: EMIT ir<%gep.1> = getelementptr ir<@arr2>, ir<0>, ir<%outer.iv>
; CHECK-NEXT: EMIT store ir<%outer.iv>, ir<%gep.1>
; CHECK-NEXT: EMIT ir<%add> = add ir<%outer.iv>, ir<%n>
; CHECK-NEXT: Successor(s): inner
; CHECK-EMPTY:
-; CHECK-NEXT: <x1> inner: {
-; CHECK-NEXT: inner:
-; CHECK-NEXT: WIDEN-PHI ir<%inner.iv> = phi ir<0>, ir<%inner.iv.next>
-; CHECK-NEXT: EMIT ir<%gep.2> = getelementptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv>
-; CHECK-NEXT: EMIT store ir<%add>, ir<%gep.2>
-; CHECK-NEXT: EMIT ir<%inner.iv.next> = add ir<%inner.iv>, ir<1>
-; CHECK-NEXT: EMIT ir<%inner.ec> = icmp ir<%inner.iv.next>, ir<8>
-; CHECK-NEXT: EMIT branch-on-cond ir<%inner.ec>
-; CHECK-NEXT: No successors
-; CHECK-NEXT: }
-; CHECK-NEXT: Successor(s): outer.latch
+; CHECK-NEXT: inner:
+; CHECK-NEXT: WIDEN-PHI ir<%inner.iv> = phi ir<%inner.iv.next>, ir<0>
+; CHECK-NEXT: EMIT ir<%gep.2> = getelementptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv>
+; CHECK-NEXT: EMIT store ir<%add>, ir<%gep.2>
+; CHECK-NEXT: EMIT ir<%inner.iv.next> = add ir<%inner.iv>, ir<1>
+; CHECK-NEXT: EMIT ir<%inner.ec> = icmp ir<%inner.iv.next>, ir<8>
+; CHECK-NEXT: EMIT branch-on-cond ir<%inner.ec>
+; CHECK-NEXT: Successor(s): outer.latch, inner
; CHECK-EMPTY:
; CHECK-NEXT: outer.latch:
; CHECK-NEXT: EMIT ir<%outer.iv.next> = add ir<%outer.iv>, ir<1>
; CHECK-NEXT: EMIT ir<%outer.ec> = icmp ir<%outer.iv.next>, ir<8>
-; CHECK-NEXT: Successor(s): vector.body
+; CHECK-NEXT: Successor(s): outer.header
; CHECK-NEXT: }
entry:
br label %outer.header
diff --git a/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll b/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll
index 89eaca0cfa8c8..29aeb7c4e97f9 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll
@@ -4,7 +4,7 @@
; Verify that the stress testing flag for the VPlan H-CFG builder works as
; expected with and without enabling the VPlan H-CFG Verifier.
-; CHECK: VPlan 'HCFGBuilder: Plain CFG
+; CHECK: VPlan 'Plain CFG
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
index caf5d2357411d..486296535996b 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
@@ -73,9 +73,9 @@ class VPlanTestIRBase : public testing::Test {
PredicatedScalarEvolution PSE(*SE, *L);
auto Plan = std::make_unique<VPlan>(L);
VPlanHCFGBuilder HCFGBuilder(L, LI.get(), *Plan);
- HCFGBuilder.buildHierarchicalCFG();
- VPlanTransforms::introduceTopLevelVectorLoopRegion(
- *Plan, IntegerType::get(*Ctx, 64), PSE, true, false, L);
+ HCFGBuilder.buildPlainCFG();
+ VPlanTransforms::createLoopRegions(*Plan, IntegerType::get(*Ctx, 64), PSE,
+ true, false, L);
return Plan;
}
};
More information about the llvm-commits
mailing list