[llvm] [VPlan] Introduce child regions as VPlan transform. (PR #129402)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 4 08:34:58 PDT 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/129402
>From d5ba9a38d09e021d9acfb67bf6a2395ce37fc9b4 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sat, 1 Mar 2025 21:07:00 +0000
Subject: [PATCH 1/3] [VPlan] Introduce child regions as VPlan transform.
Further simplify VPlan CFG builder by moving introduction of inner
regions to a VPlan transform, building on
https://github.com/llvm/llvm-project/pull/128419
The HCFG builder now only constructs plain CFGs. I will move it to
VPlanConstruction as follow-up.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 6 +-
.../Vectorize/VPlanConstruction.cpp | 69 +++++++----
.../Transforms/Vectorize/VPlanHCFGBuilder.cpp | 112 +++---------------
.../Transforms/Vectorize/VPlanHCFGBuilder.h | 8 +-
.../vplan-printing-outer-loop.ll | 21 ++--
.../LoopVectorize/vplan_hcfg_stress_test.ll | 2 +-
.../Transforms/Vectorize/VPlanTestBase.h | 2 +-
7 files changed, 79 insertions(+), 141 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3fc5e716e3757..54d727e38b633 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9308,10 +9308,10 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
Range);
auto Plan = std::make_unique<VPlan>(OrigLoop);
// Build hierarchical CFG.
- // Convert to VPlan-transform and consoliate all transforms for VPlan
+ // TODO: Convert to VPlan-transform and consoliate all transforms for VPlan
// creation.
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
- HCFGBuilder.buildHierarchicalCFG();
+ HCFGBuilder.buildPlainCFG();
VPlanTransforms::introduceTopLevelVectorLoopRegion(
*Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck,
@@ -9615,7 +9615,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
auto Plan = std::make_unique<VPlan>(OrigLoop);
// Build hierarchical CFG
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
- HCFGBuilder.buildHierarchicalCFG();
+ HCFGBuilder.buildPlainCFG();
VPlanTransforms::introduceTopLevelVectorLoopRegion(
*Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index f58f0290b5fa9..17a758682905b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -14,26 +14,57 @@
#include "LoopVectorizationPlanner.h"
#include "VPlan.h"
#include "VPlanCFG.h"
+#include "VPlanDominatorTree.h"
#include "VPlanTransforms.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;
+/// Create and return a new VPRegionBlock for loop starting at \p HeaderVPBB, if
+/// it is a header of a loop.
+static VPRegionBlock *introduceRegion(VPlan &Plan, VPBlockBase *HeaderVPBB,
+ VPDominatorTree &VPDT) {
+ if (HeaderVPBB->getNumPredecessors() != 2)
+ return nullptr;
+ VPBlockBase *PreheaderVPBB = HeaderVPBB->getPredecessors()[0];
+ VPBlockBase *LatchVPBB = HeaderVPBB->getPredecessors()[1];
+ if (!VPDT.dominates(HeaderVPBB, LatchVPBB))
+ return nullptr;
+ assert(VPDT.dominates(PreheaderVPBB, HeaderVPBB) &&
+ "preheader must dominate header");
+ VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPBB);
+ VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPBB);
+ VPBlockBase *Succ = LatchVPBB->getSingleSuccessor();
+ if (Succ)
+ VPBlockUtils::disconnectBlocks(LatchVPBB, Succ);
+
+ auto *R = Plan.createVPRegionBlock(HeaderVPBB, LatchVPBB, "",
+ false /*isReplicator*/);
+ // All VPBB's reachable shallowly from HeaderVPBB belong to top level loop,
+ // because VPlan is expected to end at top level latch.
+ for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPBB))
+ VPBB->setParent(R);
+
+ VPBlockUtils::insertBlockAfter(R, PreheaderVPBB);
+ if (Succ)
+ VPBlockUtils::connectBlocks(R, Succ);
+ return R;
+}
+
void VPlanTransforms::introduceTopLevelVectorLoopRegion(
VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE,
bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop) {
- // TODO: Generalize to introduce all loop regions.
- auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor());
- VPBlockUtils::disconnectBlocks(Plan.getEntry(), HeaderVPBB);
+ VPDominatorTree VPDT;
+ VPDT.recalculate(Plan);
- VPBasicBlock *OriginalLatch =
- cast<VPBasicBlock>(HeaderVPBB->getSinglePredecessor());
- VPBlockUtils::disconnectBlocks(OriginalLatch, HeaderVPBB);
- VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph");
- VPBlockUtils::connectBlocks(Plan.getEntry(), VecPreheader);
- assert(OriginalLatch->getNumSuccessors() == 0 &&
- "Plan should end at top level latch");
+ auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor());
+ VPRegionBlock *TopRegion = introduceRegion(Plan, HeaderVPBB, VPDT);
+ auto *OrigExiting = TopRegion->getExiting();
+ VPBasicBlock *LatchVPBB = Plan.createVPBasicBlock("vector.latch");
+ VPBlockUtils::insertBlockAfter(LatchVPBB, OrigExiting);
+ TopRegion->setExiting(LatchVPBB);
+ TopRegion->setName("vector loop");
// Create SCEV and VPValue for the trip count.
// We use the symbolic max backedge-taken-count, which works also when
@@ -47,18 +78,9 @@ void VPlanTransforms::introduceTopLevelVectorLoopRegion(
Plan.setTripCount(
vputils::getOrCreateVPValueForSCEVExpr(Plan, TripCount, SE));
- // Create VPRegionBlock, with existing header and new empty latch block, to be
- // filled.
- VPBasicBlock *LatchVPBB = Plan.createVPBasicBlock("vector.latch");
- VPBlockUtils::insertBlockAfter(LatchVPBB, OriginalLatch);
- auto *TopRegion = Plan.createVPRegionBlock(
- HeaderVPBB, LatchVPBB, "vector loop", false /*isReplicator*/);
- // All VPBB's reachable shallowly from HeaderVPBB belong to top level loop,
- // because VPlan is expected to end at top level latch.
- for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPBB))
- VPBB->setParent(TopRegion);
+ VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph");
+ VPBlockUtils::insertBlockAfter(VecPreheader, Plan.getEntry());
- VPBlockUtils::insertBlockAfter(TopRegion, VecPreheader);
VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block");
VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion);
@@ -98,4 +120,9 @@ void VPlanTransforms::introduceTopLevelVectorLoopRegion(
ScalarLatchTerm->getDebugLoc(), "cmp.n");
Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp},
ScalarLatchTerm->getDebugLoc());
+
+ for (VPBlockBase *HeaderVPBB :
+ vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry())) {
+ introduceRegion(Plan, HeaderVPBB, VPDT);
+ }
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
index 4b8a2420b3037..4e06ce86caad7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
@@ -12,9 +12,7 @@
/// components and steps:
//
/// 1. PlainCFGBuilder class: builds a plain VPBasicBlock-based CFG that
-/// faithfully represents the CFG in the incoming IR. A VPRegionBlock (Top
-/// Region) is created to enclose and serve as parent of all the VPBasicBlocks
-/// in the plain CFG.
+/// faithfully represents the CFG in the incoming IR.
/// NOTE: At this point, there is a direct correspondence between all the
/// VPBasicBlocks created for the initial plain CFG and the incoming
/// BasicBlocks. However, this might change in the future.
@@ -57,12 +55,8 @@ class PlainCFGBuilder {
// Hold phi node's that need to be fixed once the plain CFG has been built.
SmallVector<PHINode *, 8> PhisToFix;
- /// Maps loops in the original IR to their corresponding region.
- DenseMap<Loop *, VPRegionBlock *> Loop2Region;
-
// Utility functions.
void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB);
- void setRegionPredsFromBB(VPRegionBlock *VPBB, BasicBlock *BB);
void fixHeaderPhis();
VPBasicBlock *getOrCreateVPBB(BasicBlock *BB);
#ifndef NDEBUG
@@ -83,25 +77,6 @@ class PlainCFGBuilder {
// Set predecessors of \p VPBB in the same order as they are in \p BB. \p VPBB
// must have no predecessors.
void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) {
- auto GetLatchOfExit = [this](BasicBlock *BB) -> BasicBlock * {
- auto *SinglePred = BB->getSinglePredecessor();
- Loop *LoopForBB = LI->getLoopFor(BB);
- if (!SinglePred || LI->getLoopFor(SinglePred) == LoopForBB)
- return nullptr;
- // The input IR must be in loop-simplify form, ensuring a single predecessor
- // for exit blocks.
- assert(SinglePred == LI->getLoopFor(SinglePred)->getLoopLatch() &&
- "SinglePred must be the only loop latch");
- return SinglePred;
- };
- if (auto *LatchBB = GetLatchOfExit(BB)) {
- auto *PredRegion = getOrCreateVPBB(LatchBB)->getParent();
- assert(VPBB == cast<VPBasicBlock>(PredRegion->getSingleSuccessor()) &&
- "successor must already be set for PredRegion; it must have VPBB "
- "as single successor");
- VPBB->setPredecessors({PredRegion});
- return;
- }
// Collect VPBB predecessors.
SmallVector<VPBlockBase *, 2> VPBBPreds;
for (BasicBlock *Pred : predecessors(BB))
@@ -113,13 +88,6 @@ static bool isHeaderBB(BasicBlock *BB, Loop *L) {
return L && BB == L->getHeader();
}
-void PlainCFGBuilder::setRegionPredsFromBB(VPRegionBlock *Region,
- BasicBlock *BB) {
- // BB is a loop header block. Connect the region to the loop preheader.
- Loop *LoopOfBB = LI->getLoopFor(BB);
- Region->setPredecessors({getOrCreateVPBB(LoopOfBB->getLoopPredecessor())});
-}
-
// Add operands to VPInstructions representing phi nodes from the input IR.
void PlainCFGBuilder::fixHeaderPhis() {
for (auto *Phi : PhisToFix) {
@@ -150,19 +118,6 @@ static bool isHeaderVPBB(VPBasicBlock *VPBB) {
return VPBB->getParent() && VPBB->getParent()->getEntry() == VPBB;
}
-/// Return true of \p L loop is contained within \p OuterLoop.
-static bool doesContainLoop(const Loop *L, const Loop *OuterLoop) {
- if (L->getLoopDepth() < OuterLoop->getLoopDepth())
- return false;
- const Loop *P = L;
- while (P) {
- if (P == OuterLoop)
- return true;
- P = P->getParentLoop();
- }
- return false;
-}
-
// Create a new empty VPBasicBlock for an incoming BasicBlock in the region
// corresponding to the containing loop or retrieve an existing one if it was
// already created. If no region exists yet for the loop containing \p BB, a new
@@ -178,28 +133,6 @@ VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n");
VPBasicBlock *VPBB = Plan.createVPBasicBlock(Name);
BB2VPBB[BB] = VPBB;
-
- // Get or create a region for the loop containing BB, except for the top
- // region of TheLoop which is created later.
- Loop *LoopOfBB = LI->getLoopFor(BB);
- if (!LoopOfBB || LoopOfBB == TheLoop || !doesContainLoop(LoopOfBB, TheLoop))
- return VPBB;
-
- auto *RegionOfVPBB = Loop2Region.lookup(LoopOfBB);
- if (!isHeaderBB(BB, LoopOfBB)) {
- assert(RegionOfVPBB &&
- "Region should have been created by visiting header earlier");
- VPBB->setParent(RegionOfVPBB);
- return VPBB;
- }
-
- assert(!RegionOfVPBB &&
- "First visit of a header basic block expects to register its region.");
- // Handle a header - take care of its Region.
- RegionOfVPBB = Plan.createVPRegionBlock(Name.str(), false /*isReplicator*/);
- RegionOfVPBB->setParent(Loop2Region[LoopOfBB->getParentLoop()]);
- RegionOfVPBB->setEntry(VPBB);
- Loop2Region[LoopOfBB] = RegionOfVPBB;
return VPBB;
}
@@ -351,6 +284,8 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
// Main interface to build the plain CFG.
void PlainCFGBuilder::buildPlainCFG(
DenseMap<VPBlockBase *, BasicBlock *> &VPB2IRBB) {
+ VPIRBasicBlock *Entry = cast<VPIRBasicBlock>(Plan.getEntry());
+ BB2VPBB[Entry->getIRBasicBlock()] = Entry;
// 1. Scan the body of the loop in a topological order to visit each basic
// block after having visited its predecessor basic blocks. Create a VPBB for
@@ -376,15 +311,13 @@ void PlainCFGBuilder::buildPlainCFG(
for (BasicBlock *BB : RPO) {
// Create or retrieve the VPBasicBlock for this BB.
VPBasicBlock *VPBB = getOrCreateVPBB(BB);
- VPRegionBlock *Region = VPBB->getParent();
Loop *LoopForBB = LI->getLoopFor(BB);
// Set VPBB predecessors in the same order as they are in the incoming BB.
if (!isHeaderBB(BB, LoopForBB)) {
setVPBBPredsFromBB(VPBB, BB);
- } else if (Region) {
- // BB is a loop header and there's a corresponding region, set the
- // predecessor for it.
- setRegionPredsFromBB(Region, BB);
+ } else {
+ VPBB->setPredecessors({getOrCreateVPBB(LoopForBB->getLoopPredecessor()),
+ getOrCreateVPBB(LoopForBB->getLoopLatch())});
}
// Create VPInstructions for BB.
@@ -392,7 +325,7 @@ void PlainCFGBuilder::buildPlainCFG(
if (BB == TheLoop->getLoopLatch()) {
VPBasicBlock *HeaderVPBB = getOrCreateVPBB(LoopForBB->getHeader());
- VPBlockUtils::connectBlocks(VPBB, HeaderVPBB);
+ VPBB->setOneSuccessor(HeaderVPBB);
continue;
}
@@ -423,21 +356,11 @@ void PlainCFGBuilder::buildPlainCFG(
BasicBlock *IRSucc1 = BI->getSuccessor(1);
VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0);
VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1);
- if (BB == LoopForBB->getLoopLatch()) {
- // For a latch we need to set the successor of the region rather than that
- // of VPBB and it should be set to the exit, i.e., non-header successor,
- // except for the top region, which is handled elsewhere.
- assert(LoopForBB != TheLoop &&
- "Latch of the top region should have been handled earlier");
- Region->setOneSuccessor(isHeaderVPBB(Successor0) ? Successor1
- : Successor0);
- Region->setExiting(VPBB);
- continue;
- }
- // Don't connect any blocks outside the current loop except the latch for
- // now. The latch is handled above.
- if (LoopForBB) {
+ // Don't connect any blocks outside the current loop except the latch, which
+ // is handled below.
+ if (LoopForBB &&
+ (LoopForBB == TheLoop || BB != LoopForBB->getLoopLatch())) {
if (!LoopForBB->contains(IRSucc0)) {
VPBB->setOneSuccessor(Successor1);
continue;
@@ -456,21 +379,16 @@ void PlainCFGBuilder::buildPlainCFG(
// corresponding VPlan operands.
fixHeaderPhis();
- VPBlockUtils::connectBlocks(Plan.getEntry(),
- getOrCreateVPBB(TheLoop->getHeader()));
+ Plan.getEntry()->setOneSuccessor(getOrCreateVPBB(TheLoop->getHeader()));
+ Plan.getEntry()->setPlan(&Plan);
for (const auto &[IRBB, VPB] : BB2VPBB)
VPB2IRBB[VPB] = IRBB;
+
+ LLVM_DEBUG(Plan.setName("Plain CFG\n"); dbgs() << Plan);
}
void VPlanHCFGBuilder::buildPlainCFG() {
PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan);
PCFGBuilder.buildPlainCFG(VPB2IRBB);
}
-
-// Public interface to build a H-CFG.
-void VPlanHCFGBuilder::buildHierarchicalCFG() {
- // Build Top Region enclosing the plain CFG.
- buildPlainCFG();
- LLVM_DEBUG(Plan.setName("HCFGBuilder: Plain CFG\n"); dbgs() << Plan);
-}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
index f7f98ed7b1755..f2e90d3f4d9b3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
@@ -30,7 +30,6 @@ namespace llvm {
class Loop;
class LoopInfo;
-class VPRegionBlock;
class VPlan;
class VPlanTestIRBase;
class VPBlockBase;
@@ -54,15 +53,12 @@ class VPlanHCFGBuilder {
/// created for a input IR basic block.
DenseMap<VPBlockBase *, BasicBlock *> VPB2IRBB;
- /// Build plain CFG for TheLoop and connects it to Plan's entry.
- void buildPlainCFG();
-
public:
VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P)
: TheLoop(Lp), LI(LI), Plan(P) {}
- /// Build H-CFG for TheLoop and update Plan accordingly.
- void buildHierarchicalCFG();
+ /// Build plain CFG for TheLoop and connects it to Plan's entry.
+ void buildPlainCFG();
/// Return the input IR BasicBlock corresponding to \p VPB. Returns nullptr if
/// there is no such corresponding block.
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
index 625a32c098f94..b4b6d3d760349 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
@@ -6,7 +6,7 @@
@arr = external global [8 x [8 x i64]], align 16
define void @foo(i64 %n) {
-; CHECK: VPlan 'HCFGBuilder: Plain CFG
+; CHECK: VPlan 'Plain CFG
; CHECK-NEXT: {
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
@@ -19,17 +19,14 @@ define void @foo(i64 %n) {
; CHECK-NEXT: EMIT ir<%add> = add ir<%outer.iv>, ir<%n>
; CHECK-NEXT: Successor(s): inner
; CHECK-EMPTY:
-; CHECK-NEXT: <x1> inner: {
-; CHECK-NEXT: inner:
-; CHECK-NEXT: WIDEN-PHI ir<%inner.iv> = phi ir<0>, ir<%inner.iv.next>
-; CHECK-NEXT: EMIT ir<%gep.2> = getelementptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv>
-; CHECK-NEXT: EMIT store ir<%add>, ir<%gep.2>
-; CHECK-NEXT: EMIT ir<%inner.iv.next> = add ir<%inner.iv>, ir<1>
-; CHECK-NEXT: EMIT ir<%inner.ec> = icmp ir<%inner.iv.next>, ir<8>
-; CHECK-NEXT: EMIT branch-on-cond ir<%inner.ec>
-; CHECK-NEXT: No successors
-; CHECK-NEXT: }
-; CHECK-NEXT: Successor(s): outer.latch
+; CHECK-NEXT: inner:
+; CHECK-NEXT: WIDEN-PHI ir<%inner.iv> = phi ir<0>, ir<%inner.iv.next>
+; CHECK-NEXT: EMIT ir<%gep.2> = getelementptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv>
+; CHECK-NEXT: EMIT store ir<%add>, ir<%gep.2>
+; CHECK-NEXT: EMIT ir<%inner.iv.next> = add ir<%inner.iv>, ir<1>
+; CHECK-NEXT: EMIT ir<%inner.ec> = icmp ir<%inner.iv.next>, ir<8>
+; CHECK-NEXT: EMIT branch-on-cond ir<%inner.ec>
+; CHECK-NEXT: Successor(s): outer.latch, inner
; CHECK-EMPTY:
; CHECK-NEXT: outer.latch:
; CHECK-NEXT: EMIT ir<%outer.iv.next> = add ir<%outer.iv>, ir<1>
diff --git a/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll b/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll
index 89eaca0cfa8c8..29aeb7c4e97f9 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll
@@ -4,7 +4,7 @@
; Verify that the stress testing flag for the VPlan H-CFG builder works as
; expected with and without enabling the VPlan H-CFG Verifier.
-; CHECK: VPlan 'HCFGBuilder: Plain CFG
+; CHECK: VPlan 'Plain CFG
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
index caf5d2357411d..92961e44c5e54 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
@@ -73,7 +73,7 @@ class VPlanTestIRBase : public testing::Test {
PredicatedScalarEvolution PSE(*SE, *L);
auto Plan = std::make_unique<VPlan>(L);
VPlanHCFGBuilder HCFGBuilder(L, LI.get(), *Plan);
- HCFGBuilder.buildHierarchicalCFG();
+ HCFGBuilder.buildPlainCFG();
VPlanTransforms::introduceTopLevelVectorLoopRegion(
*Plan, IntegerType::get(*Ctx, 64), PSE, true, false, L);
return Plan;
>From a88f03e42b57cdf42bec4a16e06fa78f06eb1b3e Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 4 Apr 2025 13:51:30 +0100
Subject: [PATCH 2/3] !fixup address comments, thanks!
---
llvm/lib/Transforms/Vectorize/VPlan.cpp | 11 +++++++
llvm/lib/Transforms/Vectorize/VPlan.h | 6 ++++
.../Vectorize/VPlanConstruction.cpp | 32 ++++++++-----------
3 files changed, 31 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 1e2f70e5c103e..759f1b7091037 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -648,6 +648,17 @@ bool VPBasicBlock::isExiting() const {
return getParent() && getParent()->getExitingBasicBlock() == this;
}
+bool VPBasicBlock::isHeader(const VPDominatorTree &VPDT) const {
+ if (getNumPredecessors() != 2)
+ return false;
+ VPBlockBase *LatchVPBB = getPredecessors()[1];
+ if (!VPDT.dominates(this, LatchVPBB))
+ return false;
+ assert(VPDT.dominates(getPredecessors()[0], this) &&
+ "preheader must dominate header");
+ return true;
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPBlockBase::print(raw_ostream &O) const {
VPSlotTracker SlotTracker(getPlan());
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 50baf220a1002..4f42c595df0f0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -57,6 +57,7 @@ class SCEV;
class Type;
class VPBasicBlock;
class VPBuilder;
+class VPDominatorTree;
class VPRegionBlock;
class VPlan;
class VPLane;
@@ -3251,6 +3252,11 @@ class VPBasicBlock : public VPBlockBase {
/// Returns true if the block is exiting it's parent region.
bool isExiting() const;
+ /// Returns true if the block is a loop header block in the plain CFG; that
+ /// is, it has exactly 2 predecessors (preheader and latch), where the block
+ /// dominates the latch and the preheader dominates the block.
+ bool isHeader(const VPDominatorTree &VPDT) const;
+
/// Clone the current block and it's recipes, without updating the operands of
/// the cloned recipes.
VPBasicBlock *clone() override;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 17a758682905b..0bea53c423760 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -21,28 +21,24 @@
using namespace llvm;
-/// Create and return a new VPRegionBlock for loop starting at \p HeaderVPBB, if
-/// it is a header of a loop.
-static VPRegionBlock *introduceRegion(VPlan &Plan, VPBlockBase *HeaderVPBB,
- VPDominatorTree &VPDT) {
- if (HeaderVPBB->getNumPredecessors() != 2)
- return nullptr;
+/// Create and return a new VPRegionBlock for loop starting at \p HeaderVPBB and
+/// return it.
+static VPRegionBlock *introduceRegion(VPlan &Plan, VPBlockBase *HeaderVPBB) {
VPBlockBase *PreheaderVPBB = HeaderVPBB->getPredecessors()[0];
VPBlockBase *LatchVPBB = HeaderVPBB->getPredecessors()[1];
- if (!VPDT.dominates(HeaderVPBB, LatchVPBB))
- return nullptr;
- assert(VPDT.dominates(PreheaderVPBB, HeaderVPBB) &&
- "preheader must dominate header");
VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPBB);
VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPBB);
VPBlockBase *Succ = LatchVPBB->getSingleSuccessor();
+ assert(LatchVPBB->getNumSuccessors() <= 1 &&
+ "Latch has more than one successor");
if (Succ)
VPBlockUtils::disconnectBlocks(LatchVPBB, Succ);
auto *R = Plan.createVPRegionBlock(HeaderVPBB, LatchVPBB, "",
false /*isReplicator*/);
+ R->setParent(HeaderVPBB->getParent());
// All VPBB's reachable shallowly from HeaderVPBB belong to top level loop,
- // because VPlan is expected to end at top level latch.
+ // because VPlan is expected to end at top level latch disconnected above.
for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPBB))
VPBB->setParent(R);
@@ -57,9 +53,14 @@ void VPlanTransforms::introduceTopLevelVectorLoopRegion(
bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop) {
VPDominatorTree VPDT;
VPDT.recalculate(Plan);
+ for (VPBasicBlock *HeaderVPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+ vp_depth_first_shallow(Plan.getEntry()))) {
+ if (!HeaderVPBB->isHeader(VPDT))
+ continue;
+ introduceRegion(Plan, HeaderVPBB);
+ }
- auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor());
- VPRegionBlock *TopRegion = introduceRegion(Plan, HeaderVPBB, VPDT);
+ VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
auto *OrigExiting = TopRegion->getExiting();
VPBasicBlock *LatchVPBB = Plan.createVPBasicBlock("vector.latch");
VPBlockUtils::insertBlockAfter(LatchVPBB, OrigExiting);
@@ -120,9 +121,4 @@ void VPlanTransforms::introduceTopLevelVectorLoopRegion(
ScalarLatchTerm->getDebugLoc(), "cmp.n");
Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp},
ScalarLatchTerm->getDebugLoc());
-
- for (VPBlockBase *HeaderVPBB :
- vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry())) {
- introduceRegion(Plan, HeaderVPBB, VPDT);
- }
}
>From 29f6ddffd22ed109bac5f94532390c9c77e2d7a7 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 4 Apr 2025 16:21:13 +0100
Subject: [PATCH 3/3] !fixup adjust names and comments after recent changes.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 10 +++++-----
.../Vectorize/VPlanConstruction.cpp | 7 ++++---
.../Transforms/Vectorize/VPlanTransforms.h | 19 +++++++++----------
.../Transforms/Vectorize/VPlanTestBase.h | 4 ++--
4 files changed, 20 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 8ec9c003e841b..732cec7db81cb 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9332,9 +9332,9 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
HCFGBuilder.buildPlainCFG();
- VPlanTransforms::introduceTopLevelVectorLoopRegion(
- *Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck,
- CM.foldTailByMasking(), OrigLoop);
+ VPlanTransforms::introduceRegions(*Plan, Legal->getWidestInductionType(), PSE,
+ RequiresScalarEpilogueCheck,
+ CM.foldTailByMasking(), OrigLoop);
// Don't use getDecisionAndClampRange here, because we don't know the UF
// so this function is better to be conservative, rather than to split
@@ -9636,8 +9636,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
HCFGBuilder.buildPlainCFG();
- VPlanTransforms::introduceTopLevelVectorLoopRegion(
- *Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop);
+ VPlanTransforms::introduceRegions(*Plan, Legal->getWidestInductionType(), PSE,
+ true, false, OrigLoop);
for (ElementCount VF : Range)
Plan->addVF(VF);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 0bea53c423760..876333d045f84 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -48,9 +48,10 @@ static VPRegionBlock *introduceRegion(VPlan &Plan, VPBlockBase *HeaderVPBB) {
return R;
}
-void VPlanTransforms::introduceTopLevelVectorLoopRegion(
- VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE,
- bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop) {
+void VPlanTransforms::introduceRegions(VPlan &Plan, Type *InductionTy,
+ PredicatedScalarEvolution &PSE,
+ bool RequiresScalarEpilogueCheck,
+ bool TailFolded, Loop *TheLoop) {
VPDominatorTree VPDT;
VPDT.recalculate(Plan);
for (VPBasicBlock *HeaderVPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index c23ff38265670..ed8b7a08ea187 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -52,20 +52,19 @@ struct VPlanTransforms {
verifyVPlanIsValid(Plan);
}
- /// Introduce the top-level VPRegionBlock for the main loop in \p Plan. Coming
- /// into this function, \p Plan's top-level loop is modeled using a plain CFG.
- /// This transform wraps the plain CFG of the top-level loop within a
- /// VPRegionBlock and creates a VPValue expression for the original trip
- /// count. It will also introduce a dedicated VPBasicBlock for the vector
- /// pre-header as well a VPBasicBlock as exit block of the region
- /// (middle.block). If a check is needed to guard executing the scalar
+ /// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turing \p Plan's
+ /// flat CFG into a hierarchical CFG. It also creates a VPValue expression for
+ /// the original trip count. It will also introduce a dedicated VPBasicBlock
+ /// for the vector pre-header as well a VPBasicBlock as exit block of the
+ /// region (middle.block). If a check is needed to guard executing the scalar
/// epilogue loop, it will be added to the middle block, together with
/// VPBasicBlocks for the scalar preheader and exit blocks. \p InductionTy is
/// the type of the canonical induction and used for related values, like the
/// trip count expression.
- static void introduceTopLevelVectorLoopRegion(
- VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE,
- bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop);
+ static void introduceRegions(VPlan &Plan, Type *InductionTy,
+ PredicatedScalarEvolution &PSE,
+ bool RequiresScalarEpilogueCheck,
+ bool TailFolded, Loop *TheLoop);
/// Replaces the VPInstructions in \p Plan with corresponding
/// widen recipes. Returns false if any VPInstructions could not be converted
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
index 92961e44c5e54..f2d3d37b40ba9 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
@@ -74,8 +74,8 @@ class VPlanTestIRBase : public testing::Test {
auto Plan = std::make_unique<VPlan>(L);
VPlanHCFGBuilder HCFGBuilder(L, LI.get(), *Plan);
HCFGBuilder.buildPlainCFG();
- VPlanTransforms::introduceTopLevelVectorLoopRegion(
- *Plan, IntegerType::get(*Ctx, 64), PSE, true, false, L);
+ VPlanTransforms::introduceRegions(*Plan, IntegerType::get(*Ctx, 64), PSE,
+ true, false, L);
return Plan;
}
};
More information about the llvm-commits
mailing list