[llvm] [VPlan] Introduce child regions as VPlan transform. (PR #129402)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 9 10:19:57 PDT 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/129402
>From f6de6d82cb1d13420d3a0cf67f1e9237c5b49aef Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sat, 1 Mar 2025 21:07:00 +0000
Subject: [PATCH] [VPlan] Introduce child regions as VPlan transform.
Further simplify the VPlan CFG builder by moving the introduction of inner
regions to a VPlan transform, building on
https://github.com/llvm/llvm-project/pull/128419
The HCFG builder now only constructs plain CFGs. I will move it to
VPlanConstruction as a follow-up.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 6 +-
.../Vectorize/VPlanConstruction.cpp | 33 ++++++
.../Transforms/Vectorize/VPlanHCFGBuilder.cpp | 104 ++----------------
.../Transforms/Vectorize/VPlanHCFGBuilder.h | 8 +-
.../vplan-printing-outer-loop.ll | 21 ++--
.../LoopVectorize/vplan_hcfg_stress_test.ll | 2 +-
.../Transforms/Vectorize/VPlanTestBase.h | 2 +-
7 files changed, 59 insertions(+), 117 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b987863127994..d88bbb6692cc0 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9314,10 +9314,10 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
Range);
auto Plan = std::make_unique<VPlan>(OrigLoop);
// Build hierarchical CFG.
- // Convert to VPlan-transform and consoliate all transforms for VPlan
+ // TODO: Convert to VPlan-transform and consolidate all transforms for VPlan
// creation.
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
- HCFGBuilder.buildHierarchicalCFG();
+ HCFGBuilder.buildPlainCFG();
VPlanTransforms::introduceTopLevelVectorLoopRegion(
*Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck,
@@ -9621,7 +9621,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
auto Plan = std::make_unique<VPlan>(OrigLoop);
// Build hierarchical CFG
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
- HCFGBuilder.buildHierarchicalCFG();
+ HCFGBuilder.buildPlainCFG();
VPlanTransforms::introduceTopLevelVectorLoopRegion(
*Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index f58f0290b5fa9..ba68ec99b0c74 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -14,12 +14,43 @@
#include "LoopVectorizationPlanner.h"
#include "VPlan.h"
#include "VPlanCFG.h"
+#include "VPlanDominatorTree.h"
#include "VPlanTransforms.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;
+/// Introduce VPRegionBlocks for each loop modeled using a plain CFG in \p Plan.
+static void introduceInnerLoopRegions(VPlan &Plan) {
+ VPDominatorTree VPDT;
+ VPDT.recalculate(Plan);
+
+ for (VPBlockBase *HeaderVPBB :
+ vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry())) {
+ if (HeaderVPBB->getNumPredecessors() != 2)
+ continue;
+ VPBlockBase *PreheaderVPBB = HeaderVPBB->getPredecessors()[0];
+ VPBlockBase *LatchVPBB = HeaderVPBB->getPredecessors()[1];
+ if (!VPDT.dominates(HeaderVPBB, LatchVPBB))
+ continue;
+ assert(VPDT.dominates(PreheaderVPBB, HeaderVPBB) &&
+ "preheader must dominate header");
+ VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPBB);
+ VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPBB);
+ VPBlockBase *Succ = LatchVPBB->getSingleSuccessor();
+ VPBlockUtils::disconnectBlocks(LatchVPBB, Succ);
+
+ auto *R = Plan.createVPRegionBlock(HeaderVPBB, LatchVPBB, "",
+ false /*isReplicator*/);
+ for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPBB))
+ VPBB->setParent(R);
+
+ VPBlockUtils::insertBlockAfter(R, PreheaderVPBB);
+ VPBlockUtils::connectBlocks(R, Succ);
+ }
+}
+
void VPlanTransforms::introduceTopLevelVectorLoopRegion(
VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE,
bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop) {
@@ -98,4 +129,6 @@ void VPlanTransforms::introduceTopLevelVectorLoopRegion(
ScalarLatchTerm->getDebugLoc(), "cmp.n");
Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp},
ScalarLatchTerm->getDebugLoc());
+
+ introduceInnerLoopRegions(Plan);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
index 4b8a2420b3037..5e31b09bcd7d3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
@@ -12,9 +12,7 @@
/// components and steps:
//
/// 1. PlainCFGBuilder class: builds a plain VPBasicBlock-based CFG that
-/// faithfully represents the CFG in the incoming IR. A VPRegionBlock (Top
-/// Region) is created to enclose and serve as parent of all the VPBasicBlocks
-/// in the plain CFG.
+/// faithfully represents the CFG in the incoming IR.
/// NOTE: At this point, there is a direct correspondence between all the
/// VPBasicBlocks created for the initial plain CFG and the incoming
/// BasicBlocks. However, this might change in the future.
@@ -57,12 +55,8 @@ class PlainCFGBuilder {
// Hold phi node's that need to be fixed once the plain CFG has been built.
SmallVector<PHINode *, 8> PhisToFix;
- /// Maps loops in the original IR to their corresponding region.
- DenseMap<Loop *, VPRegionBlock *> Loop2Region;
-
// Utility functions.
void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB);
- void setRegionPredsFromBB(VPRegionBlock *VPBB, BasicBlock *BB);
void fixHeaderPhis();
VPBasicBlock *getOrCreateVPBB(BasicBlock *BB);
#ifndef NDEBUG
@@ -83,25 +77,6 @@ class PlainCFGBuilder {
// Set predecessors of \p VPBB in the same order as they are in \p BB. \p VPBB
// must have no predecessors.
void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) {
- auto GetLatchOfExit = [this](BasicBlock *BB) -> BasicBlock * {
- auto *SinglePred = BB->getSinglePredecessor();
- Loop *LoopForBB = LI->getLoopFor(BB);
- if (!SinglePred || LI->getLoopFor(SinglePred) == LoopForBB)
- return nullptr;
- // The input IR must be in loop-simplify form, ensuring a single predecessor
- // for exit blocks.
- assert(SinglePred == LI->getLoopFor(SinglePred)->getLoopLatch() &&
- "SinglePred must be the only loop latch");
- return SinglePred;
- };
- if (auto *LatchBB = GetLatchOfExit(BB)) {
- auto *PredRegion = getOrCreateVPBB(LatchBB)->getParent();
- assert(VPBB == cast<VPBasicBlock>(PredRegion->getSingleSuccessor()) &&
- "successor must already be set for PredRegion; it must have VPBB "
- "as single successor");
- VPBB->setPredecessors({PredRegion});
- return;
- }
// Collect VPBB predecessors.
SmallVector<VPBlockBase *, 2> VPBBPreds;
for (BasicBlock *Pred : predecessors(BB))
@@ -113,13 +88,6 @@ static bool isHeaderBB(BasicBlock *BB, Loop *L) {
return L && BB == L->getHeader();
}
-void PlainCFGBuilder::setRegionPredsFromBB(VPRegionBlock *Region,
- BasicBlock *BB) {
- // BB is a loop header block. Connect the region to the loop preheader.
- Loop *LoopOfBB = LI->getLoopFor(BB);
- Region->setPredecessors({getOrCreateVPBB(LoopOfBB->getLoopPredecessor())});
-}
-
// Add operands to VPInstructions representing phi nodes from the input IR.
void PlainCFGBuilder::fixHeaderPhis() {
for (auto *Phi : PhisToFix) {
@@ -150,19 +118,6 @@ static bool isHeaderVPBB(VPBasicBlock *VPBB) {
return VPBB->getParent() && VPBB->getParent()->getEntry() == VPBB;
}
-/// Return true of \p L loop is contained within \p OuterLoop.
-static bool doesContainLoop(const Loop *L, const Loop *OuterLoop) {
- if (L->getLoopDepth() < OuterLoop->getLoopDepth())
- return false;
- const Loop *P = L;
- while (P) {
- if (P == OuterLoop)
- return true;
- P = P->getParentLoop();
- }
- return false;
-}
-
// Create a new empty VPBasicBlock for an incoming BasicBlock in the region
// corresponding to the containing loop or retrieve an existing one if it was
// already created. If no region exists yet for the loop containing \p BB, a new
@@ -178,28 +133,6 @@ VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n");
VPBasicBlock *VPBB = Plan.createVPBasicBlock(Name);
BB2VPBB[BB] = VPBB;
-
- // Get or create a region for the loop containing BB, except for the top
- // region of TheLoop which is created later.
- Loop *LoopOfBB = LI->getLoopFor(BB);
- if (!LoopOfBB || LoopOfBB == TheLoop || !doesContainLoop(LoopOfBB, TheLoop))
- return VPBB;
-
- auto *RegionOfVPBB = Loop2Region.lookup(LoopOfBB);
- if (!isHeaderBB(BB, LoopOfBB)) {
- assert(RegionOfVPBB &&
- "Region should have been created by visiting header earlier");
- VPBB->setParent(RegionOfVPBB);
- return VPBB;
- }
-
- assert(!RegionOfVPBB &&
- "First visit of a header basic block expects to register its region.");
- // Handle a header - take care of its Region.
- RegionOfVPBB = Plan.createVPRegionBlock(Name.str(), false /*isReplicator*/);
- RegionOfVPBB->setParent(Loop2Region[LoopOfBB->getParentLoop()]);
- RegionOfVPBB->setEntry(VPBB);
- Loop2Region[LoopOfBB] = RegionOfVPBB;
return VPBB;
}
@@ -376,15 +309,13 @@ void PlainCFGBuilder::buildPlainCFG(
for (BasicBlock *BB : RPO) {
// Create or retrieve the VPBasicBlock for this BB.
VPBasicBlock *VPBB = getOrCreateVPBB(BB);
- VPRegionBlock *Region = VPBB->getParent();
Loop *LoopForBB = LI->getLoopFor(BB);
// Set VPBB predecessors in the same order as they are in the incoming BB.
if (!isHeaderBB(BB, LoopForBB)) {
setVPBBPredsFromBB(VPBB, BB);
- } else if (Region) {
- // BB is a loop header and there's a corresponding region, set the
- // predecessor for it.
- setRegionPredsFromBB(Region, BB);
+ } else if (LoopForBB != TheLoop) {
+ VPBB->setPredecessors({getOrCreateVPBB(LoopForBB->getLoopPredecessor()),
+ getOrCreateVPBB(LoopForBB->getLoopLatch())});
}
// Create VPInstructions for BB.
@@ -423,21 +354,11 @@ void PlainCFGBuilder::buildPlainCFG(
BasicBlock *IRSucc1 = BI->getSuccessor(1);
VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0);
VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1);
- if (BB == LoopForBB->getLoopLatch()) {
- // For a latch we need to set the successor of the region rather than that
- // of VPBB and it should be set to the exit, i.e., non-header successor,
- // except for the top region, which is handled elsewhere.
- assert(LoopForBB != TheLoop &&
- "Latch of the top region should have been handled earlier");
- Region->setOneSuccessor(isHeaderVPBB(Successor0) ? Successor1
- : Successor0);
- Region->setExiting(VPBB);
- continue;
- }
- // Don't connect any blocks outside the current loop except the latch for
- // now. The latch is handled above.
- if (LoopForBB) {
+ // Don't connect any blocks outside the current loop except the latch, which
+ // is handled below.
+ if (LoopForBB &&
+ (LoopForBB == TheLoop || BB != LoopForBB->getLoopLatch())) {
if (!LoopForBB->contains(IRSucc0)) {
VPBB->setOneSuccessor(Successor1);
continue;
@@ -461,16 +382,11 @@ void PlainCFGBuilder::buildPlainCFG(
for (const auto &[IRBB, VPB] : BB2VPBB)
VPB2IRBB[VPB] = IRBB;
+
+ LLVM_DEBUG(Plan.setName("Plain CFG\n"); dbgs() << Plan);
}
void VPlanHCFGBuilder::buildPlainCFG() {
PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan);
PCFGBuilder.buildPlainCFG(VPB2IRBB);
}
-
-// Public interface to build a H-CFG.
-void VPlanHCFGBuilder::buildHierarchicalCFG() {
- // Build Top Region enclosing the plain CFG.
- buildPlainCFG();
- LLVM_DEBUG(Plan.setName("HCFGBuilder: Plain CFG\n"); dbgs() << Plan);
-}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
index f7f98ed7b1755..f2e90d3f4d9b3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
@@ -30,7 +30,6 @@ namespace llvm {
class Loop;
class LoopInfo;
-class VPRegionBlock;
class VPlan;
class VPlanTestIRBase;
class VPBlockBase;
@@ -54,15 +53,12 @@ class VPlanHCFGBuilder {
/// created for a input IR basic block.
DenseMap<VPBlockBase *, BasicBlock *> VPB2IRBB;
- /// Build plain CFG for TheLoop and connects it to Plan's entry.
- void buildPlainCFG();
-
public:
VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P)
: TheLoop(Lp), LI(LI), Plan(P) {}
- /// Build H-CFG for TheLoop and update Plan accordingly.
- void buildHierarchicalCFG();
+ /// Build plain CFG for TheLoop and connect it to Plan's entry.
+ void buildPlainCFG();
/// Return the input IR BasicBlock corresponding to \p VPB. Returns nullptr if
/// there is no such corresponding block.
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
index 625a32c098f94..b4b6d3d760349 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
@@ -6,7 +6,7 @@
@arr = external global [8 x [8 x i64]], align 16
define void @foo(i64 %n) {
-; CHECK: VPlan 'HCFGBuilder: Plain CFG
+; CHECK: VPlan 'Plain CFG
; CHECK-NEXT: {
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
@@ -19,17 +19,14 @@ define void @foo(i64 %n) {
; CHECK-NEXT: EMIT ir<%add> = add ir<%outer.iv>, ir<%n>
; CHECK-NEXT: Successor(s): inner
; CHECK-EMPTY:
-; CHECK-NEXT: <x1> inner: {
-; CHECK-NEXT: inner:
-; CHECK-NEXT: WIDEN-PHI ir<%inner.iv> = phi ir<0>, ir<%inner.iv.next>
-; CHECK-NEXT: EMIT ir<%gep.2> = getelementptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv>
-; CHECK-NEXT: EMIT store ir<%add>, ir<%gep.2>
-; CHECK-NEXT: EMIT ir<%inner.iv.next> = add ir<%inner.iv>, ir<1>
-; CHECK-NEXT: EMIT ir<%inner.ec> = icmp ir<%inner.iv.next>, ir<8>
-; CHECK-NEXT: EMIT branch-on-cond ir<%inner.ec>
-; CHECK-NEXT: No successors
-; CHECK-NEXT: }
-; CHECK-NEXT: Successor(s): outer.latch
+; CHECK-NEXT: inner:
+; CHECK-NEXT: WIDEN-PHI ir<%inner.iv> = phi ir<0>, ir<%inner.iv.next>
+; CHECK-NEXT: EMIT ir<%gep.2> = getelementptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv>
+; CHECK-NEXT: EMIT store ir<%add>, ir<%gep.2>
+; CHECK-NEXT: EMIT ir<%inner.iv.next> = add ir<%inner.iv>, ir<1>
+; CHECK-NEXT: EMIT ir<%inner.ec> = icmp ir<%inner.iv.next>, ir<8>
+; CHECK-NEXT: EMIT branch-on-cond ir<%inner.ec>
+; CHECK-NEXT: Successor(s): outer.latch, inner
; CHECK-EMPTY:
; CHECK-NEXT: outer.latch:
; CHECK-NEXT: EMIT ir<%outer.iv.next> = add ir<%outer.iv>, ir<1>
diff --git a/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll b/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll
index 89eaca0cfa8c8..29aeb7c4e97f9 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll
@@ -4,7 +4,7 @@
; Verify that the stress testing flag for the VPlan H-CFG builder works as
; expected with and without enabling the VPlan H-CFG Verifier.
-; CHECK: VPlan 'HCFGBuilder: Plain CFG
+; CHECK: VPlan 'Plain CFG
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
index caf5d2357411d..92961e44c5e54 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
@@ -73,7 +73,7 @@ class VPlanTestIRBase : public testing::Test {
PredicatedScalarEvolution PSE(*SE, *L);
auto Plan = std::make_unique<VPlan>(L);
VPlanHCFGBuilder HCFGBuilder(L, LI.get(), *Plan);
- HCFGBuilder.buildHierarchicalCFG();
+ HCFGBuilder.buildPlainCFG();
VPlanTransforms::introduceTopLevelVectorLoopRegion(
*Plan, IntegerType::get(*Ctx, 64), PSE, true, false, L);
return Plan;
More information about the llvm-commits
mailing list