[llvm] [VPlan] Move predication to VPlanTransform (NFC) (WIP). (PR #128420)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Sat Apr 5 06:19:28 PDT 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/128420
>From d5ba9a38d09e021d9acfb67bf6a2395ce37fc9b4 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sat, 1 Mar 2025 21:07:00 +0000
Subject: [PATCH 1/9] [VPlan] Introduce child regions as VPlan transform.
Further simplify VPlan CFG builder by moving introduction of inner
regions to a VPlan transform, building on
https://github.com/llvm/llvm-project/pull/128419
The HCFG builder now only constructs plain CFGs. I will move it to
VPlanConstruction as follow-up.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 6 +-
.../Vectorize/VPlanConstruction.cpp | 69 +++++++----
.../Transforms/Vectorize/VPlanHCFGBuilder.cpp | 112 +++---------------
.../Transforms/Vectorize/VPlanHCFGBuilder.h | 8 +-
.../vplan-printing-outer-loop.ll | 21 ++--
.../LoopVectorize/vplan_hcfg_stress_test.ll | 2 +-
.../Transforms/Vectorize/VPlanTestBase.h | 2 +-
7 files changed, 79 insertions(+), 141 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3fc5e716e3757..54d727e38b633 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9308,10 +9308,10 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
Range);
auto Plan = std::make_unique<VPlan>(OrigLoop);
// Build hierarchical CFG.
- // Convert to VPlan-transform and consoliate all transforms for VPlan
+ // TODO: Convert to VPlan-transform and consolidate all transforms for VPlan
// creation.
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
- HCFGBuilder.buildHierarchicalCFG();
+ HCFGBuilder.buildPlainCFG();
VPlanTransforms::introduceTopLevelVectorLoopRegion(
*Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck,
@@ -9615,7 +9615,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
auto Plan = std::make_unique<VPlan>(OrigLoop);
// Build hierarchical CFG
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
- HCFGBuilder.buildHierarchicalCFG();
+ HCFGBuilder.buildPlainCFG();
VPlanTransforms::introduceTopLevelVectorLoopRegion(
*Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index f58f0290b5fa9..17a758682905b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -14,26 +14,57 @@
#include "LoopVectorizationPlanner.h"
#include "VPlan.h"
#include "VPlanCFG.h"
+#include "VPlanDominatorTree.h"
#include "VPlanTransforms.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;
+/// Create and return a new VPRegionBlock for loop starting at \p HeaderVPBB, if
+/// it is a header of a loop.
+static VPRegionBlock *introduceRegion(VPlan &Plan, VPBlockBase *HeaderVPBB,
+ VPDominatorTree &VPDT) {
+ if (HeaderVPBB->getNumPredecessors() != 2)
+ return nullptr;
+ VPBlockBase *PreheaderVPBB = HeaderVPBB->getPredecessors()[0];
+ VPBlockBase *LatchVPBB = HeaderVPBB->getPredecessors()[1];
+ if (!VPDT.dominates(HeaderVPBB, LatchVPBB))
+ return nullptr;
+ assert(VPDT.dominates(PreheaderVPBB, HeaderVPBB) &&
+ "preheader must dominate header");
+ VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPBB);
+ VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPBB);
+ VPBlockBase *Succ = LatchVPBB->getSingleSuccessor();
+ if (Succ)
+ VPBlockUtils::disconnectBlocks(LatchVPBB, Succ);
+
+ auto *R = Plan.createVPRegionBlock(HeaderVPBB, LatchVPBB, "",
+ false /*isReplicator*/);
+ // All VPBB's reachable shallowly from HeaderVPBB belong to top level loop,
+ // because VPlan is expected to end at top level latch.
+ for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPBB))
+ VPBB->setParent(R);
+
+ VPBlockUtils::insertBlockAfter(R, PreheaderVPBB);
+ if (Succ)
+ VPBlockUtils::connectBlocks(R, Succ);
+ return R;
+}
+
void VPlanTransforms::introduceTopLevelVectorLoopRegion(
VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE,
bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop) {
- // TODO: Generalize to introduce all loop regions.
- auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor());
- VPBlockUtils::disconnectBlocks(Plan.getEntry(), HeaderVPBB);
+ VPDominatorTree VPDT;
+ VPDT.recalculate(Plan);
- VPBasicBlock *OriginalLatch =
- cast<VPBasicBlock>(HeaderVPBB->getSinglePredecessor());
- VPBlockUtils::disconnectBlocks(OriginalLatch, HeaderVPBB);
- VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph");
- VPBlockUtils::connectBlocks(Plan.getEntry(), VecPreheader);
- assert(OriginalLatch->getNumSuccessors() == 0 &&
- "Plan should end at top level latch");
+ auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor());
+ VPRegionBlock *TopRegion = introduceRegion(Plan, HeaderVPBB, VPDT);
+ auto *OrigExiting = TopRegion->getExiting();
+ VPBasicBlock *LatchVPBB = Plan.createVPBasicBlock("vector.latch");
+ VPBlockUtils::insertBlockAfter(LatchVPBB, OrigExiting);
+ TopRegion->setExiting(LatchVPBB);
+ TopRegion->setName("vector loop");
// Create SCEV and VPValue for the trip count.
// We use the symbolic max backedge-taken-count, which works also when
@@ -47,18 +78,9 @@ void VPlanTransforms::introduceTopLevelVectorLoopRegion(
Plan.setTripCount(
vputils::getOrCreateVPValueForSCEVExpr(Plan, TripCount, SE));
- // Create VPRegionBlock, with existing header and new empty latch block, to be
- // filled.
- VPBasicBlock *LatchVPBB = Plan.createVPBasicBlock("vector.latch");
- VPBlockUtils::insertBlockAfter(LatchVPBB, OriginalLatch);
- auto *TopRegion = Plan.createVPRegionBlock(
- HeaderVPBB, LatchVPBB, "vector loop", false /*isReplicator*/);
- // All VPBB's reachable shallowly from HeaderVPBB belong to top level loop,
- // because VPlan is expected to end at top level latch.
- for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPBB))
- VPBB->setParent(TopRegion);
+ VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph");
+ VPBlockUtils::insertBlockAfter(VecPreheader, Plan.getEntry());
- VPBlockUtils::insertBlockAfter(TopRegion, VecPreheader);
VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block");
VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion);
@@ -98,4 +120,9 @@ void VPlanTransforms::introduceTopLevelVectorLoopRegion(
ScalarLatchTerm->getDebugLoc(), "cmp.n");
Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp},
ScalarLatchTerm->getDebugLoc());
+
+ for (VPBlockBase *HeaderVPBB :
+ vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry())) {
+ introduceRegion(Plan, HeaderVPBB, VPDT);
+ }
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
index 4b8a2420b3037..4e06ce86caad7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
@@ -12,9 +12,7 @@
/// components and steps:
//
/// 1. PlainCFGBuilder class: builds a plain VPBasicBlock-based CFG that
-/// faithfully represents the CFG in the incoming IR. A VPRegionBlock (Top
-/// Region) is created to enclose and serve as parent of all the VPBasicBlocks
-/// in the plain CFG.
+/// faithfully represents the CFG in the incoming IR.
/// NOTE: At this point, there is a direct correspondence between all the
/// VPBasicBlocks created for the initial plain CFG and the incoming
/// BasicBlocks. However, this might change in the future.
@@ -57,12 +55,8 @@ class PlainCFGBuilder {
// Hold phi node's that need to be fixed once the plain CFG has been built.
SmallVector<PHINode *, 8> PhisToFix;
- /// Maps loops in the original IR to their corresponding region.
- DenseMap<Loop *, VPRegionBlock *> Loop2Region;
-
// Utility functions.
void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB);
- void setRegionPredsFromBB(VPRegionBlock *VPBB, BasicBlock *BB);
void fixHeaderPhis();
VPBasicBlock *getOrCreateVPBB(BasicBlock *BB);
#ifndef NDEBUG
@@ -83,25 +77,6 @@ class PlainCFGBuilder {
// Set predecessors of \p VPBB in the same order as they are in \p BB. \p VPBB
// must have no predecessors.
void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) {
- auto GetLatchOfExit = [this](BasicBlock *BB) -> BasicBlock * {
- auto *SinglePred = BB->getSinglePredecessor();
- Loop *LoopForBB = LI->getLoopFor(BB);
- if (!SinglePred || LI->getLoopFor(SinglePred) == LoopForBB)
- return nullptr;
- // The input IR must be in loop-simplify form, ensuring a single predecessor
- // for exit blocks.
- assert(SinglePred == LI->getLoopFor(SinglePred)->getLoopLatch() &&
- "SinglePred must be the only loop latch");
- return SinglePred;
- };
- if (auto *LatchBB = GetLatchOfExit(BB)) {
- auto *PredRegion = getOrCreateVPBB(LatchBB)->getParent();
- assert(VPBB == cast<VPBasicBlock>(PredRegion->getSingleSuccessor()) &&
- "successor must already be set for PredRegion; it must have VPBB "
- "as single successor");
- VPBB->setPredecessors({PredRegion});
- return;
- }
// Collect VPBB predecessors.
SmallVector<VPBlockBase *, 2> VPBBPreds;
for (BasicBlock *Pred : predecessors(BB))
@@ -113,13 +88,6 @@ static bool isHeaderBB(BasicBlock *BB, Loop *L) {
return L && BB == L->getHeader();
}
-void PlainCFGBuilder::setRegionPredsFromBB(VPRegionBlock *Region,
- BasicBlock *BB) {
- // BB is a loop header block. Connect the region to the loop preheader.
- Loop *LoopOfBB = LI->getLoopFor(BB);
- Region->setPredecessors({getOrCreateVPBB(LoopOfBB->getLoopPredecessor())});
-}
-
// Add operands to VPInstructions representing phi nodes from the input IR.
void PlainCFGBuilder::fixHeaderPhis() {
for (auto *Phi : PhisToFix) {
@@ -150,19 +118,6 @@ static bool isHeaderVPBB(VPBasicBlock *VPBB) {
return VPBB->getParent() && VPBB->getParent()->getEntry() == VPBB;
}
-/// Return true of \p L loop is contained within \p OuterLoop.
-static bool doesContainLoop(const Loop *L, const Loop *OuterLoop) {
- if (L->getLoopDepth() < OuterLoop->getLoopDepth())
- return false;
- const Loop *P = L;
- while (P) {
- if (P == OuterLoop)
- return true;
- P = P->getParentLoop();
- }
- return false;
-}
-
// Create a new empty VPBasicBlock for an incoming BasicBlock in the region
// corresponding to the containing loop or retrieve an existing one if it was
// already created. If no region exists yet for the loop containing \p BB, a new
@@ -178,28 +133,6 @@ VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n");
VPBasicBlock *VPBB = Plan.createVPBasicBlock(Name);
BB2VPBB[BB] = VPBB;
-
- // Get or create a region for the loop containing BB, except for the top
- // region of TheLoop which is created later.
- Loop *LoopOfBB = LI->getLoopFor(BB);
- if (!LoopOfBB || LoopOfBB == TheLoop || !doesContainLoop(LoopOfBB, TheLoop))
- return VPBB;
-
- auto *RegionOfVPBB = Loop2Region.lookup(LoopOfBB);
- if (!isHeaderBB(BB, LoopOfBB)) {
- assert(RegionOfVPBB &&
- "Region should have been created by visiting header earlier");
- VPBB->setParent(RegionOfVPBB);
- return VPBB;
- }
-
- assert(!RegionOfVPBB &&
- "First visit of a header basic block expects to register its region.");
- // Handle a header - take care of its Region.
- RegionOfVPBB = Plan.createVPRegionBlock(Name.str(), false /*isReplicator*/);
- RegionOfVPBB->setParent(Loop2Region[LoopOfBB->getParentLoop()]);
- RegionOfVPBB->setEntry(VPBB);
- Loop2Region[LoopOfBB] = RegionOfVPBB;
return VPBB;
}
@@ -351,6 +284,8 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
// Main interface to build the plain CFG.
void PlainCFGBuilder::buildPlainCFG(
DenseMap<VPBlockBase *, BasicBlock *> &VPB2IRBB) {
+ VPIRBasicBlock *Entry = cast<VPIRBasicBlock>(Plan.getEntry());
+ BB2VPBB[Entry->getIRBasicBlock()] = Entry;
// 1. Scan the body of the loop in a topological order to visit each basic
// block after having visited its predecessor basic blocks. Create a VPBB for
@@ -376,15 +311,13 @@ void PlainCFGBuilder::buildPlainCFG(
for (BasicBlock *BB : RPO) {
// Create or retrieve the VPBasicBlock for this BB.
VPBasicBlock *VPBB = getOrCreateVPBB(BB);
- VPRegionBlock *Region = VPBB->getParent();
Loop *LoopForBB = LI->getLoopFor(BB);
// Set VPBB predecessors in the same order as they are in the incoming BB.
if (!isHeaderBB(BB, LoopForBB)) {
setVPBBPredsFromBB(VPBB, BB);
- } else if (Region) {
- // BB is a loop header and there's a corresponding region, set the
- // predecessor for it.
- setRegionPredsFromBB(Region, BB);
+ } else {
+ VPBB->setPredecessors({getOrCreateVPBB(LoopForBB->getLoopPredecessor()),
+ getOrCreateVPBB(LoopForBB->getLoopLatch())});
}
// Create VPInstructions for BB.
@@ -392,7 +325,7 @@ void PlainCFGBuilder::buildPlainCFG(
if (BB == TheLoop->getLoopLatch()) {
VPBasicBlock *HeaderVPBB = getOrCreateVPBB(LoopForBB->getHeader());
- VPBlockUtils::connectBlocks(VPBB, HeaderVPBB);
+ VPBB->setOneSuccessor(HeaderVPBB);
continue;
}
@@ -423,21 +356,11 @@ void PlainCFGBuilder::buildPlainCFG(
BasicBlock *IRSucc1 = BI->getSuccessor(1);
VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0);
VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1);
- if (BB == LoopForBB->getLoopLatch()) {
- // For a latch we need to set the successor of the region rather than that
- // of VPBB and it should be set to the exit, i.e., non-header successor,
- // except for the top region, which is handled elsewhere.
- assert(LoopForBB != TheLoop &&
- "Latch of the top region should have been handled earlier");
- Region->setOneSuccessor(isHeaderVPBB(Successor0) ? Successor1
- : Successor0);
- Region->setExiting(VPBB);
- continue;
- }
- // Don't connect any blocks outside the current loop except the latch for
- // now. The latch is handled above.
- if (LoopForBB) {
+ // Don't connect any blocks outside the current loop except the latch, which
+ // is handled below.
+ if (LoopForBB &&
+ (LoopForBB == TheLoop || BB != LoopForBB->getLoopLatch())) {
if (!LoopForBB->contains(IRSucc0)) {
VPBB->setOneSuccessor(Successor1);
continue;
@@ -456,21 +379,16 @@ void PlainCFGBuilder::buildPlainCFG(
// corresponding VPlan operands.
fixHeaderPhis();
- VPBlockUtils::connectBlocks(Plan.getEntry(),
- getOrCreateVPBB(TheLoop->getHeader()));
+ Plan.getEntry()->setOneSuccessor(getOrCreateVPBB(TheLoop->getHeader()));
+ Plan.getEntry()->setPlan(&Plan);
for (const auto &[IRBB, VPB] : BB2VPBB)
VPB2IRBB[VPB] = IRBB;
+
+ LLVM_DEBUG(Plan.setName("Plain CFG\n"); dbgs() << Plan);
}
void VPlanHCFGBuilder::buildPlainCFG() {
PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan);
PCFGBuilder.buildPlainCFG(VPB2IRBB);
}
-
-// Public interface to build a H-CFG.
-void VPlanHCFGBuilder::buildHierarchicalCFG() {
- // Build Top Region enclosing the plain CFG.
- buildPlainCFG();
- LLVM_DEBUG(Plan.setName("HCFGBuilder: Plain CFG\n"); dbgs() << Plan);
-}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
index f7f98ed7b1755..f2e90d3f4d9b3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
@@ -30,7 +30,6 @@ namespace llvm {
class Loop;
class LoopInfo;
-class VPRegionBlock;
class VPlan;
class VPlanTestIRBase;
class VPBlockBase;
@@ -54,15 +53,12 @@ class VPlanHCFGBuilder {
/// created for a input IR basic block.
DenseMap<VPBlockBase *, BasicBlock *> VPB2IRBB;
- /// Build plain CFG for TheLoop and connects it to Plan's entry.
- void buildPlainCFG();
-
public:
VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P)
: TheLoop(Lp), LI(LI), Plan(P) {}
- /// Build H-CFG for TheLoop and update Plan accordingly.
- void buildHierarchicalCFG();
+ /// Build plain CFG for TheLoop and connect it to Plan's entry.
+ void buildPlainCFG();
/// Return the input IR BasicBlock corresponding to \p VPB. Returns nullptr if
/// there is no such corresponding block.
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
index 625a32c098f94..b4b6d3d760349 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
@@ -6,7 +6,7 @@
@arr = external global [8 x [8 x i64]], align 16
define void @foo(i64 %n) {
-; CHECK: VPlan 'HCFGBuilder: Plain CFG
+; CHECK: VPlan 'Plain CFG
; CHECK-NEXT: {
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
@@ -19,17 +19,14 @@ define void @foo(i64 %n) {
; CHECK-NEXT: EMIT ir<%add> = add ir<%outer.iv>, ir<%n>
; CHECK-NEXT: Successor(s): inner
; CHECK-EMPTY:
-; CHECK-NEXT: <x1> inner: {
-; CHECK-NEXT: inner:
-; CHECK-NEXT: WIDEN-PHI ir<%inner.iv> = phi ir<0>, ir<%inner.iv.next>
-; CHECK-NEXT: EMIT ir<%gep.2> = getelementptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv>
-; CHECK-NEXT: EMIT store ir<%add>, ir<%gep.2>
-; CHECK-NEXT: EMIT ir<%inner.iv.next> = add ir<%inner.iv>, ir<1>
-; CHECK-NEXT: EMIT ir<%inner.ec> = icmp ir<%inner.iv.next>, ir<8>
-; CHECK-NEXT: EMIT branch-on-cond ir<%inner.ec>
-; CHECK-NEXT: No successors
-; CHECK-NEXT: }
-; CHECK-NEXT: Successor(s): outer.latch
+; CHECK-NEXT: inner:
+; CHECK-NEXT: WIDEN-PHI ir<%inner.iv> = phi ir<0>, ir<%inner.iv.next>
+; CHECK-NEXT: EMIT ir<%gep.2> = getelementptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv>
+; CHECK-NEXT: EMIT store ir<%add>, ir<%gep.2>
+; CHECK-NEXT: EMIT ir<%inner.iv.next> = add ir<%inner.iv>, ir<1>
+; CHECK-NEXT: EMIT ir<%inner.ec> = icmp ir<%inner.iv.next>, ir<8>
+; CHECK-NEXT: EMIT branch-on-cond ir<%inner.ec>
+; CHECK-NEXT: Successor(s): outer.latch, inner
; CHECK-EMPTY:
; CHECK-NEXT: outer.latch:
; CHECK-NEXT: EMIT ir<%outer.iv.next> = add ir<%outer.iv>, ir<1>
diff --git a/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll b/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll
index 89eaca0cfa8c8..29aeb7c4e97f9 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll
@@ -4,7 +4,7 @@
; Verify that the stress testing flag for the VPlan H-CFG builder works as
; expected with and without enabling the VPlan H-CFG Verifier.
-; CHECK: VPlan 'HCFGBuilder: Plain CFG
+; CHECK: VPlan 'Plain CFG
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
index caf5d2357411d..92961e44c5e54 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
@@ -73,7 +73,7 @@ class VPlanTestIRBase : public testing::Test {
PredicatedScalarEvolution PSE(*SE, *L);
auto Plan = std::make_unique<VPlan>(L);
VPlanHCFGBuilder HCFGBuilder(L, LI.get(), *Plan);
- HCFGBuilder.buildHierarchicalCFG();
+ HCFGBuilder.buildPlainCFG();
VPlanTransforms::introduceTopLevelVectorLoopRegion(
*Plan, IntegerType::get(*Ctx, 64), PSE, true, false, L);
return Plan;
>From a88f03e42b57cdf42bec4a16e06fa78f06eb1b3e Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 4 Apr 2025 13:51:30 +0100
Subject: [PATCH 2/9] !fixup address comments, thanks!
---
llvm/lib/Transforms/Vectorize/VPlan.cpp | 11 +++++++
llvm/lib/Transforms/Vectorize/VPlan.h | 6 ++++
.../Vectorize/VPlanConstruction.cpp | 32 ++++++++-----------
3 files changed, 31 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 1e2f70e5c103e..759f1b7091037 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -648,6 +648,17 @@ bool VPBasicBlock::isExiting() const {
return getParent() && getParent()->getExitingBasicBlock() == this;
}
+bool VPBasicBlock::isHeader(const VPDominatorTree &VPDT) const {
+ if (getNumPredecessors() != 2)
+ return false;
+ VPBlockBase *LatchVPBB = getPredecessors()[1];
+ if (!VPDT.dominates(this, LatchVPBB))
+ return false;
+ assert(VPDT.dominates(getPredecessors()[0], this) &&
+ "preheader must dominate header");
+ return true;
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPBlockBase::print(raw_ostream &O) const {
VPSlotTracker SlotTracker(getPlan());
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 50baf220a1002..4f42c595df0f0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -57,6 +57,7 @@ class SCEV;
class Type;
class VPBasicBlock;
class VPBuilder;
+class VPDominatorTree;
class VPRegionBlock;
class VPlan;
class VPLane;
@@ -3251,6 +3252,11 @@ class VPBasicBlock : public VPBlockBase {
/// Returns true if the block is exiting it's parent region.
bool isExiting() const;
+ /// Returns true if the block is a loop header block in the plain CFG; that
+ /// is, it has exactly 2 predecessors (preheader and latch), where the block
+ /// dominates the latch and the preheader dominates the block.
+ bool isHeader(const VPDominatorTree &VPDT) const;
+
/// Clone the current block and it's recipes, without updating the operands of
/// the cloned recipes.
VPBasicBlock *clone() override;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 17a758682905b..0bea53c423760 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -21,28 +21,24 @@
using namespace llvm;
-/// Create and return a new VPRegionBlock for loop starting at \p HeaderVPBB, if
-/// it is a header of a loop.
-static VPRegionBlock *introduceRegion(VPlan &Plan, VPBlockBase *HeaderVPBB,
- VPDominatorTree &VPDT) {
- if (HeaderVPBB->getNumPredecessors() != 2)
- return nullptr;
+/// Create and return a new VPRegionBlock for loop starting at \p HeaderVPBB and
+/// return it.
+static VPRegionBlock *introduceRegion(VPlan &Plan, VPBlockBase *HeaderVPBB) {
VPBlockBase *PreheaderVPBB = HeaderVPBB->getPredecessors()[0];
VPBlockBase *LatchVPBB = HeaderVPBB->getPredecessors()[1];
- if (!VPDT.dominates(HeaderVPBB, LatchVPBB))
- return nullptr;
- assert(VPDT.dominates(PreheaderVPBB, HeaderVPBB) &&
- "preheader must dominate header");
VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPBB);
VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPBB);
VPBlockBase *Succ = LatchVPBB->getSingleSuccessor();
+ assert(LatchVPBB->getNumSuccessors() <= 1 &&
+ "Latch has more than one successor");
if (Succ)
VPBlockUtils::disconnectBlocks(LatchVPBB, Succ);
auto *R = Plan.createVPRegionBlock(HeaderVPBB, LatchVPBB, "",
false /*isReplicator*/);
+ R->setParent(HeaderVPBB->getParent());
// All VPBB's reachable shallowly from HeaderVPBB belong to top level loop,
- // because VPlan is expected to end at top level latch.
+ // because VPlan is expected to end at top level latch disconnected above.
for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPBB))
VPBB->setParent(R);
@@ -57,9 +53,14 @@ void VPlanTransforms::introduceTopLevelVectorLoopRegion(
bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop) {
VPDominatorTree VPDT;
VPDT.recalculate(Plan);
+ for (VPBasicBlock *HeaderVPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+ vp_depth_first_shallow(Plan.getEntry()))) {
+ if (!HeaderVPBB->isHeader(VPDT))
+ continue;
+ introduceRegion(Plan, HeaderVPBB);
+ }
- auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor());
- VPRegionBlock *TopRegion = introduceRegion(Plan, HeaderVPBB, VPDT);
+ VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
auto *OrigExiting = TopRegion->getExiting();
VPBasicBlock *LatchVPBB = Plan.createVPBasicBlock("vector.latch");
VPBlockUtils::insertBlockAfter(LatchVPBB, OrigExiting);
@@ -120,9 +121,4 @@ void VPlanTransforms::introduceTopLevelVectorLoopRegion(
ScalarLatchTerm->getDebugLoc(), "cmp.n");
Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp},
ScalarLatchTerm->getDebugLoc());
-
- for (VPBlockBase *HeaderVPBB :
- vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry())) {
- introduceRegion(Plan, HeaderVPBB, VPDT);
- }
}
>From 29f6ddffd22ed109bac5f94532390c9c77e2d7a7 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 4 Apr 2025 16:21:13 +0100
Subject: [PATCH 3/9] !fixup adjust names and comments after recent changes.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 10 +++++-----
.../Vectorize/VPlanConstruction.cpp | 7 ++++---
.../Transforms/Vectorize/VPlanTransforms.h | 19 +++++++++----------
.../Transforms/Vectorize/VPlanTestBase.h | 4 ++--
4 files changed, 20 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 8ec9c003e841b..732cec7db81cb 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9332,9 +9332,9 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
HCFGBuilder.buildPlainCFG();
- VPlanTransforms::introduceTopLevelVectorLoopRegion(
- *Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck,
- CM.foldTailByMasking(), OrigLoop);
+ VPlanTransforms::introduceRegions(*Plan, Legal->getWidestInductionType(), PSE,
+ RequiresScalarEpilogueCheck,
+ CM.foldTailByMasking(), OrigLoop);
// Don't use getDecisionAndClampRange here, because we don't know the UF
// so this function is better to be conservative, rather than to split
@@ -9636,8 +9636,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
HCFGBuilder.buildPlainCFG();
- VPlanTransforms::introduceTopLevelVectorLoopRegion(
- *Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop);
+ VPlanTransforms::introduceRegions(*Plan, Legal->getWidestInductionType(), PSE,
+ true, false, OrigLoop);
for (ElementCount VF : Range)
Plan->addVF(VF);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 0bea53c423760..876333d045f84 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -48,9 +48,10 @@ static VPRegionBlock *introduceRegion(VPlan &Plan, VPBlockBase *HeaderVPBB) {
return R;
}
-void VPlanTransforms::introduceTopLevelVectorLoopRegion(
- VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE,
- bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop) {
+void VPlanTransforms::introduceRegions(VPlan &Plan, Type *InductionTy,
+ PredicatedScalarEvolution &PSE,
+ bool RequiresScalarEpilogueCheck,
+ bool TailFolded, Loop *TheLoop) {
VPDominatorTree VPDT;
VPDT.recalculate(Plan);
for (VPBasicBlock *HeaderVPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index c23ff38265670..ed8b7a08ea187 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -52,20 +52,19 @@ struct VPlanTransforms {
verifyVPlanIsValid(Plan);
}
- /// Introduce the top-level VPRegionBlock for the main loop in \p Plan. Coming
- /// into this function, \p Plan's top-level loop is modeled using a plain CFG.
- /// This transform wraps the plain CFG of the top-level loop within a
- /// VPRegionBlock and creates a VPValue expression for the original trip
- /// count. It will also introduce a dedicated VPBasicBlock for the vector
- /// pre-header as well a VPBasicBlock as exit block of the region
- /// (middle.block). If a check is needed to guard executing the scalar
+ /// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turning the
+ /// flat CFG into a hierarchical CFG. It also creates a VPValue expression for
+ /// the original trip count. It will also introduce a dedicated VPBasicBlock
+ /// for the vector pre-header as well as a VPBasicBlock as exit block of the
+ /// region (middle.block). If a check is needed to guard executing the scalar
/// epilogue loop, it will be added to the middle block, together with
/// VPBasicBlocks for the scalar preheader and exit blocks. \p InductionTy is
/// the type of the canonical induction and used for related values, like the
/// trip count expression.
- static void introduceTopLevelVectorLoopRegion(
- VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE,
- bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop);
+ static void introduceRegions(VPlan &Plan, Type *InductionTy,
+ PredicatedScalarEvolution &PSE,
+ bool RequiresScalarEpilogueCheck,
+ bool TailFolded, Loop *TheLoop);
/// Replaces the VPInstructions in \p Plan with corresponding
/// widen recipes. Returns false if any VPInstructions could not be converted
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
index 92961e44c5e54..f2d3d37b40ba9 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
@@ -74,8 +74,8 @@ class VPlanTestIRBase : public testing::Test {
auto Plan = std::make_unique<VPlan>(L);
VPlanHCFGBuilder HCFGBuilder(L, LI.get(), *Plan);
HCFGBuilder.buildPlainCFG();
- VPlanTransforms::introduceTopLevelVectorLoopRegion(
- *Plan, IntegerType::get(*Ctx, 64), PSE, true, false, L);
+ VPlanTransforms::introduceRegions(*Plan, IntegerType::get(*Ctx, 64), PSE,
+ true, false, L);
return Plan;
}
};
>From cc818012a9683fd4d1fe4dceb3a5e2fb32d91a44 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sat, 5 Apr 2025 11:39:30 +0100
Subject: [PATCH 4/9] !fixup don't special case header/latch predecessor order.
---
llvm/lib/Transforms/Vectorize/VPlan.cpp | 23 +++++++++++--------
llvm/lib/Transforms/Vectorize/VPlan.h | 11 +++++----
.../Vectorize/VPlanConstruction.cpp | 12 ++++++----
.../Transforms/Vectorize/VPlanHCFGBuilder.cpp | 13 +----------
4 files changed, 29 insertions(+), 30 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 759f1b7091037..c97edcf3ecd3a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -648,15 +648,20 @@ bool VPBasicBlock::isExiting() const {
return getParent() && getParent()->getExitingBasicBlock() == this;
}
-bool VPBasicBlock::isHeader(const VPDominatorTree &VPDT) const {
- if (getNumPredecessors() != 2)
- return false;
- VPBlockBase *LatchVPBB = getPredecessors()[1];
- if (!VPDT.dominates(this, LatchVPBB))
- return false;
- assert(VPDT.dominates(getPredecessors()[0], this) &&
- "preheader must dominate header");
- return true;
+std::optional<std::pair<VPBasicBlock *, VPBasicBlock *>>
+VPBasicBlock::isHeader(const VPDominatorTree &VPDT) const {
+ ArrayRef<VPBlockBase *> Preds = getPredecessors();
+ if (Preds.size() != 2)
+ return std::nullopt;
+
+ for (unsigned Idx : {0, 1}) {
+ auto *PreheaderVPBB = cast<VPBasicBlock>(Preds[Idx]);
+ auto *LatchVPBB = cast<VPBasicBlock>(Preds[1 - Idx]);
+ if (VPDT.dominates(PreheaderVPBB, this) && VPDT.dominates(this, LatchVPBB))
+ return {std::make_pair(PreheaderVPBB, LatchVPBB)};
+ }
+
+ return std::nullopt;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 5715c17a94b72..814990f6ea00b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3255,10 +3255,13 @@ class VPBasicBlock : public VPBlockBase {
/// Returns true if the block is exiting it's parent region.
bool isExiting() const;
- /// Returns true if the block is a loop header block in the plain CFG; that
- /// is, it has exactly 2 predecessors (preheader and latch), where the block
- /// dominates the latch and the preheader dominates the block.
- bool isHeader(const VPDominatorTree &VPDT) const;
+ /// Checks if the block is a loop header block in the plain CFG; that is, it
+ /// has exactly 2 predecessors (preheader and latch), where the block
+ /// dominates the latch and the preheader dominates the block. If it is a
+ /// header block, returns a pair with the corresponding preheader and latch
+ /// blocks. Otherwise returns std::nullopt.
+ std::optional<std::pair<VPBasicBlock *, VPBasicBlock *>>
+ isHeader(const VPDominatorTree &VPDT) const;
/// Clone the current block and it's recipes, without updating the operands of
/// the cloned recipes.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 876333d045f84..fb6f681497fa6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -23,9 +23,9 @@ using namespace llvm;
/// Create and return a new VPRegionBlock for loop starting at \p HeaderVPBB and
/// return it.
-static VPRegionBlock *introduceRegion(VPlan &Plan, VPBlockBase *HeaderVPBB) {
- VPBlockBase *PreheaderVPBB = HeaderVPBB->getPredecessors()[0];
- VPBlockBase *LatchVPBB = HeaderVPBB->getPredecessors()[1];
+static VPRegionBlock *introduceRegion(VPlan &Plan, VPBasicBlock *PreheaderVPBB,
+ VPBasicBlock *HeaderVPBB,
+ VPBasicBlock *LatchVPBB) {
VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPBB);
VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPBB);
VPBlockBase *Succ = LatchVPBB->getSingleSuccessor();
@@ -56,9 +56,11 @@ void VPlanTransforms::introduceRegions(VPlan &Plan, Type *InductionTy,
VPDT.recalculate(Plan);
for (VPBasicBlock *HeaderVPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(Plan.getEntry()))) {
- if (!HeaderVPBB->isHeader(VPDT))
+ auto Res = HeaderVPBB->isHeader(VPDT);
+ if (!Res)
continue;
- introduceRegion(Plan, HeaderVPBB);
+ const auto &[PreheaderVPBB, LatchVPBB] = *Res;
+ introduceRegion(Plan, PreheaderVPBB, HeaderVPBB, LatchVPBB);
}
VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
index 4e06ce86caad7..6aa181c5a0fd6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
@@ -313,22 +313,11 @@ void PlainCFGBuilder::buildPlainCFG(
VPBasicBlock *VPBB = getOrCreateVPBB(BB);
Loop *LoopForBB = LI->getLoopFor(BB);
// Set VPBB predecessors in the same order as they are in the incoming BB.
- if (!isHeaderBB(BB, LoopForBB)) {
- setVPBBPredsFromBB(VPBB, BB);
- } else {
- VPBB->setPredecessors({getOrCreateVPBB(LoopForBB->getLoopPredecessor()),
- getOrCreateVPBB(LoopForBB->getLoopLatch())});
- }
+ setVPBBPredsFromBB(VPBB, BB);
// Create VPInstructions for BB.
createVPInstructionsForVPBB(VPBB, BB);
- if (BB == TheLoop->getLoopLatch()) {
- VPBasicBlock *HeaderVPBB = getOrCreateVPBB(LoopForBB->getHeader());
- VPBB->setOneSuccessor(HeaderVPBB);
- continue;
- }
-
// Set VPBB successors. We create empty VPBBs for successors if they don't
// exist already. Recipes will be created when the successor is visited
// during the RPO traversal.
>From 2a8344e4435a090afa19368dad3859921ee1bb1f Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sat, 5 Apr 2025 11:43:14 +0100
Subject: [PATCH 5/9] !fixup adjust comment
---
llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
index 6aa181c5a0fd6..1dc16d66750ae 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
@@ -118,10 +118,8 @@ static bool isHeaderVPBB(VPBasicBlock *VPBB) {
return VPBB->getParent() && VPBB->getParent()->getEntry() == VPBB;
}
-// Create a new empty VPBasicBlock for an incoming BasicBlock in the region
-// corresponding to the containing loop or retrieve an existing one if it was
-// already created. If no region exists yet for the loop containing \p BB, a new
-// one is created.
+// Create a new empty VPBasicBlock for an incoming BasicBlock or retrieve an
+// existing one if it was already created.
VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
if (auto *VPBB = BB2VPBB.lookup(BB)) {
// Retrieve existing VPBB.
>From fd4bcc7dbef0956498dfc29219fd9f29b6ac24b5 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 23 Mar 2025 11:36:53 +0000
Subject: [PATCH 6/9] [VPlan] Move plain CFG construction to VPlanConstruction.
(NFC)
---
llvm/lib/Transforms/Vectorize/CMakeLists.txt | 1 -
.../Transforms/Vectorize/LoopVectorize.cpp | 22 +-
.../Vectorize/VPlanConstruction.cpp | 359 +++++++++++++++++
.../Transforms/Vectorize/VPlanHCFGBuilder.cpp | 381 ------------------
.../Transforms/Vectorize/VPlanHCFGBuilder.h | 73 ----
.../Transforms/Vectorize/VPlanTransforms.h | 4 +
.../Transforms/Vectorize/VPlanSlpTest.cpp | 1 -
.../Transforms/Vectorize/VPlanTestBase.h | 6 +-
8 files changed, 371 insertions(+), 476 deletions(-)
delete mode 100644 llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
delete mode 100644 llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
diff --git a/llvm/lib/Transforms/Vectorize/CMakeLists.txt b/llvm/lib/Transforms/Vectorize/CMakeLists.txt
index 7dac6d0059b26..0dc6a7d2f594f 100644
--- a/llvm/lib/Transforms/Vectorize/CMakeLists.txt
+++ b/llvm/lib/Transforms/Vectorize/CMakeLists.txt
@@ -24,7 +24,6 @@ add_llvm_component_library(LLVMVectorize
VPlan.cpp
VPlanAnalysis.cpp
VPlanConstruction.cpp
- VPlanHCFGBuilder.cpp
VPlanRecipes.cpp
VPlanSLP.cpp
VPlanTransforms.cpp
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 4f5ee1b5708ac..3f2cbbb26687c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -59,7 +59,6 @@
#include "VPlan.h"
#include "VPlanAnalysis.h"
#include "VPlanCFG.h"
-#include "VPlanHCFGBuilder.h"
#include "VPlanHelpers.h"
#include "VPlanPatternMatch.h"
#include "VPlanTransforms.h"
@@ -9332,13 +9331,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
return !CM.requiresScalarEpilogue(VF.isVector());
},
Range);
- auto Plan = std::make_unique<VPlan>(OrigLoop);
- // Build hierarchical CFG.
- // TODO: Convert to VPlan-transform and consoliate all transforms for VPlan
- // creation.
- VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
- HCFGBuilder.buildPlainCFG();
-
+ DenseMap<VPBlockBase *, BasicBlock *> VPB2IRBB;
+ auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI, VPB2IRBB);
VPlanTransforms::introduceRegions(*Plan, Legal->getWidestInductionType(), PSE,
RequiresScalarEpilogueCheck,
CM.foldTailByMasking(), OrigLoop);
@@ -9417,7 +9411,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
// Handle VPBBs down to the latch.
if (VPBB == LoopRegion->getExiting()) {
- assert(!HCFGBuilder.getIRBBForVPB(VPBB) &&
+ assert(!VPB2IRBB.contains(VPBB) &&
"the latch block shouldn't have a corresponding IRBB");
VPBlockUtils::connectBlocks(PrevVPBB, VPBB);
break;
@@ -9433,7 +9427,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
// FIXME: At the moment, masks need to be placed at the beginning of the
// block, as blends introduced for phi nodes need to use it. The created
// blends should be sunk after the mask recipes.
- RecipeBuilder.createBlockInMask(HCFGBuilder.getIRBBForVPB(VPBB));
+ RecipeBuilder.createBlockInMask(VPB2IRBB.lookup(VPBB));
}
// Convert input VPInstructions to widened recipes.
@@ -9637,12 +9631,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
assert(!OrigLoop->isInnermost());
assert(EnableVPlanNativePath && "VPlan-native path is not enabled.");
- // Create new empty VPlan
- auto Plan = std::make_unique<VPlan>(OrigLoop);
- // Build hierarchical CFG
- VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
- HCFGBuilder.buildPlainCFG();
-
+ DenseMap<VPBlockBase *, BasicBlock *> VPB2IRBB;
+ auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI, VPB2IRBB);
VPlanTransforms::introduceRegions(*Plan, Legal->getWidestInductionType(), PSE,
true, false, OrigLoop);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index fb6f681497fa6..fd604ddda34f8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -17,10 +17,369 @@
#include "VPlanDominatorTree.h"
#include "VPlanTransforms.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#define DEBUG_TYPE "loop-vectorize"
+
using namespace llvm;
+namespace {
+/// Class that is used to build the plain CFG for the incoming IR.
+class PlainCFGBuilder {
+private:
+ // The outermost loop of the input loop nest considered for vectorization.
+ Loop *TheLoop;
+
+ // Loop Info analysis.
+ LoopInfo &LI;
+
+ // Vectorization plan that we are working on.
+ VPlan &Plan;
+
+ // Builder of the VPlan instruction-level representation.
+ VPBuilder VPIRBuilder;
+
+ // NOTE: The following maps are intentionally destroyed after the plain CFG
+ // construction because subsequent VPlan-to-VPlan transformation may
+ // invalidate them.
+ // Map incoming BasicBlocks to their newly-created VPBasicBlocks.
+ DenseMap<BasicBlock *, VPBasicBlock *> BB2VPBB;
+ // Map incoming Value definitions to their newly-created VPValues.
+ DenseMap<Value *, VPValue *> IRDef2VPValue;
+
+ // Hold phi nodes that need to be fixed once the plain CFG has been built.
+ SmallVector<PHINode *, 8> PhisToFix;
+
+ // Utility functions.
+ void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB);
+ void fixHeaderPhis();
+ VPBasicBlock *getOrCreateVPBB(BasicBlock *BB);
+#ifndef NDEBUG
+ bool isExternalDef(Value *Val);
+#endif
+ VPValue *getOrCreateVPOperand(Value *IRVal);
+ void createVPInstructionsForVPBB(VPBasicBlock *VPBB, BasicBlock *BB);
+
+public:
+ PlainCFGBuilder(Loop *Lp, LoopInfo &LI, VPlan &P)
+ : TheLoop(Lp), LI(LI), Plan(P) {}
+
+ /// Builds the plain CFG for TheLoop and connects it to Plan's entry.
+ void buildPlainCFG(DenseMap<VPBlockBase *, BasicBlock *> &VPB2IRBB);
+};
+} // anonymous namespace
+
+// Set predecessors of \p VPBB in the same order as they are in \p BB. \p VPBB
+// must have no predecessors.
+void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) {
+ // Collect VPBB predecessors.
+ SmallVector<VPBlockBase *, 2> VPBBPreds;
+ for (BasicBlock *Pred : predecessors(BB))
+ VPBBPreds.push_back(getOrCreateVPBB(Pred));
+ VPBB->setPredecessors(VPBBPreds);
+}
+
+static bool isHeaderBB(BasicBlock *BB, Loop *L) {
+ return L && BB == L->getHeader();
+}
+
+// Add operands to VPInstructions representing phi nodes from the input IR.
+void PlainCFGBuilder::fixHeaderPhis() {
+ for (auto *Phi : PhisToFix) {
+ assert(IRDef2VPValue.count(Phi) && "Missing VPInstruction for PHINode.");
+ VPValue *VPVal = IRDef2VPValue[Phi];
+ assert(isa<VPWidenPHIRecipe>(VPVal) &&
+ "Expected WidenPHIRecipe for phi node.");
+ auto *VPPhi = cast<VPWidenPHIRecipe>(VPVal);
+ assert(VPPhi->getNumOperands() == 0 &&
+ "Expected VPInstruction with no operands.");
+
+ Loop *L = LI.getLoopFor(Phi->getParent());
+ assert(isHeaderBB(Phi->getParent(), L));
+ // For header phis, make sure the incoming value from the loop
+ // predecessor is the first operand of the recipe.
+ assert(Phi->getNumOperands() == 2 &&
+ "header phi must have exactly 2 operands");
+ BasicBlock *LoopPred = L->getLoopPredecessor();
+ VPPhi->addOperand(
+ getOrCreateVPOperand(Phi->getIncomingValueForBlock(LoopPred)));
+ BasicBlock *LoopLatch = L->getLoopLatch();
+ VPPhi->addOperand(
+ getOrCreateVPOperand(Phi->getIncomingValueForBlock(LoopLatch)));
+ }
+}
+
+static bool isHeaderVPBB(VPBasicBlock *VPBB) {
+ return VPBB->getParent() && VPBB->getParent()->getEntry() == VPBB;
+}
+
+// Create a new empty VPBasicBlock for an incoming BasicBlock or retrieve an
+// existing one if it was already created.
+VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
+ if (auto *VPBB = BB2VPBB.lookup(BB)) {
+ // Retrieve existing VPBB.
+ return VPBB;
+ }
+
+ // Create new VPBB.
+ StringRef Name = isHeaderBB(BB, TheLoop) ? "vector.body" : BB->getName();
+ LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n");
+ VPBasicBlock *VPBB = Plan.createVPBasicBlock(Name);
+ BB2VPBB[BB] = VPBB;
+ return VPBB;
+}
+
+#ifndef NDEBUG
+// Return true if \p Val is considered an external definition. An external
+// definition is either:
+// 1. A Value that is not an Instruction. This will be refined in the future.
+// 2. An Instruction that is outside of the CFG snippet represented in VPlan,
+// i.e., is not part of: a) the loop nest, b) outermost loop PH and, c)
+// outermost loop exits.
+bool PlainCFGBuilder::isExternalDef(Value *Val) {
+ // All the Values that are not Instructions are considered external
+ // definitions for now.
+ Instruction *Inst = dyn_cast<Instruction>(Val);
+ if (!Inst)
+ return true;
+
+ BasicBlock *InstParent = Inst->getParent();
+ assert(InstParent && "Expected instruction parent.");
+
+ // Check whether Instruction definition is in loop PH.
+ BasicBlock *PH = TheLoop->getLoopPreheader();
+ assert(PH && "Expected loop pre-header.");
+
+ if (InstParent == PH)
+ // Instruction definition is in outermost loop PH.
+ return false;
+
+ // Check whether Instruction definition is in a loop exit.
+ SmallVector<BasicBlock *> ExitBlocks;
+ TheLoop->getExitBlocks(ExitBlocks);
+ if (is_contained(ExitBlocks, InstParent)) {
+ // Instruction definition is in outermost loop exit.
+ return false;
+ }
+
+ // Check whether Instruction definition is in loop body.
+ return !TheLoop->contains(Inst);
+}
+#endif
+
+// Create a new VPValue or retrieve an existing one for the Instruction's
+// operand \p IRVal. This function must only be used to create/retrieve VPValues
+// for *Instruction's operands* and not to create regular VPInstructions. For
+// the latter, please, look at 'createVPInstructionsForVPBB'.
+VPValue *PlainCFGBuilder::getOrCreateVPOperand(Value *IRVal) {
+ auto VPValIt = IRDef2VPValue.find(IRVal);
+ if (VPValIt != IRDef2VPValue.end())
+ // Operand has an associated VPInstruction or VPValue that was previously
+ // created.
+ return VPValIt->second;
+
+ // Operand doesn't have a previously created VPInstruction/VPValue. This
+ // means that operand is:
+ // A) a definition external to VPlan,
+ // B) any other Value without specific representation in VPlan.
+ // For now, we use VPValue to represent A and B and classify both as external
+ // definitions. We may introduce specific VPValue subclasses for them in the
+ // future.
+ assert(isExternalDef(IRVal) && "Expected external definition as operand.");
+
+ // A and B: Create VPValue and add it to the pool of external definitions and
+ // to the Value->VPValue map.
+ VPValue *NewVPVal = Plan.getOrAddLiveIn(IRVal);
+ IRDef2VPValue[IRVal] = NewVPVal;
+ return NewVPVal;
+}
+
+// Create new VPInstructions in a VPBasicBlock, given its BasicBlock
+// counterpart. This function must be invoked in RPO so that the operands of a
+// VPInstruction in \p BB have been visited before (except for Phi nodes).
+void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
+ BasicBlock *BB) {
+ VPIRBuilder.setInsertPoint(VPBB);
+ // TODO: Model and preserve debug intrinsics in VPlan.
+ for (Instruction &InstRef : BB->instructionsWithoutDebug(false)) {
+ Instruction *Inst = &InstRef;
+
+ // There shouldn't be any VPValue for Inst at this point. Otherwise, we
+ // visited Inst when we shouldn't, breaking the RPO traversal order.
+ assert(!IRDef2VPValue.count(Inst) &&
+ "Instruction shouldn't have been visited.");
+
+ if (auto *Br = dyn_cast<BranchInst>(Inst)) {
+ if (TheLoop->getLoopLatch() == BB ||
+ any_of(successors(BB),
+ [this](BasicBlock *Succ) { return !TheLoop->contains(Succ); }))
+ continue;
+
+ // Conditional branch instructions are represented using BranchOnCond
+ // recipes.
+ if (Br->isConditional()) {
+ VPValue *Cond = getOrCreateVPOperand(Br->getCondition());
+ VPIRBuilder.createNaryOp(VPInstruction::BranchOnCond, {Cond}, Inst);
+ }
+
+ // Skip the rest of the Instruction processing for Branch instructions.
+ continue;
+ }
+
+ if (auto *SI = dyn_cast<SwitchInst>(Inst)) {
+ SmallVector<VPValue *> Ops = {getOrCreateVPOperand(SI->getCondition())};
+ for (auto Case : SI->cases())
+ Ops.push_back(getOrCreateVPOperand(Case.getCaseValue()));
+ VPIRBuilder.createNaryOp(Instruction::Switch, Ops, Inst);
+ continue;
+ }
+
+ VPSingleDefRecipe *NewR;
+ if (auto *Phi = dyn_cast<PHINode>(Inst)) {
+ // Phi node's operands may not have been visited at this point. We create
+ // an empty VPInstruction that we will fix once the whole plain CFG has
+ // been built.
+ NewR = new VPWidenPHIRecipe(Phi, nullptr, Phi->getDebugLoc(), "vec.phi");
+ VPBB->appendRecipe(NewR);
+ if (isHeaderBB(Phi->getParent(), LI.getLoopFor(Phi->getParent()))) {
+ // Header phis need to be fixed after the VPBB for the latch has been
+ // created.
+ PhisToFix.push_back(Phi);
+ } else {
+ // Add operands for VPPhi in the order matching its predecessors in
+ // VPlan.
+ DenseMap<const VPBasicBlock *, VPValue *> VPPredToIncomingValue;
+ for (unsigned I = 0; I != Phi->getNumOperands(); ++I) {
+ VPPredToIncomingValue[BB2VPBB[Phi->getIncomingBlock(I)]] =
+ getOrCreateVPOperand(Phi->getIncomingValue(I));
+ }
+ for (VPBlockBase *Pred : VPBB->getPredecessors())
+ NewR->addOperand(
+ VPPredToIncomingValue.lookup(Pred->getExitingBasicBlock()));
+ }
+ } else {
+ // Translate LLVM-IR operands into VPValue operands and set them in the
+ // new VPInstruction.
+ SmallVector<VPValue *, 4> VPOperands;
+ for (Value *Op : Inst->operands())
+ VPOperands.push_back(getOrCreateVPOperand(Op));
+
+ // Build VPInstruction for any arbitrary Instruction without specific
+ // representation in VPlan.
+ NewR = cast<VPInstruction>(
+ VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst));
+ }
+
+ IRDef2VPValue[Inst] = NewR;
+ }
+}
+
+// Main interface: build the plain CFG and fill VPB2IRBB (VPBB -> IR block).
+void PlainCFGBuilder::buildPlainCFG(
+ DenseMap<VPBlockBase *, BasicBlock *> &VPB2IRBB) {
+ VPIRBasicBlock *Entry = cast<VPIRBasicBlock>(Plan.getEntry());
+ BB2VPBB[Entry->getIRBasicBlock()] = Entry;
+
+ // 1. Scan the body of the loop in a topological order to visit each basic
+ // block after having visited its predecessor basic blocks. Create a VPBB for
+ // each BB and link it to its successor and predecessor VPBBs. Note that
+ // predecessors must be set in the same order as they are in the incoming IR.
+ // Otherwise, there might be problems with existing phi nodes and algorithms
+ // based on predecessor traversal.
+
+ // Loop PH needs to be explicitly visited since it's not taken into account by
+ // LoopBlocksDFS.
+ BasicBlock *ThePreheaderBB = TheLoop->getLoopPreheader();
+ assert((ThePreheaderBB->getTerminator()->getNumSuccessors() == 1) &&
+ "Unexpected loop preheader");
+ for (auto &I : *ThePreheaderBB) {
+ if (I.getType()->isVoidTy())
+ continue;
+ IRDef2VPValue[&I] = Plan.getOrAddLiveIn(&I);
+ }
+
+ LoopBlocksRPO RPO(TheLoop);
+ RPO.perform(&LI);
+
+ for (BasicBlock *BB : RPO) {
+ // Create or retrieve the VPBasicBlock for this BB.
+ VPBasicBlock *VPBB = getOrCreateVPBB(BB);
+ Loop *LoopForBB = LI.getLoopFor(BB);
+ // Set VPBB predecessors in the same order as they are in the incoming BB.
+ setVPBBPredsFromBB(VPBB, BB);
+
+ // Create VPInstructions for BB.
+ createVPInstructionsForVPBB(VPBB, BB);
+
+ // Set VPBB successors. We create empty VPBBs for successors if they don't
+ // exist already. Recipes will be created when the successor is visited
+ // during the RPO traversal.
+ if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
+ SmallVector<VPBlockBase *> Succs = {
+ getOrCreateVPBB(SI->getDefaultDest())};
+ for (auto Case : SI->cases())
+ Succs.push_back(getOrCreateVPBB(Case.getCaseSuccessor()));
+ VPBB->setSuccessors(Succs);
+ continue;
+ }
+ auto *BI = cast<BranchInst>(BB->getTerminator());
+ unsigned NumSuccs = succ_size(BB);
+ if (NumSuccs == 1) {
+ auto *Successor = getOrCreateVPBB(BB->getSingleSuccessor());
+ VPBB->setOneSuccessor(isHeaderVPBB(Successor)
+ ? Successor->getParent()
+ : static_cast<VPBlockBase *>(Successor));
+ continue;
+ }
+ assert(BI->isConditional() && NumSuccs == 2 &&
+ "block must have conditional branch with 2 successors");
+
+ BasicBlock *IRSucc0 = BI->getSuccessor(0);
+ BasicBlock *IRSucc1 = BI->getSuccessor(1);
+ VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0);
+ VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1);
+
+ // Don't connect any blocks outside the current loop except the latch, which
+ // is handled below.
+ if (LoopForBB &&
+ (LoopForBB == TheLoop || BB != LoopForBB->getLoopLatch())) {
+ if (!LoopForBB->contains(IRSucc0)) {
+ VPBB->setOneSuccessor(Successor1);
+ continue;
+ }
+ if (!LoopForBB->contains(IRSucc1)) {
+ VPBB->setOneSuccessor(Successor0);
+ continue;
+ }
+ }
+
+ VPBB->setTwoSuccessors(Successor0, Successor1);
+ }
+
+ // 2. The whole CFG has been built at this point so all the input Values must
+ // have a VPlan counterpart. Fix VPlan header phi by adding their
+ // corresponding VPlan operands.
+ fixHeaderPhis();
+
+ Plan.getEntry()->setOneSuccessor(getOrCreateVPBB(TheLoop->getHeader()));
+ Plan.getEntry()->setPlan(&Plan);
+
+ for (const auto &[IRBB, VPB] : BB2VPBB)
+ VPB2IRBB[VPB] = IRBB;
+
+ LLVM_DEBUG(Plan.setName("Plain CFG\n"); dbgs() << Plan);
+}
+
+std::unique_ptr<VPlan> VPlanTransforms::buildPlainCFG(
+ Loop *TheLoop, LoopInfo &LI,
+ DenseMap<VPBlockBase *, BasicBlock *> &VPB2IRBB) {
+ auto Plan = std::make_unique<VPlan>(TheLoop);
+ PlainCFGBuilder Builder(TheLoop, LI, *Plan);
+ Builder.buildPlainCFG(VPB2IRBB);
+ return Plan;
+}
+
/// Create and return a new VPRegionBlock for loop starting at \p HeaderVPBB and
/// return it.
static VPRegionBlock *introduceRegion(VPlan &Plan, VPBasicBlock *PreheaderVPBB,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
deleted file mode 100644
index 1dc16d66750ae..0000000000000
--- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
+++ /dev/null
@@ -1,381 +0,0 @@
-//===-- VPlanHCFGBuilder.cpp ----------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This file implements the construction of a VPlan-based Hierarchical CFG
-/// (H-CFG) for an incoming IR. This construction comprises the following
-/// components and steps:
-//
-/// 1. PlainCFGBuilder class: builds a plain VPBasicBlock-based CFG that
-/// faithfully represents the CFG in the incoming IR.
-/// NOTE: At this point, there is a direct correspondence between all the
-/// VPBasicBlocks created for the initial plain CFG and the incoming
-/// BasicBlocks. However, this might change in the future.
-///
-//===----------------------------------------------------------------------===//
-
-#include "VPlanHCFGBuilder.h"
-#include "LoopVectorizationPlanner.h"
-#include "VPlanCFG.h"
-#include "llvm/Analysis/LoopIterator.h"
-
-#define DEBUG_TYPE "loop-vectorize"
-
-using namespace llvm;
-
-namespace {
-// Class that is used to build the plain CFG for the incoming IR.
-class PlainCFGBuilder {
-private:
- // The outermost loop of the input loop nest considered for vectorization.
- Loop *TheLoop;
-
- // Loop Info analysis.
- LoopInfo *LI;
-
- // Vectorization plan that we are working on.
- VPlan &Plan;
-
- // Builder of the VPlan instruction-level representation.
- VPBuilder VPIRBuilder;
-
- // NOTE: The following maps are intentionally destroyed after the plain CFG
- // construction because subsequent VPlan-to-VPlan transformation may
- // invalidate them.
- // Map incoming BasicBlocks to their newly-created VPBasicBlocks.
- DenseMap<BasicBlock *, VPBasicBlock *> BB2VPBB;
- // Map incoming Value definitions to their newly-created VPValues.
- DenseMap<Value *, VPValue *> IRDef2VPValue;
-
- // Hold phi node's that need to be fixed once the plain CFG has been built.
- SmallVector<PHINode *, 8> PhisToFix;
-
- // Utility functions.
- void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB);
- void fixHeaderPhis();
- VPBasicBlock *getOrCreateVPBB(BasicBlock *BB);
-#ifndef NDEBUG
- bool isExternalDef(Value *Val);
-#endif
- VPValue *getOrCreateVPOperand(Value *IRVal);
- void createVPInstructionsForVPBB(VPBasicBlock *VPBB, BasicBlock *BB);
-
-public:
- PlainCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P)
- : TheLoop(Lp), LI(LI), Plan(P) {}
-
- /// Build plain CFG for TheLoop and connects it to Plan's entry.
- void buildPlainCFG(DenseMap<VPBlockBase *, BasicBlock *> &VPB2IRBB);
-};
-} // anonymous namespace
-
-// Set predecessors of \p VPBB in the same order as they are in \p BB. \p VPBB
-// must have no predecessors.
-void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) {
- // Collect VPBB predecessors.
- SmallVector<VPBlockBase *, 2> VPBBPreds;
- for (BasicBlock *Pred : predecessors(BB))
- VPBBPreds.push_back(getOrCreateVPBB(Pred));
- VPBB->setPredecessors(VPBBPreds);
-}
-
-static bool isHeaderBB(BasicBlock *BB, Loop *L) {
- return L && BB == L->getHeader();
-}
-
-// Add operands to VPInstructions representing phi nodes from the input IR.
-void PlainCFGBuilder::fixHeaderPhis() {
- for (auto *Phi : PhisToFix) {
- assert(IRDef2VPValue.count(Phi) && "Missing VPInstruction for PHINode.");
- VPValue *VPVal = IRDef2VPValue[Phi];
- assert(isa<VPWidenPHIRecipe>(VPVal) &&
- "Expected WidenPHIRecipe for phi node.");
- auto *VPPhi = cast<VPWidenPHIRecipe>(VPVal);
- assert(VPPhi->getNumOperands() == 0 &&
- "Expected VPInstruction with no operands.");
-
- Loop *L = LI->getLoopFor(Phi->getParent());
- assert(isHeaderBB(Phi->getParent(), L));
- // For header phis, make sure the incoming value from the loop
- // predecessor is the first operand of the recipe.
- assert(Phi->getNumOperands() == 2 &&
- "header phi must have exactly 2 operands");
- BasicBlock *LoopPred = L->getLoopPredecessor();
- VPPhi->addOperand(
- getOrCreateVPOperand(Phi->getIncomingValueForBlock(LoopPred)));
- BasicBlock *LoopLatch = L->getLoopLatch();
- VPPhi->addOperand(
- getOrCreateVPOperand(Phi->getIncomingValueForBlock(LoopLatch)));
- }
-}
-
-static bool isHeaderVPBB(VPBasicBlock *VPBB) {
- return VPBB->getParent() && VPBB->getParent()->getEntry() == VPBB;
-}
-
-// Create a new empty VPBasicBlock for an incoming BasicBlock or retrieve an
-// existing one if it was already created.
-VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
- if (auto *VPBB = BB2VPBB.lookup(BB)) {
- // Retrieve existing VPBB.
- return VPBB;
- }
-
- // Create new VPBB.
- StringRef Name = isHeaderBB(BB, TheLoop) ? "vector.body" : BB->getName();
- LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n");
- VPBasicBlock *VPBB = Plan.createVPBasicBlock(Name);
- BB2VPBB[BB] = VPBB;
- return VPBB;
-}
-
-#ifndef NDEBUG
-// Return true if \p Val is considered an external definition. An external
-// definition is either:
-// 1. A Value that is not an Instruction. This will be refined in the future.
-// 2. An Instruction that is outside of the CFG snippet represented in VPlan,
-// i.e., is not part of: a) the loop nest, b) outermost loop PH and, c)
-// outermost loop exits.
-bool PlainCFGBuilder::isExternalDef(Value *Val) {
- // All the Values that are not Instructions are considered external
- // definitions for now.
- Instruction *Inst = dyn_cast<Instruction>(Val);
- if (!Inst)
- return true;
-
- BasicBlock *InstParent = Inst->getParent();
- assert(InstParent && "Expected instruction parent.");
-
- // Check whether Instruction definition is in loop PH.
- BasicBlock *PH = TheLoop->getLoopPreheader();
- assert(PH && "Expected loop pre-header.");
-
- if (InstParent == PH)
- // Instruction definition is in outermost loop PH.
- return false;
-
- // Check whether Instruction definition is in a loop exit.
- SmallVector<BasicBlock *> ExitBlocks;
- TheLoop->getExitBlocks(ExitBlocks);
- if (is_contained(ExitBlocks, InstParent)) {
- // Instruction definition is in outermost loop exit.
- return false;
- }
-
- // Check whether Instruction definition is in loop body.
- return !TheLoop->contains(Inst);
-}
-#endif
-
-// Create a new VPValue or retrieve an existing one for the Instruction's
-// operand \p IRVal. This function must only be used to create/retrieve VPValues
-// for *Instruction's operands* and not to create regular VPInstruction's. For
-// the latter, please, look at 'createVPInstructionsForVPBB'.
-VPValue *PlainCFGBuilder::getOrCreateVPOperand(Value *IRVal) {
- auto VPValIt = IRDef2VPValue.find(IRVal);
- if (VPValIt != IRDef2VPValue.end())
- // Operand has an associated VPInstruction or VPValue that was previously
- // created.
- return VPValIt->second;
-
- // Operand doesn't have a previously created VPInstruction/VPValue. This
- // means that operand is:
- // A) a definition external to VPlan,
- // B) any other Value without specific representation in VPlan.
- // For now, we use VPValue to represent A and B and classify both as external
- // definitions. We may introduce specific VPValue subclasses for them in the
- // future.
- assert(isExternalDef(IRVal) && "Expected external definition as operand.");
-
- // A and B: Create VPValue and add it to the pool of external definitions and
- // to the Value->VPValue map.
- VPValue *NewVPVal = Plan.getOrAddLiveIn(IRVal);
- IRDef2VPValue[IRVal] = NewVPVal;
- return NewVPVal;
-}
-
-// Create new VPInstructions in a VPBasicBlock, given its BasicBlock
-// counterpart. This function must be invoked in RPO so that the operands of a
-// VPInstruction in \p BB have been visited before (except for Phi nodes).
-void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
- BasicBlock *BB) {
- VPIRBuilder.setInsertPoint(VPBB);
- // TODO: Model and preserve debug intrinsics in VPlan.
- for (Instruction &InstRef : BB->instructionsWithoutDebug(false)) {
- Instruction *Inst = &InstRef;
-
- // There shouldn't be any VPValue for Inst at this point. Otherwise, we
- // visited Inst when we shouldn't, breaking the RPO traversal order.
- assert(!IRDef2VPValue.count(Inst) &&
- "Instruction shouldn't have been visited.");
-
- if (auto *Br = dyn_cast<BranchInst>(Inst)) {
- if (TheLoop->getLoopLatch() == BB ||
- any_of(successors(BB),
- [this](BasicBlock *Succ) { return !TheLoop->contains(Succ); }))
- continue;
-
- // Conditional branch instruction are represented using BranchOnCond
- // recipes.
- if (Br->isConditional()) {
- VPValue *Cond = getOrCreateVPOperand(Br->getCondition());
- VPIRBuilder.createNaryOp(VPInstruction::BranchOnCond, {Cond}, Inst);
- }
-
- // Skip the rest of the Instruction processing for Branch instructions.
- continue;
- }
-
- if (auto *SI = dyn_cast<SwitchInst>(Inst)) {
- SmallVector<VPValue *> Ops = {getOrCreateVPOperand(SI->getCondition())};
- for (auto Case : SI->cases())
- Ops.push_back(getOrCreateVPOperand(Case.getCaseValue()));
- VPIRBuilder.createNaryOp(Instruction::Switch, Ops, Inst);
- continue;
- }
-
- VPSingleDefRecipe *NewR;
- if (auto *Phi = dyn_cast<PHINode>(Inst)) {
- // Phi node's operands may have not been visited at this point. We create
- // an empty VPInstruction that we will fix once the whole plain CFG has
- // been built.
- NewR = new VPWidenPHIRecipe(Phi, nullptr, Phi->getDebugLoc(), "vec.phi");
- VPBB->appendRecipe(NewR);
- if (isHeaderBB(Phi->getParent(), LI->getLoopFor(Phi->getParent()))) {
- // Header phis need to be fixed after the VPBB for the latch has been
- // created.
- PhisToFix.push_back(Phi);
- } else {
- // Add operands for VPPhi in the order matching its predecessors in
- // VPlan.
- DenseMap<const VPBasicBlock *, VPValue *> VPPredToIncomingValue;
- for (unsigned I = 0; I != Phi->getNumOperands(); ++I) {
- VPPredToIncomingValue[BB2VPBB[Phi->getIncomingBlock(I)]] =
- getOrCreateVPOperand(Phi->getIncomingValue(I));
- }
- for (VPBlockBase *Pred : VPBB->getPredecessors())
- NewR->addOperand(
- VPPredToIncomingValue.lookup(Pred->getExitingBasicBlock()));
- }
- } else {
- // Translate LLVM-IR operands into VPValue operands and set them in the
- // new VPInstruction.
- SmallVector<VPValue *, 4> VPOperands;
- for (Value *Op : Inst->operands())
- VPOperands.push_back(getOrCreateVPOperand(Op));
-
- // Build VPInstruction for any arbitrary Instruction without specific
- // representation in VPlan.
- NewR = cast<VPInstruction>(
- VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst));
- }
-
- IRDef2VPValue[Inst] = NewR;
- }
-}
-
-// Main interface to build the plain CFG.
-void PlainCFGBuilder::buildPlainCFG(
- DenseMap<VPBlockBase *, BasicBlock *> &VPB2IRBB) {
- VPIRBasicBlock *Entry = cast<VPIRBasicBlock>(Plan.getEntry());
- BB2VPBB[Entry->getIRBasicBlock()] = Entry;
-
- // 1. Scan the body of the loop in a topological order to visit each basic
- // block after having visited its predecessor basic blocks. Create a VPBB for
- // each BB and link it to its successor and predecessor VPBBs. Note that
- // predecessors must be set in the same order as they are in the incomming IR.
- // Otherwise, there might be problems with existing phi nodes and algorithm
- // based on predecessors traversal.
-
- // Loop PH needs to be explicitly visited since it's not taken into account by
- // LoopBlocksDFS.
- BasicBlock *ThePreheaderBB = TheLoop->getLoopPreheader();
- assert((ThePreheaderBB->getTerminator()->getNumSuccessors() == 1) &&
- "Unexpected loop preheader");
- for (auto &I : *ThePreheaderBB) {
- if (I.getType()->isVoidTy())
- continue;
- IRDef2VPValue[&I] = Plan.getOrAddLiveIn(&I);
- }
-
- LoopBlocksRPO RPO(TheLoop);
- RPO.perform(LI);
-
- for (BasicBlock *BB : RPO) {
- // Create or retrieve the VPBasicBlock for this BB.
- VPBasicBlock *VPBB = getOrCreateVPBB(BB);
- Loop *LoopForBB = LI->getLoopFor(BB);
- // Set VPBB predecessors in the same order as they are in the incoming BB.
- setVPBBPredsFromBB(VPBB, BB);
-
- // Create VPInstructions for BB.
- createVPInstructionsForVPBB(VPBB, BB);
-
- // Set VPBB successors. We create empty VPBBs for successors if they don't
- // exist already. Recipes will be created when the successor is visited
- // during the RPO traversal.
- if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
- SmallVector<VPBlockBase *> Succs = {
- getOrCreateVPBB(SI->getDefaultDest())};
- for (auto Case : SI->cases())
- Succs.push_back(getOrCreateVPBB(Case.getCaseSuccessor()));
- VPBB->setSuccessors(Succs);
- continue;
- }
- auto *BI = cast<BranchInst>(BB->getTerminator());
- unsigned NumSuccs = succ_size(BB);
- if (NumSuccs == 1) {
- auto *Successor = getOrCreateVPBB(BB->getSingleSuccessor());
- VPBB->setOneSuccessor(isHeaderVPBB(Successor)
- ? Successor->getParent()
- : static_cast<VPBlockBase *>(Successor));
- continue;
- }
- assert(BI->isConditional() && NumSuccs == 2 && BI->isConditional() &&
- "block must have conditional branch with 2 successors");
-
- BasicBlock *IRSucc0 = BI->getSuccessor(0);
- BasicBlock *IRSucc1 = BI->getSuccessor(1);
- VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0);
- VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1);
-
- // Don't connect any blocks outside the current loop except the latch, which
- // is handled below.
- if (LoopForBB &&
- (LoopForBB == TheLoop || BB != LoopForBB->getLoopLatch())) {
- if (!LoopForBB->contains(IRSucc0)) {
- VPBB->setOneSuccessor(Successor1);
- continue;
- }
- if (!LoopForBB->contains(IRSucc1)) {
- VPBB->setOneSuccessor(Successor0);
- continue;
- }
- }
-
- VPBB->setTwoSuccessors(Successor0, Successor1);
- }
-
- // 2. The whole CFG has been built at this point so all the input Values must
- // have a VPlan counterpart. Fix VPlan header phi by adding their
- // corresponding VPlan operands.
- fixHeaderPhis();
-
- Plan.getEntry()->setOneSuccessor(getOrCreateVPBB(TheLoop->getHeader()));
- Plan.getEntry()->setPlan(&Plan);
-
- for (const auto &[IRBB, VPB] : BB2VPBB)
- VPB2IRBB[VPB] = IRBB;
-
- LLVM_DEBUG(Plan.setName("Plain CFG\n"); dbgs() << Plan);
-}
-
-void VPlanHCFGBuilder::buildPlainCFG() {
- PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan);
- PCFGBuilder.buildPlainCFG(VPB2IRBB);
-}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
deleted file mode 100644
index f2e90d3f4d9b3..0000000000000
--- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
+++ /dev/null
@@ -1,73 +0,0 @@
-//===-- VPlanHCFGBuilder.h --------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This file defines the VPlanHCFGBuilder class which contains the public
-/// interface (buildHierarchicalCFG) to build a VPlan-based Hierarchical CFG
-/// (H-CFG) for an incoming IR.
-///
-/// A H-CFG in VPlan is a control-flow graph whose nodes are VPBasicBlocks
-/// and/or VPRegionBlocks (i.e., other H-CFGs). The outermost H-CFG of a VPlan
-/// consists of a VPRegionBlock, denoted Top Region, which encloses any other
-/// VPBlockBase in the H-CFG. This guarantees that any VPBlockBase in the H-CFG
-/// other than the Top Region will have a parent VPRegionBlock and allows us
-/// to easily add more nodes before/after the main vector loop (such as the
-/// reduction epilogue).
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_VPLANHCFGBUILDER_H
-#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_VPLANHCFGBUILDER_H
-
-#include "llvm/ADT/DenseMap.h"
-
-namespace llvm {
-
-class Loop;
-class LoopInfo;
-class VPlan;
-class VPlanTestIRBase;
-class VPBlockBase;
-class BasicBlock;
-
-/// Main class to build the VPlan H-CFG for an incoming IR.
-class VPlanHCFGBuilder {
- friend VPlanTestIRBase;
-
-private:
- // The outermost loop of the input loop nest considered for vectorization.
- Loop *TheLoop;
-
- // Loop Info analysis.
- LoopInfo *LI;
-
- // The VPlan that will contain the H-CFG we are building.
- VPlan &Plan;
-
- /// Map of create VP blocks to their input IR basic blocks, if they have been
- /// created for a input IR basic block.
- DenseMap<VPBlockBase *, BasicBlock *> VPB2IRBB;
-
-public:
- VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P)
- : TheLoop(Lp), LI(LI), Plan(P) {}
-
- /// Build plain CFG for TheLoop and connects it to Plan's entry.
- void buildPlainCFG();
-
- /// Return the input IR BasicBlock corresponding to \p VPB. Returns nullptr if
- /// there is no such corresponding block.
- /// FIXME: This is a temporary workaround to drive the createBlockInMask.
- /// Remove once mask creation is done on VPlan.
- BasicBlock *getIRBBForVPB(const VPBlockBase *VPB) const {
- return VPB2IRBB.lookup(VPB);
- }
-};
-} // namespace llvm
-
-#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_VPLANHCFGBUILDER_H
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index ed8b7a08ea187..be519d06d30f0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -52,6 +52,10 @@ struct VPlanTransforms {
verifyVPlanIsValid(Plan);
}
+ static std::unique_ptr<VPlan>
+ buildPlainCFG(Loop *TheLoop, LoopInfo &LI,
+ DenseMap<VPBlockBase *, BasicBlock *> &VPB2IRBB);
+
/// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turing \p Plan's
/// flat CFG into a hierarchical CFG. It also creates a VPValue expression for
/// the original trip count. It will also introduce a dedicated VPBasicBlock
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp
index cd8bd4a3565e4..1ffd1a6a7a9b9 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp
@@ -8,7 +8,6 @@
#include "../lib/Transforms/Vectorize/VPlanSLP.h"
#include "../lib/Transforms/Vectorize/VPlan.h"
-#include "../lib/Transforms/Vectorize/VPlanHCFGBuilder.h"
#include "VPlanTestBase.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
index f2d3d37b40ba9..56b6a7b79eeb7 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
@@ -13,7 +13,6 @@
#define LLVM_UNITTESTS_TRANSFORMS_VECTORIZE_VPLANTESTBASE_H
#include "../lib/Transforms/Vectorize/VPlan.h"
-#include "../lib/Transforms/Vectorize/VPlanHCFGBuilder.h"
#include "../lib/Transforms/Vectorize/VPlanTransforms.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
@@ -71,9 +70,8 @@ class VPlanTestIRBase : public testing::Test {
Loop *L = LI->getLoopFor(LoopHeader);
PredicatedScalarEvolution PSE(*SE, *L);
- auto Plan = std::make_unique<VPlan>(L);
- VPlanHCFGBuilder HCFGBuilder(L, LI.get(), *Plan);
- HCFGBuilder.buildPlainCFG();
+ DenseMap<VPBlockBase *, BasicBlock *> VPB2IRBB;
+ auto Plan = VPlanTransforms::buildPlainCFG(L, *LI, VPB2IRBB);
VPlanTransforms::introduceRegions(*Plan, IntegerType::get(*Ctx, 64), PSE,
true, false, L);
return Plan;
>From 5652db74d37f1ac52b70d53c8ae7d1051a6f54ee Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 23 Mar 2025 14:16:27 +0000
Subject: [PATCH 7/9] [VPlan] Add exit operands early.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 6 +----
.../Vectorize/VPlanConstruction.cpp | 22 +++++++++++++++++++
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 2 +-
.../Transforms/Vectorize/VPlanTransforms.cpp | 18 +++++++--------
4 files changed, 32 insertions(+), 16 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3f2cbbb26687c..f6b300d263886 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9167,11 +9167,7 @@ collectUsersInExitBlocks(Loop *OrigLoop, VPRecipeBuilder &Builder,
continue;
}
- PHINode &ExitPhi = ExitIRI->getIRPhi();
- BasicBlock *ExitingBB = OrigLoop->getLoopLatch();
- Value *IncomingValue = ExitPhi.getIncomingValueForBlock(ExitingBB);
- VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue);
- ExitIRI->addOperand(V);
+ VPValue *V = ExitIRI->getOperand(0);
if (V->isLiveIn())
continue;
assert(V->getDefiningRecipe()->getParent()->getEnclosingLoopRegion() &&
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index fd604ddda34f8..4c32490712c5e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -365,6 +365,23 @@ void PlainCFGBuilder::buildPlainCFG(
Plan.getEntry()->setOneSuccessor(getOrCreateVPBB(TheLoop->getHeader()));
Plan.getEntry()->setPlan(&Plan);
+ for (auto *EB : Plan.getExitBlocks()) {
+ for (VPRecipeBase &R : *EB) {
+ auto *PhiR = cast<VPIRInstruction>(&R);
+ auto *Phi = dyn_cast<PHINode>(&PhiR->getInstruction());
+ if (!Phi)
+ break;
+ for (Value *Inc : Phi->incoming_values())
+ PhiR->addOperand(getOrCreateVPOperand(Inc));
+ if (R.getNumOperands() > 1 &&
+ Phi->getIncomingBlock(0) != TheLoop->getLoopLatch()) {
+ VPValue *Tmp = R.getOperand(0);
+ R.setOperand(0, R.getOperand(1));
+ R.setOperand(1, Tmp);
+ }
+ }
+ }
+
for (const auto &[IRBB, VPB] : BB2VPBB)
VPB2IRBB[VPB] = IRBB;
@@ -451,6 +468,11 @@ void VPlanTransforms::introduceRegions(VPlan &Plan, Type *InductionTy,
VPBlockUtils::connectBlocks(ScalarPH, Plan.getScalarHeader());
if (!RequiresScalarEpilogueCheck) {
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
+ for (auto *EB : Plan.getExitBlocks()) {
+ for (VPRecipeBase &R : *EB)
+ for (unsigned Idx = 0; Idx != R.getNumOperands(); ++Idx)
+ R.setOperand(Idx, Plan.getOrAddLiveIn(PoisonValue::get(InductionTy)));
+ }
return;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 7b5c6b6f6f76e..6ab7fa73f15cf 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1096,7 +1096,7 @@ InstructionCost VPIRInstruction::computeCost(ElementCount VF,
void VPIRInstruction::extractLastLaneOfOperand(VPBuilder &Builder) {
assert(isa<PHINode>(getInstruction()) &&
"can only add exiting operands to phi nodes");
- assert(getNumOperands() == 1 && "must have a single operand");
+ // assert(getNumOperands() == 1 && "must have a single operand");
VPValue *Exiting = getOperand(0);
if (!Exiting->isLiveIn()) {
LLVMContext &Ctx = getInstruction().getContext();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 9815dfd31374b..21816814424c6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2410,29 +2410,27 @@ void VPlanTransforms::handleUncountableEarlyExit(
if (!ExitIRI)
break;
- PHINode &ExitPhi = ExitIRI->getIRPhi();
- VPValue *IncomingFromEarlyExit = RecipeBuilder.getVPValueOrAddLiveIn(
- ExitPhi.getIncomingValueForBlock(UncountableExitingBlock));
+ unsigned EarlyExitIdx = 0;
if (OrigLoop->getUniqueExitBlock()) {
+ EarlyExitIdx = 1;
// If there's a unique exit block, VPEarlyExitBlock has 2 predecessors
// (MiddleVPBB and NewMiddle). Add the incoming value from MiddleVPBB
// which is coming from the original latch.
- VPValue *IncomingFromLatch = RecipeBuilder.getVPValueOrAddLiveIn(
- ExitPhi.getIncomingValueForBlock(OrigLoop->getLoopLatch()));
- ExitIRI->addOperand(IncomingFromLatch);
ExitIRI->extractLastLaneOfOperand(MiddleBuilder);
}
+ VPValue *IncomingFromEarlyExit = ExitIRI->getOperand(EarlyExitIdx);
// Add the incoming value from the early exit.
if (!IncomingFromEarlyExit->isLiveIn() && !Plan.hasScalarVFOnly()) {
VPValue *FirstActiveLane = EarlyExitB.createNaryOp(
VPInstruction::FirstActiveLane, {EarlyExitTakenCond}, nullptr,
"first.active.lane");
- IncomingFromEarlyExit = EarlyExitB.createNaryOp(
- Instruction::ExtractElement, {IncomingFromEarlyExit, FirstActiveLane},
- nullptr, "early.exit.value");
+ ExitIRI->setOperand(
+ EarlyExitIdx,
+ EarlyExitB.createNaryOp(Instruction::ExtractElement,
+ {IncomingFromEarlyExit, FirstActiveLane},
+ nullptr, "early.exit.value"));
}
- ExitIRI->addOperand(IncomingFromEarlyExit);
}
MiddleBuilder.createNaryOp(VPInstruction::BranchOnCond, {IsEarlyExitTaken});
>From 317d975da01c83a9c04d0f5980265410b60860d9 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 23 Mar 2025 14:50:48 +0000
Subject: [PATCH 8/9] [VPlan] Retain exit conditions early
---
.../Transforms/Vectorize/LoopVectorize.cpp | 35 ++++++++++---
llvm/lib/Transforms/Vectorize/VPlan.cpp | 9 ++--
.../Vectorize/VPlanConstruction.cpp | 51 +++++++------------
.../Transforms/Vectorize/VPlanTransforms.cpp | 36 ++++++++-----
.../Transforms/Vectorize/VPlanTransforms.h | 3 +-
.../vplan-printing-outer-loop.ll | 5 +-
6 files changed, 81 insertions(+), 58 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index f6b300d263886..5cefa54fba5d7 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9350,6 +9350,24 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
bool HasNUW = !IVUpdateMayOverflow || Style == TailFoldingStyle::None;
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW, DL);
+ if (auto *UncountableExitingBlock =
+ Legal->getUncountableEarlyExitingBlock()) {
+ VPlanTransforms::runPass(VPlanTransforms::handleUncountableEarlyExit, *Plan,
+ *PSE.getSE(), OrigLoop, UncountableExitingBlock);
+ } else {
+ SmallPtrSet<VPBlockBase *, 2> ExitBlocks(Plan->getExitBlocks().begin(),
+ Plan->getExitBlocks().end());
+ for (VPBlockBase *VPBB : to_vector(
+ vp_depth_first_shallow(Plan->getVectorLoopRegion()->getEntry()))) {
+ for (VPBlockBase *EB : ExitBlocks) {
+ if (is_contained(VPBB->getSuccessors(), EB)) {
+ cast<VPBasicBlock>(VPBB)->getTerminator()->eraseFromParent();
+ VPBlockUtils::disconnectBlocks(VPBB, EB);
+ }
+ }
+ }
+ }
+
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
Builder);
@@ -9528,12 +9546,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
R->setOperand(1, WideIV->getStepValue());
}
- if (auto *UncountableExitingBlock =
- Legal->getUncountableEarlyExitingBlock()) {
- VPlanTransforms::runPass(VPlanTransforms::handleUncountableEarlyExit, *Plan,
- *PSE.getSE(), OrigLoop, UncountableExitingBlock,
- RecipeBuilder);
- }
DenseMap<VPValue *, VPValue *> IVEndValues;
addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues);
SetVector<VPIRInstruction *> ExitUsersToFix =
@@ -9631,6 +9643,17 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI, VPB2IRBB);
VPlanTransforms::introduceRegions(*Plan, Legal->getWidestInductionType(), PSE,
true, false, OrigLoop);
+ SmallPtrSet<VPBlockBase *, 2> ExitBlocks(Plan->getExitBlocks().begin(),
+ Plan->getExitBlocks().end());
+ for (VPBlockBase *VPBB : to_vector(
+ vp_depth_first_shallow(Plan->getVectorLoopRegion()->getEntry()))) {
+ for (VPBlockBase *EB : ExitBlocks) {
+ if (is_contained(VPBB->getSuccessors(), EB)) {
+ cast<VPBasicBlock>(VPBB)->getTerminator()->eraseFromParent();
+ VPBlockUtils::disconnectBlocks(VPBB, EB);
+ }
+ }
+ }
for (ElementCount VF : Range)
Plan->addVF(VF);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index c97edcf3ecd3a..66775ea2a4615 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -655,9 +655,10 @@ VPBasicBlock::isHeader(const VPDominatorTree &VPDT) const {
return std::nullopt;
for (unsigned Idx : {0, 1}) {
- auto *PreheaderVPBB = cast<VPBasicBlock>(Preds[Idx]);
- auto *LatchVPBB = cast<VPBasicBlock>(Preds[1 - Idx]);
- if (VPDT.dominates(PreheaderVPBB, this) && VPDT.dominates(this, LatchVPBB))
+ auto *PreheaderVPBB = dyn_cast<VPBasicBlock>(Preds[Idx]);
+ auto *LatchVPBB = dyn_cast<VPBasicBlock>(Preds[1 - Idx]);
+ if (PreheaderVPBB && LatchVPBB && VPDT.dominates(PreheaderVPBB, this) &&
+ VPDT.dominates(this, LatchVPBB))
return {std::make_pair(PreheaderVPBB, LatchVPBB)};
}
@@ -871,7 +872,7 @@ VPlan::VPlan(Loop *L) {
ScalarHeader = createVPIRBasicBlock(L->getHeader());
SmallVector<BasicBlock *> IRExitBlocks;
- L->getExitBlocks(IRExitBlocks);
+ L->getUniqueExitBlocks(IRExitBlocks);
for (BasicBlock *EB : IRExitBlocks)
ExitBlocks.push_back(createVPIRBasicBlock(EB));
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 4c32490712c5e..09b93a89a4884 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -122,6 +122,9 @@ VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
return VPBB;
}
+ if (!TheLoop->contains(BB))
+ return Plan.getExitBlock(BB);
+
// Create new VPBB.
StringRef Name = isHeaderBB(BB, TheLoop) ? "vector.body" : BB->getName();
LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n");
@@ -155,14 +158,6 @@ bool PlainCFGBuilder::isExternalDef(Value *Val) {
// Instruction definition is in outermost loop PH.
return false;
- // Check whether Instruction definition is in a loop exit.
- SmallVector<BasicBlock *> ExitBlocks;
- TheLoop->getExitBlocks(ExitBlocks);
- if (is_contained(ExitBlocks, InstParent)) {
- // Instruction definition is in outermost loop exit.
- return false;
- }
-
// Check whether Instruction definition is in loop body.
return !TheLoop->contains(Inst);
}
@@ -211,11 +206,8 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
"Instruction shouldn't have been visited.");
if (auto *Br = dyn_cast<BranchInst>(Inst)) {
- if (TheLoop->getLoopLatch() == BB ||
- any_of(successors(BB),
- [this](BasicBlock *Succ) { return !TheLoop->contains(Succ); }))
+ if (TheLoop->getLoopLatch() == BB)
continue;
-
// Conditional branch instruction are represented using BranchOnCond
// recipes.
if (Br->isConditional()) {
@@ -305,7 +297,6 @@ void PlainCFGBuilder::buildPlainCFG(
for (BasicBlock *BB : RPO) {
// Create or retrieve the VPBasicBlock for this BB.
VPBasicBlock *VPBB = getOrCreateVPBB(BB);
- Loop *LoopForBB = LI.getLoopFor(BB);
// Set VPBB predecessors in the same order as they are in the incoming BB.
setVPBBPredsFromBB(VPBB, BB);
@@ -339,24 +330,12 @@ void PlainCFGBuilder::buildPlainCFG(
BasicBlock *IRSucc1 = BI->getSuccessor(1);
VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0);
VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1);
-
- // Don't connect any blocks outside the current loop except the latch, which
- // is handled below.
- if (LoopForBB &&
- (LoopForBB == TheLoop || BB != LoopForBB->getLoopLatch())) {
- if (!LoopForBB->contains(IRSucc0)) {
- VPBB->setOneSuccessor(Successor1);
- continue;
- }
- if (!LoopForBB->contains(IRSucc1)) {
- VPBB->setOneSuccessor(Successor0);
- continue;
- }
- }
-
VPBB->setTwoSuccessors(Successor0, Successor1);
}
+ for (auto *EB : Plan.getExitBlocks()) {
+ setVPBBPredsFromBB(EB, EB->getIRBasicBlock());
+ }
// 2. The whole CFG has been built at this point so all the input Values must
// have a VPlan counterpart. Fix VPlan header phi by adding their
// corresponding VPlan operands.
@@ -413,10 +392,15 @@ static VPRegionBlock *introduceRegion(VPlan &Plan, VPBasicBlock *PreheaderVPBB,
auto *R = Plan.createVPRegionBlock(HeaderVPBB, LatchVPBB, "",
false /*isReplicator*/);
R->setParent(HeaderVPBB->getParent());
+
// All VPBB's reachable shallowly from HeaderVPBB belong to top level loop,
// because VPlan is expected to end at top level latch disconnected above.
- for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPBB))
- VPBB->setParent(R);
+ SmallPtrSet<VPBlockBase *, 2> ExitBlocks(Plan.getExitBlocks().begin(),
+ Plan.getExitBlocks().end());
+ for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPBB)) {
+ if (!ExitBlocks.contains(VPBB))
+ VPBB->setParent(R);
+ }
VPBlockUtils::insertBlockAfter(R, PreheaderVPBB);
if (Succ)
@@ -466,7 +450,11 @@ void VPlanTransforms::introduceRegions(VPlan &Plan, Type *InductionTy,
VPBasicBlock *ScalarPH = Plan.createVPBasicBlock("scalar.ph");
VPBlockUtils::connectBlocks(ScalarPH, Plan.getScalarHeader());
+ BasicBlock *IRExitBlock = TheLoop->getUniqueLatchExitBlock();
+ auto *VPExitBlock = IRExitBlock ? Plan.getExitBlock(IRExitBlock) : nullptr;
if (!RequiresScalarEpilogueCheck) {
+ if (VPExitBlock)
+ VPBlockUtils::disconnectBlocks(MiddleVPBB, VPExitBlock);
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
for (auto *EB : Plan.getExitBlocks()) {
for (VPRecipeBase &R : *EB)
@@ -484,10 +472,7 @@ void VPlanTransforms::introduceRegions(VPlan &Plan, Type *InductionTy,
// 2) If we require a scalar epilogue, there is no conditional branch as
// we unconditionally branch to the scalar preheader. Do nothing.
// 3) Otherwise, construct a runtime check.
- BasicBlock *IRExitBlock = TheLoop->getUniqueLatchExitBlock();
- auto *VPExitBlock = Plan.getExitBlock(IRExitBlock);
// The connection order corresponds to the operands of the conditional branch.
- VPBlockUtils::insertBlockAfter(VPExitBlock, MiddleVPBB);
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
auto *ScalarLatchTerm = TheLoop->getLoopLatch()->getTerminator();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 21816814424c6..4af913fd34bc6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2368,7 +2368,7 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) {
void VPlanTransforms::handleUncountableEarlyExit(
VPlan &Plan, ScalarEvolution &SE, Loop *OrigLoop,
- BasicBlock *UncountableExitingBlock, VPRecipeBuilder &RecipeBuilder) {
+ BasicBlock *UncountableExitingBlock) {
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
auto *LatchVPBB = cast<VPBasicBlock>(LoopRegion->getExiting());
VPBuilder Builder(LatchVPBB->getTerminator());
@@ -2379,17 +2379,29 @@ void VPlanTransforms::handleUncountableEarlyExit(
// tracks if the uncountable early exit has been taken. Also split the middle
// block and have it conditionally branch to the early exit block if
// EarlyExitTaken.
- auto *EarlyExitingBranch =
- cast<BranchInst>(UncountableExitingBlock->getTerminator());
- BasicBlock *TrueSucc = EarlyExitingBranch->getSuccessor(0);
- BasicBlock *FalseSucc = EarlyExitingBranch->getSuccessor(1);
- BasicBlock *EarlyExitIRBB =
- !OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc;
- VPIRBasicBlock *VPEarlyExitBlock = Plan.getExitBlock(EarlyExitIRBB);
-
- VPValue *EarlyExitNotTakenCond = RecipeBuilder.getBlockInMask(
- OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc);
- auto *EarlyExitTakenCond = Builder.createNot(EarlyExitNotTakenCond);
+ VPBasicBlock *EEB = nullptr;
+ for (auto *EB : Plan.getExitBlocks()) {
+ for (VPBlockBase *Pred : EB->getPredecessors()) {
+ if (Pred != MiddleVPBB) {
+ EEB = cast<VPBasicBlock>(Pred);
+ break;
+ }
+ }
+ }
+
+ VPBlockBase *TrueSucc = EEB->getSuccessors()[0];
+ VPBlockBase *FalseSucc = EEB->getSuccessors()[1];
+ auto *VPEarlyExitBlock =
+ cast<VPIRBasicBlock>(TrueSucc->getParent() ? FalseSucc : TrueSucc);
+
+ VPValue *EarlyExitCond = EEB->getTerminator()->getOperand(0);
+ auto *EarlyExitTakenCond = TrueSucc == VPEarlyExitBlock
+ ? EarlyExitCond
+ : Builder.createNot(EarlyExitCond);
+
+ EEB->getTerminator()->eraseFromParent();
+ VPBlockUtils::disconnectBlocks(EEB, VPEarlyExitBlock);
+
IsEarlyExitTaken =
Builder.createNaryOp(VPInstruction::AnyOf, {EarlyExitTakenCond});
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index be519d06d30f0..6eba581d0c0bd 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -176,8 +176,7 @@ struct VPlanTransforms {
/// if taken.
static void handleUncountableEarlyExit(VPlan &Plan, ScalarEvolution &SE,
Loop *OrigLoop,
- BasicBlock *UncountableExitingBlock,
- VPRecipeBuilder &RecipeBuilder);
+ BasicBlock *UncountableExitingBlock);
/// Lower abstract recipes to concrete ones, that can be codegen'd.
static void convertToConcreteRecipes(VPlan &Plan);
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
index b4b6d3d760349..5798d7d926e12 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
@@ -31,7 +31,10 @@ define void @foo(i64 %n) {
; CHECK-NEXT: outer.latch:
; CHECK-NEXT: EMIT ir<%outer.iv.next> = add ir<%outer.iv>, ir<1>
; CHECK-NEXT: EMIT ir<%outer.ec> = icmp ir<%outer.iv.next>, ir<8>
-; CHECK-NEXT: Successor(s): vector.body
+; CHECK-NEXT: Successor(s): ir-bb<exit>, vector.body
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<exit>:
+; CHECK-NEXT: No successors
; CHECK-NEXT: }
entry:
br label %outer.header
>From a06af468385d480bf7039c3e9602e97b80fe0717 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sat, 22 Feb 2025 19:15:32 +0000
Subject: [PATCH 9/9] [VPlan] Move predication to VPlanTransform (NFC) (WIP).
This patch moves the logic to predicate and linearize a VPlan to a
dedicated VPlan transform.
The main logic to perform predication is ready to review, although
there are a few things to note that should be improved, either directly in
the PR or in the future:
* Edge and block masks are cached in VPRecipeBuilder, so they can be
accessed during recipe construction. A better alternative may be to
add mask operands to all VPInstructions that need them and use those
during recipe construction.
* The mask caching in a map also means that this map needs updating
each time a new recipe replaces a VPInstruction; this would also be
handled by adding mask operands.
Currently this is still WIP because early-exit loop handling does not work
yet: the exit conditions are not available in the initial VPlans.
This will be fixed with https://github.com/llvm/llvm-project/pull/128419
and follow-ups.
All tests except those for early-exit loops are passing.
---
llvm/lib/Transforms/Vectorize/CMakeLists.txt | 1 +
.../Transforms/Vectorize/LoopVectorize.cpp | 332 ++++--------------
.../Transforms/Vectorize/VPRecipeBuilder.h | 45 +--
.../Vectorize/VPlanConstruction.cpp | 29 +-
.../Transforms/Vectorize/VPlanPredicator.cpp | 255 ++++++++++++++
.../Transforms/Vectorize/VPlanTransforms.h | 7 +-
.../Transforms/Vectorize/VPlanTestBase.h | 3 +-
7 files changed, 354 insertions(+), 318 deletions(-)
create mode 100644 llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
diff --git a/llvm/lib/Transforms/Vectorize/CMakeLists.txt b/llvm/lib/Transforms/Vectorize/CMakeLists.txt
index 0dc6a7d2f594f..e6c7142edd100 100644
--- a/llvm/lib/Transforms/Vectorize/CMakeLists.txt
+++ b/llvm/lib/Transforms/Vectorize/CMakeLists.txt
@@ -24,6 +24,7 @@ add_llvm_component_library(LLVMVectorize
VPlan.cpp
VPlanAnalysis.cpp
VPlanConstruction.cpp
+ VPlanPredicator.cpp
VPlanRecipes.cpp
VPlanSLP.cpp
VPlanTransforms.cpp
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 5cefa54fba5d7..fc0321d0f820f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8136,185 +8136,6 @@ void EpilogueVectorizerEpilogueLoop::printDebugTracesAtEnd() {
});
}
-void VPRecipeBuilder::createSwitchEdgeMasks(SwitchInst *SI) {
- BasicBlock *Src = SI->getParent();
- assert(!OrigLoop->isLoopExiting(Src) &&
- all_of(successors(Src),
- [this](BasicBlock *Succ) {
- return OrigLoop->getHeader() != Succ;
- }) &&
- "unsupported switch either exiting loop or continuing to header");
- // Create masks where the terminator in Src is a switch. We create mask for
- // all edges at the same time. This is more efficient, as we can create and
- // collect compares for all cases once.
- VPValue *Cond = getVPValueOrAddLiveIn(SI->getCondition());
- BasicBlock *DefaultDst = SI->getDefaultDest();
- MapVector<BasicBlock *, SmallVector<VPValue *>> Dst2Compares;
- for (auto &C : SI->cases()) {
- BasicBlock *Dst = C.getCaseSuccessor();
- assert(!EdgeMaskCache.contains({Src, Dst}) && "Edge masks already created");
- // Cases whose destination is the same as default are redundant and can be
- // ignored - they will get there anyhow.
- if (Dst == DefaultDst)
- continue;
- auto &Compares = Dst2Compares[Dst];
- VPValue *V = getVPValueOrAddLiveIn(C.getCaseValue());
- Compares.push_back(Builder.createICmp(CmpInst::ICMP_EQ, Cond, V));
- }
-
- // We need to handle 2 separate cases below for all entries in Dst2Compares,
- // which excludes destinations matching the default destination.
- VPValue *SrcMask = getBlockInMask(Src);
- VPValue *DefaultMask = nullptr;
- for (const auto &[Dst, Conds] : Dst2Compares) {
- // 1. Dst is not the default destination. Dst is reached if any of the cases
- // with destination == Dst are taken. Join the conditions for each case
- // whose destination == Dst using an OR.
- VPValue *Mask = Conds[0];
- for (VPValue *V : ArrayRef<VPValue *>(Conds).drop_front())
- Mask = Builder.createOr(Mask, V);
- if (SrcMask)
- Mask = Builder.createLogicalAnd(SrcMask, Mask);
- EdgeMaskCache[{Src, Dst}] = Mask;
-
- // 2. Create the mask for the default destination, which is reached if none
- // of the cases with destination != default destination are taken. Join the
- // conditions for each case where the destination is != Dst using an OR and
- // negate it.
- DefaultMask = DefaultMask ? Builder.createOr(DefaultMask, Mask) : Mask;
- }
-
- if (DefaultMask) {
- DefaultMask = Builder.createNot(DefaultMask);
- if (SrcMask)
- DefaultMask = Builder.createLogicalAnd(SrcMask, DefaultMask);
- }
- EdgeMaskCache[{Src, DefaultDst}] = DefaultMask;
-}
-
-VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
- assert(is_contained(predecessors(Dst), Src) && "Invalid edge");
-
- // Look for cached value.
- std::pair<BasicBlock *, BasicBlock *> Edge(Src, Dst);
- EdgeMaskCacheTy::iterator ECEntryIt = EdgeMaskCache.find(Edge);
- if (ECEntryIt != EdgeMaskCache.end())
- return ECEntryIt->second;
-
- if (auto *SI = dyn_cast<SwitchInst>(Src->getTerminator())) {
- createSwitchEdgeMasks(SI);
- assert(EdgeMaskCache.contains(Edge) && "Mask for Edge not created?");
- return EdgeMaskCache[Edge];
- }
-
- VPValue *SrcMask = getBlockInMask(Src);
-
- // The terminator has to be a branch inst!
- BranchInst *BI = dyn_cast<BranchInst>(Src->getTerminator());
- assert(BI && "Unexpected terminator found");
- if (!BI->isConditional() || BI->getSuccessor(0) == BI->getSuccessor(1))
- return EdgeMaskCache[Edge] = SrcMask;
-
- // If source is an exiting block, we know the exit edge is dynamically dead
- // in the vector loop, and thus we don't need to restrict the mask. Avoid
- // adding uses of an otherwise potentially dead instruction unless we are
- // vectorizing a loop with uncountable exits. In that case, we always
- // materialize the mask.
- if (OrigLoop->isLoopExiting(Src) &&
- Src != Legal->getUncountableEarlyExitingBlock())
- return EdgeMaskCache[Edge] = SrcMask;
-
- VPValue *EdgeMask = getVPValueOrAddLiveIn(BI->getCondition());
- assert(EdgeMask && "No Edge Mask found for condition");
-
- if (BI->getSuccessor(0) != Dst)
- EdgeMask = Builder.createNot(EdgeMask, BI->getDebugLoc());
-
- if (SrcMask) { // Otherwise block in-mask is all-one, no need to AND.
- // The bitwise 'And' of SrcMask and EdgeMask introduces new UB if SrcMask
- // is false and EdgeMask is poison. Avoid that by using 'LogicalAnd'
- // instead which generates 'select i1 SrcMask, i1 EdgeMask, i1 false'.
- EdgeMask = Builder.createLogicalAnd(SrcMask, EdgeMask, BI->getDebugLoc());
- }
-
- return EdgeMaskCache[Edge] = EdgeMask;
-}
-
-VPValue *VPRecipeBuilder::getEdgeMask(BasicBlock *Src, BasicBlock *Dst) const {
- assert(is_contained(predecessors(Dst), Src) && "Invalid edge");
-
- // Look for cached value.
- std::pair<BasicBlock *, BasicBlock *> Edge(Src, Dst);
- EdgeMaskCacheTy::const_iterator ECEntryIt = EdgeMaskCache.find(Edge);
- assert(ECEntryIt != EdgeMaskCache.end() &&
- "looking up mask for edge which has not been created");
- return ECEntryIt->second;
-}
-
-void VPRecipeBuilder::createHeaderMask() {
- BasicBlock *Header = OrigLoop->getHeader();
-
- // When not folding the tail, use nullptr to model all-true mask.
- if (!CM.foldTailByMasking()) {
- BlockMaskCache[Header] = nullptr;
- return;
- }
-
- // Introduce the early-exit compare IV <= BTC to form header block mask.
- // This is used instead of IV < TC because TC may wrap, unlike BTC. Start by
- // constructing the desired canonical IV in the header block as its first
- // non-phi instructions.
-
- VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
- auto NewInsertionPoint = HeaderVPBB->getFirstNonPhi();
- auto *IV = new VPWidenCanonicalIVRecipe(Plan.getCanonicalIV());
- HeaderVPBB->insert(IV, NewInsertionPoint);
-
- VPBuilder::InsertPointGuard Guard(Builder);
- Builder.setInsertPoint(HeaderVPBB, NewInsertionPoint);
- VPValue *BlockMask = nullptr;
- VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
- BlockMask = Builder.createICmp(CmpInst::ICMP_ULE, IV, BTC);
- BlockMaskCache[Header] = BlockMask;
-}
-
-VPValue *VPRecipeBuilder::getBlockInMask(BasicBlock *BB) const {
- // Return the cached value.
- BlockMaskCacheTy::const_iterator BCEntryIt = BlockMaskCache.find(BB);
- assert(BCEntryIt != BlockMaskCache.end() &&
- "Trying to access mask for block without one.");
- return BCEntryIt->second;
-}
-
-void VPRecipeBuilder::createBlockInMask(BasicBlock *BB) {
- assert(OrigLoop->contains(BB) && "Block is not a part of a loop");
- assert(BlockMaskCache.count(BB) == 0 && "Mask for block already computed");
- assert(OrigLoop->getHeader() != BB &&
- "Loop header must have cached block mask");
-
- // All-one mask is modelled as no-mask following the convention for masked
- // load/store/gather/scatter. Initialize BlockMask to no-mask.
- VPValue *BlockMask = nullptr;
- // This is the block mask. We OR all unique incoming edges.
- for (auto *Predecessor :
- SetVector<BasicBlock *>(pred_begin(BB), pred_end(BB))) {
- VPValue *EdgeMask = createEdgeMask(Predecessor, BB);
- if (!EdgeMask) { // Mask of predecessor is all-one so mask of block is too.
- BlockMaskCache[BB] = EdgeMask;
- return;
- }
-
- if (!BlockMask) { // BlockMask has its initialized nullptr value.
- BlockMask = EdgeMask;
- continue;
- }
-
- BlockMask = Builder.createOr(BlockMask, EdgeMask, {});
- }
-
- BlockMaskCache[BB] = BlockMask;
-}
-
VPWidenMemoryRecipe *
VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
VFRange &Range) {
@@ -8339,7 +8160,7 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
VPValue *Mask = nullptr;
if (Legal->isMaskRequired(I))
- Mask = getBlockInMask(I->getParent());
+ Mask = getBlockInMask(Builder.getInsertBlock());
// Determine if the pointer operand of the access is either consecutive or
// reverse consecutive.
@@ -8458,38 +8279,6 @@ VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate(
return nullptr;
}
-VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi,
- ArrayRef<VPValue *> Operands) {
- unsigned NumIncoming = Phi->getNumIncomingValues();
-
- // We know that all PHIs in non-header blocks are converted into selects, so
- // we don't have to worry about the insertion order and we can just use the
- // builder. At this point we generate the predication tree. There may be
- // duplications since this is a simple recursive scan, but future
- // optimizations will clean it up.
-
- // Map incoming IR BasicBlocks to incoming VPValues, for lookup below.
- // TODO: Add operands and masks in order from the VPlan predecessors.
- DenseMap<BasicBlock *, VPValue *> VPIncomingValues;
- for (const auto &[Idx, Pred] : enumerate(predecessors(Phi->getParent())))
- VPIncomingValues[Pred] = Operands[Idx];
-
- SmallVector<VPValue *, 2> OperandsWithMask;
- for (unsigned In = 0; In < NumIncoming; In++) {
- BasicBlock *Pred = Phi->getIncomingBlock(In);
- OperandsWithMask.push_back(VPIncomingValues.lookup(Pred));
- VPValue *EdgeMask = getEdgeMask(Pred, Phi->getParent());
- if (!EdgeMask) {
- assert(In == 0 && "Both null and non-null edge masks found");
- assert(all_equal(Operands) &&
- "Distinct incoming values with one having a full mask");
- break;
- }
- OperandsWithMask.push_back(EdgeMask);
- }
- return new VPBlendRecipe(Phi, OperandsWithMask);
-}
-
VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
ArrayRef<VPValue *> Operands,
VFRange &Range) {
@@ -8565,7 +8354,7 @@ VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
// all-true mask.
VPValue *Mask = nullptr;
if (Legal->isMaskRequired(CI))
- Mask = getBlockInMask(CI->getParent());
+ Mask = getBlockInMask(Builder.getInsertBlock());
else
Mask = Plan.getOrAddLiveIn(
ConstantInt::getTrue(IntegerType::getInt1Ty(CI->getContext())));
@@ -8607,7 +8396,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
// div/rem operation itself. Otherwise fall through to general handling below.
if (CM.isPredicatedInst(I)) {
SmallVector<VPValue *> Ops(Operands);
- VPValue *Mask = getBlockInMask(I->getParent());
+ VPValue *Mask = getBlockInMask(Builder.getInsertBlock());
VPValue *One =
Plan.getOrAddLiveIn(ConstantInt::get(I->getType(), 1u, false));
auto *SafeRHS = Builder.createSelect(Mask, Ops[1], One, I->getDebugLoc());
@@ -8689,7 +8478,7 @@ VPRecipeBuilder::tryToWidenHistogram(const HistogramInfo *HI,
// In case of predicated execution (due to tail-folding, or conditional
// execution, or both), pass the relevant mask.
if (Legal->isMaskRequired(HI->Store))
- HGramOps.push_back(getBlockInMask(HI->Store->getParent()));
+ HGramOps.push_back(getBlockInMask(Builder.getInsertBlock()));
return new VPHistogramRecipe(Opcode,
make_range(HGramOps.begin(), HGramOps.end()),
@@ -8745,7 +8534,7 @@ VPRecipeBuilder::handleReplication(Instruction *I, ArrayRef<VPValue *> Operands,
// added initially. Masked replicate recipes will later be placed under an
// if-then construct to prevent side-effects. Generate recipes to compute
// the block mask for this region.
- BlockInMask = getBlockInMask(I->getParent());
+ BlockInMask = getBlockInMask(Builder.getInsertBlock());
}
// Note that there is some custom logic to mark some intrinsics as uniform
@@ -8878,9 +8667,8 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(
// nodes, calls and memory operations.
VPRecipeBase *Recipe;
if (auto *Phi = dyn_cast<PHINode>(Instr)) {
- if (Phi->getParent() != OrigLoop->getHeader())
- return tryToBlend(Phi, Operands);
-
+ assert(Phi->getParent() == OrigLoop->getHeader() &&
+ "Non-header phis should have been handled during predication");
assert(Operands.size() == 2 && "Must have 2 operands for header phis");
if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, Range)))
return Recipe;
@@ -8985,7 +8773,7 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction,
ReductionOpcode == Instruction::Sub) &&
"Expected an ADD or SUB operation for predicated partial "
"reductions (because the neutral element in the mask is zero)!");
- VPValue *Mask = getBlockInMask(Reduction->getParent());
+ VPValue *Mask = getBlockInMask(Builder.getInsertBlock());
VPValue *Zero =
Plan.getOrAddLiveIn(ConstantInt::get(Reduction->getType(), 0));
BinOp = Builder.createSelect(Mask, BinOp, Zero, Reduction->getDebugLoc());
@@ -9327,8 +9115,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
return !CM.requiresScalarEpilogue(VF.isVector());
},
Range);
- DenseMap<VPBlockBase *, BasicBlock *> VPB2IRBB;
- auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI, VPB2IRBB);
+ auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI);
VPlanTransforms::introduceRegions(*Plan, Legal->getWidestInductionType(), PSE,
RequiresScalarEpilogueCheck,
CM.foldTailByMasking(), OrigLoop);
@@ -9368,9 +9155,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
}
}
- VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
- Builder);
-
// ---------------------------------------------------------------------------
// Pre-construction: record ingredients whose recipes we'll need to further
// process after constructing the initial VPlan.
@@ -9411,39 +9195,55 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
return Legal->blockNeedsPredication(BB) || NeedsBlends;
});
- RecipeBuilder.collectScaledReductions(Range);
auto *MiddleVPBB = Plan->getMiddleBlock();
+ VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
+ Builder);
+ if (NeedsMasks) {
+ VPlanTransforms::predicateAndLinearize(*Plan, CM.foldTailByMasking(),
+ RecipeBuilder);
+ }
+
+ {
+ VPBlockBase *PrevVPBB = nullptr;
+ VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
+ ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
+ RPOT(Header);
+
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
+ // Handle VPBBs down to the latch.
+ if (VPBB == LoopRegion->getExiting()) {
+ VPBlockUtils::connectBlocks(PrevVPBB, VPBB);
+ break;
+ }
+
+ auto Successors = to_vector(VPBB->getSuccessors());
+ if (Successors.size() > 1)
+ VPBB->getTerminator()->eraseFromParent();
+
+ // Flatten the CFG in the loop. Masks for blocks have already been
+ // generated and added to recipes as needed. To do so, first disconnect
+ // VPBB from its successors. Then connect VPBB to the previously visited
+ // VPBB.
+ for (auto *Succ : Successors)
+ VPBlockUtils::disconnectBlocks(VPBB, Succ);
+ if (PrevVPBB)
+ VPBlockUtils::connectBlocks(PrevVPBB, VPBB);
+
+ PrevVPBB = VPBB;
+ }
+ }
+
+ RecipeBuilder.collectScaledReductions(Range);
+
// Scan the body of the loop in a topological order to visit each basic block
// after having visited its predecessor basic blocks.
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
HeaderVPBB);
VPBasicBlock::iterator MBIP = MiddleVPBB->getFirstNonPhi();
- VPBlockBase *PrevVPBB = nullptr;
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
- // Handle VPBBs down to the latch.
- if (VPBB == LoopRegion->getExiting()) {
- assert(!VPB2IRBB.contains(VPBB) &&
- "the latch block shouldn't have a corresponding IRBB");
- VPBlockUtils::connectBlocks(PrevVPBB, VPBB);
- break;
- }
-
- // Create mask based on the IR BB corresponding to VPBB.
- // TODO: Predicate directly based on VPlan.
- Builder.setInsertPoint(VPBB, VPBB->begin());
- if (VPBB == HeaderVPBB) {
- Builder.setInsertPoint(VPBB, VPBB->getFirstNonPhi());
- RecipeBuilder.createHeaderMask();
- } else if (NeedsMasks) {
- // FIXME: At the moment, masks need to be placed at the beginning of the
- // block, as blends introduced for phi nodes need to use it. The created
- // blends should be sunk after the mask recipes.
- RecipeBuilder.createBlockInMask(VPB2IRBB.lookup(VPBB));
- }
-
// Convert input VPInstructions to widened recipes.
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
auto *SingleDef = cast<VPSingleDefRecipe>(&R);
@@ -9453,7 +9253,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
// latter are added above for masking.
// FIXME: Migrate code relying on the underlying instruction from VPlan0
// to construct recipes below to not use the underlying instruction.
- if (isa<VPCanonicalIVPHIRecipe, VPWidenCanonicalIVRecipe>(&R) ||
+ if (isa<VPCanonicalIVPHIRecipe, VPWidenCanonicalIVRecipe, VPBlendRecipe>(
+ &R) ||
(isa<VPInstruction>(&R) && !UnderlyingValue))
continue;
@@ -9462,14 +9263,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
assert((isa<VPWidenPHIRecipe>(&R) || isa<VPInstruction>(&R)) &&
UnderlyingValue && "unsupported recipe");
- if (isa<VPInstruction>(&R) &&
- (cast<VPInstruction>(&R)->getOpcode() ==
- VPInstruction::BranchOnCond ||
- (cast<VPInstruction>(&R)->getOpcode() == Instruction::Switch))) {
- R.eraseFromParent();
- break;
- }
-
// TODO: Gradually replace uses of underlying instruction by analyses on
// VPlan.
Instruction *Instr = cast<Instruction>(UnderlyingValue);
@@ -9505,22 +9298,17 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
} else {
Builder.insert(Recipe);
}
- if (Recipe->getNumDefinedValues() == 1)
+ if (Recipe->getNumDefinedValues() == 1) {
SingleDef->replaceAllUsesWith(Recipe->getVPSingleValue());
- else
+ for (auto &[_, V] : RecipeBuilder.BlockMaskCache) {
+ if (V == SingleDef)
+ V = Recipe->getVPSingleValue();
+ }
+ } else
assert(Recipe->getNumDefinedValues() == 0 &&
"Unexpected multidef recipe");
R.eraseFromParent();
}
-
- // Flatten the CFG in the loop. Masks for blocks have already been generated
- // and added to recipes as needed. To do so, first disconnect VPBB from its
- // successors. Then connect VPBB to the previously visited VPBB.
- for (auto *Succ : to_vector(VPBB->getSuccessors()))
- VPBlockUtils::disconnectBlocks(VPBB, Succ);
- if (PrevVPBB)
- VPBlockUtils::connectBlocks(PrevVPBB, VPBB);
- PrevVPBB = VPBB;
}
assert(isa<VPRegionBlock>(Plan->getVectorLoopRegion()) &&
@@ -9639,8 +9427,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
assert(!OrigLoop->isInnermost());
assert(EnableVPlanNativePath && "VPlan-native path is not enabled.");
- DenseMap<VPBlockBase *, BasicBlock *> VPB2IRBB;
- auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI, VPB2IRBB);
+ auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI);
VPlanTransforms::introduceRegions(*Plan, Legal->getWidestInductionType(), PSE,
true, false, OrigLoop);
SmallPtrSet<VPBlockBase *, 2> ExitBlocks(Plan->getExitBlocks().begin(),
@@ -9823,7 +9610,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
BasicBlock *BB = CurrentLinkI->getParent();
VPValue *CondOp = nullptr;
if (CM.blockNeedsPredicationForAnyReason(BB))
- CondOp = RecipeBuilder.getBlockInMask(BB);
+ CondOp = RecipeBuilder.getBlockInMask(CurrentLink->getParent());
// Non-FP RdxDescs will have all fast math flags set, so clear them.
FastMathFlags FMFs = isa<FPMathOperator>(CurrentLinkI)
@@ -9862,7 +9649,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
// different numbers of lanes. Partial reductions mask the input instead.
if (!PhiR->isInLoop() && CM.foldTailByMasking() &&
!isa<VPPartialReductionRecipe>(OrigExitingVPV->getDefiningRecipe())) {
- VPValue *Cond = RecipeBuilder.getBlockInMask(OrigLoop->getHeader());
+ VPValue *Cond =
+ RecipeBuilder.getBlockInMask(VectorLoopRegion->getEntryBasicBlock());
assert(OrigExitingVPV->getDefiningRecipe()->getParent() != LatchVPBB &&
"reduction recipe must be defined before latch");
Type *PhiTy = PhiR->getOperand(0)->getLiveInIRValue()->getType();
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index 334cfbad8bd7c..9900c4117c5f6 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -73,11 +73,14 @@ class VPRecipeBuilder {
/// if-conversion currently takes place during VPlan-construction, so these
/// caches are only used at that stage.
using EdgeMaskCacheTy =
- DenseMap<std::pair<BasicBlock *, BasicBlock *>, VPValue *>;
- using BlockMaskCacheTy = DenseMap<BasicBlock *, VPValue *>;
+ DenseMap<std::pair<VPBasicBlock *, VPBasicBlock *>, VPValue *>;
+ using BlockMaskCacheTy = DenseMap<VPBasicBlock *, VPValue *>;
EdgeMaskCacheTy EdgeMaskCache;
+
+public:
BlockMaskCacheTy BlockMaskCache;
+private:
// VPlan construction support: Hold a mapping from ingredients to
// their recipe.
DenseMap<Instruction *, VPRecipeBase *> Ingredient2Recipe;
@@ -114,11 +117,6 @@ class VPRecipeBuilder {
tryToOptimizeInductionTruncate(TruncInst *I, ArrayRef<VPValue *> Operands,
VFRange &Range);
- /// Handle non-loop phi nodes. Return a new VPBlendRecipe otherwise. Currently
- /// all such phi nodes are turned into a sequence of select instructions as
- /// the vectorizer currently performs full if-conversion.
- VPBlendRecipe *tryToBlend(PHINode *Phi, ArrayRef<VPValue *> Operands);
-
/// Handle call instructions. If \p CI can be widened for \p Range.Start,
/// return a new VPWidenCallRecipe or VPWidenIntrinsicRecipe. Range.End may be
/// decreased to ensure same decision from \p Range.Start to \p Range.End.
@@ -187,27 +185,20 @@ class VPRecipeBuilder {
Ingredient2Recipe[I] = R;
}
- /// Create the mask for the vector loop header block.
- void createHeaderMask();
-
- /// A helper function that computes the predicate of the block BB, assuming
- /// that the header block of the loop is set to True or the loop mask when
- /// tail folding.
- void createBlockInMask(BasicBlock *BB);
-
+ void setBlockInMask(VPBasicBlock *BB, VPValue *Mask) {
+ assert(!BlockMaskCache.contains(BB) && "Mask already set");
+ BlockMaskCache[BB] = Mask;
+ }
/// Returns the *entry* mask for the block \p BB.
- VPValue *getBlockInMask(BasicBlock *BB) const;
-
- /// Create an edge mask for every destination of cases and/or default.
- void createSwitchEdgeMasks(SwitchInst *SI);
-
- /// A helper function that computes the predicate of the edge between SRC
- /// and DST.
- VPValue *createEdgeMask(BasicBlock *Src, BasicBlock *Dst);
-
- /// A helper that returns the previously computed predicate of the edge
- /// between SRC and DST.
- VPValue *getEdgeMask(BasicBlock *Src, BasicBlock *Dst) const;
+ VPValue *getBlockInMask(VPBasicBlock *BB) const {
+ return BlockMaskCache.lookup(BB);
+ }
+ void setEdgeMask(VPBasicBlock *Src, VPBasicBlock *Dst, VPValue *Mask) {
+ EdgeMaskCache[{Src, Dst}] = Mask;
+ }
+ VPValue *getEdgeMask(VPBasicBlock *Src, VPBasicBlock *Dst) const {
+ return EdgeMaskCache.lookup({Src, Dst});
+ }
/// Return the recipe created for given ingredient.
VPRecipeBase *getRecipe(Instruction *I) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 09b93a89a4884..22914b190a32e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -66,7 +66,7 @@ class PlainCFGBuilder {
: TheLoop(Lp), LI(LI), Plan(P) {}
/// Build plain CFG for TheLoop and connects it to Plan's entry.
- void buildPlainCFG(DenseMap<VPBlockBase *, BasicBlock *> &VPB2IRBB);
+ void buildPlainCFG();
};
} // anonymous namespace
@@ -257,10 +257,16 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
for (Value *Op : Inst->operands())
VPOperands.push_back(getOrCreateVPOperand(Op));
- // Build VPInstruction for any arbitrary Instruction without specific
- // representation in VPlan.
- NewR = cast<VPInstruction>(
- VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst));
+ if (auto *ICmp = dyn_cast<ICmpInst>(Inst)) {
+ NewR = cast<VPInstruction>(VPIRBuilder.createICmp(
+ ICmp->getPredicate(), VPOperands[0], VPOperands[1]));
+ NewR->setUnderlyingValue(ICmp);
+ } else {
+ // Build VPInstruction for any arbitrary Instruction without specific
+ // representation in VPlan.
+ NewR = cast<VPInstruction>(
+ VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst));
+ }
}
IRDef2VPValue[Inst] = NewR;
@@ -268,8 +274,7 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
}
// Main interface to build the plain CFG.
-void PlainCFGBuilder::buildPlainCFG(
- DenseMap<VPBlockBase *, BasicBlock *> &VPB2IRBB) {
+void PlainCFGBuilder::buildPlainCFG() {
VPIRBasicBlock *Entry = cast<VPIRBasicBlock>(Plan.getEntry());
BB2VPBB[Entry->getIRBasicBlock()] = Entry;
@@ -361,18 +366,14 @@ void PlainCFGBuilder::buildPlainCFG(
}
}
- for (const auto &[IRBB, VPB] : BB2VPBB)
- VPB2IRBB[VPB] = IRBB;
-
LLVM_DEBUG(Plan.setName("Plain CFG\n"); dbgs() << Plan);
}
-std::unique_ptr<VPlan> VPlanTransforms::buildPlainCFG(
- Loop *TheLoop, LoopInfo &LI,
- DenseMap<VPBlockBase *, BasicBlock *> &VPB2IRBB) {
+std::unique_ptr<VPlan> VPlanTransforms::buildPlainCFG(Loop *TheLoop,
+ LoopInfo &LI) {
auto Plan = std::make_unique<VPlan>(TheLoop);
PlainCFGBuilder Builder(TheLoop, LI, *Plan);
- Builder.buildPlainCFG(VPB2IRBB);
+ Builder.buildPlainCFG();
return Plan;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
new file mode 100644
index 0000000000000..1a76a32c7bf43
--- /dev/null
+++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
@@ -0,0 +1,255 @@
+//===-- VPlanPredicator.cpp - VPlan predicator ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements predication for VPlans.
+///
+//===----------------------------------------------------------------------===//
+
+#include "VPRecipeBuilder.h"
+#include "VPlan.h"
+#include "VPlanCFG.h"
+#include "VPlanTransforms.h"
+#include "VPlanUtils.h"
+#include "llvm/ADT/PostOrderIterator.h"
+
+using namespace llvm;
+
+struct VPPredicator {
+ /// When we if-convert we need to create edge masks. We have to cache values
+ /// so that we don't end up with exponential recursion/IR. Note that
+ /// if-conversion currently takes place during VPlan-construction, so these
+ /// caches are only used at that stage.
+ using EdgeMaskCacheTy =
+ DenseMap<std::pair<VPBasicBlock *, VPBasicBlock *>, VPValue *>;
+ using BlockMaskCacheTy = DenseMap<VPBasicBlock *, VPValue *>;
+
+ VPPredicator(VPRecipeBuilder &RecipeBuilder) : RecipeBuilder(RecipeBuilder) {}
+
+ VPRecipeBuilder &RecipeBuilder;
+
+ VPBuilder Builder;
+ VPValue *createEdgeMask(VPBasicBlock *Src, VPBasicBlock *Dst) {
+ assert(is_contained(Dst->getPredecessors(), Src) && "Invalid edge");
+
+ // Look for cached value.
+ VPValue *EdgeMask = RecipeBuilder.getEdgeMask(Src, Dst);
+ if (EdgeMask)
+ return EdgeMask;
+
+ VPValue *SrcMask = RecipeBuilder.getBlockInMask(Src);
+
+ // The terminator has to be a branch inst!
+ if (Src->empty() || Src->getNumSuccessors() == 1) {
+ RecipeBuilder.setEdgeMask(Src, Dst, SrcMask);
+ return SrcMask;
+ }
+
+ auto *Term = cast<VPInstruction>(Src->getTerminator());
+ if (Term->getOpcode() == Instruction::Switch) {
+ createSwitchEdgeMasks(Term);
+ return RecipeBuilder.getEdgeMask(Src, Dst);
+ }
+
+ auto *BI = cast<VPInstruction>(Src->getTerminator());
+ assert(BI->getOpcode() == VPInstruction::BranchOnCond);
+ if (Src->getSuccessors()[0] == Src->getSuccessors()[1]) {
+ RecipeBuilder.setEdgeMask(Src, Dst, SrcMask);
+ return SrcMask;
+ }
+
+ EdgeMask = BI->getOperand(0);
+ assert(EdgeMask && "No Edge Mask found for condition");
+
+ if (Src->getSuccessors()[0] != Dst)
+ EdgeMask = Builder.createNot(EdgeMask, BI->getDebugLoc());
+
+ if (SrcMask) { // Otherwise block in-mask is all-one, no need to AND.
+ // The bitwise 'And' of SrcMask and EdgeMask introduces new UB if SrcMask
+ // is false and EdgeMask is poison. Avoid that by using 'LogicalAnd'
+ // instead which generates 'select i1 SrcMask, i1 EdgeMask, i1 false'.
+ EdgeMask = Builder.createLogicalAnd(SrcMask, EdgeMask, BI->getDebugLoc());
+ }
+
+ RecipeBuilder.setEdgeMask(Src, Dst, EdgeMask);
+ return EdgeMask;
+ }
+
+ VPValue *createBlockInMask(VPBasicBlock *VPBB) {
+ Builder.setInsertPoint(VPBB, VPBB->begin());
+ // All-one mask is modelled as no-mask following the convention for masked
+ // load/store/gather/scatter. Initialize BlockMask to no-mask.
+ VPValue *BlockMask = nullptr;
+ // This is the block mask. We OR all unique incoming edges.
+ for (auto *Predecessor : SetVector<VPBlockBase *>(
+ VPBB->getPredecessors().begin(), VPBB->getPredecessors().end())) {
+ VPValue *EdgeMask = createEdgeMask(cast<VPBasicBlock>(Predecessor), VPBB);
+ if (!EdgeMask) { // Mask of predecessor is all-one so mask of block is
+ // too.
+ RecipeBuilder.setBlockInMask(VPBB, EdgeMask);
+ return EdgeMask;
+ }
+
+ if (!BlockMask) { // BlockMask has its initialized nullptr value.
+ BlockMask = EdgeMask;
+ continue;
+ }
+
+ BlockMask = Builder.createOr(BlockMask, EdgeMask, {});
+ }
+
+ RecipeBuilder.setBlockInMask(VPBB, BlockMask);
+ return BlockMask;
+ }
+
+ void createHeaderMask(VPBasicBlock *HeaderVPBB, bool FoldTail) {
+ if (!FoldTail) {
+ RecipeBuilder.setBlockInMask(HeaderVPBB, nullptr);
+ return;
+ }
+
+ // Introduce the early-exit compare IV <= BTC to form header block mask.
+ // This is used instead of IV < TC because TC may wrap, unlike BTC. Start by
+ // constructing the desired canonical IV in the header block as its first
+ // non-phi instructions.
+
+ auto NewInsertionPoint = HeaderVPBB->getFirstNonPhi();
+ auto &Plan = *HeaderVPBB->getPlan();
+ auto *IV = new VPWidenCanonicalIVRecipe(Plan.getCanonicalIV());
+ HeaderVPBB->insert(IV, NewInsertionPoint);
+
+ VPBuilder::InsertPointGuard Guard(Builder);
+ Builder.setInsertPoint(HeaderVPBB, NewInsertionPoint);
+ VPValue *BlockMask = nullptr;
+ VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
+ BlockMask = Builder.createICmp(CmpInst::ICMP_ULE, IV, BTC);
+ RecipeBuilder.setBlockInMask(HeaderVPBB, BlockMask);
+ }
+
+ void createSwitchEdgeMasks(VPInstruction *SI) {
+ VPBasicBlock *Src = SI->getParent();
+
+ // Create masks where the terminator in Src is a switch. We create mask for
+ // all edges at the same time. This is more efficient, as we can create and
+ // collect compares for all cases once.
+ VPValue *Cond = SI->getOperand(0);
+ VPBasicBlock *DefaultDst = cast<VPBasicBlock>(Src->getSuccessors()[0]);
+ MapVector<VPBasicBlock *, SmallVector<VPValue *>> Dst2Compares;
+ for (const auto &[Idx, Succ] :
+ enumerate(ArrayRef(Src->getSuccessors()).drop_front())) {
+ VPBasicBlock *Dst = cast<VPBasicBlock>(Succ);
+ // assert(!EdgeMaskCache.contains({Src, Dst}) && "Edge masks already
+ // created");
+ // Cases whose destination is the same as default are redundant and can
+ // be ignored - they will get there anyhow.
+ if (Dst == DefaultDst)
+ continue;
+ auto &Compares = Dst2Compares[Dst];
+ VPValue *V = SI->getOperand(Idx + 1);
+ Compares.push_back(Builder.createICmp(CmpInst::ICMP_EQ, Cond, V));
+ }
+
+ // We need to handle 2 separate cases below for all entries in Dst2Compares,
+ // which excludes destinations matching the default destination.
+ VPValue *SrcMask = RecipeBuilder.getBlockInMask(Src);
+ VPValue *DefaultMask = nullptr;
+ for (const auto &[Dst, Conds] : Dst2Compares) {
+ // 1. Dst is not the default destination. Dst is reached if any of the
+ // cases with destination == Dst are taken. Join the conditions for each
+ // case whose destination == Dst using an OR.
+ VPValue *Mask = Conds[0];
+ for (VPValue *V : ArrayRef<VPValue *>(Conds).drop_front())
+ Mask = Builder.createOr(Mask, V);
+ if (SrcMask)
+ Mask = Builder.createLogicalAnd(SrcMask, Mask);
+ RecipeBuilder.setEdgeMask(Src, Dst, Mask);
+
+ // 2. Create the mask for the default destination, which is reached if
+ // none of the cases with destination != default destination are taken.
+ // Join the conditions for each case where the destination is != Dst using
+ // an OR and negate it.
+ DefaultMask = DefaultMask ? Builder.createOr(DefaultMask, Mask) : Mask;
+ }
+
+ if (DefaultMask) {
+ DefaultMask = Builder.createNot(DefaultMask);
+ if (SrcMask)
+ DefaultMask = Builder.createLogicalAnd(SrcMask, DefaultMask);
+ }
+ RecipeBuilder.setEdgeMask(Src, DefaultDst, DefaultMask);
+ }
+};
+
+void VPlanTransforms::predicateAndLinearize(VPlan &Plan, bool FoldTail,
+ VPRecipeBuilder &RecipeBuilder) {
+ VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+ // Scan the body of the loop in a topological order to visit each basic block
+ // after having visited its predecessor basic blocks.
+ VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
+ ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
+ Header);
+ VPPredicator Predicator(RecipeBuilder);
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
+ // Handle VPBBs down to the latch.
+ if (VPBB == LoopRegion->getExiting())
+ break;
+
+ if (VPBB == Header) {
+ Predicator.createHeaderMask(Header, FoldTail);
+ continue;
+ }
+ SmallVector<VPWidenPHIRecipe *> Phis;
+ for (VPRecipeBase &R : VPBB->phis())
+ Phis.push_back(cast<VPWidenPHIRecipe>(&R));
+
+ Predicator.createBlockInMask(VPBB);
+
+ for (VPWidenPHIRecipe *Phi : Phis) {
+ PHINode *IRPhi = cast<PHINode>(Phi->getUnderlyingValue());
+
+ unsigned NumIncoming = IRPhi->getNumIncomingValues();
+
+ // We know that all PHIs in non-header blocks are converted into selects,
+ // so we don't have to worry about the insertion order and we can just use
+ // the builder. At this point we generate the predication tree. There may
+ // be duplications since this is a simple recursive scan, but future
+ // optimizations will clean it up.
+
+ // Map incoming IR BasicBlocks to incoming VPValues, for lookup below.
+ // TODO: Add operands and masks in order from the VPlan predecessors.
+ DenseMap<BasicBlock *, VPValue *> VPIncomingValues;
+ DenseMap<BasicBlock *, VPBasicBlock *> VPIncomingBlocks;
+ for (const auto &[Idx, Pred] :
+ enumerate(predecessors(IRPhi->getParent()))) {
+ VPIncomingValues[Pred] = Phi->getOperand(Idx);
+ VPIncomingBlocks[Pred] =
+ cast<VPBasicBlock>(VPBB->getPredecessors()[Idx]);
+ }
+
+ SmallVector<VPValue *, 2> OperandsWithMask;
+ for (unsigned In = 0; In < NumIncoming; In++) {
+ BasicBlock *Pred = IRPhi->getIncomingBlock(In);
+ OperandsWithMask.push_back(VPIncomingValues.lookup(Pred));
+ VPValue *EdgeMask =
+ RecipeBuilder.getEdgeMask(VPIncomingBlocks.lookup(Pred), VPBB);
+ if (!EdgeMask) {
+ assert(In == 0 && "Both null and non-null edge masks found");
+ assert(all_equal(Phi->operands()) &&
+ "Distinct incoming values with one having a full mask");
+ break;
+ }
+ OperandsWithMask.push_back(EdgeMask);
+ }
+ auto *Blend = new VPBlendRecipe(IRPhi, OperandsWithMask);
+ Blend->insertBefore(Phi);
+ Phi->replaceAllUsesWith(Blend);
+ Phi->eraseFromParent();
+ RecipeBuilder.setRecipe(IRPhi, Blend);
+ }
+ }
+}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 6eba581d0c0bd..10a6aba8243e8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -52,9 +52,7 @@ struct VPlanTransforms {
verifyVPlanIsValid(Plan);
}
- static std::unique_ptr<VPlan>
- buildPlainCFG(Loop *TheLoop, LoopInfo &LI,
- DenseMap<VPBlockBase *, BasicBlock *> &VPB2IRBB);
+ static std::unique_ptr<VPlan> buildPlainCFG(Loop *TheLoop, LoopInfo &LI);
/// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turing \p Plan's
/// flat CFG into a hierarchical CFG. It also creates a VPValue expression for
@@ -203,6 +201,9 @@ struct VPlanTransforms {
/// candidates.
static void narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
unsigned VectorRegWidth);
+
+ static void predicateAndLinearize(VPlan &Plan, bool FoldTail,
+ VPRecipeBuilder &RecipeBuilder);
};
} // namespace llvm
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
index 56b6a7b79eeb7..e360903fc33c0 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
@@ -70,8 +70,7 @@ class VPlanTestIRBase : public testing::Test {
Loop *L = LI->getLoopFor(LoopHeader);
PredicatedScalarEvolution PSE(*SE, *L);
- DenseMap<VPBlockBase *, BasicBlock *> VPB2IRBB;
- auto Plan = VPlanTransforms::buildPlainCFG(L, *LI, VPB2IRBB);
+ auto Plan = VPlanTransforms::buildPlainCFG(L, *LI);
VPlanTransforms::introduceRegions(*Plan, IntegerType::get(*Ctx, 64), PSE,
true, false, L);
return Plan;
More information about the llvm-commits
mailing list