[llvm] [VPlan] Introduce VPScalarPHIRecipe, use for can & EVL IV codegen (NFC). (PR #114305)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 24 11:28:52 PST 2024
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/114305
>From 1740a4f0f8a9a6bddddee0b5e19ca4d5d855003e Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 30 Oct 2024 20:25:00 +0000
Subject: [PATCH 1/2] [VPlan] Introduce VPScalarPHIRecipe, use for can & EVL IV
codegen (NFC).
Introduce a general recipe to generate a scalar phi. Lower
VPCanonicalIVPHIRecipe and VPEVLBasedIVPHIRecipe to VPScalarPHIRecipe
before plan execution, avoiding the need for duplicated ::execute
implementations. There are other cases that could benefit, including
in-loop reduction phis.
Builds on a similar idea as
https://github.com/llvm/llvm-project/pull/82270.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 1 +
llvm/lib/Transforms/Vectorize/VPlan.cpp | 7 ++-
llvm/lib/Transforms/Vectorize/VPlan.h | 52 +++++++++++++++++--
.../Transforms/Vectorize/VPlanAnalysis.cpp | 16 +++---
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 41 ++++++++-------
.../Transforms/Vectorize/VPlanTransforms.cpp | 21 ++++++++
.../Transforms/Vectorize/VPlanTransforms.h | 3 ++
llvm/lib/Transforms/Vectorize/VPlanValue.h | 1 +
8 files changed, 106 insertions(+), 36 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1ebc62f9843905..6d857efdbcf038 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7706,6 +7706,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
BestVPlan.prepareToExecute(ILV.getTripCount(),
ILV.getOrCreateVectorTripCount(nullptr),
CanonicalIVStartValue, State);
+ VPlanTransforms::prepareToExecute(BestVPlan);
BestVPlan.execute(&State);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 08db0d51ef3abb..8c6e44776a1df4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1080,10 +1080,9 @@ void VPlan::execute(VPTransformState *State) {
}
auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
- bool NeedsScalar =
- isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
- (isa<VPReductionPHIRecipe>(PhiR) &&
- cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
+ bool NeedsScalar = isa<VPScalarPHIRecipe>(PhiR) ||
+ (isa<VPReductionPHIRecipe>(PhiR) &&
+ cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
Value *Phi = State->get(PhiR, NeedsScalar);
Value *Val = State->get(PhiR->getBackedgeValue(), NeedsScalar);
cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index abfe97b4ab55b6..6853be6a9ce30c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2237,6 +2237,45 @@ class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe,
#endif
};
+/// Recipe to generate a scalar PHI. Used to generate code for recipes that
+/// produce scalar header phis, including VPCanonicalIVPHIRecipe and
+/// VPEVLBasedIVPHIRecipe.
+class VPScalarPHIRecipe : public VPHeaderPHIRecipe {
+ std::string Name;
+
+public:
+ VPScalarPHIRecipe(VPValue *Start, VPValue *BackedgeValue, DebugLoc DL,
+ StringRef Name)
+ : VPHeaderPHIRecipe(VPDef::VPScalarPHISC, nullptr, Start, DL),
+ Name(Name.str()) {
+ addOperand(BackedgeValue);
+ }
+
+ ~VPScalarPHIRecipe() override = default;
+
+ VPScalarPHIRecipe *clone() override {
+ llvm_unreachable("cloning not implemented yet");
+ }
+
+ VP_CLASSOF_IMPL(VPDef::VPScalarPHISC)
+
+ /// Generate the phi/select nodes.
+ void execute(VPTransformState &State) override;
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return true;
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
/// A recipe for handling phis that are widened in the vector loop.
/// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are
/// managed in the recipe directly.
@@ -3132,8 +3171,10 @@ class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
return D->getVPDefID() == VPDef::VPCanonicalIVPHISC;
}
- /// Generate the canonical scalar induction phi of the vector loop.
- void execute(VPTransformState &State) override;
+ void execute(VPTransformState &State) override {
+ llvm_unreachable(
+ "cannot execute this recipe, should be replaced by VPScalarPHIRecipe");
+ }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
@@ -3229,9 +3270,10 @@ class VPEVLBasedIVPHIRecipe : public VPHeaderPHIRecipe {
return D->getVPDefID() == VPDef::VPEVLBasedIVPHISC;
}
- /// Generate phi for handling IV based on EVL over iterations correctly.
- /// TODO: investigate if it can share the code with VPCanonicalIVPHIRecipe.
- void execute(VPTransformState &State) override;
+ void execute(VPTransformState &State) override {
+ llvm_unreachable(
+ "cannot execute this recipe, should be replaced by VPScalarPHIRecipe");
+ }
/// Return the cost of this VPEVLBasedIVPHIRecipe.
InstructionCost computeCost(ElementCount VF,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 8b8ab6be99b0d5..83efbc2f970cca 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -251,14 +251,14 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
TypeSwitch<const VPRecipeBase *, Type *>(V->getDefiningRecipe())
.Case<VPActiveLaneMaskPHIRecipe, VPCanonicalIVPHIRecipe,
VPFirstOrderRecurrencePHIRecipe, VPReductionPHIRecipe,
- VPWidenPointerInductionRecipe, VPEVLBasedIVPHIRecipe>(
- [this](const auto *R) {
- // Handle header phi recipes, except VPWidenIntOrFpInduction
- // which needs special handling due it being possibly truncated.
- // TODO: consider inferring/caching type of siblings, e.g.,
- // backedge value, here and in cases below.
- return inferScalarType(R->getStartValue());
- })
+ VPWidenPointerInductionRecipe, VPEVLBasedIVPHIRecipe,
+ VPScalarPHIRecipe>([this](const auto *R) {
+ // Handle header phi recipes, except VPWidenIntOrFpInduction
+ // which needs special handling due it being possibly truncated.
+ // TODO: consider inferring/caching type of siblings, e.g.,
+ // backedge value, here and in cases below.
+ return inferScalarType(R->getStartValue());
+ })
.Case<VPWidenIntOrFpInductionRecipe, VPDerivedIVRecipe>(
[](const auto *R) { return R->getScalarType(); })
.Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index ef2ca9af7268d1..88bf1f775f80bc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3096,17 +3096,6 @@ InstructionCost VPInterleaveRecipe::computeCost(ElementCount VF,
VectorTy, std::nullopt, CostKind, 0);
}
-void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
- Value *Start = getStartValue()->getLiveInIRValue();
- PHINode *Phi = PHINode::Create(Start->getType(), 2, "index");
- Phi->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
-
- BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
- Phi->addIncoming(Start, VectorPH);
- Phi->setDebugLoc(getDebugLoc());
- State.set(this, Phi, /*IsScalar*/ true);
-}
-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
@@ -3148,8 +3137,6 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
assert(!onlyScalarsGenerated(State.VF.isScalable()) &&
"Recipe should have been replaced");
- auto *IVR = getParent()->getPlan()->getCanonicalIV();
- PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, /*IsScalar*/ true));
unsigned CurrentPart = getUnrollPart(*this);
// Build a pointer phi
@@ -3159,6 +3146,12 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
PHINode *NewPointerPhi = nullptr;
if (CurrentPart == 0) {
+ auto *IVR = cast<VPHeaderPHIRecipe>(&getParent()
+ ->getPlan()
+ ->getVectorLoopRegion()
+ ->getEntryBasicBlock()
+ ->front());
+ PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, /*IsScalar*/ true));
NewPointerPhi = PHINode::Create(ScStValueType, 2, "pointer.phi",
CanonicalIV->getIterator());
NewPointerPhi->addIncoming(ScalarStartValue, VectorPH);
@@ -3469,20 +3462,30 @@ void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent,
}
#endif
-void VPEVLBasedIVPHIRecipe::execute(VPTransformState &State) {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPEVLBasedIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI ";
+
+ printAsOperand(O, SlotTracker);
+ O << " = phi ";
+ printOperands(O, SlotTracker);
+}
+#endif
+
+void VPScalarPHIRecipe::execute(VPTransformState &State) {
BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
Value *Start = State.get(getOperand(0), VPLane(0));
- PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, "evl.based.iv");
+ PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, Name);
Phi->addIncoming(Start, VectorPH);
Phi->setDebugLoc(getDebugLoc());
State.set(this, Phi, /*IsScalar=*/true);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void VPEVLBasedIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI ";
-
+void VPScalarPHIRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "SCALAR-PHI";
printAsOperand(O, SlotTracker);
O << " = phi ";
printOperands(O, SlotTracker);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index b9ab8a8fe60107..14a88885a9cb0d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1794,3 +1794,24 @@ void VPlanTransforms::createInterleaveGroups(
}
}
}
+
+void VPlanTransforms::prepareToExecute(VPlan &Plan) {
+ ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
+ Plan.getVectorLoopRegion());
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+ vp_depth_first_deep(Plan.getEntry()))) {
+ for (VPRecipeBase &R : make_early_inc_range(VPBB->phis())) {
+ if (!isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(&R))
+ continue;
+ auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
+ StringRef Name =
+ isa<VPCanonicalIVPHIRecipe>(PhiR) ? "index" : "evl.based.iv";
+ auto *ScalarR =
+ new VPScalarPHIRecipe(PhiR->getStartValue(), PhiR->getBackedgeValue(),
+ PhiR->getDebugLoc(), Name);
+ ScalarR->insertBefore(PhiR);
+ PhiR->replaceAllUsesWith(ScalarR);
+ PhiR->eraseFromParent();
+ }
+ }
+}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 11e094db6294f6..1491e0a8df04d5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -123,6 +123,9 @@ struct VPlanTransforms {
/// Remove dead recipes from \p Plan.
static void removeDeadRecipes(VPlan &Plan);
+
+ /// Lower abstract recipes to concrete ones, that can be codegen'd.
+ static void prepareToExecute(VPlan &Plan);
};
} // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 691b0d40823cfb..957a602091c733 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -359,6 +359,7 @@ class VPDef {
VPFirstOrderRecurrencePHISC,
VPWidenIntOrFpInductionSC,
VPWidenPointerInductionSC,
+ VPScalarPHISC,
VPReductionPHISC,
// END: SubclassID for recipes that inherit VPHeaderPHIRecipe
// END: Phi-like recipes
>From 74d093d1787bfef6b3ac0d24b4c1cd12a6ca0a75 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 24 Nov 2024 15:42:47 +0000
Subject: [PATCH 2/2] [VPlan] Replace VPRegionBlock with explicit CFG before
execute (NFCI).
Building on top of https://github.com/llvm/llvm-project/pull/114305,
replace VPRegionBlocks with explicit CFG before executing.
This will enable further simplifications of phi handling during execution
and transformations that do not have to preserve the canonical IV required
by loop regions. This for example could include replacing the canonical
IV with an EVL based phi while completely removing the original canonical
IV.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 12 +-
llvm/lib/Transforms/Vectorize/VPlan.cpp | 165 ++++++++++--------
llvm/lib/Transforms/Vectorize/VPlan.h | 14 +-
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 12 +-
.../Transforms/Vectorize/VPlanTransforms.cpp | 18 +-
.../Transforms/Vectorize/VPlanTransforms.h | 3 +-
6 files changed, 128 insertions(+), 96 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index c056fcb7b67d5d..ec48f90ba4fcca 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2368,12 +2368,6 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
// End if-block.
VPRegionBlock *Parent = RepRecipe->getParent()->getParent();
bool IfPredicateInstr = Parent ? Parent->isReplicator() : false;
- assert((Parent || all_of(RepRecipe->operands(),
- [](VPValue *Op) {
- return Op->isDefinedOutsideLoopRegions();
- })) &&
- "Expected a recipe is either within a region or all of its operands "
- "are defined outside the vectorized region.");
if (IfPredicateInstr)
PredicatedInstructions.push_back(Cloned);
}
@@ -2969,8 +2963,8 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
for (Instruction *PI : PredicatedInstructions)
sinkScalarOperands(&*PI);
- VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion();
- VPBasicBlock *HeaderVPBB = VectorRegion->getEntryBasicBlock();
+ VPBasicBlock *HeaderVPBB = cast<VPBasicBlock>(
+ State.Plan->getVectorPreheader()->getSingleSuccessor());
BasicBlock *HeaderBB = State.CFG.VPBB2IRBB[HeaderVPBB];
// Remove redundant induction instructions.
@@ -7764,7 +7758,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
LLVMLoopVectorizeFollowupVectorized});
VPBasicBlock *HeaderVPBB =
- BestVPlan.getVectorLoopRegion()->getEntryBasicBlock();
+ cast<VPBasicBlock>(BestVPlan.getVectorPreheader()->getSingleSuccessor());
Loop *L = LI->getLoopFor(State.CFG.VPBB2IRBB[HeaderVPBB]);
if (VectorizedLoopID)
L->setLoopID(*VectorizedLoopID);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index b801d1863e252c..a3e11c7f2f5f8d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -352,8 +352,8 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
}
BasicBlock *VPTransformState::CFGState::getPreheaderBBFor(VPRecipeBase *R) {
- VPRegionBlock *LoopRegion = R->getParent()->getEnclosingLoopRegion();
- return VPBB2IRBB[LoopRegion->getPreheaderVPBB()];
+
+ return VPBB2IRBB[cast<VPBasicBlock>(R->getParent()->getPredecessors()[0])];
}
void VPTransformState::addNewMetadata(Instruction *To,
@@ -425,6 +425,8 @@ void VPBasicBlock::connectToPredecessors(VPTransformState::CFGState &CFG) {
VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
+ if (!PredBB)
+ continue;
assert(PredBB && "Predecessor basic-block not found building successor.");
auto *PredBBTerminator = PredBB->getTerminator();
@@ -432,6 +434,8 @@ void VPBasicBlock::connectToPredecessors(VPTransformState::CFGState &CFG) {
auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
if (isa<UnreachableInst>(PredBBTerminator)) {
+ if (PredVPSuccessors.size() == 2)
+ continue;
assert(PredVPSuccessors.size() == 1 &&
"Predecessor ending w/o branch must have single successor.");
DebugLoc DL = PredBBTerminator->getDebugLoc();
@@ -480,6 +484,21 @@ void VPBasicBlock::execute(VPTransformState *State) {
bool Replica = bool(State->Lane);
BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
+ if (isHeader()) {
+ // Create and register the new vector loop.
+ State->CurrentVectorLoop = State->LI->AllocateLoop();
+ BasicBlock *VectorPH =
+ State->CFG.VPBB2IRBB[cast<VPBasicBlock>(getPredecessors()[0])];
+ Loop *ParentLoop = State->LI->getLoopFor(VectorPH);
+
+ // Insert the new loop into the loop nest and register the new basic blocks
+ // before calling any utilities such as SCEV that require valid LoopInfo.
+ if (ParentLoop)
+ ParentLoop->addChildLoop(State->CurrentVectorLoop);
+ else
+ State->LI->addTopLevelLoop(State->CurrentVectorLoop);
+ }
+
auto IsReplicateRegion = [](VPBlockBase *BB) {
auto *R = dyn_cast_or_null<VPRegionBlock>(BB);
return R && R->isReplicator();
@@ -718,37 +737,13 @@ void VPRegionBlock::dropAllReferences(VPValue *NewValue) {
}
void VPRegionBlock::execute(VPTransformState *State) {
- ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
- RPOT(Entry);
-
- if (!isReplicator()) {
- // Create and register the new vector loop.
- Loop *PrevLoop = State->CurrentVectorLoop;
- State->CurrentVectorLoop = State->LI->AllocateLoop();
- BasicBlock *VectorPH = State->CFG.VPBB2IRBB[getPreheaderVPBB()];
- Loop *ParentLoop = State->LI->getLoopFor(VectorPH);
-
- // Insert the new loop into the loop nest and register the new basic blocks
- // before calling any utilities such as SCEV that require valid LoopInfo.
- if (ParentLoop)
- ParentLoop->addChildLoop(State->CurrentVectorLoop);
- else
- State->LI->addTopLevelLoop(State->CurrentVectorLoop);
-
- // Visit the VPBlocks connected to "this", starting from it.
- for (VPBlockBase *Block : RPOT) {
- LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
- Block->execute(State);
- }
-
- State->CurrentVectorLoop = PrevLoop;
- return;
- }
-
+ assert(isReplicator() &&
+ "Loop regions should have been lowered to plain CFG");
assert(!State->Lane && "Replicating a Region with non-null instance.");
-
- // Enter replicating mode.
assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
+
+ ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
+ Entry);
State->Lane = VPLane(0);
for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
++Lane) {
@@ -823,6 +818,26 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
}
#endif
+void VPRegionBlock::removeRegion() {
+ auto *Header = cast<VPBasicBlock>(getEntry());
+ VPBlockBase *Preheader = getSinglePredecessor();
+ auto *Exiting = cast<VPBasicBlock>(getExiting());
+
+ VPBlockBase *Middle = getSingleSuccessor();
+ VPBlockUtils::disconnectBlocks(Preheader, this);
+ VPBlockUtils::disconnectBlocks(this, Middle);
+
+ for (VPBlockBase *VPB : vp_depth_first_shallow(Entry))
+ VPB->setParent(nullptr);
+
+ VPBlockUtils::connectBlocks(Preheader, Header);
+ VPBlockUtils::connectBlocks(Exiting, Middle);
+
+ // Set LoopRegion's Entry to nullptr, as the CFG from LoopRegion shouldn't
+ // be deleted when the region is deleted.
+ Entry = nullptr;
+}
+
VPlan::~VPlan() {
if (Entry) {
VPValue DummyValue;
@@ -1032,50 +1047,55 @@ void VPlan::execute(VPTransformState *State) {
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
Block->execute(State);
- VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock();
- BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
-
// Fix the latch value of canonical, reduction and first-order recurrences
// phis in the vector loop.
- VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
- for (VPRecipeBase &R : Header->phis()) {
- // Skip phi-like recipes that generate their backedege values themselves.
- if (isa<VPWidenPHIRecipe>(&R))
+ for (VPBasicBlock *Header :
+ VPBlockUtils::blocksOnly<VPBasicBlock>(vp_depth_first_shallow(Entry))) {
+ if (!Header->isHeader())
continue;
+ for (VPRecipeBase &R : Header->phis()) {
+ VPBasicBlock *LatchVPBB =
+ cast<VPBasicBlock>(Header->getPredecessors()[1]);
+ BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
- if (isa<VPWidenPointerInductionRecipe>(&R) ||
- isa<VPWidenIntOrFpInductionRecipe>(&R)) {
- PHINode *Phi = nullptr;
- if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
- Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
- } else {
- auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
- assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
- "recipe generating only scalars should have been replaced");
- auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
- Phi = cast<PHINode>(GEP->getPointerOperand());
- }
-
- Phi->setIncomingBlock(1, VectorLatchBB);
+ // Skip phi-like recipes that generate their backedge values themselves.
+ if (isa<VPWidenPHIRecipe>(&R))
+ continue;
- // Move the last step to the end of the latch block. This ensures
- // consistent placement of all induction updates.
- Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
- Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode());
+ if (isa<VPWidenPointerInductionRecipe>(&R) ||
+ isa<VPWidenIntOrFpInductionRecipe>(&R)) {
+ PHINode *Phi = nullptr;
+ if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
+ Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
+ } else {
+ auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
+ assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
+ "recipe generating only scalars should have been replaced");
+ auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
+ Phi = cast<PHINode>(GEP->getPointerOperand());
+ }
+
+ Phi->setIncomingBlock(1, VectorLatchBB);
+
+ // Move the last step to the end of the latch block. This ensures
+ // consistent placement of all induction updates.
+ Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
+ Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode());
+
+ // Use the steps for the last part as backedge value for the induction.
+ if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
+ Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
+ continue;
+ }
- // Use the steps for the last part as backedge value for the induction.
- if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
- Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
- continue;
+ auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
+ bool NeedsScalar = isa<VPScalarPHIRecipe>(PhiR) ||
+ (isa<VPReductionPHIRecipe>(PhiR) &&
+ cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
+ Value *Phi = State->get(PhiR, NeedsScalar);
+ Value *Val = State->get(PhiR->getBackedgeValue(), NeedsScalar);
+ cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
}
-
- auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
- bool NeedsScalar = isa<VPScalarPHIRecipe>(PhiR) ||
- (isa<VPReductionPHIRecipe>(PhiR) &&
- cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
- Value *Phi = State->get(PhiR, NeedsScalar);
- Value *Val = State->get(PhiR->getBackedgeValue(), NeedsScalar);
- cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
}
State->CFG.DTU.flush();
@@ -1417,8 +1437,13 @@ void VPlanIngredient::print(raw_ostream &O) const {
#endif
bool VPValue::isDefinedOutsideLoopRegions() const {
- return !hasDefiningRecipe() ||
- !getDefiningRecipe()->getParent()->getEnclosingLoopRegion();
+ auto *DefR = getDefiningRecipe();
+ if (!DefR)
+ return true;
+
+ const VPBasicBlock *DefVPBB = DefR->getParent();
+ auto *Plan = DefVPBB->getPlan();
+ return DefVPBB == Plan->getPreheader() || DefVPBB == Plan->getEntry();
}
void VPValue::replaceAllUsesWith(VPValue *New) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 2db936ffcaf464..58e57db618ddd8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3585,6 +3585,8 @@ class VPBasicBlock : public VPBlockBase {
return NewBlock;
}
+ bool isHeader() { return any_of(phis(), IsaPred<VPHeaderPHIRecipe>); }
+
protected:
/// Execute the recipes in the IR basic block \p BB.
void executeRecipes(VPTransformState *State, BasicBlock *BB);
@@ -3742,6 +3744,10 @@ class VPRegionBlock : public VPBlockBase {
/// Clone all blocks in the single-entry single-exit region of the block and
/// their recipes without updating the operands of the cloned recipes.
VPRegionBlock *clone() override;
+
+ /// Remove the current region from its VPlan, connecting its predecessor to
+ /// its entry and exiting block to its successor.
+ void removeRegion();
};
/// VPlan models a candidate for vectorization, encoding various decisions take
@@ -3875,10 +3881,10 @@ class VPlan {
/// whether to execute the scalar tail loop or the exit block from the loop
/// latch.
const VPBasicBlock *getMiddleBlock() const {
- return cast<VPBasicBlock>(getVectorLoopRegion()->getSingleSuccessor());
+ return cast<VPBasicBlock>(getScalarPreheader()->getSinglePredecessor());
}
VPBasicBlock *getMiddleBlock() {
- return cast<VPBasicBlock>(getVectorLoopRegion()->getSingleSuccessor());
+ return cast<VPBasicBlock>(getScalarPreheader()->getSinglePredecessor());
}
/// Return an iterator range over the VPIRBasicBlock wrapping the exit blocks
@@ -4000,9 +4006,7 @@ class VPlan {
}
/// Returns the preheader of the vector loop region.
- VPBasicBlock *getVectorPreheader() {
- return cast<VPBasicBlock>(getVectorLoopRegion()->getSinglePredecessor());
- }
+ VPBasicBlock *getVectorPreheader() { return cast<VPBasicBlock>(getEntry()); }
/// Returns the canonical induction recipe of the vector loop.
VPCanonicalIVPHIRecipe *getCanonicalIV() {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index c9d15c8e296ae8..dc8a4435818475 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -491,11 +491,10 @@ Value *VPInstruction::generate(VPTransformState &State) {
CondBr->setSuccessor(0, nullptr);
Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
- if (!getParent()->isExiting())
+ VPBasicBlock *Header = cast<VPBasicBlock>(getParent()->getSuccessors()[1]);
+ if (!State.CFG.VPBB2IRBB.contains(Header))
return CondBr;
- VPRegionBlock *ParentRegion = getParent()->getParent();
- VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
return CondBr;
}
@@ -506,9 +505,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
Value *Cond = Builder.CreateICmpEQ(IV, TC);
// Now create the branch.
- auto *Plan = getParent()->getPlan();
- VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
- VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
+ VPBasicBlock *Header = cast<VPBasicBlock>(getParent()->getSuccessors()[1]);
// Replace the temporary unreachable terminator with a new conditional
// branch, hooking it up to backward destination (the header) now and to the
@@ -3150,9 +3147,6 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
PHINode *NewPointerPhi = nullptr;
if (CurrentPart == 0) {
auto *IVR = cast<VPHeaderPHIRecipe>(&getParent()
- ->getPlan()
- ->getVectorLoopRegion()
- ->getEntryBasicBlock()
->front());
PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, /*IsScalar*/ true));
NewPointerPhi = PHINode::Create(ScStValueType, 2, "pointer.phi",
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index ec39f03564898b..70e8107f2d27c7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1802,8 +1802,22 @@ void VPlanTransforms::createInterleaveGroups(
}
void VPlanTransforms::prepareToExecute(VPlan &Plan) {
- ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
- Plan.getVectorLoopRegion());
+ // Replace loop regions with explicit CFG.
+ SmallVector<VPRegionBlock *> LoopRegions;
+ for (VPRegionBlock *R : VPBlockUtils::blocksOnly<VPRegionBlock>(
+ vp_depth_first_deep(Plan.getEntry()))) {
+ if (!R->isReplicator())
+ LoopRegions.push_back(R);
+ }
+ for (VPRegionBlock *R : LoopRegions) {
+ VPBlockBase *Header = R->getEntry();
+ VPBlockBase *Latch = R->getExiting();
+ R->removeRegion();
+ // Add explicit backedge.
+ VPBlockUtils::connectBlocks(Latch, Header);
+ delete R;
+ }
+
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_deep(Plan.getEntry()))) {
for (VPRecipeBase &R : make_early_inc_range(VPBB->phis())) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 1491e0a8df04d5..6cb316e47ab626 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -124,7 +124,8 @@ struct VPlanTransforms {
/// Remove dead recipes from \p Plan.
static void removeDeadRecipes(VPlan &Plan);
- /// Lower abstract recipes to concrete ones, that can be codegen'd.
+ /// Lower abstract recipes to concrete ones, that can be codegen'd and replace
+ /// loop regions with explicit CFG.
static void prepareToExecute(VPlan &Plan);
};
More information about the llvm-commits
mailing list