[llvm] [VPlan] Introduce scalar loop header in plan, remove VPLiveOut. (PR #109975)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 25 06:10:54 PDT 2024
https://github.com/fhahn created https://github.com/llvm/llvm-project/pull/109975
Update VPlan to include the scalar loop header. This allows retiring VPLiveOut, as the remaining live-outs can now be handled by adding operands to the wrapped phis in the scalar loop header.
Note that the current version includes only the scalar loop header: it does not model any other blocks of the scalar loop, and it does not wrap the header in a region block. Either of these can be added in this PR or in follow-ups, as needed.
From 39f0f730cb4312a6f61428f1429f5f2b2e6db689 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 25 Sep 2024 13:56:06 +0100
Subject: [PATCH] [VPlan] Introduce scalar loop header in plan, remove
VPLiveOut.
Update VPlan to include the scalar loop header. This allows retiring
VPLiveOut, as the remaining live-outs can now be handled by adding
operands to the wrapped phis in the scalar loop header.
Note that the current version only includes the scalar loop header, no
other loop blocks and also does not wrap it in a region block. This can
either be included in this PR or in follow-ups as needed.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 13 +++--
llvm/lib/Transforms/Vectorize/VPlan.cpp | 38 ++++++-------
llvm/lib/Transforms/Vectorize/VPlan.h | 53 -------------------
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 34 ++----------
.../Transforms/Vectorize/VPlanTransforms.cpp | 2 +-
llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 12 ++---
.../Transforms/Vectorize/VPlanVerifier.cpp | 14 -----
.../vplan-sink-scalars-and-merge.ll | 18 +++++++
8 files changed, 52 insertions(+), 132 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 5e4f33c55610f1..4305750eda26fc 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2956,10 +2956,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
IVEndValues[Entry.first], LoopMiddleBlock, Plan, State);
}
- // Fix live-out phis not already fixed earlier.
- for (const auto &KV : Plan.getLiveOuts())
- KV.second->fixPhi(Plan, State);
-
for (Instruction *PI : PredicatedInstructions)
sinkScalarOperands(&*PI);
@@ -8816,7 +8812,14 @@ static void addLiveOutsForFirstOrderRecurrences(
VPInstruction::ResumePhi, {Resume, FOR->getStartValue()}, {},
"scalar.recur.init");
auto *FORPhi = cast<PHINode>(FOR->getUnderlyingInstr());
- Plan.addLiveOut(FORPhi, ResumePhiRecipe);
+ for (VPRecipeBase &R :
+ *cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor())) {
+ auto *IRI = cast<VPIRInstruction>(&R);
+ if (&IRI->getInstruction() == FORPhi) {
+ IRI->addOperand(ResumePhiRecipe);
+ break;
+ }
+ }
// Now update VPIRInstructions modeling LCSSA phis in the exit block.
// Extract the penultimate value of the recurrence and use it as operand for
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 5e4d487261c6f0..e77fd8c5dca840 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -457,10 +457,17 @@ void VPIRBasicBlock::execute(VPTransformState *State) {
State->Builder.SetInsertPoint(getIRBasicBlock()->getTerminator());
executeRecipes(State, getIRBasicBlock());
if (getSingleSuccessor()) {
- assert(isa<UnreachableInst>(getIRBasicBlock()->getTerminator()));
- auto *Br = State->Builder.CreateBr(getIRBasicBlock());
- Br->setOperand(0, nullptr);
- getIRBasicBlock()->getTerminator()->eraseFromParent();
+ auto *SuccVPIRBB = dyn_cast<VPIRBasicBlock>(getSingleSuccessor());
+ if (SuccVPIRBB && SuccVPIRBB->getIRBasicBlock() ==
+ getIRBasicBlock()->getSingleSuccessor()) {
+ cast<BranchInst>(getIRBasicBlock()->getTerminator())
+ ->setOperand(0, nullptr);
+ } else {
+ assert(isa<UnreachableInst>(getIRBasicBlock()->getTerminator()));
+ auto *Br = State->Builder.CreateBr(getIRBasicBlock());
+ Br->setOperand(0, nullptr);
+ getIRBasicBlock()->getTerminator()->eraseFromParent();
+ }
}
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
@@ -844,10 +851,6 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
#endif
VPlan::~VPlan() {
- for (auto &KV : LiveOuts)
- delete KV.second;
- LiveOuts.clear();
-
if (Entry) {
VPValue DummyValue;
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
@@ -902,6 +905,8 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion);
VPBasicBlock *ScalarPH = new VPBasicBlock("scalar.ph");
+ VPBasicBlock *ScalarHeader = createVPIRBasicBlockFor(TheLoop->getHeader());
+ VPBlockUtils::connectBlocks(ScalarPH, ScalarHeader);
if (!RequiresScalarEpilogueCheck) {
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
return Plan;
@@ -1051,6 +1056,8 @@ void VPlan::execute(VPTransformState *State) {
BrInst->insertBefore(MiddleBB->getTerminator());
MiddleBB->getTerminator()->eraseFromParent();
State->CFG.DTU.applyUpdates({{DominatorTree::Delete, MiddleBB, ScalarPh}});
+ State->CFG.DTU.applyUpdates(
+ {{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
// Generate code in the loop pre-header and body.
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
@@ -1169,12 +1176,6 @@ void VPlan::print(raw_ostream &O) const {
Block->print(O, "", SlotTracker);
}
- if (!LiveOuts.empty())
- O << "\n";
- for (const auto &KV : LiveOuts) {
- KV.second->print(O, SlotTracker);
- }
-
O << "}\n";
}
@@ -1211,11 +1212,6 @@ LLVM_DUMP_METHOD
void VPlan::dump() const { print(dbgs()); }
#endif
-void VPlan::addLiveOut(PHINode *PN, VPValue *V) {
- assert(LiveOuts.count(PN) == 0 && "an exit value for PN already exists");
- LiveOuts.insert({PN, new VPLiveOut(PN, V)});
-}
-
static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
DenseMap<VPValue *, VPValue *> &Old2NewVPValues) {
// Update the operands of all cloned recipes starting at NewEntry. This
@@ -1283,10 +1279,6 @@ VPlan *VPlan::duplicate() {
remapOperands(Preheader, NewPreheader, Old2NewVPValues);
remapOperands(Entry, NewEntry, Old2NewVPValues);
- // Clone live-outs.
- for (const auto &[_, LO] : LiveOuts)
- NewPlan->addLiveOut(LO->getPhi(), Old2NewVPValues[LO->getOperand(0)]);
-
// Initialize remaining fields of cloned VPlan.
NewPlan->VFs = VFs;
NewPlan->UFs = UFs;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index c886a39aec76e5..e2caf3dcdfd4af 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -671,48 +671,6 @@ class VPBlockBase {
virtual VPBlockBase *clone() = 0;
};
-/// A value that is used outside the VPlan. The operand of the user needs to be
-/// added to the associated phi node. The incoming block from VPlan is
-/// determined by where the VPValue is defined: if it is defined by a recipe
-/// outside a region, its parent block is used, otherwise the middle block is
-/// used.
-class VPLiveOut : public VPUser {
- PHINode *Phi;
-
-public:
- VPLiveOut(PHINode *Phi, VPValue *Op)
- : VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {}
-
- static inline bool classof(const VPUser *U) {
- return U->getVPUserID() == VPUser::VPUserID::LiveOut;
- }
-
- /// Fix the wrapped phi node. This means adding an incoming value to exit
- /// block phi's from the vector loop via middle block (values from scalar loop
- /// already reach these phi's), and updating the value to scalar header phi's
- /// from the scalar preheader.
- void fixPhi(VPlan &Plan, VPTransformState &State);
-
- /// Returns true if the VPLiveOut uses scalars of operand \p Op.
- bool usesScalars(const VPValue *Op) const override {
- assert(is_contained(operands(), Op) &&
- "Op must be an operand of the recipe");
- return true;
- }
-
- PHINode *getPhi() const { return Phi; }
-
- /// Live-outs are marked as only using the first part during the transition
- /// to unrolling directly on VPlan.
- /// TODO: Remove after unroller transition.
- bool onlyFirstPartUsed(const VPValue *Op) const override { return true; }
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- /// Print the VPLiveOut to \p O.
- void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
-#endif
-};
-
/// Struct to hold various analysis needed for cost computations.
struct VPCostContext {
const TargetTransformInfo &TTI;
@@ -3454,11 +3412,6 @@ class VPlan {
/// definitions are VPValues that hold a pointer to their underlying IR.
SmallVector<VPValue *, 16> VPLiveInsToFree;
- /// Values used outside the plan. It contains live-outs that need fixing. Any
- /// live-out that is fixed outside VPlan needs to be removed. The remaining
- /// live-outs are fixed via VPLiveOut::fixPhi.
- MapVector<PHINode *, VPLiveOut *> LiveOuts;
-
/// Mapping from SCEVs to the VPValues representing their expansions.
/// NOTE: This mapping is temporary and will be removed once all users have
/// been modeled in VPlan directly.
@@ -3638,12 +3591,6 @@ class VPlan {
return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
}
- void addLiveOut(PHINode *PN, VPValue *V);
-
- const MapVector<PHINode *, VPLiveOut *> &getLiveOuts() const {
- return LiveOuts;
- }
-
VPValue *getSCEVExpansion(const SCEV *S) const {
return SCEVToExpansion.lookup(S);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index f33293e65010f9..2ba659fb850852 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -197,35 +197,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
}
}
-void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
- VPValue *ExitValue = getOperand(0);
- VPBasicBlock *MiddleVPBB =
- cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
- VPRecipeBase *ExitingRecipe = ExitValue->getDefiningRecipe();
- auto *ExitingVPBB = ExitingRecipe ? ExitingRecipe->getParent() : nullptr;
- // Values leaving the vector loop reach live out phi's in the exiting block
- // via middle block.
- auto *PredVPBB = !ExitingVPBB || ExitingVPBB->getEnclosingLoopRegion()
- ? MiddleVPBB
- : ExitingVPBB;
- BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
- Value *V = State.get(ExitValue, VPIteration(0, 0));
- if (Phi->getBasicBlockIndex(PredBB) != -1)
- Phi->setIncomingValueForBlock(PredBB, V);
- else
- Phi->addIncoming(V, PredBB);
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void VPLiveOut::print(raw_ostream &O, VPSlotTracker &SlotTracker) const {
- O << "Live-out ";
- getPhi()->printAsOperand(O);
- O << " = ";
- getOperand(0)->printAsOperand(O, SlotTracker);
- O << "\n";
-}
-#endif
-
void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
assert(!Parent && "Recipe already in some VPBasicBlock");
assert(InsertPos->getParent() &&
@@ -867,7 +838,10 @@ void VPIRInstruction::execute(VPTransformState &State) {
State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
Value *V = State.get(ExitValue, VPIteration(0, Lane));
auto *Phi = cast<PHINode>(&I);
- Phi->addIncoming(V, PredBB);
+ if (Phi->getBasicBlockIndex(PredBB) == -1)
+ Phi->addIncoming(V, PredBB);
+ else
+ Phi->setIncomingValueForBlock(PredBB, V);
}
// Advance the insert point after the wrapped IR instruction. This allows
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 3b37a1ec9560ee..00f12bf915e0d8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -378,7 +378,7 @@ static bool mergeBlocksIntoPredecessors(VPlan &Plan) {
// Don't fold the exit block of the Plan into its single predecessor for
// now.
// TODO: Remove restriction once more of the skeleton is modeled in VPlan.
- if (VPBB->getNumSuccessors() == 0 && !VPBB->getParent())
+ if (!VPBB->getParent())
continue;
auto *PredVPBB =
dyn_cast_or_null<VPBasicBlock>(VPBB->getSinglePredecessor());
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 4907d3f0397274..17a4cbd36bd30d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -265,6 +265,7 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
if (auto *VPI = dyn_cast<VPInstruction>(&R)) {
VPValue *Op0, *Op1;
+
if (match(VPI, m_VPInstruction<VPInstruction::ExtractFromEnd>(
m_VPValue(Op0), m_VPValue(Op1)))) {
VPI->setOperand(1, getValueForPart(Op1, UF - 1));
@@ -281,6 +282,11 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
}
return;
}
+ if (match(VPI, m_VPInstruction<VPInstruction::ResumePhi>(m_VPValue(Op0),
+ m_VPValue(Op1)))) {
+ addUniformForAllParts(VPI);
+ return;
+ }
if (vputils::onlyFirstPartUsed(VPI)) {
addUniformForAllParts(VPI);
@@ -467,11 +473,5 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx) {
Part++;
}
- // Remap the operand of live-outs to the last part.
- for (const auto &[_, LO] : Plan.getLiveOuts()) {
- VPValue *In = Unroller.getValueForPart(LO->getOperand(0), UF - 1);
- LO->setOperand(0, In);
- }
-
VPlanTransforms::removeDeadRecipes(Plan);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 99bc4c38a3c3cd..4badf295092827 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -244,14 +244,6 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) {
return false;
}
- VPBlockBase *MiddleBB =
- IRBB->getPlan()->getVectorLoopRegion()->getSingleSuccessor();
- if (IRBB != IRBB->getPlan()->getPreheader() &&
- IRBB->getSinglePredecessor() != MiddleBB) {
- errs() << "VPIRBasicBlock can only be used as pre-header or a successor of "
- "middle-block at the moment!\n";
- return false;
- }
return true;
}
@@ -416,12 +408,6 @@ bool VPlanVerifier::verify(const VPlan &Plan) {
return false;
}
- for (const auto &KV : Plan.getLiveOuts())
- if (KV.second->getNumOperands() != 1) {
- errs() << "live outs must have a single operand\n";
- return false;
- }
-
return true;
}
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
index 0f3cd9d4ca4d61..2dddf766cb9cda 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
@@ -1077,6 +1077,17 @@ define void @merge_with_dead_gep_between_regions(i32 %n, ptr noalias %src, ptr n
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
+; CHECK-NEXT: Successor(s): ir-bb<loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT: IR %iv = phi i32 [ %n, %entry ], [ %iv.next, %loop ]
+; CHECK-NEXT: IR %iv.next = add nsw i32 %iv, -1
+; CHECK-NEXT: IR %gep.src = getelementptr inbounds i32, ptr %src, i32 %iv
+; CHECK-NEXT: IR %l = load i32, ptr %gep.src, align 16
+; CHECK-NEXT: IR %dead_gep = getelementptr inbounds i32, ptr %dst, i64 1
+; CHECK-NEXT: IR %gep.dst = getelementptr inbounds i32, ptr %dst, i32 %iv
+; CHECK-NEXT: IR store i32 %l, ptr %gep.dst, align 16
+; CHECK-NEXT: IR %ec = icmp eq i32 %iv.next, 0
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
@@ -1156,6 +1167,13 @@ define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<loop.header>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop.header>:
+; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
+; CHECK-NEXT: IR %ptr.iv.next = getelementptr inbounds i8, ptr %ptr.iv, i64 -1
+; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv.next, align 1
+; CHECK-NEXT: IR %c.1 = icmp eq i8 %l, 0
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
More information about the llvm-commits mailing list