[llvm] [VPlan] Introduce scalar loop header in plan, remove VPLiveOut. (PR #109975)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 26 12:27:22 PDT 2024
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/109975
>From d0e7dae2bb95780c7af83473e1af101e55b065aa Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 25 Sep 2024 13:56:06 +0100
Subject: [PATCH] [VPlan] Introduce scalar loop header in plan, remove
VPLiveOut.
Update VPlan to include the scalar loop header. This allows retiring
VPLiveOut, as the remaining live-outs can now be handled by adding
operands to the wrapped phis in the scalar loop header.
Note that the current version includes only the scalar loop header, not the
other loop blocks, and does not wrap it in a region block. Both can be
added either in this PR or in follow-ups, as needed.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 13 +++--
llvm/lib/Transforms/Vectorize/VPlan.cpp | 38 ++++++-------
llvm/lib/Transforms/Vectorize/VPlan.h | 53 -------------------
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 34 ++----------
.../Transforms/Vectorize/VPlanTransforms.cpp | 2 +-
llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 13 ++---
.../Transforms/Vectorize/VPlanVerifier.cpp | 14 -----
.../vplan-sink-scalars-and-merge.ll | 18 +++++++
8 files changed, 53 insertions(+), 132 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index bd493fb2c1ba19..247e54884550cb 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2952,10 +2952,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
IVEndValues[Entry.first], LoopMiddleBlock, Plan, State);
}
- // Fix live-out phis not already fixed earlier.
- for (const auto &KV : Plan.getLiveOuts())
- KV.second->fixPhi(Plan, State);
-
for (Instruction *PI : PredicatedInstructions)
sinkScalarOperands(&*PI);
@@ -8818,7 +8814,14 @@ static void addLiveOutsForFirstOrderRecurrences(
VPInstruction::ResumePhi, {Resume, FOR->getStartValue()}, {},
"scalar.recur.init");
auto *FORPhi = cast<PHINode>(FOR->getUnderlyingInstr());
- Plan.addLiveOut(FORPhi, ResumePhiRecipe);
+ for (VPRecipeBase &R :
+ *cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor())) {
+ auto *IRI = cast<VPIRInstruction>(&R);
+ if (&IRI->getInstruction() == FORPhi) {
+ IRI->addOperand(ResumePhiRecipe);
+ break;
+ }
+ }
// Now update VPIRInstructions modeling LCSSA phis in the exit block.
// Extract the penultimate value of the recurrence and use it as operand for
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 6ddbfcf0ecfe58..383937b2ff72cc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -456,10 +456,17 @@ void VPIRBasicBlock::execute(VPTransformState *State) {
State->Builder.SetInsertPoint(getIRBasicBlock()->getTerminator());
executeRecipes(State, getIRBasicBlock());
if (getSingleSuccessor()) {
- assert(isa<UnreachableInst>(getIRBasicBlock()->getTerminator()));
- auto *Br = State->Builder.CreateBr(getIRBasicBlock());
- Br->setOperand(0, nullptr);
- getIRBasicBlock()->getTerminator()->eraseFromParent();
+ auto *SuccVPIRBB = dyn_cast<VPIRBasicBlock>(getSingleSuccessor());
+ if (SuccVPIRBB && SuccVPIRBB->getIRBasicBlock() ==
+ getIRBasicBlock()->getSingleSuccessor()) {
+ cast<BranchInst>(getIRBasicBlock()->getTerminator())
+ ->setOperand(0, nullptr);
+ } else {
+ assert(isa<UnreachableInst>(getIRBasicBlock()->getTerminator()));
+ auto *Br = State->Builder.CreateBr(getIRBasicBlock());
+ Br->setOperand(0, nullptr);
+ getIRBasicBlock()->getTerminator()->eraseFromParent();
+ }
}
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
@@ -843,10 +850,6 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
#endif
VPlan::~VPlan() {
- for (auto &KV : LiveOuts)
- delete KV.second;
- LiveOuts.clear();
-
if (Entry) {
VPValue DummyValue;
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
@@ -901,6 +904,8 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion);
VPBasicBlock *ScalarPH = new VPBasicBlock("scalar.ph");
+ VPBasicBlock *ScalarHeader = createVPIRBasicBlockFor(TheLoop->getHeader());
+ VPBlockUtils::connectBlocks(ScalarPH, ScalarHeader);
if (!RequiresScalarEpilogueCheck) {
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
return Plan;
@@ -1050,6 +1055,8 @@ void VPlan::execute(VPTransformState *State) {
BrInst->insertBefore(MiddleBB->getTerminator());
MiddleBB->getTerminator()->eraseFromParent();
State->CFG.DTU.applyUpdates({{DominatorTree::Delete, MiddleBB, ScalarPh}});
+ State->CFG.DTU.applyUpdates(
+ {{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
// Generate code in the loop pre-header and body.
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
@@ -1168,12 +1175,6 @@ void VPlan::print(raw_ostream &O) const {
Block->print(O, "", SlotTracker);
}
- if (!LiveOuts.empty())
- O << "\n";
- for (const auto &KV : LiveOuts) {
- KV.second->print(O, SlotTracker);
- }
-
O << "}\n";
}
@@ -1210,11 +1211,6 @@ LLVM_DUMP_METHOD
void VPlan::dump() const { print(dbgs()); }
#endif
-void VPlan::addLiveOut(PHINode *PN, VPValue *V) {
- assert(LiveOuts.count(PN) == 0 && "an exit value for PN already exists");
- LiveOuts.insert({PN, new VPLiveOut(PN, V)});
-}
-
static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
DenseMap<VPValue *, VPValue *> &Old2NewVPValues) {
// Update the operands of all cloned recipes starting at NewEntry. This
@@ -1282,10 +1278,6 @@ VPlan *VPlan::duplicate() {
remapOperands(Preheader, NewPreheader, Old2NewVPValues);
remapOperands(Entry, NewEntry, Old2NewVPValues);
- // Clone live-outs.
- for (const auto &[_, LO] : LiveOuts)
- NewPlan->addLiveOut(LO->getPhi(), Old2NewVPValues[LO->getOperand(0)]);
-
// Initialize remaining fields of cloned VPlan.
NewPlan->VFs = VFs;
NewPlan->UFs = UFs;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 23a24ce293ef2d..8fd906c0a0fac6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -655,48 +655,6 @@ class VPBlockBase {
virtual VPBlockBase *clone() = 0;
};
-/// A value that is used outside the VPlan. The operand of the user needs to be
-/// added to the associated phi node. The incoming block from VPlan is
-/// determined by where the VPValue is defined: if it is defined by a recipe
-/// outside a region, its parent block is used, otherwise the middle block is
-/// used.
-class VPLiveOut : public VPUser {
- PHINode *Phi;
-
-public:
- VPLiveOut(PHINode *Phi, VPValue *Op)
- : VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {}
-
- static inline bool classof(const VPUser *U) {
- return U->getVPUserID() == VPUser::VPUserID::LiveOut;
- }
-
- /// Fix the wrapped phi node. This means adding an incoming value to exit
- /// block phi's from the vector loop via middle block (values from scalar loop
- /// already reach these phi's), and updating the value to scalar header phi's
- /// from the scalar preheader.
- void fixPhi(VPlan &Plan, VPTransformState &State);
-
- /// Returns true if the VPLiveOut uses scalars of operand \p Op.
- bool usesScalars(const VPValue *Op) const override {
- assert(is_contained(operands(), Op) &&
- "Op must be an operand of the recipe");
- return true;
- }
-
- PHINode *getPhi() const { return Phi; }
-
- /// Live-outs are marked as only using the first part during the transition
- /// to unrolling directly on VPlan.
- /// TODO: Remove after unroller transition.
- bool onlyFirstPartUsed(const VPValue *Op) const override { return true; }
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- /// Print the VPLiveOut to \p O.
- void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
-#endif
-};
-
/// Struct to hold various analysis needed for cost computations.
struct VPCostContext {
const TargetTransformInfo &TTI;
@@ -3446,11 +3404,6 @@ class VPlan {
/// definitions are VPValues that hold a pointer to their underlying IR.
SmallVector<VPValue *, 16> VPLiveInsToFree;
- /// Values used outside the plan. It contains live-outs that need fixing. Any
- /// live-out that is fixed outside VPlan needs to be removed. The remaining
- /// live-outs are fixed via VPLiveOut::fixPhi.
- MapVector<PHINode *, VPLiveOut *> LiveOuts;
-
/// Mapping from SCEVs to the VPValues representing their expansions.
/// NOTE: This mapping is temporary and will be removed once all users have
/// been modeled in VPlan directly.
@@ -3630,12 +3583,6 @@ class VPlan {
return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
}
- void addLiveOut(PHINode *PN, VPValue *V);
-
- const MapVector<PHINode *, VPLiveOut *> &getLiveOuts() const {
- return LiveOuts;
- }
-
VPValue *getSCEVExpansion(const SCEV *S) const {
return SCEVToExpansion.lookup(S);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 9a0aefb898e582..aceb66a10a53c8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -197,35 +197,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
}
}
-void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
- VPValue *ExitValue = getOperand(0);
- VPBasicBlock *MiddleVPBB =
- cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
- VPRecipeBase *ExitingRecipe = ExitValue->getDefiningRecipe();
- auto *ExitingVPBB = ExitingRecipe ? ExitingRecipe->getParent() : nullptr;
- // Values leaving the vector loop reach live out phi's in the exiting block
- // via middle block.
- auto *PredVPBB = !ExitingVPBB || ExitingVPBB->getEnclosingLoopRegion()
- ? MiddleVPBB
- : ExitingVPBB;
- BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
- Value *V = State.get(ExitValue, VPLane(0));
- if (Phi->getBasicBlockIndex(PredBB) != -1)
- Phi->setIncomingValueForBlock(PredBB, V);
- else
- Phi->addIncoming(V, PredBB);
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void VPLiveOut::print(raw_ostream &O, VPSlotTracker &SlotTracker) const {
- O << "Live-out ";
- getPhi()->printAsOperand(O);
- O << " = ";
- getOperand(0)->printAsOperand(O, SlotTracker);
- O << "\n";
-}
-#endif
-
void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
assert(!Parent && "Recipe already in some VPBasicBlock");
assert(InsertPos->getParent() &&
@@ -858,7 +829,10 @@ void VPIRInstruction::execute(VPTransformState &State) {
State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
Value *V = State.get(ExitValue, VPLane(Lane));
auto *Phi = cast<PHINode>(&I);
- Phi->addIncoming(V, PredBB);
+ if (Phi->getBasicBlockIndex(PredBB) == -1)
+ Phi->addIncoming(V, PredBB);
+ else
+ Phi->setIncomingValueForBlock(PredBB, V);
}
// Advance the insert point after the wrapped IR instruction. This allows
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index a878613c4ba483..1e6e32a6294c1f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -378,7 +378,7 @@ static bool mergeBlocksIntoPredecessors(VPlan &Plan) {
// Don't fold the exit block of the Plan into its single predecessor for
// now.
// TODO: Remove restriction once more of the skeleton is modeled in VPlan.
- if (VPBB->getNumSuccessors() == 0 && !VPBB->getParent())
+ if (!VPBB->getParent())
continue;
auto *PredVPBB =
dyn_cast_or_null<VPBasicBlock>(VPBB->getSinglePredecessor());
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index ca78f32506ef71..3fa3100b53c730 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -264,6 +264,13 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
return;
if (auto *VPI = dyn_cast<VPInstruction>(&R)) {
+ VPValue *Op0, *Op1;
+ if (match(VPI, m_VPInstruction<VPInstruction::ResumePhi>(m_VPValue(Op0),
+ m_VPValue(Op1)))) {
+ addUniformForAllParts(VPI);
+ return;
+ }
+
if (vputils::onlyFirstPartUsed(VPI)) {
addUniformForAllParts(VPI);
return;
@@ -449,11 +456,5 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx) {
Part++;
}
- // Remap the operand of live-outs to the last part.
- for (const auto &[_, LO] : Plan.getLiveOuts()) {
- VPValue *In = Unroller.getValueForPart(LO->getOperand(0), UF - 1);
- LO->setOperand(0, In);
- }
-
VPlanTransforms::removeDeadRecipes(Plan);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 99bc4c38a3c3cd..4badf295092827 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -244,14 +244,6 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) {
return false;
}
- VPBlockBase *MiddleBB =
- IRBB->getPlan()->getVectorLoopRegion()->getSingleSuccessor();
- if (IRBB != IRBB->getPlan()->getPreheader() &&
- IRBB->getSinglePredecessor() != MiddleBB) {
- errs() << "VPIRBasicBlock can only be used as pre-header or a successor of "
- "middle-block at the moment!\n";
- return false;
- }
return true;
}
@@ -416,12 +408,6 @@ bool VPlanVerifier::verify(const VPlan &Plan) {
return false;
}
- for (const auto &KV : Plan.getLiveOuts())
- if (KV.second->getNumOperands() != 1) {
- errs() << "live outs must have a single operand\n";
- return false;
- }
-
return true;
}
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
index 0f3cd9d4ca4d61..2dddf766cb9cda 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
@@ -1077,6 +1077,17 @@ define void @merge_with_dead_gep_between_regions(i32 %n, ptr noalias %src, ptr n
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
+; CHECK-NEXT: Successor(s): ir-bb<loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT: IR %iv = phi i32 [ %n, %entry ], [ %iv.next, %loop ]
+; CHECK-NEXT: IR %iv.next = add nsw i32 %iv, -1
+; CHECK-NEXT: IR %gep.src = getelementptr inbounds i32, ptr %src, i32 %iv
+; CHECK-NEXT: IR %l = load i32, ptr %gep.src, align 16
+; CHECK-NEXT: IR %dead_gep = getelementptr inbounds i32, ptr %dst, i64 1
+; CHECK-NEXT: IR %gep.dst = getelementptr inbounds i32, ptr %dst, i32 %iv
+; CHECK-NEXT: IR store i32 %l, ptr %gep.dst, align 16
+; CHECK-NEXT: IR %ec = icmp eq i32 %iv.next, 0
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
@@ -1156,6 +1167,13 @@ define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<loop.header>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop.header>:
+; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
+; CHECK-NEXT: IR %ptr.iv.next = getelementptr inbounds i8, ptr %ptr.iv, i64 -1
+; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv.next, align 1
+; CHECK-NEXT: IR %c.1 = icmp eq i8 %l, 0
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
More information about the llvm-commits
mailing list