[llvm] [VPlan] Introduce scalar loop header in plan, remove VPLiveOut. (PR #109975)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 29 12:27:15 PDT 2024
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/109975
>From 8482c796377fb6add637b4d2993d99d5ae577716 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 25 Sep 2024 13:56:06 +0100
Subject: [PATCH 1/3] [VPlan] Introduce scalar loop header in plan, remove
VPLiveOut.
Update VPlan to include the scalar loop header. This allows retiring
VPLiveOut, as the remaining live-outs can now be handled by adding
operands to the wrapped phis in the scalar loop header.
Note that the current version only includes the scalar loop header; it
does not include any other loop blocks and does not wrap the header in a
region block. Either of these can be added in this PR or in follow-ups
as needed.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 29 +++----
llvm/lib/Transforms/Vectorize/VPlan.cpp | 61 +++++++-------
llvm/lib/Transforms/Vectorize/VPlan.h | 76 +++++------------
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 34 +-------
.../Transforms/Vectorize/VPlanTransforms.cpp | 2 +-
llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 13 +--
.../Transforms/Vectorize/VPlanVerifier.cpp | 14 ----
.../AArch64/sve2-histcnt-vplan.ll | 10 +++
.../AArch64/synthesize-mask-for-call.ll | 30 +++++++
.../widen-call-with-intrinsic-or-libfunc.ll | 10 +++
.../RISCV/riscv-vector-reverse.ll | 24 ++++++
.../RISCV/vplan-vp-intrinsics-reduction.ll | 18 ++++
.../first-order-recurrence-chains-vplan.ll | 22 +++--
...-order-recurrence-sink-replicate-region.ll | 49 ++++++++---
.../interleave-and-scalarize-only.ll | 14 +++-
.../LoopVectorize/vplan-iv-transforms.ll | 5 ++
.../LoopVectorize/vplan-predicate-switch.ll | 5 ++
.../vplan-printing-before-execute.ll | 12 +++
.../vplan-printing-outer-loop.ll | 7 ++
.../LoopVectorize/vplan-printing.ll | 82 ++++++++++++++++++-
.../vplan-sink-scalars-and-merge-vf1.ll | 6 ++
.../vplan-sink-scalars-and-merge.ll | 18 ++++
.../Transforms/Vectorize/VPDomTreeTest.cpp | 14 +++-
.../Transforms/Vectorize/VPlanHCFGTest.cpp | 12 +++
.../Transforms/Vectorize/VPlanTest.cpp | 48 +++++++----
.../Vectorize/VPlanVerifierTest.cpp | 30 +++++--
26 files changed, 440 insertions(+), 205 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 88086f24dfdce2..1ce7c9e868c7e0 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2969,10 +2969,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
IVEndValues[Entry.first], LoopMiddleBlock, Plan, State);
}
- // Fix live-out phis not already fixed earlier.
- for (const auto &KV : Plan.getLiveOuts())
- KV.second->fixPhi(Plan, State);
-
for (Instruction *PI : PredicatedInstructions)
sinkScalarOperands(&*PI);
@@ -8885,21 +8881,9 @@ static void addLiveOutsForFirstOrderRecurrences(
// Start by finding out if middle block branches to scalar preheader, which is
// not a VPIRBasicBlock, unlike Exit block - the other possible successor of
// middle block.
- // TODO: Should be replaced by
- // Plan->getScalarLoopRegion()->getSinglePredecessor() in the future once the
- // scalar region is modeled as well.
- auto *MiddleVPBB = cast<VPBasicBlock>(VectorRegion->getSingleSuccessor());
- VPBasicBlock *ScalarPHVPBB = nullptr;
- if (MiddleVPBB->getNumSuccessors() == 2) {
- // Order is strict: first is the exit block, second is the scalar preheader.
- ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
- } else if (ExitUsersToFix.empty()) {
- ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
- } else {
- llvm_unreachable("unsupported CFG in VPlan");
- }
-
+ VPBasicBlock *ScalarPHVPBB = Plan.getScalarPreheader();
VPBuilder ScalarPHBuilder(ScalarPHVPBB);
+ auto *MiddleVPBB = cast<VPBasicBlock>(VectorRegion->getSingleSuccessor());
VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
VPValue *OneVPV = Plan.getOrAddLiveIn(
ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1));
@@ -8986,7 +8970,14 @@ static void addLiveOutsForFirstOrderRecurrences(
VPInstruction::ResumePhi, {Resume, FOR->getStartValue()}, {},
"scalar.recur.init");
auto *FORPhi = cast<PHINode>(FOR->getUnderlyingInstr());
- Plan.addLiveOut(FORPhi, ResumePhiRecipe);
+ for (VPRecipeBase &R :
+ *cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor())) {
+ auto *IRI = cast<VPIRInstruction>(&R);
+ if (&IRI->getInstruction() == FORPhi) {
+ IRI->addOperand(ResumePhiRecipe);
+ break;
+ }
+ }
// Now update VPIRInstructions modeling LCSSA phis in the exit block.
// Extract the penultimate value of the recurrence and use it as operand for
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 0484543d2d0398..9bb72c8f3a0b5d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -456,10 +456,17 @@ void VPIRBasicBlock::execute(VPTransformState *State) {
State->Builder.SetInsertPoint(IRBB->getTerminator());
executeRecipes(State, IRBB);
if (getSingleSuccessor()) {
- assert(isa<UnreachableInst>(IRBB->getTerminator()));
- auto *Br = State->Builder.CreateBr(IRBB);
- Br->setOperand(0, nullptr);
- IRBB->getTerminator()->eraseFromParent();
+ auto *SuccVPIRBB = dyn_cast<VPIRBasicBlock>(getSingleSuccessor());
+ if (SuccVPIRBB && SuccVPIRBB->getIRBasicBlock() ==
+ getIRBasicBlock()->getSingleSuccessor()) {
+ cast<BranchInst>(getIRBasicBlock()->getTerminator())
+ ->setOperand(0, nullptr);
+ } else {
+ assert(isa<UnreachableInst>(getIRBasicBlock()->getTerminator()));
+ auto *Br = State->Builder.CreateBr(getIRBasicBlock());
+ Br->setOperand(0, nullptr);
+ getIRBasicBlock()->getTerminator()->eraseFromParent();
+ }
}
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
@@ -663,14 +670,16 @@ void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
}
#endif
-static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry);
+static std::tuple<VPBlockBase *, VPBlockBase *, VPIRBasicBlock *>
+cloneFrom(VPBlockBase *Entry, VPIRBasicBlock *ScalarHeader = nullptr);
// Clone the CFG for all nodes reachable from \p Entry, this includes cloning
// the blocks and their recipes. Operands of cloned recipes will NOT be updated.
// Remapping of operands must be done separately. Returns a pair with the new
// entry and exiting blocks of the cloned region. If \p Entry isn't part of a
// region, return nullptr for the exiting block.
-static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry) {
+static std::tuple<VPBlockBase *, VPBlockBase *, VPIRBasicBlock *>
+cloneFrom(VPBlockBase *Entry, VPIRBasicBlock *ScalarHeader) {
DenseMap<VPBlockBase *, VPBlockBase *> Old2NewVPBlocks;
VPBlockBase *Exiting = nullptr;
bool InRegion = Entry->getParent();
@@ -716,12 +725,14 @@ static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry) {
}
#endif
- return std::make_pair(Old2NewVPBlocks[Entry],
- Exiting ? Old2NewVPBlocks[Exiting] : nullptr);
+ return std::tuple(
+ Old2NewVPBlocks[Entry], Exiting ? Old2NewVPBlocks[Exiting] : nullptr,
+ ScalarHeader ? cast<VPIRBasicBlock>(Old2NewVPBlocks[ScalarHeader])
+ : nullptr);
}
VPRegionBlock *VPRegionBlock::clone() {
- const auto &[NewEntry, NewExiting] = cloneFrom(getEntry());
+ const auto &[NewEntry, NewExiting, _] = cloneFrom(getEntry());
auto *NewRegion =
new VPRegionBlock(NewEntry, NewExiting, getName(), isReplicator());
for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry))
@@ -843,10 +854,6 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
#endif
VPlan::~VPlan() {
- for (auto &KV : LiveOuts)
- delete KV.second;
- LiveOuts.clear();
-
if (Entry) {
VPValue DummyValue;
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
@@ -878,7 +885,9 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
VPIRBasicBlock *Entry =
VPIRBasicBlock::fromBasicBlock(TheLoop->getLoopPreheader());
VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph");
- auto Plan = std::make_unique<VPlan>(Entry, VecPreheader);
+ VPIRBasicBlock *ScalarHeader =
+ VPIRBasicBlock::fromBasicBlock(TheLoop->getHeader());
+ auto Plan = std::make_unique<VPlan>(Entry, VecPreheader, ScalarHeader);
// Create SCEV and VPValue for the trip count.
@@ -909,6 +918,7 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion);
VPBasicBlock *ScalarPH = new VPBasicBlock("scalar.ph");
+ VPBlockUtils::connectBlocks(ScalarPH, ScalarHeader);
if (!RequiresScalarEpilogueCheck) {
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
return Plan;
@@ -1054,6 +1064,8 @@ void VPlan::execute(VPTransformState *State) {
BrInst->insertBefore(MiddleBB->getTerminator());
MiddleBB->getTerminator()->eraseFromParent();
State->CFG.DTU.applyUpdates({{DominatorTree::Delete, MiddleBB, ScalarPh}});
+ State->CFG.DTU.applyUpdates(
+ {{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
// Generate code in the loop pre-header and body.
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
@@ -1172,12 +1184,6 @@ void VPlan::print(raw_ostream &O) const {
Block->print(O, "", SlotTracker);
}
- if (!LiveOuts.empty())
- O << "\n";
- for (const auto &KV : LiveOuts) {
- KV.second->print(O, SlotTracker);
- }
-
O << "}\n";
}
@@ -1214,11 +1220,6 @@ LLVM_DUMP_METHOD
void VPlan::dump() const { print(dbgs()); }
#endif
-void VPlan::addLiveOut(PHINode *PN, VPValue *V) {
- assert(LiveOuts.count(PN) == 0 && "an exit value for PN already exists");
- LiveOuts.insert({PN, new VPLiveOut(PN, V)});
-}
-
static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
DenseMap<VPValue *, VPValue *> &Old2NewVPValues) {
// Update the operands of all cloned recipes starting at NewEntry. This
@@ -1260,10 +1261,12 @@ static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
VPlan *VPlan::duplicate() {
// Clone blocks.
VPBasicBlock *NewPreheader = Preheader->clone();
- const auto &[NewEntry, __] = cloneFrom(Entry);
+ const auto &[NewEntry, __, NewScalarHeader] =
+ cloneFrom(Entry, getScalarHeader());
// Create VPlan, clone live-ins and remap operands in the cloned blocks.
- auto *NewPlan = new VPlan(NewPreheader, cast<VPBasicBlock>(NewEntry));
+ auto *NewPlan =
+ new VPlan(NewPreheader, cast<VPBasicBlock>(NewEntry), NewScalarHeader);
DenseMap<VPValue *, VPValue *> Old2NewVPValues;
for (VPValue *OldLiveIn : VPLiveInsToFree) {
Old2NewVPValues[OldLiveIn] =
@@ -1286,10 +1289,6 @@ VPlan *VPlan::duplicate() {
remapOperands(Preheader, NewPreheader, Old2NewVPValues);
remapOperands(Entry, NewEntry, Old2NewVPValues);
- // Clone live-outs.
- for (const auto &[_, LO] : LiveOuts)
- NewPlan->addLiveOut(LO->getPhi(), Old2NewVPValues[LO->getOperand(0)]);
-
// Initialize remaining fields of cloned VPlan.
NewPlan->VFs = VFs;
NewPlan->UFs = UFs;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index a34e34a0d71f1e..04c0fd41e918a6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -675,48 +675,6 @@ class VPBlockBase {
virtual VPBlockBase *clone() = 0;
};
-/// A value that is used outside the VPlan. The operand of the user needs to be
-/// added to the associated phi node. The incoming block from VPlan is
-/// determined by where the VPValue is defined: if it is defined by a recipe
-/// outside a region, its parent block is used, otherwise the middle block is
-/// used.
-class VPLiveOut : public VPUser {
- PHINode *Phi;
-
-public:
- VPLiveOut(PHINode *Phi, VPValue *Op)
- : VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {}
-
- static inline bool classof(const VPUser *U) {
- return U->getVPUserID() == VPUser::VPUserID::LiveOut;
- }
-
- /// Fix the wrapped phi node. This means adding an incoming value to exit
- /// block phi's from the vector loop via middle block (values from scalar loop
- /// already reach these phi's), and updating the value to scalar header phi's
- /// from the scalar preheader.
- void fixPhi(VPlan &Plan, VPTransformState &State);
-
- /// Returns true if the VPLiveOut uses scalars of operand \p Op.
- bool usesScalars(const VPValue *Op) const override {
- assert(is_contained(operands(), Op) &&
- "Op must be an operand of the recipe");
- return true;
- }
-
- PHINode *getPhi() const { return Phi; }
-
- /// Live-outs are marked as only using the first part during the transition
- /// to unrolling directly on VPlan.
- /// TODO: Remove after unroller transition.
- bool onlyFirstPartUsed(const VPValue *Op) const override { return true; }
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- /// Print the VPLiveOut to \p O.
- void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
-#endif
-};
-
/// Struct to hold various analysis needed for cost computations.
struct VPCostContext {
const TargetTransformInfo &TTI;
@@ -3649,6 +3607,9 @@ class VPlan {
/// rest of VPlan execution.
VPBasicBlock *Preheader;
+ /// VPIRBasicBlock wrapping the header of the original scalar loop.
+ VPIRBasicBlock *ScalarHeader;
+
/// Holds the VFs applicable to this VPlan.
SmallSetVector<ElementCount, 2> VFs;
@@ -3684,11 +3645,6 @@ class VPlan {
/// definitions are VPValues that hold a pointer to their underlying IR.
SmallVector<VPValue *, 16> VPLiveInsToFree;
- /// Values used outside the plan. It contains live-outs that need fixing. Any
- /// live-out that is fixed outside VPlan needs to be removed. The remaining
- /// live-outs are fixed via VPLiveOut::fixPhi.
- MapVector<PHINode *, VPLiveOut *> LiveOuts;
-
/// Mapping from SCEVs to the VPValues representing their expansions.
/// NOTE: This mapping is temporary and will be removed once all users have
/// been modeled in VPlan directly.
@@ -3699,8 +3655,9 @@ class VPlan {
/// and \p Entry to the plan. At the moment, \p Preheader and \p Entry need to
/// be disconnected, as the bypass blocks between them are not yet modeled in
/// VPlan.
- VPlan(VPBasicBlock *Preheader, VPValue *TC, VPBasicBlock *Entry)
- : VPlan(Preheader, Entry) {
+ VPlan(VPBasicBlock *Preheader, VPValue *TC, VPBasicBlock *Entry,
+ VPIRBasicBlock *ScalarHeader)
+ : VPlan(Preheader, Entry, ScalarHeader) {
TripCount = TC;
}
@@ -3708,13 +3665,16 @@ class VPlan {
/// the plan. At the moment, \p Preheader and \p Entry need to be
/// disconnected, as the bypass blocks between them are not yet modeled in
/// VPlan.
- VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry)
- : Entry(Entry), Preheader(Preheader) {
+ VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry,
+ VPIRBasicBlock *ScalarHeader)
+ : Entry(Entry), Preheader(Preheader), ScalarHeader(ScalarHeader) {
Entry->setPlan(this);
Preheader->setPlan(this);
assert(Preheader->getNumSuccessors() == 0 &&
Preheader->getNumPredecessors() == 0 &&
"preheader must be disconnected");
+ assert(ScalarHeader->getNumSuccessors() == 0 &&
+ "scalar header must be a leaf node");
}
~VPlan();
@@ -3746,6 +3706,14 @@ class VPlan {
VPBasicBlock *getEntry() { return Entry; }
const VPBasicBlock *getEntry() const { return Entry; }
+ /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
+ VPIRBasicBlock *getScalarHeader() { return ScalarHeader; }
+
+ /// Return the VPBasicBlock for the preheader of the scalar loop.
+ VPBasicBlock *getScalarPreheader() {
+ return cast<VPBasicBlock>(ScalarHeader->getSinglePredecessor());
+ }
+
/// The trip count of the original loop.
VPValue *getTripCount() const {
assert(TripCount && "trip count needs to be set before accessing it");
@@ -3873,12 +3841,6 @@ class VPlan {
return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
}
- void addLiveOut(PHINode *PN, VPValue *V);
-
- const MapVector<PHINode *, VPLiveOut *> &getLiveOuts() const {
- return LiveOuts;
- }
-
VPValue *getSCEVExpansion(const SCEV *S) const {
return SCEVToExpansion.lookup(S);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index b1e6086398c4df..0de9d1815e5d21 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -214,35 +214,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
}
}
-void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
- VPValue *ExitValue = getOperand(0);
- VPBasicBlock *MiddleVPBB =
- cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
- VPRecipeBase *ExitingRecipe = ExitValue->getDefiningRecipe();
- auto *ExitingVPBB = ExitingRecipe ? ExitingRecipe->getParent() : nullptr;
- // Values leaving the vector loop reach live out phi's in the exiting block
- // via middle block.
- auto *PredVPBB = !ExitingVPBB || ExitingVPBB->getEnclosingLoopRegion()
- ? MiddleVPBB
- : ExitingVPBB;
- BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
- Value *V = State.get(ExitValue, VPLane(0));
- if (Phi->getBasicBlockIndex(PredBB) != -1)
- Phi->setIncomingValueForBlock(PredBB, V);
- else
- Phi->addIncoming(V, PredBB);
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void VPLiveOut::print(raw_ostream &O, VPSlotTracker &SlotTracker) const {
- O << "Live-out ";
- getPhi()->printAsOperand(O);
- O << " = ";
- getOperand(0)->printAsOperand(O, SlotTracker);
- O << "\n";
-}
-#endif
-
void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
assert(!Parent && "Recipe already in some VPBasicBlock");
assert(InsertPos->getParent() &&
@@ -873,7 +844,10 @@ void VPIRInstruction::execute(VPTransformState &State) {
State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
Value *V = State.get(ExitValue, VPLane(Lane));
auto *Phi = cast<PHINode>(&I);
- Phi->addIncoming(V, PredBB);
+ if (Phi->getBasicBlockIndex(PredBB) == -1)
+ Phi->addIncoming(V, PredBB);
+ else
+ Phi->setIncomingValueForBlock(PredBB, V);
}
// Advance the insert point after the wrapped IR instruction. This allows
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 03c4110761ac6a..58ce8b58f96e1f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -379,7 +379,7 @@ static bool mergeBlocksIntoPredecessors(VPlan &Plan) {
// Don't fold the exit block of the Plan into its single predecessor for
// now.
// TODO: Remove restriction once more of the skeleton is modeled in VPlan.
- if (VPBB->getNumSuccessors() == 0 && !VPBB->getParent())
+ if (!VPBB->getParent())
continue;
auto *PredVPBB =
dyn_cast_or_null<VPBasicBlock>(VPBB->getSinglePredecessor());
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 1e32865e8ee576..144a296fabad81 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -264,6 +264,13 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
return;
if (auto *VPI = dyn_cast<VPInstruction>(&R)) {
+ VPValue *Op0, *Op1;
+ if (match(VPI, m_VPInstruction<VPInstruction::ResumePhi>(m_VPValue(Op0),
+ m_VPValue(Op1)))) {
+ addUniformForAllParts(VPI);
+ return;
+ }
+
if (vputils::onlyFirstPartUsed(VPI)) {
addUniformForAllParts(VPI);
return;
@@ -449,11 +456,5 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx) {
Part++;
}
- // Remap the operand of live-outs to the last part.
- for (const auto &[_, LO] : Plan.getLiveOuts()) {
- VPValue *In = Unroller.getValueForPart(LO->getOperand(0), UF - 1);
- LO->setOperand(0, In);
- }
-
VPlanTransforms::removeDeadRecipes(Plan);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 7ea5ee341cc547..3b7ba61454899d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -248,14 +248,6 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) {
return false;
}
- VPBlockBase *MiddleBB =
- IRBB->getPlan()->getVectorLoopRegion()->getSingleSuccessor();
- if (IRBB != IRBB->getPlan()->getPreheader() &&
- IRBB->getSinglePredecessor() != MiddleBB) {
- errs() << "VPIRBasicBlock can only be used as pre-header or a successor of "
- "middle-block at the moment!\n";
- return false;
- }
return true;
}
@@ -420,12 +412,6 @@ bool VPlanVerifier::verify(const VPlan &Plan) {
return false;
}
- for (const auto &KV : Plan.getLiveOuts())
- if (KV.second->getNumOperands() != 1) {
- errs() << "live outs must have a single operand\n";
- return false;
- }
-
return true;
}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll
index 9be068ce880ea8..6257d3325f9796 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll
@@ -45,6 +45,11 @@ target triple = "aarch64-unknown-linux-gnu"
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<for.body>:
+; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+; CHECK: IR %exitcond = icmp eq i64 %iv.next, %N
; CHECK-NEXT: No successors
; CHECK-NEXT: }
@@ -82,6 +87,11 @@ target triple = "aarch64-unknown-linux-gnu"
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<for.body>:
+; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+; CHECK: IR %exitcond = icmp eq i64 %iv.next, %N
; CHECK-NEXT: No successors
; CHECK-NEXT: }
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll
index 74fd76df992597..994f2f5e377632 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll
@@ -43,6 +43,11 @@ target triple = "aarch64-unknown-linux-gnu"
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<for.body>:
+; CHECK-NEXT: IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+; CHECK: IR %exitcond = icmp eq i64 %indvars.iv.next, 1024
; CHECK-NEXT: No successors
; CHECK-NEXT: }
@@ -80,6 +85,11 @@ target triple = "aarch64-unknown-linux-gnu"
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<for.body>:
+; CHECK-NEXT: IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+; CHECK: IR %exitcond = icmp eq i64 %indvars.iv.next, 1024
; CHECK-NEXT: No successors
; CHECK-NEXT: }
@@ -122,6 +132,11 @@ target triple = "aarch64-unknown-linux-gnu"
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<for.body>:
+; CHECK-NEXT: IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+; CHECK: IR %exitcond = icmp eq i64 %indvars.iv.next, 1024
; CHECK-NEXT: No successors
; CHECK-NEXT: }
@@ -159,6 +174,11 @@ target triple = "aarch64-unknown-linux-gnu"
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<for.body>:
+; CHECK-NEXT: IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+; CHECK: IR %exitcond = icmp eq i64 %indvars.iv.next, 1024
; CHECK-NEXT: No successors
; CHECK-NEXT: }
@@ -200,6 +220,11 @@ target triple = "aarch64-unknown-linux-gnu"
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<for.body>:
+; CHECK-NEXT: IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+; CHECK: IR %exitcond = icmp eq i64 %indvars.iv.next, 1024
; CHECK-NEXT: No successors
; CHECK-NEXT: }
@@ -237,6 +262,11 @@ target triple = "aarch64-unknown-linux-gnu"
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<for.body>:
+; CHECK-NEXT: IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+; CHECK: IR %exitcond = icmp eq i64 %indvars.iv.next, 1024
; CHECK-NEXT: No successors
; CHECK-NEXT: }
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll b/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll
index e9303ec9d3eb76..afc2fd5a049ad1 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll
@@ -41,6 +41,11 @@ target triple = "arm64-apple-ios"
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK: IR %cmp = icmp ne i64 %iv.next, 1024
; CHECK-NEXT: No successors
; CHECK-NEXT: }
@@ -78,6 +83,11 @@ target triple = "arm64-apple-ios"
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK: IR %cmp = icmp ne i64 %iv.next, 1024
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
index c7bb1ffab23e79..a38835f5613fd8 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
@@ -96,6 +96,12 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<for.body>:
+; CHECK-NEXT: IR %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT: IR %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ]
+; CHECK: IR %indvars.iv.next = add nsw i64 %indvars.iv, -1
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
@@ -181,6 +187,12 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<for.body>:
+; CHECK-NEXT: IR %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT: IR %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ]
+; CHECK: IR %indvars.iv.next = add nsw i64 %indvars.iv, -1
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: LV: Loop does not require scalar epilogue
@@ -303,6 +315,12 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<for.body>:
+; CHECK-NEXT: IR %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT: IR %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ]
+; CHECK: IR %indvars.iv.next = add nsw i64 %indvars.iv, -1
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
@@ -388,6 +406,12 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<for.body>:
+; CHECK-NEXT: IR %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT: IR %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ]
+; CHECK: IR %indvars.iv.next = add nsw i64 %indvars.iv, -1
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: LV: Loop does not require scalar epilogue
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
index 1326751a847d7d..5b400acea9ef28 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
@@ -65,6 +65,12 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: No successors
; IF-EVL-INLOOP-EMPTY:
; IF-EVL-INLOOP-NEXT: scalar.ph:
+; IF-EVL-INLOOP-NEXT: Successor(s): ir-bb<for.body>
+; IF-EVL-INLOOP-EMPTY:
+; IF-EVL-INLOOP-NEXT: ir-bb<for.body>:
+; IF-EVL-INLOOP-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+; IF-EVL-INLOOP-NEXT: IR %rdx = phi i32 [ %start, %entry ], [ %add, %for.body ]
+; IF-EVL-INLOOP: IR %exitcond.not = icmp eq i64 %iv.next, %n
; IF-EVL-INLOOP-NEXT: No successors
; IF-EVL-INLOOP-NEXT: }
;
@@ -104,6 +110,12 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
; NO-VP-OUTLOOP-NEXT: No successors
; NO-VP-OUTLOOP-EMPTY:
; NO-VP-OUTLOOP-NEXT: scalar.ph:
+; NO-VP-OUTLOOP-NEXT: Successor(s): ir-bb<for.body>
+; NO-VP-OUTLOOP-EMPTY:
+; NO-VP-OUTLOOP-NEXT: ir-bb<for.body>:
+; NO-VP-OUTLOOP-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+; NO-VP-OUTLOOP-NEXT: IR %rdx = phi i32 [ %start, %entry ], [ %add, %for.body ]
+; NO-VP-OUTLOOP: IR %exitcond.not = icmp eq i64 %iv.next, %n
; NO-VP-OUTLOOP-NEXT: No successors
; NO-VP-OUTLOOP-NEXT: }
;
@@ -143,6 +155,12 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
; NO-VP-INLOOP-NEXT: No successors
; NO-VP-INLOOP-EMPTY:
; NO-VP-INLOOP-NEXT: scalar.ph:
+; NO-VP-INLOOP-NEXT: Successor(s): ir-bb<for.body>
+; NO-VP-INLOOP-EMPTY:
+; NO-VP-INLOOP-NEXT: ir-bb<for.body>:
+; NO-VP-INLOOP-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+; NO-VP-INLOOP-NEXT: IR %rdx = phi i32 [ %start, %entry ], [ %add, %for.body ]
+; NO-VP-INLOOP: IR %exitcond.not = icmp eq i64 %iv.next, %n
; NO-VP-INLOOP-NEXT: No successors
; NO-VP-INLOOP-NEXT: }
;
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll
index fe48008792ff72..8eff9df9671db8 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll
@@ -45,10 +45,14 @@ define void @test_chained_first_order_recurrences_1(ptr %ptr) {
; CHECK-NEXT: scalar.ph
; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<22>
; CHECK-NEXT: EMIT vp<[[RESUME_2_P:%.*]]>.1 = resume-phi vp<[[RESUME_2]]>.1, ir<33>
-; CHECK-NEXT: No successors
+; CHECK-NEXT: Successor(s): ir-bb<loop>
; CHECK-EMPTY:
-; CHECK-NEXT: Live-out i16 %for.1 = vp<[[RESUME_1_P]]>
-; CHECK-NEXT: Live-out i16 %for.2 = vp<[[RESUME_2_P]]>.1
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT: IR %for.1 = phi i16 [ 22, %entry ], [ %for.1.next, %loop ] (extra operand: vp<[[RESUME_1_P]]>)
+; CHECK-NEXT: IR %for.2 = phi i16 [ 33, %entry ], [ %for.1, %loop ] (extra operand: vp<[[RESUME_2_P]]>.1)
+; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK: IR %exitcond.not = icmp eq i64 %iv.next, 1000
+; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
@@ -118,11 +122,15 @@ define void @test_chained_first_order_recurrences_3(ptr %ptr) {
; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<22>
; CHECK-NEXT: EMIT vp<[[RESUME_2_P:%.*]]>.1 = resume-phi vp<[[RESUME_2]]>.1, ir<33>
; CHECK-NEXT: EMIT vp<[[RESUME_3_P:%.*]]>.2 = resume-phi vp<[[RESUME_3]]>.2, ir<33>
-; CHECK-NEXT: No successors
+; CHECK-NEXT: Successor(s): ir-bb<loop>
; CHECK-EMPTY:
-; CHECK-NEXT: Live-out i16 %for.1 = vp<[[RESUME_1_P]]>
-; CHECK-NEXT: Live-out i16 %for.2 = vp<[[RESUME_2_P]]>.1
-; CHECK-NEXT: Live-out i16 %for.3 = vp<[[RESUME_3_P]]>.2
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT: IR %for.1 = phi i16 [ 22, %entry ], [ %for.1.next, %loop ] (extra operand: vp<[[RESUME_1_P]]>)
+; CHECK-NEXT: IR %for.2 = phi i16 [ 33, %entry ], [ %for.1, %loop ] (extra operand: vp<[[RESUME_2_P]]>.1)
+; CHECK-NEXT: IR %for.3 = phi i16 [ 33, %entry ], [ %for.2, %loop ] (extra operand: vp<[[RESUME_3_P]]>.2)
+; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK: IR %exitcond.not = icmp eq i64 %iv.next, 1000
+; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
index 8e56614a2e3d5c..4cc8fec60be03f 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
@@ -82,9 +82,13 @@ define void @sink_replicate_region_1(i32 %x, ptr %ptr, ptr noalias %dst) optsize
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0>
-; CHECK-NEXT: No successors
+; CHECK-NEXT: Successor(s): ir-bb<loop>
; CHECK-EMPTY:
-; CHECK-NEXT: Live-out i32 %0 = vp<[[RESUME_1_P]]>
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT: IR %0 = phi i32 [ 0, %entry ], [ %conv, %loop ] (extra operand: vp<[[RESUME_1_P]]>)
+; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK: IR %ec = icmp eq i32 %iv.next, 20001
+; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
@@ -165,9 +169,13 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize {
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0>
-; CHECK-NEXT: No successors
+; CHECK-NEXT: Successor(s): ir-bb<loop>
; CHECK-EMPTY:
-; CHECK-NEXT: Live-out i32 %recur = vp<[[RESUME_1_P]]>
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT: IR %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ] (extra operand: vp<[[RESUME_1_P]]>)
+; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK: IR %ec = icmp eq i32 %iv.next, 20001
+; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
@@ -232,9 +240,14 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize {
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0>
-; CHECK-NEXT: No successors
+; CHECK-NEXT: Successor(s): ir-bb<loop>
; CHECK-EMPTY:
-; CHECK-NEXT: Live-out i32 %recur = vp<[[RESUME_1_P]]>
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT: IR %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ] (extra operand: vp<[[RESUME_1_P]]>)
+; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-NEXT: IR %and.red = phi i32 [ 1234, %entry ], [ %and.red.next, %loop ]
+; CHECK: IR %ec = icmp eq i32 %iv.next, 20001
+; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
@@ -338,9 +351,13 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, ptr
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0>
-; CHECK-NEXT: No successors
+; CHECK-NEXT: Successor(s): ir-bb<loop>
; CHECK-EMPTY:
-; CHECK-NEXT: Live-out i32 %0 = vp<[[RESUME_1_P]]>
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT: IR %0 = phi i32 [ 0, %entry ], [ %conv, %loop ] (extra operand: vp<[[RESUME_1_P]]>)
+; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK: IR %ec = icmp eq i32 %iv.next, 20001
+; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
@@ -431,9 +448,13 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0>
-; CHECK-NEXT: No successors
+; CHECK-NEXT: Successor(s): ir-bb<loop>
; CHECK-EMPTY:
-; CHECK-NEXT: Live-out i32 %recur = vp<[[RESUME_1_P]]>
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT: IR %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ] (extra operand: vp<[[RESUME_1_P]]>)
+; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK: IR %C = icmp sgt i32 %iv.next, %recur.next
+; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
@@ -513,9 +534,13 @@ define void @need_new_block_after_sinking_pr56146(i32 %x, ptr %src, ptr noalias
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0>
-; CHECK-NEXT: No successors
+; CHECK-NEXT: Successor(s): ir-bb<loop>
; CHECK-EMPTY:
-; CHECK-NEXT: Live-out i32 %.pn = vp<[[RESUME_1_P]]>
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT: IR %iv = phi i64 [ 2, %entry ], [ %iv.next, %loop ]
+; CHECK-NEXT: IR %.pn = phi i32 [ 0, %entry ], [ %l, %loop ] (extra operand: vp<[[RESUME_1_P]]>)
+; CHECK: IR %ec = icmp ugt i64 %iv, 3
+; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
index b7f8ddbfa5d7c1..a71666d8c3167a 100644
--- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
@@ -116,6 +116,12 @@ declare i32 @llvm.smin.i32(i32, i32)
; DBG-NEXT: No successors
; DBG-EMPTY:
; DBG-NEXT: scalar.ph:
+; DBG-NEXT: Successor(s): ir-bb<loop.header>
+; DBG-EMPTY:
+; DBG-NEXT: ir-bb<loop.header>:
+; DBG-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+; DBG-NEXT: IR %d = phi i1 [ false, %entry ], [ %d.next, %loop.latch ]
+; DBG-NEXT: IR %d.next = xor i1 %d, true
; DBG-NEXT: No successors
; DBG-NEXT: }
@@ -217,9 +223,13 @@ exit:
; DBG-EMPTY:
; DBG-NEXT: scalar.ph:
; DBG-NEXT: EMIT vp<[[RESUME_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0>
-; DBG-NEXT: No successors
+; DBG-NEXT: Successor(s): ir-bb<loop>
; DBG-EMPTY:
-; DBG-NEXT: Live-out i32 %for = vp<[[RESUME_P]]>
+; DBG-NEXT: ir-bb<loop>:
+; DBG-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+; DBG-NEXT: IR %for = phi i32 [ 0, %entry ], [ %iv.trunc, %loop ] (extra operand: vp<[[RESUME_P]]>)
+; DBG: IR %ec = icmp slt i32 %iv.next.trunc, %n
+; DBG-NEXT: No successors
; DBG-NEXT: }
define void @first_order_recurrence_using_induction(i32 %n, ptr %dst) {
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll
index cab784b61c5449..1f815899ed55c2 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll
@@ -35,6 +35,11 @@ define void @iv_no_binary_op_in_descriptor(i1 %c, ptr %dst) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<loop.header>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop.header>:
+; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next.p, %loop.latch ]
+; CHECK: IR %iv.next = add i64 %iv, 1
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
index a1e28999a40026..53f5a5658fb68d 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
@@ -91,6 +91,11 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<loop.header>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop.header>:
+; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
+; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll
index 81c56f75900793..c9612ced3eee01 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll
@@ -46,6 +46,12 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT: IR %iv = phi i64 [ %and, %entry ], [ %iv.next, %loop ]
+; CHECK-NEXT: IR %p.src = phi ptr [ %A, %entry ], [ %p.src.next, %loop ]
+; CHECK: IR %cmp = icmp eq i64 %iv.next, 0
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
@@ -93,6 +99,12 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT: IR %iv = phi i64 [ %and, %entry ], [ %iv.next, %loop ]
+; CHECK-NEXT: IR %p.src = phi ptr [ %A, %entry ], [ %p.src.next, %loop ]
+; CHECK: IR %cmp = icmp eq i64 %iv.next, 0
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
index 8c7a4e57f9d358..50d406d0c04164 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
@@ -51,6 +51,13 @@ define void @foo(i64 %n) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<outer.header>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<outer.header>:
+; CHECK-NEXT: IR %outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %outer.latch ]
+; CHECK-NEXT: IR %gep.1 = getelementptr inbounds [8 x i64], ptr @arr2, i64 0, i64 %outer.iv
+; CHECK-NEXT: IR store i64 %outer.iv, ptr %gep.1, align 4
+; CHECK-NEXT: IR %add = add nsw i64 %outer.iv, %n
; CHECK-NEXT: No successors
; CHECK-NEXT: }
entry:
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
index 0dde507d08be74..fe9d2aff3d05ca 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
@@ -42,6 +42,11 @@ define void @print_call_and_memory(i64 %n, ptr noalias %y, ptr noalias %x) nounw
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
+; CHECK-NEXT: Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<for.body>:
+; CHECK-NEXT: IR %iv = phi i64 [ %iv.next, %for.body ], [ 0, %for.body.preheader ]
+; CHECK: IR %exitcond = icmp eq i64 %iv.next, %n
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
@@ -104,6 +109,11 @@ define void @print_widen_gep_and_select(i64 %n, ptr noalias %y, ptr noalias %x,
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
+; CHECK-NEXT: Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<for.body>:
+; CHECK-NEXT: IR %iv = phi i64 [ %iv.next, %for.body ], [ 0, %for.body.preheader ]
+; CHECK: IR %exitcond = icmp eq i64 %iv.next, %n
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
@@ -165,6 +175,11 @@ define float @print_reduction(i64 %n, ptr noalias %y) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
+; CHECK-NEXT: Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<for.body>:
+; CHECK-NEXT: IR %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
+; CHECK: IR %exitcond = icmp eq i64 %iv.next, %n
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
@@ -221,6 +236,12 @@ define void @print_reduction_with_invariant_store(i64 %n, ptr noalias %y, ptr no
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
+; CHECK-NEXT: Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<for.body>:
+; CHECK-NEXT: IR %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
+; CHECK-NEXT: IR %red = phi float [ %red.next, %for.body ], [ 0.000000e+00, %entry ]
+; CHECK: IR %exitcond = icmp eq i64 %iv.next, %n
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
@@ -300,6 +321,11 @@ define void @print_replicate_predicated_phi(i64 %n, ptr %x) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
+; CHECK-NEXT: Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<for.body>:
+; CHECK-NEXT: IR %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ]
+; CHECK-NEXT: IR %cmp = icmp ult i64 %i, 5
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
@@ -372,6 +398,11 @@ define void @print_interleave_groups(i32 %C, i32 %D) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
+; CHECK-NEXT: Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<for.body>:
+; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+; CHECK: IR %cmp = icmp slt i64 %iv.next, 1024
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
@@ -447,6 +478,12 @@ define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
+; CHECK-NEXT: Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<for.body>:
+; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+; CHECK-NEXT: IR %sum.07 = phi float [ 0.000000e+00, %entry ], [ %muladd, %for.body ]
+; CHECK: IR %exitcond.not = icmp eq i64 %iv.next, %n
; CHECK-NEXT: No successors
; CHECK-NEXT:}
@@ -529,7 +566,12 @@ define void @debug_loc_vpinstruction(ptr nocapture %asd, ptr nocapture %bsd) !db
; CHECK-NEXT: ir-bb<exit>
; CHECK-NEXT: No successors
; CHECK-EMPTY:
-; CHECK-NEXT: scalar.ph
+; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %if.end ]
+; CHECK: IR %cmp1 = icmp slt i32 %lsd, 100
; CHECK-NEXT: No successors
; CHECK-NEXT:}
;
@@ -610,6 +652,11 @@ define void @print_expand_scev(i64 %y, ptr %ptr) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
+; CHECK-NEXT: Successor(s): ir-bb<loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT: IR %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
+; CHECK: IR %iv.next = add i64 %iv, %inc
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
@@ -670,6 +717,11 @@ define i32 @print_exit_value(ptr %ptr, i32 %off) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
+; CHECK-NEXT: Successor(s): ir-bb<loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK: IR %ec = icmp eq i32 %iv.next, 1000
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
@@ -728,6 +780,11 @@ define void @print_fast_math_flags(i64 %n, ptr noalias %y, ptr noalias %x, ptr %
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
+; CHECK-NEXT: Successor(s): ir-bb<loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK: IR %exitcond = icmp eq i64 %iv.next, %n
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
@@ -788,6 +845,11 @@ define void @print_exact_flags(i64 %n, ptr noalias %x) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
+; CHECK-NEXT: Successor(s): ir-bb<loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK: IR %exitcond = icmp eq i64 %iv.next, %n
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
@@ -868,6 +930,11 @@ define void @print_call_flags(ptr readonly %src, ptr noalias %dest, i64 %n) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
+; CHECK-NEXT: Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<for.body>:
+; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.loop ]
+; CHECK: IR %ifcond = fcmp oeq float %ld.value, 5.0
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
@@ -937,6 +1004,11 @@ define void @print_disjoint_flags(i64 %n, ptr noalias %x) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
+; CHECK-NEXT: Successor(s): ir-bb<loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK: IR %exitcond = icmp eq i64 %iv.next, %n
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
@@ -1041,9 +1113,13 @@ define i16 @print_first_order_recurrence_and_result(ptr %ptr) {
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
; CHECK-NEXT: EMIT vp<[[RESUME_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<22>
-; CHECK-NEXT: No successors
+; CHECK-NEXT: Successor(s): ir-bb<loop>
; CHECK-EMPTY:
-; CHECK-NEXT: Live-out i16 %for.1 = vp<[[RESUME_P]]>
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT: IR %for.1 = phi i16 [ 22, %entry ], [ %for.1.next, %loop ] (extra operand: vp<[[RESUME_P]]>)
+; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK: IR %exitcond.not = icmp eq i64 %iv.next, 1000
+; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll
index f846ba0166b2cb..cdeffeff84d03a 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll
@@ -53,6 +53,12 @@ define void @sink_with_sideeffects(i1 %c, ptr %ptr) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<for.body>:
+; CHECK-NEXT: IR %tmp0 = phi i64 [ %tmp6, %for.inc ], [ 0, %entry ]
+; CHECK-NEXT: IR %tmp1 = phi i64 [ %tmp7, %for.inc ], [ 0, %entry ]
+; CHECK: IR %tmp5 = trunc i32 %tmp4 to i8
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
index 446b720ad1ba49..88e7aaccfe2f35 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
@@ -1077,6 +1077,17 @@ define void @merge_with_dead_gep_between_regions(i32 %n, ptr noalias %src, ptr n
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
+; CHECK-NEXT: Successor(s): ir-bb<loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT: IR %iv = phi i32 [ %n, %entry ], [ %iv.next, %loop ]
+; CHECK-NEXT: IR %iv.next = add nsw i32 %iv, -1
+; CHECK-NEXT: IR %gep.src = getelementptr inbounds i32, ptr %src, i32 %iv
+; CHECK-NEXT: IR %l = load i32, ptr %gep.src, align 16
+; CHECK-NEXT: IR %dead_gep = getelementptr inbounds i32, ptr %dst, i64 1
+; CHECK-NEXT: IR %gep.dst = getelementptr inbounds i32, ptr %dst, i32 %iv
+; CHECK-NEXT: IR store i32 %l, ptr %gep.dst, align 16
+; CHECK-NEXT: IR %ec = icmp eq i32 %iv.next, 0
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
@@ -1157,6 +1168,13 @@ define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: Successor(s): ir-bb<loop.header>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop.header>:
+; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
+; CHECK-NEXT: IR %ptr.iv.next = getelementptr inbounds i8, ptr %ptr.iv, i64 -1
+; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv.next, align 1
+; CHECK-NEXT: IR %c.1 = icmp eq i8 %l, 0
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
diff --git a/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp b/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp
index bb064b5f63b724..37c505e8b37904 100644
--- a/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp
@@ -41,7 +41,11 @@ TEST(VPDominatorTreeTest, DominanceNoRegionsTest) {
VPBlockUtils::connectBlocks(VPBB3, VPBB4);
auto TC = std::make_unique<VPValue>();
- VPlan Plan(VPPH, &*TC, VPBB0);
+ LLVMContext C;
+ auto ScalarHeader = std::make_unique<BasicBlock *>(BasicBlock::Create(C, ""));
+ VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+ VPlan Plan(VPPH, &*TC, VPBB0, ScalarHeaderVPBB);
+
VPDominatorTree VPDT;
VPDT.recalculate(Plan);
@@ -71,6 +75,8 @@ checkDomChildren(VPDominatorTree &VPDT, VPBlockBase *Src,
}
TEST(VPDominatorTreeTest, DominanceRegionsTest) {
+ LLVMContext C;
+ auto ScalarHeader = std::make_unique<BasicBlock *>(BasicBlock::Create(C, ""));
{
// 2 consecutive regions.
// VPBB0
@@ -115,7 +121,8 @@ TEST(VPDominatorTreeTest, DominanceRegionsTest) {
VPBlockUtils::connectBlocks(R1, R2);
auto TC = std::make_unique<VPValue>();
- VPlan Plan(VPPH, &*TC, VPBB0);
+ VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+ VPlan Plan(VPPH, &*TC, VPBB0, ScalarHeaderVPBB);
VPDominatorTree VPDT;
VPDT.recalculate(Plan);
@@ -195,7 +202,8 @@ TEST(VPDominatorTreeTest, DominanceRegionsTest) {
VPBlockUtils::connectBlocks(R1, VPBB2);
auto TC = std::make_unique<VPValue>();
- VPlan Plan(VPPH, &*TC, VPBB1);
+ VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+ VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
VPDominatorTree VPDT;
VPDT.recalculate(Plan);
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
index 01d630124a4bbd..93277eed8be129 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
@@ -145,6 +145,18 @@ compound=true
]
N6 [label =
"scalar.ph:\l" +
+ "Successor(s): ir-bb\<for.body\>\l"
+ ]
+ N6 -> N7 [ label=""]
+ N7 [label =
+ "ir-bb\<for.body\>:\l" +
+ " IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\l" +
+ " IR %arr.idx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv\l" +
+ " IR %l1 = load i32, ptr %arr.idx, align 4\l" +
+ " IR %res = add i32 %l1, 10\l" +
+ " IR store i32 %res, ptr %arr.idx, align 4\l" +
+ " IR %indvars.iv.next = add i64 %indvars.iv, 1\l" +
+ " IR %exitcond = icmp ne i64 %indvars.iv.next, %N\l" +
"No successors\l"
]
}
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
index 0f170efac207b7..ae0122cd67916d 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
@@ -238,6 +238,8 @@ TEST(VPInstructionTest, releaseOperandsAtDeletion) {
delete VPV2;
}
TEST(VPBasicBlockTest, getPlan) {
+ LLVMContext C;
+ auto ScalarHeader = std::make_unique<BasicBlock *>(BasicBlock::Create(C, ""));
{
VPBasicBlock *VPPH = new VPBasicBlock("ph");
VPBasicBlock *VPBB1 = new VPBasicBlock();
@@ -256,7 +258,8 @@ TEST(VPBasicBlockTest, getPlan) {
VPBlockUtils::connectBlocks(VPBB3, VPBB4);
auto TC = std::make_unique<VPValue>();
- VPlan Plan(VPPH, &*TC, VPBB1);
+ VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+ VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
EXPECT_EQ(&Plan, VPBB1->getPlan());
EXPECT_EQ(&Plan, VPBB2->getPlan());
@@ -276,7 +279,8 @@ TEST(VPBasicBlockTest, getPlan) {
VPBlockUtils::connectBlocks(VPBB1, R1);
auto TC = std::make_unique<VPValue>();
- VPlan Plan(VPPH, &*TC, VPBB1);
+ VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+ VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
EXPECT_EQ(&Plan, VPBB1->getPlan());
EXPECT_EQ(&Plan, R1->getPlan());
@@ -306,7 +310,8 @@ TEST(VPBasicBlockTest, getPlan) {
VPBlockUtils::connectBlocks(R2, VPBB2);
auto TC = std::make_unique<VPValue>();
- VPlan Plan(VPPH, &*TC, VPBB1);
+ VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+ VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
EXPECT_EQ(&Plan, VPBB1->getPlan());
EXPECT_EQ(&Plan, R1->getPlan());
@@ -320,6 +325,8 @@ TEST(VPBasicBlockTest, getPlan) {
}
TEST(VPBasicBlockTest, TraversingIteratorTest) {
+ LLVMContext C;
+ auto ScalarHeader = std::make_unique<BasicBlock *>(BasicBlock::Create(C, ""));
{
// VPBasicBlocks only
// VPBB1
@@ -347,7 +354,8 @@ TEST(VPBasicBlockTest, TraversingIteratorTest) {
// Use Plan to properly clean up created blocks.
auto TC = std::make_unique<VPValue>();
- VPlan Plan(VPPH, &*TC, VPBB1);
+ VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+ VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
}
{
@@ -447,7 +455,8 @@ TEST(VPBasicBlockTest, TraversingIteratorTest) {
// Use Plan to properly clean up created blocks.
auto TC = std::make_unique<VPValue>();
- VPlan Plan(VPPH, &*TC, VPBB0);
+ VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+ VPlan Plan(VPPH, &*TC, VPBB0, ScalarHeaderVPBB);
}
{
@@ -530,7 +539,8 @@ TEST(VPBasicBlockTest, TraversingIteratorTest) {
// Use Plan to properly clean up created blocks.
auto TC = std::make_unique<VPValue>();
- VPlan Plan(VPPH, &*TC, VPBB1);
+ VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+ VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
}
{
@@ -578,7 +588,8 @@ TEST(VPBasicBlockTest, TraversingIteratorTest) {
// Use Plan to properly clean up created blocks.
auto TC = std::make_unique<VPValue>();
- VPlan Plan(VPPH, &*TC, VPBB1);
+ VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+ VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
}
{
@@ -670,7 +681,8 @@ TEST(VPBasicBlockTest, TraversingIteratorTest) {
// Use Plan to properly clean up created blocks.
auto TC = std::make_unique<VPValue>();
- VPlan Plan(VPPH, &*TC, VPBB1);
+ VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+ VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
}
}
@@ -708,7 +720,10 @@ TEST(VPBasicBlockTest, print) {
EXPECT_EQ("EMIT br <badref>, <badref>", I3Dump);
}
- VPlan Plan(VPBB0, TC, VPBB1);
+ LLVMContext C;
+ auto ScalarHeader = std::make_unique<BasicBlock *>(BasicBlock::Create(C, ""));
+ VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+ VPlan Plan(VPBB0, TC, VPBB1, ScalarHeaderVPBB);
std::string FullDump;
raw_string_ostream OS(FullDump);
Plan.printDOT(OS);
@@ -790,7 +805,10 @@ TEST(VPBasicBlockTest, printPlanWithVFsAndUFs) {
VPBB1->appendRecipe(I1);
VPBB1->setName("bb1");
- VPlan Plan(VPBB0, TC, VPBB1);
+ LLVMContext C;
+ auto ScalarHeader = std::make_unique<BasicBlock *>(BasicBlock::Create(C, ""));
+ VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+ VPlan Plan(VPBB0, TC, VPBB1, ScalarHeaderVPBB);
Plan.setName("TestPlan");
Plan.addVF(ElementCount::getFixed(4));
@@ -1250,9 +1268,10 @@ TEST(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) {
TEST(VPRecipeTest, dumpRecipeInPlan) {
VPBasicBlock *VPBB0 = new VPBasicBlock("preheader");
VPBasicBlock *VPBB1 = new VPBasicBlock();
- VPlan Plan(VPBB0, VPBB1);
-
LLVMContext C;
+ auto ScalarHeader = std::make_unique<BasicBlock *>(BasicBlock::Create(C, ""));
+ VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+ VPlan Plan(VPBB0, VPBB1, ScalarHeaderVPBB);
IntegerType *Int32 = IntegerType::get(C, 32);
auto *AI = BinaryOperator::CreateAdd(PoisonValue::get(Int32),
@@ -1319,9 +1338,10 @@ TEST(VPRecipeTest, dumpRecipeInPlan) {
TEST(VPRecipeTest, dumpRecipeUnnamedVPValuesInPlan) {
VPBasicBlock *VPBB0 = new VPBasicBlock("preheader");
VPBasicBlock *VPBB1 = new VPBasicBlock();
- VPlan Plan(VPBB0, VPBB1);
-
LLVMContext C;
+ auto ScalarHeader = std::make_unique<BasicBlock *>(BasicBlock::Create(C, ""));
+ VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+ VPlan Plan(VPBB0, VPBB1, ScalarHeaderVPBB);
IntegerType *Int32 = IntegerType::get(C, 32);
auto *AI = BinaryOperator::CreateAdd(PoisonValue::get(Int32),
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
index 9958d6ea124f81..e70cd271b8d732 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
@@ -28,7 +28,11 @@ TEST(VPVerifierTest, VPInstructionUseBeforeDefSameBB) {
VPBasicBlock *VPBB2 = new VPBasicBlock();
VPRegionBlock *R1 = new VPRegionBlock(VPBB2, VPBB2, "R1");
VPBlockUtils::connectBlocks(VPBB1, R1);
- VPlan Plan(VPPH, &*TC, VPBB1);
+
+ LLVMContext C;
+ auto ScalarHeader = std::make_unique<BasicBlock *>(BasicBlock::Create(C, ""));
+ VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+ VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
#if GTEST_HAS_STREAM_REDIRECTION
::testing::internal::CaptureStderr();
@@ -60,7 +64,10 @@ TEST(VPVerifierTest, VPInstructionUseBeforeDefDifferentBB) {
VPBlockUtils::connectBlocks(VPBB1, R1);
auto TC = std::make_unique<VPValue>();
- VPlan Plan(VPPH, &*TC, VPBB1);
+ LLVMContext C;
+ auto ScalarHeader = std::make_unique<BasicBlock *>(BasicBlock::Create(C, ""));
+ VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+ VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
#if GTEST_HAS_STREAM_REDIRECTION
::testing::internal::CaptureStderr();
@@ -103,7 +110,9 @@ TEST(VPVerifierTest, VPBlendUseBeforeDefDifferentBB) {
VPBB3->setParent(R1);
auto TC = std::make_unique<VPValue>();
- VPlan Plan(VPPH, &*TC, VPBB1);
+ auto ScalarHeader = std::make_unique<BasicBlock *>(BasicBlock::Create(C, ""));
+ VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+ VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
#if GTEST_HAS_STREAM_REDIRECTION
::testing::internal::CaptureStderr();
@@ -139,7 +148,10 @@ TEST(VPVerifierTest, DuplicateSuccessorsOutsideRegion) {
VPBlockUtils::connectBlocks(VPBB1, R1);
auto TC = std::make_unique<VPValue>();
- VPlan Plan(VPPH, &*TC, VPBB1);
+ LLVMContext C;
+ auto ScalarHeader = std::make_unique<BasicBlock *>(BasicBlock::Create(C, ""));
+ VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+ VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
#if GTEST_HAS_STREAM_REDIRECTION
::testing::internal::CaptureStderr();
@@ -176,7 +188,10 @@ TEST(VPVerifierTest, DuplicateSuccessorsInsideRegion) {
VPBB3->setParent(R1);
auto TC = std::make_unique<VPValue>();
- VPlan Plan(VPPH, &*TC, VPBB1);
+ LLVMContext C;
+ auto ScalarHeader = std::make_unique<BasicBlock *>(BasicBlock::Create(C, ""));
+ VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+ VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
#if GTEST_HAS_STREAM_REDIRECTION
::testing::internal::CaptureStderr();
@@ -205,7 +220,10 @@ TEST(VPVerifierTest, BlockOutsideRegionWithParent) {
VPBB1->setParent(R1);
auto TC = std::make_unique<VPValue>();
- VPlan Plan(VPPH, &*TC, VPBB1);
+ LLVMContext C;
+ auto ScalarHeader = std::make_unique<BasicBlock *>(BasicBlock::Create(C, ""));
+ VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(*ScalarHeader);
+ VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB);
#if GTEST_HAS_STREAM_REDIRECTION
::testing::internal::CaptureStderr();
>From a688a02c16aec74fedc4ed850cedaee35286dfe1 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 27 Oct 2024 16:19:38 +0100
Subject: [PATCH 2/3] !fixup address latest comments, thanks!
---
.../Transforms/Vectorize/LoopVectorize.cpp | 15 +++--
llvm/lib/Transforms/Vectorize/VPlan.cpp | 62 ++++++++-----------
llvm/lib/Transforms/Vectorize/VPlan.h | 12 ++--
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 2 +
.../Transforms/Vectorize/VPlanTransforms.cpp | 4 +-
llvm/lib/Transforms/Vectorize/VPlanValue.h | 1 -
6 files changed, 45 insertions(+), 51 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1ce7c9e868c7e0..77a43848c3fb20 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8881,9 +8881,9 @@ static void addLiveOutsForFirstOrderRecurrences(
// Start by finding out if middle block branches to scalar preheader, which is
// not a VPIRBasicBlock, unlike Exit block - the other possible successor of
// middle block.
- VPBasicBlock *ScalarPHVPBB = Plan.getScalarPreheader();
- VPBuilder ScalarPHBuilder(ScalarPHVPBB);
+ auto *ScalarPHVPBB = Plan.getScalarPreheader();
auto *MiddleVPBB = cast<VPBasicBlock>(VectorRegion->getSingleSuccessor());
+ VPBuilder ScalarPHBuilder(ScalarPHVPBB);
VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
VPValue *OneVPV = Plan.getOrAddLiveIn(
ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1));
@@ -8962,7 +8962,8 @@ static void addLiveOutsForFirstOrderRecurrences(
// lo = lcssa.phi [s1, scalar.body],
// [vector.recur.extract.for.phi, middle.block]
//
- // Extract the resume value and create a new VPLiveOut for it.
+ // Extract the resume value and update the VPIRInstrunction wrapping the
+ // phi in the scalar header block.
auto *Resume = MiddleBuilder.createNaryOp(VPInstruction::ExtractFromEnd,
{FOR->getBackedgeValue(), OneVPV},
{}, "vector.recur.extract");
@@ -8970,14 +8971,16 @@ static void addLiveOutsForFirstOrderRecurrences(
VPInstruction::ResumePhi, {Resume, FOR->getStartValue()}, {},
"scalar.recur.init");
auto *FORPhi = cast<PHINode>(FOR->getUnderlyingInstr());
- for (VPRecipeBase &R :
- *cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor())) {
- auto *IRI = cast<VPIRInstruction>(&R);
+ VPIRInstruction *IRI = nullptr;
+ for (VPRecipeBase &R : *Plan.getScalarHeader()) {
+ IRI = cast<VPIRInstruction>(&R);
if (&IRI->getInstruction() == FORPhi) {
IRI->addOperand(ResumePhiRecipe);
break;
}
+ IRI = nullptr;
}
+ assert(IRI && "IRI needs to be set, implying it had its operand updated");
// Now update VPIRInstructions modeling LCSSA phis in the exit block.
// Extract the penultimate value of the recurrence and use it as operand for
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 9bb72c8f3a0b5d..66c3ef72f58c69 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -455,18 +455,15 @@ void VPIRBasicBlock::execute(VPTransformState *State) {
"VPIRBasicBlock can have at most two successors at the moment!");
State->Builder.SetInsertPoint(IRBB->getTerminator());
executeRecipes(State, IRBB);
- if (getSingleSuccessor()) {
- auto *SuccVPIRBB = dyn_cast<VPIRBasicBlock>(getSingleSuccessor());
- if (SuccVPIRBB && SuccVPIRBB->getIRBasicBlock() ==
- getIRBasicBlock()->getSingleSuccessor()) {
- cast<BranchInst>(getIRBasicBlock()->getTerminator())
- ->setOperand(0, nullptr);
- } else {
- assert(isa<UnreachableInst>(getIRBasicBlock()->getTerminator()));
- auto *Br = State->Builder.CreateBr(getIRBasicBlock());
- Br->setOperand(0, nullptr);
- getIRBasicBlock()->getTerminator()->eraseFromParent();
- }
+ // Prepare branch instruction in IRBB. If there are no successors, there's
+ // nothing to do. If IRBB's terminator is already a BranchInst, there's
+ // nothing to do here. If it is unreachable, we don't cannot re-use an
+ // existing branch and no branch has been created during recipe execution.
+ // Create it now.
+ if (getSingleSuccessor() && isa<UnreachableInst>(IRBB->getTerminator())) {
+ auto *Br = State->Builder.CreateBr(IRBB);
+ Br->setOperand(0, nullptr);
+ IRBB->getTerminator()->eraseFromParent();
}
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
@@ -481,7 +478,7 @@ void VPIRBasicBlock::execute(VPTransformState *State) {
// backedges. A backward successor is set when the branch is created.
const auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
- assert(!TermBr->getSuccessor(idx) &&
+ assert((!TermBr->getSuccessor(idx) || TermBr->getSuccessor(idx) == IRBB) &&
"Trying to reset an existing successor block.");
TermBr->setSuccessor(idx, IRBB);
State->CFG.DTU.applyUpdates({{DominatorTree::Insert, PredBB, IRBB}});
@@ -670,16 +667,14 @@ void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
}
#endif
-static std::tuple<VPBlockBase *, VPBlockBase *, VPIRBasicBlock *>
-cloneFrom(VPBlockBase *Entry, VPIRBasicBlock *ScalarHeader = nullptr);
+static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry);
// Clone the CFG for all nodes reachable from \p Entry, this includes cloning
// the blocks and their recipes. Operands of cloned recipes will NOT be updated.
// Remapping of operands must be done separately. Returns a pair with the new
// entry and exiting blocks of the cloned region. If \p Entry isn't part of a
// region, return nullptr for the exiting block.
-static std::tuple<VPBlockBase *, VPBlockBase *, VPIRBasicBlock *>
-cloneFrom(VPBlockBase *Entry, VPIRBasicBlock *ScalarHeader) {
+static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry) {
DenseMap<VPBlockBase *, VPBlockBase *> Old2NewVPBlocks;
VPBlockBase *Exiting = nullptr;
bool InRegion = Entry->getParent();
@@ -725,14 +720,12 @@ cloneFrom(VPBlockBase *Entry, VPIRBasicBlock *ScalarHeader) {
}
#endif
- return std::tuple(
- Old2NewVPBlocks[Entry], Exiting ? Old2NewVPBlocks[Exiting] : nullptr,
- ScalarHeader ? cast<VPIRBasicBlock>(Old2NewVPBlocks[ScalarHeader])
- : nullptr);
+ return std::make_pair(Old2NewVPBlocks[Entry],
+ Exiting ? Old2NewVPBlocks[Exiting] : nullptr);
}
VPRegionBlock *VPRegionBlock::clone() {
- const auto &[NewEntry, NewExiting, _] = cloneFrom(getEntry());
+ const auto &[NewEntry, NewExiting] = cloneFrom(getEntry());
auto *NewRegion =
new VPRegionBlock(NewEntry, NewExiting, getName(), isReplicator());
for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry))
@@ -1042,19 +1035,8 @@ void VPlan::execute(VPTransformState *State) {
BasicBlock *MiddleBB = State->CFG.ExitBB;
VPBasicBlock *MiddleVPBB =
cast<VPBasicBlock>(getVectorLoopRegion()->getSingleSuccessor());
- // Find the VPBB for the scalar preheader, relying on the current structure
- // when creating the middle block and its successrs: if there's a single
- // predecessor, it must be the scalar preheader. Otherwise, the second
- // successor is the scalar preheader.
BasicBlock *ScalarPh = MiddleBB->getSingleSuccessor();
- auto &MiddleSuccs = MiddleVPBB->getSuccessors();
- assert((MiddleSuccs.size() == 1 || MiddleSuccs.size() == 2) &&
- "middle block has unexpected successors");
- VPBasicBlock *ScalarPhVPBB = cast<VPBasicBlock>(
- MiddleSuccs.size() == 1 ? MiddleSuccs[0] : MiddleSuccs[1]);
- assert(!isa<VPIRBasicBlock>(ScalarPhVPBB) &&
- "scalar preheader cannot be wrapped already");
- replaceVPBBWithIRVPBB(ScalarPhVPBB, ScalarPh);
+ replaceVPBBWithIRVPBB(getScalarPreheader(), ScalarPh);
replaceVPBBWithIRVPBB(MiddleVPBB, MiddleBB);
// Disconnect the middle block from its single successor (the scalar loop
@@ -1261,9 +1243,15 @@ static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
VPlan *VPlan::duplicate() {
// Clone blocks.
VPBasicBlock *NewPreheader = Preheader->clone();
- const auto &[NewEntry, __, NewScalarHeader] =
- cloneFrom(Entry, getScalarHeader());
-
+ const auto &[NewEntry, _] = cloneFrom(Entry);
+
+ BasicBlock *ScalarHeaderIRBB = getScalarHeader()->getIRBasicBlock();
+ VPIRBasicBlock *NewScalarHeader =
+ *find_if(VPBlockUtils::blocksOnly<VPIRBasicBlock>(
+ vp_depth_first_shallow(NewEntry)),
+ [ScalarHeaderIRBB](VPIRBasicBlock *VPIRBB) {
+ return ScalarHeaderIRBB == VPIRBB->getIRBasicBlock();
+ });
// Create VPlan, clone live-ins and remap operands in the cloned blocks.
auto *NewPlan =
new VPlan(NewPreheader, cast<VPBasicBlock>(NewEntry), NewScalarHeader);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 04c0fd41e918a6..5b0a244bff9628 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3651,9 +3651,10 @@ class VPlan {
DenseMap<const SCEV *, VPValue *> SCEVToExpansion;
public:
- /// Construct a VPlan with original preheader \p Preheader, trip count \p TC
- /// and \p Entry to the plan. At the moment, \p Preheader and \p Entry need to
- /// be disconnected, as the bypass blocks between them are not yet modeled in
+ /// Construct a VPlan with original preheader \p Preheader, trip count \p TC,
+ /// \p Entry to the plan and with \p ScalarHeader wrapping the original header
+ /// of the scalar loop. At the moment, \p Preheader and \p Entry need to be
+ /// disconnected, as the bypass blocks between them are not yet modeled in
/// VPlan.
VPlan(VPBasicBlock *Preheader, VPValue *TC, VPBasicBlock *Entry,
VPIRBasicBlock *ScalarHeader)
@@ -3661,8 +3662,9 @@ class VPlan {
TripCount = TC;
}
- /// Construct a VPlan with original preheader \p Preheader and \p Entry to
- /// the plan. At the moment, \p Preheader and \p Entry need to be
+ /// Construct a VPlan with original preheader \p Preheader, \p Entry to
+ /// the plan and with \p ScalarHeader wrapping the original header of the
+ /// scalar loop. At the moment, \p Preheader and \p Entry need to be
/// disconnected, as the bypass blocks between them are not yet modeled in
/// VPlan.
VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 0de9d1815e5d21..bcbf3865f72503 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -844,6 +844,8 @@ void VPIRInstruction::execute(VPTransformState &State) {
State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
Value *V = State.get(ExitValue, VPLane(Lane));
auto *Phi = cast<PHINode>(&I);
+ // If there is no existing block for PredBB in the phi, add a new incoming
+ // value. Otherwise update the existing incoming value for PredBB.
if (Phi->getBasicBlockIndex(PredBB) == -1)
Phi->addIncoming(V, PredBB);
else
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 58ce8b58f96e1f..9684f2b7be61c5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -376,8 +376,8 @@ static bool mergeBlocksIntoPredecessors(VPlan &Plan) {
SmallVector<VPBasicBlock *> WorkList;
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_deep(Plan.getEntry()))) {
- // Don't fold the exit block of the Plan into its single predecessor for
- // now.
+ // Don't fold the blocks in the skeleton of the Plan into its single
+ // predecessor for now.
// TODO: Remove restriction once more of the skeleton is modeled in VPlan.
if (!VPBB->getParent())
continue;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 89b3ed72b8eb65..040fd88ebc55bd 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -202,7 +202,6 @@ class VPUser {
/// Subclass identifier (for isa/dyn_cast).
enum class VPUserID {
Recipe,
- LiveOut,
};
private:
>From 922d066da1f71e35e359b1d68bde6f972fbcdaa3 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 29 Oct 2024 19:26:13 +0000
Subject: [PATCH 3/3] !fixup address latest comments, thanks!
---
.../Transforms/Vectorize/LoopVectorize.cpp | 91 ++++++++++---------
llvm/lib/Transforms/Vectorize/VPlan.cpp | 9 +-
llvm/lib/Transforms/Vectorize/VPlan.h | 18 ++--
.../Transforms/Vectorize/VPlanTransforms.cpp | 36 +++-----
llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 7 --
llvm/lib/Transforms/Vectorize/VPlanValue.h | 20 +---
6 files changed, 78 insertions(+), 103 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 22f751b7dedc9a..01f39af2e5242b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2711,7 +2711,8 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton(
| |
(opt) v <-- edge from middle to exit iff epilogue is not required.
| [ ] \
- | [ ]_| <-- old scalar loop to handle remainder (scalar epilogue).
+ | [ ]_| <-- old scalar loop to handle remainder (scalar epilogue, header
+ | | wrapped in VPIRBasicBlock).
\ |
\ v
>[ ] <-- exit block(s). (wrapped in VPIRBasicBlock)
@@ -8786,6 +8787,31 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
{CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL);
}
+/// Create resume phis in the scalar preheader for first-order recurrences and
+/// reductions and update the VPIRInstructions wrapping the original phis in the
+/// scalar header.
+static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
+ for (VPRecipeBase &R : *Plan.getScalarHeader()) {
+ auto *IRI = cast<VPIRInstruction>(&R);
+ if (!isa<PHINode>(IRI->getInstruction()))
+ break;
+
+ VPBuilder ScalarPHBuilder(Plan.getScalarPreheader());
+ auto *VectorR =
+ dyn_cast<VPHeaderPHIRecipe>(Builder.getRecipe(&IRI->getInstruction()));
+ if (isa<VPFirstOrderRecurrencePHIRecipe, VPReductionPHIRecipe>(VectorR)) {
+ StringRef Name = isa<VPFirstOrderRecurrencePHIRecipe>(VectorR)
+ ? "scalar.recur.init"
+ : "bc.merge.rdx";
+ auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(
+ VPInstruction::ResumePhi,
+ {VectorR->getBackedgeValue(), VectorR->getStartValue()}, {}, Name);
+
+ IRI->addOperand(ResumePhiRecipe);
+ }
+ }
+}
+
// Collect VPIRInstructions for phis in the original exit block that are modeled
// in VPlan and add the exiting VPValue as operand. Some exiting values are not
// modeled explicitly yet and won't be included. Those are un-truncated
@@ -8872,10 +8898,6 @@ addUsersInExitBlock(VPlan &Plan,
static void addLiveOutsForFirstOrderRecurrences(
VPlan &Plan, SetVector<VPIRInstruction *> &ExitUsersToFix) {
VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
-
- // Start by finding out if middle block branches to scalar preheader, which is
- // not a VPIRBasicBlock, unlike Exit block - the other possible successor of
- // middle block.
auto *ScalarPHVPBB = Plan.getScalarPreheader();
auto *MiddleVPBB = cast<VPBasicBlock>(VectorRegion->getSingleSuccessor());
VPBuilder ScalarPHBuilder(ScalarPHVPBB);
@@ -8957,38 +8979,30 @@ static void addLiveOutsForFirstOrderRecurrences(
// lo = lcssa.phi [s1, scalar.body],
// [vector.recur.extract.for.phi, middle.block]
//
- // Extract the resume value and update the VPIRInstrunction wrapping the
- // phi in the scalar header block.
- auto *Resume = MiddleBuilder.createNaryOp(VPInstruction::ExtractFromEnd,
- {FOR->getBackedgeValue(), OneVPV},
- {}, "vector.recur.extract");
- auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(
- VPInstruction::ResumePhi, {Resume, FOR->getStartValue()}, {},
- "scalar.recur.init");
- auto *FORPhi = cast<PHINode>(FOR->getUnderlyingInstr());
- VPIRInstruction *IRI = nullptr;
- for (VPRecipeBase &R : *Plan.getScalarHeader()) {
- IRI = cast<VPIRInstruction>(&R);
- if (&IRI->getInstruction() == FORPhi) {
- IRI->addOperand(ResumePhiRecipe);
- break;
- }
- IRI = nullptr;
- }
- assert(IRI && "IRI needs to be set, implying it had its operand updated");
-
+ // The resume value itself is extracted after the exit users are fixed below.
// Now update VPIRInstructions modeling LCSSA phis in the exit block.
// Extract the penultimate value of the recurrence and use it as operand for
// the VPIRInstruction modeling the phi.
+ auto *FORPhi = cast<PHINode>(FOR->getUnderlyingInstr());
for (VPIRInstruction *ExitIRI : ExitUsersToFix) {
- if (ExitIRI->getOperand(0) != FOR)
+ if (&ExitIRI->getInstruction() != FORPhi)
continue;
VPValue *Ext = MiddleBuilder.createNaryOp(
VPInstruction::ExtractFromEnd, {FOR->getBackedgeValue(), TwoVPV}, {},
"vector.recur.extract.for.phi");
- ExitIRI->setOperand(0, Ext);
+ ExitIRI->addOperand(Ext);
ExitUsersToFix.remove(ExitIRI);
}
+
+ // Extract the resume value and update the ResumePhi users in the scalar
+ // preheader.
+ auto *Resume = MiddleBuilder.createNaryOp(VPInstruction::ExtractFromEnd,
+ {FOR->getBackedgeValue(), OneVPV},
+ {}, "vector.recur.extract");
+ FOR->getBackedgeValue()->replaceUsesWithIf(Resume, [&Plan](VPUser &U,
+ unsigned) {
+ return cast<VPRecipeBase>(&U)->getParent() == Plan.getScalarPreheader();
+ });
}
}
@@ -9160,11 +9174,11 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
"VPBasicBlock");
RecipeBuilder.fixHeaderPhis();
+ addScalarResumePhis(RecipeBuilder, *Plan);
SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlock(
OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars());
addLiveOutsForFirstOrderRecurrences(*Plan, ExitUsersToFix);
addUsersInExitBlock(*Plan, ExitUsersToFix);
-
// ---------------------------------------------------------------------------
// Transform initial VPlan: Apply previously taken decisions, in order, to
// bring the VPlan to its final state.
@@ -9186,9 +9200,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
// Replace VPValues for known constant strides guaranteed by predicate scalar
// evolution.
auto CanUseVersionedStride = [&Plan](VPUser &U, unsigned) {
- auto *R = dyn_cast<VPRecipeBase>(&U);
- if (!R)
- return false;
+ auto *R = cast<VPRecipeBase>(&U);
return R->getParent()->getParent() ||
R->getParent() ==
Plan->getVectorLoopRegion()->getSinglePredecessor();
@@ -9319,8 +9331,10 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
for (VPUser *U : Cur->users()) {
auto *UserRecipe = cast<VPSingleDefRecipe>(U);
if (!UserRecipe->getParent()->getEnclosingLoopRegion()) {
- assert(UserRecipe->getParent() == MiddleVPBB &&
- "U must be either in the loop region or the middle block.");
+ assert((UserRecipe->getParent() == MiddleVPBB ||
+ UserRecipe->getParent() == Plan->getScalarPreheader()) &&
+ "U must be either in the loop region, the middle block or the "
+ "scalar preheader.");
continue;
}
Worklist.insert(UserRecipe);
@@ -9507,17 +9521,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
});
FinalReductionResult->insertBefore(*MiddleVPBB, IP);
- // Order is strict: if there are multiple successors, the first is the exit
- // block, second is the scalar preheader.
- VPBasicBlock *ScalarPHVPBB =
- cast<VPBasicBlock>(MiddleVPBB->getSuccessors().back());
- VPBuilder ScalarPHBuilder(ScalarPHVPBB);
- auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(
- VPInstruction::ResumePhi, {FinalReductionResult, PhiR->getStartValue()},
- {}, "bc.merge.rdx");
- auto *RedPhi = cast<PHINode>(PhiR->getUnderlyingInstr());
- Plan->addLiveOut(RedPhi, ResumePhiRecipe);
-
// Adjust AnyOf reductions; replace the reduction phi for the selected value
// with a boolean reduction phi node to check if the condition is true in
// any iteration. The final value is selected by the final
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 66c3ef72f58c69..0f8b22b5d8c4e3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -455,11 +455,8 @@ void VPIRBasicBlock::execute(VPTransformState *State) {
"VPIRBasicBlock can have at most two successors at the moment!");
State->Builder.SetInsertPoint(IRBB->getTerminator());
executeRecipes(State, IRBB);
- // Prepare branch instruction in IRBB. If there are no successors, there's
- // nothing to do. If IRBB's terminator is already a BranchInst, there's
- // nothing to do here. If it is unreachable, we don't cannot re-use an
- // existing branch and no branch has been created during recipe execution.
- // Create it now.
+ // Create a branch instruction to terminate IRBB if one was not created yet
+ // and is needed.
if (getSingleSuccessor() && isa<UnreachableInst>(IRBB->getTerminator())) {
auto *Br = State->Builder.CreateBr(IRBB);
Br->setOperand(0, nullptr);
@@ -1243,7 +1240,7 @@ static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
VPlan *VPlan::duplicate() {
// Clone blocks.
VPBasicBlock *NewPreheader = Preheader->clone();
- const auto &[NewEntry, _] = cloneFrom(Entry);
+ const auto &[NewEntry, __] = cloneFrom(Entry);
BasicBlock *ScalarHeaderIRBB = getScalarHeader()->getIRBasicBlock();
VPIRBasicBlock *NewScalarHeader =
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 5b0a244bff9628..bcf63824596d96 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -721,12 +721,12 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
public:
VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
DebugLoc DL = {})
- : VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe), DL(DL) {}
+ : VPDef(SC), VPUser(Operands), DL(DL) {}
template <typename IterT>
VPRecipeBase(const unsigned char SC, iterator_range<IterT> Operands,
DebugLoc DL = {})
- : VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe), DL(DL) {}
+ : VPDef(SC), VPUser(Operands), DL(DL) {}
virtual ~VPRecipeBase() = default;
/// Clone the current recipe.
@@ -780,9 +780,7 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
return true;
}
- static inline bool classof(const VPUser *U) {
- return U->getVPUserID() == VPUser::VPUserID::Recipe;
- }
+ static inline bool classof(const VPUser *U) { return true; }
/// Returns true if the recipe may have side-effects.
bool mayHaveSideEffects() const;
@@ -1423,6 +1421,12 @@ class VPIRInstruction : public VPRecipeBase {
"Op must be an operand of the recipe");
return true;
}
+
+ bool onlyFirstPartUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return true;
+ }
};
/// VPWidenRecipe is a recipe for producing a widened instruction using the
@@ -3709,10 +3713,10 @@ class VPlan {
const VPBasicBlock *getEntry() const { return Entry; }
/// Return the VPIRBasicBlock wrapping the header of the scalar loop.
- VPIRBasicBlock *getScalarHeader() { return ScalarHeader; }
+ VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
/// Return the VPBasicBlock for the preheader of the scalar loop.
- VPBasicBlock *getScalarPreheader() {
+ VPBasicBlock *getScalarPreheader() const {
return cast<VPBasicBlock>(ScalarHeader->getSinglePredecessor());
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 9684f2b7be61c5..f67dd33db4385f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -151,9 +151,7 @@ static bool sinkScalarOperands(VPlan &Plan) {
// SinkCandidate.
auto CanSinkWithUser = [SinkTo, &NeedsDuplicating,
SinkCandidate](VPUser *U) {
- auto *UI = dyn_cast<VPRecipeBase>(U);
- if (!UI)
- return false;
+ auto *UI = cast<VPRecipeBase>(U);
if (UI->getParent() == SinkTo)
return true;
NeedsDuplicating = UI->onlyFirstLaneUsed(SinkCandidate);
@@ -280,8 +278,7 @@ static bool mergeReplicateRegionsIntoSuccessors(VPlan &Plan) {
cast<VPPredInstPHIRecipe>(&Phi1ToMove)->getOperand(0);
VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
Phi1ToMoveV->replaceUsesWithIf(PredInst1, [Then2](VPUser &U, unsigned) {
- auto *UI = dyn_cast<VPRecipeBase>(&U);
- return UI && UI->getParent() == Then2;
+ return cast<VPRecipeBase>(&U)->getParent() == Then2;
});
// Remove phi recipes that are unused after merging the regions.
@@ -376,8 +373,8 @@ static bool mergeBlocksIntoPredecessors(VPlan &Plan) {
SmallVector<VPBasicBlock *> WorkList;
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_deep(Plan.getEntry()))) {
- // Don't fold the blocks in the skeleton of the Plan into its single
- // predecessor for now.
+ // Don't fold the blocks in the skeleton of the Plan into their single
+ // predecessors for now.
// TODO: Remove restriction once more of the skeleton is modeled in VPlan.
if (!VPBB->getParent())
continue;
@@ -750,9 +747,8 @@ sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
"only recipes with a single defined value expected");
for (VPUser *User : Current->getVPSingleValue()->users()) {
- if (auto *R = dyn_cast<VPRecipeBase>(User))
- if (!TryToPushSinkCandidate(R))
- return false;
+ if (!TryToPushSinkCandidate(cast<VPRecipeBase>(User)))
+ return false;
}
}
@@ -786,16 +782,14 @@ static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR,
// Find the closest hoist point by looking at all users of FOR and selecting
// the recipe dominating all other users.
for (VPUser *U : FOR->users()) {
- auto *R = dyn_cast<VPRecipeBase>(U);
- if (!R)
- continue;
+ auto *R = cast<VPRecipeBase>(U);
if (!HoistPoint || VPDT.properlyDominates(R, HoistPoint))
HoistPoint = R;
}
assert(all_of(FOR->users(),
[&VPDT, HoistPoint](VPUser *U) {
- auto *R = dyn_cast<VPRecipeBase>(U);
- return !R || HoistPoint == R ||
+ auto *R = cast<VPRecipeBase>(U);
+ return HoistPoint == R ||
VPDT.properlyDominates(HoistPoint, R);
}) &&
"HoistPoint must dominate all users of FOR");
@@ -922,8 +916,8 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
static SmallVector<VPUser *> collectUsersRecursively(VPValue *V) {
SetVector<VPUser *> Users(V->user_begin(), V->user_end());
for (unsigned I = 0; I != Users.size(); ++I) {
- VPRecipeBase *Cur = dyn_cast<VPRecipeBase>(Users[I]);
- if (!Cur || isa<VPHeaderPHIRecipe>(Cur))
+ VPRecipeBase *Cur = cast<VPRecipeBase>(Users[I]);
+ if (isa<VPHeaderPHIRecipe>(Cur))
continue;
for (VPValue *V : Cur->definedValues())
Users.insert(V->user_begin(), V->user_end());
@@ -1044,9 +1038,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
R.getParent()->getPlan()->getCanonicalIV()->getScalarType());
assert(TypeInfo.inferScalarType(A) == TypeInfo2.inferScalarType(A));
for (VPUser *U : A->users()) {
- auto *R = dyn_cast<VPRecipeBase>(U);
- if (!R)
- continue;
+ auto *R = cast<VPRecipeBase>(U);
for (VPValue *VPV : R->definedValues())
assert(TypeInfo.inferScalarType(VPV) == TypeInfo2.inferScalarType(VPV));
}
@@ -1455,9 +1447,7 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
for (VPValue *HeaderMask : collectAllHeaderMasks(Plan)) {
for (VPUser *U : collectUsersRecursively(HeaderMask)) {
- auto *CurRecipe = dyn_cast<VPRecipeBase>(U);
- if (!CurRecipe)
- continue;
+ auto *CurRecipe = cast<VPRecipeBase>(U);
auto GetNewMask = [&](VPValue *OrigMask) -> VPValue * {
assert(OrigMask && "Unmasked recipe when folding tail");
return HeaderMask == OrigMask ? nullptr : OrigMask;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 144a296fabad81..15dcf4dc0d91ee 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -264,13 +264,6 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
return;
if (auto *VPI = dyn_cast<VPInstruction>(&R)) {
- VPValue *Op0, *Op1;
- if (match(VPI, m_VPInstruction<VPInstruction::ResumePhi>(m_VPValue(Op0),
- m_VPValue(Op1)))) {
- addUniformForAllParts(VPI);
- return;
- }
-
if (vputils::onlyFirstPartUsed(VPI)) {
addUniformForAllParts(VPI);
return;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 040fd88ebc55bd..00a0304adb58d7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -198,33 +198,23 @@ raw_ostream &operator<<(raw_ostream &OS, const VPValue &V);
/// This class augments VPValue with operands which provide the inverse def-use
/// edges from VPValue's users to their defs.
class VPUser {
-public:
- /// Subclass identifier (for isa/dyn_cast).
- enum class VPUserID {
- Recipe,
- };
-
-private:
SmallVector<VPValue *, 2> Operands;
- VPUserID ID;
-
protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the operands to \p O.
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const;
#endif
- VPUser(ArrayRef<VPValue *> Operands, VPUserID ID) : ID(ID) {
+ VPUser(ArrayRef<VPValue *> Operands) {
for (VPValue *Operand : Operands)
addOperand(Operand);
}
- VPUser(std::initializer_list<VPValue *> Operands, VPUserID ID)
- : VPUser(ArrayRef<VPValue *>(Operands), ID) {}
+ VPUser(std::initializer_list<VPValue *> Operands)
+ : VPUser(ArrayRef<VPValue *>(Operands)) {}
- template <typename IterT>
- VPUser(iterator_range<IterT> Operands, VPUserID ID) : ID(ID) {
+ template <typename IterT> VPUser(iterator_range<IterT> Operands) {
for (VPValue *Operand : Operands)
addOperand(Operand);
}
@@ -238,8 +228,6 @@ class VPUser {
Op->removeUser(*this);
}
- VPUserID getVPUserID() const { return ID; }
-
void addOperand(VPValue *Operand) {
Operands.push_back(Operand);
Operand->addUser(*this);
More information about the llvm-commits
mailing list