[llvm] [VPlan] Update scalar induction resume values in VPlan. (PR #110577)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 30 14:35:18 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-systemz
@llvm/pr-subscribers-llvm-transforms
Author: Florian Hahn (fhahn)
<details>
<summary>Changes</summary>
Updated ILV.crateInductionResumeValues to directly update the
VPIRInstructiosn wrapping the original phis with the created resume
values.
This is the first step towards modeling them completely in VPlan.
Subsequent patches will move creation of the resume values completely
into VPlan.
Builds on top of https://github.com/llvm/llvm-project/pull/109975, which
is included in this PR.
---
Patch is 351.92 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/110577.diff
56 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+93-55)
- (modified) llvm/lib/Transforms/Vectorize/VPlan.cpp (+16-23)
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (-53)
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+6-31)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+1-1)
- (modified) llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp (+7-6)
- (modified) llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp (-14)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll (+15-15)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll (+9-9)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/ARM/tail-folding-scalar-epilogue-fallback.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll (+8-8)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll (+4-6)
- (modified) llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-hoist-load-across-store.ll (+8-8)
- (modified) llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr72969.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/X86/small-size.ll (+99-99)
- (modified) llvm/test/Transforms/LoopVectorize/branch-weights.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/create-induction-resume.ll (+5-5)
- (modified) llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/epilog-vectorization-trunc-induction-steps.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll (+55-55)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-interleave-only.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll (+13-13)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll (+246-246)
- (modified) llvm/test/Transforms/LoopVectorize/float-induction.ll (+24-24)
- (modified) llvm/test/Transforms/LoopVectorize/if-pred-stores.ll (+23-23)
- (modified) llvm/test/Transforms/LoopVectorize/induction.ll (+171-171)
- (modified) llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll (+10-10)
- (modified) llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/pr45259.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/pr59319-loop-access-info-invalidation.ll (+16-16)
- (modified) llvm/test/Transforms/LoopVectorize/pr66616.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/scev-exit-phi-invalidation.ll (+11-11)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll (+18)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 034765bee40e7b..674c2176f2a504 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -467,11 +467,12 @@ class InnerLoopVectorizer {
ElementCount MinProfitableTripCount,
unsigned UnrollFactor, LoopVectorizationLegality *LVL,
LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI,
- ProfileSummaryInfo *PSI, GeneratedRTChecks &RTChecks)
+ ProfileSummaryInfo *PSI, GeneratedRTChecks &RTChecks,
+ VPlan &Plan)
: OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI),
AC(AC), ORE(ORE), VF(VecWidth), UF(UnrollFactor),
Builder(PSE.getSE()->getContext()), Legal(LVL), Cost(CM), BFI(BFI),
- PSI(PSI), RTChecks(RTChecks) {
+ PSI(PSI), RTChecks(RTChecks), Plan(Plan) {
// Query this against the original loop and save it here because the profile
// of the original loop header may change as the transformation happens.
OptForSizeBasedOnProfile = llvm::shouldOptimizeForSize(
@@ -522,7 +523,7 @@ class InnerLoopVectorizer {
/// and the resume values can come from an additional bypass block, the \p
/// AdditionalBypass pair provides information about the bypass block and the
/// end value on the edge from bypass to this loop.
- PHINode *createInductionResumeValue(
+ void createInductionResumeValue(
PHINode *OrigPhi, const InductionDescriptor &ID, Value *Step,
ArrayRef<BasicBlock *> BypassBlocks,
std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr, nullptr});
@@ -535,6 +536,11 @@ class InnerLoopVectorizer {
/// count of the original loop for both main loop and epilogue vectorization.
void setTripCount(Value *TC) { TripCount = TC; }
+ std::pair<BasicBlock *, Value *>
+ getInductionBypassValue(PHINode *OrigPhi) const {
+ return InductionBypassValues.find(OrigPhi)->second;
+ }
+
protected:
friend class LoopVectorizationPlanner;
@@ -680,6 +686,11 @@ class InnerLoopVectorizer {
/// Structure to hold information about generated runtime checks, responsible
/// for cleaning the checks, if vectorization turns out unprofitable.
GeneratedRTChecks &RTChecks;
+
+ /// Mapping of induction phis to their bypass values and bypass blocks.
+ DenseMap<PHINode *, std::pair<BasicBlock *, Value *>> InductionBypassValues;
+
+ VPlan &Plan;
};
/// Encapsulate information regarding vectorization of a loop and its epilogue.
@@ -721,10 +732,10 @@ class InnerLoopAndEpilogueVectorizer : public InnerLoopVectorizer {
OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI,
LoopVectorizationLegality *LVL, llvm::LoopVectorizationCostModel *CM,
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
- GeneratedRTChecks &Checks)
+ GeneratedRTChecks &Checks, VPlan &Plan)
: InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
EPI.MainLoopVF, EPI.MainLoopVF, EPI.MainLoopUF, LVL,
- CM, BFI, PSI, Checks),
+ CM, BFI, PSI, Checks, Plan),
EPI(EPI) {}
// Override this function to handle the more complex control flow around the
@@ -761,9 +772,9 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI,
LoopVectorizationLegality *LVL, llvm::LoopVectorizationCostModel *CM,
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
- GeneratedRTChecks &Check)
+ GeneratedRTChecks &Check, VPlan &Plan)
: InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
- EPI, LVL, CM, BFI, PSI, Check) {}
+ EPI, LVL, CM, BFI, PSI, Check, Plan) {}
/// Implements the interface for creating a vectorized skeleton using the
/// *main loop* strategy (ie the first pass of vplan execution).
std::pair<BasicBlock *, Value *>
@@ -790,9 +801,9 @@ class EpilogueVectorizerEpilogueLoop : public InnerLoopAndEpilogueVectorizer {
OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI,
LoopVectorizationLegality *LVL, llvm::LoopVectorizationCostModel *CM,
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
- GeneratedRTChecks &Checks)
+ GeneratedRTChecks &Checks, VPlan &Plan)
: InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
- EPI, LVL, CM, BFI, PSI, Checks) {
+ EPI, LVL, CM, BFI, PSI, Checks, Plan) {
TripCount = EPI.TripCount;
}
/// Implements the interface for creating a vectorized skeleton using the
@@ -2555,7 +2566,18 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
nullptr, Twine(Prefix) + "scalar.ph");
}
-PHINode *InnerLoopVectorizer::createInductionResumeValue(
+static void addOperandToPhiInVPIRBasicBlock(VPIRBasicBlock *VPBB, PHINode *P,
+ VPValue *Op) {
+ for (VPRecipeBase &R : *VPBB) {
+ auto *IRI = cast<VPIRInstruction>(&R);
+ if (&IRI->getInstruction() == P) {
+ IRI->addOperand(Op);
+ break;
+ }
+ }
+}
+
+void InnerLoopVectorizer::createInductionResumeValue(
PHINode *OrigPhi, const InductionDescriptor &II, Value *Step,
ArrayRef<BasicBlock *> BypassBlocks,
std::pair<BasicBlock *, Value *> AdditionalBypass) {
@@ -2590,27 +2612,28 @@ PHINode *InnerLoopVectorizer::createInductionResumeValue(
}
}
- // Create phi nodes to merge from the backedge-taken check block.
- PHINode *BCResumeVal =
- PHINode::Create(OrigPhi->getType(), 3, "bc.resume.val",
- LoopScalarPreHeader->getFirstNonPHIIt());
- // Copy original phi DL over to the new one.
- BCResumeVal->setDebugLoc(OrigPhi->getDebugLoc());
+ VPBasicBlock *MiddleVPBB =
+ cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
- // The new PHI merges the original incoming value, in case of a bypass,
- // or the value at the end of the vectorized loop.
- BCResumeVal->addIncoming(EndValue, LoopMiddleBlock);
+ VPBasicBlock *ScalarPHVPBB = nullptr;
+ if (MiddleVPBB->getNumSuccessors() == 2) {
+ // Order is strict: first is the exit block, second is the scalar preheader.
+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
+ } else {
+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
+ }
- // Fix the scalar body counter (PHI node).
- // The old induction's phi node in the scalar body needs the truncated
- // value.
- for (BasicBlock *BB : BypassBlocks)
- BCResumeVal->addIncoming(II.getStartValue(), BB);
+ VPBuilder ScalarPHBuilder(ScalarPHVPBB);
+ auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(
+ VPInstruction::ResumePhi,
+ {Plan.getOrAddLiveIn(EndValue), Plan.getOrAddLiveIn(II.getStartValue())},
+ OrigPhi->getDebugLoc(), "bc.resume.val");
- if (AdditionalBypass.first)
- BCResumeVal->setIncomingValueForBlock(AdditionalBypass.first,
- EndValueFromAdditionalBypass);
- return BCResumeVal;
+ auto *ScalarLoopHeader =
+ cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor());
+ addOperandToPhiInVPIRBasicBlock(ScalarLoopHeader, OrigPhi, ResumePhiRecipe);
+ InductionBypassValues[OrigPhi] = {AdditionalBypass.first,
+ EndValueFromAdditionalBypass};
}
/// Return the expanded step for \p ID using \p ExpandedSCEVs to look up SCEV
@@ -2643,10 +2666,8 @@ void InnerLoopVectorizer::createInductionResumeValues(
for (const auto &InductionEntry : Legal->getInductionVars()) {
PHINode *OrigPhi = InductionEntry.first;
const InductionDescriptor &II = InductionEntry.second;
- PHINode *BCResumeVal = createInductionResumeValue(
- OrigPhi, II, getExpandedStep(II, ExpandedSCEVs), LoopBypassBlocks,
- AdditionalBypass);
- OrigPhi->setIncomingValueForBlock(LoopScalarPreHeader, BCResumeVal);
+ createInductionResumeValue(OrigPhi, II, getExpandedStep(II, ExpandedSCEVs),
+ LoopBypassBlocks, AdditionalBypass);
}
}
@@ -2931,10 +2952,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
IVEndValues[Entry.first], LoopMiddleBlock, Plan, State);
}
- // Fix live-out phis not already fixed earlier.
- for (const auto &KV : Plan.getLiveOuts())
- KV.second->fixPhi(Plan, State);
-
for (Instruction *PI : PredicatedInstructions)
sinkScalarOperands(&*PI);
@@ -7682,6 +7699,25 @@ EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton(
// the second pass for the scalar loop. The induction resume values for the
// inductions in the epilogue loop are created before executing the plan for
// the epilogue loop.
+ for (VPRecipeBase &R :
+ Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
+ // Create induction resume values for both widened pointer and
+ // integer/fp inductions and update the start value of the induction
+ // recipes to use the resume value.
+ PHINode *IndPhi = nullptr;
+ const InductionDescriptor *ID;
+ if (auto *Ind = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
+ IndPhi = cast<PHINode>(Ind->getUnderlyingValue());
+ ID = &Ind->getInductionDescriptor();
+ } else if (auto *WidenInd = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) {
+ IndPhi = WidenInd->getPHINode();
+ ID = &WidenInd->getInductionDescriptor();
+ } else
+ continue;
+
+ createInductionResumeValue(IndPhi, *ID, getExpandedStep(*ID, ExpandedSCEVs),
+ LoopBypassBlocks);
+ }
return {LoopVectorPreHeader, nullptr};
}
@@ -8852,7 +8888,9 @@ static void addLiveOutsForFirstOrderRecurrences(
VPInstruction::ResumePhi, {Resume, FOR->getStartValue()}, {},
"scalar.recur.init");
auto *FORPhi = cast<PHINode>(FOR->getUnderlyingInstr());
- Plan.addLiveOut(FORPhi, ResumePhiRecipe);
+ addOperandToPhiInVPIRBasicBlock(
+ cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor()), FORPhi,
+ ResumePhiRecipe);
// Now update VPIRInstructions modeling LCSSA phis in the exit block.
// Extract the penultimate value of the recurrence and use it as operand for
@@ -9579,7 +9617,7 @@ static bool processLoopInVPlanNativePath(
GeneratedRTChecks Checks(*PSE.getSE(), DT, LI, TTI,
F->getDataLayout(), AddBranchWeights);
InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width,
- VF.Width, 1, LVL, &CM, BFI, PSI, Checks);
+ VF.Width, 1, LVL, &CM, BFI, PSI, Checks, BestPlan);
LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \""
<< L->getHeader()->getParent()->getName() << "\"\n");
LVP.executePlan(VF.Width, 1, BestPlan, LB, DT, false);
@@ -10067,11 +10105,11 @@ bool LoopVectorizePass::processLoop(Loop *L) {
assert(IC > 1 && "interleave count should not be 1 or 0");
// If we decided that it is not legal to vectorize the loop, then
// interleave it.
+ VPlan &BestPlan = LVP.getPlanFor(VF.Width);
InnerLoopVectorizer Unroller(
L, PSE, LI, DT, TLI, TTI, AC, ORE, ElementCount::getFixed(1),
- ElementCount::getFixed(1), IC, &LVL, &CM, BFI, PSI, Checks);
+ ElementCount::getFixed(1), IC, &LVL, &CM, BFI, PSI, Checks, BestPlan);
- VPlan &BestPlan = LVP.getPlanFor(VF.Width);
LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT, false);
ORE->emit([&]() {
@@ -10093,10 +10131,11 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// to be vectorized by executing the plan (potentially with a different
// factor) again shortly afterwards.
EpilogueLoopVectorizationInfo EPI(VF.Width, IC, EpilogueVF.Width, 1);
+ std::unique_ptr<VPlan> BestMainPlan(BestPlan.duplicate());
EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE,
- EPI, &LVL, &CM, BFI, PSI, Checks);
+ EPI, &LVL, &CM, BFI, PSI, Checks,
+ *BestMainPlan);
- std::unique_ptr<VPlan> BestMainPlan(BestPlan.duplicate());
auto ExpandedSCEVs = LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF,
*BestMainPlan, MainILV, DT, true);
++LoopsVectorized;
@@ -10105,11 +10144,11 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// edges from the first pass.
EPI.MainLoopVF = EPI.EpilogueVF;
EPI.MainLoopUF = EPI.EpilogueUF;
+ VPlan &BestEpiPlan = LVP.getPlanFor(EPI.EpilogueVF);
EpilogueVectorizerEpilogueLoop EpilogILV(L, PSE, LI, DT, TLI, TTI, AC,
ORE, EPI, &LVL, &CM, BFI, PSI,
- Checks);
+ Checks, BestEpiPlan);
- VPlan &BestEpiPlan = LVP.getPlanFor(EPI.EpilogueVF);
VPRegionBlock *VectorLoop = BestEpiPlan.getVectorLoopRegion();
VPBasicBlock *Header = VectorLoop->getEntryBasicBlock();
Header->setName("vec.epilog.vector.body");
@@ -10158,23 +10197,16 @@ bool LoopVectorizePass::processLoop(Loop *L) {
RdxDesc.getRecurrenceStartValue());
}
} else {
- // Create induction resume values for both widened pointer and
- // integer/fp inductions and update the start value of the induction
- // recipes to use the resume value.
+ // Retrive the induction resume values for wide inductions from
+ // their original phi nodes in the scalar loop
PHINode *IndPhi = nullptr;
- const InductionDescriptor *ID;
if (auto *Ind = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
IndPhi = cast<PHINode>(Ind->getUnderlyingValue());
- ID = &Ind->getInductionDescriptor();
} else {
auto *WidenInd = cast<VPWidenIntOrFpInductionRecipe>(&R);
IndPhi = WidenInd->getPHINode();
- ID = &WidenInd->getInductionDescriptor();
}
-
- ResumeV = MainILV.createInductionResumeValue(
- IndPhi, *ID, getExpandedStep(*ID, ExpandedSCEVs),
- {EPI.MainLoopIterationCountCheck});
+ ResumeV = IndPhi->getIncomingValueForBlock(L->getLoopPreheader());
}
assert(ResumeV && "Must have a resume value");
VPValue *StartVal = BestEpiPlan.getOrAddLiveIn(ResumeV);
@@ -10186,13 +10218,19 @@ bool LoopVectorizePass::processLoop(Loop *L) {
LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV,
DT, true, &ExpandedSCEVs);
++LoopsEpilogueVectorized;
+ BasicBlock *PH = L->getLoopPreheader();
+ for (const auto &[IVPhi, _] : LVL.getInductionVars()) {
+ auto *Inc = cast<PHINode>(IVPhi->getIncomingValueForBlock(PH));
+ const auto &[BB, V] = EpilogILV.getInductionBypassValue(IVPhi);
+ Inc->setIncomingValueForBlock(BB, V);
+ }
if (!MainILV.areSafetyChecksAdded())
DisableRuntimeUnroll = true;
} else {
InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width,
VF.MinProfitableTripCount, IC, &LVL, &CM, BFI,
- PSI, Checks);
+ PSI, Checks, BestPlan);
LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false);
++LoopsVectorized;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 4247d20cb0e530..64e5cde112d44e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -456,10 +456,17 @@ void VPIRBasicBlock::execute(VPTransformState *State) {
State->Builder.SetInsertPoint(getIRBasicBlock()->getTerminator());
executeRecipes(State, getIRBasicBlock());
if (getSingleSuccessor()) {
- assert(isa<UnreachableInst>(getIRBasicBlock()->getTerminator()));
- auto *Br = State->Builder.CreateBr(getIRBasicBlock());
- Br->setOperand(0, nullptr);
- getIRBasicBlock()->getTerminator()->eraseFromParent();
+ auto *SuccVPIRBB = dyn_cast<VPIRBasicBlock>(getSingleSuccessor());
+ if (SuccVPIRBB && SuccVPIRBB->getIRBasicBlock() ==
+ getIRBasicBlock()->getSingleSuccessor()) {
+ cast<BranchInst>(getIRBasicBlock()->getTerminator())
+ ->setOperand(0, nullptr);
+ } else {
+ assert(isa<UnreachableInst>(getIRBasicBlock()->getTerminator()));
+ auto *Br = State->Builder.CreateBr(getIRBasicBlock());
+ Br->setOperand(0, nullptr);
+ getIRBasicBlock()->getTerminator()->eraseFromParent();
+ }
}
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
@@ -843,10 +850,6 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
#endif
VPlan::~VPlan() {
- for (auto &KV : LiveOuts)
- delete KV.second;
- LiveOuts.clear();
-
if (Entry) {
VPValue DummyValue;
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
@@ -902,6 +905,9 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion);
VPBasicBlock *ScalarPH = new VPBasicBlock("scalar.ph");
+ VPBasicBlock *ScalarHeader =
+ VPIRBasicBlock::fromBasicBlock(TheLoop->getHeader());
+ VPBlockUtils::connectBlocks(ScalarPH, ScalarHeader);
if (!RequiresScalarEpilogueCheck) {
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
return Plan;
@@ -1051,6 +1057,8 @@ void VPlan::execute(VPTransformState *State) {
BrInst->insertBefore(MiddleBB->getTerminator());
MiddleBB->getTerminator()->eraseFromParent();
State->CFG.DTU.applyUpdates({{DominatorTree::Delete, MiddleBB, ScalarPh}});
+ State->CFG.DTU.applyUpdates(
+ {{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
// Generate code in the loop pre-header and body.
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
@@ -1169,12 +1177,6 @@ void VPlan::print(raw_ostream &O) const {
Block->print(O, "", SlotTracker);
}
- if (!LiveOuts.empty())
- O << "\n";
- for (const auto &KV : LiveOuts) {
- KV.second->print(O, SlotTracker);
- }
-
O << "}\n";
}
@@ -1211,11 +1213,6 @@ LLVM_DUMP_METHOD
void VPlan::dump() const { print(dbgs()); }
#endif
-void VPlan::addLiveOut(PHINode *PN, VPValue *V) {
- assert(LiveOuts.count(PN) == 0 && "an exit value for PN already exists");
- LiveOuts.insert({PN, new VPLiveOut(PN, V)});
-}
-
static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
DenseMap<VPValue *, VPValue *> &Old2NewVPValues) {
// Update the operands of all cloned recipes starting at NewEntry. This
@@ -1283,10 +1280,6 @@ VPlan *VPlan::duplicate() {
remapOperands(Preheader, NewPreheader, Old2NewVPValues);
remapOperands(Entry, NewEntry, Old2NewVPValues);
- // Clone live-outs.
- for (const auto &[_, LO] : LiveOuts)
- NewPlan->addLiveOut(LO->getPhi(), Old2NewVPValues[LO->getOperand(0)]);
-
// Initialize remaining fields of cloned VPlan.
NewPlan->VFs = VFs;
NewPlan->UFs = UFs;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 8392aec8ad396e..41339eb701b2cb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -655,48 +655,6 @@ class VPBlockBase {
virtual VPBlockBase *clone() = 0;
};
-/// A value that is used outside the VPlan. The operand of the user needs to be
-/// added to the associated phi node. The incoming block from VPlan is
-/// determined by where the VPValue is defined: if it is defined by a recipe
-/// outside a region, its parent block is used, otherwise the middle block is
-/// used.
-class VPLiveOut : public VPUser {
- PHINode *Phi;
-
-public:
- VPLiveOut(PHINode *Phi, V...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/110577
More information about the llvm-commits
mailing list