[llvm] 256c6b0 - [VPlan] Model pre-header explicitly.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Sat Apr 9 05:25:39 PDT 2022
Author: Florian Hahn
Date: 2022-04-09T14:19:47+02:00
New Revision: 256c6b0ba14e8a7ab6373b61b7193ea8c0a3651c
URL: https://github.com/llvm/llvm-project/commit/256c6b0ba14e8a7ab6373b61b7193ea8c0a3651c
DIFF: https://github.com/llvm/llvm-project/commit/256c6b0ba14e8a7ab6373b61b7193ea8c0a3651c.diff
LOG: [VPlan] Model pre-header explicitly.
This patch extends the scope of VPlan to also model the pre-header.
The pre-header can be used to place recipes that should be code-gen'd
outside the loop, like SCEV expansion.
Depends on D121623.
Reviewed By: Ayal
Differential Revision: https://reviews.llvm.org/D121624
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/VPlan.cpp
llvm/lib/Transforms/Vectorize/VPlan.h
llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
llvm/test/Transforms/LoopVectorize/X86/small-size.ll
llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll
llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll
llvm/test/Transforms/LoopVectorize/vplan-printing.ll
llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll
llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 5de2c3299765b..f0c14da981425 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3102,10 +3102,9 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
BrInst->setDebugLoc(ScalarLatchTerm->getDebugLoc());
ReplaceInstWithInst(LoopMiddleBlock->getTerminator(), BrInst);
- SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT, LI,
- nullptr, Twine(Prefix) + "vector.body");
-
- // Update dominator for loop exit.
+ // Update dominator for loop exit. During skeleton creation, only the vector
+ // pre-header and the middle block are created. The vector loop is entirely
+ // created during VPlan execution.
if (!Cost->requiresScalarEpilogue(VF))
// If there is an epilogue which must run, there's no edge from the
// middle block to exit blocks and thus no need to update the immediate
@@ -3244,7 +3243,7 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton() {
|/ |
| v
| [ ] \
- | [ ]_| <-- vector loop.
+ | [ ]_| <-- vector loop (created during VPlan execution).
| |
| v
\ -[ ] <--- middle-block.
@@ -7600,10 +7599,11 @@ void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF,
// Perform the actual loop transformation.
- // 1. Create a new empty loop. Unlink the old loop and connect the new one.
+ // 1. Set up the skeleton for vectorization, including vector pre-header and
+ // middle block. The vector loop is created during VPlan execution.
VPTransformState State{BestVF, BestUF, LI, DT, ILV.Builder, &ILV, &BestVPlan};
Value *CanonicalIVStartValue;
- std::tie(State.CFG.VectorPreHeader, CanonicalIVStartValue) =
+ std::tie(State.CFG.PrevBB, CanonicalIVStartValue) =
ILV.createVectorizedLoopSkeleton();
ILV.collectPoisonGeneratingRecipes(State);
@@ -8670,8 +8670,6 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, DebugLoc DL,
auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, DL);
VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
VPBasicBlock *Header = TopRegion->getEntryBasicBlock();
- if (IsVPlanNative)
- Header = cast<VPBasicBlock>(Header->getSingleSuccessor());
Header->insert(CanonicalIVPHI, Header->begin());
auto *CanonicalIVIncrement =
@@ -8681,10 +8679,8 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, DebugLoc DL,
CanonicalIVPHI->addOperand(CanonicalIVIncrement);
VPBasicBlock *EB = TopRegion->getExitBasicBlock();
- if (IsVPlanNative) {
- EB = cast<VPBasicBlock>(EB->getSinglePredecessor());
+ if (IsVPlanNative)
EB->setCondBit(nullptr);
- }
EB->appendRecipe(CanonicalIVIncrement);
auto *BranchOnCount =
@@ -8753,12 +8749,17 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
// visit each basic block after having visited its predecessor basic blocks.
// ---------------------------------------------------------------------------
- // Create initial VPlan skeleton, with separate header and latch blocks.
- VPBasicBlock *HeaderVPBB = new VPBasicBlock();
+ // Create initial VPlan skeleton, starting with a block for the pre-header,
+ // followed by a region for the vector loop. The skeleton vector loop region
+ // contains a header and latch block.
+ VPBasicBlock *Preheader = new VPBasicBlock("vector.ph");
+ auto Plan = std::make_unique<VPlan>(Preheader);
+
+ VPBasicBlock *HeaderVPBB = new VPBasicBlock("vector.body");
VPBasicBlock *LatchVPBB = new VPBasicBlock("vector.latch");
VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB);
auto *TopRegion = new VPRegionBlock(HeaderVPBB, LatchVPBB, "vector loop");
- auto Plan = std::make_unique<VPlan>(TopRegion);
+ VPBlockUtils::insertBlockAfter(TopRegion, Preheader);
Instruction *DLInst =
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction());
@@ -8777,7 +8778,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
// Relevant instructions from basic block BB will be grouped into VPRecipe
// ingredients and fill a new VPBasicBlock.
unsigned VPBBsForBB = 0;
- VPBB->setName(BB->getName());
+ if (VPBB != HeaderVPBB)
+ VPBB->setName(BB->getName());
Builder.setInsertPoint(VPBB);
// Introduce each ingredient into VPlan.
@@ -9065,6 +9067,31 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
[this](PHINode *P) { return Legal->getIntOrFpInductionDescriptor(P); },
DeadInstructions, *PSE.getSE());
+ // Update plan to be compatible with the inner loop vectorizer for
+ // code-generation.
+ VPRegionBlock *LoopRegion = Plan->getVectorLoopRegion();
+ VPBasicBlock *Preheader = LoopRegion->getEntryBasicBlock();
+ VPBasicBlock *Exit = LoopRegion->getExitBasicBlock();
+ VPBlockBase *Latch = Exit->getSinglePredecessor();
+ VPBlockBase *Header = Preheader->getSingleSuccessor();
+
+ // 1. Move preheader block out of main vector loop.
+ Preheader->setParent(LoopRegion->getParent());
+ VPBlockUtils::disconnectBlocks(Preheader, Header);
+ VPBlockUtils::connectBlocks(Preheader, LoopRegion);
+ Plan->setEntry(Preheader);
+
+ // 2. Disconnect backedge and exit block.
+ VPBlockUtils::disconnectBlocks(Latch, Header);
+ VPBlockUtils::disconnectBlocks(Latch, Exit);
+
+ // 3. Update entry and exit of main vector loop region.
+ LoopRegion->setEntry(Header);
+ LoopRegion->setExit(Latch);
+
+ // 4. Remove exit block.
+ delete Exit;
+
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), DebugLoc(),
true, true);
return Plan;
@@ -9442,13 +9469,14 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
auto &DL = EntryVal->getModule()->getDataLayout();
+ BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
// Generate code for the induction step. Note that induction steps are
// required to be loop-invariant
auto CreateStepValue = [&](const SCEV *Step) -> Value * {
if (SE.isSCEVable(IV->getType())) {
SCEVExpander Exp(SE, DL, "induction");
return Exp.expandCodeFor(Step, Step->getType(),
- State.CFG.VectorPreHeader->getTerminator());
+ VectorPH->getTerminator());
}
return cast<SCEVUnknown>(Step)->getValue();
};
@@ -9466,7 +9494,7 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
// Construct the initial value of the vector IV in the vector loop preheader
auto CurrIP = Builder.saveIP();
- Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator());
+ Builder.SetInsertPoint(VectorPH->getTerminator());
if (isa<TruncInst>(EntryVal)) {
assert(Start->getType()->isIntegerTy() &&
"Truncation requires an integer type");
@@ -9530,13 +9558,13 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
}
LastInduction->setName("vec.ind.next");
- VecInd->addIncoming(SteppedStart, State.CFG.VectorPreHeader);
+ VecInd->addIncoming(SteppedStart, VectorPH);
// Add induction update using an incorrect block temporarily. The phi node
// will be fixed after VPlan execution. Note that at this point the latch
// block cannot be used, as it does not exist yet.
// TODO: Model increment value in VPlan, by turning the recipe into a
// multi-def and a subclass of VPHeaderPHIRecipe.
- VecInd->addIncoming(LastInduction, State.CFG.VectorPreHeader);
+ VecInd->addIncoming(LastInduction, VectorPH);
}
void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
@@ -9591,7 +9619,9 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
Type *ScStValueType = ScalarStartValue->getType();
PHINode *NewPointerPhi =
PHINode::Create(ScStValueType, 2, "pointer.phi", CanonicalIV);
- NewPointerPhi->addIncoming(ScalarStartValue, State.CFG.VectorPreHeader);
+
+ BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
+ NewPointerPhi->addIncoming(ScalarStartValue, VectorPH);
// A pointer induction, performed by using a gep
const DataLayout &DL = NewPointerPhi->getModule()->getDataLayout();
@@ -9612,7 +9642,7 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
// block cannot be used, as it does not exist yet.
// TODO: Model increment value in VPlan, by turning the recipe into a
// multi-def and a subclass of VPHeaderPHIRecipe.
- NewPointerPhi->addIncoming(InductionGEP, State.CFG.VectorPreHeader);
+ NewPointerPhi->addIncoming(InductionGEP, VectorPH);
// Create UF many actual address geps that use the pointer
// phi as base and a vectorized version of the step value
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 64d582cfbedfc..d531c7a6a761b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -245,6 +245,10 @@ Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
// set(Def, Extract, Instance);
return Extract;
}
+BasicBlock *VPTransformState::CFGState::getPreheaderBBFor(VPRecipeBase *R) {
+ VPRegionBlock *LoopRegion = R->getParent()->getEnclosingLoopRegion();
+ return VPBB2IRBB[LoopRegion->getPreheaderVPBB()];
+}
BasicBlock *
VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
@@ -277,20 +281,34 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
assert(PredBB && "Predecessor basic-block not found building successor.");
auto *PredBBTerminator = PredBB->getTerminator();
LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
- if (isa<UnreachableInst>(PredBBTerminator)) {
+
+ auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
+ if (isa<UnreachableInst>(PredBBTerminator) ||
+ (TermBr && !TermBr->isConditional())) {
assert(PredVPSuccessors.size() == 1 &&
"Predecessor ending w/o branch must have single successor.");
- DebugLoc DL = PredBBTerminator->getDebugLoc();
- PredBBTerminator->eraseFromParent();
- auto *Br = BranchInst::Create(NewBB, PredBB);
- Br->setDebugLoc(DL);
+ if (TermBr) {
+ TermBr->setSuccessor(0, NewBB);
+ } else {
+ DebugLoc DL = PredBBTerminator->getDebugLoc();
+ PredBBTerminator->eraseFromParent();
+ auto *Br = BranchInst::Create(NewBB, PredBB);
+ Br->setDebugLoc(DL);
+ }
} else {
- assert(PredVPSuccessors.size() == 2 &&
- "Predecessor ending with branch must have two successors.");
- unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
- assert(!PredBBTerminator->getSuccessor(idx) &&
- "Trying to reset an existing successor block.");
- PredBBTerminator->setSuccessor(idx, NewBB);
+ if (PredVPSuccessors.size() == 2) {
+ unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
+ assert(!PredBBTerminator->getSuccessor(idx) &&
+ "Trying to reset an existing successor block.");
+ PredBBTerminator->setSuccessor(idx, NewBB);
+ } else {
+ auto *Reg = dyn_cast<VPRegionBlock>(PredVPBB->getParent());
+ assert(Reg && !Reg->isReplicator());
+ assert(this == Reg->getSingleSuccessor());
+ PredBBTerminator->setSuccessor(0, NewBB);
+ PredBBTerminator->setSuccessor(
+ 1, CFG.VPBB2IRBB[Reg->getEntryBasicBlock()]);
+ }
}
}
return NewBB;
@@ -302,40 +320,36 @@ void VPBasicBlock::execute(VPTransformState *State) {
VPBlockBase *SingleHPred = nullptr;
BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
+ auto IsNonReplicateR = [](VPBlockBase *BB) {
+ auto *R = dyn_cast<VPRegionBlock>(BB);
+ return R && !R->isReplicator();
+ };
+
// 1. Create an IR basic block, or reuse the last one if possible.
// The last IR basic block is reused, as an optimization, in three cases:
- // A. the first VPBB reuses the loop header BB - when PrevVPBB is null;
+ // A. the first VPBB reuses the loop pre-header BB - when PrevVPBB is null;
// B. when the current VPBB has a single (hierarchical) predecessor which
- // is PrevVPBB and the latter has a single (hierarchical) successor; and
+ // is PrevVPBB and the latter has a single (hierarchical) successor, and
+ // both are in the same non-replicator region; and
// C. when the current VPBB is an entry of a region replica - where PrevVPBB
// is the exit of this region from a previous instance, or the predecessor
// of this region.
if (PrevVPBB && /* A */
!((SingleHPred = getSingleHierarchicalPredecessor()) &&
SingleHPred->getExitBasicBlock() == PrevVPBB &&
- PrevVPBB->getSingleHierarchicalSuccessor()) && /* B */
- !(Replica && getPredecessors().empty())) { /* C */
+ PrevVPBB->getSingleHierarchicalSuccessor() &&
+ (SingleHPred->getParent() == getEnclosingLoopRegion() &&
+ !IsNonReplicateR(SingleHPred))) && /* B */
+ !(Replica && getPredecessors().empty())) { /* C */
NewBB = createEmptyBasicBlock(State->CFG);
State->Builder.SetInsertPoint(NewBB);
// Temporarily terminate with unreachable until CFG is rewired.
UnreachableInst *Terminator = State->Builder.CreateUnreachable();
// Register NewBB in its loop. In innermost loops its the same for all BB's.
- State->CurrentVectorLoop->addBasicBlockToLoop(NewBB, *State->LI);
+ if (State->CurrentVectorLoop)
+ State->CurrentVectorLoop->addBasicBlockToLoop(NewBB, *State->LI);
State->Builder.SetInsertPoint(Terminator);
State->CFG.PrevBB = NewBB;
- } else {
- // If the current VPBB is re-using the header block from skeleton creation,
- // move it to the new vector loop.
- VPBasicBlock *HeaderVPBB =
- getPlan()->getVectorLoopRegion()->getEntryBasicBlock();
- if (EnableVPlanNativePath)
- HeaderVPBB = cast<VPBasicBlock>(HeaderVPBB->getSingleSuccessor());
- if (this == HeaderVPBB) {
- assert(State->CurrentVectorLoop);
- State->LI->removeBlock(State->CFG.PrevBB);
- State->CurrentVectorLoop->addBasicBlockToLoop(State->CFG.PrevBB,
- *State->LI);
- }
}
// 2. Fill the IR basic block with IR instructions.
@@ -409,6 +423,16 @@ VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
return SplitBlock;
}
+VPRegionBlock *VPBasicBlock::getEnclosingLoopRegion() {
+ VPRegionBlock *P = getParent();
+ if (P && P->isReplicator()) {
+ P = P->getParent();
+ assert(!cast<VPRegionBlock>(P)->isReplicator() &&
+ "unexpected nested replicate regions");
+ }
+ return P;
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const {
if (getSuccessors().empty()) {
@@ -465,7 +489,8 @@ void VPRegionBlock::execute(VPTransformState *State) {
// Create and register the new vector loop.
Loop *PrevLoop = State->CurrentVectorLoop;
State->CurrentVectorLoop = State->LI->AllocateLoop();
- Loop *ParentLoop = State->LI->getLoopFor(State->CFG.VectorPreHeader);
+ BasicBlock *VectorPH = State->CFG.VPBB2IRBB[getPreheaderVPBB()];
+ Loop *ParentLoop = State->LI->getLoopFor(VectorPH);
// Insert the new loop into the loop nest and register the new basic blocks
// before calling any utilities such as SCEV that require valid LoopInfo.
@@ -476,20 +501,6 @@ void VPRegionBlock::execute(VPTransformState *State) {
// Visit the VPBlocks connected to "this", starting from it.
for (VPBlockBase *Block : RPOT) {
- if (EnableVPlanNativePath) {
- // The inner loop vectorization path does not represent loop preheader
- // and exit blocks as part of the VPlan. In the VPlan-native path, skip
- // vectorizing loop preheader block. In future, we may replace this
- // check with the check for loop preheader.
- if (Block->getNumPredecessors() == 0)
- continue;
-
- // Skip vectorizing loop exit block. In future, we may replace this
- // check with the check for loop exit.
- if (Block->getNumSuccessors() == 0)
- continue;
- }
-
LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
Block->execute(State);
}
@@ -886,7 +897,7 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
// Check if the backedge taken count is needed, and if so build it.
if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
- IRBuilder<> Builder(State.CFG.VectorPreHeader->getTerminator());
+ IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
auto *TCMO = Builder.CreateSub(TripCountV,
ConstantInt::get(TripCountV->getType(), 1),
"trip.count.minus.1");
@@ -923,9 +934,9 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
}
}
-/// Generate the code inside the body of the vectorized loop. Assumes a single
-/// LoopVectorBody basic-block was created for this. Introduce additional
-/// basic-blocks as needed, and fill them all.
+/// Generate the code inside the preheader and body of the vectorized loop.
+/// Assumes a single pre-header basic-block was created for this. Introduce
+/// additional basic-blocks as needed, and fill them all.
void VPlan::execute(VPTransformState *State) {
// Set the reverse mapping from VPValues to Values for code generation.
for (auto &Entry : Value2VPValue)
@@ -933,21 +944,11 @@ void VPlan::execute(VPTransformState *State) {
// Initialize CFG state.
State->CFG.PrevVPBB = nullptr;
- BasicBlock *VectorHeaderBB = State->CFG.VectorPreHeader->getSingleSuccessor();
- State->CFG.PrevBB = VectorHeaderBB;
- State->CFG.ExitBB = VectorHeaderBB->getSingleSuccessor();
- State->CurrentVectorLoop = State->LI->getLoopFor(VectorHeaderBB);
-
- // Remove the edge between Header and Latch to allow other connections.
- // Temporarily terminate with unreachable until CFG is rewired.
- // Note: this asserts the generated code's assumption that
- // getFirstInsertionPt() can be dereferenced into an Instruction.
- VectorHeaderBB->getTerminator()->eraseFromParent();
- State->Builder.SetInsertPoint(VectorHeaderBB);
- UnreachableInst *Terminator = State->Builder.CreateUnreachable();
- State->Builder.SetInsertPoint(Terminator);
-
- // Generate code in loop body.
+ State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
+ BasicBlock *VectorPreHeader = State->CFG.PrevBB;
+ State->Builder.SetInsertPoint(VectorPreHeader->getTerminator());
+
+ // Generate code in the loop pre-header and body.
for (VPBlockBase *Block : depth_first(Entry))
Block->execute(State);
@@ -974,10 +975,6 @@ void VPlan::execute(VPTransformState *State) {
// Fix the latch value of canonical, reduction and first-order recurrences
// phis in the vector loop.
VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
- if (Header->empty()) {
- assert(EnableVPlanNativePath);
- Header = cast<VPBasicBlock>(Header->getSingleSuccessor());
- }
for (VPRecipeBase &R : Header->phis()) {
// Skip phi-like recipes that generate their backedege values themselves.
if (isa<VPWidenPHIRecipe>(&R))
@@ -1029,9 +1026,12 @@ void VPlan::execute(VPTransformState *State) {
}
// We do not attempt to preserve DT for outer loop vectorization currently.
- if (!EnableVPlanNativePath)
+ if (!EnableVPlanNativePath) {
+ BasicBlock *VectorHeaderBB = State->CFG.VPBB2IRBB[Header];
+ State->DT->addNewBlock(VectorHeaderBB, VectorPreHeader);
updateDominatorTree(State->DT, VectorHeaderBB, VectorLatchBB,
State->CFG.ExitBB);
+ }
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1452,7 +1452,9 @@ void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
Value *Start = getStartValue()->getLiveInIRValue();
PHINode *EntryPart = PHINode::Create(
Start->getType(), 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt());
- EntryPart->addIncoming(Start, State.CFG.VectorPreHeader);
+
+ BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
+ EntryPart->addIncoming(Start, VectorPH);
EntryPart->setDebugLoc(DL);
for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
State.set(this, EntryPart, Part);
@@ -1469,11 +1471,12 @@ void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
void VPExpandSCEVRecipe::execute(VPTransformState &State) {
assert(!State.Instance && "cannot be used in per-lane");
- const DataLayout &DL =
- State.CFG.VectorPreHeader->getModule()->getDataLayout();
+ const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
SCEVExpander Exp(SE, DL, "induction");
- Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
- State.CFG.VectorPreHeader->getTerminator());
+
+ BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
+ Value *Res =
+ Exp.expandCodeFor(Expr, Expr->getType(), VectorPH->getTerminator());
for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
State.set(this, Res, Part);
@@ -1526,11 +1529,12 @@ void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
? VectorInit->getType()
: VectorType::get(VectorInit->getType(), State.VF);
+ BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
if (State.VF.isVector()) {
auto *IdxTy = Builder.getInt32Ty();
auto *One = ConstantInt::get(IdxTy, 1);
IRBuilder<>::InsertPointGuard Guard(Builder);
- Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator());
+ Builder.SetInsertPoint(VectorPH->getTerminator());
auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
VectorInit = Builder.CreateInsertElement(
@@ -1540,7 +1544,7 @@ void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
// Create a phi node for the new recurrence.
PHINode *EntryPart = PHINode::Create(
VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt());
- EntryPart->addIncoming(VectorInit, State.CFG.VectorPreHeader);
+ EntryPart->addIncoming(VectorInit, VectorPH);
State.set(this, EntryPart, 0);
}
@@ -1576,6 +1580,8 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
State.set(this, EntryPart, Part);
}
+ BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
+
// Reductions do not have to start at zero. They can start with
// any loop invariant values.
VPValue *StartVPV = getStartValue();
@@ -1590,7 +1596,7 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
Iden = StartV;
} else {
IRBuilderBase::InsertPointGuard IPBuilder(Builder);
- Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator());
+ Builder.SetInsertPoint(VectorPH->getTerminator());
StartV = Iden =
Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
}
@@ -1601,7 +1607,7 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
if (!ScalarPHI) {
Iden = Builder.CreateVectorSplat(State.VF, Iden);
IRBuilderBase::InsertPointGuard IPBuilder(Builder);
- Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator());
+ Builder.SetInsertPoint(VectorPH->getTerminator());
Constant *Zero = Builder.getInt32(0);
StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
}
@@ -1612,7 +1618,7 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
// Make sure to add the reduction start value only to the
// first unroll part.
Value *StartVal = (Part == 0) ? StartV : Iden;
- cast<PHINode>(EntryPart)->addIncoming(StartVal, State.CFG.VectorPreHeader);
+ cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
}
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 3fe981e4c0954..683613097ef4f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -311,13 +311,6 @@ struct VPTransformState {
/// vector loop.
BasicBlock *ExitBB = nullptr;
- /// The IR BasicBlock that is the preheader of the vector loop in the output
- /// IR.
- /// FIXME: The vector preheader should also be modeled in VPlan, so any code
- /// that needs to be added to the preheader gets directly generated by
- /// VPlan. There should be no need to manage a pointer to the IR BasicBlock.
- BasicBlock *VectorPreHeader = nullptr;
-
/// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
/// of replication, maps the BasicBlock of the last replica created.
SmallDenseMap<VPBasicBlock *, BasicBlock *> VPBB2IRBB;
@@ -327,6 +320,10 @@ struct VPTransformState {
SmallVector<VPBasicBlock *, 8> VPBBsToFix;
CFGState() = default;
+
+ /// Returns the BasicBlock* mapped to the pre-header of the loop region
+ /// containing \p R.
+ BasicBlock *getPreheaderBBFor(VPRecipeBase *R);
} CFG;
/// Hold a pointer to LoopInfo to register new basic blocks in the loop.
@@ -2077,6 +2074,8 @@ class VPBasicBlock : public VPBlockBase {
/// SplitAt to the new block. Returns the new block.
VPBasicBlock *splitAt(iterator SplitAt);
+ VPRegionBlock *getEnclosingLoopRegion();
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print this VPBsicBlock to \p O, prefixing all lines with \p Indent. \p
/// SlotTracker is used to print unnamed VPValue's using consequtive numbers.
@@ -2170,6 +2169,12 @@ class VPRegionBlock : public VPBlockBase {
ExitBlock->setParent(this);
}
+ /// Returns the pre-header VPBasicBlock of the loop region.
+ VPBasicBlock *getPreheaderVPBB() {
+ assert(!isReplicator() && "should only get pre-header of loop regions");
+ return getSinglePredecessor()->getExitBasicBlock();
+ }
+
/// An indicator whether this region is to generate multiple replicated
/// instances of output IR corresponding to its VPBlockBases.
bool isReplicator() const { return IsReplicator; }
@@ -2647,10 +2652,14 @@ class VPlan {
/// Returns the VPRegionBlock of the vector loop.
VPRegionBlock *getVectorLoopRegion() {
- return cast<VPRegionBlock>(getEntry());
+ if (auto *R = dyn_cast<VPRegionBlock>(getEntry()))
+ return R;
+ return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
}
const VPRegionBlock *getVectorLoopRegion() const {
- return cast<VPRegionBlock>(getEntry());
+ if (auto *R = dyn_cast<VPRegionBlock>(getEntry()))
+ return R;
+ return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
}
/// Returns the canonical induction recipe of the vector loop.
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
index 024e8e7296cec..6b4cc3f83464b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
@@ -10,6 +10,9 @@ target triple = "aarch64-unknown-linux-gnu"
; CHECK: VPlan 'Initial VPlan for VF={vscale x 2},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
diff --git a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
index 93bab6c1673c1..53a95d9a5ed93 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
@@ -19,6 +19,7 @@ target triple = "x86_64-unknown-linux-gnu"
; CHECK-NOT: LV: Found uniform instruction: %tmp3 = getelementptr inbounds %data, %data* %d, i64 0, i32 0, i64 %i
; CHECK-NOT: LV: Found uniform instruction: %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
; CHECK-NOT: LV: Found uniform instruction: %i.next = add nuw nsw i64 %i, 5
+; CHECK: define void @PR31671(
; CHECK: vector.ph:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x float> poison, float %x, i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x float> [[BROADCAST_SPLATINSERT]], <16 x float> poison, <16 x i32> zeroinitializer
diff --git a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
index d11b8fa3dd6bf..8510d956bfb2b 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
@@ -136,8 +136,8 @@ define void @example2(i32 %n, i32 %x) optsize {
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[N]], 0
; CHECK-NEXT: br i1 [[TMP17]], label [[DOT_CRIT_EDGE:%.*]], label [[DOTLR_PH_PREHEADER:%.*]]
; CHECK: .lr.ph.preheader:
-; CHECK-NEXT: br i1 false, label [[SCALAR_PH8:%.*]], label [[VECTOR_PH10:%.*]]
-; CHECK: vector.ph10:
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH8:%.*]], label [[VECTOR_PH9:%.*]]
+; CHECK: vector.ph9:
; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[N]], -1
; CHECK-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64
; CHECK-NEXT: [[N_RND_UP11:%.*]] = add nuw nsw i64 [[TMP19]], 4
@@ -145,8 +145,8 @@ define void @example2(i32 %n, i32 %x) optsize {
; CHECK-NEXT: [[BROADCAST_SPLATINSERT18:%.*]] = insertelement <4 x i64> poison, i64 [[TMP19]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT19:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT18]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY9:%.*]]
-; CHECK: vector.body9:
-; CHECK-NEXT: [[INDEX20:%.*]] = phi i64 [ 0, [[VECTOR_PH10]] ], [ [[INDEX_NEXT31:%.*]], [[PRED_STORE_CONTINUE30:%.*]] ]
+; CHECK: vector.body19:
+; CHECK-NEXT: [[INDEX20:%.*]] = phi i64 [ 0, [[VECTOR_PH9]] ], [ [[INDEX_NEXT31:%.*]], [[PRED_STORE_CONTINUE30:%.*]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[I_0_LCSSA]], [[INDEX20]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT21:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX20]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT22:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT21]], <4 x i64> poison, <4 x i32> zeroinitializer
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
index fcbfa22cd74e8..9dd88f724e0aa 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
@@ -13,6 +13,9 @@ define void @sink_replicate_region_1(i32 %x, i8* %ptr) optsize {
; CHECK-EMPTY:
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -98,6 +101,9 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, i32* %ptr) optsize {
; CHECK-EMPTY:
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -168,6 +174,9 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, i32* %ptr) optsize
; CHECK-EMPTY:
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -238,6 +247,9 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, i8*
; CHECK-EMPTY:
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -333,6 +345,9 @@ define void @sink_replicate_region_after_replicate_region(i32* %ptr, i32 %x, i8
; CHECK-EMPTY:
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
diff --git a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll
index 3cdf854b50974..15ff79489b347 100644
--- a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll
+++ b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll
@@ -41,6 +41,9 @@ for.end:
; CHECK-EMPTY:
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll
index d2da955ceb820..2e27a90787d92 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll
@@ -7,7 +7,17 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
; Verify that -vplan-print-in-dot-format option works.
define void @print_call_and_memory(i64 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
-; CHECK: subgraph cluster_N0 {
+; CHECK: digraph VPlan {
+; CHECK-NEXT: graph [labelloc=t, fontsize=30; label="Vectorization Plan\nInitial VPlan for VF=\{4\},UF\>=1"]
+; CHECK-NEXT: node [shape=rect, fontname=Courier, fontsize=30]
+; CHECK-NEXT: edge [fontname=Courier, fontsize=30]
+; CHECK-NEXT: compound=true
+; CHECK-NEXT: N0 [label =
+; CHECK-NEXT: "vector.ph:\l" +
+; CHECK-NEXT: "Successor(s): vector loop\l"
+; CHECK-NEXT: ]
+; CHECK-NEXT: N0 -> N1 [ label="" lhead=cluster_N2]
+; CHECK-NEXT: subgraph cluster_N2 {
; CHECK-NEXT: fontname=Courier
; CHECK-NEXT: label="\<x1\> vector loop"
; CHECK-NEXT: N1 [label =
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
index 6ec8fd102de8f..ab61a0e4437a8 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
@@ -11,6 +11,9 @@ define void @print_call_and_memory(i64 %n, float* noalias %y, float* noalias %x)
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -51,6 +54,9 @@ define void @print_widen_gep_and_select(i64 %n, float* noalias %y, float* noalia
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -96,6 +102,9 @@ define float @print_reduction(i64 %n, float* noalias %y) {
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -133,6 +142,9 @@ define void @print_replicate_predicated_phi(i64 %n, i64* %x) {
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -207,6 +219,9 @@ define void @print_interleave_groups(i32 %C, i32 %D) {
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -267,6 +282,9 @@ define float @print_fmuladd_strict(float* %a, float* %b, i64 %n) {
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -307,6 +325,9 @@ define void @debug_loc_vpinstruction(i32* nocapture %asd, i32* nocapture %bsd) !
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -396,6 +417,9 @@ define void @print_expand_scev(i64 %y, i8* %ptr) {
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
; CHECK-NEXT: Live-in vp<%0> = vector-trip-count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll
index 93c507618d460..fccc9becf12f7 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll
@@ -10,6 +10,9 @@ define void @sink_with_sideeffects(i1 %c, i8* %ptr) {
; CHECK: VPlan 'Initial VPlan for VF={1},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
index c31a486aa7fe2..89d21f6904dc8 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
@@ -15,6 +15,9 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
; CHECK-EMPTY:
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -82,6 +85,9 @@ exit:
; CHECK-EMPTY:
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -162,6 +168,9 @@ exit:
; CHECK-EMPTY:
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -244,6 +253,9 @@ define void @uniform_gep(i64 %k, i16* noalias %A, i16* noalias %B) {
; CHECK-EMPTY:
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -335,6 +347,9 @@ define void @pred_cfg1(i32 %k, i32 %j) {
; CHECK-EMPTY:
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -437,6 +452,9 @@ define void @pred_cfg2(i32 %k, i32 %j) {
; CHECK-EMPTY:
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -554,6 +572,9 @@ define void @pred_cfg3(i32 %k, i32 %j) {
; CHECK-EMPTY:
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -665,6 +686,9 @@ define void @merge_3_replicate_region(i32 %k, i32 %j) {
; CHECK-EMPTY:
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -781,6 +805,9 @@ define void @update_2_uses_in_same_recipe_in_merged_block(i32 %k) {
; CHECK-EMPTY:
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -849,6 +876,9 @@ define void @recipe_in_merge_candidate_used_by_first_order_recurrence(i32 %k) {
; CHECK-EMPTY:
; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -932,6 +962,9 @@ define void @update_multiple_users(i16* noalias %src, i8* noalias %dst, i1 %c) {
; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
@@ -1003,6 +1036,9 @@ define void @sinking_requires_duplication(float* %addr) {
; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
More information about the llvm-commits mailing list