[llvm] [VPlan] Replace VPRegionBlock with explicit CFG before execute (NFCI). (PR #117506)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Sat May 10 14:26:02 PDT 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/117506
From c274eea4e968a1c9ac7aa8288c04f8cd1e8ec2ee Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 18 Mar 2025 22:37:13 +0000
Subject: [PATCH] [VPlan] Replace VPRegionBlock with explicit CFG before
execute (NFCI).
!fixup update more tests.
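
The gist of the change: instead of executing a VPRegionBlock as an implicit
loop, the loop regions are dissolved into plain CFG before codegen. The
region's single predecessor is connected to its entry (the header), its
exiting block to its single successor (the middle block), and an explicit
backedge from the latch to the header is added. Roughly, on a toy block
graph (the Block type and helpers below are illustrative stand-ins, not the
VPlan API):

  #include <algorithm>
  #include <cassert>
  #include <string>
  #include <vector>

  struct Block {
    std::string Name;
    std::vector<Block *> Succs, Preds;
  };

  static void connect(Block *From, Block *To) {
    From->Succs.push_back(To);
    To->Preds.push_back(From);
  }

  static void disconnect(Block *From, Block *To) {
    From->Succs.erase(std::find(From->Succs.begin(), From->Succs.end(), To));
    To->Preds.erase(std::find(To->Preds.begin(), To->Preds.end(), From));
  }

  // Dissolve a single-entry, single-exiting region: afterwards the loop
  // exists only as explicit edges, including the latch->header backedge.
  static void flattenRegion(Block *Preheader, Block *Region, Block *Header,
                            Block *Latch, Block *Middle) {
    disconnect(Preheader, Region);
    disconnect(Region, Middle);
    connect(Preheader, Header); // fall through into the loop
    connect(Latch, Middle);     // loop exit edge
    connect(Latch, Header);     // explicit backedge
  }

  int main() {
    Block PH{"vector.ph"}, R{"region"}, Body{"vector.body"}, M{"middle.block"};
    connect(&PH, &R);
    connect(&R, &M);
    // Single-block loop: vector.body is both header and latch.
    flattenRegion(&PH, &R, &Body, &Body, &M);
    assert(Body.Preds.size() == 2 && "header has preheader + backedge preds");
    return 0;
  }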
---
.../Transforms/Vectorize/LoopVectorize.cpp | 31 ++-
llvm/lib/Transforms/Vectorize/VPlan.cpp | 191 ++++++++++--------
llvm/lib/Transforms/Vectorize/VPlan.h | 7 +
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 13 +-
.../Transforms/Vectorize/VPlanTransforms.cpp | 16 ++
.../Transforms/Vectorize/VPlanTransforms.h | 5 +-
.../AArch64/epilog-iv-select-cmp.ll | 12 +-
.../AArch64/reduction-recurrence-costs-sve.ll | 10 +-
.../LoopVectorize/AArch64/vplan-printing.ll | 37 ++--
.../RISCV/riscv-vector-reverse.ll | 74 ++++---
.../RISCV/vplan-vp-select-intrinsics.ll | 51 +++--
.../LoopVectorize/vplan-predicate-switch.ll | 113 +++++------
12 files changed, 295 insertions(+), 265 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 0b8b0c7dcdfc9..ba9f01e2a330a 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2760,6 +2760,15 @@ LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
return TTI.getIntrinsicInstrCost(CostAttrs, CostKind);
}
+static VPBasicBlock *getHeaderForMainVectorLoop(VPlan &Plan,
+ VPDominatorTree &VPDT) {
+ return find_singleton<VPBasicBlock>(
+ vp_depth_first_shallow(Plan.getEntry()), [&VPDT](VPBlockBase *VPB, bool) {
+ auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
+ return VPBB && VPBB->isHeader(VPDT) ? VPBB : nullptr;
+ });
+}
+
void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
// Fix widened non-induction PHIs by setting up the PHI operands.
if (EnableVPlanNativePath)
@@ -2778,13 +2787,13 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
PSE.getSE()->forgetLoop(OrigLoop);
PSE.getSE()->forgetBlockAndLoopDispositions();
- // Don't apply optimizations below when no vector region remains, as they all
- // require a vector loop at the moment.
- if (!State.Plan->getVectorLoopRegion())
+ // Don't apply optimizations below when no vector loop remains, as they all
+ // require one at the moment.
+ VPBasicBlock *HeaderVPBB =
+ getHeaderForMainVectorLoop(*State.Plan, State.VPDT);
+ if (!HeaderVPBB)
return;
- VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion();
- VPBasicBlock *HeaderVPBB = VectorRegion->getEntryBasicBlock();
BasicBlock *HeaderBB = State.CFG.VPBB2IRBB[HeaderVPBB];
// Remove redundant induction instructions.
@@ -2809,7 +2818,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
}
void InnerLoopVectorizer::fixNonInductionPHIs(VPTransformState &State) {
- auto Iter = vp_depth_first_deep(Plan.getEntry());
+ auto Iter = vp_depth_first_shallow(Plan.getEntry());
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
for (VPRecipeBase &P : VPBB->phis()) {
VPWidenPHIRecipe *VPPhi = dyn_cast<VPWidenPHIRecipe>(&P);
@@ -7799,6 +7808,9 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
BestVPlan, BestVF,
TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector));
VPlanTransforms::removeDeadRecipes(BestVPlan);
+
+ VPBasicBlock *MiddleVPBB =
+ BestVPlan.getVectorLoopRegion() ? BestVPlan.getMiddleBlock() : nullptr;
VPlanTransforms::convertToConcreteRecipes(BestVPlan,
*Legal->getWidestInductionType());
@@ -7894,14 +7906,14 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
// 2.6. Maintain Loop Hints
// Keep all loop hints from the original loop on the vector loop (we'll
// replace the vectorizer-specific hints below).
- if (auto *LoopRegion = BestVPlan.getVectorLoopRegion()) {
+ VPBasicBlock *HeaderVPBB = getHeaderForMainVectorLoop(BestVPlan, State.VPDT);
+ if (HeaderVPBB) {
MDNode *OrigLoopID = OrigLoop->getLoopID();
std::optional<MDNode *> VectorizedLoopID =
makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
LLVMLoopVectorizeFollowupVectorized});
- VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
Loop *L = LI->getLoopFor(State.CFG.VPBB2IRBB[HeaderVPBB]);
if (VectorizedLoopID) {
L->setLoopID(*VectorizedLoopID);
@@ -7947,8 +7959,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
ILV.printDebugTracesAtEnd();
// 4. Adjust branch weight of the branch in the middle block.
- if (BestVPlan.getVectorLoopRegion()) {
- auto *MiddleVPBB = BestVPlan.getMiddleBlock();
+ if (HeaderVPBB) {
auto *MiddleTerm =
cast<BranchInst>(State.CFG.VPBB2IRBB[MiddleVPBB]->getTerminator());
if (MiddleTerm->isConditional() &&
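
getHeaderForMainVectorLoop above leans on llvm::find_singleton from
ADT/STLExtras.h: it applies a mapper across a range and yields the unique
non-null result, or nullptr when there are zero or several matches. A
self-contained mimic of those semantics (the mimic and its names are for
illustration, not the LLVM header itself):

  #include <cassert>
  #include <vector>

  // Mimic of find_singleton: return the unique non-null mapper result over
  // Range, or nullptr if nothing or more than one distinct value matched.
  template <typename T, typename R, typename Predicate>
  T *findSingleton(R &&Range, Predicate P, bool AllowRepeats = false) {
    T *Result = nullptr;
    for (auto *A : Range) {
      if (T *Mapped = P(A, AllowRepeats)) {
        if (Result && (!AllowRepeats || Mapped != Result))
          return nullptr; // second distinct match: not a singleton
        Result = Mapped;
      }
    }
    return Result;
  }

  struct VPB { bool IsHeader; };

  int main() {
    VPB A{false}, B{true}, C{false};
    std::vector<VPB *> Blocks{&A, &B, &C};
    VPB *H = findSingleton<VPB>(Blocks, [](VPB *V, bool) {
      return V->IsHeader ? V : nullptr;
    });
    assert(H == &B && "exactly one header block found");
    return 0;
  }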
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 167aff737d3fd..6d35862ef6ad4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -207,6 +207,11 @@ VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
return Parent->getEnclosingBlockWithPredecessors();
}
+bool VPBasicBlock::isHeader(const VPDominatorTree &VPDT) const {
+ return getPredecessors().size() == 2 &&
+ VPDT.dominates(this, getPredecessors()[1]);
+}
+
VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
iterator It = begin();
while (It != end() && It->isPhi())
@@ -424,7 +429,9 @@ void VPBasicBlock::connectToPredecessors(VPTransformState &State) {
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
- BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
+ BasicBlock *PredBB = CFG.VPBB2IRBB.lookup(PredVPBB);
+ if (!PredBB)
+ continue;
assert(PredBB && "Predecessor basic-block not found building successor.");
auto *PredBBTerminator = PredBB->getTerminator();
@@ -432,6 +439,8 @@ void VPBasicBlock::connectToPredecessors(VPTransformState &State) {
auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
if (isa<UnreachableInst>(PredBBTerminator)) {
+ if (PredVPSuccessors.size() == 2)
+ continue;
assert(PredVPSuccessors.size() == 1 &&
"Predecessor ending w/o branch must have single successor.");
DebugLoc DL = PredBBTerminator->getDebugLoc();
@@ -487,11 +496,25 @@ void VPBasicBlock::execute(VPTransformState *State) {
bool Replica = bool(State->Lane);
BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
+ if (isHeader(State->VPDT)) {
+ // Create and register the new vector loop.
+ Loop *PrevParentLoop = State->CurrentParentLoop;
+ State->CurrentParentLoop = State->LI->AllocateLoop();
+
+ // Insert the new loop into the loop nest and register the new basic blocks
+ // before calling any utilities such as SCEV that require valid LoopInfo.
+ if (PrevParentLoop)
+ PrevParentLoop->addChildLoop(State->CurrentParentLoop);
+ else
+ State->LI->addTopLevelLoop(State->CurrentParentLoop);
+ }
+
auto IsReplicateRegion = [](VPBlockBase *BB) {
auto *R = dyn_cast_or_null<VPRegionBlock>(BB);
- return R && R->isReplicator();
+ assert((!R || R->isReplicator()) &&
+ "only replicate region blocks should remain");
+ return R;
};
-
// 1. Create an IR basic block.
if ((Replica && this == getParent()->getEntry()) ||
IsReplicateRegion(getSingleHierarchicalPredecessor())) {
@@ -514,6 +537,14 @@ void VPBasicBlock::execute(VPTransformState *State) {
// 2. Fill the IR basic block with IR instructions.
executeRecipes(State, NewBB);
+
+ // If this block is a latch, update CurrentParentLoop.
+ if (any_of(getSuccessors(), [State, this](VPBlockBase *Succ) {
+ auto *VPBB = dyn_cast<VPBasicBlock>(Succ);
+ return VPBB && VPBB->isHeader(State->VPDT) &&
+ State->VPDT.dominates(Succ, this);
+ }))
+ State->CurrentParentLoop = State->CurrentParentLoop->getParentLoop();
}
VPBasicBlock *VPBasicBlock::clone() {
@@ -725,35 +756,13 @@ VPRegionBlock *VPRegionBlock::clone() {
}
void VPRegionBlock::execute(VPTransformState *State) {
- ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
- RPOT(Entry);
-
- if (!isReplicator()) {
- // Create and register the new vector loop.
- Loop *PrevParentLoop = State->CurrentParentLoop;
- State->CurrentParentLoop = State->LI->AllocateLoop();
-
- // Insert the new loop into the loop nest and register the new basic blocks
- // before calling any utilities such as SCEV that require valid LoopInfo.
- if (PrevParentLoop)
- PrevParentLoop->addChildLoop(State->CurrentParentLoop);
- else
- State->LI->addTopLevelLoop(State->CurrentParentLoop);
-
- // Visit the VPBlocks connected to "this", starting from it.
- for (VPBlockBase *Block : RPOT) {
- LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
- Block->execute(State);
- }
-
- State->CurrentParentLoop = PrevParentLoop;
- return;
- }
-
+ assert(isReplicator() &&
+ "Loop regions should have been lowered to plain CFG");
assert(!State->Lane && "Replicating a Region with non-null instance.");
-
- // Enter replicating mode.
assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
+
+ ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
+ Entry);
State->Lane = VPLane(0);
for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
++Lane) {
@@ -847,6 +856,22 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
}
#endif
+void VPRegionBlock::removeRegion() {
+ auto *Header = cast<VPBasicBlock>(getEntry());
+ VPBlockBase *Preheader = getSinglePredecessor();
+ auto *Exiting = cast<VPBasicBlock>(getExiting());
+
+ VPBlockBase *Middle = getSingleSuccessor();
+ VPBlockUtils::disconnectBlocks(Preheader, this);
+ VPBlockUtils::disconnectBlocks(this, Middle);
+
+ for (VPBlockBase *VPB : vp_depth_first_shallow(Entry))
+ VPB->setParent(nullptr);
+
+ VPBlockUtils::connectBlocks(Preheader, Header);
+ VPBlockUtils::connectBlocks(Exiting, Middle);
+}
+
VPlan::VPlan(Loop *L) {
setEntry(createVPIRBasicBlock(L->getLoopPreheader()));
ScalarHeader = createVPIRBasicBlock(L->getHeader());
@@ -956,57 +981,57 @@ void VPlan::execute(VPTransformState *State) {
for (VPBlockBase *Block : RPOT)
Block->execute(State);
- State->CFG.DTU.flush();
-
- auto *LoopRegion = getVectorLoopRegion();
- if (!LoopRegion)
- return;
-
- VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock();
- BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
-
// Fix the latch value of canonical, reduction and first-order recurrences
// phis in the vector loop.
- VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
- for (VPRecipeBase &R : Header->phis()) {
- // Skip phi-like recipes that generate their backedege values themselves.
- if (isa<VPWidenPHIRecipe>(&R))
+ for (VPBasicBlock *Header :
+ VPBlockUtils::blocksOnly<VPBasicBlock>(vp_depth_first_shallow(Entry))) {
+ if (!Header->isHeader(State->VPDT))
continue;
+ for (VPRecipeBase &R : Header->phis()) {
+ if (isa<VPWidenPHIRecipe>(&R))
+ continue;
- if (isa<VPWidenInductionRecipe>(&R)) {
- PHINode *Phi = nullptr;
- if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
- Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
- } else {
- auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
- assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
- "recipe generating only scalars should have been replaced");
- auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
- Phi = cast<PHINode>(GEP->getPointerOperand());
+ auto *LatchVPBB = cast<VPBasicBlock>(Header->getPredecessors()[1]);
+ BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
+
+ if (isa<VPWidenInductionRecipe>(&R)) {
+ PHINode *Phi = nullptr;
+ if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
+ Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
+ } else {
+ auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
+ assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
+ "recipe generating only scalars should have been replaced");
+ auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
+ Phi = cast<PHINode>(GEP->getPointerOperand());
+ }
+
+ Phi->setIncomingBlock(1, VectorLatchBB);
+
+ // Move the last step to the end of the latch block. This ensures
+ // consistent placement of all induction updates.
+ Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
+ Inc->moveBefore(
+ std::prev(VectorLatchBB->getTerminator()->getIterator()));
+
+ // Use the steps for the last part as backedge value for the induction.
+ if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
+ Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
+ continue;
}
- Phi->setIncomingBlock(1, VectorLatchBB);
-
- // Move the last step to the end of the latch block. This ensures
- // consistent placement of all induction updates.
- Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
- Inc->moveBefore(std::prev(VectorLatchBB->getTerminator()->getIterator()));
-
- // Use the steps for the last part as backedge value for the induction.
- if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
- Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
- continue;
+ auto *PhiR = cast<VPSingleDefRecipe>(&R);
+ // VPInstructions currently model scalar Phis only.
+ bool NeedsScalar = isa<VPInstruction>(PhiR) ||
+ (isa<VPReductionPHIRecipe>(PhiR) &&
+ cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
+
+ Value *Phi = State->get(PhiR, NeedsScalar);
+ // VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does
+ // not.
+ Value *Val = State->get(PhiR->getOperand(1), NeedsScalar);
+ cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
}
-
- auto *PhiR = cast<VPSingleDefRecipe>(&R);
- // VPInstructions currently model scalar Phis only.
- bool NeedsScalar = isa<VPInstruction>(PhiR) ||
- (isa<VPReductionPHIRecipe>(PhiR) &&
- cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
- Value *Phi = State->get(PhiR, NeedsScalar);
- // VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does not.
- Value *Val = State->get(PhiR->getOperand(1), NeedsScalar);
- cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
}
}
@@ -1365,16 +1390,16 @@ void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
#endif
-/// Returns true if there is a vector loop region and \p VPV is defined in a
-/// loop region.
-static bool isDefinedInsideLoopRegions(const VPValue *VPV) {
- const VPRecipeBase *DefR = VPV->getDefiningRecipe();
- return DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() ||
- DefR->getParent()->getEnclosingLoopRegion());
-}
-
bool VPValue::isDefinedOutsideLoopRegions() const {
- return !isDefinedInsideLoopRegions(this);
+ auto *DefR = getDefiningRecipe();
+ if (!DefR)
+ return true;
+
+ const VPBasicBlock *DefVPBB = DefR->getParent();
+ auto *Plan = DefVPBB->getPlan();
+ if (Plan->getVectorLoopRegion())
+ return !DefR->getParent()->getEnclosingLoopRegion();
+ return DefVPBB == Plan->getEntry();
}
void VPValue::replaceAllUsesWith(VPValue *New) {
replaceUsesWithIf(New, [](VPUser &, unsigned) { return true; });
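
With the regions gone, loop structure is recovered from the CFG itself:
VPBasicBlock::isHeader declares a block a header when it has exactly two
predecessors and dominates the second one (the latch), that is, when the
second incoming edge is a backedge. VPBasicBlock::execute uses the symmetric
check for latches: a block with a successor that is a header dominating it.
A minimal sketch of the header test, with a lambda standing in for the real
dominator-tree query:

  #include <cassert>
  #include <functional>
  #include <string>
  #include <vector>

  struct Blk {
    std::string Name;
    std::vector<Blk *> Preds;
  };

  // Mirrors the shape of VPBasicBlock::isHeader: two predecessors, and the
  // block dominates the second one, so that edge must be a backedge.
  static bool
  isHeader(const Blk &B,
           const std::function<bool(const Blk *, const Blk *)> &Dominates) {
    return B.Preds.size() == 2 && Dominates(&B, B.Preds[1]);
  }

  int main() {
    Blk PH{"vector.ph"}, Body{"vector.body"};
    // Single-block vector loop: the backedge comes from the block itself,
    // which trivially dominates itself.
    Body.Preds = {&PH, &Body};
    auto Dominates = [](const Blk *A, const Blk *B) { return A == B; }; // toy
    assert(isHeader(Body, Dominates) && !isHeader(PH, Dominates));
    return 0;
  }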
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 2c4cac7655ec9..3931583233bb6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3415,6 +3415,9 @@ class VPBasicBlock : public VPBlockBase {
/// second predecessor is the exiting block of the region.
const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;
+ /// Returns true if the block is a loop header in a plain-CFG VPlan.
+ bool isHeader(const VPDominatorTree &VPDT) const;
+
protected:
/// Execute the recipes in the IR basic block \p BB.
void executeRecipes(VPTransformState *State, BasicBlock *BB);
@@ -3566,6 +3569,10 @@ class VPRegionBlock : public VPBlockBase {
/// Clone all blocks in the single-entry single-exit region of the block and
/// their recipes without updating the operands of the cloned recipes.
VPRegionBlock *clone() override;
+
+ /// Remove the current region from its VPlan, connecting its predecessor to
+ /// its entry and its exiting block to its successor.
+ void removeRegion();
};
/// VPlan models a candidate for vectorization, encoding various decisions take
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 6a4ffac200b1c..58da8610a354b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -583,11 +583,10 @@ Value *VPInstruction::generate(VPTransformState &State) {
CondBr->setSuccessor(0, nullptr);
Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
- if (!getParent()->isExiting())
+ VPBasicBlock *Header = cast<VPBasicBlock>(getParent()->getSuccessors()[1]);
+ if (!State.CFG.VPBB2IRBB.contains(Header))
return CondBr;
- VPRegionBlock *ParentRegion = getParent()->getParent();
- VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
return CondBr;
}
@@ -598,9 +597,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
Value *Cond = Builder.CreateICmpEQ(IV, TC);
// Now create the branch.
- auto *Plan = getParent()->getPlan();
- VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
- VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
+ VPBasicBlock *Header = cast<VPBasicBlock>(getParent()->getSuccessors()[1]);
// Replace the temporary unreachable terminator with a new conditional
// branch, hooking it up to backward destination (the header) now and to the
@@ -1124,10 +1121,6 @@ void VPInstructionWithType::print(raw_ostream &O, const Twine &Indent,
void VPPhi::execute(VPTransformState &State) {
State.setDebugLocFrom(getDebugLoc());
- assert(getParent() ==
- getParent()->getPlan()->getVectorLoopRegion()->getEntry() &&
- "VPInstructions with PHI opcodes must be used for header phis only "
- "at the moment");
BasicBlock *VectorPH = State.CFG.VPBB2IRBB.at(getIncomingBlock(0));
Value *Start = State.get(getIncomingValue(0), VPLane(0));
PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, getName());
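
The branch lowering above now takes the header as the terminator's second
successor and only wires up the backward edge once the header's IR block
already exists in VPBB2IRBB; until then the successor stays unset and
connectToPredecessors patches it later. A toy model of that deferred wiring
(the map keys and names are made up for illustration):

  #include <cassert>
  #include <map>
  #include <string>

  struct IRBlock { std::string Name; };

  int main() {
    std::map<std::string, IRBlock *> VPBB2IRBB; // VPlan block -> IR block
    IRBlock Header{"vector.body"};

    // Before the header is emitted, the backedge target stays unset.
    IRBlock *BackedgeTarget = nullptr;
    if (auto It = VPBB2IRBB.find("vector.body"); It != VPBB2IRBB.end())
      BackedgeTarget = It->second;
    assert(!BackedgeTarget && "deferred: fixed up when connecting preds");

    // Once the header exists, the same lookup wires the branch to it.
    VPBB2IRBB["vector.body"] = &Header;
    if (auto It = VPBB2IRBB.find("vector.body"); It != VPBB2IRBB.end())
      BackedgeTarget = It->second;
    assert(BackedgeTarget == &Header);
    return 0;
  }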
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index b10b47cc1282a..e8481d0bf0c27 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2394,10 +2394,26 @@ void VPlanTransforms::createInterleaveGroups(
void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan,
Type &CanonicalIVTy) {
+ // Replace loop regions with explicit CFG.
+ SmallVector<VPRegionBlock *> LoopRegions;
+ for (VPRegionBlock *R : VPBlockUtils::blocksOnly<VPRegionBlock>(
+ vp_depth_first_deep(Plan.getEntry()))) {
+ if (!R->isReplicator())
+ LoopRegions.push_back(R);
+ }
+ for (VPRegionBlock *R : LoopRegions) {
+ VPBlockBase *Header = R->getEntry();
+ VPBlockBase *Latch = R->getExiting();
+ R->removeRegion();
+ // Add explicit backedge.
+ VPBlockUtils::connectBlocks(Latch, Header);
+ }
+
using namespace llvm::VPlanPatternMatch;
VPTypeAnalysis TypeInfo(&CanonicalIVTy);
SmallVector<VPRecipeBase *> ToRemove;
+
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_deep(Plan.getEntry()))) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
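
Note the two-pass shape of the new region-removal code: the non-replicator
regions are collected into a SmallVector first and removed afterwards, since
unlinking a region while the depth-first traversal is still walking the
block graph would invalidate the iteration. The same pattern on a plain
container:

  #include <algorithm>
  #include <cassert>
  #include <vector>

  int main() {
    // Negative entries stand in for loop regions inside a block graph.
    std::vector<int> Graph{1, -2, 3, -4};
    std::vector<int> ToFlatten;
    for (int N : Graph)      // pass 1: read-only traversal
      if (N < 0)
        ToFlatten.push_back(N);
    for (int N : ToFlatten)  // pass 2: now safe to mutate Graph
      Graph.erase(std::find(Graph.begin(), Graph.end(), N));
    assert(Graph == (std::vector<int>{1, 3}));
    return 0;
  }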
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index cb127d37661c7..4b80875b79f13 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -184,8 +184,9 @@ struct VPlanTransforms {
VPRecipeBuilder &RecipeBuilder,
VFRange &Range);
- /// Lower abstract recipes to concrete ones, that can be codegen'd. Use \p
- /// CanonicalIVTy as type for all un-typed live-ins in VPTypeAnalysis.
+ /// Lower abstract recipes to concrete ones that can be codegen'd, and
+ /// replace loop regions with explicit CFG. Use \p CanonicalIVTy as type
+ /// for all un-typed live-ins in VPTypeAnalysis.
static void convertToConcreteRecipes(VPlan &Plan, Type &CanonicalIVTy);
/// Perform instcombine-like simplifications on recipes in \p Plan. Use \p
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll
index c0806ea16a5fc..d4494089f7083 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll
@@ -153,11 +153,10 @@ define i32 @select_icmp_var_start_iv_trunc(i32 %N, i32 %start) #0 {
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i32> [[STEP_ADD_2]], splat (i32 4)
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
-; CHECK-NEXT: [[TMP3]] = select i1 [[TMP2]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]]
-; CHECK-NEXT: [[TMP4]] = select i1 [[TMP2]], <4 x i32> [[STEP_ADD]], <4 x i32> [[VEC_PHI2]]
-; CHECK-NEXT: [[TMP5]] = select i1 [[TMP2]], <4 x i32> [[STEP_ADD_2]], <4 x i32> [[VEC_PHI3]]
-; CHECK-NEXT: [[TMP6]] = select i1 [[TMP2]], <4 x i32> [[STEP_ADD_3]], <4 x i32> [[VEC_PHI4]]
+; CHECK-NEXT: [[TMP3]] = select <4 x i1> [[TMP1]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]]
+; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP1]], <4 x i32> [[STEP_ADD]], <4 x i32> [[VEC_PHI2]]
+; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP1]], <4 x i32> [[STEP_ADD_2]], <4 x i32> [[VEC_PHI3]]
+; CHECK-NEXT: [[TMP6]] = select <4 x i1> [[TMP1]], <4 x i32> [[STEP_ADD_3]], <4 x i32> [[VEC_PHI4]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD_3]], splat (i32 4)
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -196,8 +195,7 @@ define i32 @select_icmp_var_start_iv_trunc(i32 %N, i32 %start) #0 {
; CHECK-NEXT: [[INDEX11:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT17:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI12:%.*]] = phi <4 x i32> [ [[DOTSPLAT]], %[[VEC_EPILOG_PH]] ], [ [[TMP14:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND15:%.*]] = phi <4 x i32> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT16:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP11]], i32 0
-; CHECK-NEXT: [[TMP14]] = select i1 [[TMP13]], <4 x i32> [[VEC_IND15]], <4 x i32> [[VEC_PHI12]]
+; CHECK-NEXT: [[TMP14]] = select <4 x i1> [[TMP11]], <4 x i32> [[VEC_IND15]], <4 x i32> [[VEC_PHI12]]
; CHECK-NEXT: [[INDEX_NEXT17]] = add nuw i64 [[INDEX11]], 4
; CHECK-NEXT: [[VEC_IND_NEXT16]] = add <4 x i32> [[VEC_IND15]], splat (i32 4)
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT17]], [[N_VEC8]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
index 969bb413f9c50..c2fe37ad214c6 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
@@ -74,10 +74,7 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
; VSCALEFORTUNING2-NEXT: [[TMP13:%.*]] = and <vscale x 4 x i32> [[TMP12]], splat (i32 1)
; VSCALEFORTUNING2-NEXT: [[TMP14:%.*]] = xor <vscale x 4 x i32> [[TMP13]], splat (i32 1)
; VSCALEFORTUNING2-NEXT: [[TMP15:%.*]] = zext <vscale x 4 x i32> [[TMP14]] to <vscale x 4 x i64>
-; VSCALEFORTUNING2-NEXT: [[TMP16:%.*]] = extractelement <vscale x 4 x i64> [[TMP15]], i32 0
-; VSCALEFORTUNING2-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[SRC_2]], i64 [[TMP16]]
-; VSCALEFORTUNING2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[TMP17]], i64 0
-; VSCALEFORTUNING2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x ptr> [[DOTSPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
+; VSCALEFORTUNING2-NEXT: [[DOTSPLAT:%.*]] = getelementptr i32, ptr [[SRC_2]], <vscale x 4 x i64> [[TMP15]]
; VSCALEFORTUNING2-NEXT: [[TMP18:%.*]] = call i32 @llvm.vscale.i32()
; VSCALEFORTUNING2-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], 4
; VSCALEFORTUNING2-NEXT: [[TMP20:%.*]] = sub i32 [[TMP19]], 1
@@ -210,10 +207,7 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
; PRED-NEXT: [[TMP17:%.*]] = and <vscale x 4 x i32> [[TMP16]], splat (i32 1)
; PRED-NEXT: [[TMP18:%.*]] = xor <vscale x 4 x i32> [[TMP17]], splat (i32 1)
; PRED-NEXT: [[TMP19:%.*]] = zext <vscale x 4 x i32> [[TMP18]] to <vscale x 4 x i64>
-; PRED-NEXT: [[TMP20:%.*]] = extractelement <vscale x 4 x i64> [[TMP19]], i32 0
-; PRED-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[SRC_2]], i64 [[TMP20]]
-; PRED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[TMP21]], i64 0
-; PRED-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x ptr> [[DOTSPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
+; PRED-NEXT: [[DOTSPLAT:%.*]] = getelementptr i32, ptr [[SRC_2]], <vscale x 4 x i64> [[TMP19]]
; PRED-NEXT: [[TMP22:%.*]] = call i32 @llvm.vscale.i32()
; PRED-NEXT: [[TMP23:%.*]] = mul i32 [[TMP22]], 4
; PRED-NEXT: [[TMP24:%.*]] = sub i32 [[TMP23]], 1
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll
index 567aa63483771..2e9d90f762ccd 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll
@@ -83,27 +83,24 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) {
; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, ir-bb<vector.ph>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<vector.ph>:
-; CHECK-NEXT: Successor(s): vector loop
+; CHECK-NEXT: Successor(s): vector.body
; CHECK-EMPTY:
-; CHECK-NEXT: <x1> vector loop: {
-; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[EP_IV:%.+]]> = phi [ ir<0>, ir-bb<vector.ph> ], [ vp<%index.next>, vector.body ]
-; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%accum> = phi ir<0>, ir<%add> (VF scaled by 1/4)
-; CHECK-NEXT: CLONE ir<%gep.a> = getelementptr ir<%a>, vp<[[EP_IV]]>
-; CHECK-NEXT: vp<[[PTR_A:%.+]]> = vector-pointer ir<%gep.a>
-; CHECK-NEXT: WIDEN ir<%load.a> = load vp<[[PTR_A]]>
-; CHECK-NEXT: WIDEN-CAST ir<%ext.a> = zext ir<%load.a> to i32
-; CHECK-NEXT: CLONE ir<%gep.b> = getelementptr ir<%b>, vp<[[EP_IV]]>
-; CHECK-NEXT: vp<[[PTR_B:%.+]]> = vector-pointer ir<%gep.b>
-; CHECK-NEXT: WIDEN ir<%load.b> = load vp<[[PTR_B]]>
-; CHECK-NEXT: WIDEN-CAST ir<%ext.b> = zext ir<%load.b> to i32
-; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%ext.b>, ir<%ext.a>
-; CHECK-NEXT: PARTIAL-REDUCE ir<%add> = add ir<%accum>, ir<%mul>
-; CHECK-NEXT: EMIT vp<[[EP_IV_NEXT:%.+]]> = add nuw vp<[[EP_IV]]>, ir<16>
-; CHECK-NEXT: EMIT branch-on-count vp<[[EP_IV_NEXT]]>, ir<1024>
-; CHECK-NEXT: No successors
-; CHECK-NEXT: }
-; CHECK-NEXT: Successor(s): middle.block
+; CHECK-NEXT: vector.body:
+; CHECK-NEXT: EMIT vp<[[EP_IV:%.+]]> = phi [ ir<0>, ir-bb<vector.ph> ], [ vp<%index.next>, vector.body ]
+; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%accum> = phi ir<0>, ir<%add> (VF scaled by 1/4)
+; CHECK-NEXT: CLONE ir<%gep.a> = getelementptr ir<%a>, vp<[[EP_IV]]>
+; CHECK-NEXT: vp<[[PTR_A:%.+]]> = vector-pointer ir<%gep.a>
+; CHECK-NEXT: WIDEN ir<%load.a> = load vp<[[PTR_A]]>
+; CHECK-NEXT: WIDEN-CAST ir<%ext.a> = zext ir<%load.a> to i32
+; CHECK-NEXT: CLONE ir<%gep.b> = getelementptr ir<%b>, vp<[[EP_IV]]>
+; CHECK-NEXT: vp<[[PTR_B:%.+]]> = vector-pointer ir<%gep.b>
+; CHECK-NEXT: WIDEN ir<%load.b> = load vp<[[PTR_B]]>
+; CHECK-NEXT: WIDEN-CAST ir<%ext.b> = zext ir<%load.b> to i32
+; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%ext.b>, ir<%ext.a>
+; CHECK-NEXT: PARTIAL-REDUCE ir<%add> = add ir<%accum>, ir<%mul>
+; CHECK-NEXT: EMIT vp<[[EP_IV_NEXT:%.+]]> = add nuw vp<[[EP_IV]]>, ir<16>
+; CHECK-NEXT: EMIT branch-on-count vp<[[EP_IV_NEXT]]>, ir<1024>
+; CHECK-NEXT: Successor(s): middle.block, vector.body
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[RED_RESULT:%.+]]> = compute-reduction-result ir<%accum>, ir<%add>
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
index 9e77a0ca8bcc9..0d77dfc50dd70 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
@@ -193,26 +193,23 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-NEXT: IR %18 = mul i64 %17, 4
; CHECK-NEXT: vp<[[END1:%.+]]> = DERIVED-IV ir<%0> + ir<[[VEC_TC]]> * ir<-1>
; CHECK-NEXT: vp<[[END2:%.+]]> = DERIVED-IV ir<%n> + ir<[[VEC_TC]]> * ir<-1>
-; CHECK-NEXT: Successor(s): vector loop
-; CHECK-EMPTY:
-; CHECK-NEXT: <x1> vector loop: {
-; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = phi [ ir<0>, ir-bb<vector.ph> ], [ vp<[[CAN_IV_NEXT:%.+]]>, vector.body ]
-; CHECK-NEXT: vp<[[DEV_IV:%.+]]> = DERIVED-IV ir<%n> + vp<[[CAN_IV]]> * ir<-1>
-; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[DEV_IV]]>, ir<-1>
-; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0>
-; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom>
-; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-end-pointer inbounds ir<%arrayidx>, ir<[[VF]]>
-; CHECK-NEXT: WIDEN ir<[[L:%.+]]> = load vp<[[VEC_PTR]]>
-; CHECK-NEXT: WIDEN ir<%add9> = add ir<[[L]]>, ir<1>
-; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom>
-; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-end-pointer inbounds ir<%arrayidx3>, ir<[[VF]]>
-; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%add9>
-; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, ir<[[VFxUF]]>.1
-; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, ir<[[VEC_TC]]>
-; CHECK-NEXT: No successors
-; CHECK-NEXT: }
-; CHECK-NEXT: Successor(s): middle.block
+; CHECK-NEXT: Successor(s): vector.body
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.body:
+; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = phi [ ir<0>, ir-bb<vector.ph> ], [ vp<[[CAN_IV_NEXT:%.+]]>, vector.body ]
+; CHECK-NEXT: vp<[[DEV_IV:%.+]]> = DERIVED-IV ir<%n> + vp<[[CAN_IV]]> * ir<-1>
+; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[DEV_IV]]>, ir<-1>
+; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0>
+; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom>
+; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-end-pointer inbounds ir<%arrayidx>, ir<[[VF]]>
+; CHECK-NEXT: WIDEN ir<[[L:%.+]]> = load vp<[[VEC_PTR]]>
+; CHECK-NEXT: WIDEN ir<%add9> = add ir<[[L]]>, ir<1>
+; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom>
+; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-end-pointer inbounds ir<%arrayidx3>, ir<[[VF]]>
+; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%add9>
+; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, ir<[[VFxUF]]>.1
+; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, ir<[[VEC_TC]]>
+; CHECK-NEXT: Successor(s): middle.block, vector.body
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<%0>, ir<[[VEC_TC]]>
@@ -444,26 +441,23 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-NEXT: IR %18 = mul i64 %17, 4
; CHECK-NEXT: vp<[[END1:%.+]]> = DERIVED-IV ir<%0> + ir<[[VEC_TC]]> * ir<-1>
; CHECK-NEXT: vp<[[END2:%.+]]> = DERIVED-IV ir<%n> + ir<[[VEC_TC]]> * ir<-1>
-; CHECK-NEXT: Successor(s): vector loop
-; CHECK-EMPTY:
-; CHECK-NEXT: <x1> vector loop: {
-; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = phi [ ir<0>, ir-bb<vector.ph> ], [ vp<[[CAN_IV_NEXT:%.+]]>, vector.body ]
-; CHECK-NEXT: vp<[[DEV_IV:%.+]]> = DERIVED-IV ir<%n> + vp<[[CAN_IV]]> * ir<-1>
-; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[DEV_IV]]>, ir<-1>
-; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0>
-; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom>
-; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-end-pointer inbounds ir<%arrayidx>, ir<[[VF]]>
-; CHECK-NEXT: WIDEN ir<[[L:%.+]]> = load vp<[[VEC_PTR]]>
-; CHECK-NEXT: WIDEN ir<%conv1> = fadd ir<[[L]]>, ir<1.000000e+00>
-; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom>
-; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-end-pointer inbounds ir<%arrayidx3>, ir<[[VF]]>
-; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%conv1>
-; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, ir<[[VFxUF]]>.1
-; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, ir<[[VEC_TC]]>
-; CHECK-NEXT: No successors
-; CHECK-NEXT: }
-; CHECK-NEXT: Successor(s): middle.block
+; CHECK-NEXT: Successor(s): vector.body
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.body:
+; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = phi [ ir<0>, ir-bb<vector.ph> ], [ vp<[[CAN_IV_NEXT:%.+]]>, vector.body ]
+; CHECK-NEXT: vp<[[DEV_IV:%.+]]> = DERIVED-IV ir<%n> + vp<[[CAN_IV]]> * ir<-1>
+; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[DEV_IV]]>, ir<-1>
+; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0>
+; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom>
+; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-end-pointer inbounds ir<%arrayidx>, ir<[[VF]]>
+; CHECK-NEXT: WIDEN ir<[[L:%.+]]> = load vp<[[VEC_PTR]]>
+; CHECK-NEXT: WIDEN ir<%conv1> = fadd ir<[[L]]>, ir<1.000000e+00>
+; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom>
+; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-end-pointer inbounds ir<%arrayidx3>, ir<[[VF]]>
+; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%conv1>
+; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, ir<[[VFxUF]]>.1
+; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, ir<[[VEC_TC]]>
+; CHECK-NEXT: Successor(s): middle.block, vector.body
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<%0>, ir<[[VEC_TC]]>
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll
index b2ec86ea3ec53..86647b1386ec5 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll
@@ -28,33 +28,30 @@
; IF-EVL-NEXT: IR %n.vec = sub i64 %n.rnd.up, %n.mod.vf
; IF-EVL-NEXT: IR %7 = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: IR %8 = mul i64 %7, 4
- ; IF-EVL-NEXT: Successor(s): vector loop
-
- ; IF-EVL: <x1> vector loop: {
- ; IF-EVL-NEXT: vector.body:
- ; IF-EVL-NEXT: EMIT vp<[[IV:%.+]]> = phi [ ir<0>, ir-bb<vector.ph> ], [ vp<[[IV_NEXT_EXIT:%.+]]>, vector.body ]
- ; IF-EVL-NEXT: EMIT vp<[[EVL_PHI:%.+]]> = phi [ ir<0>, ir-bb<vector.ph> ], [ vp<[[IV_NEX:%.+]]>, vector.body ]
- ; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
- ; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
- ; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[EVL_PHI]]>
- ; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
- ; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
- ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[EVL_PHI]]>
- ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
- ; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
- ; IF-EVL-NEXT: WIDEN ir<[[CMP:%.+]]> = icmp sgt ir<[[LD1]]>, ir<[[LD2]]>
- ; IF-EVL-NEXT: WIDEN ir<[[SUB:%.+]]> = sub ir<0>, ir<[[LD2]]>
- ; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[SELECT:%.+]]> = call llvm.vp.select(ir<[[CMP]]>, ir<[[LD2]]>, ir<[[SUB]]>, vp<[[EVL]]>)
- ; IF-EVL-NEXT: WIDEN ir<[[ADD:%.+]]> = add vp<[[SELECT]]>, ir<[[LD1]]>
- ; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[EVL_PHI]]>
- ; IF-EVL-NEXT: vp<[[PTR3:%.+]]> = vector-pointer ir<[[GEP3]]>
- ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[ADD]]>, vp<[[EVL]]>
- ; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
- ; IF-EVL-NEXT: EMIT vp<[[IV_NEX]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
- ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT]]> = add vp<[[IV]]>, ir<[[VFUF]]>
- ; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, ir<[[VTC]]>
- ; IF-EVL-NEXT: No successors
- ; IF-EVL-NEXT: }
+ ; IF-EVL-NEXT: Successor(s): vector.body
+ ; IF-EVL-EMPTY:
+ ; IF-EVL-NEXT: vector.body:
+ ; IF-EVL-NEXT: EMIT vp<[[IV:%.+]]> = phi [ ir<0>, ir-bb<vector.ph> ], [ vp<[[IV_NEXT_EXIT:%.+]]>, vector.body ]
+ ; IF-EVL-NEXT: EMIT vp<[[EVL_PHI:%.+]]> = phi [ ir<0>, ir-bb<vector.ph> ], [ vp<[[IV_NEX:%.+]]>, vector.body ]
+ ; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
+ ; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
+ ; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[EVL_PHI]]>
+ ; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
+ ; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
+ ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[EVL_PHI]]>
+ ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
+ ; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
+ ; IF-EVL-NEXT: WIDEN ir<[[CMP:%.+]]> = icmp sgt ir<[[LD1]]>, ir<[[LD2]]>
+ ; IF-EVL-NEXT: WIDEN ir<[[SUB:%.+]]> = sub ir<0>, ir<[[LD2]]>
+ ; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[SELECT:%.+]]> = call llvm.vp.select(ir<[[CMP]]>, ir<[[LD2]]>, ir<[[SUB]]>, vp<[[EVL]]>)
+ ; IF-EVL-NEXT: WIDEN ir<[[ADD:%.+]]> = add vp<[[SELECT]]>, ir<[[LD1]]>
+ ; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[EVL_PHI]]>
+ ; IF-EVL-NEXT: vp<[[PTR3:%.+]]> = vector-pointer ir<[[GEP3]]>
+ ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[ADD]]>, vp<[[EVL]]>
+ ; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+ ; IF-EVL-NEXT: EMIT vp<[[IV_NEX]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+ ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT]]> = add vp<[[IV]]>, ir<[[VFUF]]>
+ ; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, ir<[[VTC]]>
entry:
br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
index 61a5bd69b7ba3..59e2664cc1402 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
@@ -15,75 +15,72 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
; CHECK-NEXT: IR %n.mod.vf = urem i64 %0, 2
; CHECK-NEXT: IR %n.vec = sub i64 %0, %n.mod.vf
; CHECK-NEXT: vp<[[END:%.+]]> = DERIVED-IV ir<%start> + ir<%n.vec> * ir<1>
-; CHECK-NEXT: Successor(s): vector loop
-; CHECK-EMPTY:
-; CHECK-NEXT: <x1> vector loop: {
-; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = phi [ ir<0>, ir-bb<vector.ph> ], [ vp<[[CAN_IV_NEXT:%.+]]>, default.2 ]
-; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, ir<2>
-; CHECK-NEXT: EMIT vp<[[PTR:%.+]]> = ptradd ir<%start>, vp<[[STEPS]]>
-; CHECK-NEXT: vp<[[WIDE_PTR:%.+]]> = vector-pointer vp<[[PTR]]>
-; CHECK-NEXT: WIDEN ir<%l> = load vp<[[WIDE_PTR]]>
-; CHECK-NEXT: EMIT vp<[[C1:%.+]]> = icmp eq ir<%l>, ir<-12>
-; CHECK-NEXT: EMIT vp<[[C2:%.+]]> = icmp eq ir<%l>, ir<13>
-; CHECK-NEXT: EMIT vp<[[OR_CASES:%.+]]> = or vp<[[C1]]>, vp<[[C2]]>
-; CHECK-NEXT: EMIT vp<[[DEFAULT_MASK:%.+]]> = not vp<[[OR_CASES]]>
-; CHECK-NEXT: Successor(s): pred.store
-; CHECK-EMPTY:
-; CHECK-NEXT: <xVFxUF> pred.store: {
-; CHECK-NEXT: pred.store.entry:
-; CHECK-NEXT: BRANCH-ON-MASK vp<[[C2]]>
-; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
-; CHECK-EMPTY:
-; CHECK-NEXT: pred.store.if:
-; CHECK-NEXT: REPLICATE store ir<0>, vp<[[PTR]]>
-; CHECK-NEXT: Successor(s): pred.store.continue
-; CHECK-EMPTY:
-; CHECK-NEXT: pred.store.continue:
-; CHECK-NEXT: No successors
-; CHECK-NEXT: }
-; CHECK-NEXT: Successor(s): if.then.2.0
+; CHECK-NEXT: Successor(s): vector.body
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.body:
+; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = phi [ ir<0>, ir-bb<vector.ph> ], [ vp<[[CAN_IV_NEXT:%.+]]>, default.2 ]
+; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, ir<2>
+; CHECK-NEXT: EMIT vp<[[PTR:%.+]]> = ptradd ir<%start>, vp<[[STEPS]]>
+; CHECK-NEXT: vp<[[WIDE_PTR:%.+]]> = vector-pointer vp<[[PTR]]>
+; CHECK-NEXT: WIDEN ir<%l> = load vp<[[WIDE_PTR]]>
+; CHECK-NEXT: EMIT vp<[[C1:%.+]]> = icmp eq ir<%l>, ir<-12>
+; CHECK-NEXT: EMIT vp<[[C2:%.+]]> = icmp eq ir<%l>, ir<13>
+; CHECK-NEXT: EMIT vp<[[OR_CASES:%.+]]> = or vp<[[C1]]>, vp<[[C2]]>
+; CHECK-NEXT: EMIT vp<[[DEFAULT_MASK:%.+]]> = not vp<[[OR_CASES]]>
+; CHECK-NEXT: Successor(s): pred.store
+; CHECK-EMPTY:
+; CHECK-NEXT: <xVFxUF> pred.store: {
+; CHECK-NEXT: pred.store.entry:
+; CHECK-NEXT: BRANCH-ON-MASK vp<[[C2]]>
+; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.if:
+; CHECK-NEXT: REPLICATE store ir<0>, vp<[[PTR]]>
+; CHECK-NEXT: Successor(s): pred.store.continue
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.continue:
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+; CHECK-NEXT: Successor(s): if.then.2.0
; CHECK-EMPTY:
-; CHECK-NEXT: if.then.2.0:
-; CHECK-NEXT: Successor(s): pred.store
+; CHECK-NEXT: if.then.2.0:
+; CHECK-NEXT: Successor(s): pred.store
; CHECK-EMPTY:
-; CHECK-NEXT: <xVFxUF> pred.store: {
-; CHECK-NEXT: pred.store.entry:
-; CHECK-NEXT: BRANCH-ON-MASK vp<[[C1]]>
-; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
+; CHECK-NEXT: <xVFxUF> pred.store: {
+; CHECK-NEXT: pred.store.entry:
+; CHECK-NEXT: BRANCH-ON-MASK vp<[[C1]]>
+; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.if:
-; CHECK-NEXT: REPLICATE store ir<42>, vp<[[PTR]]>
-; CHECK-NEXT: Successor(s): pred.store.continue
+; CHECK-NEXT: REPLICATE store ir<42>, vp<[[PTR]]>
+; CHECK-NEXT: Successor(s): pred.store.continue
; CHECK-EMPTY:
-; CHECK-NEXT: pred.store.continue:
-; CHECK-NEXT: No successors
-; CHECK-NEXT: }
-; CHECK-NEXT: Successor(s): if.then.1.1
+; CHECK-NEXT: pred.store.continue:
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+; CHECK-NEXT: Successor(s): if.then.1.1
; CHECK-EMPTY:
-; CHECK-NEXT: if.then.1.1:
-; CHECK-NEXT: Successor(s): pred.store
+; CHECK-NEXT: if.then.1.1:
+; CHECK-NEXT: Successor(s): pred.store
; CHECK-EMPTY:
-; CHECK-NEXT: <xVFxUF> pred.store: {
-; CHECK-NEXT: pred.store.entry:
-; CHECK-NEXT: BRANCH-ON-MASK vp<[[DEFAULT_MASK]]>
-; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
+; CHECK-NEXT: <xVFxUF> pred.store: {
+; CHECK-NEXT: pred.store.entry:
+; CHECK-NEXT: BRANCH-ON-MASK vp<[[DEFAULT_MASK]]>
+; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
; CHECK-EMPTY:
-; CHECK-NEXT: pred.store.if:
-; CHECK-NEXT: REPLICATE store ir<2>, vp<[[PTR]]>
-; CHECK-NEXT: Successor(s): pred.store.continue
+; CHECK-NEXT: pred.store.if:
+; CHECK-NEXT: REPLICATE store ir<2>, vp<[[PTR]]>
+; CHECK-NEXT: Successor(s): pred.store.continue
; CHECK-EMPTY:
-; CHECK-NEXT: pred.store.continue:
-; CHECK-NEXT: No successors
-; CHECK-NEXT: }
-; CHECK-NEXT: Successor(s): default.2
-; CHECK-EMPTY:
-; CHECK-NEXT: default.2:
-; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, ir<[[VFxUF]]>
-; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, ir<[[VTC]]>
+; CHECK-NEXT: pred.store.continue:
; CHECK-NEXT: No successors
; CHECK-NEXT: }
-; CHECK-NEXT: Successor(s): middle.block
+; CHECK-NEXT: Successor(s): default.2
+; CHECK-EMPTY:
+; CHECK-NEXT: default.2:
+; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, ir<[[VFxUF]]>
+; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, ir<[[VTC]]>
+; CHECK-NEXT: Successor(s): middle.block, vector.body
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<%0>, ir<[[VTC]]>