[llvm] 826f237 - [VPlan] Don't add separate vector latch block (NFC).
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Sat Apr 26 14:04:10 PDT 2025
Author: Florian Hahn
Date: 2025-04-26T22:03:18+01:00
New Revision: 826f237cb4b44f6d95d66eea0e58f7d24585986b
URL: https://github.com/llvm/llvm-project/commit/826f237cb4b44f6d95d66eea0e58f7d24585986b
DIFF: https://github.com/llvm/llvm-project/commit/826f237cb4b44f6d95d66eea0e58f7d24585986b.diff
LOG: [VPlan] Don't add separate vector latch block (NFC).
Simplify initial VPlan construction by not creating a separate
vector.latch block, which isn't needed and will get folded away later.
This has been suggested as an independent clean-up multiple times.
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 4beeabd36d523..6b5b50a26c199 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9531,14 +9531,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
VPBasicBlock::iterator MBIP = MiddleVPBB->getFirstNonPhi();
VPBlockBase *PrevVPBB = nullptr;
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
- // Handle VPBBs down to the latch.
- if (VPBB == LoopRegion->getExiting()) {
- assert(!VPB2IRBB.contains(VPBB) &&
- "the latch block shouldn't have a corresponding IRBB");
- VPBlockUtils::connectBlocks(PrevVPBB, VPBB);
- break;
- }
-
// Create mask based on the IR BB corresponding to VPBB.
// TODO: Predicate directly based on VPlan.
Builder.setInsertPoint(VPBB, VPBB->begin());
@@ -9761,6 +9753,12 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
for (ElementCount VF : Range)
Plan->addVF(VF);
+ // Tail folding is not supported for outer loops, so the induction increment
+ // is guaranteed to not wrap.
+ bool HasNUW = true;
+ addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW,
+ DebugLoc());
+
if (!VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
Plan,
[this](PHINode *P) {
@@ -9769,12 +9767,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
*PSE.getSE(), *TLI))
return nullptr;
- // Tail folding is not supported for outer loops, so the induction increment
- // is guaranteed to not wrap.
- bool HasNUW = true;
- addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW,
- DebugLoc());
-
// Collect mapping of IR header phis to header phi recipes, to be used in
// addScalarResumePhis.
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
@@ -9939,14 +9931,18 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
// ensure that it comes after all of it's inputs, including CondOp.
// Delete CurrentLink as it will be invalid if its operand is replaced
// with a reduction defined at the bottom of the block in the next link.
- LinkVPBB->appendRecipe(RedRecipe);
+ if (LinkVPBB->getNumSuccessors() == 0)
+ RedRecipe->insertBefore(&*std::prev(std::prev(LinkVPBB->end())));
+ else
+ LinkVPBB->appendRecipe(RedRecipe);
+
CurrentLink->replaceAllUsesWith(RedRecipe);
ToDelete.push_back(CurrentLink);
PreviousLink = RedRecipe;
}
}
VPBasicBlock *LatchVPBB = VectorLoopRegion->getExitingBasicBlock();
- Builder.setInsertPoint(&*LatchVPBB->begin());
+ Builder.setInsertPoint(&*std::prev(std::prev(LatchVPBB->end())));
VPBasicBlock::iterator IP = MiddleVPBB->getFirstNonPhi();
for (VPRecipeBase &R :
Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
@@ -9966,8 +9962,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
if (!PhiR->isInLoop() && CM.foldTailByMasking() &&
!isa<VPPartialReductionRecipe>(OrigExitingVPV->getDefiningRecipe())) {
VPValue *Cond = RecipeBuilder.getBlockInMask(OrigLoop->getHeader());
- assert(OrigExitingVPV->getDefiningRecipe()->getParent() != LatchVPBB &&
- "reduction recipe must be defined before latch");
Type *PhiTy = PhiR->getOperand(0)->getLiveInIRValue()->getType();
std::optional<FastMathFlags> FMFs =
PhiTy->isFloatingPointTy()
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index f24d42256caef..b374371667b5e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -451,10 +451,6 @@ void VPlanTransforms::createLoopRegions(VPlan &Plan, Type *InductionTy,
createLoopRegion(Plan, HeaderVPB);
VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
- auto *OrigExiting = TopRegion->getExiting();
- VPBasicBlock *LatchVPBB = Plan.createVPBasicBlock("vector.latch");
- VPBlockUtils::insertBlockAfter(LatchVPBB, OrigExiting);
- TopRegion->setExiting(LatchVPBB);
TopRegion->setName("vector loop");
TopRegion->getEntryBasicBlock()->setName("vector.body");
@@ -472,7 +468,6 @@ void VPlanTransforms::createLoopRegions(VPlan &Plan, Type *InductionTy,
VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph");
VPBlockUtils::insertBlockAfter(VecPreheader, Plan.getEntry());
-
VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block");
VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 95dfeeaa57f3a..fa16a64ebd80a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -55,6 +55,9 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
make_early_inc_range(make_range(VPBB->begin(), EndIter))) {
VPValue *VPV = Ingredient.getVPSingleValue();
+ if (!VPV->getUnderlyingValue())
+ continue;
+
Instruction *Inst = cast<Instruction>(VPV->getUnderlyingValue());
VPRecipeBase *NewRecipe = nullptr;
@@ -387,9 +390,13 @@ static void addReplicateRegions(VPlan &Plan) {
SplitBlock->setName(
OrigBB->hasName() ? OrigBB->getName() + "." + Twine(BBNum++) : "");
// Record predicated instructions for above packing optimizations.
- VPBlockBase *Region = createReplicateRegion(RepR, Plan);
+ VPRegionBlock *Region = createReplicateRegion(RepR, Plan);
Region->setParent(CurrentBlock->getParent());
VPBlockUtils::insertOnEdge(CurrentBlock, SplitBlock, Region);
+
+ VPRegionBlock *ParentRegion = Region->getParent();
+ if (ParentRegion && ParentRegion->getExiting() == CurrentBlock)
+ ParentRegion->setExiting(SplitBlock);
}
}
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
index be98e86bceed7..11abdf790d121 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
@@ -53,14 +53,10 @@ TEST_F(VPlanHCFGTest, testBuildHCFGInnerLoop) {
VPBasicBlock *VecBB = Plan->getVectorLoopRegion()->getEntryBasicBlock();
EXPECT_EQ(7u, VecBB->size());
EXPECT_EQ(0u, VecBB->getNumPredecessors());
- EXPECT_EQ(1u, VecBB->getNumSuccessors());
+ EXPECT_EQ(0u, VecBB->getNumSuccessors());
EXPECT_EQ(VecBB->getParent()->getEntryBasicBlock(), VecBB);
EXPECT_EQ(&*Plan, VecBB->getPlan());
- VPBlockBase *VecLatch = VecBB->getSingleSuccessor();
- EXPECT_EQ(VecLatch->getParent()->getExitingBasicBlock(), VecLatch);
- EXPECT_EQ(0u, VecLatch->getNumSuccessors());
-
auto Iter = VecBB->begin();
VPWidenPHIRecipe *Phi = dyn_cast<VPWidenPHIRecipe>(&*Iter++);
EXPECT_NE(nullptr, Phi);
@@ -130,33 +126,28 @@ compound=true
" EMIT store ir\<%res\>, ir\<%arr.idx\>\l" +
" EMIT ir\<%indvars.iv.next\> = add ir\<%indvars.iv\>, ir\<1\>\l" +
" EMIT ir\<%exitcond\> = icmp ir\<%indvars.iv.next\>, ir\<%N\>\l" +
- "Successor(s): vector.latch\l"
- ]
- N2 -> N4 [ label=""]
- N4 [label =
- "vector.latch:\l" +
"No successors\l"
]
}
- N4 -> N5 [ label="" ltail=cluster_N3]
- N5 [label =
+ N2 -> N4 [ label="" ltail=cluster_N3]
+ N4 [label =
"middle.block:\l" +
" EMIT vp\<%cmp.n\> = icmp eq ir\<%N\>, vp\<%0\>\l" +
" EMIT branch-on-cond vp\<%cmp.n\>\l" +
"Successor(s): ir-bb\<for.end\>, scalar.ph\l"
]
- N5 -> N6 [ label="T"]
- N5 -> N7 [ label="F"]
- N6 [label =
+ N4 -> N5 [ label="T"]
+ N4 -> N6 [ label="F"]
+ N5 [label =
"ir-bb\<for.end\>:\l" +
"No successors\l"
]
- N7 [label =
+ N6 [label =
"scalar.ph:\l" +
"Successor(s): ir-bb\<for.body\>\l"
]
- N7 -> N8 [ label=""]
- N8 [label =
+ N6 -> N7 [ label=""]
+ N7 [label =
"ir-bb\<for.body\>:\l" +
" IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\l" +
" IR %arr.idx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv\l" +
@@ -171,10 +162,6 @@ compound=true
)";
EXPECT_EQ(ExpectedStr, FullDump);
#endif
- TargetLibraryInfoImpl TLII(M.getTargetTriple());
- TargetLibraryInfo TLI(TLII);
- VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
- Plan, [](PHINode *P) { return nullptr; }, *SE, TLI);
}
TEST_F(VPlanHCFGTest, testVPInstructionToVPRecipesInner) {
@@ -203,6 +190,12 @@ TEST_F(VPlanHCFGTest, testVPInstructionToVPRecipesInner) {
TargetLibraryInfoImpl TLII(M.getTargetTriple());
TargetLibraryInfo TLI(TLII);
+ // Current VPlan construction doesn't add a terminator for top-level loop
+ // latches. Add it before running transform.
+ cast<VPBasicBlock>(Plan->getVectorLoopRegion()->getExiting())
+ ->appendRecipe(new VPInstruction(
+ VPInstruction::BranchOnCond,
+ {Plan->getOrAddLiveIn(ConstantInt::getTrue(F->getContext()))}));
VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
Plan, [](PHINode *P) { return nullptr; }, *SE, TLI);
@@ -214,15 +207,11 @@ TEST_F(VPlanHCFGTest, testVPInstructionToVPRecipesInner) {
// Check that the region following the preheader consists of a block for the
// original header and a separate latch.
VPBasicBlock *VecBB = Plan->getVectorLoopRegion()->getEntryBasicBlock();
- EXPECT_EQ(7u, VecBB->size());
+ EXPECT_EQ(8u, VecBB->size());
EXPECT_EQ(0u, VecBB->getNumPredecessors());
- EXPECT_EQ(1u, VecBB->getNumSuccessors());
+ EXPECT_EQ(0u, VecBB->getNumSuccessors());
EXPECT_EQ(VecBB->getParent()->getEntryBasicBlock(), VecBB);
- VPBlockBase *VecLatch = VecBB->getSingleSuccessor();
- EXPECT_EQ(VecLatch->getParent()->getExitingBasicBlock(), VecLatch);
- EXPECT_EQ(0u, VecLatch->getNumSuccessors());
-
auto Iter = VecBB->begin();
EXPECT_NE(nullptr, dyn_cast<VPWidenPHIRecipe>(&*Iter++));
EXPECT_NE(nullptr, dyn_cast<VPWidenGEPRecipe>(&*Iter++));
@@ -231,6 +220,7 @@ TEST_F(VPlanHCFGTest, testVPInstructionToVPRecipesInner) {
EXPECT_NE(nullptr, dyn_cast<VPWidenMemoryRecipe>(&*Iter++));
EXPECT_NE(nullptr, dyn_cast<VPWidenRecipe>(&*Iter++));
EXPECT_NE(nullptr, dyn_cast<VPWidenRecipe>(&*Iter++));
+ EXPECT_NE(nullptr, dyn_cast<VPInstruction>(&*Iter++));
EXPECT_EQ(VecBB->end(), Iter);
}
@@ -303,33 +293,28 @@ compound=true
" EMIT store ir\<%res\>, ir\<%arr.idx\>\l" +
" EMIT ir\<%iv.next\> = add ir\<%iv\>, ir\<1\>\l" +
" EMIT ir\<%exitcond\> = icmp ir\<%iv.next\>, ir\<%N\>\l" +
- "Successor(s): vector.latch\l"
- ]
- N4 -> N5 [ label=""]
- N5 [label =
- "vector.latch:\l" +
"No successors\l"
]
}
- N5 -> N6 [ label="" ltail=cluster_N3]
- N6 [label =
+ N4 -> N5 [ label="" ltail=cluster_N3]
+ N5 [label =
"middle.block:\l" +
" EMIT vp\<%cmp.n\> = icmp eq ir\<%N\>, vp\<%0\>\l" +
" EMIT branch-on-cond vp\<%cmp.n\>\l" +
"Successor(s): ir-bb\<exit.2\>, scalar.ph\l"
]
- N6 -> N7 [ label="T"]
- N6 -> N8 [ label="F"]
- N7 [label =
+ N5 -> N6 [ label="T"]
+ N5 -> N7 [ label="F"]
+ N6 [label =
"ir-bb\<exit.2\>:\l" +
"No successors\l"
]
- N8 [label =
+ N7 [label =
"scalar.ph:\l" +
"Successor(s): ir-bb\<loop.header\>\l"
]
- N8 -> N9 [ label=""]
- N9 [label =
+ N7 -> N8 [ label=""]
+ N8 [label =
"ir-bb\<loop.header\>:\l" +
" IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]\l" +
" IR %arr.idx = getelementptr inbounds i32, ptr %A, i64 %iv\l" +
More information about the llvm-commits
mailing list