[llvm] ede7c24 - [VPlan] Create header & latch blocks for skeleton up front (NFC).
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 22 04:45:44 PST 2021
Author: Florian Hahn
Date: 2021-12-22T12:44:25Z
New Revision: ede7c2438f39e8901a6479f180255ad0f5f36d32
URL: https://github.com/llvm/llvm-project/commit/ede7c2438f39e8901a6479f180255ad0f5f36d32
DIFF: https://github.com/llvm/llvm-project/commit/ede7c2438f39e8901a6479f180255ad0f5f36d32.diff
LOG: [VPlan] Create header & latch blocks for skeleton up front (NFC).
By creating the header and latch blocks up front and adding blocks and
recipes in between those 2 blocks we ensure that the entry and exits of
the plan remain valid throughout construction.
In order to avoid test changes and keep printing of the plans the same,
we use the new header block instead of creating a new block on the first
iteration of the loop traversing the original loop.
We also fold the latch into its predecessor.
This is a follow up to a post-commit suggestion in D114586.
Reviewed By: Ayal
Differential Revision: https://reviews.llvm.org/D115793
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/VPlan.h
llvm/test/Transforms/LoopVectorize/reduction-order.ll
llvm/test/Transforms/LoopVectorize/select-reduction.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 5dafc561baf43..b3ba2aa13d376 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8795,13 +8795,17 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
return VPBB;
}
LLVM_DEBUG(dbgs() << "LV: Scalarizing and predicating:" << *I << "\n");
- assert(VPBB->getSuccessors().empty() &&
- "VPBB has successors when handling predicated replication.");
+
+ VPBlockBase *SingleSucc = VPBB->getSingleSuccessor();
+ assert(SingleSucc && "VPBB must have a single successor when handling "
+ "predicated replication.");
+ VPBlockUtils::disconnectBlocks(VPBB, SingleSucc);
// Record predicated instructions for above packing optimizations.
VPBlockBase *Region = createReplicateRegion(I, Recipe, Plan);
VPBlockUtils::insertBlockAfter(Region, VPBB);
auto *RegSucc = new VPBasicBlock();
VPBlockUtils::insertBlockAfter(RegSucc, Region);
+ VPBlockUtils::connectBlocks(RegSucc, SingleSucc);
return RegSucc;
}
@@ -9022,30 +9026,25 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
// visit each basic block after having visited its predecessor basic blocks.
// ---------------------------------------------------------------------------
- auto Plan = std::make_unique<VPlan>();
+ // Create initial VPlan skeleton, with separate header and latch blocks.
+ VPBasicBlock *HeaderVPBB = new VPBasicBlock();
+ VPBasicBlock *LatchVPBB = new VPBasicBlock("vector.latch");
+ VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB);
+ auto *TopRegion = new VPRegionBlock(HeaderVPBB, LatchVPBB, "vector loop");
+ auto Plan = std::make_unique<VPlan>(TopRegion);
// Scan the body of the loop in a topological order to visit each basic block
// after having visited its predecessor basic blocks.
LoopBlocksDFS DFS(OrigLoop);
DFS.perform(LI);
- VPBasicBlock *VPBB = nullptr;
- VPBasicBlock *HeaderVPBB = nullptr;
+ VPBasicBlock *VPBB = HeaderVPBB;
SmallVector<VPWidenIntOrFpInductionRecipe *> InductionsToMove;
for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) {
// Relevant instructions from basic block BB will be grouped into VPRecipe
// ingredients and fill a new VPBasicBlock.
unsigned VPBBsForBB = 0;
- auto *FirstVPBBForBB = new VPBasicBlock(BB->getName());
- if (VPBB)
- VPBlockUtils::insertBlockAfter(FirstVPBBForBB, VPBB);
- else {
- auto *TopRegion = new VPRegionBlock("vector loop");
- TopRegion->setEntry(FirstVPBBForBB);
- Plan->setEntry(TopRegion);
- HeaderVPBB = FirstVPBBForBB;
- }
- VPBB = FirstVPBBForBB;
+ VPBB->setName(BB->getName());
Builder.setInsertPoint(VPBB);
// Introduce each ingredient into VPlan.
@@ -9112,8 +9111,17 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
: "");
}
}
+
+ VPBlockUtils::insertBlockAfter(new VPBasicBlock(), VPBB);
+ VPBB = cast<VPBasicBlock>(VPBB->getSingleSuccessor());
}
+ // Fold the last, empty block into its predecessor.
+ VPBB = VPBlockUtils::tryToMergeBlockIntoPredecessor(VPBB);
+ assert(VPBB && "expected to fold last (empty) block");
+ // After here, VPBB should not be used.
+ VPBB = nullptr;
+
assert(isa<VPRegionBlock>(Plan->getEntry()) &&
!Plan->getEntry()->getEntryBasicBlock()->empty() &&
"entry block must be set to a VPRegionBlock having a non-empty entry "
@@ -9183,13 +9191,9 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
VPBlockUtils::disconnectBlocks(SplitPred, SplitBlock);
VPBlockUtils::connectBlocks(SplitPred, SinkRegion);
VPBlockUtils::connectBlocks(SinkRegion, SplitBlock);
- if (VPBB == SplitPred)
- VPBB = SplitBlock;
}
}
- cast<VPRegionBlock>(Plan->getEntry())->setExit(VPBB);
-
VPlanTransforms::removeRedundantInductionCasts(*Plan);
// Now that sink-after is done, move induction recipes for optimized truncates
@@ -9198,7 +9202,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
Ind->moveBefore(*HeaderVPBB, HeaderVPBB->getFirstNonPhi());
// Adjust the recipes for any inloop reductions.
- adjustRecipesForReductions(VPBB, Plan, RecipeBuilder, Range.Start);
+ adjustRecipesForReductions(cast<VPBasicBlock>(TopRegion->getExit()), Plan,
+ RecipeBuilder, Range.Start);
// Introduce a recipe to combine the incoming and previous values of a
// first-order recurrence.
@@ -9278,6 +9283,11 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
RSO.flush();
Plan->setName(PlanName);
+ // Fold Exit block into its predecessor if possible.
+ // TODO: Fold block earlier once all VPlan transforms properly maintain a
+ // VPBasicBlock as exit.
+ VPBlockUtils::tryToMergeBlockIntoPredecessor(TopRegion->getExit());
+
assert(VPlanVerifier::verifyPlanIsValid(*Plan) && "VPlan is invalid");
return Plan;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index dfe75105ac425..f4a1883e35d5f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2352,18 +2352,23 @@ class VPBlockUtils {
/// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p
/// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p
- /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. If \p BlockPtr
- /// has more than one successor, its conditional bit is propagated to \p
- /// NewBlock. \p NewBlock must have neither successors nor predecessors.
+ /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. \p BlockPtr's
+ /// successors are moved from \p BlockPtr to \p NewBlock and \p BlockPtr's
+ /// conditional bit is propagated to \p NewBlock. \p NewBlock must have
+ /// neither successors nor predecessors.
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
assert(NewBlock->getSuccessors().empty() &&
- "Can't insert new block with successors.");
- // TODO: move successors from BlockPtr to NewBlock when this functionality
- // is necessary. For now, setBlockSingleSuccessor will assert if BlockPtr
- // already has successors.
- BlockPtr->setOneSuccessor(NewBlock);
- NewBlock->setPredecessors({BlockPtr});
+ NewBlock->getPredecessors().empty() &&
+ "Can't insert new block with predecessors or successors.");
NewBlock->setParent(BlockPtr->getParent());
+ SmallVector<VPBlockBase *> Succs(BlockPtr->successors());
+ for (VPBlockBase *Succ : Succs) {
+ disconnectBlocks(BlockPtr, Succ);
+ connectBlocks(NewBlock, Succ);
+ }
+ NewBlock->setCondBit(BlockPtr->getCondBit());
+ BlockPtr->setCondBit(nullptr);
+ connectBlocks(BlockPtr, NewBlock);
}
/// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p
@@ -2406,6 +2411,31 @@ class VPBlockUtils {
To->removePredecessor(From);
}
+ /// Try to merge \p Block into its single predecessor, if \p Block is a
+ /// VPBasicBlock and its predecessor has a single successor. Returns a pointer
+ /// to the predecessor \p Block was merged into or nullptr otherwise.
+ static VPBasicBlock *tryToMergeBlockIntoPredecessor(VPBlockBase *Block) {
+ auto *VPBB = dyn_cast<VPBasicBlock>(Block);
+ auto *PredVPBB =
+ dyn_cast_or_null<VPBasicBlock>(Block->getSinglePredecessor());
+ if (!VPBB || !PredVPBB || PredVPBB->getNumSuccessors() != 1)
+ return nullptr;
+
+ for (VPRecipeBase &R : make_early_inc_range(*VPBB))
+ R.moveBefore(*PredVPBB, PredVPBB->end());
+ VPBlockUtils::disconnectBlocks(PredVPBB, VPBB);
+ auto *ParentRegion = cast<VPRegionBlock>(Block->getParent());
+ if (ParentRegion->getExit() == Block)
+ ParentRegion->setExit(PredVPBB);
+ SmallVector<VPBlockBase *> Successors(Block->successors());
+ for (auto *Succ : Successors) {
+ VPBlockUtils::disconnectBlocks(Block, Succ);
+ VPBlockUtils::connectBlocks(PredVPBB, Succ);
+ }
+ delete Block;
+ return PredVPBB;
+ }
+
/// Returns true if the edge \p FromBlock -> \p ToBlock is a back-edge.
static bool isBackEdge(const VPBlockBase *FromBlock,
const VPBlockBase *ToBlock, const VPLoopInfo *VPLI) {
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-order.ll b/llvm/test/Transforms/LoopVectorize/reduction-order.ll
index 07201c0dfd78d..47ac8e424fd6a 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-order.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-order.ll
@@ -7,9 +7,9 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16
; in deterministic order.
; CHECK-LABEL: @foo(
; CHECK: vector.body:
-; CHECK: icmp ule <4 x i64>
-; CHECK-NEXT: %[[VAR1:.*]] = add <4 x i32> <i32 3, i32 3, i32 3, i32 3>, %vec.phi1
+; CHECK: %[[VAR1:.*]] = add <4 x i32> <i32 3, i32 3, i32 3, i32 3>, %vec.phi1
; CHECK-NEXT: %[[VAR2:.*]] = add <4 x i32> %vec.phi, <i32 5, i32 5, i32 5, i32 5>
+; CHECK-NEXT: icmp ule <4 x i64>
; CHECK-NEXT: select <4 x i1> {{.*}}, <4 x i32> %[[VAR2]], <4 x i32>
; CHECK-NEXT: select <4 x i1> {{.*}}, <4 x i32> %[[VAR1]], <4 x i32>
; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
diff --git a/llvm/test/Transforms/LoopVectorize/select-reduction.ll b/llvm/test/Transforms/LoopVectorize/select-reduction.ll
index 70920bd2a9864..e487b3a34f780 100644
--- a/llvm/test/Transforms/LoopVectorize/select-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/select-reduction.ll
@@ -33,9 +33,9 @@ define i32 @test(i64 %N, i32 %x) {
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT3]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT4]], <i64 0, i64 1, i64 2, i64 3>
-; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[VEC_PHI]], <i32 10, i32 10, i32 10, i32 10>
; CHECK-NEXT: [[TMP3]] = select <4 x i1> [[TMP2]], <4 x i32> [[VEC_PHI]], <4 x i32> <i32 10, i32 10, i32 10, i32 10>
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP3]], <4 x i32> [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
More information about the llvm-commits
mailing list