[llvm] 14e3650 - Revert "Recommit "[LV] Remove unneeded createHeaderBranch.(NFCI)""
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 31 11:01:33 PDT 2022
Author: Florian Hahn
Date: 2022-03-31T19:00:48+01:00
New Revision: 14e3650f01d158f7e4117c353927a07ceebdd504
URL: https://github.com/llvm/llvm-project/commit/14e3650f01d158f7e4117c353927a07ceebdd504
DIFF: https://github.com/llvm/llvm-project/commit/14e3650f01d158f7e4117c353927a07ceebdd504.diff
LOG: Revert "Recommit "[LV] Remove unneeded createHeaderBranch.(NFCI)""
This reverts commit 8378a71b6cce611e01f42690713fd7b561ff3f30.
It looks like this patch uncovered another issue, e.g. see
https://lab.llvm.org/buildbot/#/builders/168/builds/5518
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/VPlan.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index d330b8d766449..772b276df124a 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -569,6 +569,11 @@ class InnerLoopVectorizer {
Value *CountRoundDown, Value *EndValue,
BasicBlock *MiddleBlock, BasicBlock *VectorHeader);
+ /// Introduce a conditional branch (on true, condition to be set later) at the
+ /// end of the header=latch connecting it to itself (across the backedge) and
+ /// to the exit block of \p L.
+ void createHeaderBranch(Loop *L);
+
/// Handle all cross-iteration phis in the header.
void fixCrossIterationPHIs(VPTransformState &State);
@@ -625,8 +630,9 @@ class InnerLoopVectorizer {
BasicBlock *emitMemRuntimeChecks(BasicBlock *Bypass);
/// Emit basic blocks (prefixed with \p Prefix) for the iteration check,
- /// vector loop preheader, middle block and scalar preheader.
- void createVectorLoopSkeleton(StringRef Prefix);
+ /// vector loop preheader, middle block and scalar preheader. Also
+ /// allocate a loop object for the new vector loop and return it.
+ Loop *createVectorLoopSkeleton(StringRef Prefix);
/// Create new phi nodes for the induction variables to resume iteration count
/// in the scalar epilogue, from where the vectorized loop left off.
@@ -2827,6 +2833,23 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
PredicatedInstructions.push_back(Cloned);
}
+void InnerLoopVectorizer::createHeaderBranch(Loop *L) {
+ BasicBlock *Header = L->getHeader();
+ assert(!L->getLoopLatch() && "loop should not have a latch at this point");
+
+ IRBuilder<> B(Header->getTerminator());
+ Instruction *OldInst =
+ getDebugLocFromInstOrOperands(Legal->getPrimaryInduction());
+ setDebugLocFromInst(OldInst, &B);
+
+ // Connect the header to the exit and header blocks and replace the old
+ // terminator.
+ B.CreateCondBr(B.getTrue(), L->getUniqueExitBlock(), Header);
+
+ // Now we have two terminators. Remove the old one from the block.
+ Header->getTerminator()->eraseFromParent();
+}
+
Value *InnerLoopVectorizer::getOrCreateTripCount(BasicBlock *InsertBlock) {
if (TripCount)
return TripCount;
@@ -3069,7 +3092,7 @@ BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(BasicBlock *Bypass) {
return MemCheckBlock;
}
-void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
+Loop *InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
LoopScalarBody = OrigLoop->getHeader();
LoopVectorPreHeader = OrigLoop->getLoopPreheader();
assert(LoopVectorPreHeader && "Invalid loop structure");
@@ -3101,8 +3124,12 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
BrInst->setDebugLoc(ScalarLatchTerm->getDebugLoc());
ReplaceInstWithInst(LoopMiddleBlock->getTerminator(), BrInst);
- SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT,
- nullptr, nullptr, Twine(Prefix) + "vector.body");
+ // We intentionally don't let SplitBlock update LoopInfo, since
+ // LoopVectorBody should belong to a different loop than LoopVectorPreHeader.
+ // LoopVectorBody is explicitly added to the correct place a few lines later.
+ BasicBlock *LoopVectorBody =
+ SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT,
+ nullptr, nullptr, Twine(Prefix) + "vector.body");
// Update dominator for loop exit.
if (!Cost->requiresScalarEpilogue(VF))
@@ -3110,6 +3137,20 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
// middle block to exit blocks and thus no need to update the immediate
// dominator of the exit blocks.
DT->changeImmediateDominator(LoopExitBlock, LoopMiddleBlock);
+
+ // Create and register the new vector loop.
+ Loop *Lp = LI->AllocateLoop();
+ Loop *ParentLoop = OrigLoop->getParentLoop();
+
+ // Insert the new loop into the loop nest and register the new basic blocks
+ // before calling any utilities such as SCEV that require valid LoopInfo.
+ if (ParentLoop) {
+ ParentLoop->addChildLoop(Lp);
+ } else {
+ LI->addTopLevelLoop(Lp);
+ }
+ Lp->addBasicBlockToLoop(LoopVectorBody, *LI);
+ return Lp;
}
void InnerLoopVectorizer::createInductionResumeValues(
@@ -3221,6 +3262,7 @@ BasicBlock *InnerLoopVectorizer::completeLoopSkeleton(MDNode *OrigLoopID) {
#ifdef EXPENSIVE_CHECKS
assert(DT->verify(DominatorTree::VerificationLevel::Fast));
+ LI->verify(*DT);
#endif
return LoopVectorPreHeader;
@@ -3274,7 +3316,7 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton() {
// Create an empty vector loop, and prepare basic blocks for the runtime
// checks.
- createVectorLoopSkeleton("");
+ Loop *Lp = createVectorLoopSkeleton("");
// Now, compare the new count to zero. If it is zero skip the vector loop and
// jump to the scalar loop. This check also covers the case where the
@@ -3292,6 +3334,8 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton() {
// faster.
emitMemRuntimeChecks(LoopScalarPreHeader);
+ createHeaderBranch(Lp);
+
// Emit phis for the new starting index of the scalar loop.
createInductionResumeValues();
@@ -7578,7 +7622,7 @@ void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF,
// 1. Create a new empty loop. Unlink the old loop and connect the new one.
VPTransformState State{BestVF, BestUF, LI, DT, ILV.Builder, &ILV, &BestVPlan};
Value *CanonicalIVStartValue;
- std::tie(State.CFG.VectorPreHeader, CanonicalIVStartValue) =
+ std::tie(State.CFG.PrevBB, CanonicalIVStartValue) =
ILV.createVectorizedLoopSkeleton();
ILV.collectPoisonGeneratingRecipes(State);
@@ -7695,7 +7739,7 @@ Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) { return V; }
std::pair<BasicBlock *, Value *>
EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton() {
MDNode *OrigLoopID = OrigLoop->getLoopID();
- createVectorLoopSkeleton("");
+ Loop *Lp = createVectorLoopSkeleton("");
// Generate the code to check the minimum iteration count of the vector
// epilogue (see below).
@@ -7724,6 +7768,7 @@ EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton() {
// Generate the induction variable.
Value *CountRoundDown = getOrCreateVectorTripCount(LoopVectorPreHeader);
EPI.VectorTripCount = CountRoundDown;
+ createHeaderBranch(Lp);
// Skip induction resume value creation here because they will be created in
// the second pass. If we created them here, they wouldn't be used anyway,
@@ -7815,7 +7860,7 @@ EpilogueVectorizerMainLoop::emitMinimumIterationCountCheck(BasicBlock *Bypass,
std::pair<BasicBlock *, Value *>
EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() {
MDNode *OrigLoopID = OrigLoop->getLoopID();
- createVectorLoopSkeleton("vec.epilog.");
+ Loop *Lp = createVectorLoopSkeleton("vec.epilog.");
// Now, compare the remaining count and if there aren't enough iterations to
// execute the vectorized epilogue skip to the scalar part.
@@ -7896,6 +7941,9 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() {
EPResumeVal->addIncoming(ConstantInt::get(IdxTy, 0),
EPI.MainLoopIterationCountCheck);
+ // Create the conditional branch at the end of the vector loop header.
+ createHeaderBranch(Lp);
+
// Generate induction resume values. These variables save the new starting
// indexes for the scalar loop. They are used to test if there are any tail
// iterations left once the vector loop has completed.
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 05a92cba79b21..3a2cb7f161bcf 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -318,14 +318,9 @@ void VPBasicBlock::execute(VPTransformState *State) {
// Temporarily terminate with unreachable until CFG is rewired.
UnreachableInst *Terminator = State->Builder.CreateUnreachable();
State->Builder.SetInsertPoint(Terminator);
- State->CFG.PrevBB = NewBB;
- }
-
- if (State->CurrentVectorLoop &&
- !State->CurrentVectorLoop->contains(State->CFG.PrevBB)) {
// Register NewBB in its loop. In innermost loops its the same for all BB's.
- State->CurrentVectorLoop->addBasicBlockToLoop(State->CFG.PrevBB,
- *State->LI);
+ State->CurrentVectorLoop->addBasicBlockToLoop(NewBB, *State->LI);
+ State->CFG.PrevBB = NewBB;
}
// 2. Fill the IR basic block with IR instructions.
@@ -452,17 +447,6 @@ void VPRegionBlock::execute(VPTransformState *State) {
ReversePostOrderTraversal<VPBlockBase *> RPOT(Entry);
if (!isReplicator()) {
- // Create and register the new vector loop.
- State->CurrentVectorLoop = State->LI->AllocateLoop();
- Loop *ParentLoop = State->LI->getLoopFor(State->CFG.VectorPreHeader);
-
- // Insert the new loop into the loop nest and register the new basic blocks
- // before calling any utilities such as SCEV that require valid LoopInfo.
- if (ParentLoop)
- ParentLoop->addChildLoop(State->CurrentVectorLoop);
- else
- State->LI->addTopLevelLoop(State->CurrentVectorLoop);
-
// Visit the VPBlocks connected to "this", starting from it.
for (VPBlockBase *Block : RPOT) {
if (EnableVPlanNativePath) {
@@ -873,7 +857,7 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
// Check if the backedge taken count is needed, and if so build it.
if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
- IRBuilder<> Builder(State.CFG.VectorPreHeader->getTerminator());
+ IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
auto *TCMO = Builder.CreateSub(TripCountV,
ConstantInt::get(TripCountV->getType(), 1),
"trip.count.minus.1");
@@ -914,16 +898,17 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
/// LoopVectorBody basic-block was created for this. Introduce additional
/// basic-blocks as needed, and fill them all.
void VPlan::execute(VPTransformState *State) {
- // Set the reverse mapping from VPValues to Values for code generation.
+ // 0. Set the reverse mapping from VPValues to Values for code generation.
for (auto &Entry : Value2VPValue)
State->VPValue2Value[Entry.second] = Entry.first;
- // Initialize CFG state.
- State->CFG.PrevVPBB = nullptr;
- BasicBlock *VectorHeaderBB = State->CFG.VectorPreHeader->getSingleSuccessor();
- State->CFG.PrevBB = VectorHeaderBB;
- State->CFG.ExitBB = VectorHeaderBB->getSingleSuccessor();
+ BasicBlock *VectorPreHeaderBB = State->CFG.PrevBB;
+ State->CFG.VectorPreHeader = VectorPreHeaderBB;
+ BasicBlock *VectorHeaderBB = VectorPreHeaderBB->getSingleSuccessor();
+ assert(VectorHeaderBB && "Loop preheader does not have a single successor.");
+
State->CurrentVectorLoop = State->LI->getLoopFor(VectorHeaderBB);
+ State->CFG.ExitBB = State->CurrentVectorLoop->getExitBlock();
// Remove the edge between Header and Latch to allow other connections.
// Temporarily terminate with unreachable until CFG is rewired.
@@ -935,6 +920,9 @@ void VPlan::execute(VPTransformState *State) {
State->Builder.SetInsertPoint(Terminator);
// Generate code in loop body.
+ State->CFG.PrevVPBB = nullptr;
+ State->CFG.PrevBB = VectorHeaderBB;
+
for (VPBlockBase *Block : depth_first(Entry))
Block->execute(State);
More information about the llvm-commits
mailing list