[llvm] 7fe41ac - Revert "[LV] Unconditionally branch from middle to scalar preheader if the scalar loop must execute"
Adrian Kuegel via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 5 03:53:12 PST 2021
Author: Adrian Kuegel
Date: 2021-02-05T12:51:03+01:00
New Revision: 7fe41ac3dff2d44c3d2c31b28554fbe4a86eaa6c
URL: https://github.com/llvm/llvm-project/commit/7fe41ac3dff2d44c3d2c31b28554fbe4a86eaa6c
DIFF: https://github.com/llvm/llvm-project/commit/7fe41ac3dff2d44c3d2c31b28554fbe4a86eaa6c.diff
LOG: Revert "[LV] Unconditionally branch from middle to scalar preheader if the scalar loop must execute"
This reverts commit 3e5ce49e5371ce4feadbf97dd5c2b652d9db3d1d.
Tests started failing on PPC, for example:
http://lab.llvm.org:8011/#/builders/105/builds/5569
Added:
Modified:
llvm/lib/Transforms/Utils/LoopVersioning.cpp
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll
llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
llvm/test/Transforms/LoopVectorize/loop-form.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
index 8a89158788cf..de4fb446fdf2 100644
--- a/llvm/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
@@ -44,11 +44,11 @@ LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI,
AliasChecks(Checks.begin(), Checks.end()),
Preds(LAI.getPSE().getUnionPredicate()), LAI(LAI), LI(LI), DT(DT),
SE(SE) {
+ assert(L->getUniqueExitBlock() && "No single exit block");
}
void LoopVersioning::versionLoop(
const SmallVectorImpl<Instruction *> &DefsUsedOutside) {
- assert(VersionedLoop->getUniqueExitBlock() && "No single exit block");
assert(VersionedLoop->isLoopSimplifyForm() &&
"Loop is not in loop-simplify form");
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3277842edbfe..6bce0caeb36f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -852,7 +852,7 @@ class InnerLoopVectorizer {
/// Middle Block between the vector and the scalar.
BasicBlock *LoopMiddleBlock;
- /// The unique ExitBlock of the scalar loop if one exists. Note that
+ /// The (unique) ExitBlock of the scalar loop. Note that
/// there can be multiple exiting edges reaching this block.
BasicBlock *LoopExitBlock;
@@ -3147,13 +3147,9 @@ void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L,
DT->getNode(Bypass)->getIDom()) &&
"TC check is expected to dominate Bypass");
- // Update dominator for Bypass & LoopExit (if needed).
+ // Update dominator for Bypass & LoopExit.
DT->changeImmediateDominator(Bypass, TCCheckBlock);
- if (!Cost->requiresScalarEpilogue())
- // If there is an epilogue which must run, there's no edge from the
- // middle block to exit blocks and thus no need to update the immediate
- // dominator of the exit blocks.
- DT->changeImmediateDominator(LoopExitBlock, TCCheckBlock);
+ DT->changeImmediateDominator(LoopExitBlock, TCCheckBlock);
ReplaceInstWithInst(
TCCheckBlock->getTerminator(),
@@ -3192,11 +3188,7 @@ void InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) {
// Update dominator only if this is first RT check.
if (LoopBypassBlocks.empty()) {
DT->changeImmediateDominator(Bypass, SCEVCheckBlock);
- if (!Cost->requiresScalarEpilogue())
- // If there is an epilogue which must run, there's no edge from the
- // middle block to exit blocks and thus no need to update the immediate
- // dominator of the exit blocks.
- DT->changeImmediateDominator(LoopExitBlock, SCEVCheckBlock);
+ DT->changeImmediateDominator(LoopExitBlock, SCEVCheckBlock);
}
ReplaceInstWithInst(
@@ -3252,11 +3244,7 @@ void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) {
// Update dominator only if this is first RT check.
if (LoopBypassBlocks.empty()) {
DT->changeImmediateDominator(Bypass, MemCheckBlock);
- if (!Cost->requiresScalarEpilogue())
- // If there is an epilogue which must run, there's no edge from the
- // middle block to exit blocks and thus no need to update the immediate
- // dominator of the exit blocks.
- DT->changeImmediateDominator(LoopExitBlock, MemCheckBlock);
+ DT->changeImmediateDominator(LoopExitBlock, MemCheckBlock);
}
Instruction *FirstCheckInst;
@@ -3381,10 +3369,9 @@ Value *InnerLoopVectorizer::emitTransformedIndex(
Loop *InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
LoopScalarBody = OrigLoop->getHeader();
LoopVectorPreHeader = OrigLoop->getLoopPreheader();
+ LoopExitBlock = OrigLoop->getUniqueExitBlock();
+ assert(LoopExitBlock && "Must have an exit block");
assert(LoopVectorPreHeader && "Invalid loop structure");
- LoopExitBlock = OrigLoop->getUniqueExitBlock(); // may be nullptr
- assert((LoopExitBlock || Cost->requiresScalarEpilogue()) &&
- "multiple exit loop without required epilogue?");
LoopMiddleBlock =
SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT,
@@ -3393,20 +3380,12 @@ Loop *InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
SplitBlock(LoopMiddleBlock, LoopMiddleBlock->getTerminator(), DT, LI,
nullptr, Twine(Prefix) + "scalar.ph");
+ // Set up branch from middle block to the exit and scalar preheader blocks.
+ // completeLoopSkeleton will update the condition to use an iteration check,
+ // if required to decide whether to execute the remainder.
+ BranchInst *BrInst =
+ BranchInst::Create(LoopExitBlock, LoopScalarPreHeader, Builder.getTrue());
auto *ScalarLatchTerm = OrigLoop->getLoopLatch()->getTerminator();
-
- // Set up the middle block terminator. Two cases:
- // 1) If we know that we must execute the scalar epilogue, emit an
- // unconditional branch.
- // 2) Otherwise, we must have a single unique exit block (due to how we
- // implement the multiple exit case). In this case, set up a conditonal
- // branch from the middle block to the loop scalar preheader, and the
- // exit block. completeLoopSkeleton will update the condition to use an
- // iteration check, if required to decide whether to execute the remainder.
- BranchInst *BrInst = Cost->requiresScalarEpilogue() ?
- BranchInst::Create(LoopScalarPreHeader) :
- BranchInst::Create(LoopExitBlock, LoopScalarPreHeader,
- Builder.getTrue());
BrInst->setDebugLoc(ScalarLatchTerm->getDebugLoc());
ReplaceInstWithInst(LoopMiddleBlock->getTerminator(), BrInst);
@@ -3418,11 +3397,7 @@ Loop *InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
nullptr, nullptr, Twine(Prefix) + "vector.body");
// Update dominator for loop exit.
- if (!Cost->requiresScalarEpilogue())
- // If there is an epilogue which must run, there's no edge from the
- // middle block to exit blocks and thus no need to update the immediate
- // dominator of the exit blocks.
- DT->changeImmediateDominator(LoopExitBlock, LoopMiddleBlock);
+ DT->changeImmediateDominator(LoopExitBlock, LoopMiddleBlock);
// Create and register the new vector loop.
Loop *Lp = LI->AllocateLoop();
@@ -3519,14 +3494,10 @@ BasicBlock *InnerLoopVectorizer::completeLoopSkeleton(Loop *L,
auto *ScalarLatchTerm = OrigLoop->getLoopLatch()->getTerminator();
// Add a check in the middle block to see if we have completed
- // all of the iterations in the first vector loop. Three cases:
- // 1) If we require a scalar epilogue, there is no conditional branch as
- // we unconditionally branch to the scalar preheader. Do nothing.
- // 2) If (N - N%VF) == N, then we *don't* need to run the remainder.
- // Thus if tail is to be folded, we know we don't need to run the
- // remainder and we can use the previous value for the condition (true).
- // 3) Otherwise, construct a runtime check.
- if (!Cost->requiresScalarEpilogue() && !Cost->foldTailByMasking()) {
+ // all of the iterations in the first vector loop.
+ // If (N - N%VF) == N, then we *don't* need to run the remainder.
+ // If tail is to be folded, we know we don't need to run the remainder.
+ if (!Cost->foldTailByMasking()) {
Instruction *CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
Count, VectorTripCount, "cmp.n",
LoopMiddleBlock->getTerminator());
@@ -3590,17 +3561,17 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
| [ ]_| <-- vector loop.
| |
| v
- \ -[ ] <--- middle-block.
- \/ |
- /\ v
- | ->[ ] <--- new preheader.
+ | -[ ] <--- middle-block.
+ | / |
+ | / v
+ -|- >[ ] <--- new preheader.
| |
- (opt) v <-- edge from middle to exit iff epilogue is not required.
+ | v
| [ ] \
- | [ ]_| <-- old scalar loop to handle remainder (scalar epilogue).
+ | [ ]_| <-- old scalar loop to handle remainder.
\ |
\ v
- >[ ] <-- exit block(s).
+ >[ ] <-- exit block.
...
*/
@@ -4021,18 +3992,13 @@ void InnerLoopVectorizer::fixVectorizedLoop() {
// Forget the original basic block.
PSE.getSE()->forgetLoop(OrigLoop);
- // If we inserted an edge from the middle block to the unique exit block,
- // update uses outside the loop (phis) to account for the newly inserted
- // edge.
- if (!Cost->requiresScalarEpilogue()) {
- // Fix-up external users of the induction variables.
- for (auto &Entry : Legal->getInductionVars())
- fixupIVUsers(Entry.first, Entry.second,
- getOrCreateVectorTripCount(LI->getLoopFor(LoopVectorBody)),
- IVEndValues[Entry.first], LoopMiddleBlock);
+ // Fix-up external users of the induction variables.
+ for (auto &Entry : Legal->getInductionVars())
+ fixupIVUsers(Entry.first, Entry.second,
+ getOrCreateVectorTripCount(LI->getLoopFor(LoopVectorBody)),
+ IVEndValues[Entry.first], LoopMiddleBlock);
- fixLCSSAPHIs();
- }
+ fixLCSSAPHIs();
for (Instruction *PI : PredicatedInstructions)
sinkScalarOperands(&*PI);
@@ -4250,13 +4216,12 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
// recurrence in the exit block, and then add an edge for the middle block.
// Note that LCSSA does not imply single entry when the original scalar loop
// had multiple exiting edges (as we always run the last iteration in the
- // scalar epilogue); in that case, there is no edge from middle to exit and
- // and thus no phis which needed updated.
- if (!Cost->requiresScalarEpilogue())
- for (PHINode &LCSSAPhi : LoopExitBlock->phis())
- if (any_of(LCSSAPhi.incoming_values(),
- [Phi](Value *V) { return V == Phi; }))
- LCSSAPhi.addIncoming(ExtractForPhiUsedOutsideLoop, LoopMiddleBlock);
+ // scalar epilogue); in that case, the exiting path through middle will be
+ // dynamically dead and the value picked for the phi doesn't matter.
+ for (PHINode &LCSSAPhi : LoopExitBlock->phis())
+ if (any_of(LCSSAPhi.incoming_values(),
+ [Phi](Value *V) { return V == Phi; }))
+ LCSSAPhi.addIncoming(ExtractForPhiUsedOutsideLoop, LoopMiddleBlock);
}
void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
@@ -4421,11 +4386,10 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
// We know that the loop is in LCSSA form. We need to update the PHI nodes
// in the exit blocks. See comment on analogous loop in
// fixFirstOrderRecurrence for a more complete explaination of the logic.
- if (!Cost->requiresScalarEpilogue())
- for (PHINode &LCSSAPhi : LoopExitBlock->phis())
- if (any_of(LCSSAPhi.incoming_values(),
- [LoopExitInst](Value *V) { return V == LoopExitInst; }))
- LCSSAPhi.addIncoming(ReducedPartRdx, LoopMiddleBlock);
+ for (PHINode &LCSSAPhi : LoopExitBlock->phis())
+ if (any_of(LCSSAPhi.incoming_values(),
+ [LoopExitInst](Value *V) { return V == LoopExitInst; }))
+ LCSSAPhi.addIncoming(ReducedPartRdx, LoopMiddleBlock);
// Fix the scalar loop reduction variable with the incoming reduction sum
// from the vector body and from the backedge value.
@@ -8074,11 +8038,7 @@ BasicBlock *EpilogueVectorizerMainLoop::emitMinimumIterationCountCheck(
// Update dominator for Bypass & LoopExit.
DT->changeImmediateDominator(Bypass, TCCheckBlock);
- if (!Cost->requiresScalarEpilogue())
- // For loops with multiple exits, there's no edge from the middle block
- // to exit blocks (as the epilogue must run) and thus no need to update
- // the immediate dominator of the exit blocks.
- DT->changeImmediateDominator(LoopExitBlock, TCCheckBlock);
+ DT->changeImmediateDominator(LoopExitBlock, TCCheckBlock);
LoopBypassBlocks.push_back(TCCheckBlock);
@@ -8142,12 +8102,7 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() {
DT->changeImmediateDominator(LoopScalarPreHeader,
EPI.EpilogueIterationCountCheck);
- if (!Cost->requiresScalarEpilogue())
- // If there is an epilogue which must run, there's no edge from the
- // middle block to exit blocks and thus no need to update the immediate
- // dominator of the exit blocks.
- DT->changeImmediateDominator(LoopExitBlock,
- EPI.EpilogueIterationCountCheck);
+ DT->changeImmediateDominator(LoopExitBlock, EPI.EpilogueIterationCountCheck);
// Keep track of bypass blocks, as they feed start values to the induction
// phis in the scalar loop preheader.
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll
index ec280bf5d5e4..7d4a3c5c9935 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll
@@ -471,9 +471,10 @@ define i16 @multiple_exit(i16* %p, i32 %n) {
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]]
; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]]
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
-; CHECK-NEXT: br label [[SCALAR_PH]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
@@ -485,14 +486,14 @@ define i16 @multiple_exit(i16* %p, i32 %n) {
; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
; CHECK-NEXT: [[REC_NEXT]] = load i16, i16* [[B]], align 2
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
-; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END]]
; CHECK: for.body:
; CHECK-NEXT: store i16 [[SCALAR_RECUR]], i16* [[B]], align 4
; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], [[LOOP7:!llvm.loop !.*]]
; CHECK: if.end:
-; CHECK-NEXT: [[REC_LCSSA:%.*]] = phi i16 [ [[SCALAR_RECUR]], [[FOR_BODY]] ], [ [[SCALAR_RECUR]], [[FOR_COND]] ]
+; CHECK-NEXT: [[REC_LCSSA:%.*]] = phi i16 [ [[SCALAR_RECUR]], [[FOR_BODY]] ], [ [[SCALAR_RECUR]], [[FOR_COND]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i16 [[REC_LCSSA]]
;
entry:
@@ -557,9 +558,10 @@ define i16 @multiple_exit2(i16* %p, i32 %n) {
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]]
; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]]
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
-; CHECK-NEXT: br label [[SCALAR_PH]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
@@ -571,14 +573,14 @@ define i16 @multiple_exit2(i16* %p, i32 %n) {
; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
; CHECK-NEXT: [[REC_NEXT]] = load i16, i16* [[B]], align 2
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
-; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END]]
; CHECK: for.body:
; CHECK-NEXT: store i16 [[SCALAR_RECUR]], i16* [[B]], align 4
; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], [[LOOP9:!llvm.loop !.*]]
; CHECK: if.end:
-; CHECK-NEXT: [[REC_LCSSA:%.*]] = phi i16 [ [[SCALAR_RECUR]], [[FOR_COND]] ], [ 10, [[FOR_BODY]] ]
+; CHECK-NEXT: [[REC_LCSSA:%.*]] = phi i16 [ [[SCALAR_RECUR]], [[FOR_COND]] ], [ 10, [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i16 [[REC_LCSSA]]
;
entry:
diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
index f0ba677348ab..0d4bdf0ecac3 100644
--- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
@@ -447,7 +447,7 @@ define void @even_load_static_tc(i32* noalias nocapture readonly %A, i32* noalia
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 508
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP12:!llvm.loop !.*]]
; CHECK: middle.block:
-; CHECK-NEXT: br label [[SCALAR_PH]]
+; CHECK-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1016, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
@@ -463,7 +463,7 @@ define void @even_load_static_tc(i32* noalias nocapture readonly %A, i32* noalia
; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 2
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[INDVARS_IV]], 1022
-; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], [[LOOP13:!llvm.loop !.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP]], [[LOOP13:!llvm.loop !.*]]
;
entry:
br label %for.body
@@ -528,7 +528,7 @@ define void @even_load_dynamic_tc(i32* noalias nocapture readonly %A, i32* noali
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP14:!llvm.loop !.*]]
; CHECK: middle.block:
-; CHECK-NEXT: br label [[SCALAR_PH]]
+; CHECK-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
@@ -544,7 +544,7 @@ define void @even_load_dynamic_tc(i32* noalias nocapture readonly %A, i32* noali
; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 2
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], [[LOOP15:!llvm.loop !.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP]], [[LOOP15:!llvm.loop !.*]]
;
entry:
br label %for.body
@@ -973,7 +973,7 @@ define void @PR27626_0(%pair.i32 *%p, i32 %z, i64 %n) {
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP24:!llvm.loop !.*]]
; CHECK: middle.block:
-; CHECK-NEXT: br label [[SCALAR_PH]]
+; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
@@ -985,7 +985,7 @@ define void @PR27626_0(%pair.i32 *%p, i32 %z, i64 %n) {
; CHECK-NEXT: store i32 [[Z]], i32* [[P_I_Y]], align 4
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], [[LOOP25:!llvm.loop !.*]]
+; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], [[LOOP25:!llvm.loop !.*]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
@@ -1066,7 +1066,7 @@ define i32 @PR27626_1(%pair.i32 *%p, i64 %n) {
; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF3]]
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i32> [[BIN_RDX4]], i32 0
-; CHECK-NEXT: br label [[SCALAR_PH]]
+; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP19]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
@@ -1081,9 +1081,10 @@ define i32 @PR27626_1(%pair.i32 *%p, i64 %n) {
; CHECK-NEXT: [[TMP21]] = add nsw i32 [[TMP20]], [[S]]
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], [[LOOP27:!llvm.loop !.*]]
+; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], [[LOOP27:!llvm.loop !.*]]
; CHECK: for.end:
-; CHECK-NEXT: ret i32 [[TMP21]]
+; CHECK-NEXT: [[TMP22:%.*]] = phi i32 [ [[TMP21]], [[FOR_BODY]] ], [ [[TMP19]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[TMP22]]
;
entry:
br label %for.body
@@ -1162,7 +1163,7 @@ define void @PR27626_2(%pair.i32 *%p, i64 %n, i32 %z) {
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP28:!llvm.loop !.*]]
; CHECK: middle.block:
-; CHECK-NEXT: br label [[SCALAR_PH]]
+; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
@@ -1176,7 +1177,7 @@ define void @PR27626_2(%pair.i32 *%p, i64 %n, i32 %z) {
; CHECK-NEXT: store i32 [[TMP21]], i32* [[P_I_Y]], align 4
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], [[LOOP29:!llvm.loop !.*]]
+; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], [[LOOP29:!llvm.loop !.*]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
@@ -1263,7 +1264,7 @@ define i32 @PR27626_3(%pair.i32 *%p, i64 %n, i32 %z) {
; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF3]]
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[BIN_RDX4]], i32 0
-; CHECK-NEXT: br label [[SCALAR_PH]]
+; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP22]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
@@ -1281,9 +1282,10 @@ define i32 @PR27626_3(%pair.i32 *%p, i64 %n, i32 %z) {
; CHECK-NEXT: [[TMP25]] = add nsw i32 [[TMP24]], [[S]]
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], [[LOOP31:!llvm.loop !.*]]
+; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], [[LOOP31:!llvm.loop !.*]]
; CHECK: for.end:
-; CHECK-NEXT: ret i32 [[TMP25]]
+; CHECK-NEXT: [[TMP26:%.*]] = phi i32 [ [[TMP25]], [[FOR_BODY]] ], [ [[TMP22]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[TMP26]]
;
entry:
br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/loop-form.ll b/llvm/test/Transforms/LoopVectorize/loop-form.ll
index f32002fae2b6..91780789088b 100644
--- a/llvm/test/Transforms/LoopVectorize/loop-form.ll
+++ b/llvm/test/Transforms/LoopVectorize/loop-form.ll
@@ -146,14 +146,15 @@ define void @early_exit(i16* %p, i32 %n) {
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]]
; CHECK: middle.block:
-; CHECK-NEXT: br label [[SCALAR_PH]]
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_COND:%.*]]
; CHECK: for.cond:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
-; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END]]
; CHECK: for.body:
; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64
; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
@@ -285,14 +286,15 @@ define void @multiple_unique_exit(i16* %p, i32 %n) {
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]]
; CHECK: middle.block:
-; CHECK-NEXT: br label [[SCALAR_PH]]
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_COND:%.*]]
; CHECK: for.cond:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
-; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END]]
; CHECK: for.body:
; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64
; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
@@ -372,14 +374,17 @@ define i32 @multiple_unique_exit2(i16* %p, i32 %n) {
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]]
; CHECK: middle.block:
-; CHECK-NEXT: br label [[SCALAR_PH]]
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]]
+; CHECK-NEXT: [[IND_ESCAPE:%.*]] = sub i32 [[N_VEC]], 1
+; CHECK-NEXT: [[IND_ESCAPE1:%.*]] = sub i32 [[N_VEC]], 1
+; CHECK-NEXT: br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_COND:%.*]]
; CHECK: for.cond:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
-; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END]]
; CHECK: for.body:
; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64
; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
@@ -388,7 +393,7 @@ define i32 @multiple_unique_exit2(i16* %p, i32 %n) {
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], [[LOOP9:!llvm.loop !.*]]
; CHECK: if.end:
-; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ]
+; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ], [ [[IND_ESCAPE1]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[I_LCSSA]]
;
; TAILFOLD-LABEL: @multiple_unique_exit2(
@@ -461,14 +466,15 @@ define i32 @multiple_unique_exit3(i16* %p, i32 %n) {
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP10:!llvm.loop !.*]]
; CHECK: middle.block:
-; CHECK-NEXT: br label [[SCALAR_PH]]
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_COND:%.*]]
; CHECK: for.cond:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
-; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END]]
; CHECK: for.body:
; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64
; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
@@ -477,7 +483,7 @@ define i32 @multiple_unique_exit3(i16* %p, i32 %n) {
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], [[LOOP11:!llvm.loop !.*]]
; CHECK: if.end:
-; CHECK-NEXT: [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ]
+; CHECK-NEXT: [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ], [ 0, [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[EXIT]]
;
; TAILFOLD-LABEL: @multiple_unique_exit3(
@@ -994,7 +1000,8 @@ define void @scalar_predication(float* %addr) {
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP12:!llvm.loop !.*]]
; CHECK: middle.block:
-; CHECK-NEXT: br label [[SCALAR_PH]]
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 201, 200
+; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
@@ -1002,7 +1009,7 @@ define void @scalar_predication(float* %addr) {
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr float, float* [[ADDR]], i64 [[IV]]
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_BODY:%.*]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP_BODY:%.*]]
; CHECK: loop.body:
; CHECK-NEXT: [[TMP11:%.*]] = load float, float* [[GEP]], align 4
; CHECK-NEXT: [[PRED:%.*]] = fcmp oeq float [[TMP11]], 0.000000e+00
@@ -1088,7 +1095,8 @@ define i32 @me_reduction(i32* %addr) {
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <2 x i32> [[TMP5]], [[RDX_SHUF]]
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[BIN_RDX]], i32 0
-; CHECK-NEXT: br label [[SCALAR_PH]]
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 201, 200
+; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
@@ -1098,7 +1106,7 @@ define i32 @me_reduction(i32* %addr) {
; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[ADDR]], i64 [[IV]]
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP_LATCH]]
; CHECK: loop.latch:
; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[GEP]], align 4
; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP8]]
@@ -1106,7 +1114,7 @@ define i32 @me_reduction(i32* %addr) {
; CHECK-NEXT: [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400
; CHECK-NEXT: br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]], [[LOOP15:!llvm.loop !.*]]
; CHECK: exit:
-; CHECK-NEXT: [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ]
+; CHECK-NEXT: [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[LCSSA]]
;
; TAILFOLD-LABEL: @me_reduction(
More information about the llvm-commits mailing list