[llvm] [LoopPeel] Remove known trip count restriction when peeling last. (PR #140792)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Tue May 20 13:24:25 PDT 2025
https://github.com/fhahn created https://github.com/llvm/llvm-project/pull/140792
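Remove the restriction that the loop must be known to execute at least 2 iterations when peeling the last iteration. If we cannot prove that at least 2 iterations are executed, a runtime trip-count check is inserted in the preheader: when the backedge-taken count is zero, it branches around the remaining loop, directly to the peeled last iteration.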
>From a01b94468b6f7d1d2a1bd11a3ccf11071b2ac1ee Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 19 May 2025 20:59:55 +0100
Subject: [PATCH] [LoopPeel] Remove known trip count restriction when peeling
last.
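Remove the restriction that the loop must be known to execute at least 2
iterations when peeling the last iteration. If we cannot prove that at
least 2 iterations are executed, a runtime trip-count check is inserted
in the preheader: when the backedge-taken count is zero, it branches
around the remaining loop, directly to the peeled last iteration.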
---
llvm/lib/Transforms/Utils/LoopPeel.cpp | 56 ++++++++++++---
...last-iteration-with-constant-trip-count.ll | 14 ++--
...last-iteration-with-variable-trip-count.ll | 72 +++++++++++++++----
.../unroll-and-peel-last-iteration.ll | 2 +-
4 files changed, 114 insertions(+), 30 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index 4eaa3c9714370..4422f7cd9480b 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -37,6 +37,7 @@
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
@@ -332,11 +333,7 @@ bool llvm::canPeelLastIteration(const Loop &L, ScalarEvolution &SE) {
CmpPredicate Pred;
BasicBlock *Succ1;
BasicBlock *Succ2;
- // The loop must execute at least 2 iterations to guarantee that peeled
- // iteration executes.
- // TODO: Add checks during codegen.
- if (isa<SCEVCouldNotCompute>(BTC) ||
- !SE.isKnownPredicate(CmpInst::ICMP_UGT, BTC, SE.getZero(BTC->getType())))
+ if (isa<SCEVCouldNotCompute>(BTC))
return false;
// Check if the exit condition of the loop can be adjusted by the peeling
@@ -818,7 +815,7 @@ static void initBranchWeights(DenseMap<Instruction *, WeightInfo> &WeightInfos,
/// instructions in the last peeled-off iteration.
static void cloneLoopBlocks(
Loop *L, unsigned IterNumber, bool PeelLast, BasicBlock *InsertTop,
- BasicBlock *InsertBot,
+ BasicBlock *InsertBot, BasicBlock *OrigPreHeader,
SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT,
@@ -912,10 +909,19 @@ static void cloneLoopBlocks(
if (PeelLast) {
// For the last iteration, we use the value from the latch of the original
// loop directly.
+ //
+ IRBuilder<> B(InsertTop->getTerminator());
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
- VMap[&*I] = NewPHI->getIncomingValueForBlock(Latch);
+ PHINode *PN = B.CreatePHI(NewPHI->getType(), 2);
NewPHI->eraseFromParent();
+ if (OrigPreHeader)
+ PN->addIncoming(cast<PHINode>(&*I)->getIncomingValueForBlock(PreHeader),
+ OrigPreHeader);
+
+ PN->addIncoming(cast<PHINode>(&*I)->getIncomingValueForBlock(Latch),
+ Latch);
+ VMap[&*I] = PN;
}
} else {
// For the first iteration, we use the value from the preheader directly.
@@ -1049,7 +1055,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
// Set up all the necessary basic blocks.
BasicBlock *InsertTop;
BasicBlock *InsertBot;
- BasicBlock *NewPreHeader;
+ BasicBlock *NewPreHeader = nullptr;
DenseMap<Instruction *, Value *> ExitValues;
if (PeelLast) {
// It is convenient to split the single exit block from the latch the
@@ -1080,11 +1086,40 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
for (PHINode &P : Exit->phis())
ExitValues[&P] = P.getIncomingValueForBlock(Latch);
+ const SCEV *BTC = SE->getBackedgeTakenCount(L);
+
InsertTop = SplitEdge(Latch, Exit, &DT, LI);
InsertBot = SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI);
InsertTop->setName(Exit->getName() + ".peel.begin");
InsertBot->setName(Exit->getName() + ".peel.next");
+ NewPreHeader = nullptr;
+
+ // If the original loop may only execute a single iteration we need to
+ // insert a trip count check and skip the peeled loop if necessary.
+ if (!SE->isKnownPredicate(CmpInst::ICMP_UGT, BTC,
+ SE->getZero(BTC->getType()))) {
+ NewPreHeader = SplitEdge(PreHeader, Header, &DT, LI);
+ SCEVExpander Expander(*SE, Latch->getDataLayout(), "loop-peel");
+
+ BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
+ Value *BTCValue =
+ Expander.expandCodeFor(BTC, BTC->getType(), PreHeaderBR);
+ IRBuilder<> B(PreHeaderBR);
+ Value *Cond =
+ B.CreateICmpNE(BTCValue, ConstantInt::get(BTCValue->getType(), 0));
+ B.CreateCondBr(Cond, NewPreHeader, InsertTop);
+ PreHeaderBR->eraseFromParent();
+
+ // PreHeader now dominates InsertTop.
+ DT.changeImmediateDominator(InsertTop, PreHeader);
+
+ // If we branch from PreHeader to InsertTop, we are guaranteed to execute
+ // the peeled iteration, so the exit values from the original loop are
+ // dead. Use poison for them.
+ for (auto &PN : InsertTop->phis())
+ PN.addIncoming(PoisonValue::get(PN.getType()), PreHeader);
+ }
} else {
// It is convenient to split the preheader into 3 parts - two blocks to
// anchor the peeled copy of the loop body, and a new preheader for the
@@ -1158,8 +1193,9 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
SmallVector<BasicBlock *, 8> NewBlocks;
- cloneLoopBlocks(L, Iter, PeelLast, InsertTop, InsertBot, ExitEdges,
- NewBlocks, LoopBlocks, VMap, LVMap, &DT, LI,
+ cloneLoopBlocks(L, Iter, PeelLast, InsertTop, InsertBot,
+ NewPreHeader ? PreHeader : nullptr, ExitEdges, NewBlocks,
+ LoopBlocks, VMap, LVMap, &DT, LI,
LoopLocalNoAliasDeclScopes, *SE);
// Remap to use values from the current iteration instead of the
diff --git a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-constant-trip-count.ll b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-constant-trip-count.ll
index f1290069bda0c..2e724e748aa67 100644
--- a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-constant-trip-count.ll
+++ b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-constant-trip-count.ll
@@ -12,8 +12,8 @@ define i64 @peel_single_block_loop_iv_step_1() {
; CHECK-NEXT: [[EC1:%.*]] = icmp ne i64 [[IV_NEXT1]], 63
; CHECK-NEXT: br i1 [[EC1]], label %[[LOOP]], label %[[EXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[EXIT_PEEL_BEGIN]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT1]], %[[LOOP]] ]
; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV1]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT1]], %[[LOOP]] ]
; CHECK-NEXT: br label %[[LOOP_PEEL:.*]]
; CHECK: [[LOOP_PEEL]]:
; CHECK-NEXT: [[CMP18_NOT:%.*]] = icmp eq i64 [[IV]], 63
@@ -91,8 +91,8 @@ define i64 @peel_single_block_loop_iv_step_1_eq_pred() {
; CHECK-NEXT: [[CMP_PEEL:%.*]] = icmp eq i64 [[IV_LCSSA]], 63
; CHECK-NEXT: br i1 [[CMP_PEEL]], label %[[EXIT_PEEL_BEGIN:.*]], label %[[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK: [[EXIT_PEEL_BEGIN]]:
-; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_LCSSA]], %[[LOOP]] ]
; CHECK-NEXT: [[IV_LCSSA1:%.*]] = phi i64 [ [[IV]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_LCSSA]], %[[LOOP]] ]
; CHECK-NEXT: br label %[[LOOP_PEEL:.*]]
; CHECK: [[LOOP_PEEL]]:
; CHECK-NEXT: [[CMP_PEEL1:%.*]] = icmp eq i64 [[IV_NEXT_LCSSA]], 63
@@ -170,8 +170,8 @@ define i64 @peel_single_block_loop_iv_step_1_nested_loop() {
; CHECK-NEXT: [[EC:%.*]] = icmp ne i64 [[IV_NEXT]], 63
; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[OUTER_LATCH_PEEL_BEGIN:.*]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[OUTER_LATCH_PEEL_BEGIN]]:
-; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_NEXT]], %[[LOOP]] ]
; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_NEXT]], %[[LOOP]] ]
; CHECK-NEXT: br label %[[LOOP_PEEL:.*]]
; CHECK: [[LOOP_PEEL]]:
; CHECK-NEXT: [[CMP_PEEL:%.*]] = icmp eq i64 [[IV_NEXT_LCSSA]], 63
@@ -236,8 +236,8 @@ define i64 @peel_multi_block_loop_iv_step_1() {
; CHECK-NEXT: [[EC:%.*]] = icmp ne i64 [[IV_NEXT]], 63
; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[EXIT_PEEL_BEGIN]]:
-; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_NEXT]], %[[LATCH]] ]
; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV]], %[[LATCH]] ]
+; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_NEXT]], %[[LATCH]] ]
; CHECK-NEXT: br label %[[LOOP_PEEL:.*]]
; CHECK: [[LOOP_PEEL]]:
; CHECK-NEXT: [[CMP_PEEL:%.*]] = icmp eq i64 [[IV_NEXT_LCSSA]], 63
@@ -364,8 +364,8 @@ define i64 @peel_single_block_loop_iv_step_1_btc_1() {
; CHECK-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1
; CHECK-NEXT: br i1 false, label %[[LOOP]], label %[[EXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[EXIT_PEEL_BEGIN]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT1]], %[[LOOP]] ]
; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV1]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT1]], %[[LOOP]] ]
; CHECK-NEXT: br label %[[LOOP_PEEL:.*]]
; CHECK: [[LOOP_PEEL]]:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV]], 1
@@ -483,9 +483,9 @@ define i32 @peel_loop_with_branch_and_phi_uses(ptr %x, i1 %c) {
; CHECK-NEXT: [[EC1:%.*]] = icmp ne i32 [[IV_NEXT1]], 99
; CHECK-NEXT: br i1 [[EC1]], label %[[LOOP_HEADER]], label %[[LOOPEXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[LOOPEXIT_PEEL_BEGIN]]:
-; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[ADD1]], %[[LOOP_LATCH]] ]
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT1]], %[[LOOP_LATCH]] ]
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD1]], %[[LOOP_LATCH]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT1]], %[[LOOP_LATCH]] ]
+; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[ADD1]], %[[LOOP_LATCH]] ]
; CHECK-NEXT: br label %[[LOOP_HEADER_PEEL:.*]]
; CHECK: [[LOOP_HEADER_PEEL]]:
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[IV]], 99
diff --git a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-variable-trip-count.ll b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-variable-trip-count.ll
index 75f3674732f35..92c1e1e47d8bd 100644
--- a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-variable-trip-count.ll
+++ b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-variable-trip-count.ll
@@ -9,18 +9,38 @@ define i32 @peel_last_with_trip_count_check_lcssa_phi(i32 %n) {
; CHECK-SAME: i32 [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[SUB:%.*]] = add i32 [[N]], -1
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ne i32 [[SUB]], 0
+; CHECK-NEXT: br i1 [[TMP0]], label %[[ENTRY_SPLIT:.*]], label %[[EXIT_PEEL_BEGIN:.*]]
+; CHECK: [[ENTRY_SPLIT]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 0, %[[ENTRY_SPLIT]] ], [ [[IV_NEXT1:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: call void @foo(i32 2)
+; CHECK-NEXT: [[IV_NEXT1]] = add nuw i32 [[IV1]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[N]], 1
+; CHECK-NEXT: [[EC1:%.*]] = icmp ne i32 [[IV_NEXT1]], [[TMP1]]
+; CHECK-NEXT: br i1 [[EC1]], label %[[LOOP]], label %[[EXIT_PEEL_BEGIN_LOOPEXIT:.*]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[EXIT_PEEL_BEGIN_LOOPEXIT]]:
+; CHECK-NEXT: [[SEL_LCSSA_PH:%.*]] = phi i32 [ 2, %[[LOOP]] ]
+; CHECK-NEXT: [[DOTPH:%.*]] = phi i32 [ [[IV_NEXT1]], %[[LOOP]] ]
+; CHECK-NEXT: br label %[[EXIT_PEEL_BEGIN]]
+; CHECK: [[EXIT_PEEL_BEGIN]]:
+; CHECK-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ poison, %[[ENTRY]] ], [ [[SEL_LCSSA_PH]], %[[EXIT_PEEL_BEGIN_LOOPEXIT]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[DOTPH]], %[[EXIT_PEEL_BEGIN_LOOPEXIT]] ]
+; CHECK-NEXT: br label %[[LOOP_PEEL:.*]]
+; CHECK: [[LOOP_PEEL]]:
; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[IV]], [[SUB]]
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C]], i32 1, i32 2
; CHECK-NEXT: call void @foo(i32 [[SEL]])
-; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[IV_NEXT:%.*]] = add i32 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp ne i32 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_PEEL_NEXT:.*]], label %[[EXIT_PEEL_NEXT]]
+; CHECK: [[EXIT_PEEL_NEXT]]:
+; CHECK-NEXT: br label %[[LOOP_PEEL_NEXT:.*]]
+; CHECK: [[LOOP_PEEL_NEXT]]:
+; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ]
-; CHECK-NEXT: ret i32 [[SEL_LCSSA]]
+; CHECK-NEXT: ret i32 [[SEL]]
;
entry:
%sub = add i32 %n, -1
@@ -160,21 +180,44 @@ define void @peel_last_with_trip_count_check_nested_loop(i32 %n) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[SUB:%.*]] = add i32 [[N]], -1
; CHECK-NEXT: br label %[[OUTER_HEADER:.*]]
-; CHECK: [[OUTER_HEADER_LOOPEXIT:.*]]:
+; CHECK: [[OUTER_HEADER_LOOPEXIT_PEEL_BEGIN_LOOPEXIT:.*]]:
+; CHECK-NEXT: [[DOTPH:%.*]] = phi i32 [ [[IV_NEXT1:%.*]], %[[INNER_LATCH:.*]] ]
+; CHECK-NEXT: br label %[[OUTER_HEADER_LOOPEXIT_PEEL_BEGIN:.*]]
+; CHECK: [[OUTER_HEADER_LOOPEXIT_PEEL_BEGIN]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[OUTER_HEADER]] ], [ [[DOTPH]], %[[OUTER_HEADER_LOOPEXIT_PEEL_BEGIN_LOOPEXIT]] ]
+; CHECK-NEXT: br label %[[INNER_HEADER_PEEL:.*]]
+; CHECK: [[INNER_HEADER_PEEL]]:
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[IV]], [[SUB]]
+; CHECK-NEXT: br i1 [[C]], label %[[INNER_LATCH_PEEL:.*]], label %[[THEN_PEEL:.*]]
+; CHECK: [[THEN_PEEL]]:
+; CHECK-NEXT: call void @foo(i32 1)
+; CHECK-NEXT: br label %[[INNER_LATCH_PEEL]]
+; CHECK: [[INNER_LATCH_PEEL]]:
+; CHECK-NEXT: [[IV_NEXT:%.*]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[OUTER_HEADER_LOOPEXIT_PEEL_NEXT:.*]], label %[[OUTER_HEADER_LOOPEXIT_PEEL_NEXT]]
+; CHECK: [[OUTER_HEADER_LOOPEXIT_PEEL_NEXT]]:
+; CHECK-NEXT: br label %[[INNER_HEADER_PEEL_NEXT:.*]]
+; CHECK: [[INNER_HEADER_PEEL_NEXT]]:
+; CHECK-NEXT: br label %[[OUTER_HEADER_LOOPEXIT:.*]]
+; CHECK: [[OUTER_HEADER_LOOPEXIT]]:
; CHECK-NEXT: br label %[[OUTER_HEADER]]
; CHECK: [[OUTER_HEADER]]:
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[SUB]], 0
+; CHECK-NEXT: br i1 [[TMP1]], label %[[OUTER_HEADER_SPLIT:.*]], label %[[OUTER_HEADER_LOOPEXIT_PEEL_BEGIN]]
+; CHECK: [[OUTER_HEADER_SPLIT]]:
; CHECK-NEXT: br label %[[INNER_HEADER:.*]]
; CHECK: [[INNER_HEADER]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[OUTER_HEADER]] ], [ [[IV_NEXT:%.*]], %[[INNER_LATCH:.*]] ]
-; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[IV]], [[SUB]]
-; CHECK-NEXT: br i1 [[C]], label %[[INNER_LATCH]], label %[[THEN:.*]]
+; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 0, %[[OUTER_HEADER_SPLIT]] ], [ [[IV_NEXT1]], %[[INNER_LATCH]] ]
+; CHECK-NEXT: br i1 false, label %[[INNER_LATCH]], label %[[THEN:.*]]
; CHECK: [[THEN]]:
; CHECK-NEXT: call void @foo(i32 1)
; CHECK-NEXT: br label %[[INNER_LATCH]]
; CHECK: [[INNER_LATCH]]:
-; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
-; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[OUTER_HEADER_LOOPEXIT]], label %[[INNER_HEADER]]
+; CHECK-NEXT: [[IV_NEXT1]] = add nuw i32 [[IV1]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[N]], 1
+; CHECK-NEXT: [[EXITCOND_NOT1:%.*]] = icmp eq i32 [[IV_NEXT1]], [[TMP2]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT1]], label %[[OUTER_HEADER_LOOPEXIT_PEEL_BEGIN_LOOPEXIT]], label %[[INNER_HEADER]], !llvm.loop [[LOOP2:![0-9]+]]
;
entry:
%sub = add i32 %n, -1
@@ -197,3 +240,8 @@ inner.latch:
%exitcond.not = icmp eq i32 %iv.next, %n
br i1 %exitcond.not, label %outer.header, label %inner.header
}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.peeled.count", i32 1}
+; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META1]]}
+;.
diff --git a/llvm/test/Transforms/LoopUnroll/unroll-and-peel-last-iteration.ll b/llvm/test/Transforms/LoopUnroll/unroll-and-peel-last-iteration.ll
index f57fb2d9b7057..e36b834969c87 100644
--- a/llvm/test/Transforms/LoopUnroll/unroll-and-peel-last-iteration.ll
+++ b/llvm/test/Transforms/LoopUnroll/unroll-and-peel-last-iteration.ll
@@ -19,8 +19,8 @@ define i32 @peel_last_iter_of_outer_lcssa_phi_with_constant_after_unrolling_inne
; CHECK-NEXT: [[EC:%.*]] = icmp eq i16 [[IV_NEXT]], 999
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_PEEL_BEGIN:.*]], label %[[OUTER_HEADER]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[EXIT_PEEL_BEGIN]]:
-; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i16 [ [[IV_NEXT]], %[[INNER_LATCH]] ]
; CHECK-NEXT: [[DOTLCSSA_LCSSA:%.*]] = phi i32 [ 1, %[[INNER_LATCH]] ]
+; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i16 [ [[IV_NEXT]], %[[INNER_LATCH]] ]
; CHECK-NEXT: br label %[[OUTER_HEADER_PEEL:.*]]
; CHECK: [[OUTER_HEADER_PEEL]]:
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i16 [[IV_NEXT_LCSSA]], 999
More information about the llvm-commits mailing list