[llvm] [LoopPeel] Remove known trip count restriction when peeling last. (PR #140792)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Fri May 23 09:02:48 PDT 2025


https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/140792

>From cb15550a75f33cf1845fb3ef0f0673f6c3b8f136 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 19 May 2025 20:59:55 +0100
Subject: [PATCH 1/3] [LoopPeel] Remove known trip count restriction when
 peeling last.

Remove the restriction that the loop must be known to execute at least 2
iterations when peeling the last iteration. If we cannot prove at least
2 iterations are executed, a check and branch to skip the peeled loop is
inserted.
---
 llvm/lib/Transforms/Utils/LoopPeel.cpp        | 56 ++++++++++++---
 ...last-iteration-with-constant-trip-count.ll | 14 ++--
 ...last-iteration-with-variable-trip-count.ll | 72 +++++++++++++++----
 .../unroll-and-peel-last-iteration.ll         |  2 +-
 4 files changed, 114 insertions(+), 30 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index 4eaa3c9714370..4422f7cd9480b 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -37,6 +37,7 @@
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/LoopSimplify.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
 #include <algorithm>
 #include <cassert>
@@ -332,11 +333,7 @@ bool llvm::canPeelLastIteration(const Loop &L, ScalarEvolution &SE) {
   CmpPredicate Pred;
   BasicBlock *Succ1;
   BasicBlock *Succ2;
-  // The loop must execute at least 2 iterations to guarantee that peeled
-  // iteration executes.
-  // TODO: Add checks during codegen.
-  if (isa<SCEVCouldNotCompute>(BTC) ||
-      !SE.isKnownPredicate(CmpInst::ICMP_UGT, BTC, SE.getZero(BTC->getType())))
+  if (isa<SCEVCouldNotCompute>(BTC))
     return false;
 
   // Check if the exit condition of the loop can be adjusted by the peeling
@@ -818,7 +815,7 @@ static void initBranchWeights(DenseMap<Instruction *, WeightInfo> &WeightInfos,
 /// instructions in the last peeled-off iteration.
 static void cloneLoopBlocks(
     Loop *L, unsigned IterNumber, bool PeelLast, BasicBlock *InsertTop,
-    BasicBlock *InsertBot,
+    BasicBlock *InsertBot, BasicBlock *OrigPreHeader,
     SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
     SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
     ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT,
@@ -912,10 +909,19 @@ static void cloneLoopBlocks(
   if (PeelLast) {
     // For the last iteration, we use the value from the latch of the original
     // loop directly.
+    //
+    IRBuilder<> B(InsertTop->getTerminator());
     for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
       PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
-      VMap[&*I] = NewPHI->getIncomingValueForBlock(Latch);
+      PHINode *PN = B.CreatePHI(NewPHI->getType(), 2);
       NewPHI->eraseFromParent();
+      if (OrigPreHeader)
+        PN->addIncoming(cast<PHINode>(&*I)->getIncomingValueForBlock(PreHeader),
+                        OrigPreHeader);
+
+      PN->addIncoming(cast<PHINode>(&*I)->getIncomingValueForBlock(Latch),
+                      Latch);
+      VMap[&*I] = PN;
     }
   } else {
     // For the first iteration, we use the value from the preheader directly.
@@ -1049,7 +1055,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
   // Set up all the necessary basic blocks.
   BasicBlock *InsertTop;
   BasicBlock *InsertBot;
-  BasicBlock *NewPreHeader;
+  BasicBlock *NewPreHeader = nullptr;
   DenseMap<Instruction *, Value *> ExitValues;
   if (PeelLast) {
     // It is convenient to split the single exit block from the latch the
@@ -1080,11 +1086,40 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
     for (PHINode &P : Exit->phis())
       ExitValues[&P] = P.getIncomingValueForBlock(Latch);
 
+    const SCEV *BTC = SE->getBackedgeTakenCount(L);
+
     InsertTop = SplitEdge(Latch, Exit, &DT, LI);
     InsertBot = SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI);
 
     InsertTop->setName(Exit->getName() + ".peel.begin");
     InsertBot->setName(Exit->getName() + ".peel.next");
+    NewPreHeader = nullptr;
+
+    // If the original loop may only execute a single iteration we need to
+    // insert a trip count check and skip the peeled loop if necessary.
+    if (!SE->isKnownPredicate(CmpInst::ICMP_UGT, BTC,
+                              SE->getZero(BTC->getType()))) {
+      NewPreHeader = SplitEdge(PreHeader, Header, &DT, LI);
+      SCEVExpander Expander(*SE, Latch->getDataLayout(), "loop-peel");
+
+      BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
+      Value *BTCValue =
+          Expander.expandCodeFor(BTC, BTC->getType(), PreHeaderBR);
+      IRBuilder<> B(PreHeaderBR);
+      Value *Cond =
+          B.CreateICmpNE(BTCValue, ConstantInt::get(BTCValue->getType(), 0));
+      B.CreateCondBr(Cond, NewPreHeader, InsertTop);
+      PreHeaderBR->eraseFromParent();
+
+      // PreHeader now dominates InsertTop.
+      DT.changeImmediateDominator(InsertTop, PreHeader);
+
+      // If we branch from PreHeader to InsertTop, we are guaranteed to execute
+      // the peeled iteration, so the exit values from the original loop are
+      // dead. Use poison for them.
+      for (auto &PN : InsertTop->phis())
+        PN.addIncoming(PoisonValue::get(PN.getType()), PreHeader);
+    }
   } else {
     // It is convenient to split the preheader into 3 parts - two blocks to
     // anchor the peeled copy of the loop body, and a new preheader for the
@@ -1158,8 +1193,9 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
   for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
     SmallVector<BasicBlock *, 8> NewBlocks;
 
-    cloneLoopBlocks(L, Iter, PeelLast, InsertTop, InsertBot, ExitEdges,
-                    NewBlocks, LoopBlocks, VMap, LVMap, &DT, LI,
+    cloneLoopBlocks(L, Iter, PeelLast, InsertTop, InsertBot,
+                    NewPreHeader ? PreHeader : nullptr, ExitEdges, NewBlocks,
+                    LoopBlocks, VMap, LVMap, &DT, LI,
                     LoopLocalNoAliasDeclScopes, *SE);
 
     // Remap to use values from the current iteration instead of the
diff --git a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-constant-trip-count.ll b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-constant-trip-count.ll
index f1290069bda0c..2e724e748aa67 100644
--- a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-constant-trip-count.ll
+++ b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-constant-trip-count.ll
@@ -12,8 +12,8 @@ define i64 @peel_single_block_loop_iv_step_1() {
 ; CHECK-NEXT:    [[EC1:%.*]] = icmp ne i64 [[IV_NEXT1]], 63
 ; CHECK-NEXT:    br i1 [[EC1]], label %[[LOOP]], label %[[EXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       [[EXIT_PEEL_BEGIN]]:
-; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT1]], %[[LOOP]] ]
 ; CHECK-NEXT:    [[IV_LCSSA:%.*]] = phi i64 [ [[IV1]], %[[LOOP]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT1]], %[[LOOP]] ]
 ; CHECK-NEXT:    br label %[[LOOP_PEEL:.*]]
 ; CHECK:       [[LOOP_PEEL]]:
 ; CHECK-NEXT:    [[CMP18_NOT:%.*]] = icmp eq i64 [[IV]], 63
@@ -91,8 +91,8 @@ define i64 @peel_single_block_loop_iv_step_1_eq_pred() {
 ; CHECK-NEXT:    [[CMP_PEEL:%.*]] = icmp eq i64 [[IV_LCSSA]], 63
 ; CHECK-NEXT:    br i1 [[CMP_PEEL]], label %[[EXIT_PEEL_BEGIN:.*]], label %[[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
 ; CHECK:       [[EXIT_PEEL_BEGIN]]:
-; CHECK-NEXT:    [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_LCSSA]], %[[LOOP]] ]
 ; CHECK-NEXT:    [[IV_LCSSA1:%.*]] = phi i64 [ [[IV]], %[[LOOP]] ]
+; CHECK-NEXT:    [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_LCSSA]], %[[LOOP]] ]
 ; CHECK-NEXT:    br label %[[LOOP_PEEL:.*]]
 ; CHECK:       [[LOOP_PEEL]]:
 ; CHECK-NEXT:    [[CMP_PEEL1:%.*]] = icmp eq i64 [[IV_NEXT_LCSSA]], 63
@@ -170,8 +170,8 @@ define i64 @peel_single_block_loop_iv_step_1_nested_loop() {
 ; CHECK-NEXT:    [[EC:%.*]] = icmp ne i64 [[IV_NEXT]], 63
 ; CHECK-NEXT:    br i1 [[EC]], label %[[LOOP]], label %[[OUTER_LATCH_PEEL_BEGIN:.*]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       [[OUTER_LATCH_PEEL_BEGIN]]:
-; CHECK-NEXT:    [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_NEXT]], %[[LOOP]] ]
 ; CHECK-NEXT:    [[IV_LCSSA:%.*]] = phi i64 [ [[IV]], %[[LOOP]] ]
+; CHECK-NEXT:    [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_NEXT]], %[[LOOP]] ]
 ; CHECK-NEXT:    br label %[[LOOP_PEEL:.*]]
 ; CHECK:       [[LOOP_PEEL]]:
 ; CHECK-NEXT:    [[CMP_PEEL:%.*]] = icmp eq i64 [[IV_NEXT_LCSSA]], 63
@@ -236,8 +236,8 @@ define i64 @peel_multi_block_loop_iv_step_1() {
 ; CHECK-NEXT:    [[EC:%.*]] = icmp ne i64 [[IV_NEXT]], 63
 ; CHECK-NEXT:    br i1 [[EC]], label %[[LOOP]], label %[[EXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       [[EXIT_PEEL_BEGIN]]:
-; CHECK-NEXT:    [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_NEXT]], %[[LATCH]] ]
 ; CHECK-NEXT:    [[IV_LCSSA:%.*]] = phi i64 [ [[IV]], %[[LATCH]] ]
+; CHECK-NEXT:    [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_NEXT]], %[[LATCH]] ]
 ; CHECK-NEXT:    br label %[[LOOP_PEEL:.*]]
 ; CHECK:       [[LOOP_PEEL]]:
 ; CHECK-NEXT:    [[CMP_PEEL:%.*]] = icmp eq i64 [[IV_NEXT_LCSSA]], 63
@@ -364,8 +364,8 @@ define i64 @peel_single_block_loop_iv_step_1_btc_1() {
 ; CHECK-NEXT:    [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1
 ; CHECK-NEXT:    br i1 false, label %[[LOOP]], label %[[EXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       [[EXIT_PEEL_BEGIN]]:
-; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT1]], %[[LOOP]] ]
 ; CHECK-NEXT:    [[IV_LCSSA:%.*]] = phi i64 [ [[IV1]], %[[LOOP]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT1]], %[[LOOP]] ]
 ; CHECK-NEXT:    br label %[[LOOP_PEEL:.*]]
 ; CHECK:       [[LOOP_PEEL]]:
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[IV]], 1
@@ -483,9 +483,9 @@ define i32 @peel_loop_with_branch_and_phi_uses(ptr %x, i1 %c) {
 ; CHECK-NEXT:    [[EC1:%.*]] = icmp ne i32 [[IV_NEXT1]], 99
 ; CHECK-NEXT:    br i1 [[EC1]], label %[[LOOP_HEADER]], label %[[LOOPEXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       [[LOOPEXIT_PEEL_BEGIN]]:
-; CHECK-NEXT:    [[RED:%.*]] = phi i32 [ [[ADD1]], %[[LOOP_LATCH]] ]
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT1]], %[[LOOP_LATCH]] ]
 ; CHECK-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD1]], %[[LOOP_LATCH]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT1]], %[[LOOP_LATCH]] ]
+; CHECK-NEXT:    [[RED:%.*]] = phi i32 [ [[ADD1]], %[[LOOP_LATCH]] ]
 ; CHECK-NEXT:    br label %[[LOOP_HEADER_PEEL:.*]]
 ; CHECK:       [[LOOP_HEADER_PEEL]]:
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[IV]], 99
diff --git a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-variable-trip-count.ll b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-variable-trip-count.ll
index 75f3674732f35..92c1e1e47d8bd 100644
--- a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-variable-trip-count.ll
+++ b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-variable-trip-count.ll
@@ -9,18 +9,38 @@ define i32 @peel_last_with_trip_count_check_lcssa_phi(i32 %n) {
 ; CHECK-SAME: i32 [[N:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    [[SUB:%.*]] = add i32 [[N]], -1
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ne i32 [[SUB]], 0
+; CHECK-NEXT:    br i1 [[TMP0]], label %[[ENTRY_SPLIT:.*]], label %[[EXIT_PEEL_BEGIN:.*]]
+; CHECK:       [[ENTRY_SPLIT]]:
 ; CHECK-NEXT:    br label %[[LOOP:.*]]
 ; CHECK:       [[LOOP]]:
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[IV1:%.*]] = phi i32 [ 0, %[[ENTRY_SPLIT]] ], [ [[IV_NEXT1:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    call void @foo(i32 2)
+; CHECK-NEXT:    [[IV_NEXT1]] = add nuw i32 [[IV1]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[N]], 1
+; CHECK-NEXT:    [[EC1:%.*]] = icmp ne i32 [[IV_NEXT1]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[EC1]], label %[[LOOP]], label %[[EXIT_PEEL_BEGIN_LOOPEXIT:.*]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[EXIT_PEEL_BEGIN_LOOPEXIT]]:
+; CHECK-NEXT:    [[SEL_LCSSA_PH:%.*]] = phi i32 [ 2, %[[LOOP]] ]
+; CHECK-NEXT:    [[DOTPH:%.*]] = phi i32 [ [[IV_NEXT1]], %[[LOOP]] ]
+; CHECK-NEXT:    br label %[[EXIT_PEEL_BEGIN]]
+; CHECK:       [[EXIT_PEEL_BEGIN]]:
+; CHECK-NEXT:    [[SEL_LCSSA:%.*]] = phi i32 [ poison, %[[ENTRY]] ], [ [[SEL_LCSSA_PH]], %[[EXIT_PEEL_BEGIN_LOOPEXIT]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[DOTPH]], %[[EXIT_PEEL_BEGIN_LOOPEXIT]] ]
+; CHECK-NEXT:    br label %[[LOOP_PEEL:.*]]
+; CHECK:       [[LOOP_PEEL]]:
 ; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[IV]], [[SUB]]
 ; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[C]], i32 1, i32 2
 ; CHECK-NEXT:    call void @foo(i32 [[SEL]])
-; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[IV_NEXT:%.*]] = add i32 [[IV]], 1
 ; CHECK-NEXT:    [[EC:%.*]] = icmp ne i32 [[IV_NEXT]], [[N]]
-; CHECK-NEXT:    br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT_PEEL_NEXT:.*]], label %[[EXIT_PEEL_NEXT]]
+; CHECK:       [[EXIT_PEEL_NEXT]]:
+; CHECK-NEXT:    br label %[[LOOP_PEEL_NEXT:.*]]
+; CHECK:       [[LOOP_PEEL_NEXT]]:
+; CHECK-NEXT:    br label %[[EXIT:.*]]
 ; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ]
-; CHECK-NEXT:    ret i32 [[SEL_LCSSA]]
+; CHECK-NEXT:    ret i32 [[SEL]]
 ;
 entry:
   %sub = add i32 %n, -1
@@ -160,21 +180,44 @@ define void @peel_last_with_trip_count_check_nested_loop(i32 %n) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[SUB:%.*]] = add i32 [[N]], -1
 ; CHECK-NEXT:    br label %[[OUTER_HEADER:.*]]
-; CHECK:       [[OUTER_HEADER_LOOPEXIT:.*]]:
+; CHECK:       [[OUTER_HEADER_LOOPEXIT_PEEL_BEGIN_LOOPEXIT:.*]]:
+; CHECK-NEXT:    [[DOTPH:%.*]] = phi i32 [ [[IV_NEXT1:%.*]], %[[INNER_LATCH:.*]] ]
+; CHECK-NEXT:    br label %[[OUTER_HEADER_LOOPEXIT_PEEL_BEGIN:.*]]
+; CHECK:       [[OUTER_HEADER_LOOPEXIT_PEEL_BEGIN]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, %[[OUTER_HEADER]] ], [ [[DOTPH]], %[[OUTER_HEADER_LOOPEXIT_PEEL_BEGIN_LOOPEXIT]] ]
+; CHECK-NEXT:    br label %[[INNER_HEADER_PEEL:.*]]
+; CHECK:       [[INNER_HEADER_PEEL]]:
+; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[IV]], [[SUB]]
+; CHECK-NEXT:    br i1 [[C]], label %[[INNER_LATCH_PEEL:.*]], label %[[THEN_PEEL:.*]]
+; CHECK:       [[THEN_PEEL]]:
+; CHECK-NEXT:    call void @foo(i32 1)
+; CHECK-NEXT:    br label %[[INNER_LATCH_PEEL]]
+; CHECK:       [[INNER_LATCH_PEEL]]:
+; CHECK-NEXT:    [[IV_NEXT:%.*]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label %[[OUTER_HEADER_LOOPEXIT_PEEL_NEXT:.*]], label %[[OUTER_HEADER_LOOPEXIT_PEEL_NEXT]]
+; CHECK:       [[OUTER_HEADER_LOOPEXIT_PEEL_NEXT]]:
+; CHECK-NEXT:    br label %[[INNER_HEADER_PEEL_NEXT:.*]]
+; CHECK:       [[INNER_HEADER_PEEL_NEXT]]:
+; CHECK-NEXT:    br label %[[OUTER_HEADER_LOOPEXIT:.*]]
+; CHECK:       [[OUTER_HEADER_LOOPEXIT]]:
 ; CHECK-NEXT:    br label %[[OUTER_HEADER]]
 ; CHECK:       [[OUTER_HEADER]]:
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[SUB]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[OUTER_HEADER_SPLIT:.*]], label %[[OUTER_HEADER_LOOPEXIT_PEEL_BEGIN]]
+; CHECK:       [[OUTER_HEADER_SPLIT]]:
 ; CHECK-NEXT:    br label %[[INNER_HEADER:.*]]
 ; CHECK:       [[INNER_HEADER]]:
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, %[[OUTER_HEADER]] ], [ [[IV_NEXT:%.*]], %[[INNER_LATCH:.*]] ]
-; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[IV]], [[SUB]]
-; CHECK-NEXT:    br i1 [[C]], label %[[INNER_LATCH]], label %[[THEN:.*]]
+; CHECK-NEXT:    [[IV1:%.*]] = phi i32 [ 0, %[[OUTER_HEADER_SPLIT]] ], [ [[IV_NEXT1]], %[[INNER_LATCH]] ]
+; CHECK-NEXT:    br i1 false, label %[[INNER_LATCH]], label %[[THEN:.*]]
 ; CHECK:       [[THEN]]:
 ; CHECK-NEXT:    call void @foo(i32 1)
 ; CHECK-NEXT:    br label %[[INNER_LATCH]]
 ; CHECK:       [[INNER_LATCH]]:
-; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
-; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]]
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label %[[OUTER_HEADER_LOOPEXIT]], label %[[INNER_HEADER]]
+; CHECK-NEXT:    [[IV_NEXT1]] = add nuw i32 [[IV1]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[N]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT1:%.*]] = icmp eq i32 [[IV_NEXT1]], [[TMP2]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT1]], label %[[OUTER_HEADER_LOOPEXIT_PEEL_BEGIN_LOOPEXIT]], label %[[INNER_HEADER]], !llvm.loop [[LOOP2:![0-9]+]]
 ;
 entry:
   %sub = add i32 %n, -1
@@ -197,3 +240,8 @@ inner.latch:
   %exitcond.not = icmp eq i32 %iv.next, %n
   br i1 %exitcond.not, label %outer.header, label %inner.header
 }
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.peeled.count", i32 1}
+; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META1]]}
+;.
diff --git a/llvm/test/Transforms/LoopUnroll/unroll-and-peel-last-iteration.ll b/llvm/test/Transforms/LoopUnroll/unroll-and-peel-last-iteration.ll
index f57fb2d9b7057..e36b834969c87 100644
--- a/llvm/test/Transforms/LoopUnroll/unroll-and-peel-last-iteration.ll
+++ b/llvm/test/Transforms/LoopUnroll/unroll-and-peel-last-iteration.ll
@@ -19,8 +19,8 @@ define i32 @peel_last_iter_of_outer_lcssa_phi_with_constant_after_unrolling_inne
 ; CHECK-NEXT:    [[EC:%.*]] = icmp eq i16 [[IV_NEXT]], 999
 ; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT_PEEL_BEGIN:.*]], label %[[OUTER_HEADER]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       [[EXIT_PEEL_BEGIN]]:
-; CHECK-NEXT:    [[IV_NEXT_LCSSA:%.*]] = phi i16 [ [[IV_NEXT]], %[[INNER_LATCH]] ]
 ; CHECK-NEXT:    [[DOTLCSSA_LCSSA:%.*]] = phi i32 [ 1, %[[INNER_LATCH]] ]
+; CHECK-NEXT:    [[IV_NEXT_LCSSA:%.*]] = phi i16 [ [[IV_NEXT]], %[[INNER_LATCH]] ]
 ; CHECK-NEXT:    br label %[[OUTER_HEADER_PEEL:.*]]
 ; CHECK:       [[OUTER_HEADER_PEEL]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i16 [[IV_NEXT_LCSSA]], 999

>From 307783ca5f753e5144bb0ff674742af664db15bb Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 21 May 2025 18:19:53 +0100
Subject: [PATCH 2/3] !fixup address comments, thanks

---
 llvm/include/llvm/Transforms/Utils/LoopPeel.h |  3 +-
 llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp |  2 +-
 llvm/lib/Transforms/Utils/LoopPeel.cpp        | 45 ++++++++++---------
 .../peel-last-iteration-expansion-cost.ll     | 38 +++++++++++++---
 ...last-iteration-with-constant-trip-count.ll |  6 ---
 ...last-iteration-with-variable-trip-count.ll |  2 -
 .../unroll-and-peel-last-iteration.ll         |  1 -
 7 files changed, 60 insertions(+), 37 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/LoopPeel.h b/llvm/include/llvm/Transforms/Utils/LoopPeel.h
index dd59a9c766e45..49dbc9aa1f2a9 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopPeel.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopPeel.h
@@ -44,7 +44,8 @@ gatherPeelingPreferences(Loop *L, ScalarEvolution &SE,
 void computePeelCount(Loop *L, unsigned LoopSize,
                       TargetTransformInfo::PeelingPreferences &PP,
                       unsigned TripCount, DominatorTree &DT,
-                      ScalarEvolution &SE, AssumptionCache *AC = nullptr,
+                      ScalarEvolution &SE, const TargetTransformInfo &TTI,
+                      AssumptionCache *AC = nullptr,
                       unsigned Threshold = UINT_MAX);
 
 } // end namespace llvm
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 0b9fee5727c6f..afa7abfea419e 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -1014,7 +1014,7 @@ bool llvm::computeUnrollCount(
   }
 
   // 5th priority is loop peeling.
-  computePeelCount(L, LoopSize, PP, TripCount, DT, SE, AC, UP.Threshold);
+  computePeelCount(L, LoopSize, PP, TripCount, DT, SE, TTI, AC, UP.Threshold);
   if (PP.PeelCount) {
     UP.Runtime = false;
     UP.Count = 1;
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index 4422f7cd9480b..2f5d815db56ce 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -357,12 +357,17 @@ bool llvm::canPeelLastIteration(const Loop &L, ScalarEvolution &SE) {
 /// is known at the second-to-last.
 static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred,
                                     const SCEVAddRecExpr *LeftAR,
-                                    const SCEV *RightSCEV,
-                                    ScalarEvolution &SE) {
+                                    const SCEV *RightSCEV, ScalarEvolution &SE,
+                                    const TargetTransformInfo &TTI) {
   if (!canPeelLastIteration(L, SE))
     return false;
 
   const SCEV *BTC = SE.getBackedgeTakenCount(&L);
+  SCEVExpander Expander(SE, L.getHeader()->getDataLayout(), "loop-peel");
+  if (!SE.isKnownNonZero(BTC) && Expander.isHighCostExpansion(BTC, &L, SCEVCheapExpansionBudget, &TTI,
+                                   L.getLoopPredecessor()->getTerminator()))
+    return false;
+
   const SCEV *ValAtLastIter = LeftAR->evaluateAtIteration(BTC, SE);
   const SCEV *ValAtSecondToLastIter = LeftAR->evaluateAtIteration(
       SE.getMinusSCEV(BTC, SE.getOne(BTC->getType())), SE);
@@ -384,7 +389,8 @@ static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred,
 //      ..
 //   }
 static std::pair<unsigned, unsigned>
-countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) {
+countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE,
+                         const TargetTransformInfo &TTI) {
   assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form");
   unsigned DesiredPeelCount = 0;
   unsigned DesiredPeelCountLast = 0;
@@ -472,7 +478,7 @@ countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) {
     const SCEV *Step = LeftAR->getStepRecurrence(SE);
     if (!PeelWhilePredicateIsKnown(NewPeelCount, IterVal, RightSCEV, Step,
                                    Pred)) {
-      if (shouldPeelLastIteration(L, Pred, LeftAR, RightSCEV, SE))
+      if (shouldPeelLastIteration(L, Pred, LeftAR, RightSCEV, SE, TTI))
         DesiredPeelCountLast = 1;
       return;
     }
@@ -586,8 +592,8 @@ static bool violatesLegacyMultiExitLoopCheck(Loop *L) {
 void llvm::computePeelCount(Loop *L, unsigned LoopSize,
                             TargetTransformInfo::PeelingPreferences &PP,
                             unsigned TripCount, DominatorTree &DT,
-                            ScalarEvolution &SE, AssumptionCache *AC,
-                            unsigned Threshold) {
+                            ScalarEvolution &SE, const TargetTransformInfo &TTI,
+                            AssumptionCache *AC, unsigned Threshold) {
   assert(LoopSize > 0 && "Zero loop size is not allowed!");
   // Save the PP.PeelCount value set by the target in
   // TTI.getPeelingPreferences or by the flag -unroll-peel-count.
@@ -649,7 +655,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
   }
 
   const auto &[CountToEliminateCmps, CountToEliminateCmpsLast] =
-      countToEliminateCompares(*L, MaxPeelCount, SE);
+      countToEliminateCompares(*L, MaxPeelCount, SE, TTI);
   DesiredPeelCount = std::max(DesiredPeelCount, CountToEliminateCmps);
 
   if (DesiredPeelCount == 0)
@@ -907,9 +913,10 @@ static void cloneLoopBlocks(
   // loop iteration. Since this copy is no longer part of the loop, we
   // resolve this statically:
   if (PeelLast) {
-    // For the last iteration, we use the value from the latch of the original
-    // loop directly.
-    //
+    // For the last iteration, we introduce new phis for each header phi in
+    // InsertTop, using the incoming value from the preheader for the original
+    // preheader (when skipping the main loop) and the incoming value from the
+    // latch for the latch (when continuing from the main loop).
     IRBuilder<> B(InsertTop->getTerminator());
     for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
       PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
@@ -1096,9 +1103,9 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
     NewPreHeader = nullptr;
 
     // If the original loop may only execute a single iteration we need to
-    // insert a trip count check and skip the peeled loop if necessary.
-    if (!SE->isKnownPredicate(CmpInst::ICMP_UGT, BTC,
-                              SE->getZero(BTC->getType()))) {
+    // insert a trip count check and skip the original loop with the last
+    // iteration peeled off if necessary.
+    if (!SE->isKnownNonZero(BTC)) {
       NewPreHeader = SplitEdge(PreHeader, Header, &DT, LI);
       SCEVExpander Expander(*SE, Latch->getDataLayout(), "loop-peel");
 
@@ -1113,12 +1120,6 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
 
       // PreHeader now dominates InsertTop.
       DT.changeImmediateDominator(InsertTop, PreHeader);
-
-      // If we branch from PreHeader to InsertTop, we are guaranteed to execute
-      // the peeled iteration, so the exit values from the original loop are
-      // dead. Use poison for them.
-      for (auto &PN : InsertTop->phis())
-        PN.addIncoming(PoisonValue::get(PN.getType()), PreHeader);
     }
   } else {
     // It is convenient to split the preheader into 3 parts - two blocks to
@@ -1248,9 +1249,11 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
 
   if (PeelLast) {
     // Now adjust users of the original exit values by replacing them with the
-    // exit value from the peeled iteration.
-    for (const auto &[P, E] : ExitValues)
+    // exit value from the peeled iteration and remove them.
+    for (const auto &[P, E] : ExitValues) {
       P->replaceAllUsesWith(isa<Constant>(E) ? E : &*VMap.lookup(E));
+      P->eraseFromParent();
+    }
     formLCSSA(*L, DT, LI, SE);
   } else {
     // Now adjust the phi nodes in the loop header to get their initial values
diff --git a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll
index 342c70170fef3..f3910f9bfc399 100644
--- a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll
+++ b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll
@@ -25,17 +25,41 @@ define i32 @test_expansion_cost_2(i32 %start, i32 %end) {
 ; BUDGET3-SAME: i32 [[START:%.*]], i32 [[END:%.*]]) {
 ; BUDGET3-NEXT:  [[ENTRY:.*]]:
 ; BUDGET3-NEXT:    [[SUB:%.*]] = add i32 [[END]], -1
+; BUDGET3-NEXT:    [[TMP0:%.*]] = sub i32 [[SUB]], [[START]]
+; BUDGET3-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
+; BUDGET3-NEXT:    br i1 [[TMP1]], label %[[ENTRY_SPLIT:.*]], label %[[EXIT_PEEL_BEGIN:.*]]
+; BUDGET3:       [[ENTRY_SPLIT]]:
 ; BUDGET3-NEXT:    br label %[[LOOP_HEADER:.*]]
 ; BUDGET3:       [[LOOP_HEADER]]:
-; BUDGET3-NEXT:    [[TMP3:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[IV_NEXT_PEEL:%.*]], %[[LOOP_LATCH:.*]] ]
-; BUDGET3-NEXT:    [[C_PEEL:%.*]] = icmp eq i32 [[TMP3]], [[SUB]]
-; BUDGET3-NEXT:    br i1 [[C_PEEL]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
+; BUDGET3-NEXT:    [[IV:%.*]] = phi i32 [ [[START]], %[[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; BUDGET3-NEXT:    [[C:%.*]] = icmp eq i32 [[IV]], [[SUB]]
+; BUDGET3-NEXT:    br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
 ; BUDGET3:       [[THEN]]:
 ; BUDGET3-NEXT:    br label %[[LOOP_LATCH]]
 ; BUDGET3:       [[LOOP_LATCH]]:
-; BUDGET3-NEXT:    [[IV_NEXT_PEEL]] = add nsw i32 [[TMP3]], 1
+; BUDGET3-NEXT:    [[IV_NEXT]] = add nsw i32 [[IV]], 1
+; BUDGET3-NEXT:    [[TMP2:%.*]] = sub i32 [[END]], 1
+; BUDGET3-NEXT:    [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[TMP2]]
+; BUDGET3-NEXT:    br i1 [[EC]], label %[[EXIT_PEEL_BEGIN_LOOPEXIT:.*]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP0:![0-9]+]]
+; BUDGET3:       [[EXIT_PEEL_BEGIN_LOOPEXIT]]:
+; BUDGET3-NEXT:    [[DOTPH:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP_LATCH]] ]
+; BUDGET3-NEXT:    br label %[[EXIT_PEEL_BEGIN]]
+; BUDGET3:       [[EXIT_PEEL_BEGIN]]:
+; BUDGET3-NEXT:    [[TMP3:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[DOTPH]], %[[EXIT_PEEL_BEGIN_LOOPEXIT]] ]
+; BUDGET3-NEXT:    br label %[[LOOP_HEADER_PEEL:.*]]
+; BUDGET3:       [[LOOP_HEADER_PEEL]]:
+; BUDGET3-NEXT:    [[C_PEEL:%.*]] = icmp eq i32 [[TMP3]], [[SUB]]
+; BUDGET3-NEXT:    br i1 [[C_PEEL]], label %[[THEN_PEEL:.*]], label %[[LOOP_LATCH_PEEL:.*]]
+; BUDGET3:       [[THEN_PEEL]]:
+; BUDGET3-NEXT:    br label %[[LOOP_LATCH_PEEL]]
+; BUDGET3:       [[LOOP_LATCH_PEEL]]:
+; BUDGET3-NEXT:    [[IV_NEXT_PEEL:%.*]] = add nsw i32 [[TMP3]], 1
 ; BUDGET3-NEXT:    [[EC_PEEL:%.*]] = icmp eq i32 [[IV_NEXT_PEEL]], [[END]]
-; BUDGET3-NEXT:    br i1 [[EC_PEEL]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
+; BUDGET3-NEXT:    br i1 [[EC_PEEL]], label %[[EXIT_PEEL_NEXT:.*]], label %[[EXIT_PEEL_NEXT]]
+; BUDGET3:       [[EXIT_PEEL_NEXT]]:
+; BUDGET3-NEXT:    br label %[[LOOP_HEADER_PEEL_NEXT:.*]]
+; BUDGET3:       [[LOOP_HEADER_PEEL_NEXT]]:
+; BUDGET3-NEXT:    br label %[[EXIT:.*]]
 ; BUDGET3:       [[EXIT]]:
 ; BUDGET3-NEXT:    ret i32 0
 ;
@@ -59,3 +83,7 @@ loop.latch:
 exit:
   ret i32 0
 }
+;.
+; BUDGET3: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
+; BUDGET3: [[META1]] = !{!"llvm.loop.peeled.count", i32 1}
+;.
diff --git a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-constant-trip-count.ll b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-constant-trip-count.ll
index 2e724e748aa67..5c9b2fa6a9d60 100644
--- a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-constant-trip-count.ll
+++ b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-constant-trip-count.ll
@@ -12,7 +12,6 @@ define i64 @peel_single_block_loop_iv_step_1() {
 ; CHECK-NEXT:    [[EC1:%.*]] = icmp ne i64 [[IV_NEXT1]], 63
 ; CHECK-NEXT:    br i1 [[EC1]], label %[[LOOP]], label %[[EXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       [[EXIT_PEEL_BEGIN]]:
-; CHECK-NEXT:    [[IV_LCSSA:%.*]] = phi i64 [ [[IV1]], %[[LOOP]] ]
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT1]], %[[LOOP]] ]
 ; CHECK-NEXT:    br label %[[LOOP_PEEL:.*]]
 ; CHECK:       [[LOOP_PEEL]]:
@@ -91,7 +90,6 @@ define i64 @peel_single_block_loop_iv_step_1_eq_pred() {
 ; CHECK-NEXT:    [[CMP_PEEL:%.*]] = icmp eq i64 [[IV_LCSSA]], 63
 ; CHECK-NEXT:    br i1 [[CMP_PEEL]], label %[[EXIT_PEEL_BEGIN:.*]], label %[[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
 ; CHECK:       [[EXIT_PEEL_BEGIN]]:
-; CHECK-NEXT:    [[IV_LCSSA1:%.*]] = phi i64 [ [[IV]], %[[LOOP]] ]
 ; CHECK-NEXT:    [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_LCSSA]], %[[LOOP]] ]
 ; CHECK-NEXT:    br label %[[LOOP_PEEL:.*]]
 ; CHECK:       [[LOOP_PEEL]]:
@@ -170,7 +168,6 @@ define i64 @peel_single_block_loop_iv_step_1_nested_loop() {
 ; CHECK-NEXT:    [[EC:%.*]] = icmp ne i64 [[IV_NEXT]], 63
 ; CHECK-NEXT:    br i1 [[EC]], label %[[LOOP]], label %[[OUTER_LATCH_PEEL_BEGIN:.*]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       [[OUTER_LATCH_PEEL_BEGIN]]:
-; CHECK-NEXT:    [[IV_LCSSA:%.*]] = phi i64 [ [[IV]], %[[LOOP]] ]
 ; CHECK-NEXT:    [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_NEXT]], %[[LOOP]] ]
 ; CHECK-NEXT:    br label %[[LOOP_PEEL:.*]]
 ; CHECK:       [[LOOP_PEEL]]:
@@ -236,7 +233,6 @@ define i64 @peel_multi_block_loop_iv_step_1() {
 ; CHECK-NEXT:    [[EC:%.*]] = icmp ne i64 [[IV_NEXT]], 63
 ; CHECK-NEXT:    br i1 [[EC]], label %[[LOOP]], label %[[EXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       [[EXIT_PEEL_BEGIN]]:
-; CHECK-NEXT:    [[IV_LCSSA:%.*]] = phi i64 [ [[IV]], %[[LATCH]] ]
 ; CHECK-NEXT:    [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_NEXT]], %[[LATCH]] ]
 ; CHECK-NEXT:    br label %[[LOOP_PEEL:.*]]
 ; CHECK:       [[LOOP_PEEL]]:
@@ -364,7 +360,6 @@ define i64 @peel_single_block_loop_iv_step_1_btc_1() {
 ; CHECK-NEXT:    [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1
 ; CHECK-NEXT:    br i1 false, label %[[LOOP]], label %[[EXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       [[EXIT_PEEL_BEGIN]]:
-; CHECK-NEXT:    [[IV_LCSSA:%.*]] = phi i64 [ [[IV1]], %[[LOOP]] ]
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT1]], %[[LOOP]] ]
 ; CHECK-NEXT:    br label %[[LOOP_PEEL:.*]]
 ; CHECK:       [[LOOP_PEEL]]:
@@ -483,7 +478,6 @@ define i32 @peel_loop_with_branch_and_phi_uses(ptr %x, i1 %c) {
 ; CHECK-NEXT:    [[EC1:%.*]] = icmp ne i32 [[IV_NEXT1]], 99
 ; CHECK-NEXT:    br i1 [[EC1]], label %[[LOOP_HEADER]], label %[[LOOPEXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       [[LOOPEXIT_PEEL_BEGIN]]:
-; CHECK-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD1]], %[[LOOP_LATCH]] ]
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT1]], %[[LOOP_LATCH]] ]
 ; CHECK-NEXT:    [[RED:%.*]] = phi i32 [ [[ADD1]], %[[LOOP_LATCH]] ]
 ; CHECK-NEXT:    br label %[[LOOP_HEADER_PEEL:.*]]
diff --git a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-variable-trip-count.ll b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-variable-trip-count.ll
index 92c1e1e47d8bd..a8712e04d43c1 100644
--- a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-variable-trip-count.ll
+++ b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-variable-trip-count.ll
@@ -21,11 +21,9 @@ define i32 @peel_last_with_trip_count_check_lcssa_phi(i32 %n) {
 ; CHECK-NEXT:    [[EC1:%.*]] = icmp ne i32 [[IV_NEXT1]], [[TMP1]]
 ; CHECK-NEXT:    br i1 [[EC1]], label %[[LOOP]], label %[[EXIT_PEEL_BEGIN_LOOPEXIT:.*]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       [[EXIT_PEEL_BEGIN_LOOPEXIT]]:
-; CHECK-NEXT:    [[SEL_LCSSA_PH:%.*]] = phi i32 [ 2, %[[LOOP]] ]
 ; CHECK-NEXT:    [[DOTPH:%.*]] = phi i32 [ [[IV_NEXT1]], %[[LOOP]] ]
 ; CHECK-NEXT:    br label %[[EXIT_PEEL_BEGIN]]
 ; CHECK:       [[EXIT_PEEL_BEGIN]]:
-; CHECK-NEXT:    [[SEL_LCSSA:%.*]] = phi i32 [ poison, %[[ENTRY]] ], [ [[SEL_LCSSA_PH]], %[[EXIT_PEEL_BEGIN_LOOPEXIT]] ]
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[DOTPH]], %[[EXIT_PEEL_BEGIN_LOOPEXIT]] ]
 ; CHECK-NEXT:    br label %[[LOOP_PEEL:.*]]
 ; CHECK:       [[LOOP_PEEL]]:
diff --git a/llvm/test/Transforms/LoopUnroll/unroll-and-peel-last-iteration.ll b/llvm/test/Transforms/LoopUnroll/unroll-and-peel-last-iteration.ll
index e36b834969c87..7d38c18d10667 100644
--- a/llvm/test/Transforms/LoopUnroll/unroll-and-peel-last-iteration.ll
+++ b/llvm/test/Transforms/LoopUnroll/unroll-and-peel-last-iteration.ll
@@ -19,7 +19,6 @@ define i32 @peel_last_iter_of_outer_lcssa_phi_with_constant_after_unrolling_inne
 ; CHECK-NEXT:    [[EC:%.*]] = icmp eq i16 [[IV_NEXT]], 999
 ; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT_PEEL_BEGIN:.*]], label %[[OUTER_HEADER]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       [[EXIT_PEEL_BEGIN]]:
-; CHECK-NEXT:    [[DOTLCSSA_LCSSA:%.*]] = phi i32 [ 1, %[[INNER_LATCH]] ]
 ; CHECK-NEXT:    [[IV_NEXT_LCSSA:%.*]] = phi i16 [ [[IV_NEXT]], %[[INNER_LATCH]] ]
 ; CHECK-NEXT:    br label %[[OUTER_HEADER_PEEL:.*]]
 ; CHECK:       [[OUTER_HEADER_PEEL]]:

>From d552bc6af3e23d8b48a588677de26ac89f7a3197 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 23 May 2025 17:02:27 +0100
Subject: [PATCH 3/3] !fixup fix formatting

---
 llvm/lib/Transforms/Utils/LoopPeel.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index 2f5d815db56ce..9062a14bcaf6c 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -364,7 +364,8 @@ static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred,
 
   const SCEV *BTC = SE.getBackedgeTakenCount(&L);
   SCEVExpander Expander(SE, L.getHeader()->getDataLayout(), "loop-peel");
-  if (!SE.isKnownNonZero(BTC) && Expander.isHighCostExpansion(BTC, &L, SCEVCheapExpansionBudget, &TTI,
+  if (!SE.isKnownNonZero(BTC) &&
+      Expander.isHighCostExpansion(BTC, &L, SCEVCheapExpansionBudget, &TTI,
                                    L.getLoopPredecessor()->getTerminator()))
     return false;
 



More information about the llvm-commits mailing list