[llvm] [LoopPeel] Peel to make Phis loop inductions (PR #121104)

Ryotaro Kasuga via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 24 04:43:26 PDT 2025


https://github.com/kasuga-fj updated https://github.com/llvm/llvm-project/pull/121104

>From 41fe38a27cfecf311f1be74a12a77879813340f6 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Fri, 28 Feb 2025 12:15:43 +0000
Subject: [PATCH 01/10] [LoopPeel] Peel to make Phis loop inductions

LoopPeel currently only handles Phis when they become loop invariants by
peeling. There are cases where peeling makes Phis loop inductions, and
peeling in such cases is also useful for other optimizations, such as
loop vectorization. For example, consider the following loop.

```
int im = N-1;
for (int i=0;i<N;i++) {
  a[i] = b[i]+b[im];
  im = i;
}
```

In this case, peeling by 1 iteration makes `im` a loop induction, so we
can vectorize the loop.
This patch allows the kernels of s291 and s292 in TSVC to be vectorized.
I have measured on neoverse-v2 and observed a speedup of more than 60%
(options: `-O3 -ffast-math -mcpu=neoverse-v2`).
Note that in some cases unnecessary peeling was observed when testing
with llvm-test-suite. The causes include peeling of a vectorization
remainder loop and the limitations of analysis by SCEV. However, as far
as I've tried, these unnecessary peels do not affect performance.
---
 llvm/lib/Transforms/Utils/LoopPeel.cpp        | 142 +++++++++--
 .../LoopUnroll/peel-loop-phi-analysis.ll      | 220 ++++++++++++++++++
 2 files changed, 342 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index 9a24c1b0d03de..fe875b5126b69 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -13,6 +13,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/IVDescriptors.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopIterator.h"
@@ -151,6 +152,32 @@ namespace {
 // corresponding calls to g are determined and the code for computing
 // x, y, and a can be removed.
 //
+// Similarly, there are cases where peeling makes Phi nodes loop-inductions
+// (i.e., the value is increased or decreased by a fixed amount on every
+// iteration). For example, consider the following function.
+//
+//   #define N 100
+//   void f(int a[], int b[]) {
+//     int im = N - 1;
+//     for (int i = 0; i < N; i++) {
+//       a[i] = b[i] + b[im];
+//       im = i;
+//     }
+//   }
+//
+// The IR of the loop will look something like the following.
+//
+//   %i = phi i32 [ 0, %entry ], [ %i.next, %for.body ]
+//   %im = phi i32 [ 99, %entry ], [ %i, %for.body ]
+//   ...
+//   %i.next = add nuw nsw i32 %i, 1
+//   ...
+//
+// In this case, %im becomes a loop-induction variable by peeling 1 iteration,
+// because %i is a loop-induction one. The peeling count can be determined by
+// the same algorithm with loop-invariant case. Such peeling is profitable for
+// loop-vectorization.
+//
 // The PhiAnalyzer class calculates how many times a loop should be
 // peeled based on the above analysis of the phi nodes in the loop while
 // respecting the maximum specified.
@@ -177,11 +204,15 @@ class PhiAnalyzer {
   // becomes an invariant.
   PeelCounter calculate(const Value &);
 
+  // Returns true if the \p Phi is an induction in the target loop. This
+  // function is a wrapper of `InductionDescriptor::isInductionPHI`.
+  bool isInductionPHI(const PHINode *Phi) const;
+
   const Loop &L;
   const unsigned MaxIterations;
 
-  // Map of Values to number of iterations to invariance
-  SmallDenseMap<const Value *, PeelCounter> IterationsToInvariance;
+  // Map of Values to number of iterations to invariance or induction
+  SmallDenseMap<const Value *, PeelCounter> IterationsToInvarianceOrInduction;
 };
 
 PhiAnalyzer::PhiAnalyzer(const Loop &L, unsigned MaxIterations)
@@ -190,6 +221,67 @@ PhiAnalyzer::PhiAnalyzer(const Loop &L, unsigned MaxIterations)
   assert(MaxIterations > 0 && "no peeling is allowed?");
 }
 
+// Test if \p Phi is induction variable or not. It can be checked by using SCEV,
+// but it's expensive to calculate it here. Instead, we perform the cheaper
+// checks, which cannot detect complex one but enough for some cases.
+bool PhiAnalyzer::isInductionPHI(const PHINode *Phi) const {
+  // Currently, we only support loops that consist of one basic block. In this
+  // case, the phi can become an IV if it has an incoming value from the basic
+  // block that this phi is also included.
+  int LoopIdx = -1;
+  for (unsigned I = 0; I != Phi->getNumIncomingValues(); I++) {
+    if (Phi->getIncomingBlock(I) == Phi->getParent()) {
+      LoopIdx = I;
+      break;
+    }
+  }
+  if (LoopIdx == -1)
+    return false;
+
+  Value *Cur = Phi->getIncomingValue(LoopIdx);
+  SmallPtrSet<Value *, 4> Visited;
+  bool VisitBinOp = false;
+
+  // Start at the incoming value of the phi and follow definitions. We consider
+  // the phi to be an IV if we can return to it again by traversing only add,
+  // sub, or cast instructions.
+  while (true) {
+    if (Cur == Phi)
+      break;
+
+    // Avoid infinite loop.
+    if (Visited.contains(Cur))
+      return false;
+
+    Instruction *I = dyn_cast<Instruction>(Cur);
+    if (!I || I->getParent() != Phi->getParent())
+      return false;
+
+    Visited.insert(Cur);
+
+    if (auto *Cast = dyn_cast<CastInst>(I)) {
+      Cur = Cast->getOperand(0);
+    } else if (auto *BinOp = dyn_cast<BinaryOperator>(I)) {
+      if (BinOp->getOpcode() != Instruction::Add &&
+          BinOp->getOpcode() != Instruction::Sub)
+        return false;
+      if (!BinOp->hasNoUnsignedWrap() || !BinOp->hasNoSignedWrap())
+        return false;
+      if (!isa<ConstantInt>(BinOp->getOperand(1)))
+        return false;
+
+      VisitBinOp = true;
+      Cur = BinOp->getOperand(0);
+    } else {
+      return false;
+    }
+  }
+
+  // If there are only cast instructions, the phi is not an IV. Return false in
+  // this case.
+  return VisitBinOp;
+}
+
 // This function calculates the number of iterations after which the value
 // becomes an invariant. The pre-calculated values are memorized in a map.
 // N.B. This number will be Unknown or <= MaxIterations.
@@ -208,25 +300,32 @@ PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) {
   // If we already know the answer, take it from the map.
   // Otherwise, place Unknown to map to avoid infinite recursion. Such
   // cycles can never stop on an invariant.
-  auto [I, Inserted] = IterationsToInvariance.try_emplace(&V, Unknown);
+  auto [I, Inserted] =
+      IterationsToInvarianceOrInduction.try_emplace(&V, Unknown);
   if (!Inserted)
     return I->second;
 
   if (L.isLoopInvariant(&V))
     // Loop invariant so known at start.
-    return (IterationsToInvariance[&V] = 0);
+    return (IterationsToInvarianceOrInduction[&V] = 0);
   if (const PHINode *Phi = dyn_cast<PHINode>(&V)) {
     if (Phi->getParent() != L.getHeader()) {
       // Phi is not in header block so Unknown.
-      assert(IterationsToInvariance[&V] == Unknown && "unexpected value saved");
+      assert(IterationsToInvarianceOrInduction[&V] == Unknown &&
+             "unexpected value saved");
       return Unknown;
     }
+
+    // If Phi is an induction, register it as a starting point.
+    if (isInductionPHI(Phi))
+      return (IterationsToInvarianceOrInduction[&V] = 0);
+
     // We need to analyze the input from the back edge and add 1.
     Value *Input = Phi->getIncomingValueForBlock(L.getLoopLatch());
     PeelCounter Iterations = calculate(*Input);
-    assert(IterationsToInvariance[Input] == Iterations &&
+    assert(IterationsToInvarianceOrInduction[Input] == Iterations &&
            "unexpected value saved");
-    return (IterationsToInvariance[Phi] = addOne(Iterations));
+    return (IterationsToInvarianceOrInduction[Phi] = addOne(Iterations));
   }
   if (const Instruction *I = dyn_cast<Instruction>(&V)) {
     if (isa<CmpInst>(I) || I->isBinaryOp()) {
@@ -237,26 +336,29 @@ PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) {
       PeelCounter RHS = calculate(*I->getOperand(1));
       if (RHS == Unknown)
         return Unknown;
-      return (IterationsToInvariance[I] = {std::max(*LHS, *RHS)});
+      return (IterationsToInvarianceOrInduction[I] = {std::max(*LHS, *RHS)});
     }
     if (I->isCast())
       // Cast instructions get the value of the operand.
-      return (IterationsToInvariance[I] = calculate(*I->getOperand(0)));
+      return (IterationsToInvarianceOrInduction[I] =
+                  calculate(*I->getOperand(0)));
   }
   // TODO: handle more expressions
 
   // Everything else is Unknown.
-  assert(IterationsToInvariance[&V] == Unknown && "unexpected value saved");
+  assert(IterationsToInvarianceOrInduction[&V] == Unknown &&
+         "unexpected value saved");
   return Unknown;
 }
 
 std::optional<unsigned> PhiAnalyzer::calculateIterationsToPeel() {
   unsigned Iterations = 0;
   for (auto &PHI : L.getHeader()->phis()) {
-    PeelCounter ToInvariance = calculate(PHI);
-    if (ToInvariance != Unknown) {
-      assert(*ToInvariance <= MaxIterations && "bad result in phi analysis");
-      Iterations = std::max(Iterations, *ToInvariance);
+    PeelCounter ToInvarianceOrInduction = calculate(PHI);
+    if (ToInvarianceOrInduction != Unknown) {
+      assert(*ToInvarianceOrInduction <= MaxIterations &&
+             "bad result in phi analysis");
+      Iterations = std::max(Iterations, *ToInvarianceOrInduction);
       if (Iterations == MaxIterations)
         break;
     }
@@ -585,11 +687,11 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
   // in TTI.getPeelingPreferences or by the flag -unroll-peel-count.
   unsigned DesiredPeelCount = TargetPeelCount;
 
-  // Here we try to get rid of Phis which become invariants after 1, 2, ..., N
-  // iterations of the loop. For this we compute the number for iterations after
-  // which every Phi is guaranteed to become an invariant, and try to peel the
-  // maximum number of iterations among these values, thus turning all those
-  // Phis into invariants.
+  // Here we try to get rid of Phis which become invariants or inductions after
+  // 1, 2, ..., N iterations of the loop. For this we compute the number for
+  // iterations after which every Phi is guaranteed to become an invariant or an
+  // induction, and try to peel the maximum number of iterations among these
+  // values, thus turning all those Phis into invariants or inductions.
   if (MaxPeelCount > DesiredPeelCount) {
     // Check how many iterations are useful for resolving Phis
     auto NumPeels = PhiAnalyzer(*L, MaxPeelCount).calculateIterationsToPeel();
@@ -610,7 +712,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
     if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) {
       LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
                         << " iteration(s) to turn"
-                        << " some Phis into invariants.\n");
+                        << " some Phis into invariants or inductions.\n");
       PP.PeelCount = DesiredPeelCount;
       PP.PeelProfiledIterations = false;
       return;
diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis.ll
index e24eeef52de4e..838b1257278a3 100644
--- a/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis.ll
+++ b/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis.ll
@@ -197,3 +197,223 @@ for.body:
   %exitcond = icmp eq i32 %inc, 100000
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
 }
+
+; Check that phi analysis can handle a binary operator with induction variables.
+define void @binary_induction() {
+; The phis become induction through the chain of phis, with a unary
+; instruction on a loop induction.  Check that the phis for x, a, and y become
+; loop inductions since x is based on y, which is based on a, which is based
+; on a binary add of a constant and i, which is a loop induction.
+; Consider the calls to g:
+; First iteration: g(0), x=0, g(0), y=1, a=2
+; Second iteration: g(0), x=1, g(2), y=3, a=3
+; Third iteration: g(1), x=3, g(3), y=4, a=4
+; Fourth iteration (and subsequent): g(i), x=i+1, g(i+1), y=i+2, a=i+2
+; Therefore, peeling 3 times makes the phi nodes induction variables.
+;
+; void g(int);
+; void binary() {
+;   int x = 0;
+;   int y = 0;
+;   int a = 0;
+;   for(int i = 0; i <100000; ++i) {
+;     g(x);
+;     x = y;
+;     g(a);
+;     y = a + 1;
+;     a = i + 2;
+;   }
+; }
+; CHECK-LABEL: @binary_induction(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY_PEEL_BEGIN:%.*]]
+; CHECK:       for.body.peel.begin:
+; CHECK-NEXT:    br label [[FOR_BODY_PEEL:%.*]]
+; CHECK:       for.body.peel:
+; CHECK-NEXT:    tail call void @_Z1gi(i32 signext 0)
+; CHECK-NEXT:    tail call void @_Z1gi(i32 signext 0)
+; CHECK-NEXT:    [[ADD_PEEL:%.*]] = add nuw nsw i32 0, 2
+; CHECK-NEXT:    [[INC_PEEL:%.*]] = add nuw nsw i32 0, 1
+; CHECK-NEXT:    [[EXITCOND_PEEL:%.*]] = icmp ne i32 [[INC_PEEL]], 100000
+; CHECK-NEXT:    br i1 [[EXITCOND_PEEL]], label [[FOR_BODY_PEEL_NEXT:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body.peel.next:
+; CHECK-NEXT:    br label [[FOR_BODY_PEEL2:%.*]]
+; CHECK:       for.body.peel2:
+; CHECK-NEXT:    tail call void @_Z1gi(i32 signext 0)
+; CHECK-NEXT:    tail call void @_Z1gi(i32 signext [[ADD_PEEL]])
+; CHECK-NEXT:    [[ADD_PEEL3:%.*]] = add nuw nsw i32 [[INC_PEEL]], 2
+; CHECK-NEXT:    [[INC_PEEL4:%.*]] = add nuw nsw i32 [[INC_PEEL]], 1
+; CHECK-NEXT:    [[EXITCOND_PEEL5:%.*]] = icmp ne i32 [[INC_PEEL4]], 100000
+; CHECK-NEXT:    br i1 [[EXITCOND_PEEL5]], label [[FOR_BODY_PEEL_NEXT1:%.*]], label [[FOR_COND_CLEANUP]]
+; CHECK:       for.body.peel.next1:
+; CHECK-NEXT:    br label [[FOR_BODY_PEEL7:%.*]]
+; CHECK:       for.body.peel7:
+; CHECK-NEXT:    tail call void @_Z1gi(i32 signext 0)
+; CHECK-NEXT:    tail call void @_Z1gi(i32 signext [[ADD_PEEL3]])
+; CHECK-NEXT:    [[ADD_PEEL8:%.*]] = add nuw nsw i32 [[INC_PEEL4]], 2
+; CHECK-NEXT:    [[INC_PEEL9:%.*]] = add nuw nsw i32 [[INC_PEEL4]], 1
+; CHECK-NEXT:    [[EXITCOND_PEEL10:%.*]] = icmp ne i32 [[INC_PEEL9]], 100000
+; CHECK-NEXT:    br i1 [[EXITCOND_PEEL10]], label [[FOR_BODY_PEEL_NEXT6:%.*]], label [[FOR_COND_CLEANUP]]
+; CHECK:       for.body.peel.next6:
+; CHECK-NEXT:    br label [[FOR_BODY_PEEL_NEXT11:%.*]]
+; CHECK:       for.body.peel.next11:
+; CHECK-NEXT:    br label [[ENTRY_PEEL_NEWPH:%.*]]
+; CHECK:       entry.peel.newph:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
+; CHECK:       for.body:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[INC_PEEL9]], [[ENTRY_PEEL_NEWPH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[X:%.*]] = phi i32 [ [[ADD_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[Y:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[A:%.*]] = phi i32 [ [[ADD_PEEL8]], [[ENTRY_PEEL_NEWPH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[Y]] = phi i32 [ [[ADD_PEEL3]], [[ENTRY_PEEL_NEWPH]] ], [ [[A]], [[FOR_BODY]] ]
+; CHECK-NEXT:    tail call void @_Z1gi(i32 signext [[X]])
+; CHECK-NEXT:    tail call void @_Z1gi(i32 signext [[A]])
+; CHECK-NEXT:    [[ADD]] = add nuw nsw i32 [[I]], 2
+; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[INC]], 100000
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], !llvm.loop [[LOOP3:![0-9]+]]
+;
+entry:
+  br label %for.body
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %x = phi i32 [ 0, %entry ], [ %y, %for.body ]
+  %a = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %y = phi i32 [ 0, %entry ], [ %a, %for.body ]
+  tail call void @_Z1gi(i32 signext %x)
+  tail call void @_Z1gi(i32 signext %a)
+  %add = add nuw nsw i32 %i, 2
+  %inc = add nuw nsw i32 %i, 1
+  %exitcond = icmp ne i32 %inc, 100000
+  br i1 %exitcond, label %for.body, label %for.cond.cleanup
+}
+
+; Check that phi analysis can handle an assignment from an induction.
+define void @induction_assignment(ptr noundef noalias %a, ptr noundef noalias %b) {
+; The phis become induction through the assignment from an induction. Check
+; that the phi im becomes a loop ; induction because i is a loop induction.
+; This test is based on TSVC s291.
+;
+; #define N 100
+; void f(int * restrict a, int * restrict b) {
+;   int im = N - 1;
+;   for (int i = 0; i < N; i++) {
+;     a[i] = b[i] + b[im];
+;     im = i;
+;   }
+; }
+; CHECK-LABEL: @induction_assignment(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY_PEEL_BEGIN:%.*]]
+; CHECK:       for.body.peel.begin:
+; CHECK-NEXT:    br label [[FOR_BODY_PEEL:%.*]]
+; CHECK:       for.body.peel:
+; CHECK-NEXT:    [[ARRAYIDX_PEEL:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 0
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX_PEEL]], align 4
+; CHECK-NEXT:    [[IDXPROM1_PEEL:%.*]] = zext nneg i32 99 to i64
+; CHECK-NEXT:    [[ARRAYIDX2_PEEL:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[IDXPROM1_PEEL]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2_PEEL]], align 4
+; CHECK-NEXT:    [[ADD_PEEL:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
+; CHECK-NEXT:    [[ARRAYIDX4_PEEL:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 0
+; CHECK-NEXT:    store i32 [[ADD_PEEL]], ptr [[ARRAYIDX4_PEEL]], align 4
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_PEEL:%.*]] = add nuw nsw i64 0, 1
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc nuw nsw i64 0 to i32
+; CHECK-NEXT:    [[EXITCOND_PEEL:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT_PEEL]], 100
+; CHECK-NEXT:    br i1 [[EXITCOND_PEEL]], label [[FOR_BODY_PEEL_NEXT:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body.peel.next:
+; CHECK-NEXT:    br label [[FOR_BODY_PEEL_NEXT1:%.*]]
+; CHECK:       for.body.peel.next1:
+; CHECK-NEXT:    br label [[ENTRY_PEEL_NEWPH:%.*]]
+; CHECK:       entry.peel.newph:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[IM_010:%.*]] = phi i32 [ [[TMP2]], [[ENTRY_PEEL_NEWPH]] ], [ [[TMP5:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[IDXPROM1:%.*]] = zext nneg i32 [[IM_010]] to i64
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[IDXPROM1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[TMP5]] = trunc nuw nsw i64 [[INDVARS_IV]] to i32
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 100
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %im.010 = phi i32 [ 99, %entry ], [ %2, %for.body ]
+  %arrayidx = getelementptr inbounds nuw i32, ptr %b, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %idxprom1 = zext nneg i32 %im.010 to i64
+  %arrayidx2 = getelementptr inbounds nuw i32, ptr %b, i64 %idxprom1
+  %1 = load i32, ptr %arrayidx2, align 4
+  %add = add nsw i32 %1, %0
+  %arrayidx4 = getelementptr inbounds nuw i32, ptr %a, i64 %indvars.iv
+  store i32 %add, ptr %arrayidx4, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %2 = trunc nuw nsw i64 %indvars.iv to i32
+  %exitcond = icmp ne i64 %indvars.iv.next, 100
+  br i1 %exitcond, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+}
+
+; Check that phi analysis can handle cast operations with induction variable.
+define void @induction_with_cast(ptr noundef %a, i64 noundef %size) {
+; The original code is like as follows. We don't need peel the loop to make
+; phis loop induction.
+;
+; void f(unsigned int *a, unsigned long N) {
+;   for (unsigned int i=0; i<N; i++)
+;     a[i] = 10;
+; }
+;
+; CHECK-LABEL: @induction_with_cast(
+; CHECK-NEXT:  for.body.preheader:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[CONV6:%.*]] = phi i64 [ [[CONV:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ]
+; CHECK-NEXT:    [[I_05:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[CONV6]]
+; CHECK-NEXT:    store i32 10, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ADD]] = add i32 [[I_05]], 1
+; CHECK-NEXT:    [[CONV]] = zext i32 [[ADD]] to i64
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[SIZE:%.*]], [[CONV]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
+;
+for.body.preheader:
+  br label %for.body
+
+for.body:
+  %conv6 = phi i64 [ %conv, %for.body ], [ 0, %for.body.preheader ]
+  %i.05 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds nuw i32, ptr %a, i64 %conv6
+  store i32 10, ptr %arrayidx, align 4
+  %add = add i32 %i.05, 1
+  %conv = zext i32 %add to i64
+  %cmp = icmp ugt i64 %size, %conv
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+}

>From 88272e4daf6e313e577c31f7958068ed0c61f23c Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Wed, 5 Mar 2025 16:09:25 +0900
Subject: [PATCH 02/10] Apply suggestions from code review

Co-authored-by: Nikita Popov <github at npopov.com>
---
 llvm/lib/Transforms/Utils/LoopPeel.cpp                    | 8 ++++----
 llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis.ll | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index fe875b5126b69..4ab1ed2783c5b 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -221,9 +221,9 @@ PhiAnalyzer::PhiAnalyzer(const Loop &L, unsigned MaxIterations)
   assert(MaxIterations > 0 && "no peeling is allowed?");
 }
 
-// Test if \p Phi is induction variable or not. It can be checked by using SCEV,
-// but it's expensive to calculate it here. Instead, we perform the cheaper
-// checks, which cannot detect complex one but enough for some cases.
+/// Test if \p Phi is induction variable or not. It can be checked by using SCEV,
+/// but it's expensive to calculate it here. Instead, we perform the cheaper
+/// checks, which cannot detect complex one but enough for some cases.
 bool PhiAnalyzer::isInductionPHI(const PHINode *Phi) const {
   // Currently, we only support loops that consist of one basic block. In this
   // case, the phi can become an IV if it has an incoming value from the basic
@@ -253,7 +253,7 @@ bool PhiAnalyzer::isInductionPHI(const PHINode *Phi) const {
     if (Visited.contains(Cur))
       return false;
 
-    Instruction *I = dyn_cast<Instruction>(Cur);
+    auto *I = dyn_cast<Instruction>(Cur);
     if (!I || I->getParent() != Phi->getParent())
       return false;
 
diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis.ll
index 838b1257278a3..4b2dfad3591cd 100644
--- a/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis.ll
+++ b/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis.ll
@@ -287,8 +287,8 @@ for.body:
   %x = phi i32 [ 0, %entry ], [ %y, %for.body ]
   %a = phi i32 [ 0, %entry ], [ %add, %for.body ]
   %y = phi i32 [ 0, %entry ], [ %a, %for.body ]
-  tail call void @_Z1gi(i32 signext %x)
-  tail call void @_Z1gi(i32 signext %a)
+  tail call void @_Z1gi(i32 %x)
+  tail call void @_Z1gi(i32 %a)
   %add = add nuw nsw i32 %i, 2
   %inc = add nuw nsw i32 %i, 1
   %exitcond = icmp ne i32 %inc, 100000

>From ee2673728ffc853e80f6ee2e2f7b7456630e5902 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Wed, 5 Mar 2025 07:27:44 +0000
Subject: [PATCH 03/10] Apply clang-format

---
 llvm/lib/Transforms/Utils/LoopPeel.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index 4ab1ed2783c5b..cb760c21705a1 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -221,9 +221,9 @@ PhiAnalyzer::PhiAnalyzer(const Loop &L, unsigned MaxIterations)
   assert(MaxIterations > 0 && "no peeling is allowed?");
 }
 
-/// Test if \p Phi is induction variable or not. It can be checked by using SCEV,
-/// but it's expensive to calculate it here. Instead, we perform the cheaper
-/// checks, which cannot detect complex one but enough for some cases.
+/// Test if \p Phi is induction variable or not. It can be checked by using
+/// SCEV, but it's expensive to calculate it here. Instead, we perform the
+/// cheaper checks, which cannot detect complex one but enough for some cases.
 bool PhiAnalyzer::isInductionPHI(const PHINode *Phi) const {
   // Currently, we only support loops that consist of one basic block. In this
   // case, the phi can become an IV if it has an incoming value from the basic

>From cd3fc9c68d0ea18ffbd707137ac7ea6b98ad4229 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Wed, 5 Mar 2025 13:26:39 +0000
Subject: [PATCH 04/10] Address review comments

---
 llvm/lib/Transforms/Utils/LoopPeel.cpp        |  23 +--
 .../partial-unroll-dead-instructions.ll       |   2 +-
 .../LoopUnroll/peel-loop-phi-analysis.ll      | 145 +++++++++++++-----
 3 files changed, 115 insertions(+), 55 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index cb760c21705a1..57d625b9948cb 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -13,7 +13,6 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/IVDescriptors.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopIterator.h"
@@ -204,8 +203,8 @@ class PhiAnalyzer {
   // becomes an invariant.
   PeelCounter calculate(const Value &);
 
-  // Returns true if the \p Phi is an induction in the target loop. This
-  // function is a wrapper of `InductionDescriptor::isInductionPHI`.
+  // Returns true if the \p Phi is an induction in the target loop. This is a
+  // lightweight check and possible to detect an IV in some cases.
   bool isInductionPHI(const PHINode *Phi) const;
 
   const Loop &L;
@@ -225,20 +224,12 @@ PhiAnalyzer::PhiAnalyzer(const Loop &L, unsigned MaxIterations)
 /// SCEV, but it's expensive to calculate it here. Instead, we perform the
 /// cheaper checks, which cannot detect complex one but enough for some cases.
 bool PhiAnalyzer::isInductionPHI(const PHINode *Phi) const {
-  // Currently, we only support loops that consist of one basic block. In this
-  // case, the phi can become an IV if it has an incoming value from the basic
-  // block that this phi is also included.
-  int LoopIdx = -1;
-  for (unsigned I = 0; I != Phi->getNumIncomingValues(); I++) {
-    if (Phi->getIncomingBlock(I) == Phi->getParent()) {
-      LoopIdx = I;
-      break;
-    }
-  }
-  if (LoopIdx == -1)
+  // Currently we only support a loop that has single latch.
+  auto *Latch = L.getLoopLatch();
+  if (Latch == nullptr)
     return false;
 
-  Value *Cur = Phi->getIncomingValue(LoopIdx);
+  Value *Cur = Phi->getIncomingValueForBlock(Latch);
   SmallPtrSet<Value *, 4> Visited;
   bool VisitBinOp = false;
 
@@ -254,7 +245,7 @@ bool PhiAnalyzer::isInductionPHI(const PHINode *Phi) const {
       return false;
 
     auto *I = dyn_cast<Instruction>(Cur);
-    if (!I || I->getParent() != Phi->getParent())
+    if (!I || !L.contains(I))
       return false;
 
     Visited.insert(Cur);
diff --git a/llvm/test/Transforms/LoopUnroll/partial-unroll-dead-instructions.ll b/llvm/test/Transforms/LoopUnroll/partial-unroll-dead-instructions.ll
index 2a013fac63b4a..55e138a5bcc47 100644
--- a/llvm/test/Transforms/LoopUnroll/partial-unroll-dead-instructions.ll
+++ b/llvm/test/Transforms/LoopUnroll/partial-unroll-dead-instructions.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S < %s -passes=loop-unroll -unroll-allow-partial=1 | FileCheck %s
+; RUN: opt -S < %s -passes=loop-unroll -unroll-allow-partial=1 -unroll-allow-peeling=0 | FileCheck %s
 ;
 ; Bugpointed test that triggered UB while cleaning up dead
 ; instructions after simplifying indvars
diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis.ll
index 4b2dfad3591cd..be6143b9b6942 100644
--- a/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis.ll
+++ b/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis.ll
@@ -230,8 +230,8 @@ define void @binary_induction() {
 ; CHECK:       for.body.peel.begin:
 ; CHECK-NEXT:    br label [[FOR_BODY_PEEL:%.*]]
 ; CHECK:       for.body.peel:
-; CHECK-NEXT:    tail call void @_Z1gi(i32 signext 0)
-; CHECK-NEXT:    tail call void @_Z1gi(i32 signext 0)
+; CHECK-NEXT:    tail call void @_Z1gi(i32 0)
+; CHECK-NEXT:    tail call void @_Z1gi(i32 0)
 ; CHECK-NEXT:    [[ADD_PEEL:%.*]] = add nuw nsw i32 0, 2
 ; CHECK-NEXT:    [[INC_PEEL:%.*]] = add nuw nsw i32 0, 1
 ; CHECK-NEXT:    [[EXITCOND_PEEL:%.*]] = icmp ne i32 [[INC_PEEL]], 100000
@@ -239,8 +239,8 @@ define void @binary_induction() {
 ; CHECK:       for.body.peel.next:
 ; CHECK-NEXT:    br label [[FOR_BODY_PEEL2:%.*]]
 ; CHECK:       for.body.peel2:
-; CHECK-NEXT:    tail call void @_Z1gi(i32 signext 0)
-; CHECK-NEXT:    tail call void @_Z1gi(i32 signext [[ADD_PEEL]])
+; CHECK-NEXT:    tail call void @_Z1gi(i32 0)
+; CHECK-NEXT:    tail call void @_Z1gi(i32 [[ADD_PEEL]])
 ; CHECK-NEXT:    [[ADD_PEEL3:%.*]] = add nuw nsw i32 [[INC_PEEL]], 2
 ; CHECK-NEXT:    [[INC_PEEL4:%.*]] = add nuw nsw i32 [[INC_PEEL]], 1
 ; CHECK-NEXT:    [[EXITCOND_PEEL5:%.*]] = icmp ne i32 [[INC_PEEL4]], 100000
@@ -248,8 +248,8 @@ define void @binary_induction() {
 ; CHECK:       for.body.peel.next1:
 ; CHECK-NEXT:    br label [[FOR_BODY_PEEL7:%.*]]
 ; CHECK:       for.body.peel7:
-; CHECK-NEXT:    tail call void @_Z1gi(i32 signext 0)
-; CHECK-NEXT:    tail call void @_Z1gi(i32 signext [[ADD_PEEL3]])
+; CHECK-NEXT:    tail call void @_Z1gi(i32 0)
+; CHECK-NEXT:    tail call void @_Z1gi(i32 [[ADD_PEEL3]])
 ; CHECK-NEXT:    [[ADD_PEEL8:%.*]] = add nuw nsw i32 [[INC_PEEL4]], 2
 ; CHECK-NEXT:    [[INC_PEEL9:%.*]] = add nuw nsw i32 [[INC_PEEL4]], 1
 ; CHECK-NEXT:    [[EXITCOND_PEEL10:%.*]] = icmp ne i32 [[INC_PEEL9]], 100000
@@ -269,8 +269,8 @@ define void @binary_induction() {
 ; CHECK-NEXT:    [[X:%.*]] = phi i32 [ [[ADD_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[Y:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[A:%.*]] = phi i32 [ [[ADD_PEEL8]], [[ENTRY_PEEL_NEWPH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[Y]] = phi i32 [ [[ADD_PEEL3]], [[ENTRY_PEEL_NEWPH]] ], [ [[A]], [[FOR_BODY]] ]
-; CHECK-NEXT:    tail call void @_Z1gi(i32 signext [[X]])
-; CHECK-NEXT:    tail call void @_Z1gi(i32 signext [[A]])
+; CHECK-NEXT:    tail call void @_Z1gi(i32 [[X]])
+; CHECK-NEXT:    tail call void @_Z1gi(i32 [[A]])
 ; CHECK-NEXT:    [[ADD]] = add nuw nsw i32 [[I]], 2
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[INC]], 100000
@@ -296,9 +296,8 @@ for.body:
 }
 
 ; Check that phi analysis can handle an assignment from an induction.
-define void @induction_assignment(ptr noundef noalias %a, ptr noundef noalias %b) {
-; The phis become induction through the assignment from an induction. Check
-; that the phi im becomes a loop ; induction because i is a loop induction.
+; The im becomes a loop induction by peeling the loop once, because i is a loop
+; induction.
 ; This test is based on TSVC s291.
 ;
 ; #define N 100
@@ -309,9 +308,10 @@ define void @induction_assignment(ptr noundef noalias %a, ptr noundef noalias %b
 ;     im = i;
 ;   }
 ; }
+define void @induction_assignment(ptr noundef noalias %a, ptr noundef noalias %b) {
 ; CHECK-LABEL: @induction_assignment(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[FOR_BODY_PEEL_BEGIN:%.*]]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body.peel.begin:
 ; CHECK-NEXT:    br label [[FOR_BODY_PEEL:%.*]]
 ; CHECK:       for.body.peel:
@@ -332,10 +332,10 @@ define void @induction_assignment(ptr noundef noalias %a, ptr noundef noalias %b
 ; CHECK:       for.body.peel.next1:
 ; CHECK-NEXT:    br label [[ENTRY_PEEL_NEWPH:%.*]]
 ; CHECK:       entry.peel.newph:
-; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-NEXT:    br label [[FOR_BODY1:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[IM_010:%.*]] = phi i32 [ [[TMP2]], [[ENTRY_PEEL_NEWPH]] ], [ [[TMP5:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY1]] ]
+; CHECK-NEXT:    [[IM_010:%.*]] = phi i32 [ [[TMP2]], [[ENTRY_PEEL_NEWPH]] ], [ [[TMP5:%.*]], [[FOR_BODY1]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[IDXPROM1:%.*]] = zext nneg i32 [[IM_010]] to i64
@@ -347,7 +347,7 @@ define void @induction_assignment(ptr noundef noalias %a, ptr noundef noalias %b
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[TMP5]] = trunc nuw nsw i64 [[INDVARS_IV]] to i32
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 100
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY1]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       for.cond.cleanup.loopexit:
 ; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
 ; CHECK:       for.cond.cleanup:
@@ -376,28 +376,42 @@ for.cond.cleanup:
   ret void
 }
 
-; Check that phi analysis can handle cast operations with induction variable.
-define void @induction_with_cast(ptr noundef %a, i64 noundef %size) {
-; The original code is like as follows. We don't need peel the loop to make
-; phis loop induction.
+; Check that the unnecessary peeling occurs in the following case. The cause is
+; that the analyzer determines a casted IV as a non-IV.
 ;
-; void f(unsigned int *a, unsigned long N) {
-;   for (unsigned int i=0; i<N; i++)
-;     a[i] = 10;
-; }
+; for (unsigned int i=0; i<10000; i++)
+;   a[(unsigned long)j] = 10;
 ;
-; CHECK-LABEL: @induction_with_cast(
+define void @induction_undesirable_peel1(ptr noundef %a) {
+; CHECK-LABEL: @induction_undesirable_peel1(
 ; CHECK-NEXT:  for.body.preheader:
+; CHECK-NEXT:    br label [[FOR_BODY_PEEL_BEGIN:%.*]]
+; CHECK:       for.body.peel.begin:
+; CHECK-NEXT:    br label [[FOR_BODY_PEEL:%.*]]
+; CHECK:       for.body.peel:
+; CHECK-NEXT:    [[ARRAYIDX_PEEL:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 0
+; CHECK-NEXT:    store i32 10, ptr [[ARRAYIDX_PEEL]], align 4
+; CHECK-NEXT:    [[IV_NEXT_PEEL:%.*]] = add nuw nsw i32 0, 1
+; CHECK-NEXT:    [[CONV_NEXT_PEEL:%.*]] = zext i32 [[IV_NEXT_PEEL]] to i64
+; CHECK-NEXT:    [[CMP_PEEL:%.*]] = icmp ugt i64 10000, [[CONV_NEXT_PEEL]]
+; CHECK-NEXT:    br i1 [[CMP_PEEL]], label [[FOR_BODY_PEEL_NEXT:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body.peel.next:
+; CHECK-NEXT:    br label [[FOR_BODY_PEEL_NEXT1:%.*]]
+; CHECK:       for.body.peel.next1:
+; CHECK-NEXT:    br label [[FOR_BODY_PREHEADER_PEEL_NEWPH:%.*]]
+; CHECK:       for.body.preheader.peel.newph:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[CONV6:%.*]] = phi i64 [ [[CONV:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ]
-; CHECK-NEXT:    [[I_05:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[CONV6]]
+; CHECK-NEXT:    [[CONV:%.*]] = phi i64 [ [[CONV_NEXT:%.*]], [[FOR_BODY]] ], [ [[CONV_NEXT_PEEL]], [[FOR_BODY_PREHEADER_PEEL_NEWPH]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[IV_NEXT_PEEL]], [[FOR_BODY_PREHEADER_PEEL_NEWPH]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[CONV]]
 ; CHECK-NEXT:    store i32 10, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[ADD]] = add i32 [[I_05]], 1
-; CHECK-NEXT:    [[CONV]] = zext i32 [[ADD]] to i64
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[SIZE:%.*]], [[CONV]]
-; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT:    [[CONV_NEXT]] = zext i32 [[IV_NEXT]] to i64
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 10000, [[CONV_NEXT]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
 ; CHECK:       for.cond.cleanup:
 ; CHECK-NEXT:    ret void
 ;
@@ -405,15 +419,70 @@ for.body.preheader:
   br label %for.body
 
 for.body:
-  %conv6 = phi i64 [ %conv, %for.body ], [ 0, %for.body.preheader ]
-  %i.05 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds nuw i32, ptr %a, i64 %conv6
+  %conv = phi i64 [ %conv.next, %for.body ], [ 0, %for.body.preheader ]
+  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds nuw i32, ptr %a, i64 %conv
   store i32 10, ptr %arrayidx, align 4
-  %add = add i32 %i.05, 1
-  %conv = zext i32 %add to i64
-  %cmp = icmp ugt i64 %size, %conv
+  %iv.next = add nsw nuw i32 %iv, 1
+  %conv.next = zext i32 %iv.next to i64
+  %cmp = icmp ugt i64 10000, %conv.next
   br i1 %cmp, label %for.body, label %for.cond.cleanup
 
 for.cond.cleanup:
   ret void
 }
+
+; Check that the unnecessary peeling occurs in the following case. The analyzer
+; cannot detect that the difference between the initial value of %i and %j is
+; equal to the increment of the %i.
+;
+; int j = 0;
+; for (unsigned int i=1; i<N; i++)
+;   a[j] = 10;
+;
+define void @induction_undesirable_peel2(ptr noundef %a) {
+; CHECK-LABEL: @induction_undesirable_peel2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY_PEEL_BEGIN:%.*]]
+; CHECK:       for.body.peel.begin:
+; CHECK-NEXT:    br label [[FOR_BODY_PEEL:%.*]]
+; CHECK:       for.body.peel:
+; CHECK-NEXT:    [[ARRAYIDX_PEEL:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 0
+; CHECK-NEXT:    store i32 10, ptr [[ARRAYIDX_PEEL]], align 4
+; CHECK-NEXT:    [[I_NEXT_PEEL:%.*]] = add nuw nsw i64 1, 1
+; CHECK-NEXT:    [[CMP_PEEL:%.*]] = icmp slt i64 1, 10000
+; CHECK-NEXT:    br i1 [[CMP_PEEL]], label [[FOR_BODY_PEEL_NEXT:%.*]], label [[EXIT:%.*]]
+; CHECK:       for.body.peel.next:
+; CHECK-NEXT:    br label [[FOR_BODY_PEEL_NEXT1:%.*]]
+; CHECK:       for.body.peel.next1:
+; CHECK-NEXT:    br label [[ENTRY_PEEL_NEWPH:%.*]]
+; CHECK:       entry.peel.newph:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[I_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[J:%.*]] = phi i64 [ 1, [[ENTRY_PEEL_NEWPH]] ], [ [[I]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[J]]
+; CHECK-NEXT:    store i32 10, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[I]], 10000
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK:       exit.loopexit:
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i64 [ 1, %entry ], [ %i.next, %for.body ]
+  %j = phi i64 [ 0, %entry ], [ %i, %for.body ]
+  %arrayidx = getelementptr inbounds nuw i32, ptr %a, i64 %j
+  store i32 10, ptr %arrayidx, align 4
+  %i.next = add nsw nuw i64 %i, 1
+  %cmp = icmp slt i64 %i, 10000
+  br i1 %cmp, label %for.body, label %exit
+
+exit:
+  ret void
+}

>From 9b039544da041fb6a1d195aa7bd5f8d5f16c0e55 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Thu, 6 Mar 2025 09:41:48 +0000
Subject: [PATCH 05/10] Move the comments of the test outside the function

---
 llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis.ll | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis.ll
index be6143b9b6942..7541ee0ae43f1 100644
--- a/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis.ll
+++ b/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis.ll
@@ -199,7 +199,6 @@ for.body:
 }
 
 ; Check that phi analysis can handle a binary operator with induction variables.
-define void @binary_induction() {
 ; The phis become induction through the chain of phis, with a unary
 ; instruction on a loop induction.  Check that the phis for x, a, and y become
 ; loop inductions since x is based on y, which is based on a, which is based
@@ -224,6 +223,8 @@ define void @binary_induction() {
 ;     a = i + 2;
 ;   }
 ; }
+;
+define void @binary_induction() {
 ; CHECK-LABEL: @binary_induction(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_BODY_PEEL_BEGIN:%.*]]

>From c6f1a3a074dfbeb56bb1f824c7608853333878b3 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Thu, 6 Mar 2025 10:31:17 +0000
Subject: [PATCH 06/10] Fix the handling of binary instructions for inductions

---
 llvm/lib/Transforms/Utils/LoopPeel.cpp        |  58 +++++++--
 .../LoopUnroll/peel-loop-phi-analysis.ll      | 110 ++++++++++++++++--
 2 files changed, 149 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index 57d625b9948cb..bcca4f11f5096 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -189,20 +189,32 @@ class PhiAnalyzer {
   std::optional<unsigned> calculateIterationsToPeel();
 
 protected:
-  using PeelCounter = std::optional<unsigned>;
+  enum class PeelCounterType {
+    Invariant,
+    Induction,
+  };
+
+  using PeelCounterValue = std::pair<unsigned, PeelCounterType>;
+  using PeelCounter = std::optional<PeelCounterValue>;
   const PeelCounter Unknown = std::nullopt;
 
   // Add 1 respecting Unknown and return Unknown if result over MaxIterations
   PeelCounter addOne(PeelCounter PC) const {
     if (PC == Unknown)
       return Unknown;
-    return (*PC + 1 <= MaxIterations) ? PeelCounter{*PC + 1} : Unknown;
+    auto [Val, Ty] = *PC;
+    return (Val + 1 <= MaxIterations) ? PeelCounter({Val + 1, Ty}) : Unknown;
   }
 
-  // Calculate the number of iterations after which the given value
-  // becomes an invariant.
+  // Return a value representing zero for the given counter type.
+  PeelCounter makeZero(PeelCounterType Ty) const { return PeelCounter({0, Ty}); }
+
+  // Calculate the number of iterations after which the given value becomes an invariant or an induction.
   PeelCounter calculate(const Value &);
 
+  // Auxiliary function to calculate the number of iterations for a comparison instruction or a binary operator.
+  PeelCounter mergeTwoCounter(const Instruction &CmpOrBinaryOp, const PeelCounterValue &LHS, const PeelCounterValue &RHS) const;
+
   // Returns true if the \p Phi is an induction in the target loop. This is a
   // lightweight check and possible to detect an IV in some cases.
   bool isInductionPHI(const PHINode *Phi) const;
@@ -273,6 +285,29 @@ bool PhiAnalyzer::isInductionPHI(const PHINode *Phi) const {
   return VisitBinOp;
 }
 
+PhiAnalyzer::PeelCounter PhiAnalyzer::mergeTwoCounter(const Instruction &CmpOrBinaryOp, const PeelCounterValue &LHS, const PeelCounterValue &RHS) const {
+  auto &[LVal, LTy] = LHS;
+  auto &[RVal, RTy] = RHS;
+  unsigned NewVal = std::max(LVal, RVal);
+
+  // If either the type of LHS or the type of RHS is an induction, then the
+  // result of this instruction is also an induction only if it is an addition
+  // or a subtraction (after peeling enough times). Otherwise it can be a value
+  // that is neither an invariant nor an induction.
+  //
+  // If both the type of LHS and the type of RHS are invariants, then the
+  // result is also an invariant.
+  if (LTy == PeelCounterType::Induction || RTy == PeelCounterType::Induction) {
+    if (const auto *BinOp = dyn_cast<BinaryOperator>(&CmpOrBinaryOp)) {
+      if (BinOp->getOpcode() == Instruction::Add ||
+          BinOp->getOpcode() == Instruction::Sub)
+        return PeelCounter({NewVal, PeelCounterType::Induction});
+    }
+    return Unknown;
+  }
+  return PeelCounter({NewVal, PeelCounterType::Invariant});
+}
+
 // This function calculates the number of iterations after which the value
 // becomes an invariant. The pre-calculated values are memorized in a map.
 // N.B. This number will be Unknown or <= MaxIterations.
@@ -298,7 +333,7 @@ PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) {
 
   if (L.isLoopInvariant(&V))
     // Loop invariant so known at start.
-    return (IterationsToInvarianceOrInduction[&V] = 0);
+    return (IterationsToInvarianceOrInduction[&V] = makeZero(PeelCounterType::Invariant));
   if (const PHINode *Phi = dyn_cast<PHINode>(&V)) {
     if (Phi->getParent() != L.getHeader()) {
       // Phi is not in header block so Unknown.
@@ -309,7 +344,7 @@ PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) {
 
     // If Phi is an induction, register it as a starting point.
     if (isInductionPHI(Phi))
-      return (IterationsToInvarianceOrInduction[&V] = 0);
+      return (IterationsToInvarianceOrInduction[&V] = makeZero(PeelCounterType::Induction));
 
     // We need to analyze the input from the back edge and add 1.
     Value *Input = Phi->getIncomingValueForBlock(L.getLoopLatch());
@@ -327,12 +362,11 @@ PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) {
       PeelCounter RHS = calculate(*I->getOperand(1));
       if (RHS == Unknown)
         return Unknown;
-      return (IterationsToInvarianceOrInduction[I] = {std::max(*LHS, *RHS)});
+      return (IterationsToInvarianceOrInduction[I] = mergeTwoCounter(*I, *LHS, *RHS));
     }
     if (I->isCast())
       // Cast instructions get the value of the operand.
-      return (IterationsToInvarianceOrInduction[I] =
-                  calculate(*I->getOperand(0)));
+      return (IterationsToInvarianceOrInduction[I] = calculate(*I->getOperand(0)));
   }
   // TODO: handle more expressions
 
@@ -347,9 +381,9 @@ std::optional<unsigned> PhiAnalyzer::calculateIterationsToPeel() {
   for (auto &PHI : L.getHeader()->phis()) {
     PeelCounter ToInvarianceOrInduction = calculate(PHI);
     if (ToInvarianceOrInduction != Unknown) {
-      assert(*ToInvarianceOrInduction <= MaxIterations &&
-             "bad result in phi analysis");
-      Iterations = std::max(Iterations, *ToInvarianceOrInduction);
+      unsigned Val = ToInvarianceOrInduction->first;
+      assert(Val <= MaxIterations && "bad result in phi analysis");
+      Iterations = std::max(Iterations, Val);
       if (Iterations == MaxIterations)
         break;
     }
diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis.ll
index 7541ee0ae43f1..1b6f9dc8c89fb 100644
--- a/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis.ll
+++ b/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis.ll
@@ -309,10 +309,10 @@ for.body:
 ;     im = i;
 ;   }
 ; }
-define void @induction_assignment(ptr noundef noalias %a, ptr noundef noalias %b) {
-; CHECK-LABEL: @induction_assignment(
+define void @phi_refers_another_induction(ptr noundef noalias %a, ptr noundef noalias %b) {
+; CHECK-LABEL: @phi_refers_another_induction(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-NEXT:    br label [[FOR_BODY_PEEL_BEGIN:%.*]]
 ; CHECK:       for.body.peel.begin:
 ; CHECK-NEXT:    br label [[FOR_BODY_PEEL:%.*]]
 ; CHECK:       for.body.peel:
@@ -333,10 +333,10 @@ define void @induction_assignment(ptr noundef noalias %a, ptr noundef noalias %b
 ; CHECK:       for.body.peel.next1:
 ; CHECK-NEXT:    br label [[ENTRY_PEEL_NEWPH:%.*]]
 ; CHECK:       entry.peel.newph:
-; CHECK-NEXT:    br label [[FOR_BODY1:%.*]]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY1]] ]
-; CHECK-NEXT:    [[IM_010:%.*]] = phi i32 [ [[TMP2]], [[ENTRY_PEEL_NEWPH]] ], [ [[TMP5:%.*]], [[FOR_BODY1]] ]
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[IM_010:%.*]] = phi i32 [ [[TMP2]], [[ENTRY_PEEL_NEWPH]] ], [ [[TMP5:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[IDXPROM1:%.*]] = zext nneg i32 [[IM_010]] to i64
@@ -348,7 +348,7 @@ define void @induction_assignment(ptr noundef noalias %a, ptr noundef noalias %b
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[TMP5]] = trunc nuw nsw i64 [[INDVARS_IV]] to i32
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 100
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY1]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       for.cond.cleanup.loopexit:
 ; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
 ; CHECK:       for.cond.cleanup:
@@ -377,6 +377,102 @@ for.cond.cleanup:
   ret void
 }
 
+; Check that unnecessary peeling doesn't occur if there exist a comparison
+; instruction between an induction and another induction. The original code is
+; as below. Both i and j are inductions, but the comparison i < j is not an
+; induction.
+;
+; val = 42;
+; for (i=0,j=100; i<10000; i+=2,j+=1) {
+;   a[i] = val;
+;   val = i < j;
+; }
+;
+define void @dont_peel_cmp_ind_ind(ptr noundef %a) {
+; CHECK-LABEL: @dont_peel_cmp_ind_ind(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[IV_0:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_0_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[IV_1:%.*]] = phi i64 [ 100, [[ENTRY]] ], [ [[IV_1_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[VAL:%.*]] = phi i32 [ 42, [[ENTRY]] ], [ [[VAL_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[IV_0]]
+; CHECK-NEXT:    store i32 10, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[IV_0_NEXT]] = add nuw nsw i64 [[IV_0]], 2
+; CHECK-NEXT:    [[IV_1_NEXT]] = add nuw nsw i64 [[IV_1]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i64 [[IV_0_NEXT]], [[IV_1_NEXT]]
+; CHECK-NEXT:    [[VAL_NEXT]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp slt i64 [[IV_0_NEXT]], 10000
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv.0 = phi i64 [ 0, %entry ], [ %iv.0.next, %for.body ]
+  %iv.1 = phi i64 [ 100, %entry ] , [ %iv.1.next, %for.body ]
+  %val = phi i32 [ 42, %entry ], [ %val.next, %for.body ]
+  %arrayidx = getelementptr inbounds nuw i32, ptr %a, i64 %iv.0
+  store i32 10, ptr %arrayidx, align 4
+  %iv.0.next = add nsw nuw i64 %iv.0, 2
+  %iv.1.next = add nsw nuw i64 %iv.1, 1
+  %val.next.cmp = icmp slt i64 %iv.0.next, %iv.1.next
+  %val.next = zext i1 %val.next.cmp to i32
+  %exitcond = icmp slt i64 %iv.0.next, 10000
+  br i1 %exitcond, label %for.body, label %exit
+
+exit:
+  ret void
+}
+
+; Check that unnecessary peeling doesn't occur if there is an instruction that
+; is neither an add nor a sub and has an induction on its operand. The original
+; code is like as below. If the operator is either an add or a sub, then the
+; result would be an induction, so peeling makes sense. In this case, however,
+; the operator is a bitshift.
+;
+; val = 42;
+; for (i=0; i<10000; i++) {
+;   a[i] = val;
+;   val = 1 << i;
+; }
+;
+define void @dont_peel_shl_invariant_ind(ptr noundef %a) {
+; CHECK-LABEL: @dont_peel_shl_invariant_ind(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[VAL:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VAL_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i64, ptr [[A:%.*]], i64 [[IV]]
+; CHECK-NEXT:    store i64 [[VAL]], ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[VAL_NEXT]] = shl nuw nsw i64 1, [[IV]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i64 [[IV_NEXT]], 10000
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %val = phi i64 [ 0, %entry ], [ %val.next, %for.body ]
+  %arrayidx = getelementptr inbounds nuw i64, ptr %a, i64 %iv
+  store i64 %val, ptr %arrayidx, align 4
+  %iv.next = add nsw nuw i64 %iv, 1
+  %val.next = shl nsw nuw i64 1, %iv
+  %cmp = icmp slt i64 %iv.next, 10000
+  br i1 %cmp, label %for.body, label %exit
+
+exit:
+  ret void
+}
+
+
 ; Check that the unnecessary peeling occurs in the following case. The cause is
 ; that the analyzer determines a casted IV as a non-IV.
 ;

>From 0f41b26337927bc6c001d123b51719cfdd884ed9 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Thu, 6 Mar 2025 10:45:40 +0000
Subject: [PATCH 07/10] Apply clang-format

---
 llvm/lib/Transforms/Utils/LoopPeel.cpp | 31 ++++++++++++++++++--------
 1 file changed, 22 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index bcca4f11f5096..49c56298bf268 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -207,13 +207,19 @@ class PhiAnalyzer {
   }
 
   // Return a value representing zero for the given counter type.
-  PeelCounter makeZero(PeelCounterType Ty) const { return PeelCounter({0, Ty}); }
+  PeelCounter makeZero(PeelCounterType Ty) const {
+    return PeelCounter({0, Ty});
+  }
 
-  // Calculate the number of iterations after which the given value becomes an invariant or an induction.
+  // Calculate the number of iterations after which the given value becomes an
+  // invariant or an induction.
   PeelCounter calculate(const Value &);
 
-  // Auxiliary function to calculate the number of iterations for a comparison instruction or a binary operator.
-  PeelCounter mergeTwoCounter(const Instruction &CmpOrBinaryOp, const PeelCounterValue &LHS, const PeelCounterValue &RHS) const;
+  // Auxiliary function to calculate the number of iterations for a comparison
+  // instruction or a binary operator.
+  PeelCounter mergeTwoCounter(const Instruction &CmpOrBinaryOp,
+                              const PeelCounterValue &LHS,
+                              const PeelCounterValue &RHS) const;
 
   // Returns true if the \p Phi is an induction in the target loop. This is a
   // lightweight check and possible to detect an IV in some cases.
@@ -285,7 +291,10 @@ bool PhiAnalyzer::isInductionPHI(const PHINode *Phi) const {
   return VisitBinOp;
 }
 
-PhiAnalyzer::PeelCounter PhiAnalyzer::mergeTwoCounter(const Instruction &CmpOrBinaryOp, const PeelCounterValue &LHS, const PeelCounterValue &RHS) const {
+PhiAnalyzer::PeelCounter
+PhiAnalyzer::mergeTwoCounter(const Instruction &CmpOrBinaryOp,
+                             const PeelCounterValue &LHS,
+                             const PeelCounterValue &RHS) const {
   auto &[LVal, LTy] = LHS;
   auto &[RVal, RTy] = RHS;
   unsigned NewVal = std::max(LVal, RVal);
@@ -333,7 +342,8 @@ PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) {
 
   if (L.isLoopInvariant(&V))
     // Loop invariant so known at start.
-    return (IterationsToInvarianceOrInduction[&V] = makeZero(PeelCounterType::Invariant));
+    return (IterationsToInvarianceOrInduction[&V] =
+                makeZero(PeelCounterType::Invariant));
   if (const PHINode *Phi = dyn_cast<PHINode>(&V)) {
     if (Phi->getParent() != L.getHeader()) {
       // Phi is not in header block so Unknown.
@@ -344,7 +354,8 @@ PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) {
 
     // If Phi is an induction, register it as a starting point.
     if (isInductionPHI(Phi))
-      return (IterationsToInvarianceOrInduction[&V] = makeZero(PeelCounterType::Induction));
+      return (IterationsToInvarianceOrInduction[&V] =
+                  makeZero(PeelCounterType::Induction));
 
     // We need to analyze the input from the back edge and add 1.
     Value *Input = Phi->getIncomingValueForBlock(L.getLoopLatch());
@@ -362,11 +373,13 @@ PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) {
       PeelCounter RHS = calculate(*I->getOperand(1));
       if (RHS == Unknown)
         return Unknown;
-      return (IterationsToInvarianceOrInduction[I] = mergeTwoCounter(*I, *LHS, *RHS));
+      return (IterationsToInvarianceOrInduction[I] =
+                  mergeTwoCounter(*I, *LHS, *RHS));
     }
     if (I->isCast())
       // Cast instructions get the value of the operand.
-      return (IterationsToInvarianceOrInduction[I] = calculate(*I->getOperand(0)));
+      return (IterationsToInvarianceOrInduction[I] =
+                  calculate(*I->getOperand(0)));
   }
   // TODO: handle more expressions
 

>From b05a507a18a816c49b0a9de3ebc7c88144eff416 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Fri, 14 Mar 2025 12:11:44 +0000
Subject: [PATCH 08/10] Fix comment

---
 llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis.ll | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis.ll
index 1b6f9dc8c89fb..cbc93c9f01b62 100644
--- a/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis.ll
+++ b/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis.ll
@@ -534,8 +534,10 @@ for.cond.cleanup:
 ; equal to the increment of the %i.
 ;
 ; int j = 0;
-; for (unsigned int i=1; i<N; i++)
+; for (int i=1; i<N; i++) {
 ;   a[j] = 10;
+;   j = i;
+; }
 ;
 define void @induction_undesirable_peel2(ptr noundef %a) {
 ; CHECK-LABEL: @induction_undesirable_peel2(

>From bfddeff527fb85c81d043a0aa705b388419c1802 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Fri, 21 Mar 2025 14:15:23 +0000
Subject: [PATCH 09/10] Remove nsw/nuw check

---
 llvm/lib/Transforms/Utils/LoopPeel.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index 49c56298bf268..b62ea64fccaf5 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -243,7 +243,7 @@ PhiAnalyzer::PhiAnalyzer(const Loop &L, unsigned MaxIterations)
 /// cheaper checks, which cannot detect complex one but enough for some cases.
 bool PhiAnalyzer::isInductionPHI(const PHINode *Phi) const {
   // Currently we only support a loop that has single latch.
-  auto *Latch = L.getLoopLatch();
+  BasicBlock *Latch = L.getLoopLatch();
   if (Latch == nullptr)
     return false;
 
@@ -274,8 +274,6 @@ bool PhiAnalyzer::isInductionPHI(const PHINode *Phi) const {
       if (BinOp->getOpcode() != Instruction::Add &&
           BinOp->getOpcode() != Instruction::Sub)
         return false;
-      if (!BinOp->hasNoUnsignedWrap() || !BinOp->hasNoSignedWrap())
-        return false;
       if (!isa<ConstantInt>(BinOp->getOperand(1)))
         return false;
 

>From 1b18665f7d4dad4aae5f29e86fd08369ece375ce Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Mon, 24 Mar 2025 11:39:53 +0000
Subject: [PATCH 10/10] Add an option to toggle peeling to make PHIs into IVs

---
 llvm/lib/Transforms/Utils/LoopPeel.cpp        |  22 +-
 .../LoopUnroll/peel-loop-phi-analysis-iv.ll   | 407 ++++++++++++++++++
 .../LoopUnroll/peel-loop-phi-analysis.ll      | 387 -----------------
 3 files changed, 423 insertions(+), 393 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis-iv.ll

diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index b62ea64fccaf5..d8fd98e54d79c 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -77,6 +77,10 @@ static cl::opt<bool> DisableAdvancedPeeling(
     cl::desc(
         "Disable advance peeling. Issues for convergent targets (D134803)."));
 
+static cl::opt<bool>
+    EnablePeelingForIV("enable-peeling-for-iv", cl::init(false), cl::Hidden,
+                       cl::desc("Enable peeling to make a PHI into an IV"));
+
 static const char *PeeledCountMetaData = "llvm.loop.peeled.count";
 
 // Check whether we are capable of peeling this loop.
@@ -182,7 +186,7 @@ namespace {
 // respecting the maximum specified.
 class PhiAnalyzer {
 public:
-  PhiAnalyzer(const Loop &L, unsigned MaxIterations);
+  PhiAnalyzer(const Loop &L, unsigned MaxIterations, bool PeelForIV);
 
   // Calculate the sufficient minimum number of iterations of the loop to peel
   // such that phi instructions become determined (subject to allowable limits)
@@ -227,13 +231,14 @@ class PhiAnalyzer {
 
   const Loop &L;
   const unsigned MaxIterations;
+  const bool PeelForIV;
 
   // Map of Values to number of iterations to invariance or induction
   SmallDenseMap<const Value *, PeelCounter> IterationsToInvarianceOrInduction;
 };
 
-PhiAnalyzer::PhiAnalyzer(const Loop &L, unsigned MaxIterations)
-    : L(L), MaxIterations(MaxIterations) {
+PhiAnalyzer::PhiAnalyzer(const Loop &L, unsigned MaxIterations, bool PeelForIV)
+    : L(L), MaxIterations(MaxIterations), PeelForIV(PeelForIV) {
   assert(canPeel(&L) && "loop is not suitable for peeling");
   assert(MaxIterations > 0 && "no peeling is allowed?");
 }
@@ -351,7 +356,7 @@ PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) {
     }
 
     // If Phi is an induction, register it as a starting point.
-    if (isInductionPHI(Phi))
+    if (PeelForIV && isInductionPHI(Phi))
       return (IterationsToInvarianceOrInduction[&V] =
                   makeZero(PeelCounterType::Induction));
 
@@ -729,8 +734,13 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
   // induction, and try to peel the maximum number of iterations among these
   // values, thus turning all those Phis into invariants or inductions.
   if (MaxPeelCount > DesiredPeelCount) {
-    // Check how many iterations are useful for resolving Phis
-    auto NumPeels = PhiAnalyzer(*L, MaxPeelCount).calculateIterationsToPeel();
+    // Check how many iterations are useful for resolving Phis.
+    // TODO: Compute `PeelForIV` with some heuristic. Peeling a loop to make a
+    // PHI into an IV is usually good for loop vectorization, so we should
+    // perform such peelings if the loop body is vectorizable (e.g., doesn't
+    // contain function calls).
+    auto NumPeels = PhiAnalyzer(*L, MaxPeelCount, EnablePeelingForIV)
+                        .calculateIterationsToPeel();
     if (NumPeels)
       DesiredPeelCount = std::max(DesiredPeelCount, *NumPeels);
   }
diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis-iv.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis-iv.ll
new file mode 100644
index 0000000000000..6718675b9d192
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis-iv.ll
@@ -0,0 +1,407 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -S -passes=loop-unroll -enable-peeling-for-iv | FileCheck %s
+; RUN: opt < %s -S -passes=loop-unroll-full -enable-peeling-for-iv | FileCheck %s
+
+; void g(int);
+declare void @_Z1gi(i32 signext)
+
+; Check that phi analysis can handle a binary operator with induction variables.
+; The phis become induction through the chain of phis, with a unary
+; instruction on a loop induction.  Check that the phis for x, a, and y become
+; loop inductions since x is based on y, which is based on a, which is based
+; on a binary add of a constant and i, which is a loop induction.
+; Consider the calls to g:
+; First iteration: g(0), x=0, g(0), y=1, a=2
+; Second iteration: g(0), x=1, g(2), y=3, a=3
+; Third iteration: g(1), x=3, g(3), y=4, a=4
+; Fourth iteration (and subsequent): g(i), x=i+1, g(i+1), y=i+2, a=i+2
+; Therefore, peeling 3 times makes the phi nodes induction variables.
+;
+; void g(int);
+; void binary() {
+;   int x = 0;
+;   int y = 0;
+;   int a = 0;
+;   for(int i = 0; i <100000; ++i) {
+;     g(x);
+;     x = y;
+;     g(a);
+;     y = a + 1;
+;     a = i + 2;
+;   }
+; }
+;
+define void @binary_induction() {
+; CHECK-LABEL: define void @binary_induction() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br label %[[FOR_BODY_PEEL_BEGIN:.*]]
+; CHECK:       [[FOR_BODY_PEEL_BEGIN]]:
+; CHECK-NEXT:    br label %[[FOR_BODY_PEEL:.*]]
+; CHECK:       [[FOR_BODY_PEEL]]:
+; CHECK-NEXT:    tail call void @_Z1gi(i32 0)
+; CHECK-NEXT:    tail call void @_Z1gi(i32 0)
+; CHECK-NEXT:    [[ADD_PEEL:%.*]] = add nuw nsw i32 0, 2
+; CHECK-NEXT:    [[INC_PEEL:%.*]] = add nuw nsw i32 0, 1
+; CHECK-NEXT:    [[EXITCOND_PEEL:%.*]] = icmp ne i32 [[INC_PEEL]], 100000
+; CHECK-NEXT:    br i1 [[EXITCOND_PEEL]], label %[[FOR_BODY_PEEL_NEXT:.*]], label %[[FOR_COND_CLEANUP:.*]]
+; CHECK:       [[FOR_BODY_PEEL_NEXT]]:
+; CHECK-NEXT:    br label %[[FOR_BODY_PEEL2:.*]]
+; CHECK:       [[FOR_BODY_PEEL2]]:
+; CHECK-NEXT:    tail call void @_Z1gi(i32 0)
+; CHECK-NEXT:    tail call void @_Z1gi(i32 [[ADD_PEEL]])
+; CHECK-NEXT:    [[ADD_PEEL3:%.*]] = add nuw nsw i32 [[INC_PEEL]], 2
+; CHECK-NEXT:    [[INC_PEEL4:%.*]] = add nuw nsw i32 [[INC_PEEL]], 1
+; CHECK-NEXT:    [[EXITCOND_PEEL5:%.*]] = icmp ne i32 [[INC_PEEL4]], 100000
+; CHECK-NEXT:    br i1 [[EXITCOND_PEEL5]], label %[[FOR_BODY_PEEL_NEXT1:.*]], label %[[FOR_COND_CLEANUP]]
+; CHECK:       [[FOR_BODY_PEEL_NEXT1]]:
+; CHECK-NEXT:    br label %[[FOR_BODY_PEEL7:.*]]
+; CHECK:       [[FOR_BODY_PEEL7]]:
+; CHECK-NEXT:    tail call void @_Z1gi(i32 0)
+; CHECK-NEXT:    tail call void @_Z1gi(i32 [[ADD_PEEL3]])
+; CHECK-NEXT:    [[ADD_PEEL8:%.*]] = add nuw nsw i32 [[INC_PEEL4]], 2
+; CHECK-NEXT:    [[INC_PEEL9:%.*]] = add nuw nsw i32 [[INC_PEEL4]], 1
+; CHECK-NEXT:    [[EXITCOND_PEEL10:%.*]] = icmp ne i32 [[INC_PEEL9]], 100000
+; CHECK-NEXT:    br i1 [[EXITCOND_PEEL10]], label %[[FOR_BODY_PEEL_NEXT6:.*]], label %[[FOR_COND_CLEANUP]]
+; CHECK:       [[FOR_BODY_PEEL_NEXT6]]:
+; CHECK-NEXT:    br label %[[FOR_BODY_PEEL_NEXT11:.*]]
+; CHECK:       [[FOR_BODY_PEEL_NEXT11]]:
+; CHECK-NEXT:    br label %[[ENTRY_PEEL_NEWPH:.*]]
+; CHECK:       [[ENTRY_PEEL_NEWPH]]:
+; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
+; CHECK:       [[FOR_COND_CLEANUP_LOOPEXIT:.*]]:
+; CHECK-NEXT:    br label %[[FOR_COND_CLEANUP]]
+; CHECK:       [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT:    ret void
+; CHECK:       [[FOR_BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[INC_PEEL9]], %[[ENTRY_PEEL_NEWPH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[X:%.*]] = phi i32 [ [[ADD_PEEL]], %[[ENTRY_PEEL_NEWPH]] ], [ [[Y:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[A:%.*]] = phi i32 [ [[ADD_PEEL8]], %[[ENTRY_PEEL_NEWPH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[Y]] = phi i32 [ [[ADD_PEEL3]], %[[ENTRY_PEEL_NEWPH]] ], [ [[A]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    tail call void @_Z1gi(i32 [[X]])
+; CHECK-NEXT:    tail call void @_Z1gi(i32 [[A]])
+; CHECK-NEXT:    [[ADD]] = add nuw nsw i32 [[I]], 2
+; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[INC]], 100000
+; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP_LOOPEXIT]], !llvm.loop [[LOOP0:![0-9]+]]
+;
+entry:
+  br label %for.body
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %x = phi i32 [ 0, %entry ], [ %y, %for.body ]
+  %a = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %y = phi i32 [ 0, %entry ], [ %a, %for.body ]
+  tail call void @_Z1gi(i32 %x)
+  tail call void @_Z1gi(i32 %a)
+  %add = add nuw nsw i32 %i, 2
+  %inc = add nuw nsw i32 %i, 1
+  %exitcond = icmp ne i32 %inc, 100000
+  br i1 %exitcond, label %for.body, label %for.cond.cleanup
+}
+
+; Check that phi analysis can handle an assignment from an induction.
+; The im becomes a loop induction by peeling the loop once, because i is a loop
+; induction.
+; This test is based on TSVC s291.
+;
+; #define N 100
+; void f(int * restrict a, int * restrict b) {
+;   int im = N - 1;
+;   for (int i = 0; i < N; i++) {
+;     a[i] = b[i] + b[im];
+;     im = i;
+;   }
+; }
+define void @phi_refers_another_induction(ptr noundef noalias %a, ptr noundef noalias %b) {
+; CHECK-LABEL: define void @phi_refers_another_induction(
+; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias noundef [[B:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br label %[[FOR_BODY_PEEL_BEGIN:.*]]
+; CHECK:       [[FOR_BODY_PEEL_BEGIN]]:
+; CHECK-NEXT:    br label %[[FOR_BODY_PEEL:.*]]
+; CHECK:       [[FOR_BODY_PEEL]]:
+; CHECK-NEXT:    [[ARRAYIDX_PEEL:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 0
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX_PEEL]], align 4
+; CHECK-NEXT:    [[IDXPROM1_PEEL:%.*]] = zext nneg i32 99 to i64
+; CHECK-NEXT:    [[ARRAYIDX2_PEEL:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[IDXPROM1_PEEL]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2_PEEL]], align 4
+; CHECK-NEXT:    [[ADD_PEEL:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
+; CHECK-NEXT:    [[ARRAYIDX4_PEEL:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 0
+; CHECK-NEXT:    store i32 [[ADD_PEEL]], ptr [[ARRAYIDX4_PEEL]], align 4
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_PEEL:%.*]] = add nuw nsw i64 0, 1
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc nuw nsw i64 0 to i32
+; CHECK-NEXT:    [[EXITCOND_PEEL:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT_PEEL]], 100
+; CHECK-NEXT:    br i1 [[EXITCOND_PEEL]], label %[[FOR_BODY_PEEL_NEXT:.*]], label %[[FOR_COND_CLEANUP:.*]]
+; CHECK:       [[FOR_BODY_PEEL_NEXT]]:
+; CHECK-NEXT:    br label %[[FOR_BODY_PEEL_NEXT1:.*]]
+; CHECK:       [[FOR_BODY_PEEL_NEXT1]]:
+; CHECK-NEXT:    br label %[[ENTRY_PEEL_NEWPH:.*]]
+; CHECK:       [[ENTRY_PEEL_NEWPH]]:
+; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
+; CHECK:       [[FOR_BODY]]:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PEEL]], %[[ENTRY_PEEL_NEWPH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[IM_010:%.*]] = phi i32 [ [[TMP2]], %[[ENTRY_PEEL_NEWPH]] ], [ [[TMP5:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[IDXPROM1:%.*]] = zext nneg i32 [[IM_010]] to i64
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[IDXPROM1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[TMP5]] = trunc nuw nsw i64 [[INDVARS_IV]] to i32
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 100
+; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP_LOOPEXIT:.*]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK:       [[FOR_COND_CLEANUP_LOOPEXIT]]:
+; CHECK-NEXT:    br label %[[FOR_COND_CLEANUP]]
+; CHECK:       [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %im.010 = phi i32 [ 99, %entry ], [ %2, %for.body ]
+  %arrayidx = getelementptr inbounds nuw i32, ptr %b, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %idxprom1 = zext nneg i32 %im.010 to i64
+  %arrayidx2 = getelementptr inbounds nuw i32, ptr %b, i64 %idxprom1
+  %1 = load i32, ptr %arrayidx2, align 4
+  %add = add nsw i32 %1, %0
+  %arrayidx4 = getelementptr inbounds nuw i32, ptr %a, i64 %indvars.iv
+  store i32 %add, ptr %arrayidx4, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %2 = trunc nuw nsw i64 %indvars.iv to i32
+  %exitcond = icmp ne i64 %indvars.iv.next, 100
+  br i1 %exitcond, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+}
+
+; Check that unnecessary peeling doesn't occur if there exists a comparison
+; instruction between an induction and another induction. The original code is
+; as below. Both i and j are inductions, but the comparison i < j is not an
+; induction.
+;
+; val = 42;
+; for (i=0,j=100; i<10000; i+=2,j+=1) {
+;   a[i] = val;
+;   val = i < j;
+; }
+;
+define void @dont_peel_cmp_ind_ind(ptr noundef %a) {
+; CHECK-LABEL: define void @dont_peel_cmp_ind_ind(
+; CHECK-SAME: ptr noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
+; CHECK:       [[FOR_BODY]]:
+; CHECK-NEXT:    [[IV_0:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_0_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[IV_1:%.*]] = phi i64 [ 100, %[[ENTRY]] ], [ [[IV_1_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[VAL:%.*]] = phi i32 [ 42, %[[ENTRY]] ], [ [[VAL_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_0]]
+; CHECK-NEXT:    store i32 10, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[IV_0_NEXT]] = add nuw nsw i64 [[IV_0]], 2
+; CHECK-NEXT:    [[IV_1_NEXT]] = add nuw nsw i64 [[IV_1]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i64 [[IV_0_NEXT]], [[IV_1_NEXT]]
+; CHECK-NEXT:    [[VAL_NEXT]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp slt i64 [[IV_0_NEXT]], 10000
+; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[EXIT:.*]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv.0 = phi i64 [ 0, %entry ], [ %iv.0.next, %for.body ]
+  %iv.1 = phi i64 [ 100, %entry ] , [ %iv.1.next, %for.body ]
+  %val = phi i32 [ 42, %entry ], [ %val.next, %for.body ]
+  %arrayidx = getelementptr inbounds nuw i32, ptr %a, i64 %iv.0
+  store i32 10, ptr %arrayidx, align 4
+  %iv.0.next = add nsw nuw i64 %iv.0, 2
+  %iv.1.next = add nsw nuw i64 %iv.1, 1
+  %val.next.cmp = icmp slt i64 %iv.0.next, %iv.1.next
+  %val.next = zext i1 %val.next.cmp to i32
+  %exitcond = icmp slt i64 %iv.0.next, 10000
+  br i1 %exitcond, label %for.body, label %exit
+
+exit:
+  ret void
+}
+
+; Check that unnecessary peeling doesn't occur if there is an instruction that
+; is neither an add nor a sub and has an induction on its operand. The original
+; code is as below. If the operator is either an add or a sub, then the
+; result would be an induction, so peeling makes sense. In this case, however,
+; the operator is a bitshift.
+;
+; val = 42;
+; for (i=0; i<10000; i++) {
+;   a[i] = val;
+;   val = 1 << i;
+; }
+;
+define void @dont_peel_shl_invariant_ind(ptr noundef %a) {
+; CHECK-LABEL: define void @dont_peel_shl_invariant_ind(
+; CHECK-SAME: ptr noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
+; CHECK:       [[FOR_BODY]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[VAL_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i64, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT:    store i64 [[VAL]], ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[VAL_NEXT]] = shl nuw nsw i64 1, [[IV]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i64 [[IV_NEXT]], 10000
+; CHECK-NEXT:    br i1 [[CMP]], label %[[FOR_BODY]], label %[[EXIT:.*]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %val = phi i64 [ 0, %entry ], [ %val.next, %for.body ]
+  %arrayidx = getelementptr inbounds nuw i64, ptr %a, i64 %iv
+  store i64 %val, ptr %arrayidx, align 4
+  %iv.next = add nsw nuw i64 %iv, 1
+  %val.next = shl nsw nuw i64 1, %iv
+  %cmp = icmp slt i64 %iv.next, 10000
+  br i1 %cmp, label %for.body, label %exit
+
+exit:
+  ret void
+}
+
+
+; Check that the unnecessary peeling occurs in the following case. The cause is
+; that the analyzer determines a casted IV as a non-IV.
+;
+; for (unsigned int i=0; i<10000; i++)
+;   a[(unsigned long)i] = 10;
+;
+define void @induction_undesirable_peel1(ptr noundef %a) {
+; CHECK-LABEL: define void @induction_undesirable_peel1(
+; CHECK-SAME: ptr noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[FOR_BODY_PREHEADER:.*:]]
+; CHECK-NEXT:    br label %[[FOR_BODY_PEEL_BEGIN:.*]]
+; CHECK:       [[FOR_BODY_PEEL_BEGIN]]:
+; CHECK-NEXT:    br label %[[FOR_BODY_PEEL:.*]]
+; CHECK:       [[FOR_BODY_PEEL]]:
+; CHECK-NEXT:    [[ARRAYIDX_PEEL:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 0
+; CHECK-NEXT:    store i32 10, ptr [[ARRAYIDX_PEEL]], align 4
+; CHECK-NEXT:    [[IV_NEXT_PEEL:%.*]] = add nuw nsw i32 0, 1
+; CHECK-NEXT:    [[CONV_NEXT_PEEL:%.*]] = zext i32 [[IV_NEXT_PEEL]] to i64
+; CHECK-NEXT:    [[CMP_PEEL:%.*]] = icmp ugt i64 10000, [[CONV_NEXT_PEEL]]
+; CHECK-NEXT:    br i1 [[CMP_PEEL]], label %[[FOR_BODY_PEEL_NEXT:.*]], label %[[FOR_COND_CLEANUP:.*]]
+; CHECK:       [[FOR_BODY_PEEL_NEXT]]:
+; CHECK-NEXT:    br label %[[FOR_BODY_PEEL_NEXT1:.*]]
+; CHECK:       [[FOR_BODY_PEEL_NEXT1]]:
+; CHECK-NEXT:    br label %[[FOR_BODY_PREHEADER_PEEL_NEWPH:.*]]
+; CHECK:       [[FOR_BODY_PREHEADER_PEEL_NEWPH]]:
+; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
+; CHECK:       [[FOR_BODY]]:
+; CHECK-NEXT:    [[CONV:%.*]] = phi i64 [ [[CONV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[CONV_NEXT_PEEL]], %[[FOR_BODY_PREHEADER_PEEL_NEWPH]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[IV_NEXT_PEEL]], %[[FOR_BODY_PREHEADER_PEEL_NEWPH]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[CONV]]
+; CHECK-NEXT:    store i32 10, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT:    [[CONV_NEXT]] = zext i32 [[IV_NEXT]] to i64
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 10000, [[CONV_NEXT]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP_LOOPEXIT:.*]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK:       [[FOR_COND_CLEANUP_LOOPEXIT]]:
+; CHECK-NEXT:    br label %[[FOR_COND_CLEANUP]]
+; CHECK:       [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT:    ret void
+;
+for.body.preheader:
+  br label %for.body
+
+for.body:
+  %conv = phi i64 [ %conv.next, %for.body ], [ 0, %for.body.preheader ]
+  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds nuw i32, ptr %a, i64 %conv
+  store i32 10, ptr %arrayidx, align 4
+  %iv.next = add nsw nuw i32 %iv, 1
+  %conv.next = zext i32 %iv.next to i64
+  %cmp = icmp ugt i64 10000, %conv.next
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+}
+
+; Check that the unnecessary peeling occurs in the following case. The analyzer
+; cannot detect that the difference between the initial value of %i and %j is
+; equal to the increment of the %i.
+;
+; int j = 0;
+; for (int i=1; i<N; i++) {
+;   a[j] = 10;
+;   j = i;
+; }
+;
+define void @induction_undesirable_peel2(ptr noundef %a) {
+; CHECK-LABEL: define void @induction_undesirable_peel2(
+; CHECK-SAME: ptr noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br label %[[FOR_BODY_PEEL_BEGIN:.*]]
+; CHECK:       [[FOR_BODY_PEEL_BEGIN]]:
+; CHECK-NEXT:    br label %[[FOR_BODY_PEEL:.*]]
+; CHECK:       [[FOR_BODY_PEEL]]:
+; CHECK-NEXT:    [[ARRAYIDX_PEEL:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 0
+; CHECK-NEXT:    store i32 10, ptr [[ARRAYIDX_PEEL]], align 4
+; CHECK-NEXT:    [[I_NEXT_PEEL:%.*]] = add nuw nsw i64 1, 1
+; CHECK-NEXT:    [[CMP_PEEL:%.*]] = icmp slt i64 1, 10000
+; CHECK-NEXT:    br i1 [[CMP_PEEL]], label %[[FOR_BODY_PEEL_NEXT:.*]], label %[[EXIT:.*]]
+; CHECK:       [[FOR_BODY_PEEL_NEXT]]:
+; CHECK-NEXT:    br label %[[FOR_BODY_PEEL_NEXT1:.*]]
+; CHECK:       [[FOR_BODY_PEEL_NEXT1]]:
+; CHECK-NEXT:    br label %[[ENTRY_PEEL_NEWPH:.*]]
+; CHECK:       [[ENTRY_PEEL_NEWPH]]:
+; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
+; CHECK:       [[FOR_BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT_PEEL]], %[[ENTRY_PEEL_NEWPH]] ], [ [[I_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[J:%.*]] = phi i64 [ 1, %[[ENTRY_PEEL_NEWPH]] ], [ [[I]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[J]]
+; CHECK-NEXT:    store i32 10, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[I]], 10000
+; CHECK-NEXT:    br i1 [[CMP]], label %[[FOR_BODY]], label %[[EXIT_LOOPEXIT:.*]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK:       [[EXIT_LOOPEXIT]]:
+; CHECK-NEXT:    br label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i64 [ 1, %entry ], [ %i.next, %for.body ]
+  %j = phi i64 [ 0, %entry ], [ %i, %for.body ]
+  %arrayidx = getelementptr inbounds nuw i32, ptr %a, i64 %j
+  store i32 10, ptr %arrayidx, align 4
+  %i.next = add nsw nuw i64 %i, 1
+  %cmp = icmp slt i64 %i, 10000
+  br i1 %cmp, label %for.body, label %exit
+
+exit:
+  ret void
+}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.peeled.count", i32 3}
+; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]}
+; CHECK: [[META3]] = !{!"llvm.loop.peeled.count", i32 1}
+; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META3]]}
+; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META3]]}
+;.
diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis.ll
index cbc93c9f01b62..e1165bbb45a10 100644
--- a/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis.ll
+++ b/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis.ll
@@ -198,390 +198,3 @@ for.body:
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
 }
 
-; Check that phi analysis can handle a binary operator with induction variables.
-; The phis become induction through the chain of phis, with a unary
-; instruction on a loop induction.  Check that the phis for x, a, and y become
-; loop inductions since x is based on y, which is based on a, which is based
-; on a binary add of a constant and i, which is a loop induction.
-; Consider the calls to g:
-; First iteration: g(0), x=0, g(0), y=1, a=2
-; Second iteration: g(0), x=1, g(2), y=3, a=3
-; Third iteration: g(1), x=3, g(3), y=4, a=4
-; Fourth iteration (and subsequent): g(i), x=i+1, g(i+1), y=i+2, a=i+2
-; Therefore, peeling 3 times makes the phi nodes induction variables.
-;
-; void g(int);
-; void binary() {
-;   int x = 0;
-;   int y = 0;
-;   int a = 0;
-;   for(int i = 0; i <100000; ++i) {
-;     g(x);
-;     x = y;
-;     g(a);
-;     y = a + 1;
-;     a = i + 2;
-;   }
-; }
-;
-define void @binary_induction() {
-; CHECK-LABEL: @binary_induction(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[FOR_BODY_PEEL_BEGIN:%.*]]
-; CHECK:       for.body.peel.begin:
-; CHECK-NEXT:    br label [[FOR_BODY_PEEL:%.*]]
-; CHECK:       for.body.peel:
-; CHECK-NEXT:    tail call void @_Z1gi(i32 0)
-; CHECK-NEXT:    tail call void @_Z1gi(i32 0)
-; CHECK-NEXT:    [[ADD_PEEL:%.*]] = add nuw nsw i32 0, 2
-; CHECK-NEXT:    [[INC_PEEL:%.*]] = add nuw nsw i32 0, 1
-; CHECK-NEXT:    [[EXITCOND_PEEL:%.*]] = icmp ne i32 [[INC_PEEL]], 100000
-; CHECK-NEXT:    br i1 [[EXITCOND_PEEL]], label [[FOR_BODY_PEEL_NEXT:%.*]], label [[FOR_COND_CLEANUP:%.*]]
-; CHECK:       for.body.peel.next:
-; CHECK-NEXT:    br label [[FOR_BODY_PEEL2:%.*]]
-; CHECK:       for.body.peel2:
-; CHECK-NEXT:    tail call void @_Z1gi(i32 0)
-; CHECK-NEXT:    tail call void @_Z1gi(i32 [[ADD_PEEL]])
-; CHECK-NEXT:    [[ADD_PEEL3:%.*]] = add nuw nsw i32 [[INC_PEEL]], 2
-; CHECK-NEXT:    [[INC_PEEL4:%.*]] = add nuw nsw i32 [[INC_PEEL]], 1
-; CHECK-NEXT:    [[EXITCOND_PEEL5:%.*]] = icmp ne i32 [[INC_PEEL4]], 100000
-; CHECK-NEXT:    br i1 [[EXITCOND_PEEL5]], label [[FOR_BODY_PEEL_NEXT1:%.*]], label [[FOR_COND_CLEANUP]]
-; CHECK:       for.body.peel.next1:
-; CHECK-NEXT:    br label [[FOR_BODY_PEEL7:%.*]]
-; CHECK:       for.body.peel7:
-; CHECK-NEXT:    tail call void @_Z1gi(i32 0)
-; CHECK-NEXT:    tail call void @_Z1gi(i32 [[ADD_PEEL3]])
-; CHECK-NEXT:    [[ADD_PEEL8:%.*]] = add nuw nsw i32 [[INC_PEEL4]], 2
-; CHECK-NEXT:    [[INC_PEEL9:%.*]] = add nuw nsw i32 [[INC_PEEL4]], 1
-; CHECK-NEXT:    [[EXITCOND_PEEL10:%.*]] = icmp ne i32 [[INC_PEEL9]], 100000
-; CHECK-NEXT:    br i1 [[EXITCOND_PEEL10]], label [[FOR_BODY_PEEL_NEXT6:%.*]], label [[FOR_COND_CLEANUP]]
-; CHECK:       for.body.peel.next6:
-; CHECK-NEXT:    br label [[FOR_BODY_PEEL_NEXT11:%.*]]
-; CHECK:       for.body.peel.next11:
-; CHECK-NEXT:    br label [[ENTRY_PEEL_NEWPH:%.*]]
-; CHECK:       entry.peel.newph:
-; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
-; CHECK:       for.cond.cleanup.loopexit:
-; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
-; CHECK:       for.cond.cleanup:
-; CHECK-NEXT:    ret void
-; CHECK:       for.body:
-; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[INC_PEEL9]], [[ENTRY_PEEL_NEWPH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[X:%.*]] = phi i32 [ [[ADD_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[Y:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[A:%.*]] = phi i32 [ [[ADD_PEEL8]], [[ENTRY_PEEL_NEWPH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[Y]] = phi i32 [ [[ADD_PEEL3]], [[ENTRY_PEEL_NEWPH]] ], [ [[A]], [[FOR_BODY]] ]
-; CHECK-NEXT:    tail call void @_Z1gi(i32 [[X]])
-; CHECK-NEXT:    tail call void @_Z1gi(i32 [[A]])
-; CHECK-NEXT:    [[ADD]] = add nuw nsw i32 [[I]], 2
-; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I]], 1
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[INC]], 100000
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], !llvm.loop [[LOOP3:![0-9]+]]
-;
-entry:
-  br label %for.body
-
-for.cond.cleanup:
-  ret void
-
-for.body:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %x = phi i32 [ 0, %entry ], [ %y, %for.body ]
-  %a = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %y = phi i32 [ 0, %entry ], [ %a, %for.body ]
-  tail call void @_Z1gi(i32 %x)
-  tail call void @_Z1gi(i32 %a)
-  %add = add nuw nsw i32 %i, 2
-  %inc = add nuw nsw i32 %i, 1
-  %exitcond = icmp ne i32 %inc, 100000
-  br i1 %exitcond, label %for.body, label %for.cond.cleanup
-}
-
-; Check that phi analysis can handle an assignment from an induction.
-; The im becomes a loop induction by peeling the loop once, because i is a loop
-; induction.
-; This test is based on TSVC s291.
-;
-; #define N 100
-; void f(int * restrict a, int * restrict b) {
-;   int im = N - 1;
-;   for (int i = 0; i < N; i++) {
-;     a[i] = b[i] + b[im];
-;     im = i;
-;   }
-; }
-define void @phi_refers_another_induction(ptr noundef noalias %a, ptr noundef noalias %b) {
-; CHECK-LABEL: @phi_refers_another_induction(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[FOR_BODY_PEEL_BEGIN:%.*]]
-; CHECK:       for.body.peel.begin:
-; CHECK-NEXT:    br label [[FOR_BODY_PEEL:%.*]]
-; CHECK:       for.body.peel:
-; CHECK-NEXT:    [[ARRAYIDX_PEEL:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 0
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX_PEEL]], align 4
-; CHECK-NEXT:    [[IDXPROM1_PEEL:%.*]] = zext nneg i32 99 to i64
-; CHECK-NEXT:    [[ARRAYIDX2_PEEL:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[IDXPROM1_PEEL]]
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2_PEEL]], align 4
-; CHECK-NEXT:    [[ADD_PEEL:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
-; CHECK-NEXT:    [[ARRAYIDX4_PEEL:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 0
-; CHECK-NEXT:    store i32 [[ADD_PEEL]], ptr [[ARRAYIDX4_PEEL]], align 4
-; CHECK-NEXT:    [[INDVARS_IV_NEXT_PEEL:%.*]] = add nuw nsw i64 0, 1
-; CHECK-NEXT:    [[TMP2:%.*]] = trunc nuw nsw i64 0 to i32
-; CHECK-NEXT:    [[EXITCOND_PEEL:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT_PEEL]], 100
-; CHECK-NEXT:    br i1 [[EXITCOND_PEEL]], label [[FOR_BODY_PEEL_NEXT:%.*]], label [[FOR_COND_CLEANUP:%.*]]
-; CHECK:       for.body.peel.next:
-; CHECK-NEXT:    br label [[FOR_BODY_PEEL_NEXT1:%.*]]
-; CHECK:       for.body.peel.next1:
-; CHECK-NEXT:    br label [[ENTRY_PEEL_NEWPH:%.*]]
-; CHECK:       entry.peel.newph:
-; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
-; CHECK:       for.body:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[IM_010:%.*]] = phi i32 [ [[TMP2]], [[ENTRY_PEEL_NEWPH]] ], [ [[TMP5:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[IDXPROM1:%.*]] = zext nneg i32 [[IM_010]] to i64
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[IDXPROM1]]
-; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
-; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP3]]
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4
-; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[TMP5]] = trunc nuw nsw i64 [[INDVARS_IV]] to i32
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 100
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], !llvm.loop [[LOOP4:![0-9]+]]
-; CHECK:       for.cond.cleanup.loopexit:
-; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
-; CHECK:       for.cond.cleanup:
-; CHECK-NEXT:    ret void
-;
-entry:
-  br label %for.body
-
-for.body:
-  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %im.010 = phi i32 [ 99, %entry ], [ %2, %for.body ]
-  %arrayidx = getelementptr inbounds nuw i32, ptr %b, i64 %indvars.iv
-  %0 = load i32, ptr %arrayidx, align 4
-  %idxprom1 = zext nneg i32 %im.010 to i64
-  %arrayidx2 = getelementptr inbounds nuw i32, ptr %b, i64 %idxprom1
-  %1 = load i32, ptr %arrayidx2, align 4
-  %add = add nsw i32 %1, %0
-  %arrayidx4 = getelementptr inbounds nuw i32, ptr %a, i64 %indvars.iv
-  store i32 %add, ptr %arrayidx4, align 4
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %2 = trunc nuw nsw i64 %indvars.iv to i32
-  %exitcond = icmp ne i64 %indvars.iv.next, 100
-  br i1 %exitcond, label %for.body, label %for.cond.cleanup
-
-for.cond.cleanup:
-  ret void
-}
-
-; Check that unnecessary peeling doesn't occur if there exist a comparison
-; instruction between an induction and another induction. The original code is
-; as below. Both i and j are inductions, but the comparison i < j is not an
-; induction.
-;
-; val = 42;
-; for (i=0,j=100; i<10000; i+=2,j+=1) {
-;   a[i] = val;
-;   val = i < j;
-; }
-;
-define void @dont_peel_cmp_ind_ind(ptr noundef %a) {
-; CHECK-LABEL: @dont_peel_cmp_ind_ind(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
-; CHECK:       for.body:
-; CHECK-NEXT:    [[IV_0:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_0_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[IV_1:%.*]] = phi i64 [ 100, [[ENTRY]] ], [ [[IV_1_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[VAL:%.*]] = phi i32 [ 42, [[ENTRY]] ], [ [[VAL_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[IV_0]]
-; CHECK-NEXT:    store i32 10, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[IV_0_NEXT]] = add nuw nsw i64 [[IV_0]], 2
-; CHECK-NEXT:    [[IV_1_NEXT]] = add nuw nsw i64 [[IV_1]], 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i64 [[IV_0_NEXT]], [[IV_1_NEXT]]
-; CHECK-NEXT:    [[VAL_NEXT]] = zext i1 [[CMP]] to i32
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp slt i64 [[IV_0_NEXT]], 10000
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT:%.*]]
-; CHECK:       exit:
-; CHECK-NEXT:    ret void
-;
-entry:
-  br label %for.body
-
-for.body:
-  %iv.0 = phi i64 [ 0, %entry ], [ %iv.0.next, %for.body ]
-  %iv.1 = phi i64 [ 100, %entry ] , [ %iv.1.next, %for.body ]
-  %val = phi i32 [ 42, %entry ], [ %val.next, %for.body ]
-  %arrayidx = getelementptr inbounds nuw i32, ptr %a, i64 %iv.0
-  store i32 10, ptr %arrayidx, align 4
-  %iv.0.next = add nsw nuw i64 %iv.0, 2
-  %iv.1.next = add nsw nuw i64 %iv.1, 1
-  %val.next.cmp = icmp slt i64 %iv.0.next, %iv.1.next
-  %val.next = zext i1 %val.next.cmp to i32
-  %exitcond = icmp slt i64 %iv.0.next, 10000
-  br i1 %exitcond, label %for.body, label %exit
-
-exit:
-  ret void
-}
-
-; Check that unnecessary peeling doesn't occur if there is an instruction that
-; is neither an add nor a sub and has an induction as its operand. The original
-; code is shown below. If the operator were either an add or a sub, then the
-; result would be an induction, so peeling would make sense. In this case,
-; however, the operator is a bitshift.
-;
-; val = 0;
-; for (i=0; i<10000; i++) {
-;   a[i] = val;
-;   val = 1 << i;
-; }
-;
-define void @dont_peel_shl_invariant_ind(ptr noundef %a) {
-; CHECK-LABEL: @dont_peel_shl_invariant_ind(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
-; CHECK:       for.body:
-; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[VAL:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VAL_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i64, ptr [[A:%.*]], i64 [[IV]]
-; CHECK-NEXT:    store i64 [[VAL]], ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT:    [[VAL_NEXT]] = shl nuw nsw i64 1, [[IV]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i64 [[IV_NEXT]], 10000
-; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT:%.*]]
-; CHECK:       exit:
-; CHECK-NEXT:    ret void
-;
-entry:
-  br label %for.body
-
-for.body:
-  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %val = phi i64 [ 0, %entry ], [ %val.next, %for.body ]
-  %arrayidx = getelementptr inbounds nuw i64, ptr %a, i64 %iv
-  store i64 %val, ptr %arrayidx, align 4
-  %iv.next = add nsw nuw i64 %iv, 1
-  %val.next = shl nsw nuw i64 1, %iv
-  %cmp = icmp slt i64 %iv.next, 10000
-  br i1 %cmp, label %for.body, label %exit
-
-exit:
-  ret void
-}
-
-
-; Check that unnecessary peeling occurs in the following case. The cause is
-; that the analyzer treats a cast IV as a non-IV.
-;
-; for (unsigned int i=0; i<10000; i++)
-;   a[(unsigned long)i] = 10;
-;
-define void @induction_undesirable_peel1(ptr noundef %a) {
-; CHECK-LABEL: @induction_undesirable_peel1(
-; CHECK-NEXT:  for.body.preheader:
-; CHECK-NEXT:    br label [[FOR_BODY_PEEL_BEGIN:%.*]]
-; CHECK:       for.body.peel.begin:
-; CHECK-NEXT:    br label [[FOR_BODY_PEEL:%.*]]
-; CHECK:       for.body.peel:
-; CHECK-NEXT:    [[ARRAYIDX_PEEL:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 0
-; CHECK-NEXT:    store i32 10, ptr [[ARRAYIDX_PEEL]], align 4
-; CHECK-NEXT:    [[IV_NEXT_PEEL:%.*]] = add nuw nsw i32 0, 1
-; CHECK-NEXT:    [[CONV_NEXT_PEEL:%.*]] = zext i32 [[IV_NEXT_PEEL]] to i64
-; CHECK-NEXT:    [[CMP_PEEL:%.*]] = icmp ugt i64 10000, [[CONV_NEXT_PEEL]]
-; CHECK-NEXT:    br i1 [[CMP_PEEL]], label [[FOR_BODY_PEEL_NEXT:%.*]], label [[FOR_COND_CLEANUP:%.*]]
-; CHECK:       for.body.peel.next:
-; CHECK-NEXT:    br label [[FOR_BODY_PEEL_NEXT1:%.*]]
-; CHECK:       for.body.peel.next1:
-; CHECK-NEXT:    br label [[FOR_BODY_PREHEADER_PEEL_NEWPH:%.*]]
-; CHECK:       for.body.preheader.peel.newph:
-; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
-; CHECK:       for.body:
-; CHECK-NEXT:    [[CONV:%.*]] = phi i64 [ [[CONV_NEXT:%.*]], [[FOR_BODY]] ], [ [[CONV_NEXT_PEEL]], [[FOR_BODY_PREHEADER_PEEL_NEWPH]] ]
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[IV_NEXT_PEEL]], [[FOR_BODY_PREHEADER_PEEL_NEWPH]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[CONV]]
-; CHECK-NEXT:    store i32 10, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
-; CHECK-NEXT:    [[CONV_NEXT]] = zext i32 [[IV_NEXT]] to i64
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 10000, [[CONV_NEXT]]
-; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], !llvm.loop [[LOOP6:![0-9]+]]
-; CHECK:       for.cond.cleanup.loopexit:
-; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
-; CHECK:       for.cond.cleanup:
-; CHECK-NEXT:    ret void
-;
-for.body.preheader:
-  br label %for.body
-
-for.body:
-  %conv = phi i64 [ %conv.next, %for.body ], [ 0, %for.body.preheader ]
-  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds nuw i32, ptr %a, i64 %conv
-  store i32 10, ptr %arrayidx, align 4
-  %iv.next = add nsw nuw i32 %iv, 1
-  %conv.next = zext i32 %iv.next to i64
-  %cmp = icmp ugt i64 10000, %conv.next
-  br i1 %cmp, label %for.body, label %for.cond.cleanup
-
-for.cond.cleanup:
-  ret void
-}
-
-; Check that unnecessary peeling occurs in the following case. The analyzer
-; cannot detect that the difference between the initial values of %i and %j
-; equals the increment of %i.
-;
-; int j = 0;
-; for (int i=1; i<N; i++) {
-;   a[j] = 10;
-;   j = i;
-; }
-;
-define void @induction_undesirable_peel2(ptr noundef %a) {
-; CHECK-LABEL: @induction_undesirable_peel2(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[FOR_BODY_PEEL_BEGIN:%.*]]
-; CHECK:       for.body.peel.begin:
-; CHECK-NEXT:    br label [[FOR_BODY_PEEL:%.*]]
-; CHECK:       for.body.peel:
-; CHECK-NEXT:    [[ARRAYIDX_PEEL:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 0
-; CHECK-NEXT:    store i32 10, ptr [[ARRAYIDX_PEEL]], align 4
-; CHECK-NEXT:    [[I_NEXT_PEEL:%.*]] = add nuw nsw i64 1, 1
-; CHECK-NEXT:    [[CMP_PEEL:%.*]] = icmp slt i64 1, 10000
-; CHECK-NEXT:    br i1 [[CMP_PEEL]], label [[FOR_BODY_PEEL_NEXT:%.*]], label [[EXIT:%.*]]
-; CHECK:       for.body.peel.next:
-; CHECK-NEXT:    br label [[FOR_BODY_PEEL_NEXT1:%.*]]
-; CHECK:       for.body.peel.next1:
-; CHECK-NEXT:    br label [[ENTRY_PEEL_NEWPH:%.*]]
-; CHECK:       entry.peel.newph:
-; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
-; CHECK:       for.body:
-; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[I_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[J:%.*]] = phi i64 [ 1, [[ENTRY_PEEL_NEWPH]] ], [ [[I]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[J]]
-; CHECK-NEXT:    store i32 10, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[I]], 10000
-; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP7:![0-9]+]]
-; CHECK:       exit.loopexit:
-; CHECK-NEXT:    br label [[EXIT]]
-; CHECK:       exit:
-; CHECK-NEXT:    ret void
-;
-entry:
-  br label %for.body
-
-for.body:
-  %i = phi i64 [ 1, %entry ], [ %i.next, %for.body ]
-  %j = phi i64 [ 0, %entry ], [ %i, %for.body ]
-  %arrayidx = getelementptr inbounds nuw i32, ptr %a, i64 %j
-  store i32 10, ptr %arrayidx, align 4
-  %i.next = add nsw nuw i64 %i, 1
-  %cmp = icmp slt i64 %i, 10000
-  br i1 %cmp, label %for.body, label %exit
-
-exit:
-  ret void
-}



More information about the llvm-commits mailing list