[llvm] [SCEV] Infer loop max trip count from memory accesses (PR #70361)

Sat Mar 2 11:53:36 PST 2024

https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/70361

>From cf6d59b296f0f83a67efb2d37593629b6b60fcae Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Sat, 2 Mar 2024 14:47:31 -0500
Subject: [PATCH] [SCEV] Infer loop max trip count from memory accesses

Data references in a loop are assumed not to access elements beyond the
statically allocated size of the object. Since doing so would be undefined
behavior, we can infer a maximum trip count for the loop from it.

This patch is refined from the original one (https://reviews.llvm.org/D155049)
authored by @Peakulorain.
---
 llvm/include/llvm/Analysis/ScalarEvolution.h  |   7 +
 llvm/lib/Analysis/ScalarEvolution.cpp         | 265 +++++++++++++++++-
 .../infer-trip-count-idx-wrap.ll              | 110 ++++++++
 .../ScalarEvolution/infer-trip-count.ll       | 191 +++++++++++++
 4 files changed, 567 insertions(+), 6 deletions(-)
 create mode 100644 llvm/test/Analysis/ScalarEvolution/infer-trip-count-idx-wrap.ll
 create mode 100644 llvm/test/Analysis/ScalarEvolution/infer-trip-count.ll

diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index 0880f9c65aa45d..36e2034a024777 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1153,6 +1153,10 @@ class ScalarEvolution {
                                      bool ExitIfTrue, bool ControlsOnlyExit,
                                      bool AllowPredicates = false);
 
+  /// Compute an upper bound on how many times the body of loop \p L executes,
+  /// inferred from the memory accesses inside the loop body.
+  ExitLimit computeExitLimitFromMemAccess(const Loop *L);
+
   /// A predicate is said to be monotonically increasing if may go from being
   /// false to being true as the loop iterates, but never the other way
   /// around.  A predicate is said to be monotonically decreasing if may go
@@ -1804,6 +1808,9 @@ class ScalarEvolution {
                                          Value *ExitCond, bool ExitIfTrue,
                                          bool ControlsOnlyExit,
                                          bool AllowPredicates);
+  ExitLimit computeExitLimitFromMemAccessCached(ExitLimitCacheTy &Cache,
+                                                const Loop *L);
+  ExitLimit computeExitLimitFromMemAccessImpl(const Loop *L);
   std::optional<ScalarEvolution::ExitLimit> computeExitLimitFromCondFromBinOp(
       ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, bool ExitIfTrue,
       bool ControlsOnlyExit, bool AllowPredicates);
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 4b2db80bc1ec30..f928626043e2b0 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -114,6 +114,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugCounter.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Support/SaveAndRestore.h"
@@ -249,6 +250,13 @@ static cl::opt<bool> UseContextForNoWrapFlagInference(
     cl::desc("Infer nuw/nsw flags using context where suitable"),
     cl::init(true));
 
+static cl::opt<bool> UseMemoryAccessUBForBEInference(
+    "scalar-evolution-infer-max-trip-count-from-memory-access", cl::Hidden,
+    cl::desc("Infer loop max trip count from memory access"), cl::init(false));
+
+DEBUG_COUNTER(MemoryAccessCounter, "memory-access-ub",
+              "Controls analysis of memory access");
+
 //===----------------------------------------------------------------------===//
 //                           SCEV class definitions
 //===----------------------------------------------------------------------===//
@@ -8253,6 +8261,210 @@ ScalarEvolution::getSmallConstantTripMultiple(const Loop *L,
   return getSmallConstantTripMultiple(L, ExitCount);
 }
 
+/// Collect into \p MemInsts the load/store instructions of loop \p L that live
+/// in blocks that dominate the loop latch.
+static void
+collectExecLoadStoreInsideLoop(const Loop *L, DominatorTree &DT,
+                               SmallVector<Instruction *, 4> &MemInsts) {
+  // Only handle innermost loops in loop-simplify form; for irregular loops it
+  // is hard to tell whether a load/store executes on every iteration.
+  if (!L->isLoopSimplifyForm() || !L->isInnermost())
+    return;
+
+  const BasicBlock *LoopLatch = L->getLoopLatch();
+  assert(LoopLatch && "normal form loop doesn't have a latch");
+  assert(L->getExitingBlock() == LoopLatch);
+
+  // Bail out if the enclosing function carries any sanitizer attribute.
+  const Function *F = LoopLatch->getParent();
+  if (F->hasFnAttribute(Attribute::SanitizeAddress) ||
+      F->hasFnAttribute(Attribute::SanitizeThread) ||
+      F->hasFnAttribute(Attribute::SanitizeMemory) ||
+      F->hasFnAttribute(Attribute::SanitizeHWAddress) ||
+      F->hasFnAttribute(Attribute::SanitizeMemTag))
+    return;
+
+  for (auto *BB : L->getBlocks()) {
+    // Only blocks dominating the latch are guaranteed to run as often as the
+    // latch. A conditional block like MemAccessBB below must be skipped:
+    //            Entry
+    //              │
+    //        ┌─────▼─────┐
+    //        │Loop Header◄─────┐
+    //        └──┬──────┬─┘     │
+    //           │      │       │
+    //  ┌────────▼──┐ ┌─▼─────┐ │
+    //  │MemAccessBB│ │OtherBB│ │
+    //  └────────┬──┘ └─┬─────┘ │
+    //           │      │       │
+    //         ┌─▼──────▼─┐     │
+    //         │Loop Latch├─────┘
+    //         └────┬─────┘
+    //              ▼
+    //             Exit
+    if (!DT.dominates(BB, LoopLatch))
+      continue;
+
+    for (Instruction &I : *BB) {
+      if (isa<LoadInst>(&I) || isa<StoreInst>(&I))
+        MemInsts.push_back(&I);
+    }
+  }
+}
+
+/// Return the object size of pointer \p V as a constant of type \p RTy, or
+/// nullptr if \p V is not a SCEVUnknown or its size cannot be determined.
+static const SCEV *getCertainSizeOfMem(const SCEV *V, Type *RTy,
+                                       const DataLayout &DL,
+                                       const TargetLibraryInfo &TLI,
+                                       ScalarEvolution *SE) {
+  const auto *Base = dyn_cast<SCEVUnknown>(V);
+  if (!Base)
+    return nullptr;
+  uint64_t ObjSize = 0;
+  if (llvm::getObjectSize(Base->getValue(), ObjSize, DL, &TLI))
+    return SE->getConstant(RTy, ObjSize);
+  return nullptr;
+}
+
+/// Unsigned range width of \p AddRec divided (rounding up) by its step size.
+static const SCEV *getIndexRange(const SCEVAddRecExpr *AddRec,
+                                 ScalarEvolution *SE) {
+  const APInt Span =
+      SE->getUnsignedRangeMax(AddRec) - SE->getUnsignedRangeMin(AddRec);
+  const SCEV *Stride = AddRec->getStepRecurrence(*SE);
+  if (SE->isKnownNegative(Stride))
+    Stride = SE->getNegativeSCEV(Stride);
+  return SE->getUDivCeilSCEV(SE->getConstant(Span), Stride);
+}
+
+/// Look through nested casts in \p S and return the innermost operand if it is
+/// a SCEVAddExpr; otherwise return SCEVCouldNotCompute. NOTE(review): the
+/// caller expects a SCEVAddRecExpr, which this never yields - confirm intent.
+static const SCEV *peelCastExpr(const SCEVCastExpr *S, ScalarEvolution *SE) {
+  const SCEV *Op = S->getOperand();
+  if (isa<SCEVCouldNotCompute>(Op) || isa<SCEVAddExpr>(Op))
+    return Op;
+  if (const auto *Cast = dyn_cast<SCEVCastExpr>(Op))
+    return peelCastExpr(Cast, SE);
+  return SE->getCouldNotCompute();
+}
+
+static Value *peelExt(Value *V) {
+  while (isa<ZExtInst>(V) || isa<SExtInst>(V)) // Strip zext/sext wrappers.
+    V = cast<Instruction>(V)->getOperand(0);
+  return V;
+}
+
+/// Return true if \p Index is \p InductionVar itself, or becomes it once all
+/// zext/sext instructions wrapped around it have been stripped.
+static bool isIndexInductionVariable(PHINode *InductionVar, Value *Index) {
+  // The plain identity comparison is tried first; peelExt covers the
+  // extended forms.
+  return InductionVar == Index || peelExt(Index) == InductionVar;
+}
+
+/// Bound the trip count by the value range of the first index of GEP \p Ptr
+/// that is \p InductionVar (possibly zero/sign-extended), and return the umin
+/// of that bound and \p MaxExecCount. Returns SCEVCouldNotCompute if \p Ptr is
+/// not a GEP, has no such index, or no constant bound on the index is found.
+static const SCEV *checkIndexRange(Value *Ptr, PHINode *InductionVar,
+                                   ScalarEvolution *SE,
+                                   const SCEVConstant *MaxExecCount) {
+  auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+  if (!GEP)
+    return SE->getCouldNotCompute();
+
+  for (Value *Idx : GEP->indices()) {
+    if (!isIndexInductionVariable(InductionVar, Idx))
+      continue;
+    // Step through one extension (if any) to the value feeding the index.
+    Value *Narrow = Idx;
+    if (isa<ZExtInst>(Idx) || isa<SExtInst>(Idx))
+      Narrow = cast<Instruction>(Idx)->getOperand(0);
+
+    const SCEV *IdxExpr = SE->getSCEV(Narrow);
+    if (isa<SCEVCouldNotCompute>(IdxExpr))
+      return SE->getCouldNotCompute();
+    if (const auto *Cast = dyn_cast<SCEVCastExpr>(IdxExpr)) {
+      IdxExpr = peelCastExpr(Cast, SE);
+      if (isa<SCEVCouldNotCompute>(IdxExpr))
+        return SE->getCouldNotCompute();
+    }
+    const auto *IdxRec = dyn_cast<SCEVAddRecExpr>(IdxExpr);
+    if (!IdxRec)
+      return SE->getCouldNotCompute();
+
+    // Only the first matching index is used; combine its bound with the one
+    // derived from the object size.
+    const SCEV *IdxBound = getIndexRange(IdxRec, SE);
+    if (!isa<SCEVConstant>(IdxBound))
+      return SE->getCouldNotCompute();
+    return SE->getUMinFromMismatchedTypes(IdxBound, MaxExecCount);
+  }
+
+  return SE->getCouldNotCompute();
+}
+
+/// Derive a constant upper bound on the backedge-taken count of \p L from
+/// the sizes of the objects accessed by its load/store instructions.
+ScalarEvolution::ExitLimit
+ScalarEvolution::computeExitLimitFromMemAccessImpl(const Loop *L) {
+  SmallVector<Instruction *, 4> Accesses;
+  collectExecLoadStoreInsideLoop(L, DT, Accesses);
+
+  const DataLayout &DL = getDataLayout();
+  SmallVector<const SCEV *> Bounds;
+  for (Instruction *Access : Accesses) {
+    Value *Ptr = getLoadStorePointerOperand(Access);
+    assert(Ptr && "empty pointer operand");
+    const auto *PtrRec = dyn_cast<SCEVAddRecExpr>(getSCEV(Ptr));
+    if (!PtrRec || !PtrRec->isAffine())
+      continue;
+    const SCEV *Stride = PtrRec->getStepRecurrence(*this);
+    const SCEV *ObjSize = getCertainSizeOfMem(getPointerBase(PtrRec),
+                                              Stride->getType(), DL, TLI, this);
+    if (!ObjSize)
+      continue;
+    // The access count is bounded by the object size divided by the stride.
+    if (isKnownNegative(Stride))
+      Stride = getNegativeSCEV(Stride);
+    const auto *MaxExecCount =
+        dyn_cast<SCEVConstant>(getUDivCeilSCEV(ObjSize, Stride));
+    if (!MaxExecCount || MaxExecCount->getAPInt().getActiveBits() > 32)
+      continue;
+    const SCEV *Bound = checkIndexRange(Ptr, L->getInductionVariable(*this),
+                                        this, MaxExecCount);
+    if (!isa<SCEVCouldNotCompute>(Bound))
+      Bounds.push_back(Bound);
+  }
+
+  if (Bounds.empty())
+    return getCouldNotCompute();
+
+  // The smallest per-access bound is reported; the exact count stays unknown.
+  const SCEV *Count = getUMinFromMismatchedTypes(Bounds);
+  return {getCouldNotCompute(), Count, Count, /*MaxOrZero=*/false};
+}
+
+/// Return the memory-access-based exit limit of \p L, consulting \p Cache
+/// first and recording the result in it on a miss.
+ScalarEvolution::ExitLimit
+ScalarEvolution::computeExitLimitFromMemAccessCached(ExitLimitCacheTy &Cache,
+                                                     const Loop *L) {
+  // The cache key includes an exit condition and three flags that carry no
+  // meaning for this query; fixed placeholders are used for all of them.
+  Value *const NoCond = nullptr;
+  const bool Unused = true;
+
+  if (auto Hit = Cache.find(L, NoCond, Unused, Unused, Unused))
+    return *Hit;
+
+  ExitLimit Fresh = computeExitLimitFromMemAccessImpl(L);
+  Cache.insert(L, NoCond, Unused, Unused, Unused, Fresh);
+  return Fresh;
+}
+
 const SCEV *ScalarEvolution::getExitCount(const Loop *L,
                                           const BasicBlock *ExitingBlock,
                                           ExitCountKind Kind) {
@@ -8835,6 +9047,16 @@ ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock,
   if (!Latch || !DT.dominates(ExitingBlock, Latch))
     return getCouldNotCompute();
 
+  // FIXME: To make the case more typical, we only analyze loops that have one
+  // exiting block and the block must be the latch. It is easier to capture
+  // loops with memory access that will be executed in every iteration.
+  const SCEV *PotentiallyBetterConstantMax = getCouldNotCompute();
+  if (UseMemoryAccessUBForBEInference && Latch == L->getExitingBlock()) {
+    assert(Latch == ExitingBlock);
+    auto EL = computeExitLimitFromMemAccess(L);
+    PotentiallyBetterConstantMax = EL.ConstantMaxNotTaken;
+  }
+
   bool IsOnlyExit = (L->getExitingBlock() != nullptr);
   Instruction *Term = ExitingBlock->getTerminator();
   if (BranchInst *BI = dyn_cast<BranchInst>(Term)) {
@@ -8843,9 +9065,14 @@ ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock,
     assert(ExitIfTrue == L->contains(BI->getSuccessor(1)) &&
            "It should have one successor in loop and one exit block!");
     // Proceed to the next level to examine the exit condition expression.
-    return computeExitLimitFromCond(L, BI->getCondition(), ExitIfTrue,
-                                    /*ControlsOnlyExit=*/IsOnlyExit,
-                                    AllowPredicates);
+    ExitLimit EL = computeExitLimitFromCond(L, BI->getCondition(), ExitIfTrue,
+                                            /*ControlsOnlyExit=*/IsOnlyExit,
+                                            AllowPredicates);
+    if (!isa<SCEVCouldNotCompute>(EL.ConstantMaxNotTaken) &&
+        !isa<SCEVCouldNotCompute>(PotentiallyBetterConstantMax))
+      EL.ConstantMaxNotTaken = getUMinFromMismatchedTypes(
+          EL.ConstantMaxNotTaken, PotentiallyBetterConstantMax);
+    return EL;
   }
 
   if (SwitchInst *SI = dyn_cast<SwitchInst>(Term)) {
@@ -8858,9 +9085,14 @@ ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock,
         Exit = SBB;
       }
     assert(Exit && "Exiting block must have at least one exit");
-    return computeExitLimitFromSingleExitSwitch(
-        L, SI, Exit,
-        /*ControlsOnlyExit=*/IsOnlyExit);
+    ExitLimit EL =
+        computeExitLimitFromSingleExitSwitch(L, SI, Exit,
+                                             /*ControlsOnlyExit=*/IsOnlyExit);
+    if (!isa<SCEVCouldNotCompute>(EL.ConstantMaxNotTaken) &&
+        !isa<SCEVCouldNotCompute>(PotentiallyBetterConstantMax))
+      EL.ConstantMaxNotTaken = getUMinFromMismatchedTypes(
+          EL.ConstantMaxNotTaken, PotentiallyBetterConstantMax);
+    return EL;
   }
 
   return getCouldNotCompute();
@@ -8874,6 +9106,16 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCond(
                                         ControlsOnlyExit, AllowPredicates);
 }
 
+/// Compute the memory-access-based exit limit of \p L, unless the
+/// MemoryAccessCounter debug counter says this query should be skipped.
+ScalarEvolution::ExitLimit
+ScalarEvolution::computeExitLimitFromMemAccess(const Loop *L) {
+  if (!DebugCounter::shouldExecute(MemoryAccessCounter))
+    return getCouldNotCompute();
+  ExitLimitCacheTy Cache(L, /*ExitIfTrue=*/true, /*AllowPredicates=*/true);
+  return computeExitLimitFromMemAccessCached(Cache, L);
+}
+
 std::optional<ScalarEvolution::ExitLimit>
 ScalarEvolution::ExitLimitCache::find(const Loop *L, Value *ExitCond,
                                       bool ExitIfTrue, bool ControlsOnlyExit,
@@ -13544,6 +13786,17 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
     OS << ": ";
     OS << "Trip multiple is " << SE->getSmallConstantTripMultiple(L) << "\n";
   }
+
+  if (UseMemoryAccessUBForBEInference) {
+    unsigned SmallMaxTrip = SE->getSmallConstantMaxTripCount(L);
+    OS << "Loop ";
+    L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
+    OS << ": ";
+    if (SmallMaxTrip)
+      OS << "Small constant max trip is " << SmallMaxTrip << "\n";
+    else
+      OS << "Small constant max trip couldn't be computed.\n";
+  }
 }
 
 namespace llvm {
diff --git a/llvm/test/Analysis/ScalarEvolution/infer-trip-count-idx-wrap.ll b/llvm/test/Analysis/ScalarEvolution/infer-trip-count-idx-wrap.ll
new file mode 100644
index 00000000000000..2971cbd50d94a8
--- /dev/null
+++ b/llvm/test/Analysis/ScalarEvolution/infer-trip-count-idx-wrap.ll
@@ -0,0 +1,110 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -disable-output "-passes=print<scalar-evolution>" -scalar-evolution-classify-expressions=0 -scalar-evolution-infer-max-trip-count-from-memory-access 2>&1 | FileCheck %s
+
+define void @ComputeMaxTripCountFromArrayIdxWrap(i32 signext %len) {
+; CHECK-LABEL: 'ComputeMaxTripCountFromArrayIdxWrap'
+; CHECK-NEXT:  Determining loop execution counts for: @ComputeMaxTripCountFromArrayIdxWrap
+; CHECK-NEXT:  Loop %for.body: backedge-taken count is (-1 + %len)
+; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is 255
+; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is (-1 + %len)
+; CHECK-NEXT:  Loop %for.body: Predicated backedge-taken count is (-1 + %len)
+; CHECK-NEXT:   Predicates:
+; CHECK-NEXT:  Loop %for.body: Trip multiple is 1
+; CHECK-NEXT:  Loop %for.body: Small constant max trip is 256
+;
+entry:
+  %a = alloca [256 x i32], align 4
+  %cmp4 = icmp sgt i32 %len, 0
+  br i1 %cmp4, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+  br label %for.body
+
+for.cond.cleanup.loopexit:
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %iv = phi i8 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %idxprom = zext i8 %iv to i64
+  %arrayidx = getelementptr inbounds [256 x i32], [256 x i32]* %a, i64 0, i64 %idxprom
+  store i32 0, i32* %arrayidx, align 4
+  %inc = add nuw i8 %iv, 1
+  %inc_zext = zext i8 %inc to i32
+  %cmp = icmp slt i32 %inc_zext, %len
+  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}
+
+define void @ComputeMaxTripCountFromArrayIdxWrap2(i32 signext %len) {
+; CHECK-LABEL: 'ComputeMaxTripCountFromArrayIdxWrap2'
+; CHECK-NEXT:  Determining loop execution counts for: @ComputeMaxTripCountFromArrayIdxWrap2
+; CHECK-NEXT:  Loop %for.body: backedge-taken count is (-1 + %len)
+; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is 127
+; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is (-1 + %len)
+; CHECK-NEXT:  Loop %for.body: Predicated backedge-taken count is (-1 + %len)
+; CHECK-NEXT:   Predicates:
+; CHECK-NEXT:  Loop %for.body: Trip multiple is 1
+; CHECK-NEXT:  Loop %for.body: Small constant max trip is 128
+;
+entry:
+  %a = alloca [127 x i32], align 4
+  %cmp4 = icmp sgt i32 %len, 0
+  br i1 %cmp4, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+  br label %for.body
+
+for.cond.cleanup.loopexit:
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %iv = phi i8 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %idxprom = zext i8 %iv to i64
+  %arrayidx = getelementptr inbounds [127 x i32], [127 x i32]* %a, i64 0, i64 %idxprom
+  store i32 0, i32* %arrayidx, align 4
+  %inc = add nuw i8 %iv, 1
+  %inc_zext = zext i8 %inc to i32
+  %cmp = icmp slt i32 %inc_zext, %len
+  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}
+
+define void @ComputeMaxTripCountFromArrayIdxWrap3(i32 signext %len) {
+; CHECK-LABEL: 'ComputeMaxTripCountFromArrayIdxWrap3'
+; CHECK-NEXT:  Determining loop execution counts for: @ComputeMaxTripCountFromArrayIdxWrap3
+; CHECK-NEXT:  Loop %for.body: backedge-taken count is (-1 + %len)
+; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is 20
+; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is (-1 + %len)
+; CHECK-NEXT:  Loop %for.body: Predicated backedge-taken count is (-1 + %len)
+; CHECK-NEXT:   Predicates:
+; CHECK-NEXT:  Loop %for.body: Trip multiple is 1
+; CHECK-NEXT:  Loop %for.body: Small constant max trip is 21
+;
+entry:
+  %a = alloca [20 x i32], align 4
+  %cmp4 = icmp sgt i32 %len, 0
+  br i1 %cmp4, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+  br label %for.body
+
+for.cond.cleanup.loopexit:
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %iv = phi i8 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %idxprom = zext i8 %iv to i64
+  %arrayidx = getelementptr inbounds [20 x i32], [20 x i32]* %a, i64 0, i64 %idxprom
+  store i32 0, i32* %arrayidx, align 4
+  %inc = add nuw nsw i8 %iv, 1
+  %inc_zext = zext i8 %inc to i32
+  %cmp = icmp slt i32 %inc_zext, %len
+  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}
diff --git a/llvm/test/Analysis/ScalarEvolution/infer-trip-count.ll b/llvm/test/Analysis/ScalarEvolution/infer-trip-count.ll
new file mode 100644
index 00000000000000..7c52385adae754
--- /dev/null
+++ b/llvm/test/Analysis/ScalarEvolution/infer-trip-count.ll
@@ -0,0 +1,191 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -disable-output "-passes=print<scalar-evolution>" -scalar-evolution-classify-expressions=0 -scalar-evolution-infer-max-trip-count-from-memory-access 2>&1 | FileCheck %s
+
+define void @ComputeMaxTripCountFromArrayNormal(i32 signext %len) {
+; CHECK-LABEL: 'ComputeMaxTripCountFromArrayNormal'
+; CHECK-NEXT:  Determining loop execution counts for: @ComputeMaxTripCountFromArrayNormal
+; CHECK-NEXT:  Loop %for.body: backedge-taken count is (-1 + %len)
+; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is 7
+; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is (-1 + %len)
+; CHECK-NEXT:  Loop %for.body: Predicated backedge-taken count is (-1 + %len)
+; CHECK-NEXT:   Predicates:
+; CHECK-NEXT:  Loop %for.body: Trip multiple is 1
+; CHECK-NEXT:  Loop %for.body: Small constant max trip is 8
+;
+entry:
+  %a = alloca [7 x i32], align 4
+  %cmp4 = icmp sgt i32 %len, 0
+  br i1 %cmp4, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+  br label %for.body
+
+for.cond.cleanup.loopexit:
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %iv = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %idxprom = zext i32 %iv to i64
+  %arrayidx = getelementptr inbounds [7 x i32], [7 x i32]* %a, i64 0, i64 %idxprom
+  store i32 0, i32* %arrayidx, align 4
+  %inc = add nuw nsw i32 %iv, 1
+  %cmp = icmp slt i32 %inc, %len
+  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}
+
+
+define void @ComputeMaxTripCountFromZeroArray(i32 signext %len) {
+; CHECK-LABEL: 'ComputeMaxTripCountFromZeroArray'
+; CHECK-NEXT:  Determining loop execution counts for: @ComputeMaxTripCountFromZeroArray
+; CHECK-NEXT:  Loop %for.body: backedge-taken count is (-1 + %len)
+; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is 0
+; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is (-1 + %len)
+; CHECK-NEXT:  Loop %for.body: Predicated backedge-taken count is (-1 + %len)
+; CHECK-NEXT:   Predicates:
+; CHECK-NEXT:  Loop %for.body: Trip multiple is 1
+; CHECK-NEXT:  Loop %for.body: Small constant max trip is 1
+;
+entry:
+  %a = alloca [0 x i32], align 4
+  %cmp4 = icmp sgt i32 %len, 0
+  br i1 %cmp4, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+  br label %for.body
+
+for.cond.cleanup.loopexit:
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %iv = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %idxprom = zext i32 %iv to i64
+  %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* %a, i64 0, i64 %idxprom
+  store i32 0, i32* %arrayidx, align 4
+  %inc = add nuw nsw i32 %iv, 1
+  %cmp = icmp slt i32 %inc, %len
+  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}
+
+define void @ComputeMaxTripCountFromExtremArray(i32 signext %len) {
+; CHECK-LABEL: 'ComputeMaxTripCountFromExtremArray'
+; CHECK-NEXT:  Determining loop execution counts for: @ComputeMaxTripCountFromExtremArray
+; CHECK-NEXT:  Loop %for.body: backedge-taken count is (-1 + %len)
+; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is 2147483646
+; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is (-1 + %len)
+; CHECK-NEXT:  Loop %for.body: Predicated backedge-taken count is (-1 + %len)
+; CHECK-NEXT:   Predicates:
+; CHECK-NEXT:  Loop %for.body: Trip multiple is 1
+; CHECK-NEXT:  Loop %for.body: Small constant max trip is 2147483647
+;
+entry:
+  %a = alloca [4294967295 x i1], align 4
+  %cmp4 = icmp sgt i32 %len, 0
+  br i1 %cmp4, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+  br label %for.body
+
+for.cond.cleanup.loopexit:
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %iv = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %idxprom = zext i32 %iv to i64
+  %arrayidx = getelementptr inbounds [4294967295 x i1], [4294967295 x i1]* %a, i64 0, i64 %idxprom
+  store i1 0, i1* %arrayidx, align 4
+  %inc = add nuw nsw i32 %iv, 1
+  %cmp = icmp slt i32 %inc, %len
+  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}
+
+
+define void @ComputeMaxTripCountFromArrayInBranch(i32 signext %len) {
+; CHECK-LABEL: 'ComputeMaxTripCountFromArrayInBranch'
+; CHECK-NEXT:  Determining loop execution counts for: @ComputeMaxTripCountFromArrayInBranch
+; CHECK-NEXT:  Loop %for.cond: backedge-taken count is (0 smax %len)
+; CHECK-NEXT:  Loop %for.cond: constant max backedge-taken count is 2147483647
+; CHECK-NEXT:  Loop %for.cond: symbolic max backedge-taken count is (0 smax %len)
+; CHECK-NEXT:  Loop %for.cond: Predicated backedge-taken count is (0 smax %len)
+; CHECK-NEXT:   Predicates:
+; CHECK-NEXT:  Loop %for.cond: Trip multiple is 1
+; CHECK-NEXT:  Loop %for.cond: Small constant max trip is 2147483648
+;
+entry:
+  %a = alloca [8 x i32], align 4
+  br label %for.cond
+
+for.cond:
+  %iv = phi i32 [ %inc, %for.inc ], [ 0, %entry ]
+  %cmp = icmp slt i32 %iv, %len
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+  br label %for.end
+
+for.body:
+  %cmp1 = icmp slt i32 %iv, 8
+  br i1 %cmp1, label %if.then, label %if.end
+
+if.then:
+  %idxprom = sext i32 %iv to i64
+  %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* %a, i64 0, i64 %idxprom
+  store i32 0, i32* %arrayidx, align 4
+  br label %if.end
+
+if.end:
+  br label %for.inc
+
+for.inc:
+  %inc = add nsw i32 %iv, 1
+  br label %for.cond
+
+for.end:
+  ret void
+}
+
+define void @ComputeMaxTripCountFromMultiDimArray(i32 signext %len) {
+; CHECK-LABEL: 'ComputeMaxTripCountFromMultiDimArray'
+; CHECK-NEXT:  Determining loop execution counts for: @ComputeMaxTripCountFromMultiDimArray
+; CHECK-NEXT:  Loop %for.cond: backedge-taken count is (0 smax %len)
+; CHECK-NEXT:  Loop %for.cond: constant max backedge-taken count is 2147483647
+; CHECK-NEXT:  Loop %for.cond: symbolic max backedge-taken count is (0 smax %len)
+; CHECK-NEXT:  Loop %for.cond: Predicated backedge-taken count is (0 smax %len)
+; CHECK-NEXT:   Predicates:
+; CHECK-NEXT:  Loop %for.cond: Trip multiple is 1
+; CHECK-NEXT:  Loop %for.cond: Small constant max trip is 2147483648
+;
+entry:
+  %a = alloca [3 x [5 x i32]], align 4
+  br label %for.cond
+
+for.cond:
+  %iv = phi i32 [ %inc, %for.inc ], [ 0, %entry ]
+  %cmp = icmp slt i32 %iv, %len
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+  br label %for.end
+
+for.body:
+  %arrayidx = getelementptr inbounds [3 x [5 x i32]], [3 x [5 x i32]]* %a, i64 0, i64 3
+  %idxprom = sext i32 %iv to i64
+  %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %arrayidx, i64 0, i64 %idxprom
+  store i32 0, i32* %arrayidx1, align 4
+  br label %for.inc
+
+for.inc:
+  %inc = add nsw i32 %iv, 1
+  br label %for.cond
+
+for.end:
+  ret void
+}