[llvm] [LoopUnroll] Enhance the use of Optimization Remarks and `LLVM_DEBUG` (PR #178476)

Thu Apr 2 17:46:46 PDT 2026

https://github.com/justinfargnoli updated https://github.com/llvm/llvm-project/pull/178476

>From 8b818d41bf654aa35bdbfc06b6c1c8083951da1b Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Tue, 31 Mar 2026 22:04:29 +0000
Subject: [PATCH 1/2] [LoopUnroll] Enhance the use of Optimization Remarks and
 LLVM_DEBUG

---
 .../llvm/Transforms/Utils/UnrollLoop.h        |    6 +-
 llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp |  218 ++-
 llvm/lib/Transforms/Utils/LoopUnroll.cpp      |   17 +-
 .../LoopUnroll/debug-and-remarks.ll           | 1533 +++++++++++++++++
 llvm/test/Transforms/LoopUnroll/debug.ll      |    7 +
 5 files changed, 1740 insertions(+), 41 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopUnroll/debug-and-remarks.ll

diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
index 6171f3391cb2d..4245441101dc1 100644
--- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
+++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
@@ -160,8 +160,10 @@ class UnrollCostEstimator {
                                const SmallPtrSetImpl<const Value *> &EphValues,
                                unsigned BEInsns);
 
-  /// Whether it is legal to unroll this loop.
-  LLVM_ABI bool canUnroll() const;
+  /// Whether it is legal to unroll this loop. If \p ORE and \p L are provided,
+  /// emit an optimization remark on failure.
+  LLVM_ABI bool canUnroll(OptimizationRemarkEmitter *ORE = nullptr,
+                          const Loop *L = nullptr) const;
 
   uint64_t getRolledLoopSize() const { return LoopSize.getValue(); }
 
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 9fdf7ef1b0a86..444504b365038 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -711,20 +711,28 @@ UnrollCostEstimator::UnrollCostEstimator(
     LoopSize = BEInsns + 1;
 }
 
-bool UnrollCostEstimator::canUnroll() const {
+bool UnrollCostEstimator::canUnroll(OptimizationRemarkEmitter *ORE,
+                                    const Loop *L) const {
+  auto ReportCannotUnroll = [&](StringRef Reason) {
+    LLVM_DEBUG(dbgs().indent(1) << "Not unrolling: " << Reason << ".\n");
+    if (ORE && L)
+      ORE->emit([&]() {
+        return OptimizationRemarkMissed(DEBUG_TYPE, "CannotUnrollLoop",
+                                        L->getStartLoc(), L->getHeader())
+               << "unable to unroll loop: " << Reason;
+      });
+  };
+
   if (Convergence == ConvergenceKind::ExtendedLoop) {
-    LLVM_DEBUG(dbgs().indent(1)
-               << "Not unrolling: contains convergent operations.\n");
+    ReportCannotUnroll("contains convergent operations");
     return false;
   }
   if (!LoopSize.isValid()) {
-    LLVM_DEBUG(dbgs().indent(1)
-               << "Not unrolling: loop size could not be computed.\n");
+    ReportCannotUnroll("loop size could not be computed");
     return false;
   }
   if (NotDuplicatable) {
-    LLVM_DEBUG(dbgs().indent(1)
-               << "Not unrolling: contains non-duplicatable instructions.\n");
+    ReportCannotUnroll("contains non-duplicatable instructions");
     return false;
   }
   return true;
@@ -802,7 +810,8 @@ static std::optional<unsigned>
 shouldPragmaUnroll(Loop *L, const UnrollPragmaInfo &PInfo,
                    const unsigned TripMultiple, const unsigned TripCount,
                    unsigned MaxTripCount, const UnrollCostEstimator UCE,
-                   const TargetTransformInfo::UnrollingPreferences &UP) {
+                   const TargetTransformInfo::UnrollingPreferences &UP,
+                   OptimizationRemarkEmitter *ORE) {
 
   // Using unroll pragma
   // 1st priority is unroll count set by "unroll-count" option.
@@ -832,6 +841,15 @@ shouldPragmaUnroll(Loop *L, const UnrollPragmaInfo &PInfo,
                << "Not unrolling with pragma count " << PInfo.PragmaCount
                << ": remainder not allowed, count does not divide trip "
                << "multiple " << TripMultiple << ".\n");
+    ORE->emit([&]() {
+      return OptimizationRemarkMissed(DEBUG_TYPE, "PragmaUnrollCountRejected",
+                                      L->getStartLoc(), L->getHeader())
+             << "unable to unroll loop with count "
+             << ore::NV("PragmaCount", PInfo.PragmaCount)
+             << ": remainder loop is restricted and count does not divide "
+                "trip multiple "
+             << ore::NV("TripMultiple", TripMultiple);
+    });
   }
 
   if (PInfo.PragmaFullUnroll) {
@@ -842,6 +860,14 @@ shouldPragmaUnroll(Loop *L, const UnrollPragmaInfo &PInfo,
       if (TripCount > PragmaUnrollFullMaxIterations) {
         LLVM_DEBUG(dbgs().indent(2)
                    << "Won't unroll; trip count is too large.\n");
+        ORE->emit([&]() {
+          return OptimizationRemarkMissed(DEBUG_TYPE,
+                                          "PragmaFullUnrollTripCountTooLarge",
+                                          L->getStartLoc(), L->getHeader())
+                 << "unable to fully unroll loop: trip count "
+                 << ore::NV("TripCount", TripCount) << " exceeds limit "
+                 << ore::NV("Limit", PragmaUnrollFullMaxIterations);
+        });
         return std::nullopt;
       }
 
@@ -849,6 +875,8 @@ shouldPragmaUnroll(Loop *L, const UnrollPragmaInfo &PInfo,
                  << "Fully unrolling with trip count: " << TripCount << ".\n");
       return TripCount;
     }
+    // Note: the remark for an unknown trip count is emitted later in
+    // computeUnrollCount, after we've exhausted all strategies.
     LLVM_DEBUG(dbgs().indent(2)
                << "Not fully unrolling: unknown trip count.\n");
   }
@@ -867,13 +895,26 @@ static std::optional<unsigned> shouldFullUnroll(
     Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT,
     ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
     const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE,
-    const TargetTransformInfo::UnrollingPreferences &UP) {
+    const TargetTransformInfo::UnrollingPreferences &UP,
+    OptimizationRemarkEmitter *ORE) {
   assert(FullUnrollTripCount && "should be non-zero!");
 
+  const bool PragmaFullUnroll = hasUnrollFullPragma(L);
+
   if (FullUnrollTripCount > UP.FullUnrollMaxCount) {
     LLVM_DEBUG(dbgs().indent(2)
                << "Not unrolling: trip count " << FullUnrollTripCount
                << " exceeds max count " << UP.FullUnrollMaxCount << ".\n");
+    if (PragmaFullUnroll)
+      ORE->emit([&]() {
+        return OptimizationRemarkMissed(DEBUG_TYPE,
+                                        "FullUnrollTripCountTooLarge",
+                                        L->getStartLoc(), L->getHeader())
+               << "unable to fully unroll loop: trip count "
+               << ore::NV("TripCount", FullUnrollTripCount)
+               << " exceeds maximum full unroll count "
+               << ore::NV("MaxFullUnrollCount", UP.FullUnrollMaxCount);
+      });
     return std::nullopt;
   }
 
@@ -907,6 +948,25 @@ static std::optional<unsigned> shouldFullUnroll(
     LLVM_DEBUG(dbgs().indent(2)
                << "Not unrolling: cost " << Cost->UnrolledCost
                << " >= boosted threshold " << BoostedThreshold << ".\n");
+    if (PragmaFullUnroll)
+      ORE->emit([&]() {
+        return OptimizationRemarkMissed(DEBUG_TYPE, "FullUnrollNotProfitable",
+                                        L->getStartLoc(), L->getHeader())
+               << "unable to fully unroll loop: estimated unrolled cost "
+               << ore::NV("UnrolledCost", Cost->UnrolledCost)
+               << " exceeds boosted threshold "
+               << ore::NV("BoostedThreshold", BoostedThreshold);
+      });
+  } else {
+    LLVM_DEBUG(dbgs().indent(2) << "Skipping: cost analysis unavailable.\n");
+    if (PragmaFullUnroll)
+      ORE->emit([&]() {
+        return OptimizationRemarkMissed(DEBUG_TYPE, "FullUnrollSizeTooLarge",
+                                        L->getStartLoc(), L->getHeader())
+               << "unable to fully unroll loop: estimated unrolled size "
+               << ore::NV("UnrolledSize", UnrolledSize) << " exceeds threshold "
+               << ore::NV("Threshold", UP.Threshold);
+      });
   }
 
   return std::nullopt;
@@ -1031,7 +1091,7 @@ void llvm::computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
   // 2nd priority is unroll count set by pragma.
   LLVM_DEBUG(dbgs().indent(1) << "Trying pragma unroll...\n");
   if (auto UnrollFactor = shouldPragmaUnroll(L, PInfo, TripMultiple, TripCount,
-                                             MaxTripCount, UCE, UP)) {
+                                             MaxTripCount, UCE, UP, ORE)) {
     UP.Count = *UnrollFactor;
 
     if (PInfo.UserUnrollCount || (PInfo.PragmaCount > 0)) {
@@ -1058,7 +1118,7 @@ void llvm::computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
   if (TripCount) {
     UP.Count = TripCount;
     if (auto UnrollFactor = shouldFullUnroll(L, TTI, DT, SE, EphValues,
-                                             TripCount, UCE, UP)) {
+                                             TripCount, UCE, UP, ORE)) {
       UP.Count = *UnrollFactor;
       return;
     }
@@ -1081,7 +1141,7 @@ void llvm::computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
       MaxTripCount <= UP.MaxUpperBound) {
     UP.Count = MaxTripCount;
     if (auto UnrollFactor = shouldFullUnroll(L, TTI, DT, SE, EphValues,
-                                             MaxTripCount, UCE, UP)) {
+                                             MaxTripCount, UCE, UP, ORE)) {
       UP.Count = *UnrollFactor;
       return;
     }
@@ -1109,34 +1169,39 @@ void llvm::computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
   if (auto UnrollFactor = shouldPartialUnroll(LoopSize, TripCount, UCE, UP)) {
     UP.Count = *UnrollFactor;
 
-    if ((PInfo.PragmaFullUnroll || PInfo.PragmaEnableUnroll) && TripCount &&
-        UP.Count != TripCount)
+    if (UP.Count) {
+      if (PInfo.PragmaFullUnroll && UP.Count != TripCount) {
+        LLVM_DEBUG(dbgs().indent(1)
+                   << "Partial unroll instead of full: unrolled size "
+                      "too large. Unrolling "
+                   << UP.Count << " times instead of " << TripCount << ".\n");
+        ORE->emit([&]() {
+          return OptimizationRemarkMissed(DEBUG_TYPE,
+                                          "FullUnrollAsDirectedTooLarge",
+                                          L->getStartLoc(), L->getHeader())
+                 << "unable to fully unroll loop as directed by full unroll "
+                    "pragma because unrolled size is too large";
+        });
+      }
+    } else if (PInfo.PragmaFullUnroll || PInfo.PragmaEnableUnroll) {
+      LLVM_DEBUG(dbgs().indent(1)
+                 << "Not unrolling as directed: unrolled size too large.\n");
       ORE->emit([&]() {
-        return OptimizationRemarkMissed(DEBUG_TYPE,
-                                        "FullUnrollAsDirectedTooLarge",
+        return OptimizationRemarkMissed(DEBUG_TYPE, "UnrollAsDirectedTooLarge",
                                         L->getStartLoc(), L->getHeader())
                << "unable to fully unroll loop as directed by unroll metadata "
                   "because unrolled size is too large";
       });
-
-    if (UP.PartialThreshold != NoThreshold) {
-      if (UP.Count == 0) {
-        if (PInfo.PragmaEnableUnroll)
-          ORE->emit([&]() {
-            return OptimizationRemarkMissed(DEBUG_TYPE,
-                                            "UnrollAsDirectedTooLarge",
-                                            L->getStartLoc(), L->getHeader())
-                   << "unable to unroll loop as directed by "
-                      "llvm.loop.unroll.enable metadata because unrolled size "
-                      "is too large";
-          });
-      }
     }
+
     return;
   }
   assert(TripCount == 0 &&
          "All cases when TripCount is constant should be covered here.");
-  if (PInfo.PragmaFullUnroll)
+  if (PInfo.PragmaFullUnroll) {
+    LLVM_DEBUG(dbgs().indent(1)
+               << "Not fully unrolling as directed: loop has runtime "
+               << "trip count.\n");
     ORE->emit([&]() {
       return OptimizationRemarkMissed(
                  DEBUG_TYPE, "CantFullUnrollAsDirectedRuntimeTripCount",
@@ -1145,6 +1210,7 @@ void llvm::computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
                 "llvm.loop.unroll.full metadata because loop has a runtime "
                 "trip count";
     });
+  }
 
   // 7th priority is runtime unrolling.
   LLVM_DEBUG(dbgs().indent(1) << "Trying runtime unroll...\n");
@@ -1161,6 +1227,19 @@ void llvm::computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
     LLVM_DEBUG(dbgs().indent(2)
                << "Not runtime unrolling: max trip count " << MaxTripCount
                << " is small (< " << UP.MaxUpperBound << ") and not forced.\n");
+    // If the user specified an unroll count but it was rejected earlier
+    // (e.g., remainder not allowed or threshold exceeded), they've already
+    // been notified. Emit an additional remark since runtime unrolling was
+    // their last chance.
+    if (PInfo.PragmaCount > 0) {
+      ORE->emit([&]() {
+        return OptimizationRemarkMissed(DEBUG_TYPE, "SmallMaxTripCount",
+                                        L->getStartLoc(), L->getHeader())
+               << "unable to runtime unroll loop: max trip count "
+               << ore::NV("MaxTripCount", MaxTripCount) << " is too small (< "
+               << ore::NV("MaxUpperBound", UP.MaxUpperBound) << ")";
+      });
+    }
     UP.Count = 0;
     return;
   }
@@ -1230,10 +1309,11 @@ void llvm::computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
   if (MaxTripCount && UP.Count > MaxTripCount)
     UP.Count = MaxTripCount;
 
-  LLVM_DEBUG(dbgs().indent(2)
-             << "Runtime unrolling with count: " << UP.Count << "\n");
   if (UP.Count < 2)
     UP.Count = 0;
+  else
+    LLVM_DEBUG(dbgs().indent(2)
+               << "Runtime unrolling with count: " << UP.Count << "\n");
   return;
 }
 
@@ -1257,7 +1337,23 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
                     << L->getHeader()->getParent()->getName() << "] Loop %"
                     << L->getHeader()->getName()
                     << " (depth=" << L->getLoopDepth() << ")\n");
+
   TransformationMode TM = hasUnrollTransformation(L);
+
+  if (TM & TM_ForcedByUser) {
+    if (const DebugLoc &StartLoc = L->getStartLoc()) {
+      if (const DILocation *InlinedAt = StartLoc->getInlinedAt()) {
+        ORE.emit([&]() {
+          return OptimizationRemarkAnalysis(DEBUG_TYPE, "InlinedLoop", StartLoc,
+                                            L->getHeader())
+                 << "loop is from inlined function; call site is at "
+                 << ore::NV("CallSiteFile", InlinedAt->getFilename()) << ":"
+                 << ore::NV("CallSiteLine", InlinedAt->getLine());
+        });
+      }
+    }
+  }
+
   if (TM & TM_Disable) {
     LLVM_DEBUG(dbgs().indent(1) << "Not unrolling: transformation disabled by "
                                 << "metadata.\n");
@@ -1291,6 +1387,13 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
   if (!L->isLoopSimplifyForm()) {
     LLVM_DEBUG(dbgs().indent(1)
                << "Not unrolling loop which is not in loop-simplify form.\n");
+    if (TM & TM_ForcedByUser) {
+      ORE.emit([&]() {
+        return OptimizationRemarkMissed(DEBUG_TYPE, "NotInLoopSimplifyForm",
+                                        L->getStartLoc(), L->getHeader())
+               << "unable to unroll loop: not in loop-simplify form";
+      });
+    }
     return LoopUnrollResult::Unmodified;
   }
 
@@ -1316,6 +1419,13 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
   if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0) &&
       !OptForSize) {
     LLVM_DEBUG(dbgs().indent(1) << "Not unrolling: all thresholds are zero.\n");
+    if (TM & TM_ForcedByUser) {
+      ORE.emit([&]() {
+        return OptimizationRemarkMissed(DEBUG_TYPE, "UnrollThresholdsZero",
+                                        L->getStartLoc(), L->getHeader())
+               << "unable to unroll loop: unroll threshold is zero";
+      });
+    }
     return LoopUnrollResult::Unmodified;
   }
 
@@ -1323,7 +1433,7 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
   CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
 
   UnrollCostEstimator UCE(L, TTI, EphValues, UP.BEInsns);
-  if (!UCE.canUnroll())
+  if (!UCE.canUnroll((TM & TM_ForcedByUser) ? &ORE : nullptr, L))
     return LoopUnrollResult::Unmodified;
 
   unsigned LoopSize = UCE.getRolledLoopSize();
@@ -1337,6 +1447,14 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
   if (UCE.NumInlineCandidates != 0) {
     LLVM_DEBUG(dbgs().indent(1)
                << "Not unrolling loop with inlinable calls.\n");
+    if (TM & TM_ForcedByUser) {
+      ORE.emit([&]() {
+        return OptimizationRemarkMissed(DEBUG_TYPE,
+                                        "InlineCandidatesPreventUnroll",
+                                        L->getStartLoc(), L->getHeader())
+               << "unable to unroll loop: contains inlinable calls";
+      });
+    }
     return LoopUnrollResult::Unmodified;
   }
 
@@ -1390,6 +1508,13 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
   if (!UP.Count) {
     LLVM_DEBUG(dbgs().indent(1)
                << "Not unrolling: no viable strategy found.\n");
+    if (UnrollPragmaInfo(L).ExplicitUnroll && (TM & TM_ForcedByUser)) {
+      ORE.emit([&]() {
+        return OptimizationRemarkMissed(DEBUG_TYPE, "NoUnrollStrategy",
+                                        L->getStartLoc(), L->getHeader())
+               << "unable to unroll loop: no viable unroll count found";
+      });
+    }
     return LoopUnrollResult::Unmodified;
   }
 
@@ -1436,6 +1561,33 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
   MDNode *OrigLoopID = L->getLoopID();
   UnrollPragmaInfo PInfo(L);
 
+  // Emit additional unrolling strategy context that is difficult for
+  // UnrollLoop() to re-compute.
+  /* if (UseUpperBound) {
+    LLVM_DEBUG(dbgs() << "Attempting full unroll with upper bound trip count "
+                      << MaxTripCount << "\n");
+    ORE.emit([&]() {
+      return OptimizationRemarkAnalysis(DEBUG_TYPE, "AttemptUnrollUpperBound",
+                                        L->getStartLoc(), L->getHeader())
+             << "attempting full unroll using upper bound trip count "
+             << ore::NV("MaxTripCount", MaxTripCount);
+    });
+  } else*/
+  if (TripCount && TripCount > UP.Count && TripCount % UP.Count != 0) {
+    LLVM_DEBUG(dbgs() << "Attempting unroll by factor " << UP.Count
+                      << " with remainder loop (trip count " << TripCount
+                      << ")\n");
+    ORE.emit([&]() {
+      return OptimizationRemarkAnalysis(DEBUG_TYPE,
+                                        "AttemptUnrollWithRemainder",
+                                        L->getStartLoc(), L->getHeader())
+             << "attempting unroll by factor "
+             << ore::NV("UnrollCount", UP.Count)
+             << " with remainder loop (trip count "
+             << ore::NV("TripCount", TripCount) << ")";
+    });
+  }
+
   // Unroll the loop.
   Loop *RemainderLoop = nullptr;
   UnrollLoopOptions ULO;
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index ab35d217f0d93..d11ae25aba3d0 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -620,11 +620,15 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
                << NV("UnrollCount", ULO.Count) << " iterations";
       });
   } else {
-    LLVM_DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by "
-                      << ULO.Count);
-    if (ULO.Runtime)
-      LLVM_DEBUG(dbgs() << " with run-time trip count");
-    LLVM_DEBUG(dbgs() << "!\n");
+    LLVM_DEBUG({
+      dbgs() << "UNROLLING loop %" << Header->getName() << " by " << ULO.Count;
+      if (ULO.Runtime) {
+        dbgs() << " with run-time trip count";
+        if (ULO.UnrollRemainder)
+          dbgs() << " (remainder unrolled)";
+      }
+      dbgs() << "!\n";
+    });
 
     if (ORE)
       ORE->emit([&]() {
@@ -632,7 +636,8 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
                                 L->getHeader());
         Diag << "unrolled loop by a factor of " << NV("UnrollCount", ULO.Count);
         if (ULO.Runtime)
-          Diag << " with run-time trip count";
+          Diag << " with run-time trip count"
+               << (ULO.UnrollRemainder ? " (remainder unrolled)" : "");
         return Diag;
       });
   }
diff --git a/llvm/test/Transforms/LoopUnroll/debug-and-remarks.ll b/llvm/test/Transforms/LoopUnroll/debug-and-remarks.ll
new file mode 100644
index 0000000000000..714d63d49bd10
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/debug-and-remarks.ll
@@ -0,0 +1,1533 @@
+; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \
+; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll < %s 2>&1 | FileCheck %s --match-full-lines --strict-whitespace
+; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \
+; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-count=4 < %s 2>&1 \
+; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=PARTIAL-UNROLL
+; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \
+; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-runtime < %s 2>&1 \
+; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=RUNTIME-UNROLL
+; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \
+; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-allow-partial < %s 2>&1 \
+; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=PARTIAL-ALLOW
+; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \
+; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-count=4 -unroll-allow-remainder=false < %s 2>&1 \
+; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=USER-COUNT-REJECT
+; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \
+; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-allow-remainder=false < %s 2>&1 \
+; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=NO-REMAINDER
+; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \
+; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -pragma-unroll-full-max-iterations=100 < %s 2>&1 \
+; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=PRAGMA-TC-TOO-LARGE
+; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \
+; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-threshold=20 < %s 2>&1 \
+; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=COST-ANALYSIS
+; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \
+; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-full-max-count=10 \
+; RUN:     -pragma-unroll-full-max-iterations=10 < %s 2>&1 \
+; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=MAX-COUNT-10
+; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \
+; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-peel-count=2 < %s 2>&1 \
+; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=PEEL
+; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \
+; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-threshold=0 -unroll-partial-threshold=0 < %s 2>&1 \
+; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=THRESHOLDS-ZERO
+; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \
+; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-threshold=30 < %s 2>&1 \
+; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=NESTED-COST
+; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \
+; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-allow-partial -unroll-partial-threshold=8 < %s 2>&1 \
+; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=NO-PROFIT
+; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \
+; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-allow-partial -pragma-unroll-threshold=10 \
+; RUN:     -unroll-threshold=10 -unroll-partial-threshold=10 -pragma-unroll-full-max-iterations=10 < %s 2>&1 \
+; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=UNROLL-AS-DIRECTED-FAIL
+; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \
+; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-count=8 -unroll-threshold=10 < %s 2>&1 \
+; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=USER-COUNT-EXCEED
+; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \
+; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-threshold=1 -unroll-partial-threshold=1 < %s 2>&1 \
+; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=NO-STRATEGY
+; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \
+; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-threshold=20 -pragma-unroll-threshold=20 < %s 2>&1 \
+; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=COST-NOT-PROFITABLE
+; RUN: opt -disable-output -O2 --disable-loop-unrolling -debug-only=loop-unroll -pass-remarks=loop-unroll \
+; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll < %s 2>&1 \
+; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=AUTO-DISABLED
+; RUN: opt -disable-output -passes='loop-unroll<upperbound>' -debug-only=loop-unroll -pass-remarks=loop-unroll \
+; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll < %s 2>&1 \
+; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=UPPER-BOUND-HEURISTIC
+; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \
+; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-threshold=20 \
+; RUN:     -pragma-unroll-threshold=20 -pragma-unroll-full-max-iterations=8 < %s 2>&1 \
+; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=FULL-COST-NOT-PROFITABLE
+
+; REQUIRES: asserts
+
+; CHECK-LABEL:Loop Unroll: F[full_unroll_simple] Loop %for.body (depth=1)
+; CHECK-NEXT:Loop Size = 6
+; CHECK-NEXT: Computing unroll count: TripCount=8, MaxTripCount=0, TripMultiple=8
+; CHECK-NEXT: Trying pragma unroll...
+; CHECK-NEXT: Trying full unroll...
+; CHECK-NEXT:  Unrolling: size {{[0-9]+}} < threshold {{[0-9]+}}.
+; CHECK-NEXT:  Exiting block %for.body: TripCount=8, TripMultiple=0, BreakoutTrip=0
+; CHECK-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 8!
+; CHECK-NEXT:remark: <unknown>:0:0: completely unrolled loop with 8 iterations
+
+define i32 @full_unroll_simple(ptr %A) {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  %load = load i32, ptr %arrayidx
+  %add = add i32 %sum, %load
+  %inc = add i32 %i, 1
+  %cmp = icmp ult i32 %inc, 8
+  br i1 %cmp, label %for.body, label %exit
+
+exit:
+  ret i32 %add
+}
+
+; PARTIAL-UNROLL-LABEL:Loop Unroll: F[partial_unroll_user_count] Loop %for.body (depth=1)
+; PARTIAL-UNROLL-NEXT:Loop Size = 6
+; PARTIAL-UNROLL-NEXT: Computing unroll count: TripCount=16, MaxTripCount=0, TripMultiple=16
+; PARTIAL-UNROLL-NEXT: Explicit unroll requested: user-count
+; PARTIAL-UNROLL-NEXT: Trying pragma unroll...
+; PARTIAL-UNROLL-NEXT:  Unrolling with user-specified count: 4.
+; PARTIAL-UNROLL-NEXT:  Exiting block %for.body: TripCount=16, TripMultiple=0, BreakoutTrip=0
+; PARTIAL-UNROLL-NEXT:UNROLLING loop %for.body by 4!
+; PARTIAL-UNROLL-NEXT:remark: <unknown>:0:0: unrolled loop by a factor of 4
+
+define i32 @partial_unroll_user_count(ptr %A) {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  %load = load i32, ptr %arrayidx
+  %add = add i32 %sum, %load
+  %inc = add i32 %i, 1
+  %cmp = icmp ult i32 %inc, 16
+  br i1 %cmp, label %for.body, label %exit
+
+exit:
+  ret i32 %add
+}
+
+; RUNTIME-UNROLL-LABEL:Loop Unroll: F[runtime_unroll_simple] Loop %for.body (depth=1)
+; RUNTIME-UNROLL-NEXT:Loop Size = 6
+; RUNTIME-UNROLL-NEXT: Computing unroll count: TripCount=0, MaxTripCount={{[0-9]+}}, TripMultiple=1
+; RUNTIME-UNROLL-NEXT: Trying pragma unroll...
+; RUNTIME-UNROLL-NEXT: Trying full unroll...
+; RUNTIME-UNROLL-NEXT: Trying upper-bound unroll...
+; RUNTIME-UNROLL-NEXT: Trying loop peeling...
+; RUNTIME-UNROLL-NEXT: Trying partial unroll...
+; RUNTIME-UNROLL-NEXT: Trying runtime unroll...
+; RUNTIME-UNROLL-NEXT:  Runtime unrolling with count: 8
+; RUNTIME-UNROLL-NEXT:  Exiting block %for.body: TripCount=0, TripMultiple=1, BreakoutTrip=1
+; RUNTIME-UNROLL-NEXT:Trying runtime unrolling on Loop: 
+; RUNTIME-UNROLL-NEXT:Loop at depth 1 containing: %for.body<header><latch><exiting>
+; RUNTIME-UNROLL-NEXT:Using epilog remainder.
+; RUNTIME-UNROLL-NEXT:UNROLLING loop %for.body by 8 with run-time trip count!
+; RUNTIME-UNROLL-NEXT:remark: <unknown>:0:0: unrolled loop by a factor of 8 with run-time trip count
+
+define i32 @runtime_unroll_simple(ptr %A, i32 %n) {
+entry:
+  %cmp.entry = icmp sgt i32 %n, 0
+  br i1 %cmp.entry, label %for.body, label %exit
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  %load = load i32, ptr %arrayidx
+  %add = add i32 %sum, %load
+  %inc = add i32 %i, 1
+  %cmp = icmp slt i32 %inc, %n
+  br i1 %cmp, label %for.body, label %exit
+
+exit:
+  %result = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  ret i32 %result
+}
+
+; CHECK-LABEL:Loop Unroll: F[pragma_full_unroll] Loop %for.body (depth=1)
+; CHECK-NEXT:Loop Size = 6
+; CHECK-NEXT: Computing unroll count: TripCount=4, MaxTripCount=0, TripMultiple=4
+; CHECK-NEXT: Explicit unroll requested: pragma-full
+; CHECK-NEXT: Trying pragma unroll...
+; CHECK-NEXT:  Fully unrolling with trip count: 4.
+; CHECK-NEXT:  Exiting block %for.body: TripCount=4, TripMultiple=0, BreakoutTrip=0
+; CHECK-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 4!
+; CHECK-NEXT:remark: <unknown>:0:0: completely unrolled loop with 4 iterations
+
+define i32 @pragma_full_unroll(ptr %A) {  ; sums A[0..3]; the latch branch carries loop metadata !0
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]  ; induction variable, starts at 0
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]  ; running sum
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  %load = load i32, ptr %arrayidx
+  %add = add i32 %sum, %load
+  %inc = add i32 %i, 1
+  %cmp = icmp ult i32 %inc, 4  ; constant bound; the expected output above reports TripCount=4
+  br i1 %cmp, label %for.body, label %exit, !llvm.loop !0  ; !0 is defined outside this excerpt; the expected output reports "pragma-full"
+
+exit:
+  ret i32 %add
+}
+
+; CHECK-LABEL:Loop Unroll: F[pragma_unroll_count] Loop %for.body (depth=1)
+; CHECK-NEXT:Loop Size = 6
+; CHECK-NEXT: Computing unroll count: TripCount=16, MaxTripCount=0, TripMultiple=16
+; CHECK-NEXT: Explicit unroll requested: pragma-count(4)
+; CHECK-NEXT: Trying pragma unroll...
+; CHECK-NEXT:  Unrolling with pragma count: 4.
+; CHECK-NEXT:  Exiting block %for.body: TripCount=16, TripMultiple=0, BreakoutTrip=0
+; CHECK-NEXT:UNROLLING loop %for.body by 4!
+; CHECK-NEXT:remark: <unknown>:0:0: unrolled loop by a factor of 4
+
+define i32 @pragma_unroll_count(ptr %A) {  ; sums A[0..15]; the latch branch carries loop metadata !2
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]  ; induction variable, starts at 0
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]  ; running sum
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  %load = load i32, ptr %arrayidx
+  %add = add i32 %sum, %load
+  %inc = add i32 %i, 1
+  %cmp = icmp ult i32 %inc, 16  ; constant bound; the expected output above reports TripCount=16
+  br i1 %cmp, label %for.body, label %exit, !llvm.loop !2  ; !2 is defined outside this excerpt; the expected output reports "pragma-count(4)"
+
+exit:
+  ret i32 %add
+}
+
+; CHECK-LABEL:Loop Unroll: F[pragma_full_unroll_unknown_tc] Loop %for.body (depth=1)
+; CHECK-NEXT:Loop Size = 6
+; CHECK-NEXT: Computing unroll count: TripCount=0, MaxTripCount={{[0-9]+}}, TripMultiple=1
+; CHECK-NEXT: Explicit unroll requested: pragma-full
+; CHECK-NEXT: Trying pragma unroll...
+; CHECK-NEXT:  Not fully unrolling: unknown trip count.
+; CHECK-NEXT: Trying full unroll...
+; CHECK-NEXT: Trying upper-bound unroll...
+; CHECK-NEXT: Trying loop peeling...
+; CHECK-NEXT: Trying partial unroll...
+; CHECK-NEXT: Not fully unrolling as directed: loop has runtime trip count.
+; CHECK-NEXT:remark: <unknown>:0:0: unable to fully unroll loop as directed by llvm.loop.unroll.full metadata because loop has a runtime trip count
+; CHECK-NEXT: Trying runtime unroll...
+; CHECK-NEXT:  Will not try to unroll loop with runtime trip count because -unroll-runtime not given
+; CHECK-NEXT: Not unrolling: no viable strategy found.
+
+define i32 @pragma_full_unroll_unknown_tc(ptr %A, i32 %n) {  ; runtime-bounded sum whose latch carries loop metadata !0
+entry:
+  %cmp.entry = icmp sgt i32 %n, 0  ; guard: skip the loop entirely when %n <= 0
+  br i1 %cmp.entry, label %for.body, label %exit
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]  ; induction variable, starts at 0
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]  ; running sum
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  %load = load i32, ptr %arrayidx
+  %add = add i32 %sum, %load
+  %inc = add i32 %i, 1
+  %cmp = icmp slt i32 %inc, %n  ; non-constant bound; the expected output above reports "unknown trip count"
+  br i1 %cmp, label %for.body, label %exit, !llvm.loop !0  ; !0 is defined outside this excerpt; the expected output reports "pragma-full"
+
+exit:
+  %result = phi i32 [ 0, %entry ], [ %add, %for.body ]  ; 0 when the loop was skipped, otherwise the final sum
+  ret i32 %result
+}
+
+; CHECK-LABEL:Loop Unroll: F[upper_bound_unroll] Loop %for.body (depth=1)
+; CHECK-NEXT:Loop Size = 6
+; CHECK-NEXT: Computing unroll count: TripCount=0, MaxTripCount=3, TripMultiple=1
+; CHECK-NEXT: Explicit unroll requested: pragma-enable
+; CHECK-NEXT: Trying pragma unroll...
+; CHECK-NEXT:  Unrolling with max trip count: 3.
+; CHECK-NEXT:  Exiting block %for.body: TripCount=0, TripMultiple=1, BreakoutTrip=1
+; CHECK-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 3!
+; CHECK-NEXT:remark: <unknown>:0:0: completely unrolled loop with 3 iterations
+
+define i32 @upper_bound_unroll(ptr %A, i32 %n) {  ; loop bounded by a masked runtime value; the latch carries loop metadata !1
+entry:
+  %n.clamped = and i32 %n, 3  ; mask keeps the low two bits, so the bound is at most 3
+  br label %for.body  ; no entry guard: the body executes at least once
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]  ; induction variable, starts at 0
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]  ; running sum
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  %load = load i32, ptr %arrayidx
+  %add = add i32 %sum, %load
+  %inc = add i32 %i, 1
+  %cmp = icmp ult i32 %inc, %n.clamped  ; the expected output above reports TripCount=0, MaxTripCount=3
+  br i1 %cmp, label %for.body, label %exit, !llvm.loop !1  ; !1 is defined outside this excerpt; the expected output reports "pragma-enable"
+
+exit:
+  ret i32 %add
+}
+
+; CHECK-LABEL:Loop Unroll: F[full_unroll_cost_exceeds] Loop %for.body (depth=1)
+; CHECK-NEXT:Loop Size = 6
+; CHECK-NEXT: Computing unroll count: TripCount=100, MaxTripCount=0, TripMultiple=100
+; CHECK-NEXT: Trying pragma unroll...
+; CHECK-NEXT: Trying full unroll...
+; CHECK-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit.
+; CHECK-NEXT:   Not analyzing loop cost: trip count too large.
+; CHECK-NEXT:  Skipping: cost analysis unavailable.
+; CHECK-NEXT: Trying upper-bound unroll...
+; CHECK-NEXT: Trying loop peeling...
+; CHECK-NEXT: Trying partial unroll...
+; CHECK-NEXT:  Will not try to unroll partially because -unroll-allow-partial not given
+; CHECK-NEXT: Not unrolling: no viable strategy found.
+
+define i32 @full_unroll_cost_exceeds(ptr %A) {  ; sums A[0..99]; no loop metadata on the latch
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]  ; induction variable, starts at 0
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]  ; running sum
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  %load = load i32, ptr %arrayidx
+  %add = add i32 %sum, %load
+  %inc = add i32 %i, 1
+  %cmp = icmp ult i32 %inc, 100  ; constant bound; the expected output above reports TripCount=100
+  br i1 %cmp, label %for.body, label %exit
+
+exit:
+  ret i32 %add
+}
+
+; CHECK-LABEL:Loop Unroll: F[unroll_disabled_metadata] Loop %for.body (depth=1)
+; CHECK-NEXT: Not unrolling: transformation disabled by metadata.
+
+define i32 @unroll_disabled_metadata(ptr %A) {  ; sums A[0..7]; the latch branch carries loop metadata !3
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]  ; induction variable, starts at 0
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]  ; running sum
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  %load = load i32, ptr %arrayidx
+  %add = add i32 %sum, %load
+  %inc = add i32 %i, 1
+  %cmp = icmp ult i32 %inc, 8  ; constant bound of 8
+  br i1 %cmp, label %for.body, label %exit, !llvm.loop !3  ; !3 is defined outside this excerpt; the expected output reports "transformation disabled by metadata"
+
+exit:
+  ret i32 %add
+}
+
+; CHECK-LABEL:Loop Unroll: F[runtime_small_max_tc] Loop %for.body (depth=1)
+; CHECK-NEXT:Loop Size = 6
+; CHECK-NEXT: Computing unroll count: TripCount=0, MaxTripCount=3, TripMultiple=1
+; CHECK-NEXT: Trying pragma unroll...
+; CHECK-NEXT: Trying full unroll...
+; CHECK-NEXT: Trying upper-bound unroll...
+; CHECK-NEXT: Trying loop peeling...
+; CHECK-NEXT: Trying partial unroll...
+; CHECK-NEXT: Trying runtime unroll...
+; CHECK-NEXT:  Not runtime unrolling: max trip count 3 is small (< 8) and not forced.
+; CHECK-NEXT: Not unrolling: no viable strategy found.
+
+define i32 @runtime_small_max_tc(ptr %A, i32 %n) {  ; loop bounded by a masked runtime value; no loop metadata on the latch
+entry:
+  %n.clamped = and i32 %n, 3  ; mask keeps the low two bits, so the bound is at most 3
+  br label %for.body  ; no entry guard: the body executes at least once
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]  ; induction variable, starts at 0
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]  ; running sum
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  %load = load i32, ptr %arrayidx
+  %add = add i32 %sum, %load
+  %inc = add i32 %i, 1
+  %cmp = icmp ult i32 %inc, %n.clamped  ; the expected output above reports TripCount=0, MaxTripCount=3
+  br i1 %cmp, label %for.body, label %exit
+
+exit:
+  ret i32 %add
+}
+
+; When a user-specified unroll count does not evenly divide the trip count, a remainder loop is needed.
+; PARTIAL-UNROLL-LABEL:Loop Unroll: F[partial_unroll_with_remainder] Loop %for.body (depth=1)
+; PARTIAL-UNROLL-NEXT:Loop Size = 6
+; PARTIAL-UNROLL-NEXT: Computing unroll count: TripCount=10, MaxTripCount=0, TripMultiple=10
+; PARTIAL-UNROLL-NEXT: Explicit unroll requested: user-count
+; PARTIAL-UNROLL-NEXT: Trying pragma unroll...
+; PARTIAL-UNROLL-NEXT:  Unrolling with user-specified count: 4.
+; PARTIAL-UNROLL-NEXT:Attempting unroll by factor 4 with remainder loop (trip count 10)
+; PARTIAL-UNROLL-NEXT:remark: <unknown>:0:0: attempting unroll by factor 4 with remainder loop (trip count 10)
+; PARTIAL-UNROLL-NEXT:  Exiting block %for.body: TripCount=10, TripMultiple=0, BreakoutTrip=2
+; PARTIAL-UNROLL-NEXT:UNROLLING loop %for.body by 4!
+; PARTIAL-UNROLL-NEXT:remark: <unknown>:0:0: unrolled loop by a factor of 4
+
+define i32 @partial_unroll_with_remainder(ptr %A) {  ; sums A[0..9]; no loop metadata on the latch
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]  ; induction variable, starts at 0
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]  ; running sum
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  %load = load i32, ptr %arrayidx
+  %add = add i32 %sum, %load
+  %inc = add i32 %i, 1
+  %cmp = icmp ult i32 %inc, 10  ; trip count 10 is not a multiple of the count 4 named in the expected output
+  br i1 %cmp, label %for.body, label %exit
+
+exit:
+  ret i32 %add
+}
+
+
+; PARTIAL-ALLOW-LABEL:Loop Unroll: F[partial_unroll_cost_analysis] Loop %for.body (depth=1)
+; PARTIAL-ALLOW-NEXT:Loop Size = 6
+; PARTIAL-ALLOW-NEXT: Computing unroll count: TripCount=200, MaxTripCount=0, TripMultiple=200
+; PARTIAL-ALLOW-NEXT: Trying pragma unroll...
+; PARTIAL-ALLOW-NEXT: Trying full unroll...
+; PARTIAL-ALLOW-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit.
+; PARTIAL-ALLOW-NEXT:   Not analyzing loop cost: trip count too large.
+; PARTIAL-ALLOW-NEXT:  Skipping: cost analysis unavailable.
+; PARTIAL-ALLOW-NEXT: Trying upper-bound unroll...
+; PARTIAL-ALLOW-NEXT: Trying loop peeling...
+; PARTIAL-ALLOW-NEXT: Trying partial unroll...
+; PARTIAL-ALLOW-NEXT:  Unrolled size exceeds threshold; reducing count from {{[0-9]+}} to {{[0-9]+}}.
+; PARTIAL-ALLOW-NEXT:  Partially unrolling with count: 25
+; PARTIAL-ALLOW-NEXT:  Exiting block %for.body: TripCount=200, TripMultiple=0, BreakoutTrip=0
+; PARTIAL-ALLOW-NEXT:UNROLLING loop %for.body by 25!
+; PARTIAL-ALLOW-NEXT:remark: <unknown>:0:0: unrolled loop by a factor of 25
+
+define i32 @partial_unroll_cost_analysis(ptr %A) {  ; sums A[0..199]; no loop metadata on the latch
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]  ; induction variable, starts at 0
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]  ; running sum
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  %load = load i32, ptr %arrayidx
+  %add = add i32 %sum, %load
+  %inc = add i32 %i, 1
+  %cmp = icmp ult i32 %inc, 200  ; constant bound; the expected output above reports TripCount=200
+  br i1 %cmp, label %for.body, label %exit
+
+exit:
+  ret i32 %add
+}
+
+; CHECK-LABEL:Loop Unroll: F[runtime_unroll_disabled_pragma] Loop %for.body (depth=1)
+; CHECK-NEXT:Loop Size = 6
+; CHECK-NEXT: Computing unroll count: TripCount=0, MaxTripCount={{[0-9]+}}, TripMultiple=1
+; CHECK-NEXT: Trying pragma unroll...
+; CHECK-NEXT: Trying full unroll...
+; CHECK-NEXT: Trying upper-bound unroll...
+; CHECK-NEXT: Trying loop peeling...
+; CHECK-NEXT: Trying partial unroll...
+; CHECK-NEXT: Trying runtime unroll...
+; CHECK-NEXT:  Not runtime unrolling: disabled by pragma.
+; CHECK-NEXT: Not unrolling: no viable strategy found.
+
+define i32 @runtime_unroll_disabled_pragma(ptr %A, i32 %n) {  ; runtime-bounded sum whose latch carries loop metadata !4
+entry:
+  %cmp.entry = icmp sgt i32 %n, 0  ; guard: skip the loop entirely when %n <= 0
+  br i1 %cmp.entry, label %for.body, label %exit
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]  ; induction variable, starts at 0
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]  ; running sum
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  %load = load i32, ptr %arrayidx
+  %add = add i32 %sum, %load
+  %inc = add i32 %i, 1
+  %cmp = icmp slt i32 %inc, %n  ; non-constant bound; the expected output above reports TripCount=0
+  br i1 %cmp, label %for.body, label %exit, !llvm.loop !4  ; !4 is defined outside this excerpt; the expected output reports runtime unrolling "disabled by pragma"
+
+exit:
+  %result = phi i32 [ 0, %entry ], [ %add, %for.body ]  ; 0 when the loop was skipped, otherwise the final sum
+  ret i32 %result
+}
+
+; AUTO-DISABLED-LABEL:Loop Unroll: F[auto_unroll_not_enabled] Loop %for.body (depth=1)
+; AUTO-DISABLED-NEXT: Not unrolling: automatic unrolling disabled and loop not explicitly enabled.
+
+define i32 @auto_unroll_not_enabled(ptr %A) {  ; sums A[0..3]; no loop metadata, so nothing explicitly requests unrolling
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]  ; induction variable, starts at 0
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]  ; running sum
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  %load = load i32, ptr %arrayidx
+  %add = add i32 %sum, %load
+  %inc = add i32 %i, 1
+  %cmp = icmp ult i32 %inc, 4  ; constant bound of 4
+  br i1 %cmp, label %for.body, label %exit
+
+exit:
+  ret i32 %add
+}
+
+; USER-COUNT-REJECT-LABEL:Loop Unroll: F[user_count_rejected] Loop %for.body (depth=1)
+; USER-COUNT-REJECT-NEXT:Loop Size = 6
+; USER-COUNT-REJECT-NEXT: Computing unroll count: TripCount=10, MaxTripCount=0, TripMultiple=10
+; USER-COUNT-REJECT-NEXT: Explicit unroll requested: user-count
+; USER-COUNT-REJECT-NEXT: Trying pragma unroll...
+; USER-COUNT-REJECT-NEXT:  Not unrolling with user count 4: remainder not allowed.
+; USER-COUNT-REJECT-NEXT: Trying full unroll...
+; USER-COUNT-REJECT-NEXT:  Unrolling: size 42 < threshold 16384.
+; USER-COUNT-REJECT-NEXT:  Exiting block %for.body: TripCount=10, TripMultiple=0, BreakoutTrip=0
+; USER-COUNT-REJECT-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 10!
+; USER-COUNT-REJECT-NEXT:remark: <unknown>:0:0: completely unrolled loop with 10 iterations
+
+define i32 @user_count_rejected(ptr %A) {  ; sums A[0..9]; no loop metadata on the latch
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]  ; induction variable, starts at 0
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]  ; running sum
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  %load = load i32, ptr %arrayidx
+  %add = add i32 %sum, %load
+  %inc = add i32 %i, 1
+  %cmp = icmp ult i32 %inc, 10  ; trip count 10 is not a multiple of the user count 4 named in the expected output
+  br i1 %cmp, label %for.body, label %exit
+
+exit:
+  ret i32 %add
+}
+
+; NO-REMAINDER-LABEL:Loop Unroll: F[pragma_count_rejected] Loop %for.body (depth=1)
+; NO-REMAINDER-NEXT:Loop Size = 6
+; NO-REMAINDER-NEXT: Computing unroll count: TripCount=10, MaxTripCount=0, TripMultiple=10
+; NO-REMAINDER-NEXT: Explicit unroll requested: pragma-count(4)
+; NO-REMAINDER-NEXT: Trying pragma unroll...
+; NO-REMAINDER-NEXT:  Not unrolling with pragma count 4: remainder not allowed, count does not divide trip multiple 10.
+; NO-REMAINDER-NEXT:remark: <unknown>:0:0: unable to unroll loop with count 4: remainder loop is restricted and count does not divide trip multiple 10
+; NO-REMAINDER-NEXT: Trying full unroll...
+; NO-REMAINDER-NEXT:  Unrolling: size 42 < threshold 16384.
+; NO-REMAINDER-NEXT:  Exiting block %for.body: TripCount=10, TripMultiple=0, BreakoutTrip=0
+; NO-REMAINDER-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 10!
+; NO-REMAINDER-NEXT:remark: <unknown>:0:0: completely unrolled loop with 10 iterations
+
+define i32 @pragma_count_rejected(ptr %A) {  ; sums A[0..9]; the latch branch carries loop metadata !10
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]  ; induction variable, starts at 0
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]  ; running sum
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  %load = load i32, ptr %arrayidx
+  %add = add i32 %sum, %load
+  %inc = add i32 %i, 1
+  %cmp = icmp ult i32 %inc, 10  ; trip count 10 is not a multiple of the pragma count 4 named in the expected output
+  br i1 %cmp, label %for.body, label %exit, !llvm.loop !10  ; !10 is defined outside this excerpt; the expected output reports "pragma-count(4)"
+
+exit:
+  ret i32 %add
+}
+
+; The contradictory "unable to fully unroll" then "completely unrolled" remarks are expected:
+; we're artificially limiting the pragma path with -pragma-unroll-full-max-iterations=100
+; while the heuristic path remains unconstrained. This won't happen with default flags.
+; PRAGMA-TC-TOO-LARGE-LABEL:Loop Unroll: F[pragma_full_tc_too_large] Loop %for.body (depth=1)
+; PRAGMA-TC-TOO-LARGE-NEXT:Loop Size = 6
+; PRAGMA-TC-TOO-LARGE-NEXT: Computing unroll count: TripCount=200, MaxTripCount=0, TripMultiple=200
+; PRAGMA-TC-TOO-LARGE-NEXT: Explicit unroll requested: pragma-full
+; PRAGMA-TC-TOO-LARGE-NEXT: Trying pragma unroll...
+; PRAGMA-TC-TOO-LARGE-NEXT:  Won't unroll; trip count is too large.
+; PRAGMA-TC-TOO-LARGE-NEXT:remark: <unknown>:0:0: unable to fully unroll loop: trip count 200 exceeds limit 100
+; PRAGMA-TC-TOO-LARGE-NEXT: Trying full unroll...
+; PRAGMA-TC-TOO-LARGE-NEXT:  Unrolling: size 802 < threshold 16384.
+; PRAGMA-TC-TOO-LARGE-NEXT:  Exiting block %for.body: TripCount=200, TripMultiple=0, BreakoutTrip=0
+; PRAGMA-TC-TOO-LARGE-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 200!
+; PRAGMA-TC-TOO-LARGE-NEXT:remark: <unknown>:0:0: completely unrolled loop with 200 iterations
+
+define i32 @pragma_full_tc_too_large(ptr %A) {  ; sums A[0..199]; the latch branch carries loop metadata !0
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]  ; induction variable, starts at 0
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]  ; running sum
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  %load = load i32, ptr %arrayidx
+  %add = add i32 %sum, %load
+  %inc = add i32 %i, 1
+  %cmp = icmp ult i32 %inc, 200  ; trip count 200 exceeds the limit of 100 named in the expected remark
+  br i1 %cmp, label %for.body, label %exit, !llvm.loop !0  ; !0 is defined outside this excerpt; the expected output reports "pragma-full"
+
+exit:
+  ret i32 %add
+}
+
+; COST-ANALYSIS-LABEL:Loop Unroll: F[cost_analysis_detailed] Loop %for.body (depth=1)
+; COST-ANALYSIS-NEXT:Loop Size = 9
+; COST-ANALYSIS-NEXT: Computing unroll count: TripCount=10, MaxTripCount=0, TripMultiple=10
+; COST-ANALYSIS-NEXT: Trying pragma unroll...
+; COST-ANALYSIS-NEXT: Trying full unroll...
+; COST-ANALYSIS-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold 20; checking for cost benefit.
+; COST-ANALYSIS-NEXT:   Starting LoopUnroll profitability analysis...
+; COST-ANALYSIS:   Analysis finished:
+; COST-ANALYSIS-NEXT:   UnrolledCost: {{[0-9]+}}, RolledDynamicCost: {{[0-9]+}}
+; COST-ANALYSIS-NEXT:  Not unrolling: cost {{[0-9]+}} >= boosted threshold {{[0-9]+}}.
+; COST-ANALYSIS-NEXT: Trying upper-bound unroll...
+; COST-ANALYSIS-NEXT: Trying loop peeling...
+; COST-ANALYSIS-NEXT: Trying partial unroll...
+; COST-ANALYSIS-NEXT:  Will not try to unroll partially because -unroll-allow-partial not given
+; COST-ANALYSIS-NEXT: Not unrolling: no viable strategy found.
+
+define i32 @cost_analysis_detailed(ptr %A, ptr %B) {  ; sums A[i]*B[i] for i in 0..9; no loop metadata on the latch
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]  ; induction variable, starts at 0
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]  ; running sum of products
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  %load = load i32, ptr %arrayidx
+  %arrayidx2 = getelementptr inbounds i32, ptr %B, i32 %i  ; second array access, same index
+  %load2 = load i32, ptr %arrayidx2
+  %mul = mul i32 %load, %load2
+  %add = add i32 %sum, %mul
+  %inc = add i32 %i, 1
+  %cmp = icmp ult i32 %inc, 10  ; constant bound; the expected output above reports TripCount=10 and Loop Size = 9
+  br i1 %cmp, label %for.body, label %exit
+
+exit:
+  ret i32 %add
+}
+
+; MAX-COUNT-10-LABEL:Loop Unroll: F[exceeds_max_count] Loop %for.body (depth=1)
+; MAX-COUNT-10-NEXT:Loop Size = 6
+; MAX-COUNT-10-NEXT: Computing unroll count: TripCount=20, MaxTripCount=0, TripMultiple=20
+; MAX-COUNT-10-NEXT: Trying pragma unroll...
+; MAX-COUNT-10-NEXT: Trying full unroll...
+; MAX-COUNT-10-NEXT:  Not unrolling: trip count 20 exceeds max count 10.
+; MAX-COUNT-10-NEXT: Trying upper-bound unroll...
+; MAX-COUNT-10-NEXT: Trying loop peeling...
+; MAX-COUNT-10-NEXT: Trying partial unroll...
+; MAX-COUNT-10-NEXT:  Will not try to unroll partially because -unroll-allow-partial not given
+; MAX-COUNT-10-NEXT: Not unrolling: no viable strategy found.
+
+define i32 @exceeds_max_count(ptr %A) {  ; sums A[0..19]; no loop metadata on the latch
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]  ; induction variable, starts at 0
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]  ; running sum
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  %load = load i32, ptr %arrayidx
+  %add = add i32 %sum, %load
+  %inc = add i32 %i, 1
+  %cmp = icmp ult i32 %inc, 20  ; trip count 20 exceeds the max count of 10 named in the expected output
+  br i1 %cmp, label %for.body, label %exit
+
+exit:
+  ret i32 %add
+}
+
+; PEEL-LABEL:Loop Unroll: F[explicit_peel] Loop %for.body (depth=1)
+; PEEL-NEXT:Loop Size = 6
+; PEEL-NEXT: Computing unroll count: TripCount=100, MaxTripCount=0, TripMultiple=100
+; PEEL-NEXT:  Using explicit peel count: 2.
+; PEEL-NEXT:PEELING loop %for.body with iteration count 2!
+; PEEL-NEXT:remark: <unknown>:0:0: peeled loop by 2 iterations
+
+define i32 @explicit_peel(ptr %A) {  ; sums A[0..99]; no loop metadata on the latch
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]  ; induction variable, starts at 0
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]  ; running sum
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  %load = load i32, ptr %arrayidx
+  %add = add i32 %sum, %load
+  %inc = add i32 %i, 1
+  %cmp = icmp ult i32 %inc, 100  ; constant bound; the expected output above reports TripCount=100
+  br i1 %cmp, label %for.body, label %exit
+
+exit:
+  ret i32 %add
+}
+
+; CHECK-LABEL:Loop Unroll: F[heuristic_peel] Loop %for.header (depth=1)
+; CHECK-NEXT:Loop Size = 11
+; CHECK-NEXT: Computing unroll count: TripCount=0, MaxTripCount={{[0-9]+}}, TripMultiple=1
+; CHECK-NEXT: Trying pragma unroll...
+; CHECK-NEXT: Trying full unroll...
+; CHECK-NEXT: Trying upper-bound unroll...
+; CHECK-NEXT: Trying loop peeling...
+; CHECK-NEXT:  Peeling with count: 1.
+; CHECK-NEXT:PEELING loop %for.header with iteration count 1!
+; CHECK-NEXT:remark: <unknown>:0:0: peeled loop by 1 iterations
+
+define i32 @heuristic_peel(ptr %A, i32 %n) {  ; runtime-bounded sum whose first iteration takes a different path from the rest
+entry:
+  %cmp.entry = icmp sgt i32 %n, 0  ; guard: return 0 through %exit.early when %n <= 0
+  br i1 %cmp.entry, label %for.header, label %exit.early
+
+for.header:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.latch ]  ; induction variable, starts at 0
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.latch ]  ; running sum
+  ; This comparison becomes false after first iteration - enables peeling
+  %first = icmp eq i32 %i, 0
+  br i1 %first, label %special, label %normal
+
+special:
+  %load1 = load i32, ptr %A  ; first iteration reads A[0] directly through %A
+  br label %for.latch
+
+normal:
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i  ; later iterations index A by %i
+  %load2 = load i32, ptr %arrayidx
+  br label %for.latch
+
+for.latch:
+  %val = phi i32 [ %load1, %special ], [ %load2, %normal ]  ; merges the value loaded on either path
+  %add = add i32 %sum, %val
+  %inc = add i32 %i, 1
+  %cmp = icmp slt i32 %inc, %n  ; non-constant bound; the expected output above reports TripCount=0
+  br i1 %cmp, label %for.header, label %exit
+
+exit:
+  ret i32 %add
+
+exit.early:
+  ret i32 0
+}
+
+; THRESHOLDS-ZERO-LABEL:Loop Unroll: F[thresholds_zero] Loop %for.body (depth=1)
+; THRESHOLDS-ZERO-NEXT: Not unrolling: all thresholds are zero.
+; THRESHOLDS-ZERO-NEXT:remark: <unknown>:0:0: unable to unroll loop: unroll threshold is zero
+
+define i32 @thresholds_zero(ptr %A) {  ; sums A[0..9]; the latch branch carries loop metadata !1
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]  ; induction variable, starts at 0
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]  ; running sum
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  %load = load i32, ptr %arrayidx
+  %add = add i32 %sum, %load
+  %inc = add i32 %i, 1
+  %cmp = icmp ult i32 %inc, 10  ; constant bound of 10
+  br i1 %cmp, label %for.body, label %exit, !llvm.loop !1  ; !1 is defined outside this excerpt; other expectations in this file report it as "pragma-enable"
+
+exit:
+  ret i32 %add
+}
+
+; NESTED-COST-LABEL:Loop Unroll: F[nested_cost_analysis] Loop %inner.header (depth=2)
+; NESTED-COST-NEXT: Not unrolling: transformation disabled by metadata.
+; NESTED-COST-LABEL:Loop Unroll: F[nested_cost_analysis] Loop %outer.header (depth=1)
+; NESTED-COST-NEXT:Loop Size = 11
+; NESTED-COST-NEXT: Computing unroll count: TripCount=4, MaxTripCount=0, TripMultiple=4
+; NESTED-COST-NEXT: Trying pragma unroll...
+; NESTED-COST-NEXT: Trying full unroll...
+; NESTED-COST-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold 30; checking for cost benefit.
+; NESTED-COST-NEXT:   Not analyzing loop cost: not an innermost loop.
+; NESTED-COST-NEXT:  Skipping: cost analysis unavailable.
+; NESTED-COST-NEXT: Trying upper-bound unroll...
+; NESTED-COST-NEXT: Trying loop peeling...
+; NESTED-COST-NEXT: Trying partial unroll...
+; NESTED-COST-NEXT:  Will not try to unroll partially because -unroll-allow-partial not given
+; NESTED-COST-NEXT: Not unrolling: no viable strategy found.
+
+define i32 @nested_cost_analysis(ptr %A) {  ; two-level loop nest: outer bound 4, inner bound 100
+entry:
+  br label %outer.header
+
+outer.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %outer.latch ]  ; outer induction variable, starts at 0
+  br label %inner.header
+
+inner.header:
+  %j = phi i32 [ 0, %outer.header ], [ %j.inc, %inner.header ]  ; inner induction variable, restarts at 0 for each outer iteration
+  %sum = phi i32 [ 0, %outer.header ], [ %add, %inner.header ]  ; inner running sum, also restarts at 0 for each outer iteration
+  %idx = add i32 %i, %j  ; element index is i + j
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %idx
+  %load = load i32, ptr %arrayidx
+  %add = add i32 %sum, %load
+  %j.inc = add i32 %j, 1
+  %inner.cmp = icmp ult i32 %j.inc, 100  ; inner loop bound of 100
+  br i1 %inner.cmp, label %inner.header, label %outer.latch, !llvm.loop !3  ; !3 is defined outside this excerpt; the expected output reports the inner loop as disabled by metadata
+
+outer.latch:
+  %i.inc = add i32 %i, 1
+  %outer.cmp = icmp ult i32 %i.inc, 4  ; outer loop bound; the expected output above reports TripCount=4
+  br i1 %outer.cmp, label %outer.header, label %exit
+
+exit:
+  ret i32 %add  ; returns the inner sum from the last outer iteration
+}
+
+; PRAGMA-TC-TOO-LARGE-LABEL:Loop Unroll: F[partial_instead_of_full] Loop %for.body (depth=1)
+; PRAGMA-TC-TOO-LARGE-NEXT:Loop Size = 6
+; PRAGMA-TC-TOO-LARGE-NEXT: Computing unroll count: TripCount=5000, MaxTripCount=0, TripMultiple=5000
+; PRAGMA-TC-TOO-LARGE-NEXT: Explicit unroll requested: pragma-full
+; PRAGMA-TC-TOO-LARGE-NEXT: Trying pragma unroll...
+; PRAGMA-TC-TOO-LARGE-NEXT:  Won't unroll; trip count is too large.
+; PRAGMA-TC-TOO-LARGE-NEXT:remark: <unknown>:0:0: unable to fully unroll loop: trip count 5000 exceeds limit 100
+; PRAGMA-TC-TOO-LARGE-NEXT: Trying full unroll...
+; PRAGMA-TC-TOO-LARGE-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit.
+; PRAGMA-TC-TOO-LARGE-NEXT:   Not analyzing loop cost: trip count too large.
+; PRAGMA-TC-TOO-LARGE-NEXT:  Skipping: cost analysis unavailable.
+; PRAGMA-TC-TOO-LARGE-NEXT:remark: <unknown>:0:0: unable to fully unroll loop: estimated unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}
+; PRAGMA-TC-TOO-LARGE-NEXT: Trying upper-bound unroll...
+; PRAGMA-TC-TOO-LARGE-NEXT: Trying loop peeling...
+; PRAGMA-TC-TOO-LARGE-NEXT: Trying partial unroll...
+; PRAGMA-TC-TOO-LARGE-NEXT:  Unrolled size exceeds threshold; reducing count from {{[0-9]+}} to {{[0-9]+}}.
+; PRAGMA-TC-TOO-LARGE-NEXT:  Partially unrolling with count: 2500
+; PRAGMA-TC-TOO-LARGE-NEXT: Partial unroll instead of full: unrolled size too large. Unrolling 2500 times instead of 5000.
+; PRAGMA-TC-TOO-LARGE-NEXT:remark: <unknown>:0:0: unable to fully unroll loop as directed by full unroll pragma because unrolled size is too large
+; PRAGMA-TC-TOO-LARGE-NEXT:  Exiting block %for.body: TripCount=5000, TripMultiple=0, BreakoutTrip=0
+; PRAGMA-TC-TOO-LARGE-NEXT:UNROLLING loop %for.body by 2500!
+; PRAGMA-TC-TOO-LARGE-NEXT:remark: <unknown>:0:0: unrolled loop by a factor of 2500
+
+define i32 @partial_instead_of_full(ptr %A) {  ; sums A[0..4999]; the latch branch carries loop metadata !0
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]  ; induction variable, starts at 0
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]  ; running sum
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  %load = load i32, ptr %arrayidx
+  %add = add i32 %sum, %load
+  %inc = add i32 %i, 1
+  %cmp = icmp ult i32 %inc, 5000  ; constant bound; the expected output above reports TripCount=5000
+  br i1 %cmp, label %for.body, label %exit, !llvm.loop !0  ; !0 is defined outside this excerpt; the expected output reports "pragma-full"
+
+exit:
+  ret i32 %add
+}
+
+; NO-PROFIT-LABEL:Loop Unroll: F[no_profitable_count] Loop %for.body (depth=1)
+; NO-PROFIT-NEXT:Loop Size = 6
+; NO-PROFIT-NEXT: Computing unroll count: TripCount=100, MaxTripCount=0, TripMultiple=100
+; NO-PROFIT-NEXT: Trying pragma unroll...
+; NO-PROFIT-NEXT: Trying full unroll...
+; NO-PROFIT-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit.
+; NO-PROFIT-NEXT:   Not analyzing loop cost: trip count too large.
+; NO-PROFIT-NEXT:  Skipping: cost analysis unavailable.
+; NO-PROFIT-NEXT: Trying upper-bound unroll...
+; NO-PROFIT-NEXT: Trying loop peeling...
+; NO-PROFIT-NEXT: Trying partial unroll...
+; NO-PROFIT-NEXT:  Unrolled size exceeds threshold; reducing count from {{[0-9]+}} to {{[0-9]+}}.
+; NO-PROFIT-NEXT:  Will not partially unroll: no profitable count.
+; NO-PROFIT-NEXT:  Partially unrolling with count: 0
+; NO-PROFIT-NEXT: Not unrolling: no viable strategy found.
+
+define i32 @no_profitable_count(ptr %A) {  ; sums A[0..99]; no loop metadata on the latch
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]  ; induction variable, starts at 0
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]  ; running sum
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  %load = load i32, ptr %arrayidx
+  %add = add i32 %sum, %load
+  %inc = add i32 %i, 1
+  %cmp = icmp ult i32 %inc, 100  ; constant bound; the expected output above reports TripCount=100
+  br i1 %cmp, label %for.body, label %exit
+
+exit:
+  ret i32 %add
+}
+
+; CHECK-LABEL:Loop Unroll: F[extended_convergence] Loop %for.body (depth=1)
+; CHECK-NEXT: Not unrolling: contains convergent operations.
+; CHECK-NEXT:remark: <unknown>:0:0: unable to unroll loop: contains convergent operations
+
+declare void @convergent_func() convergent  ; convergent callee, called after the loop with the loop's token
+declare token @llvm.experimental.convergence.anchor()  ; intrinsic that produces the convergence token used below
+
+define i32 @extended_convergence(ptr %A, i32 %n) {  ; loop whose convergence token is consumed outside the loop
+entry:
+  br label %for.body, !llvm.loop !1  ; NOTE(review): this entry branch is not a latch; the !llvm.loop attachment here looks unintentional - confirm
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]  ; induction variable, starts at 0
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]  ; running sum
+  %tok = call token @llvm.experimental.convergence.anchor()  ; token defined inside the loop body
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  %load = load i32, ptr %arrayidx
+  %add = add i32 %sum, %load
+  %inc = add i32 %i, 1
+  %cmp = icmp slt i32 %inc, %n  ; non-constant bound; no entry guard, so the body executes at least once
+  br i1 %cmp, label %for.body, label %exit, !llvm.loop !1  ; !1 is defined outside this excerpt
+
+exit:
+  ; Using convergence token outside the loop creates ExtendedLoop convergence
+  call void @convergent_func() [ "convergencectrl"(token %tok) ]
+  ret i32 %add
+}
+
+; CHECK-LABEL:Loop Unroll: F[noduplicate_prevents_unroll] Loop %for.body (depth=1)
+; CHECK-NEXT: Not unrolling: contains non-duplicatable instructions.
+; CHECK-NEXT:remark: <unknown>:0:0: unable to unroll loop: contains non-duplicatable instructions
+
+declare void @noduplicate_func() noduplicate  ; callee marked noduplicate, called from the loop body
+
+define i32 @noduplicate_prevents_unroll(ptr %A) {  ; sums A[0..7] while calling a noduplicate function on every iteration
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]  ; induction variable, starts at 0
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]  ; running sum
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  %load = load i32, ptr %arrayidx
+  ; noduplicate attribute prevents loop unrolling
+  call void @noduplicate_func()
+  %add = add i32 %sum, %load
+  %inc = add i32 %i, 1
+  %cmp = icmp ult i32 %inc, 8  ; constant bound of 8
+  br i1 %cmp, label %for.body, label %exit, !llvm.loop !1  ; !1 is defined outside this excerpt; other expectations in this file report it as "pragma-enable"
+
+exit:
+  ret i32 %add
+}
+
+; UNROLL-AS-DIRECTED-FAIL-LABEL:Loop Unroll: F[unroll_as_directed_fail] Loop %for.body (depth=1)
+; UNROLL-AS-DIRECTED-FAIL-NEXT:Loop Size = 19
+; UNROLL-AS-DIRECTED-FAIL-NEXT: Computing unroll count: TripCount=100, MaxTripCount=0, TripMultiple=100
+; UNROLL-AS-DIRECTED-FAIL-NEXT: Explicit unroll requested: pragma-enable
+; UNROLL-AS-DIRECTED-FAIL-NEXT: Trying pragma unroll...
+; UNROLL-AS-DIRECTED-FAIL-NEXT: Trying full unroll...
+; UNROLL-AS-DIRECTED-FAIL-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit.
+; UNROLL-AS-DIRECTED-FAIL-NEXT:   Not analyzing loop cost: trip count too large.
+; UNROLL-AS-DIRECTED-FAIL-NEXT:  Skipping: cost analysis unavailable.
+; UNROLL-AS-DIRECTED-FAIL-NEXT: Trying upper-bound unroll...
+; UNROLL-AS-DIRECTED-FAIL-NEXT: Trying loop peeling...
+; UNROLL-AS-DIRECTED-FAIL-NEXT: Trying partial unroll...
+; UNROLL-AS-DIRECTED-FAIL-NEXT:  Unrolled size exceeds threshold; reducing count from {{[0-9]+}} to 0.
+; UNROLL-AS-DIRECTED-FAIL-NEXT:  Will not partially unroll: no profitable count.
+; UNROLL-AS-DIRECTED-FAIL-NEXT:  Partially unrolling with count: 0
+; UNROLL-AS-DIRECTED-FAIL-NEXT: Not unrolling as directed: unrolled size too large.
+; UNROLL-AS-DIRECTED-FAIL-NEXT:remark: <unknown>:0:0: unable to fully unroll loop as directed by unroll metadata because unrolled size is too large
+
+define i32 @unroll_as_directed_fail(ptr %A, ptr %B, ptr %C, ptr %D) {  ; larger loop body (four loads per iteration); the latch carries loop metadata !1
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]  ; induction variable, starts at 0
+  %sum = phi i32 [ 0, %entry ], [ %add8, %for.body ]  ; running sum, carried by %add8
+  %idx1 = add i32 %i, 0  ; A is read at i
+  %arrayidx1 = getelementptr inbounds i32, ptr %A, i32 %idx1
+  %load1 = load i32, ptr %arrayidx1
+  %idx2 = add i32 %i, 1  ; B is read at i + 1
+  %arrayidx2 = getelementptr inbounds i32, ptr %B, i32 %idx2
+  %load2 = load i32, ptr %arrayidx2
+  %idx3 = add i32 %i, 2  ; C is read at i + 2
+  %arrayidx3 = getelementptr inbounds i32, ptr %C, i32 %idx3
+  %load3 = load i32, ptr %arrayidx3
+  %idx4 = add i32 %i, 3  ; D is read at i + 3
+  %arrayidx4 = getelementptr inbounds i32, ptr %D, i32 %idx4
+  %load4 = load i32, ptr %arrayidx4
+  %add1 = add i32 %sum, %load1
+  %add2 = add i32 %add1, %load2
+  %add3 = add i32 %add2, %load3
+  %add8 = add i32 %add3, %load4  ; final accumulator for this iteration
+  %inc = add i32 %i, 1
+  %cmp = icmp ult i32 %inc, 100  ; constant bound; the expected output above reports TripCount=100 and Loop Size = 19
+  br i1 %cmp, label %for.body, label %exit, !llvm.loop !1  ; !1 is defined outside this excerpt; the expected output reports "pragma-enable"
+
+exit:
+  ret i32 %add8
+}
+
+; UNROLL-AS-DIRECTED-FAIL-LABEL:Loop Unroll: F[full_unroll_as_directed_fail] Loop %for.body (depth=1)
+; UNROLL-AS-DIRECTED-FAIL-NEXT:Loop Size = 19
+; UNROLL-AS-DIRECTED-FAIL-NEXT: Computing unroll count: TripCount=100, MaxTripCount=0, TripMultiple=100
+; UNROLL-AS-DIRECTED-FAIL-NEXT: Explicit unroll requested: pragma-full
+; UNROLL-AS-DIRECTED-FAIL-NEXT: Trying pragma unroll...
+; UNROLL-AS-DIRECTED-FAIL-NEXT:  Won't unroll; trip count is too large.
+; UNROLL-AS-DIRECTED-FAIL-NEXT:remark: <unknown>:0:0: unable to fully unroll loop: trip count 100 exceeds limit 10
+; UNROLL-AS-DIRECTED-FAIL-NEXT: Trying full unroll...
+; UNROLL-AS-DIRECTED-FAIL-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit.
+; UNROLL-AS-DIRECTED-FAIL-NEXT:   Not analyzing loop cost: trip count too large.
+; UNROLL-AS-DIRECTED-FAIL-NEXT:  Skipping: cost analysis unavailable.
+; UNROLL-AS-DIRECTED-FAIL-NEXT:remark: <unknown>:0:0: unable to fully unroll loop: estimated unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}
+; UNROLL-AS-DIRECTED-FAIL-NEXT: Trying upper-bound unroll...
+; UNROLL-AS-DIRECTED-FAIL-NEXT: Trying loop peeling...
+; UNROLL-AS-DIRECTED-FAIL-NEXT: Trying partial unroll...
+; UNROLL-AS-DIRECTED-FAIL-NEXT:  Unrolled size exceeds threshold; reducing count from {{[0-9]+}} to 0.
+; UNROLL-AS-DIRECTED-FAIL-NEXT:  Will not partially unroll: no profitable count.
+; UNROLL-AS-DIRECTED-FAIL-NEXT:  Partially unrolling with count: 0
+; UNROLL-AS-DIRECTED-FAIL-NEXT: Not unrolling as directed: unrolled size too large.
+; UNROLL-AS-DIRECTED-FAIL-NEXT:remark: <unknown>:0:0: unable to fully unroll loop as directed by unroll metadata because unrolled size is too large
+
+define i32 @full_unroll_as_directed_fail(ptr %A, ptr %B, ptr %C, ptr %D) {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %sum = phi i32 [ 0, %entry ], [ %add8, %for.body ]
+  %idx1 = add i32 %i, 0
+  %arrayidx1 = getelementptr inbounds i32, ptr %A, i32 %idx1
+  %load1 = load i32, ptr %arrayidx1
+  %idx2 = add i32 %i, 1
+  %arrayidx2 = getelementptr inbounds i32, ptr %B, i32 %idx2
+  %load2 = load i32, ptr %arrayidx2
+  %idx3 = add i32 %i, 2
+  %arrayidx3 = getelementptr inbounds i32, ptr %C, i32 %idx3
+  %load3 = load i32, ptr %arrayidx3
+  %idx4 = add i32 %i, 3
+  %arrayidx4 = getelementptr inbounds i32, ptr %D, i32 %idx4
+  %load4 = load i32, ptr %arrayidx4
+  %add1 = add i32 %sum, %load1  ; fold the four loaded values into the running sum
+  %add2 = add i32 %add1, %load2
+  %add3 = add i32 %add2, %load3
+  %add8 = add i32 %add3, %load4
+  %inc = add i32 %i, 1
+  %cmp = icmp ult i32 %inc, 100  ; constant bound 100 (expected output reports TripCount=100)
+  br i1 %cmp, label %for.body, label %exit, !llvm.loop !0  ; !0 carries llvm.loop.unroll.full
+
+exit:
+  ret i32 %add8
+}
+
+; CHECK-LABEL:Loop Unroll: F[indirectbr_loop] Loop %for.body (depth=1)
+; CHECK-NEXT: Not unrolling loop which is not in loop-simplify form.
+; CHECK-NEXT:remark: <unknown>:0:0: unable to unroll loop: not in loop-simplify form
+
+define i32 @indirectbr_loop(ptr %A, ptr %target) {
+entry:
+  indirectbr ptr %target, [label %for.body, label %exit]  ; loop is entered via indirectbr; expected output reports it as not in loop-simplify form
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  %load = load i32, ptr %arrayidx
+  %add = add i32 %sum, %load
+  %inc = add i32 %i, 1
+  %cmp = icmp ult i32 %inc, 10
+  br i1 %cmp, label %for.body, label %exit, !llvm.loop !1  ; !1 carries llvm.loop.unroll.enable
+
+exit:
+  %result = phi i32 [ 0, %entry ], [ %add, %for.body ]  ; 0 when the loop was bypassed from %entry
+  ret i32 %result
+}
+
+; USER-COUNT-EXCEED-LABEL:Loop Unroll: F[user_count_exceed] Loop %for.body (depth=1)
+; USER-COUNT-EXCEED-NEXT:Loop Size = 5
+; USER-COUNT-EXCEED-NEXT: Computing unroll count: TripCount=16, MaxTripCount=0, TripMultiple=16
+; USER-COUNT-EXCEED-NEXT: Explicit unroll requested: user-count
+; USER-COUNT-EXCEED-NEXT: Trying pragma unroll...
+; USER-COUNT-EXCEED-NEXT:  Not unrolling with user count 8: exceeds threshold.
+; USER-COUNT-EXCEED-NEXT: Trying full unroll...
+; USER-COUNT-EXCEED-NEXT:  Unrolling: size 50 < threshold 16384.
+; USER-COUNT-EXCEED-NEXT:  Exiting block %for.body: TripCount=16, TripMultiple=0, BreakoutTrip=0
+; USER-COUNT-EXCEED-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 16!
+; USER-COUNT-EXCEED-NEXT:remark: <unknown>:0:0: completely unrolled loop with 16 iterations
+
+define void @user_count_exceed(ptr %A) {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  store i32 %i, ptr %arrayidx  ; A[i] = i
+  %inc = add i32 %i, 1
+  %cmp = icmp ult i32 %inc, 16  ; constant bound 16 (expected output reports TripCount=16)
+  br i1 %cmp, label %for.body, label %exit  ; no !llvm.loop metadata on this loop
+
+exit:
+  ret void
+}
+
+; CHECK-LABEL:Loop Unroll: F[inline_prevents_unroll] Loop %for.body (depth=1)
+; CHECK-NEXT:Loop Size = 8
+; CHECK-NEXT: Not unrolling loop with inlinable calls.
+; CHECK-NEXT:remark: <unknown>:0:0: unable to unroll loop: contains inlinable calls
+
+; Internal helper whose single use is the call in @inline_prevents_unroll below - this makes it an inline candidate
+define internal i32 @single_use_helper(i32 %x) {
+  %add = add i32 %x, 42
+  ret i32 %add
+}
+
+define i32 @inline_prevents_unroll(ptr %A) {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  %load = load i32, ptr %arrayidx
+  %helper_result = call i32 @single_use_helper(i32 %load)  ; call to the internal helper; expected output reports "inlinable calls"
+  %add = add i32 %sum, %helper_result
+  %inc = add i32 %i, 1
+  %cmp = icmp ult i32 %inc, 10
+  br i1 %cmp, label %for.body, label %exit, !llvm.loop !11  ; !11 carries llvm.loop.unroll.enable
+
+exit:
+  ret i32 %add
+}
+
+; NO-REMAINDER-LABEL:Loop Unroll: F[small_max_trip_count] Loop %for.body (depth=1)
+; NO-REMAINDER-NEXT:Loop Size = 5
+; NO-REMAINDER-NEXT: Computing unroll count: TripCount=0, MaxTripCount=5, TripMultiple=1
+; NO-REMAINDER-NEXT: Explicit unroll requested: pragma-count(4)
+; NO-REMAINDER-NEXT: Trying pragma unroll...
+; NO-REMAINDER-NEXT:  Not unrolling with pragma count 4: remainder not allowed, count does not divide trip multiple 1.
+; NO-REMAINDER-NEXT:remark: <unknown>:0:0: unable to unroll loop with count 4: remainder loop is restricted and count does not divide trip multiple 1
+; NO-REMAINDER-NEXT: Trying full unroll...
+; NO-REMAINDER-NEXT: Trying upper-bound unroll...
+; NO-REMAINDER-NEXT: Trying loop peeling...
+; NO-REMAINDER-NEXT: Trying partial unroll...
+; NO-REMAINDER-NEXT: Trying runtime unroll...
+; NO-REMAINDER-NEXT:  Not runtime unrolling: max trip count {{[0-9]+}} is small (< 8) and not forced.
+; NO-REMAINDER-NEXT:remark: <unknown>:0:0: unable to runtime unroll loop: max trip count {{[0-9]+}} is too small (< {{[0-9]+}})
+; NO-REMAINDER-NEXT: Not unrolling: no viable strategy found.
+
+define void @small_max_trip_count(ptr %A, i32 %n) {
+entry:
+  ; Clamp n to max of 5, so MaxTripCount will be 5 (< default MaxUpperBound of 8)
+  %clamped = call i32 @llvm.umin.i32(i32 %n, i32 5)
+  %cmp.entry = icmp ugt i32 %clamped, 0  ; skip the loop entirely when the clamped bound is 0
+  br i1 %cmp.entry, label %for.body, label %exit
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  store i32 %i, ptr %arrayidx
+  %inc = add nuw nsw i32 %i, 1
+  %cmp = icmp ult i32 %inc, %clamped  ; bound is a runtime value (expected output reports TripCount=0, MaxTripCount=5)
+  br i1 %cmp, label %for.body, label %exit, !llvm.loop !13  ; !13 carries llvm.loop.unroll.count 4
+
+exit:
+  ret void
+}
+
+declare i32 @llvm.umin.i32(i32, i32)
+
+; NO-STRATEGY-LABEL:Loop Unroll: F[no_strategy_pragma] Loop %for.body (depth=1)
+; NO-STRATEGY-NEXT:Loop Size = 5
+; NO-STRATEGY-NEXT: Computing unroll count: TripCount=0, MaxTripCount={{[0-9]+}}, TripMultiple=1
+; NO-STRATEGY-NEXT: Explicit unroll requested: pragma-enable
+; NO-STRATEGY-NEXT: Trying pragma unroll...
+; NO-STRATEGY-NEXT: Trying full unroll...
+; NO-STRATEGY-NEXT: Trying upper-bound unroll...
+; NO-STRATEGY-NEXT: Trying loop peeling...
+; NO-STRATEGY-NEXT: Trying partial unroll...
+; NO-STRATEGY-NEXT: Trying runtime unroll...
+; NO-STRATEGY-NEXT: Not unrolling: no viable strategy found.
+; NO-STRATEGY-NEXT:remark: <unknown>:0:0: unable to unroll loop: no viable unroll count found
+
+define void @no_strategy_pragma(ptr %A, i32 %n) {
+entry:
+  %cmp.entry = icmp ugt i32 %n, 0  ; skip the loop entirely when %n is 0
+  br i1 %cmp.entry, label %for.body, label %exit
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  store i32 %i, ptr %arrayidx
+  %inc = add nuw nsw i32 %i, 1
+  %cmp = icmp ult i32 %inc, %n  ; bound is the runtime argument %n (expected output reports TripCount=0)
+  br i1 %cmp, label %for.body, label %exit, !llvm.loop !12  ; !12 carries llvm.loop.unroll.enable
+
+exit:
+  ret void
+}
+
+; We get contradictory remarks here: full unroll is blocked by -unroll-full-max-count=10,
+; but partial unroll picks count=20 (the full trip count) anyway. This is a test artifact.
+; MAX-COUNT-10-LABEL:Loop Unroll: F[tc_exceeds_max_ore] Loop %for.body (depth=1)
+; MAX-COUNT-10-NEXT:Loop Size = 6
+; MAX-COUNT-10-NEXT: Computing unroll count: TripCount=20, MaxTripCount=0, TripMultiple=20
+; MAX-COUNT-10-NEXT: Explicit unroll requested: pragma-full
+; MAX-COUNT-10-NEXT: Trying pragma unroll...
+; MAX-COUNT-10-NEXT:  Won't unroll; trip count is too large.
+; MAX-COUNT-10-NEXT:remark: <unknown>:0:0: unable to fully unroll loop: trip count 20 exceeds limit 10
+; MAX-COUNT-10-NEXT: Trying full unroll...
+; MAX-COUNT-10-NEXT:  Not unrolling: trip count 20 exceeds max count 10.
+; MAX-COUNT-10-NEXT:remark: <unknown>:0:0: unable to fully unroll loop: trip count 20 exceeds maximum full unroll count 10
+; MAX-COUNT-10-NEXT: Trying upper-bound unroll...
+; MAX-COUNT-10-NEXT: Trying loop peeling...
+; MAX-COUNT-10-NEXT: Trying partial unroll...
+; MAX-COUNT-10-NEXT:  Partially unrolling with count: 20
+; MAX-COUNT-10-NEXT:  Exiting block %for.body: TripCount=20, TripMultiple=0, BreakoutTrip=0
+; MAX-COUNT-10-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 20!
+; MAX-COUNT-10-NEXT:remark: <unknown>:0:0: completely unrolled loop with 20 iterations
+
+define i32 @tc_exceeds_max_ore(ptr %A) {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  %load = load i32, ptr %arrayidx
+  %add = add i32 %sum, %load
+  %inc = add i32 %i, 1
+  %cmp = icmp ult i32 %inc, 20  ; constant bound 20 (expected output reports TripCount=20)
+  br i1 %cmp, label %for.body, label %exit, !llvm.loop !0  ; !0 carries llvm.loop.unroll.full
+
+exit:
+  ret i32 %add
+}
+
+; CHECK-LABEL:Loop Unroll: F[caller_with_inlined_loop] Loop %for.body.i (depth=1)
+; CHECK-NEXT:remark: inlined.c:5:3: loop is from inlined function; call site is at caller.c:10
+; CHECK-NEXT:Loop Size = 6
+; CHECK-NEXT: Computing unroll count: TripCount=4, MaxTripCount=0, TripMultiple=4
+; CHECK-NEXT: Explicit unroll requested: pragma-enable
+; CHECK-NEXT: Trying pragma unroll...
+; CHECK-NEXT: Trying full unroll...
+; CHECK-NEXT:  Unrolling: size 18 < threshold 16384.
+; CHECK-NEXT:  Exiting block %for.body.i: TripCount=4, TripMultiple=0, BreakoutTrip=0
+; CHECK-NEXT:COMPLETELY UNROLLING loop %for.body.i with trip count 4!
+; CHECK-NEXT:remark: inlined.c:5:3: completely unrolled loop with 4 iterations
+
+define i32 @caller_with_inlined_loop(ptr %A) !dbg !20 {
+entry:
+  br label %for.body.i, !dbg !21  ; !21 = caller.c line 10, column 3
+
+for.body.i:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body.i ]
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body.i ]
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i, !dbg !22  ; !22 = inlined.c line 5, column 3, inlinedAt !21
+  %load = load i32, ptr %arrayidx, !dbg !22
+  %add = add i32 %sum, %load, !dbg !22
+  %inc = add i32 %i, 1, !dbg !22
+  %cmp = icmp ult i32 %inc, 4, !dbg !22  ; constant bound 4 (expected output reports TripCount=4)
+  br i1 %cmp, label %for.body.i, label %exit, !dbg !22, !llvm.loop !23  ; !23 carries llvm.loop.unroll.enable plus location !22
+
+exit:
+  ret i32 %add, !dbg !21
+}
+
+; Same contradiction as in tc_exceeds_max_ore above: the low -unroll-threshold=20 and
+; -pragma-unroll-threshold=20 flags make cost analysis reject full unroll, but partial unroll uses
+; different heuristics and picks count=8 (the full trip count). Only happens with these artificial flags.
+; COST-NOT-PROFITABLE-LABEL:Loop Unroll: F[cost_not_profitable] Loop %for.body (depth=1)
+; COST-NOT-PROFITABLE-NEXT:Loop Size = 14
+; COST-NOT-PROFITABLE-NEXT: Computing unroll count: TripCount=8, MaxTripCount=0, TripMultiple=8
+; COST-NOT-PROFITABLE-NEXT: Explicit unroll requested: pragma-enable
+; COST-NOT-PROFITABLE-NEXT: Trying pragma unroll...
+; COST-NOT-PROFITABLE-NEXT: Trying full unroll...
+; COST-NOT-PROFITABLE-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit.
+; COST-NOT-PROFITABLE-NEXT:   Starting LoopUnroll profitability analysis...
+; COST-NOT-PROFITABLE:   Analysis finished:
+; COST-NOT-PROFITABLE-NEXT:   UnrolledCost: {{[0-9]+}}, RolledDynamicCost: {{[0-9]+}}
+; COST-NOT-PROFITABLE-NEXT:  Not unrolling: cost {{[0-9]+}} >= boosted threshold {{[0-9]+}}.
+; COST-NOT-PROFITABLE-NEXT: Trying upper-bound unroll...
+; COST-NOT-PROFITABLE-NEXT: Trying loop peeling...
+; COST-NOT-PROFITABLE-NEXT: Trying partial unroll...
+; COST-NOT-PROFITABLE-NEXT:  Partially unrolling with count: 8
+; COST-NOT-PROFITABLE-NEXT:  Exiting block %for.body: TripCount=8, TripMultiple=0, BreakoutTrip=0
+; COST-NOT-PROFITABLE-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 8!
+; COST-NOT-PROFITABLE-NEXT:remark: <unknown>:0:0: completely unrolled loop with 8 iterations
+
+define i32 @cost_not_profitable(ptr %A, ptr %B, ptr %C) {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %sum = phi i32 [ 0, %entry ], [ %add3, %for.body ]
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  %load1 = load i32, ptr %arrayidx
+  %arrayidx2 = getelementptr inbounds i32, ptr %B, i32 %i
+  %load2 = load i32, ptr %arrayidx2
+  %arrayidx3 = getelementptr inbounds i32, ptr %C, i32 %i
+  %load3 = load i32, ptr %arrayidx3
+  %mul1 = mul i32 %load1, %load2
+  %mul2 = mul i32 %mul1, %load3
+  %add1 = add i32 %sum, %mul2  ; sum += A[i]*B[i]*C[i] + A[i] + B[i]
+  %add2 = add i32 %add1, %load1
+  %add3 = add i32 %add2, %load2
+  %inc = add i32 %i, 1
+  %cmp = icmp ult i32 %inc, 8  ; constant bound 8 (expected output reports TripCount=8)
+  br i1 %cmp, label %for.body, label %exit, !llvm.loop !1  ; !1 carries llvm.loop.unroll.enable
+
+exit:
+  ret i32 %add3
+}
+
+; UPPER-BOUND-HEURISTIC-LABEL:Loop Unroll: F[upper_bound_heuristic] Loop %for.body (depth=1)
+; UPPER-BOUND-HEURISTIC-NEXT:Loop Size = 6
+; UPPER-BOUND-HEURISTIC-NEXT: Computing unroll count: TripCount=0, MaxTripCount=3, TripMultiple=1
+; UPPER-BOUND-HEURISTIC-NEXT: Trying pragma unroll...
+; UPPER-BOUND-HEURISTIC-NEXT: Trying full unroll...
+; UPPER-BOUND-HEURISTIC-NEXT: Trying upper-bound unroll...
+; UPPER-BOUND-HEURISTIC-NEXT:  Unrolling: size {{[0-9]+}} < threshold {{[0-9]+}}.
+; UPPER-BOUND-HEURISTIC-NEXT:  Exiting block %for.body: TripCount=0, TripMultiple=1, BreakoutTrip=1
+; UPPER-BOUND-HEURISTIC-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 3!
+; UPPER-BOUND-HEURISTIC-NEXT:remark: <unknown>:0:0: completely unrolled loop with 3 iterations
+
+define i32 @upper_bound_heuristic(ptr %A, i32 %n) {
+entry:
+  ; Mask n to its low two bits (range 0..3), so MaxTripCount will be 3
+  %n.clamped = and i32 %n, 3
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  %load = load i32, ptr %arrayidx
+  %add = add i32 %sum, %load
+  %inc = add i32 %i, 1
+  %cmp = icmp ult i32 %inc, %n.clamped  ; bound is a runtime value; the body always runs at least once
+  br i1 %cmp, label %for.body, label %exit  ; no !llvm.loop metadata on this loop
+
+exit:
+  ret i32 %add
+}
+
+; FULL-COST-NOT-PROFITABLE-LABEL:Loop Unroll: F[pragma_full_cost_not_profitable] Loop %for.body (depth=1)
+; FULL-COST-NOT-PROFITABLE-NEXT:Loop Size = 9
+; FULL-COST-NOT-PROFITABLE-NEXT: Computing unroll count: TripCount=10, MaxTripCount=0, TripMultiple=10
+; FULL-COST-NOT-PROFITABLE-NEXT: Explicit unroll requested: pragma-full
+; FULL-COST-NOT-PROFITABLE-NEXT: Trying pragma unroll...
+; FULL-COST-NOT-PROFITABLE-NEXT:  Won't unroll; trip count is too large.
+; FULL-COST-NOT-PROFITABLE-NEXT:remark: <unknown>:0:0: unable to fully unroll loop: trip count 10 exceeds limit 8
+; FULL-COST-NOT-PROFITABLE-NEXT: Trying full unroll...
+; FULL-COST-NOT-PROFITABLE-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold 20; checking for cost benefit.
+; FULL-COST-NOT-PROFITABLE-NEXT:   Starting LoopUnroll profitability analysis...
+; FULL-COST-NOT-PROFITABLE:   Analysis finished:
+; FULL-COST-NOT-PROFITABLE-NEXT:   UnrolledCost: {{[0-9]+}}, RolledDynamicCost: {{[0-9]+}}
+; FULL-COST-NOT-PROFITABLE-NEXT:  Not unrolling: cost {{[0-9]+}} >= boosted threshold {{[0-9]+}}.
+; FULL-COST-NOT-PROFITABLE-NEXT:remark: <unknown>:0:0: unable to fully unroll loop: estimated unrolled cost {{[0-9]+}} exceeds boosted threshold {{[0-9]+}}
+; FULL-COST-NOT-PROFITABLE-NEXT: Trying upper-bound unroll...
+; FULL-COST-NOT-PROFITABLE-NEXT: Trying loop peeling...
+; FULL-COST-NOT-PROFITABLE-NEXT: Trying partial unroll...
+; FULL-COST-NOT-PROFITABLE-NEXT:  Partially unrolling with count: 10
+; FULL-COST-NOT-PROFITABLE-NEXT:  Exiting block %for.body: TripCount=10, TripMultiple=0, BreakoutTrip=0
+; FULL-COST-NOT-PROFITABLE-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 10!
+; FULL-COST-NOT-PROFITABLE-NEXT:remark: <unknown>:0:0: completely unrolled loop with 10 iterations
+
+define i32 @pragma_full_cost_not_profitable(ptr %A, ptr %B) {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  %load = load i32, ptr %arrayidx
+  %arrayidx2 = getelementptr inbounds i32, ptr %B, i32 %i
+  %load2 = load i32, ptr %arrayidx2
+  %mul = mul i32 %load, %load2
+  %add = add i32 %sum, %mul  ; sum += A[i] * B[i]
+  %inc = add i32 %i, 1
+  %cmp = icmp ult i32 %inc, 10  ; constant bound 10 (expected output reports TripCount=10)
+  br i1 %cmp, label %for.body, label %exit, !llvm.loop !0  ; !0 carries llvm.loop.unroll.full
+
+exit:
+  ret i32 %add
+}
+
+; =============================================================================
+; Below are regression tests for edge cases in loop unrolling remarks.
+; =============================================================================
+
+; Test that a loop with multiple exits where one has a known trip count and
+; another has an unknown trip count is NOT labeled as upper-bound unroll.
+; CHECK-LABEL:Loop Unroll: F[multi_exit_known_and_unknown] Loop %for.header (depth=1)
+; CHECK-NEXT:Loop Size = 8
+; CHECK-NEXT: Computing unroll count: TripCount=5, MaxTripCount=0, TripMultiple=5
+; CHECK-NEXT: Trying pragma unroll...
+; CHECK-NEXT: Trying full unroll...
+; CHECK-NEXT:  Unrolling: size {{[0-9]+}} < threshold {{[0-9]+}}.
+; CHECK-NEXT:  Exiting block %for.header: TripCount=5, TripMultiple=0, BreakoutTrip=0
+; CHECK-NEXT:  Exiting block %for.body: TripCount=0, TripMultiple=1, BreakoutTrip=1
+; Note: This is a full unroll (not upper-bound) because we have a known trip count exit.
+; CHECK-NEXT:COMPLETELY UNROLLING loop %for.header with trip count 5!
+; CHECK-NEXT:remark: <unknown>:0:0: completely unrolled loop with 5 iterations
+
+define i32 @multi_exit_known_and_unknown(ptr %A, i1 %cond) {
+entry:
+  br label %for.header
+
+for.header:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.latch ]
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.latch ]
+  ; This exit has a known trip count of 5: the header runs for %i = 0..4 and exits at %i == 4
+  %cmp = icmp ult i32 %i, 4
+  br i1 %cmp, label %for.body, label %exit
+
+for.body:
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  %load = load i32, ptr %arrayidx
+  %add = add i32 %sum, %load
+  ; This exit has an unknown trip count: it depends on the runtime argument %cond
+  br i1 %cond, label %for.latch, label %exit
+
+for.latch:
+  %inc = add i32 %i, 1
+  br label %for.header
+
+exit:
+  %result = phi i32 [ %sum, %for.header ], [ %add, %for.body ]
+  ret i32 %result
+}
+
+; Test header-exiting while-style loop with partial unroll and remainder.
+; The latch is NOT the exiting block (unconditional branch), but the header is.
+; PARTIAL-UNROLL-LABEL:Loop Unroll: F[header_exit_with_remainder] Loop %while.header (depth=1)
+; PARTIAL-UNROLL-NEXT:Loop Size = 8
+; PARTIAL-UNROLL-NEXT: Computing unroll count: TripCount=11, MaxTripCount=0, TripMultiple=11
+; PARTIAL-UNROLL-NEXT: Explicit unroll requested: user-count
+; PARTIAL-UNROLL-NEXT: Trying pragma unroll...
+; PARTIAL-UNROLL-NEXT:  Unrolling with user-specified count: 4.
+; PARTIAL-UNROLL-NEXT:Attempting unroll by factor 4 with remainder loop (trip count 11)
+; PARTIAL-UNROLL-NEXT:remark: <unknown>:0:0: attempting unroll by factor 4 with remainder loop (trip count 11)
+; PARTIAL-UNROLL-NEXT:  Exiting block %while.header: TripCount=11, TripMultiple=0, BreakoutTrip=3
+; Note: Should say "with remainder" even though the latch is not the exiting block.
+; PARTIAL-UNROLL-NEXT:UNROLLING loop %while.header by 4!
+; PARTIAL-UNROLL-NEXT:remark: <unknown>:0:0: unrolled loop by a factor of 4
+
+define i32 @header_exit_with_remainder(ptr %A) {
+entry:
+  br label %while.header
+
+while.header:
+  %i = phi i32 [ 0, %entry ], [ %inc, %while.latch ]
+  %sum = phi i32 [ 0, %entry ], [ %add, %while.latch ]
+  ; Exit is in the header (while-style loop); the header runs for %i = 0..10 and exits at %i == 10
+  %cmp = icmp ult i32 %i, 10
+  br i1 %cmp, label %while.body, label %exit
+
+while.body:
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  %load = load i32, ptr %arrayidx
+  %add = add i32 %sum, %load
+  br label %while.latch
+
+while.latch:
+  ; Latch is NOT an exiting block - it ends in an unconditional branch back to the header
+  %inc = add i32 %i, 1
+  br label %while.header
+
+exit:
+  ret i32 %sum
+}
+
+; CHECK-LABEL:Loop Unroll: F[switch_exit_full_unroll_bug] Loop %loop (depth=1)
+; CHECK-NEXT:Loop Size = 6
+; CHECK-NEXT: Computing unroll count: TripCount=5, MaxTripCount=0, TripMultiple=5
+; CHECK-NEXT: Trying pragma unroll...
+; CHECK-NEXT: Trying full unroll...
+; CHECK-NEXT:  Unrolling: size {{[0-9]+}} < threshold {{[0-9]+}}.
+; CHECK-NEXT:COMPLETELY UNROLLING loop %loop with trip count 5!
+; CHECK-NEXT:remark: <unknown>:0:0: completely unrolled loop with 5 iterations
+
+define i32 @switch_exit_full_unroll_bug(ptr %A) {
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %inc, %loop.latch ]
+  %sum = phi i32 [ 0, %entry ], [ %add, %loop.latch ]
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  %load = load i32, ptr %arrayidx
+  %add = add i32 %sum, %load
+  %inc = add i32 %i, 1
+  ; The exit is a switch, not a branch, so it won't be in ExitInfos (no "Exiting block" line is expected)
+  switch i32 %i, label %loop.latch [
+    i32 4, label %exit  ; leaves the loop when %i == 4 (expected output reports TripCount=5)
+  ]
+
+loop.latch:
+  br label %loop
+
+exit:
+  ret i32 %add
+}
+
+; PARTIAL-UNROLL-LABEL:Loop Unroll: F[switch_exit_partial_remainder_bug] Loop %loop (depth=1)
+; PARTIAL-UNROLL-NEXT:Loop Size = 6
+; PARTIAL-UNROLL-NEXT: Computing unroll count: TripCount=11, MaxTripCount=0, TripMultiple=11
+; PARTIAL-UNROLL-NEXT: Explicit unroll requested: user-count
+; PARTIAL-UNROLL-NEXT: Trying pragma unroll...
+; PARTIAL-UNROLL-NEXT:  Unrolling with user-specified count: 4.
+; PARTIAL-UNROLL-NEXT:Attempting unroll by factor 4 with remainder loop (trip count 11)
+; PARTIAL-UNROLL-NEXT:remark: <unknown>:0:0: attempting unroll by factor 4 with remainder loop (trip count 11)
+; PARTIAL-UNROLL-NEXT:UNROLLING loop %loop by 4!
+; PARTIAL-UNROLL-NEXT:remark: <unknown>:0:0: unrolled loop by a factor of 4
+
+define i32 @switch_exit_partial_remainder_bug(ptr %A) {
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %inc, %loop.latch ]
+  %sum = phi i32 [ 0, %entry ], [ %add, %loop.latch ]
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  %load = load i32, ptr %arrayidx
+  %add = add i32 %sum, %load
+  %inc = add i32 %i, 1
+  ; Switch exit with trip count 11: the body runs for %i = 0..10
+  switch i32 %i, label %loop.latch [
+    i32 10, label %exit  ; leaves the loop when %i == 10
+  ]
+
+loop.latch:
+  br label %loop
+
+exit:
+  ret i32 %add
+}
+
+; Metadata definitions
+!0 = distinct !{!0, !5}
+!1 = distinct !{!1, !6}
+!2 = distinct !{!2, !7}
+!3 = distinct !{!3, !8}
+!4 = distinct !{!4, !9}
+!5 = !{!"llvm.loop.unroll.full"}
+!6 = !{!"llvm.loop.unroll.enable"}
+!7 = !{!"llvm.loop.unroll.count", i32 4}
+!8 = !{!"llvm.loop.unroll.disable"}
+!9 = !{!"llvm.loop.unroll.runtime.disable"}
+!10 = distinct !{!10, !7}
+!11 = distinct !{!11, !6}
+!12 = distinct !{!12, !6}
+!13 = distinct !{!13, !7}
+
+; Debug info for inlined loop test
+!llvm.dbg.cu = !{!15}
+!llvm.module.flags = !{!19}
+
+!15 = distinct !DICompileUnit(language: DW_LANG_C99, file: !16, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
+!16 = !DIFile(filename: "caller.c", directory: "/tmp")
+!17 = !DIFile(filename: "inlined.c", directory: "/tmp")
+!18 = distinct !DISubprogram(name: "inlined_func", scope: !17, file: !17, line: 1, type: !24, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !15)
+!19 = !{i32 2, !"Debug Info Version", i32 3}
+!20 = distinct !DISubprogram(name: "caller_with_inlined_loop", scope: !16, file: !16, line: 8, type: !24, isLocal: false, isDefinition: true, scopeLine: 8, isOptimized: true, unit: !15)
+!21 = !DILocation(line: 10, column: 3, scope: !20)
+!22 = !DILocation(line: 5, column: 3, scope: !18, inlinedAt: !21)
+!23 = distinct !{!23, !22, !6}
+!24 = !DISubroutineType(types: !25)
+!25 = !{null}
diff --git a/llvm/test/Transforms/LoopUnroll/debug.ll b/llvm/test/Transforms/LoopUnroll/debug.ll
index 6b611952c0935..ba98003cc2af2 100644
--- a/llvm/test/Transforms/LoopUnroll/debug.ll
+++ b/llvm/test/Transforms/LoopUnroll/debug.ll
@@ -20,6 +20,7 @@
 ; CHECK-NEXT: Trying upper-bound unroll...
 ; CHECK-NEXT: Trying loop peeling...
 ; CHECK-NEXT: Trying partial unroll...
+; CHECK-NEXT: Not fully unrolling as directed: loop has runtime trip count.
 ; CHECK-NEXT: Trying runtime unroll...
 ; CHECK-NEXT:  Will not try to unroll loop with runtime trip count because -unroll-runtime not given
 ; CHECK-NEXT: Not unrolling: no viable strategy found.
@@ -51,6 +52,7 @@ exit:
 ; CHECK-NEXT: Trying full unroll...
 ; CHECK-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit.
 ; CHECK-NEXT:   Not analyzing loop cost: trip count too large.
+; CHECK-NEXT:  Skipping: cost analysis unavailable.
 ; CHECK-NEXT: Trying upper-bound unroll...
 ; CHECK-NEXT: Trying loop peeling...
 ; CHECK-NEXT: Trying partial unroll...
@@ -595,6 +597,7 @@ exit:
 ; PARTIAL-ALLOW-NEXT: Trying full unroll...
 ; PARTIAL-ALLOW-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit.
 ; PARTIAL-ALLOW-NEXT:   Not analyzing loop cost: trip count too large.
+; PARTIAL-ALLOW-NEXT:  Skipping: cost analysis unavailable.
 ; PARTIAL-ALLOW-NEXT: Trying upper-bound unroll...
 ; PARTIAL-ALLOW-NEXT: Trying loop peeling...
 ; PARTIAL-ALLOW-NEXT: Trying partial unroll...
@@ -630,11 +633,13 @@ exit:
 ; CHECK-NEXT: Trying full unroll...
 ; CHECK-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit.
 ; CHECK-NEXT:   Not analyzing loop cost: trip count too large.
+; CHECK-NEXT:  Skipping: cost analysis unavailable.
 ; CHECK-NEXT: Trying upper-bound unroll...
 ; CHECK-NEXT: Trying loop peeling...
 ; CHECK-NEXT: Trying partial unroll...
 ; CHECK-NEXT:  Unrolled size exceeds threshold; reducing count from {{[0-9]+}} to {{[0-9]+}}.
 ; CHECK-NEXT:  Partially unrolling with count: {{[0-9]+}}
+; CHECK-NEXT: Partial unroll instead of full: unrolled size too large. Unrolling {{[0-9]+}} times instead of {{[0-9]+}}.
 ; CHECK-NEXT:  Exiting block %for.body: TripCount=1000001, TripMultiple=0, BreakoutTrip=0
 ; CHECK-NEXT:UNROLLING loop %for.body by {{[0-9]+}}!
 
@@ -674,6 +679,7 @@ exit:
 ; CHECK-NEXT: Trying full unroll...
 ; CHECK-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit.
 ; CHECK-NEXT:   Not analyzing loop cost: not an innermost loop.
+; CHECK-NEXT:  Skipping: cost analysis unavailable.
 ; CHECK-NEXT: Trying upper-bound unroll...
 ; CHECK-NEXT: Trying loop peeling...
 ; CHECK-NEXT: Trying partial unroll...
@@ -843,6 +849,7 @@ exit:
 ; PARTIAL-NOPROFIT-NEXT: Trying full unroll...
 ; PARTIAL-NOPROFIT-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit.
 ; PARTIAL-NOPROFIT-NEXT:   Not analyzing loop cost: trip count too large.
+; PARTIAL-NOPROFIT-NEXT:  Skipping: cost analysis unavailable.
 ; PARTIAL-NOPROFIT-NEXT: Trying upper-bound unroll...
 ; PARTIAL-NOPROFIT-NEXT: Trying loop peeling...
 ; PARTIAL-NOPROFIT-NEXT: Trying partial unroll...

>From 74d72c9233508113bf0a453b7c5d8c5bf13b6a66 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Thu, 2 Apr 2026 22:36:59 +0000
Subject: [PATCH 2/2] Refactor OREs

---
 llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp |  202 +-
 .../LoopUnroll/debug-and-remarks.ll           | 1624 ++++++-----------
 llvm/test/Transforms/LoopUnroll/debug.ll      |  922 ----------
 3 files changed, 596 insertions(+), 2152 deletions(-)
 delete mode 100644 llvm/test/Transforms/LoopUnroll/debug.ll

diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 444504b365038..36bf0d8461c61 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -842,11 +842,11 @@ shouldPragmaUnroll(Loop *L, const UnrollPragmaInfo &PInfo,
                << ": remainder not allowed, count does not divide trip "
                << "multiple " << TripMultiple << ".\n");
     ORE->emit([&]() {
-      return OptimizationRemarkMissed(DEBUG_TYPE, "PragmaUnrollCountRejected",
+      return OptimizationRemarkAnalysis(DEBUG_TYPE, "PragmaUnrollCountRejected",
                                       L->getStartLoc(), L->getHeader())
-             << "unable to unroll loop with count "
+             << "may be unable to unroll loop with count "
              << ore::NV("PragmaCount", PInfo.PragmaCount)
-             << ": remainder loop is restricted and count does not divide "
+             << ": remainder loop is not allowed and count does not divide "
                 "trip multiple "
              << ore::NV("TripMultiple", TripMultiple);
     });
@@ -861,10 +861,10 @@ shouldPragmaUnroll(Loop *L, const UnrollPragmaInfo &PInfo,
         LLVM_DEBUG(dbgs().indent(2)
                    << "Won't unroll; trip count is too large.\n");
         ORE->emit([&]() {
-          return OptimizationRemarkMissed(DEBUG_TYPE,
+          return OptimizationRemarkAnalysis(DEBUG_TYPE,
                                           "PragmaFullUnrollTripCountTooLarge",
                                           L->getStartLoc(), L->getHeader())
-                 << "unable to fully unroll loop: trip count "
+                 << "may be unable to fully unroll loop: trip count "
                  << ore::NV("TripCount", TripCount) << " exceeds limit "
                  << ore::NV("Limit", PragmaUnrollFullMaxIterations);
         });
@@ -875,10 +875,14 @@ shouldPragmaUnroll(Loop *L, const UnrollPragmaInfo &PInfo,
                  << "Fully unrolling with trip count: " << TripCount << ".\n");
       return TripCount;
     }
-    // Note: ORE for unknown trip count is emitted later in computeUnrollCount
-    // after we've exhausted all strategies.
     LLVM_DEBUG(dbgs().indent(2)
                << "Not fully unrolling: unknown trip count.\n");
+    ORE->emit([&]() {
+      return OptimizationRemarkAnalysis(DEBUG_TYPE,
+                                        "PragmaFullUnrollUnknownTripCount",
+                                        L->getStartLoc(), L->getHeader())
+             << "may be unable to fully unroll loop: trip count is unknown";
+    });
   }
 
   if (PInfo.PragmaEnableUnroll && !TripCount && MaxTripCount &&
@@ -895,26 +899,13 @@ static std::optional<unsigned> shouldFullUnroll(
     Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT,
     ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
     const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE,
-    const TargetTransformInfo::UnrollingPreferences &UP,
-    OptimizationRemarkEmitter *ORE) {
+    const TargetTransformInfo::UnrollingPreferences &UP) {
   assert(FullUnrollTripCount && "should be non-zero!");
 
-  const bool PragmaFullUnroll = hasUnrollFullPragma(L);
-
   if (FullUnrollTripCount > UP.FullUnrollMaxCount) {
     LLVM_DEBUG(dbgs().indent(2)
                << "Not unrolling: trip count " << FullUnrollTripCount
                << " exceeds max count " << UP.FullUnrollMaxCount << ".\n");
-    if (PragmaFullUnroll)
-      ORE->emit([&]() {
-        return OptimizationRemarkMissed(DEBUG_TYPE,
-                                        "FullUnrollTripCountTooLarge",
-                                        L->getStartLoc(), L->getHeader())
-               << "unable to fully unroll loop: trip count "
-               << ore::NV("TripCount", FullUnrollTripCount)
-               << " exceeds maximum full unroll count "
-               << ore::NV("MaxFullUnrollCount", UP.FullUnrollMaxCount);
-      });
     return std::nullopt;
   }
 
@@ -948,25 +939,6 @@ static std::optional<unsigned> shouldFullUnroll(
     LLVM_DEBUG(dbgs().indent(2)
                << "Not unrolling: cost " << Cost->UnrolledCost
                << " >= boosted threshold " << BoostedThreshold << ".\n");
-    if (PragmaFullUnroll)
-      ORE->emit([&]() {
-        return OptimizationRemarkMissed(DEBUG_TYPE, "FullUnrollNotProfitable",
-                                        L->getStartLoc(), L->getHeader())
-               << "unable to fully unroll loop: estimated unrolled cost "
-               << ore::NV("UnrolledCost", Cost->UnrolledCost)
-               << " exceeds boosted threshold "
-               << ore::NV("BoostedThreshold", BoostedThreshold);
-      });
-  } else {
-    LLVM_DEBUG(dbgs().indent(2) << "Skipping: cost analysis unavailable.\n");
-    if (PragmaFullUnroll)
-      ORE->emit([&]() {
-        return OptimizationRemarkMissed(DEBUG_TYPE, "FullUnrollSizeTooLarge",
-                                        L->getStartLoc(), L->getHeader())
-               << "unable to fully unroll loop: estimated unrolled size "
-               << ore::NV("UnrolledSize", UnrolledSize) << " exceeds threshold "
-               << ore::NV("Threshold", UP.Threshold);
-      });
   }
 
   return std::nullopt;
@@ -1118,7 +1090,7 @@ void llvm::computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
   if (TripCount) {
     UP.Count = TripCount;
     if (auto UnrollFactor = shouldFullUnroll(L, TTI, DT, SE, EphValues,
-                                             TripCount, UCE, UP, ORE)) {
+                                             TripCount, UCE, UP)) {
       UP.Count = *UnrollFactor;
       return;
     }
@@ -1141,7 +1113,7 @@ void llvm::computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
       MaxTripCount <= UP.MaxUpperBound) {
     UP.Count = MaxTripCount;
     if (auto UnrollFactor = shouldFullUnroll(L, TTI, DT, SE, EphValues,
-                                             MaxTripCount, UCE, UP, ORE)) {
+                                             MaxTripCount, UCE, UP)) {
       UP.Count = *UnrollFactor;
       return;
     }
@@ -1168,49 +1140,10 @@ void llvm::computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
   LLVM_DEBUG(dbgs().indent(1) << "Trying partial unroll...\n");
   if (auto UnrollFactor = shouldPartialUnroll(LoopSize, TripCount, UCE, UP)) {
     UP.Count = *UnrollFactor;
-
-    if (UP.Count) {
-      if (PInfo.PragmaFullUnroll && UP.Count != TripCount) {
-        LLVM_DEBUG(dbgs().indent(1)
-                   << "Partial unroll instead of full: unrolled size "
-                      "too large. Unrolling "
-                   << UP.Count << " times instead of " << TripCount << ".\n");
-        ORE->emit([&]() {
-          return OptimizationRemarkMissed(DEBUG_TYPE,
-                                          "FullUnrollAsDirectedTooLarge",
-                                          L->getStartLoc(), L->getHeader())
-                 << "unable to fully unroll loop as directed by full unroll "
-                    "pragma because unrolled size is too large";
-        });
-      }
-    } else if (PInfo.PragmaFullUnroll || PInfo.PragmaEnableUnroll) {
-      LLVM_DEBUG(dbgs().indent(1)
-                 << "Not unrolling as directed: unrolled size too large.\n");
-      ORE->emit([&]() {
-        return OptimizationRemarkMissed(DEBUG_TYPE, "UnrollAsDirectedTooLarge",
-                                        L->getStartLoc(), L->getHeader())
-               << "unable to fully unroll loop as directed by unroll metadata "
-                  "because unrolled size is too large";
-      });
-    }
-
     return;
   }
   assert(TripCount == 0 &&
          "All cases when TripCount is constant should be covered here.");
-  if (PInfo.PragmaFullUnroll) {
-    LLVM_DEBUG(dbgs().indent(1)
-               << "Not fully unrolling as directed: loop has runtime "
-               << "trip count.\n");
-    ORE->emit([&]() {
-      return OptimizationRemarkMissed(
-                 DEBUG_TYPE, "CantFullUnrollAsDirectedRuntimeTripCount",
-                 L->getStartLoc(), L->getHeader())
-             << "unable to fully unroll loop as directed by "
-                "llvm.loop.unroll.full metadata because loop has a runtime "
-                "trip count";
-    });
-  }
 
   // 7th priority is runtime unrolling.
   LLVM_DEBUG(dbgs().indent(1) << "Trying runtime unroll...\n");
@@ -1227,19 +1160,6 @@ void llvm::computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
     LLVM_DEBUG(dbgs().indent(2)
                << "Not runtime unrolling: max trip count " << MaxTripCount
                << " is small (< " << UP.MaxUpperBound << ") and not forced.\n");
-    // If user specified an unroll count but it was rejected earlier (e.g.,
-    // remainder not allowed or threshold exceeded), they've already been
-    // notified. Emit an additional remark since runtime unroll was their last
-    // chance.
-    if (PInfo.PragmaCount > 0) {
-      ORE->emit([&]() {
-        return OptimizationRemarkMissed(DEBUG_TYPE, "SmallMaxTripCount",
-                                        L->getStartLoc(), L->getHeader())
-               << "unable to runtime unroll loop: max trip count "
-               << ore::NV("MaxTripCount", MaxTripCount) << " is too small (< "
-               << ore::NV("MaxUpperBound", UP.MaxUpperBound) << ")";
-      });
-    }
     UP.Count = 0;
     return;
   }
@@ -1285,22 +1205,6 @@ void llvm::computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
                   "multiple, "
                << TripMultiple << ".  Reducing unroll count from " << OrigCount
                << " to " << UP.Count << ".\n");
-
-    using namespace ore;
-
-    if (PInfo.PragmaCount > 0 && !UP.AllowRemainder)
-      ORE->emit([&]() {
-        return OptimizationRemarkMissed(DEBUG_TYPE,
-                                        "DifferentUnrollCountFromDirected",
-                                        L->getStartLoc(), L->getHeader())
-               << "Unable to unroll loop the number of times directed by "
-                  "llvm.loop.unroll.count metadata because remainder loop is "
-                  "restricted (that could be architecture specific or because "
-                  "the loop contains a convergent instruction) and so must "
-                  "have an unroll count that divides the loop trip multiple of "
-               << NV("TripMultiple", TripMultiple) << ".  Unrolling instead "
-               << NV("UnrollCount", UP.Count) << " time(s).";
-      });
   }
 
   if (UP.Count > UP.MaxCount)
@@ -1337,23 +1241,7 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
                     << L->getHeader()->getParent()->getName() << "] Loop %"
                     << L->getHeader()->getName()
                     << " (depth=" << L->getLoopDepth() << ")\n");
-
   TransformationMode TM = hasUnrollTransformation(L);
-
-  if (TM & TM_ForcedByUser) {
-    if (const DebugLoc &StartLoc = L->getStartLoc()) {
-      if (const DILocation *InlinedAt = StartLoc->getInlinedAt()) {
-        ORE.emit([&]() {
-          return OptimizationRemarkAnalysis(DEBUG_TYPE, "InlinedLoop", StartLoc,
-                                            L->getHeader())
-                 << "loop is from inlined function; call site is at "
-                 << ore::NV("CallSiteFile", InlinedAt->getFilename()) << ":"
-                 << ore::NV("CallSiteLine", InlinedAt->getLine());
-        });
-      }
-    }
-  }
-
   if (TM & TM_Disable) {
     LLVM_DEBUG(dbgs().indent(1) << "Not unrolling: transformation disabled by "
                                 << "metadata.\n");
@@ -1508,7 +1396,7 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
   if (!UP.Count) {
     LLVM_DEBUG(dbgs().indent(1)
                << "Not unrolling: no viable strategy found.\n");
-    if (UnrollPragmaInfo(L).ExplicitUnroll && (TM & TM_ForcedByUser)) {
+    if (TM & TM_ForcedByUser) {
       ORE.emit([&]() {
         return OptimizationRemarkMissed(DEBUG_TYPE, "NoUnrollStrategy",
                                         L->getStartLoc(), L->getHeader())
@@ -1560,33 +1448,8 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
   // Save loop properties before it is transformed.
   MDNode *OrigLoopID = L->getLoopID();
   UnrollPragmaInfo PInfo(L);
-
-  // Emit additional unrolling strategy context that difficult for UnrollLoop()
-  // to re-compute.
-  /* if (UseUpperBound) {
-    LLVM_DEBUG(dbgs() << "Attempting full unroll with upper bound trip count "
-                      << MaxTripCount << "\n");
-    ORE.emit([&]() {
-      return OptimizationRemarkAnalysis(DEBUG_TYPE, "AttemptUnrollUpperBound",
-                                        L->getStartLoc(), L->getHeader())
-             << "attempting full unroll using upper bound trip count "
-             << ore::NV("MaxTripCount", MaxTripCount);
-    });
-  } else*/
-  if (TripCount && TripCount > UP.Count && TripCount % UP.Count != 0) {
-    LLVM_DEBUG(dbgs() << "Attempting unroll by factor " << UP.Count
-                      << " with remainder loop (trip count " << TripCount
-                      << ")\n");
-    ORE.emit([&]() {
-      return OptimizationRemarkAnalysis(DEBUG_TYPE,
-                                        "AttemptUnrollWithRemainder",
-                                        L->getStartLoc(), L->getHeader())
-             << "attempting unroll by factor "
-             << ore::NV("UnrollCount", UP.Count)
-             << " with remainder loop (trip count "
-             << ore::NV("TripCount", TripCount) << ")";
-    });
-  }
+  DebugLoc LoopStartLoc = L->getStartLoc();
+  BasicBlock *LoopHeader = L->getHeader();
 
   // Unroll the loop.
   Loop *RemainderLoop = nullptr;
@@ -1603,8 +1466,37 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
   ULO.AddAdditionalAccumulators = UP.AddAdditionalAccumulators;
   LoopUnrollResult UnrollResult = UnrollLoop(
       L, ULO, LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop, AA);
-  if (UnrollResult == LoopUnrollResult::Unmodified)
+  if (UnrollResult == LoopUnrollResult::Unmodified) {
+    if (PInfo.ExplicitUnroll) {
+      LLVM_DEBUG(dbgs().indent(1)
+                 << "Failed to unroll loop as explicitly requested.\n");
+      ORE.emit([&]() {
+        return OptimizationRemarkMissed(DEBUG_TYPE, "FailedToUnrollAsRequested",
+                                        LoopStartLoc, LoopHeader)
+               << "failed to unroll loop as explicitly requested";
+      });
+    }
     return LoopUnrollResult::Unmodified;
+  }
+
+  if (PInfo.PragmaFullUnroll && ULO.Count != TripCount) {
+    ORE.emit([&]() {
+      return OptimizationRemarkMissed(DEBUG_TYPE, "FullUnrollAsDirectedFailed",
+                                      LoopStartLoc, LoopHeader)
+             << "unable to fully unroll loop as directed; "
+             << "unrolled by factor " << ore::NV("UnrollCount", ULO.Count);
+    });
+  }
+  if (PInfo.PragmaCount > 0 && ULO.Count != PInfo.PragmaCount) {
+    ORE.emit([&]() {
+      return OptimizationRemarkMissed(DEBUG_TYPE, "UnrollCountDiffers",
+                                      LoopStartLoc, LoopHeader)
+             << "unable to unroll loop with requested count "
+             << ore::NV("RequestedCount", PInfo.PragmaCount)
+             << "; unrolled by factor "
+             << ore::NV("UnrollCount", ULO.Count);
+    });
+  }
 
   if (RemainderLoop) {
     std::optional<MDNode *> RemainderLoopID =
diff --git a/llvm/test/Transforms/LoopUnroll/debug-and-remarks.ll b/llvm/test/Transforms/LoopUnroll/debug-and-remarks.ll
index 714d63d49bd10..41cd79f9873d0 100644
--- a/llvm/test/Transforms/LoopUnroll/debug-and-remarks.ll
+++ b/llvm/test/Transforms/LoopUnroll/debug-and-remarks.ll
@@ -1,81 +1,56 @@
 ; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \
-; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll < %s 2>&1 | FileCheck %s --match-full-lines --strict-whitespace
-; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \
-; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-count=4 < %s 2>&1 \
-; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=PARTIAL-UNROLL
-; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \
-; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-runtime < %s 2>&1 \
-; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=RUNTIME-UNROLL
+; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll < %s 2>&1 \
+; RUN:     | FileCheck %s --match-full-lines --strict-whitespace
 ; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \
 ; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-allow-partial < %s 2>&1 \
 ; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=PARTIAL-ALLOW
 ; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \
-; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-count=4 -unroll-allow-remainder=false < %s 2>&1 \
-; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=USER-COUNT-REJECT
-; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \
-; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-allow-remainder=false < %s 2>&1 \
-; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=NO-REMAINDER
-; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \
-; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -pragma-unroll-full-max-iterations=100 < %s 2>&1 \
-; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=PRAGMA-TC-TOO-LARGE
-; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \
-; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-threshold=20 < %s 2>&1 \
-; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=COST-ANALYSIS
+; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-count=4 < %s 2>&1 \
+; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=USER-COUNT
 ; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \
-; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-full-max-count=10 \
-; RUN:     -pragma-unroll-full-max-iterations=10 < %s 2>&1 \
-; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=MAX-COUNT-10
+; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-count=9999 < %s 2>&1 \
+; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=USER-COUNT-EXCEED
 ; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \
 ; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-peel-count=2 < %s 2>&1 \
-; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=PEEL
-; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \
-; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-threshold=0 -unroll-partial-threshold=0 < %s 2>&1 \
-; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=THRESHOLDS-ZERO
-; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \
-; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-threshold=30 < %s 2>&1 \
-; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=NESTED-COST
+; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=EXPLICIT-PEEL
 ; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \
-; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-allow-partial -unroll-partial-threshold=8 < %s 2>&1 \
-; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=NO-PROFIT
+; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-threshold=0 < %s 2>&1 \
+; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=ZERO-THRESH
 ; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \
-; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-allow-partial -pragma-unroll-threshold=10 \
-; RUN:     -unroll-threshold=10 -unroll-partial-threshold=10 -pragma-unroll-full-max-iterations=10 < %s 2>&1 \
-; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=UNROLL-AS-DIRECTED-FAIL
+; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-full-max-count=2 < %s 2>&1 \
+; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=MAX-COUNT
 ; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \
-; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-count=8 -unroll-threshold=10 < %s 2>&1 \
-; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=USER-COUNT-EXCEED
-; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \
-; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-threshold=1 -unroll-partial-threshold=1 < %s 2>&1 \
-; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=NO-STRATEGY
+; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-allow-partial -unroll-partial-threshold=4 < %s 2>&1 \
+; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=PARTIAL-NOPROFIT
 ; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \
-; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-threshold=20 -pragma-unroll-threshold=20 < %s 2>&1 \
-; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=COST-NOT-PROFITABLE
-; RUN: opt -disable-output -O2 --disable-loop-unrolling -debug-only=loop-unroll -pass-remarks=loop-unroll \
-; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll < %s 2>&1 \
-; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=AUTO-DISABLED
-; RUN: opt -disable-output -passes='loop-unroll<upperbound>' -debug-only=loop-unroll -pass-remarks=loop-unroll \
-; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll < %s 2>&1 \
-; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=UPPER-BOUND-HEURISTIC
+; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-allow-remainder=false < %s 2>&1 \
+; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=PRAGMA-NOREMAINDER
 ; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \
-; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-threshold=20 \
-; RUN:     -pragma-unroll-threshold=20 -pragma-unroll-full-max-iterations=8 < %s 2>&1 \
-; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=FULL-COST-NOT-PROFITABLE
+; RUN:     -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-remainder < %s 2>&1 \
+; RUN:     | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=REMAINDER
 
 ; REQUIRES: asserts
 
-; CHECK-LABEL:Loop Unroll: F[full_unroll_simple] Loop %for.body (depth=1)
+; CHECK-LABEL:Loop Unroll: F[pragma_full_unroll_unknown_tc] Loop %for.body (depth=1)
 ; CHECK-NEXT:Loop Size = 6
-; CHECK-NEXT: Computing unroll count: TripCount=8, MaxTripCount=0, TripMultiple=8
+; CHECK-NEXT: Computing unroll count: TripCount=0, MaxTripCount=2147483647, TripMultiple=1
+; CHECK-NEXT: Explicit unroll requested: pragma-full
 ; CHECK-NEXT: Trying pragma unroll...
+; CHECK-NEXT:  Not fully unrolling: unknown trip count.
+; CHECK-NEXT:remark: <unknown>:0:0: may be unable to fully unroll loop: trip count is unknown
 ; CHECK-NEXT: Trying full unroll...
-; CHECK-NEXT:  Unrolling: size {{[0-9]+}} < threshold {{[0-9]+}}.
-; CHECK-NEXT:  Exiting block %for.body: TripCount=8, TripMultiple=0, BreakoutTrip=0
-; CHECK-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 8!
-; CHECK-NEXT:remark: <unknown>:0:0: completely unrolled loop with 8 iterations
+; CHECK-NEXT: Trying upper-bound unroll...
+; CHECK-NEXT: Trying loop peeling...
+; CHECK-NEXT: Trying partial unroll...
+; CHECK-NEXT: Trying runtime unroll...
+; CHECK-NEXT:  Will not try to unroll loop with runtime trip count because -unroll-runtime not given
+; CHECK-NEXT: Not unrolling: no viable strategy found.
+; CHECK-NEXT:remark: <unknown>:0:0: unable to unroll loop: no viable unroll count found
 
-define i32 @full_unroll_simple(ptr %A) {
+define i32 @pragma_full_unroll_unknown_tc(ptr %A, i32 %n) {
 entry:
-  br label %for.body
+  %cmp.entry = icmp sgt i32 %n, 0
+  br i1 %cmp.entry, label %for.body, label %exit
 
 for.body:
   %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
@@ -84,24 +59,28 @@ for.body:
   %load = load i32, ptr %arrayidx
   %add = add i32 %sum, %load
   %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 8
-  br i1 %cmp, label %for.body, label %exit
+  %cmp = icmp slt i32 %inc, %n
+  br i1 %cmp, label %for.body, label %exit, !llvm.loop !0
 
 exit:
-  ret i32 %add
+  %result = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  ret i32 %result
 }
 
-; PARTIAL-UNROLL-LABEL:Loop Unroll: F[partial_unroll_user_count] Loop %for.body (depth=1)
-; PARTIAL-UNROLL-NEXT:Loop Size = 6
-; PARTIAL-UNROLL-NEXT: Computing unroll count: TripCount=16, MaxTripCount=0, TripMultiple=16
-; PARTIAL-UNROLL-NEXT: Explicit unroll requested: user-count
-; PARTIAL-UNROLL-NEXT: Trying pragma unroll...
-; PARTIAL-UNROLL-NEXT:  Unrolling with user-specified count: 4.
-; PARTIAL-UNROLL-NEXT:  Exiting block %for.body: TripCount=16, TripMultiple=0, BreakoutTrip=0
-; PARTIAL-UNROLL-NEXT:UNROLLING loop %for.body by 4!
-; PARTIAL-UNROLL-NEXT:remark: <unknown>:0:0: unrolled loop by a factor of 4
-
-define i32 @partial_unroll_user_count(ptr %A) {
+; CHECK-LABEL:Loop Unroll: F[full_unroll_cost_exceeds] Loop %for.body (depth=1)
+; CHECK-NEXT:Loop Size = 6
+; CHECK-NEXT: Computing unroll count: TripCount=100, MaxTripCount=0, TripMultiple=100
+; CHECK-NEXT: Trying pragma unroll...
+; CHECK-NEXT: Trying full unroll...
+; CHECK-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit.
+; CHECK-NEXT:   Not analyzing loop cost: trip count too large.
+; CHECK-NEXT: Trying upper-bound unroll...
+; CHECK-NEXT: Trying loop peeling...
+; CHECK-NEXT: Trying partial unroll...
+; CHECK-NEXT:  Will not try to unroll partially because -unroll-allow-partial not given
+; CHECK-NEXT: Not unrolling: no viable strategy found.
+
+define i32 @full_unroll_cost_exceeds(ptr %A) {
 entry:
   br label %for.body
 
@@ -112,61 +91,47 @@ for.body:
   %load = load i32, ptr %arrayidx
   %add = add i32 %sum, %load
   %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 16
+  %cmp = icmp ult i32 %inc, 100
   br i1 %cmp, label %for.body, label %exit
 
 exit:
   ret i32 %add
 }
 
-; RUNTIME-UNROLL-LABEL:Loop Unroll: F[runtime_unroll_simple] Loop %for.body (depth=1)
-; RUNTIME-UNROLL-NEXT:Loop Size = 6
-; RUNTIME-UNROLL-NEXT: Computing unroll count: TripCount=0, MaxTripCount={{[0-9]+}}, TripMultiple=1
-; RUNTIME-UNROLL-NEXT: Trying pragma unroll...
-; RUNTIME-UNROLL-NEXT: Trying full unroll...
-; RUNTIME-UNROLL-NEXT: Trying upper-bound unroll...
-; RUNTIME-UNROLL-NEXT: Trying loop peeling...
-; RUNTIME-UNROLL-NEXT: Trying partial unroll...
-; RUNTIME-UNROLL-NEXT: Trying runtime unroll...
-; RUNTIME-UNROLL-NEXT:  Runtime unrolling with count: 8
-; RUNTIME-UNROLL-NEXT:  Exiting block %for.body: TripCount=0, TripMultiple=1, BreakoutTrip=1
-; RUNTIME-UNROLL-NEXT:Trying runtime unrolling on Loop: 
-; RUNTIME-UNROLL-NEXT:Loop at depth 1 containing: %for.body<header><latch><exiting>
-; RUNTIME-UNROLL-NEXT:Using epilog remainder.
-; RUNTIME-UNROLL-NEXT:UNROLLING loop %for.body by 8 with run-time trip count!
-; RUNTIME-UNROLL-NEXT:remark: <unknown>:0:0: unrolled loop by a factor of 8 with run-time trip count
+; CHECK-LABEL:Loop Unroll: F[extended_convergence] Loop %for.body (depth=1)
+; CHECK-NEXT: Not unrolling: contains convergent operations.
+; CHECK-NEXT:remark: <unknown>:0:0: unable to unroll loop: contains convergent operations
+
+declare void @convergent_func() convergent
+declare token @llvm.experimental.convergence.anchor()
 
-define i32 @runtime_unroll_simple(ptr %A, i32 %n) {
+define i32 @extended_convergence(ptr %A, i32 %n) {
 entry:
-  %cmp.entry = icmp sgt i32 %n, 0
-  br i1 %cmp.entry, label %for.body, label %exit
+  br label %for.body, !llvm.loop !1
 
 for.body:
   %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %tok = call token @llvm.experimental.convergence.anchor()
   %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
   %load = load i32, ptr %arrayidx
   %add = add i32 %sum, %load
   %inc = add i32 %i, 1
   %cmp = icmp slt i32 %inc, %n
-  br i1 %cmp, label %for.body, label %exit
+  br i1 %cmp, label %for.body, label %exit, !llvm.loop !1
 
 exit:
-  %result = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  ret i32 %result
+  call void @convergent_func() [ "convergencectrl"(token %tok) ]
+  ret i32 %add
 }
 
-; CHECK-LABEL:Loop Unroll: F[pragma_full_unroll] Loop %for.body (depth=1)
-; CHECK-NEXT:Loop Size = 6
-; CHECK-NEXT: Computing unroll count: TripCount=4, MaxTripCount=0, TripMultiple=4
-; CHECK-NEXT: Explicit unroll requested: pragma-full
-; CHECK-NEXT: Trying pragma unroll...
-; CHECK-NEXT:  Fully unrolling with trip count: 4.
-; CHECK-NEXT:  Exiting block %for.body: TripCount=4, TripMultiple=0, BreakoutTrip=0
-; CHECK-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 4!
-; CHECK-NEXT:remark: <unknown>:0:0: completely unrolled loop with 4 iterations
+; CHECK-LABEL:Loop Unroll: F[noduplicate_prevents_unroll] Loop %for.body (depth=1)
+; CHECK-NEXT: Not unrolling: contains non-duplicatable instructions.
+; CHECK-NEXT:remark: <unknown>:0:0: unable to unroll loop: contains non-duplicatable instructions
+
+declare void @noduplicate_func() noduplicate
 
-define i32 @pragma_full_unroll(ptr %A) {
+define i32 @noduplicate_prevents_unroll(ptr %A) {
 entry:
   br label %for.body
 
@@ -175,28 +140,23 @@ for.body:
   %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
   %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
   %load = load i32, ptr %arrayidx
+  call void @noduplicate_func()
   %add = add i32 %sum, %load
   %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 4
-  br i1 %cmp, label %for.body, label %exit, !llvm.loop !0
+  %cmp = icmp ult i32 %inc, 8
+  br i1 %cmp, label %for.body, label %exit, !llvm.loop !1
 
 exit:
   ret i32 %add
 }
 
-; CHECK-LABEL:Loop Unroll: F[pragma_unroll_count] Loop %for.body (depth=1)
-; CHECK-NEXT:Loop Size = 6
-; CHECK-NEXT: Computing unroll count: TripCount=16, MaxTripCount=0, TripMultiple=16
-; CHECK-NEXT: Explicit unroll requested: pragma-count(4)
-; CHECK-NEXT: Trying pragma unroll...
-; CHECK-NEXT:  Unrolling with pragma count: 4.
-; CHECK-NEXT:  Exiting block %for.body: TripCount=16, TripMultiple=0, BreakoutTrip=0
-; CHECK-NEXT:UNROLLING loop %for.body by 4!
-; CHECK-NEXT:remark: <unknown>:0:0: unrolled loop by a factor of 4
+; CHECK-LABEL:Loop Unroll: F[indirectbr_loop] Loop %for.body (depth=1)
+; CHECK-NEXT: Not unrolling loop which is not in loop-simplify form.
+; CHECK-NEXT:remark: <unknown>:0:0: unable to unroll loop: not in loop-simplify form
 
-define i32 @pragma_unroll_count(ptr %A) {
+define i32 @indirectbr_loop(ptr %A, ptr %target) {
 entry:
-  br label %for.body
+  indirectbr ptr %target, [label %for.body, label %exit]
 
 for.body:
   %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
@@ -205,146 +165,169 @@ for.body:
   %load = load i32, ptr %arrayidx
   %add = add i32 %sum, %load
   %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 16
-  br i1 %cmp, label %for.body, label %exit, !llvm.loop !2
+  %cmp = icmp ult i32 %inc, 10
+  br i1 %cmp, label %for.body, label %exit, !llvm.loop !1
 
 exit:
-  ret i32 %add
+  %result = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  ret i32 %result
 }
 
-; CHECK-LABEL:Loop Unroll: F[pragma_full_unroll_unknown_tc] Loop %for.body (depth=1)
-; CHECK-NEXT:Loop Size = 6
-; CHECK-NEXT: Computing unroll count: TripCount=0, MaxTripCount={{[0-9]+}}, TripMultiple=1
-; CHECK-NEXT: Explicit unroll requested: pragma-full
-; CHECK-NEXT: Trying pragma unroll...
-; CHECK-NEXT:  Not fully unrolling: unknown trip count.
-; CHECK-NEXT: Trying full unroll...
-; CHECK-NEXT: Trying upper-bound unroll...
-; CHECK-NEXT: Trying loop peeling...
-; CHECK-NEXT: Trying partial unroll...
-; CHECK-NEXT: Not fully unrolling as directed: loop has runtime trip count.
-; CHECK-NEXT:remark: <unknown>:0:0: unable to fully unroll loop as directed by llvm.loop.unroll.full metadata because loop has a runtime trip count
-; CHECK-NEXT: Trying runtime unroll...
-; CHECK-NEXT:  Will not try to unroll loop with runtime trip count because -unroll-runtime not given
-; CHECK-NEXT: Not unrolling: no viable strategy found.
+; CHECK-LABEL:Loop Unroll: F[inline_prevents_unroll] Loop %for.body (depth=1)
+; CHECK-NEXT:Loop Size = 8
+; CHECK-NEXT: Not unrolling loop with inlinable calls.
+; CHECK-NEXT:remark: <unknown>:0:0: unable to unroll loop: contains inlinable calls
 
-define i32 @pragma_full_unroll_unknown_tc(ptr %A, i32 %n) {
+define internal i32 @single_use_helper(i32 %x) {
+  %add = add i32 %x, 42
+  ret i32 %add
+}
+
+define i32 @inline_prevents_unroll(ptr %A) {
 entry:
-  %cmp.entry = icmp sgt i32 %n, 0
-  br i1 %cmp.entry, label %for.body, label %exit
+  br label %for.body
 
 for.body:
   %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
   %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
   %load = load i32, ptr %arrayidx
-  %add = add i32 %sum, %load
+  %helper_result = call i32 @single_use_helper(i32 %load)
+  %add = add i32 %sum, %helper_result
   %inc = add i32 %i, 1
-  %cmp = icmp slt i32 %inc, %n
-  br i1 %cmp, label %for.body, label %exit, !llvm.loop !0
+  %cmp = icmp ult i32 %inc, 10
+  br i1 %cmp, label %for.body, label %exit, !llvm.loop !2
 
 exit:
-  %result = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  ret i32 %result
+  ret i32 %add
 }
 
-; CHECK-LABEL:Loop Unroll: F[upper_bound_unroll] Loop %for.body (depth=1)
-; CHECK-NEXT:Loop Size = 6
-; CHECK-NEXT: Computing unroll count: TripCount=0, MaxTripCount=3, TripMultiple=1
-; CHECK-NEXT: Explicit unroll requested: pragma-enable
+; CHECK-LABEL:Loop Unroll: F[full_unroll_profitability_analysis] Loop %for.body (depth=1)
+; CHECK-NEXT:Loop Size = {{[0-9]+}}
+; CHECK-NEXT: Computing unroll count: TripCount=10, MaxTripCount=0, TripMultiple=10
 ; CHECK-NEXT: Trying pragma unroll...
-; CHECK-NEXT:  Unrolling with max trip count: 3.
-; CHECK-NEXT:  Exiting block %for.body: TripCount=0, TripMultiple=1, BreakoutTrip=1
-; CHECK-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 3!
-; CHECK-NEXT:remark: <unknown>:0:0: completely unrolled loop with 3 iterations
-
-define i32 @upper_bound_unroll(ptr %A, i32 %n) {
+; CHECK-NEXT: Trying full unroll...
+; CHECK-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit.
+; CHECK-NEXT:   Starting LoopUnroll profitability analysis...
+; CHECK-NEXT:   Analyzing iteration 0
+; CHECK-NEXT:   Analyzing iteration 1
+; CHECK-NEXT:   Analyzing iteration 2
+; CHECK-NEXT:   Analyzing iteration 3
+; CHECK-NEXT:   Analyzing iteration 4
+; CHECK-NEXT:   Analyzing iteration 5
+; CHECK-NEXT:   Analyzing iteration 6
+; CHECK-NEXT:   Analyzing iteration 7
+; CHECK-NEXT:   Analyzing iteration 8
+; CHECK-NEXT:   Analyzing iteration 9
+; CHECK:   Analysis finished:
+; CHECK-NEXT:   UnrolledCost: {{[0-9]+}}, RolledDynamicCost: {{[0-9]+}}
+; CHECK-NEXT:  Profitable after cost analysis.
+; CHECK-NEXT:  Exiting block %for.body: TripCount=10, TripMultiple=0, BreakoutTrip=0
+; CHECK-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 10!
+; CHECK-NEXT:remark: <unknown>:0:0: completely unrolled loop with 10 iterations
+
+define i32 @full_unroll_profitability_analysis(ptr %A, ptr %B) {
 entry:
-  %n.clamped = and i32 %n, 3  ; max 3
   br label %for.body
 
 for.body:
   %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  %load = load i32, ptr %arrayidx
-  %add = add i32 %sum, %load
+  %sum = phi i32 [ 0, %entry ], [ %result, %for.body ]
+  %idxA = getelementptr inbounds i32, ptr %A, i32 %i
+  %loadA = load i32, ptr %idxA
+  %idxB = getelementptr inbounds i32, ptr %B, i32 %i
+  %loadB = load i32, ptr %idxB
+  %mul1 = mul i32 %loadA, %loadB
+  %add1 = add i32 %mul1, %loadA
+  %mul2 = mul i32 %add1, %loadB
+  %sub1 = sub i32 %mul2, %loadA
+  %add2 = add i32 %sub1, %loadB
+  %mul3 = mul i32 %add2, %loadA
+  %sub2 = sub i32 %mul3, %loadB
+  %xor1 = xor i32 %sub2, %loadA
+  %or1 = or i32 %xor1, %loadB
+  %result = add i32 %sum, %or1
   %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, %n.clamped
-  br i1 %cmp, label %for.body, label %exit, !llvm.loop !1
+  %cmp = icmp ult i32 %inc, 10
+  br i1 %cmp, label %for.body, label %exit
 
 exit:
-  ret i32 %add
+  ret i32 %result
 }
 
-; CHECK-LABEL:Loop Unroll: F[full_unroll_cost_exceeds] Loop %for.body (depth=1)
-; CHECK-NEXT:Loop Size = 6
-; CHECK-NEXT: Computing unroll count: TripCount=100, MaxTripCount=0, TripMultiple=100
+; CHECK-LABEL:Loop Unroll: F[cost_exceed_boosted_threshold] Loop %for.body (depth=1)
+; CHECK-NEXT:Loop Size = {{[0-9]+}}
+; CHECK-NEXT: Computing unroll count: TripCount=10, MaxTripCount=0, TripMultiple=10
 ; CHECK-NEXT: Trying pragma unroll...
 ; CHECK-NEXT: Trying full unroll...
 ; CHECK-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit.
-; CHECK-NEXT:   Not analyzing loop cost: trip count too large.
-; CHECK-NEXT:  Skipping: cost analysis unavailable.
+; CHECK-NEXT:   Starting LoopUnroll profitability analysis...
+; CHECK-NEXT:   Analyzing iteration 0
+; CHECK-NEXT:   Analyzing iteration 1
+; CHECK-NEXT:   Analyzing iteration 2
+; CHECK-NEXT:   Analyzing iteration 3
+; CHECK-NEXT:   Analyzing iteration 4
+; CHECK-NEXT:   Analyzing iteration 5
+; CHECK-NEXT:   Analyzing iteration 6
+; CHECK-NEXT:   Analyzing iteration 7
+; CHECK-NEXT:   Analyzing iteration 8
+; CHECK-NEXT:   Analyzing iteration 9
+; CHECK:   Analysis finished:
+; CHECK-NEXT:   UnrolledCost: {{[0-9]+}}, RolledDynamicCost: {{[0-9]+}}
+; CHECK-NEXT:  Not unrolling: cost {{[0-9]+}} >= boosted threshold {{[0-9]+}}.
 ; CHECK-NEXT: Trying upper-bound unroll...
 ; CHECK-NEXT: Trying loop peeling...
 ; CHECK-NEXT: Trying partial unroll...
 ; CHECK-NEXT:  Will not try to unroll partially because -unroll-allow-partial not given
 ; CHECK-NEXT: Not unrolling: no viable strategy found.
 
-define i32 @full_unroll_cost_exceeds(ptr %A) {
+define i32 @cost_exceed_boosted_threshold(ptr %A, ptr %B) {
 entry:
   br label %for.body
 
 for.body:
   %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  %load = load i32, ptr %arrayidx
-  %add = add i32 %sum, %load
+  %sum = phi i32 [ 0, %entry ], [ %result, %for.body ]
+  %idxA = getelementptr inbounds i32, ptr %A, i32 %i
+  %loadA = load i32, ptr %idxA
+  %idxB = getelementptr inbounds i32, ptr %B, i32 %i
+  %loadB = load i32, ptr %idxB
+  %mul1 = mul i32 %loadA, %loadB
+  %add1 = add i32 %mul1, %loadA
+  %mul2 = mul i32 %add1, %loadB
+  %sub1 = sub i32 %mul2, %loadA
+  %add2 = add i32 %sub1, %loadB
+  %mul3 = mul i32 %add2, %loadA
+  %sub2 = sub i32 %mul3, %loadB
+  %xor1 = xor i32 %sub2, %loadA
+  %or1 = or i32 %xor1, %loadB
+  %and1 = and i32 %or1, %loadA
+  %shl1 = shl i32 %and1, 2
+  %ashr1 = ashr i32 %shl1, 1
+  %mul4 = mul i32 %ashr1, %loadB
+  %add3 = add i32 %mul4, %loadA
+  %xor2 = xor i32 %add3, %loadB
+  %result = add i32 %sum, %xor2
   %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 100
+  %cmp = icmp ult i32 %inc, 10
   br i1 %cmp, label %for.body, label %exit
 
 exit:
-  ret i32 %add
-}
-
-; CHECK-LABEL:Loop Unroll: F[unroll_disabled_metadata] Loop %for.body (depth=1)
-; CHECK-NEXT: Not unrolling: transformation disabled by metadata.
-
-define i32 @unroll_disabled_metadata(ptr %A) {
-entry:
-  br label %for.body
-
-for.body:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  %load = load i32, ptr %arrayidx
-  %add = add i32 %sum, %load
-  %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 8
-  br i1 %cmp, label %for.body, label %exit, !llvm.loop !3
-
-exit:
-  ret i32 %add
+  ret i32 %result
 }
 
-; CHECK-LABEL:Loop Unroll: F[runtime_small_max_tc] Loop %for.body (depth=1)
+; CHECK-LABEL:Loop Unroll: F[full_unroll_size_under_threshold] Loop %for.body (depth=1)
 ; CHECK-NEXT:Loop Size = 6
-; CHECK-NEXT: Computing unroll count: TripCount=0, MaxTripCount=3, TripMultiple=1
+; CHECK-NEXT: Computing unroll count: TripCount=4, MaxTripCount=0, TripMultiple=4
 ; CHECK-NEXT: Trying pragma unroll...
 ; CHECK-NEXT: Trying full unroll...
-; CHECK-NEXT: Trying upper-bound unroll...
-; CHECK-NEXT: Trying loop peeling...
-; CHECK-NEXT: Trying partial unroll...
-; CHECK-NEXT: Trying runtime unroll...
-; CHECK-NEXT:  Not runtime unrolling: max trip count 3 is small (< 8) and not forced.
-; CHECK-NEXT: Not unrolling: no viable strategy found.
+; CHECK-NEXT:  Unrolling: size {{[0-9]+}} < threshold {{[0-9]+}}.
+; CHECK-NEXT:  Exiting block %for.body: TripCount=4, TripMultiple=0, BreakoutTrip=0
+; CHECK-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 4!
+; CHECK-NEXT:remark: <unknown>:0:0: completely unrolled loop with 4 iterations
 
-define i32 @runtime_small_max_tc(ptr %A, i32 %n) {
+define i32 @full_unroll_size_under_threshold(ptr %A) {
 entry:
-  %n.clamped = and i32 %n, 3
   br label %for.body
 
 for.body:
@@ -354,27 +337,24 @@ for.body:
   %load = load i32, ptr %arrayidx
   %add = add i32 %sum, %load
   %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, %n.clamped
+  %cmp = icmp ult i32 %inc, 4
   br i1 %cmp, label %for.body, label %exit
 
 exit:
   ret i32 %add
 }
 
-; When using user-specified count on a trip count that isn't evenly divisible
-; PARTIAL-UNROLL-LABEL:Loop Unroll: F[partial_unroll_with_remainder] Loop %for.body (depth=1)
-; PARTIAL-UNROLL-NEXT:Loop Size = 6
-; PARTIAL-UNROLL-NEXT: Computing unroll count: TripCount=10, MaxTripCount=0, TripMultiple=10
-; PARTIAL-UNROLL-NEXT: Explicit unroll requested: user-count
-; PARTIAL-UNROLL-NEXT: Trying pragma unroll...
-; PARTIAL-UNROLL-NEXT:  Unrolling with user-specified count: 4.
-; PARTIAL-UNROLL-NEXT:Attempting unroll by factor 4 with remainder loop (trip count 10)
-; PARTIAL-UNROLL-NEXT:remark: <unknown>:0:0: attempting unroll by factor 4 with remainder loop (trip count 10)
-; PARTIAL-UNROLL-NEXT:  Exiting block %for.body: TripCount=10, TripMultiple=0, BreakoutTrip=2
-; PARTIAL-UNROLL-NEXT:UNROLLING loop %for.body by 4!
-; PARTIAL-UNROLL-NEXT:remark: <unknown>:0:0: unrolled loop by a factor of 4
-
-define i32 @partial_unroll_with_remainder(ptr %A) {
+; CHECK-LABEL:Loop Unroll: F[pragma_full_known_tc] Loop %for.body (depth=1)
+; CHECK-NEXT:Loop Size = 6
+; CHECK-NEXT: Computing unroll count: TripCount=6, MaxTripCount=0, TripMultiple=6
+; CHECK-NEXT: Explicit unroll requested: pragma-full
+; CHECK-NEXT: Trying pragma unroll...
+; CHECK-NEXT:  Fully unrolling with trip count: 6.
+; CHECK-NEXT:  Exiting block %for.body: TripCount=6, TripMultiple=0, BreakoutTrip=0
+; CHECK-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 6!
+; CHECK-NEXT:remark: <unknown>:0:0: completely unrolled loop with 6 iterations
+
+define i32 @pragma_full_known_tc(ptr %A) {
 entry:
   br label %for.body
 
@@ -385,32 +365,24 @@ for.body:
   %load = load i32, ptr %arrayidx
   %add = add i32 %sum, %load
   %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 10
-  br i1 %cmp, label %for.body, label %exit
+  %cmp = icmp ult i32 %inc, 6
+  br i1 %cmp, label %for.body, label %exit, !llvm.loop !0
 
 exit:
   ret i32 %add
 }
 
+; CHECK-LABEL:Loop Unroll: F[pragma_count_unroll] Loop %for.body (depth=1)
+; CHECK-NEXT:Loop Size = 6
+; CHECK-NEXT: Computing unroll count: TripCount=12, MaxTripCount=0, TripMultiple=12
+; CHECK-NEXT: Explicit unroll requested: pragma-count(3)
+; CHECK-NEXT: Trying pragma unroll...
+; CHECK-NEXT:  Unrolling with pragma count: 3.
+; CHECK-NEXT:  Exiting block %for.body: TripCount=12, TripMultiple=0, BreakoutTrip=0
+; CHECK-NEXT:UNROLLING loop %for.body by 3!
+; CHECK-NEXT:remark: <unknown>:0:0: unrolled loop by a factor of 3
 
-; PARTIAL-ALLOW-LABEL:Loop Unroll: F[partial_unroll_cost_analysis] Loop %for.body (depth=1)
-; PARTIAL-ALLOW-NEXT:Loop Size = 6
-; PARTIAL-ALLOW-NEXT: Computing unroll count: TripCount=200, MaxTripCount=0, TripMultiple=200
-; PARTIAL-ALLOW-NEXT: Trying pragma unroll...
-; PARTIAL-ALLOW-NEXT: Trying full unroll...
-; PARTIAL-ALLOW-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit.
-; PARTIAL-ALLOW-NEXT:   Not analyzing loop cost: trip count too large.
-; PARTIAL-ALLOW-NEXT:  Skipping: cost analysis unavailable.
-; PARTIAL-ALLOW-NEXT: Trying upper-bound unroll...
-; PARTIAL-ALLOW-NEXT: Trying loop peeling...
-; PARTIAL-ALLOW-NEXT: Trying partial unroll...
-; PARTIAL-ALLOW-NEXT:  Unrolled size exceeds threshold; reducing count from {{[0-9]+}} to {{[0-9]+}}.
-; PARTIAL-ALLOW-NEXT:  Partially unrolling with count: 25
-; PARTIAL-ALLOW-NEXT:  Exiting block %for.body: TripCount=200, TripMultiple=0, BreakoutTrip=0
-; PARTIAL-ALLOW-NEXT:UNROLLING loop %for.body by 25!
-; PARTIAL-ALLOW-NEXT:remark: <unknown>:0:0: unrolled loop by a factor of 25
-
-define i32 @partial_unroll_cost_analysis(ptr %A) {
+define i32 @pragma_count_unroll(ptr %A) {
 entry:
   br label %for.body
 
@@ -421,26 +393,26 @@ for.body:
   %load = load i32, ptr %arrayidx
   %add = add i32 %sum, %load
   %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 200
-  br i1 %cmp, label %for.body, label %exit
+  %cmp = icmp ult i32 %inc, 12
+  br i1 %cmp, label %for.body, label %exit, !llvm.loop !5
 
 exit:
   ret i32 %add
 }
 
-; CHECK-LABEL:Loop Unroll: F[runtime_unroll_disabled_pragma] Loop %for.body (depth=1)
+; CHECK-LABEL:Loop Unroll: F[no_viable_strategy] Loop %for.body (depth=1)
 ; CHECK-NEXT:Loop Size = 6
-; CHECK-NEXT: Computing unroll count: TripCount=0, MaxTripCount={{[0-9]+}}, TripMultiple=1
+; CHECK-NEXT: Computing unroll count: TripCount=0, MaxTripCount=2147483647, TripMultiple=1
 ; CHECK-NEXT: Trying pragma unroll...
 ; CHECK-NEXT: Trying full unroll...
 ; CHECK-NEXT: Trying upper-bound unroll...
 ; CHECK-NEXT: Trying loop peeling...
 ; CHECK-NEXT: Trying partial unroll...
 ; CHECK-NEXT: Trying runtime unroll...
-; CHECK-NEXT:  Not runtime unrolling: disabled by pragma.
+; CHECK-NEXT:  Will not try to unroll loop with runtime trip count because -unroll-runtime not given
 ; CHECK-NEXT: Not unrolling: no viable strategy found.
 
-define i32 @runtime_unroll_disabled_pragma(ptr %A, i32 %n) {
+define i32 @no_viable_strategy(ptr %A, i32 %n) {
 entry:
   %cmp.entry = icmp sgt i32 %n, 0
   br i1 %cmp.entry, label %for.body, label %exit
@@ -453,17 +425,17 @@ for.body:
   %add = add i32 %sum, %load
   %inc = add i32 %i, 1
   %cmp = icmp slt i32 %inc, %n
-  br i1 %cmp, label %for.body, label %exit, !llvm.loop !4
+  br i1 %cmp, label %for.body, label %exit
 
 exit:
   %result = phi i32 [ 0, %entry ], [ %add, %for.body ]
   ret i32 %result
 }
 
-; AUTO-DISABLED-LABEL:Loop Unroll: F[auto_unroll_not_enabled] Loop %for.body (depth=1)
-; AUTO-DISABLED-NEXT: Not unrolling: automatic unrolling disabled and loop not explicitly enabled.
+; CHECK-LABEL:Loop Unroll: F[disabled_by_metadata] Loop %for.body (depth=1)
+; CHECK-NEXT: Not unrolling: transformation disabled by metadata.
 
-define i32 @auto_unroll_not_enabled(ptr %A) {
+define i32 @disabled_by_metadata(ptr %A) {
 entry:
   br label %for.body
 
@@ -474,28 +446,28 @@ for.body:
   %load = load i32, ptr %arrayidx
   %add = add i32 %sum, %load
   %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 4
-  br i1 %cmp, label %for.body, label %exit
+  %cmp = icmp ult i32 %inc, 8
+  br i1 %cmp, label %for.body, label %exit, !llvm.loop !7
 
 exit:
   ret i32 %add
 }
 
-; USER-COUNT-REJECT-LABEL:Loop Unroll: F[user_count_rejected] Loop %for.body (depth=1)
-; USER-COUNT-REJECT-NEXT:Loop Size = 6
-; USER-COUNT-REJECT-NEXT: Computing unroll count: TripCount=10, MaxTripCount=0, TripMultiple=10
-; USER-COUNT-REJECT-NEXT: Explicit unroll requested: user-count
-; USER-COUNT-REJECT-NEXT: Trying pragma unroll...
-; USER-COUNT-REJECT-NEXT:  Not unrolling with user count 4: remainder not allowed.
-; USER-COUNT-REJECT-NEXT: Trying full unroll...
-; USER-COUNT-REJECT-NEXT:  Unrolling: size 42 < threshold 16384.
-; USER-COUNT-REJECT-NEXT:  Exiting block %for.body: TripCount=10, TripMultiple=0, BreakoutTrip=0
-; USER-COUNT-REJECT-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 10!
-; USER-COUNT-REJECT-NEXT:remark: <unknown>:0:0: completely unrolled loop with 10 iterations
-
-define i32 @user_count_rejected(ptr %A) {
+; CHECK-LABEL:Loop Unroll: F[upper_bound_unroll] Loop %for.body (depth=1)
+; CHECK-NEXT:Loop Size = 6
+; CHECK-NEXT: Computing unroll count: TripCount=0, MaxTripCount=3, TripMultiple=1
+; CHECK-NEXT: Explicit unroll requested: pragma-enable
+; CHECK-NEXT: Trying pragma unroll...
+; CHECK-NEXT:  Unrolling with max trip count: 3.
+; CHECK-NEXT:  Exiting block %for.body: TripCount=0, TripMultiple=1, BreakoutTrip=1
+; CHECK-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 3!
+; CHECK-NEXT:remark: <unknown>:0:0: completely unrolled loop with 3 iterations
+
+define i32 @upper_bound_unroll(ptr %A, i32 %n) {
 entry:
-  br label %for.body
+  %masked = and i32 %n, 3
+  %cmp.entry = icmp sgt i32 %masked, 0
+  br i1 %cmp.entry, label %for.body, label %exit
 
 for.body:
   %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
@@ -504,130 +476,31 @@ for.body:
   %load = load i32, ptr %arrayidx
   %add = add i32 %sum, %load
   %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 10
-  br i1 %cmp, label %for.body, label %exit
+  %cmp = icmp slt i32 %inc, %masked
+  br i1 %cmp, label %for.body, label %exit, !llvm.loop !9
 
 exit:
-  ret i32 %add
-}
-
-; NO-REMAINDER-LABEL:Loop Unroll: F[pragma_count_rejected] Loop %for.body (depth=1)
-; NO-REMAINDER-NEXT:Loop Size = 6
-; NO-REMAINDER-NEXT: Computing unroll count: TripCount=10, MaxTripCount=0, TripMultiple=10
-; NO-REMAINDER-NEXT: Explicit unroll requested: pragma-count(4)
-; NO-REMAINDER-NEXT: Trying pragma unroll...
-; NO-REMAINDER-NEXT:  Not unrolling with pragma count 4: remainder not allowed, count does not divide trip multiple 10.
-; NO-REMAINDER-NEXT:remark: <unknown>:0:0: unable to unroll loop with count 4: remainder loop is restricted and count does not divide trip multiple 10
-; NO-REMAINDER-NEXT: Trying full unroll...
-; NO-REMAINDER-NEXT:  Unrolling: size 42 < threshold 16384.
-; NO-REMAINDER-NEXT:  Exiting block %for.body: TripCount=10, TripMultiple=0, BreakoutTrip=0
-; NO-REMAINDER-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 10!
-; NO-REMAINDER-NEXT:remark: <unknown>:0:0: completely unrolled loop with 10 iterations
-
-define i32 @pragma_count_rejected(ptr %A) {
-entry:
-  br label %for.body
-
-for.body:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  %load = load i32, ptr %arrayidx
-  %add = add i32 %sum, %load
-  %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 10
-  br i1 %cmp, label %for.body, label %exit, !llvm.loop !10
-
-exit:
-  ret i32 %add
-}
-
-; The contradictory "unable to fully unroll" then "completely unrolled" remarks are expected:
-; we're artificially limiting the pragma path with -pragma-unroll-full-max-iterations=100
-; while the heuristic path remains unconstrained. This won't happen with default flags.
-; PRAGMA-TC-TOO-LARGE-LABEL:Loop Unroll: F[pragma_full_tc_too_large] Loop %for.body (depth=1)
-; PRAGMA-TC-TOO-LARGE-NEXT:Loop Size = 6
-; PRAGMA-TC-TOO-LARGE-NEXT: Computing unroll count: TripCount=200, MaxTripCount=0, TripMultiple=200
-; PRAGMA-TC-TOO-LARGE-NEXT: Explicit unroll requested: pragma-full
-; PRAGMA-TC-TOO-LARGE-NEXT: Trying pragma unroll...
-; PRAGMA-TC-TOO-LARGE-NEXT:  Won't unroll; trip count is too large.
-; PRAGMA-TC-TOO-LARGE-NEXT:remark: <unknown>:0:0: unable to fully unroll loop: trip count 200 exceeds limit 100
-; PRAGMA-TC-TOO-LARGE-NEXT: Trying full unroll...
-; PRAGMA-TC-TOO-LARGE-NEXT:  Unrolling: size 802 < threshold 16384.
-; PRAGMA-TC-TOO-LARGE-NEXT:  Exiting block %for.body: TripCount=200, TripMultiple=0, BreakoutTrip=0
-; PRAGMA-TC-TOO-LARGE-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 200!
-; PRAGMA-TC-TOO-LARGE-NEXT:remark: <unknown>:0:0: completely unrolled loop with 200 iterations
-
-define i32 @pragma_full_tc_too_large(ptr %A) {
-entry:
-  br label %for.body
-
-for.body:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  %load = load i32, ptr %arrayidx
-  %add = add i32 %sum, %load
-  %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 200
-  br i1 %cmp, label %for.body, label %exit, !llvm.loop !0
-
-exit:
-  ret i32 %add
+  %result = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  ret i32 %result
 }
 
-; COST-ANALYSIS-LABEL:Loop Unroll: F[cost_analysis_detailed] Loop %for.body (depth=1)
-; COST-ANALYSIS-NEXT:Loop Size = 9
-; COST-ANALYSIS-NEXT: Computing unroll count: TripCount=10, MaxTripCount=0, TripMultiple=10
-; COST-ANALYSIS-NEXT: Trying pragma unroll...
-; COST-ANALYSIS-NEXT: Trying full unroll...
-; COST-ANALYSIS-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold 20; checking for cost benefit.
-; COST-ANALYSIS-NEXT:   Starting LoopUnroll profitability analysis...
-; COST-ANALYSIS:   Analysis finished:
-; COST-ANALYSIS-NEXT:   UnrolledCost: {{[0-9]+}}, RolledDynamicCost: {{[0-9]+}}
-; COST-ANALYSIS-NEXT:  Not unrolling: cost {{[0-9]+}} >= boosted threshold {{[0-9]+}}.
-; COST-ANALYSIS-NEXT: Trying upper-bound unroll...
-; COST-ANALYSIS-NEXT: Trying loop peeling...
-; COST-ANALYSIS-NEXT: Trying partial unroll...
-; COST-ANALYSIS-NEXT:  Will not try to unroll partially because -unroll-allow-partial not given
-; COST-ANALYSIS-NEXT: Not unrolling: no viable strategy found.
-
-define i32 @cost_analysis_detailed(ptr %A, ptr %B) {
-entry:
-  br label %for.body
-
-for.body:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  %load = load i32, ptr %arrayidx
-  %arrayidx2 = getelementptr inbounds i32, ptr %B, i32 %i
-  %load2 = load i32, ptr %arrayidx2
-  %mul = mul i32 %load, %load2
-  %add = add i32 %sum, %mul
-  %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 10
-  br i1 %cmp, label %for.body, label %exit
-
-exit:
-  ret i32 %add
-}
+; CHECK-LABEL:Loop Unroll: F[runtime_small_max_tc] Loop %for.body (depth=1)
+; CHECK-NEXT:Loop Size = 6
+; CHECK-NEXT: Computing unroll count: TripCount=0, MaxTripCount=3, TripMultiple=1
+; CHECK-NEXT: Trying pragma unroll...
+; CHECK-NEXT: Trying full unroll...
+; CHECK-NEXT: Trying upper-bound unroll...
+; CHECK-NEXT: Trying loop peeling...
+; CHECK-NEXT: Trying partial unroll...
+; CHECK-NEXT: Trying runtime unroll...
+; CHECK-NEXT:  Not runtime unrolling: max trip count 3 is small (< 8) and not forced.
+; CHECK-NEXT: Not unrolling: no viable strategy found.
 
-; MAX-COUNT-10-LABEL:Loop Unroll: F[exceeds_max_count] Loop %for.body (depth=1)
-; MAX-COUNT-10-NEXT:Loop Size = 6
-; MAX-COUNT-10-NEXT: Computing unroll count: TripCount=20, MaxTripCount=0, TripMultiple=20
-; MAX-COUNT-10-NEXT: Trying pragma unroll...
-; MAX-COUNT-10-NEXT: Trying full unroll...
-; MAX-COUNT-10-NEXT:  Not unrolling: trip count 20 exceeds max count 10.
-; MAX-COUNT-10-NEXT: Trying upper-bound unroll...
-; MAX-COUNT-10-NEXT: Trying loop peeling...
-; MAX-COUNT-10-NEXT: Trying partial unroll...
-; MAX-COUNT-10-NEXT:  Will not try to unroll partially because -unroll-allow-partial not given
-; MAX-COUNT-10-NEXT: Not unrolling: no viable strategy found.
-
-define i32 @exceeds_max_count(ptr %A) {
+define i32 @runtime_small_max_tc(ptr %A, i32 %n) {
 entry:
-  br label %for.body
+  %masked = and i32 %n, 3
+  %cmp.entry = icmp sgt i32 %masked, 0
+  br i1 %cmp.entry, label %for.body, label %exit
 
 for.body:
   %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
@@ -636,23 +509,30 @@ for.body:
   %load = load i32, ptr %arrayidx
   %add = add i32 %sum, %load
   %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 20
+  %cmp = icmp slt i32 %inc, %masked
   br i1 %cmp, label %for.body, label %exit
 
 exit:
-  ret i32 %add
+  %result = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  ret i32 %result
 }
 
-; PEEL-LABEL:Loop Unroll: F[explicit_peel] Loop %for.body (depth=1)
-; PEEL-NEXT:Loop Size = 6
-; PEEL-NEXT: Computing unroll count: TripCount=100, MaxTripCount=0, TripMultiple=100
-; PEEL-NEXT:  Using explicit peel count: 2.
-; PEEL-NEXT:PEELING loop %for.body with iteration count 2!
-; PEEL-NEXT:remark: <unknown>:0:0: peeled loop by 2 iterations
+; CHECK-LABEL:Loop Unroll: F[runtime_unroll_disabled_pragma] Loop %for.body (depth=1)
+; CHECK-NEXT:Loop Size = 6
+; CHECK-NEXT: Computing unroll count: TripCount=0, MaxTripCount=2147483647, TripMultiple=1
+; CHECK-NEXT: Trying pragma unroll...
+; CHECK-NEXT: Trying full unroll...
+; CHECK-NEXT: Trying upper-bound unroll...
+; CHECK-NEXT: Trying loop peeling...
+; CHECK-NEXT: Trying partial unroll...
+; CHECK-NEXT: Trying runtime unroll...
+; CHECK-NEXT:  Not runtime unrolling: disabled by pragma.
+; CHECK-NEXT: Not unrolling: no viable strategy found.
 
-define i32 @explicit_peel(ptr %A) {
+define i32 @runtime_unroll_disabled_pragma(ptr %A, i32 %n) {
 entry:
-  br label %for.body
+  %cmp.entry = icmp sgt i32 %n, 0
+  br i1 %cmp.entry, label %for.body, label %exit
 
 for.body:
   %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
@@ -661,16 +541,17 @@ for.body:
   %load = load i32, ptr %arrayidx
   %add = add i32 %sum, %load
   %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 100
-  br i1 %cmp, label %for.body, label %exit
+  %cmp = icmp slt i32 %inc, %n
+  br i1 %cmp, label %for.body, label %exit, !llvm.loop !11
 
 exit:
-  ret i32 %add
+  %result = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  ret i32 %result
 }
 
 ; CHECK-LABEL:Loop Unroll: F[heuristic_peel] Loop %for.header (depth=1)
-; CHECK-NEXT:Loop Size = 11
-; CHECK-NEXT: Computing unroll count: TripCount=0, MaxTripCount={{[0-9]+}}, TripMultiple=1
+; CHECK-NEXT:Loop Size = 9
+; CHECK-NEXT: Computing unroll count: TripCount=0, MaxTripCount=2147483647, TripMultiple=1
 ; CHECK-NEXT: Trying pragma unroll...
 ; CHECK-NEXT: Trying full unroll...
 ; CHECK-NEXT: Trying upper-bound unroll...
@@ -679,48 +560,67 @@ exit:
 ; CHECK-NEXT:PEELING loop %for.header with iteration count 1!
 ; CHECK-NEXT:remark: <unknown>:0:0: peeled loop by 1 iterations
 
-define i32 @heuristic_peel(ptr %A, i32 %n) {
+declare void @foo()
+
+define void @heuristic_peel(ptr %A, i32 %n) {
 entry:
   %cmp.entry = icmp sgt i32 %n, 0
-  br i1 %cmp.entry, label %for.header, label %exit.early
+  br i1 %cmp.entry, label %for.header, label %exit
 
 for.header:
   %i = phi i32 [ 0, %entry ], [ %inc, %for.latch ]
-  %sum = phi i32 [ 0, %entry ], [ %add, %for.latch ]
-  ; This comparison becomes false after first iteration - enables peeling
-  %first = icmp eq i32 %i, 0
-  br i1 %first, label %special, label %normal
-
-special:
-  %load1 = load i32, ptr %A
-  br label %for.latch
+  %cmp.zero = icmp eq i32 %i, 0
+  br i1 %cmp.zero, label %then, label %for.latch
 
-normal:
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  %load2 = load i32, ptr %arrayidx
+then:
+  call void @foo()
   br label %for.latch
 
 for.latch:
-  %val = phi i32 [ %load1, %special ], [ %load2, %normal ]
-  %add = add i32 %sum, %val
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
+  %load = load i32, ptr %arrayidx
   %inc = add i32 %i, 1
   %cmp = icmp slt i32 %inc, %n
   br i1 %cmp, label %for.header, label %exit
 
 exit:
-  ret i32 %add
-
-exit.early:
-  ret i32 0
+  ret void
 }
 
-; THRESHOLDS-ZERO-LABEL:Loop Unroll: F[thresholds_zero] Loop %for.body (depth=1)
-; THRESHOLDS-ZERO-NEXT: Not unrolling: all thresholds are zero.
-; THRESHOLDS-ZERO-NEXT:remark: <unknown>:0:0: unable to unroll loop: unroll threshold is zero
+; CHECK-LABEL:Loop Unroll: F[runtime_unroll_simple] Loop %for.body (depth=1)
+; CHECK-NEXT:Loop Size = 6
+; CHECK-NEXT: Computing unroll count: TripCount=0, MaxTripCount=2147483647, TripMultiple=1
+; CHECK-NEXT: Explicit unroll requested: pragma-enable
+; CHECK-NEXT: Trying pragma unroll...
+; CHECK-NEXT: Trying full unroll...
+; CHECK-NEXT: Trying upper-bound unroll...
+; CHECK-NEXT: Trying loop peeling...
+; CHECK-NEXT: Trying partial unroll...
+; CHECK-NEXT: Trying runtime unroll...
+; CHECK-NEXT:  Runtime unrolling with count: 8
+; CHECK-NEXT:  Exiting block %for.body: TripCount=0, TripMultiple=1, BreakoutTrip=1
+; CHECK:UNROLLING loop %for.body by 8 with run-time trip count!
+; CHECK-NEXT:remark: <unknown>:0:0: unrolled loop by a factor of 8 with run-time trip count
+;
+; REMAINDER-LABEL:Loop Unroll: F[runtime_unroll_simple] Loop %for.body (depth=1)
+; REMAINDER-NEXT:Loop Size = 6
+; REMAINDER-NEXT: Computing unroll count: TripCount=0, MaxTripCount=2147483647, TripMultiple=1
+; REMAINDER-NEXT: Explicit unroll requested: pragma-enable
+; REMAINDER-NEXT: Trying pragma unroll...
+; REMAINDER-NEXT: Trying full unroll...
+; REMAINDER-NEXT: Trying upper-bound unroll...
+; REMAINDER-NEXT: Trying loop peeling...
+; REMAINDER-NEXT: Trying partial unroll...
+; REMAINDER-NEXT: Trying runtime unroll...
+; REMAINDER-NEXT:  Runtime unrolling with count: 8
+; REMAINDER-NEXT:  Exiting block %for.body: TripCount=0, TripMultiple=1, BreakoutTrip=1
+; REMAINDER:UNROLLING loop %for.body by 8 with run-time trip count (remainder unrolled)!
+; REMAINDER-NEXT:remark: <unknown>:0:0: unrolled loop by a factor of 8 with run-time trip count (remainder unrolled)
 
-define i32 @thresholds_zero(ptr %A) {
+define i32 @runtime_unroll_simple(ptr %A, i32 %n) {
 entry:
-  br label %for.body
+  %cmp.entry = icmp sgt i32 %n, 0
+  br i1 %cmp.entry, label %for.body, label %exit
 
 for.body:
   %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
@@ -729,115 +629,31 @@ for.body:
   %load = load i32, ptr %arrayidx
   %add = add i32 %sum, %load
   %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 10
-  br i1 %cmp, label %for.body, label %exit, !llvm.loop !1
-
-exit:
-  ret i32 %add
-}
-
-; NESTED-COST-LABEL:Loop Unroll: F[nested_cost_analysis] Loop %inner.header (depth=2)
-; NESTED-COST-NEXT: Not unrolling: transformation disabled by metadata.
-; NESTED-COST-LABEL:Loop Unroll: F[nested_cost_analysis] Loop %outer.header (depth=1)
-; NESTED-COST-NEXT:Loop Size = 11
-; NESTED-COST-NEXT: Computing unroll count: TripCount=4, MaxTripCount=0, TripMultiple=4
-; NESTED-COST-NEXT: Trying pragma unroll...
-; NESTED-COST-NEXT: Trying full unroll...
-; NESTED-COST-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold 30; checking for cost benefit.
-; NESTED-COST-NEXT:   Not analyzing loop cost: not an innermost loop.
-; NESTED-COST-NEXT:  Skipping: cost analysis unavailable.
-; NESTED-COST-NEXT: Trying upper-bound unroll...
-; NESTED-COST-NEXT: Trying loop peeling...
-; NESTED-COST-NEXT: Trying partial unroll...
-; NESTED-COST-NEXT:  Will not try to unroll partially because -unroll-allow-partial not given
-; NESTED-COST-NEXT: Not unrolling: no viable strategy found.
-
-define i32 @nested_cost_analysis(ptr %A) {
-entry:
-  br label %outer.header
-
-outer.header:
-  %i = phi i32 [ 0, %entry ], [ %i.inc, %outer.latch ]
-  br label %inner.header
-
-inner.header:
-  %j = phi i32 [ 0, %outer.header ], [ %j.inc, %inner.header ]
-  %sum = phi i32 [ 0, %outer.header ], [ %add, %inner.header ]
-  %idx = add i32 %i, %j
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %idx
-  %load = load i32, ptr %arrayidx
-  %add = add i32 %sum, %load
-  %j.inc = add i32 %j, 1
-  %inner.cmp = icmp ult i32 %j.inc, 100
-  br i1 %inner.cmp, label %inner.header, label %outer.latch, !llvm.loop !3
-
-outer.latch:
-  %i.inc = add i32 %i, 1
-  %outer.cmp = icmp ult i32 %i.inc, 4
-  br i1 %outer.cmp, label %outer.header, label %exit
+  %cmp = icmp slt i32 %inc, %n
+  br i1 %cmp, label %for.body, label %exit, !llvm.loop !13
 
 exit:
-  ret i32 %add
+  %result = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  ret i32 %result
 }
 
-; PRAGMA-TC-TOO-LARGE-LABEL:Loop Unroll: F[partial_instead_of_full] Loop %for.body (depth=1)
-; PRAGMA-TC-TOO-LARGE-NEXT:Loop Size = 6
-; PRAGMA-TC-TOO-LARGE-NEXT: Computing unroll count: TripCount=5000, MaxTripCount=0, TripMultiple=5000
-; PRAGMA-TC-TOO-LARGE-NEXT: Explicit unroll requested: pragma-full
-; PRAGMA-TC-TOO-LARGE-NEXT: Trying pragma unroll...
-; PRAGMA-TC-TOO-LARGE-NEXT:  Won't unroll; trip count is too large.
-; PRAGMA-TC-TOO-LARGE-NEXT:remark: <unknown>:0:0: unable to fully unroll loop: trip count 5000 exceeds limit 100
-; PRAGMA-TC-TOO-LARGE-NEXT: Trying full unroll...
-; PRAGMA-TC-TOO-LARGE-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit.
-; PRAGMA-TC-TOO-LARGE-NEXT:   Not analyzing loop cost: trip count too large.
-; PRAGMA-TC-TOO-LARGE-NEXT:  Skipping: cost analysis unavailable.
-; PRAGMA-TC-TOO-LARGE-NEXT:remark: <unknown>:0:0: unable to fully unroll loop: estimated unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}
-; PRAGMA-TC-TOO-LARGE-NEXT: Trying upper-bound unroll...
-; PRAGMA-TC-TOO-LARGE-NEXT: Trying loop peeling...
-; PRAGMA-TC-TOO-LARGE-NEXT: Trying partial unroll...
-; PRAGMA-TC-TOO-LARGE-NEXT:  Unrolled size exceeds threshold; reducing count from {{[0-9]+}} to {{[0-9]+}}.
-; PRAGMA-TC-TOO-LARGE-NEXT:  Partially unrolling with count: 2500
-; PRAGMA-TC-TOO-LARGE-NEXT: Partial unroll instead of full: unrolled size too large. Unrolling 2500 times instead of 5000.
-; PRAGMA-TC-TOO-LARGE-NEXT:remark: <unknown>:0:0: unable to fully unroll loop as directed by full unroll pragma because unrolled size is too large
-; PRAGMA-TC-TOO-LARGE-NEXT:  Exiting block %for.body: TripCount=5000, TripMultiple=0, BreakoutTrip=0
-; PRAGMA-TC-TOO-LARGE-NEXT:UNROLLING loop %for.body by 2500!
-; PRAGMA-TC-TOO-LARGE-NEXT:remark: <unknown>:0:0: unrolled loop by a factor of 2500
-
-define i32 @partial_instead_of_full(ptr %A) {
-entry:
-  br label %for.body
-
-for.body:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  %load = load i32, ptr %arrayidx
-  %add = add i32 %sum, %load
-  %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 5000
-  br i1 %cmp, label %for.body, label %exit, !llvm.loop !0
-
-exit:
-  ret i32 %add
-}
+; PARTIAL-ALLOW-LABEL:Loop Unroll: F[partial_unroll_cost_analysis] Loop %for.body (depth=1)
+; PARTIAL-ALLOW-NEXT:Loop Size = 6
+; PARTIAL-ALLOW-NEXT: Computing unroll count: TripCount=200, MaxTripCount=0, TripMultiple=200
+; PARTIAL-ALLOW-NEXT: Trying pragma unroll...
+; PARTIAL-ALLOW-NEXT: Trying full unroll...
+; PARTIAL-ALLOW-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit.
+; PARTIAL-ALLOW-NEXT:   Not analyzing loop cost: trip count too large.
+; PARTIAL-ALLOW-NEXT: Trying upper-bound unroll...
+; PARTIAL-ALLOW-NEXT: Trying loop peeling...
+; PARTIAL-ALLOW-NEXT: Trying partial unroll...
+; PARTIAL-ALLOW-NEXT:  Unrolled size exceeds threshold; reducing count from {{[0-9]+}} to {{[0-9]+}}.
+; PARTIAL-ALLOW-NEXT:  Partially unrolling with count: {{[0-9]+}}
+; PARTIAL-ALLOW-NEXT:  Exiting block %for.body: TripCount=200, TripMultiple=0, BreakoutTrip=0
+; PARTIAL-ALLOW-NEXT:UNROLLING loop %for.body by {{[0-9]+}}!
+; PARTIAL-ALLOW-NEXT:remark: <unknown>:0:0: unrolled loop by a factor of {{[0-9]+}}
 
-; NO-PROFIT-LABEL:Loop Unroll: F[no_profitable_count] Loop %for.body (depth=1)
-; NO-PROFIT-NEXT:Loop Size = 6
-; NO-PROFIT-NEXT: Computing unroll count: TripCount=100, MaxTripCount=0, TripMultiple=100
-; NO-PROFIT-NEXT: Trying pragma unroll...
-; NO-PROFIT-NEXT: Trying full unroll...
-; NO-PROFIT-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit.
-; NO-PROFIT-NEXT:   Not analyzing loop cost: trip count too large.
-; NO-PROFIT-NEXT:  Skipping: cost analysis unavailable.
-; NO-PROFIT-NEXT: Trying upper-bound unroll...
-; NO-PROFIT-NEXT: Trying loop peeling...
-; NO-PROFIT-NEXT: Trying partial unroll...
-; NO-PROFIT-NEXT:  Unrolled size exceeds threshold; reducing count from {{[0-9]+}} to {{[0-9]+}}.
-; NO-PROFIT-NEXT:  Will not partially unroll: no profitable count.
-; NO-PROFIT-NEXT:  Partially unrolling with count: 0
-; NO-PROFIT-NEXT: Not unrolling: no viable strategy found.
-
-define i32 @no_profitable_count(ptr %A) {
+define i32 @partial_unroll_cost_analysis(ptr %A) {
 entry:
   br label %for.body
 
@@ -848,48 +664,139 @@ for.body:
   %load = load i32, ptr %arrayidx
   %add = add i32 %sum, %load
   %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 100
+  %cmp = icmp ult i32 %inc, 200
   br i1 %cmp, label %for.body, label %exit
 
 exit:
   ret i32 %add
 }
 
-; CHECK-LABEL:Loop Unroll: F[extended_convergence] Loop %for.body (depth=1)
-; CHECK-NEXT: Not unrolling: contains convergent operations.
-; CHECK-NEXT:remark: <unknown>:0:0: unable to unroll loop: contains convergent operations
-
-declare void @convergent_func() convergent
-declare token @llvm.experimental.convergence.anchor()
+; CHECK-LABEL:Loop Unroll: F[pragma_full_tc_too_large] Loop %for.body (depth=1)
+; CHECK-NEXT:Loop Size = 6
+; CHECK-NEXT: Computing unroll count: TripCount=1000001, MaxTripCount=0, TripMultiple=1000001
+; CHECK-NEXT: Explicit unroll requested: pragma-full
+; CHECK-NEXT: Trying pragma unroll...
+; CHECK-NEXT:  Won't unroll; trip count is too large.
+; CHECK-NEXT:remark: <unknown>:0:0: may be unable to fully unroll loop: trip count 1000001 exceeds limit 1000000
+; CHECK-NEXT: Trying full unroll...
+; CHECK-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit.
+; CHECK-NEXT:   Not analyzing loop cost: trip count too large.
+; CHECK-NEXT: Trying upper-bound unroll...
+; CHECK-NEXT: Trying loop peeling...
+; CHECK-NEXT: Trying partial unroll...
+; CHECK-NEXT:  Unrolled size exceeds threshold; reducing count from {{[0-9]+}} to {{[0-9]+}}.
+; CHECK-NEXT:  Partially unrolling with count: {{[0-9]+}}
+; CHECK-NEXT:  Exiting block %for.body: TripCount=1000001, TripMultiple=0, BreakoutTrip=0
+; CHECK-NEXT:UNROLLING loop %for.body by {{[0-9]+}}!
+; CHECK-NEXT:remark: <unknown>:0:0: unrolled loop by a factor of {{[0-9]+}}
+; CHECK-NEXT:remark: <unknown>:0:0: unable to fully unroll loop as directed; unrolled by factor {{[0-9]+}}
 
-define i32 @extended_convergence(ptr %A, i32 %n) {
+define i32 @pragma_full_tc_too_large(ptr %A) {
 entry:
-  br label %for.body, !llvm.loop !1
+  br label %for.body
 
 for.body:
   %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %tok = call token @llvm.experimental.convergence.anchor()
   %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
   %load = load i32, ptr %arrayidx
   %add = add i32 %sum, %load
   %inc = add i32 %i, 1
-  %cmp = icmp slt i32 %inc, %n
-  br i1 %cmp, label %for.body, label %exit, !llvm.loop !1
+  %cmp = icmp ult i32 %inc, 1000001
+  br i1 %cmp, label %for.body, label %exit, !llvm.loop !12
 
 exit:
-  ; Using convergence token outside the loop creates ExtendedLoop convergence
-  call void @convergent_func() [ "convergencectrl"(token %tok) ]
   ret i32 %add
 }
 
-; CHECK-LABEL:Loop Unroll: F[noduplicate_prevents_unroll] Loop %for.body (depth=1)
-; CHECK-NEXT: Not unrolling: contains non-duplicatable instructions.
-; CHECK-NEXT:remark: <unknown>:0:0: unable to unroll loop: contains non-duplicatable instructions
+; CHECK-LABEL:Loop Unroll: F[nested_loop_cost] Loop %inner (depth=2)
+; CHECK-NEXT:Loop Size = 6
+; CHECK-NEXT: Computing unroll count: TripCount=0, MaxTripCount=2147483647, TripMultiple=1
+; CHECK-NEXT: Trying pragma unroll...
+; CHECK-NEXT: Trying full unroll...
+; CHECK-NEXT: Trying upper-bound unroll...
+; CHECK-NEXT: Trying loop peeling...
+; CHECK-NEXT: Trying partial unroll...
+; CHECK-NEXT: Trying runtime unroll...
+; CHECK-NEXT:  Will not try to unroll loop with runtime trip count because -unroll-runtime not given
+; CHECK-NEXT: Not unrolling: no viable strategy found.
+; CHECK-LABEL:Loop Unroll: F[nested_loop_cost] Loop %outer (depth=1)
+; CHECK-NEXT:Loop Size = {{[0-9]+}}
+; CHECK-NEXT: Computing unroll count: TripCount=10, MaxTripCount=0, TripMultiple=10
+; CHECK-NEXT: Trying pragma unroll...
+; CHECK-NEXT: Trying full unroll...
+; CHECK-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit.
+; CHECK-NEXT:   Not analyzing loop cost: not an innermost loop.
+; CHECK-NEXT: Trying upper-bound unroll...
+; CHECK-NEXT: Trying loop peeling...
+; CHECK-NEXT: Trying partial unroll...
+; CHECK-NEXT:  Will not try to unroll partially because -unroll-allow-partial not given
+; CHECK-NEXT: Not unrolling: no viable strategy found.
 
-declare void @noduplicate_func() noduplicate
+define i32 @nested_loop_cost(ptr %A, i32 %n) { ; test input with a two-deep loop nest that accumulates i32 loads from %A and returns the sum
+entry:
+  br label %outer
+
+outer: ; header and body of the outer loop
+  %i = phi i32 [ 0, %entry ], [ %i.next, %inner.exit ] ; outer induction variable, starts at 0
+  %sum.outer = phi i32 [ 0, %entry ], [ %sum.inner.lcssa, %inner.exit ] ; sum carried from one outer iteration to the next
+  %idxA = getelementptr inbounds i32, ptr %A, i32 %i
+  %loadA = load i32, ptr %idxA
+  %mul1 = mul i32 %loadA, %sum.outer ; first link of a straight-line arithmetic chain over %loadA
+  %add1 = add i32 %mul1, %loadA
+  %mul2 = mul i32 %add1, %loadA
+  %sub1 = sub i32 %mul2, %loadA
+  %add2 = add i32 %sub1, %loadA
+  %mul3 = mul i32 %add2, %loadA
+  %sub2 = sub i32 %mul3, %loadA
+  %xor1 = xor i32 %sub2, %loadA
+  %or1 = or i32 %xor1, %loadA ; last link of the chain; NOTE(review) the chain presumably only pads the outer body so its unrolled size exceeds the threshold, confirm against the RUN line
+  %outer.sum = add i32 %sum.outer, %or1
+  br label %inner
+
+inner: ; single-block inner loop, it is its own latch
+  %j = phi i32 [ 0, %outer ], [ %j.next, %inner ] ; inner induction variable, restarts at 0 on every outer iteration
+  %sum.inner = phi i32 [ %outer.sum, %outer ], [ %inner.add, %inner ] ; seeded with the value computed in the outer body
+  %idxB = getelementptr inbounds i32, ptr %A, i32 %j
+  %loadB = load i32, ptr %idxB
+  %inner.add = add i32 %sum.inner, %loadB
+  %j.next = add i32 %j, 1
+  %inner.cmp = icmp slt i32 %j.next, %n ; signed compare against the function argument %n, not a constant
+  br i1 %inner.cmp, label %inner, label %inner.exit
+
+inner.exit: ; exit block of the inner loop and latch of the outer loop
+  %sum.inner.lcssa = phi i32 [ %inner.add, %inner ] ; single-incoming phi that carries %inner.add out of the inner loop
+  %i.next = add i32 %i, 1
+  %outer.cmp = icmp ult i32 %i.next, 10 ; outer loop repeats while %i.next is below the constant 10
+  br i1 %outer.cmp, label %outer, label %exit
+
+exit:
+  ret i32 %sum.inner.lcssa
+}
+
+; USER-COUNT-LABEL:Loop Unroll: F[user_count_unroll] Loop %for.body (depth=1)
+; USER-COUNT-NEXT:Loop Size = 6
+; USER-COUNT-NEXT: Computing unroll count: TripCount=12, MaxTripCount=0, TripMultiple=12
+; USER-COUNT-NEXT: Explicit unroll requested: user-count
+; USER-COUNT-NEXT: Trying pragma unroll...
+; USER-COUNT-NEXT:  Unrolling with user-specified count: 4.
+; USER-COUNT-NEXT:  Exiting block %for.body: TripCount=12, TripMultiple=0, BreakoutTrip=0
+; USER-COUNT-NEXT:UNROLLING loop %for.body by 4!
+; USER-COUNT-NEXT:remark: <unknown>:0:0: unrolled loop by a factor of 4
+;
+; USER-COUNT-EXCEED-LABEL:Loop Unroll: F[user_count_unroll] Loop %for.body (depth=1)
+; USER-COUNT-EXCEED-NEXT:Loop Size = 6
+; USER-COUNT-EXCEED-NEXT: Computing unroll count: TripCount=12, MaxTripCount=0, TripMultiple=12
+; USER-COUNT-EXCEED-NEXT: Explicit unroll requested: user-count
+; USER-COUNT-EXCEED-NEXT: Trying pragma unroll...
+; USER-COUNT-EXCEED-NEXT:  Not unrolling with user count 9999: exceeds threshold.
+; USER-COUNT-EXCEED-NEXT: Trying full unroll...
+; USER-COUNT-EXCEED-NEXT:  Unrolling: size {{[0-9]+}} < threshold {{[0-9]+}}.
+; USER-COUNT-EXCEED-NEXT:  Exiting block %for.body: TripCount=12, TripMultiple=0, BreakoutTrip=0
+; USER-COUNT-EXCEED-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 12!
+; USER-COUNT-EXCEED-NEXT:remark: <unknown>:0:0: completely unrolled loop with 12 iterations
 
-define i32 @noduplicate_prevents_unroll(ptr %A) {
+define i32 @user_count_unroll(ptr %A) {
 entry:
   br label %for.body
 
@@ -898,125 +805,26 @@ for.body:
   %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
   %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
   %load = load i32, ptr %arrayidx
-  ; noduplicate attribute prevents loop unrolling
-  call void @noduplicate_func()
   %add = add i32 %sum, %load
   %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 8
-  br i1 %cmp, label %for.body, label %exit, !llvm.loop !1
+  %cmp = icmp ult i32 %inc, 12
+  br i1 %cmp, label %for.body, label %exit
 
 exit:
   ret i32 %add
 }
 
-; UNROLL-AS-DIRECTED-FAIL-LABEL:Loop Unroll: F[unroll_as_directed_fail] Loop %for.body (depth=1)
-; UNROLL-AS-DIRECTED-FAIL-NEXT:Loop Size = 19
-; UNROLL-AS-DIRECTED-FAIL-NEXT: Computing unroll count: TripCount=100, MaxTripCount=0, TripMultiple=100
-; UNROLL-AS-DIRECTED-FAIL-NEXT: Explicit unroll requested: pragma-enable
-; UNROLL-AS-DIRECTED-FAIL-NEXT: Trying pragma unroll...
-; UNROLL-AS-DIRECTED-FAIL-NEXT: Trying full unroll...
-; UNROLL-AS-DIRECTED-FAIL-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit.
-; UNROLL-AS-DIRECTED-FAIL-NEXT:   Not analyzing loop cost: trip count too large.
-; UNROLL-AS-DIRECTED-FAIL-NEXT:  Skipping: cost analysis unavailable.
-; UNROLL-AS-DIRECTED-FAIL-NEXT: Trying upper-bound unroll...
-; UNROLL-AS-DIRECTED-FAIL-NEXT: Trying loop peeling...
-; UNROLL-AS-DIRECTED-FAIL-NEXT: Trying partial unroll...
-; UNROLL-AS-DIRECTED-FAIL-NEXT:  Unrolled size exceeds threshold; reducing count from {{[0-9]+}} to 0.
-; UNROLL-AS-DIRECTED-FAIL-NEXT:  Will not partially unroll: no profitable count.
-; UNROLL-AS-DIRECTED-FAIL-NEXT:  Partially unrolling with count: 0
-; UNROLL-AS-DIRECTED-FAIL-NEXT: Not unrolling as directed: unrolled size too large.
-; UNROLL-AS-DIRECTED-FAIL-NEXT:remark: <unknown>:0:0: unable to fully unroll loop as directed by unroll metadata because unrolled size is too large
-
-define i32 @unroll_as_directed_fail(ptr %A, ptr %B, ptr %C, ptr %D) {
-entry:
-  br label %for.body
-
-for.body:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %sum = phi i32 [ 0, %entry ], [ %add8, %for.body ]
-  %idx1 = add i32 %i, 0
-  %arrayidx1 = getelementptr inbounds i32, ptr %A, i32 %idx1
-  %load1 = load i32, ptr %arrayidx1
-  %idx2 = add i32 %i, 1
-  %arrayidx2 = getelementptr inbounds i32, ptr %B, i32 %idx2
-  %load2 = load i32, ptr %arrayidx2
-  %idx3 = add i32 %i, 2
-  %arrayidx3 = getelementptr inbounds i32, ptr %C, i32 %idx3
-  %load3 = load i32, ptr %arrayidx3
-  %idx4 = add i32 %i, 3
-  %arrayidx4 = getelementptr inbounds i32, ptr %D, i32 %idx4
-  %load4 = load i32, ptr %arrayidx4
-  %add1 = add i32 %sum, %load1
-  %add2 = add i32 %add1, %load2
-  %add3 = add i32 %add2, %load3
-  %add8 = add i32 %add3, %load4
-  %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 100
-  br i1 %cmp, label %for.body, label %exit, !llvm.loop !1
+; EXPLICIT-PEEL-LABEL:Loop Unroll: F[explicit_peel_count] Loop %for.body (depth=1)
+; EXPLICIT-PEEL-NEXT:Loop Size = 6
+; EXPLICIT-PEEL-NEXT: Computing unroll count: TripCount=0, MaxTripCount=2147483647, TripMultiple=1
+; EXPLICIT-PEEL-NEXT:  Using explicit peel count: 2.
+; EXPLICIT-PEEL-NEXT:PEELING loop %for.body with iteration count 2!
+; EXPLICIT-PEEL-NEXT:remark: <unknown>:0:0: peeled loop by 2 iterations
 
-exit:
-  ret i32 %add8
-}
-
-; UNROLL-AS-DIRECTED-FAIL-LABEL:Loop Unroll: F[full_unroll_as_directed_fail] Loop %for.body (depth=1)
-; UNROLL-AS-DIRECTED-FAIL-NEXT:Loop Size = 19
-; UNROLL-AS-DIRECTED-FAIL-NEXT: Computing unroll count: TripCount=100, MaxTripCount=0, TripMultiple=100
-; UNROLL-AS-DIRECTED-FAIL-NEXT: Explicit unroll requested: pragma-full
-; UNROLL-AS-DIRECTED-FAIL-NEXT: Trying pragma unroll...
-; UNROLL-AS-DIRECTED-FAIL-NEXT:  Won't unroll; trip count is too large.
-; UNROLL-AS-DIRECTED-FAIL-NEXT:remark: <unknown>:0:0: unable to fully unroll loop: trip count 100 exceeds limit 10
-; UNROLL-AS-DIRECTED-FAIL-NEXT: Trying full unroll...
-; UNROLL-AS-DIRECTED-FAIL-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit.
-; UNROLL-AS-DIRECTED-FAIL-NEXT:   Not analyzing loop cost: trip count too large.
-; UNROLL-AS-DIRECTED-FAIL-NEXT:  Skipping: cost analysis unavailable.
-; UNROLL-AS-DIRECTED-FAIL-NEXT:remark: <unknown>:0:0: unable to fully unroll loop: estimated unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}
-; UNROLL-AS-DIRECTED-FAIL-NEXT: Trying upper-bound unroll...
-; UNROLL-AS-DIRECTED-FAIL-NEXT: Trying loop peeling...
-; UNROLL-AS-DIRECTED-FAIL-NEXT: Trying partial unroll...
-; UNROLL-AS-DIRECTED-FAIL-NEXT:  Unrolled size exceeds threshold; reducing count from {{[0-9]+}} to 0.
-; UNROLL-AS-DIRECTED-FAIL-NEXT:  Will not partially unroll: no profitable count.
-; UNROLL-AS-DIRECTED-FAIL-NEXT:  Partially unrolling with count: 0
-; UNROLL-AS-DIRECTED-FAIL-NEXT: Not unrolling as directed: unrolled size too large.
-; UNROLL-AS-DIRECTED-FAIL-NEXT:remark: <unknown>:0:0: unable to fully unroll loop as directed by unroll metadata because unrolled size is too large
-
-define i32 @full_unroll_as_directed_fail(ptr %A, ptr %B, ptr %C, ptr %D) {
-entry:
-  br label %for.body
-
-for.body:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %sum = phi i32 [ 0, %entry ], [ %add8, %for.body ]
-  %idx1 = add i32 %i, 0
-  %arrayidx1 = getelementptr inbounds i32, ptr %A, i32 %idx1
-  %load1 = load i32, ptr %arrayidx1
-  %idx2 = add i32 %i, 1
-  %arrayidx2 = getelementptr inbounds i32, ptr %B, i32 %idx2
-  %load2 = load i32, ptr %arrayidx2
-  %idx3 = add i32 %i, 2
-  %arrayidx3 = getelementptr inbounds i32, ptr %C, i32 %idx3
-  %load3 = load i32, ptr %arrayidx3
-  %idx4 = add i32 %i, 3
-  %arrayidx4 = getelementptr inbounds i32, ptr %D, i32 %idx4
-  %load4 = load i32, ptr %arrayidx4
-  %add1 = add i32 %sum, %load1
-  %add2 = add i32 %add1, %load2
-  %add3 = add i32 %add2, %load3
-  %add8 = add i32 %add3, %load4
-  %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 100
-  br i1 %cmp, label %for.body, label %exit, !llvm.loop !0
-
-exit:
-  ret i32 %add8
-}
-
-; CHECK-LABEL:Loop Unroll: F[indirectbr_loop] Loop %for.body (depth=1)
-; CHECK-NEXT: Not unrolling loop which is not in loop-simplify form.
-; CHECK-NEXT:remark: <unknown>:0:0: unable to unroll loop: not in loop-simplify form
-
-define i32 @indirectbr_loop(ptr %A, ptr %target) {
+define i32 @explicit_peel_count(ptr %A, i32 %n) {
 entry:
-  indirectbr ptr %target, [label %for.body, label %exit]
+  %cmp.entry = icmp sgt i32 %n, 0
+  br i1 %cmp.entry, label %for.body, label %exit
 
 for.body:
   %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
@@ -1025,54 +833,19 @@ for.body:
   %load = load i32, ptr %arrayidx
   %add = add i32 %sum, %load
   %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 10
-  br i1 %cmp, label %for.body, label %exit, !llvm.loop !1
+  %cmp = icmp slt i32 %inc, %n
+  br i1 %cmp, label %for.body, label %exit
 
 exit:
   %result = phi i32 [ 0, %entry ], [ %add, %for.body ]
   ret i32 %result
 }
 
-; USER-COUNT-EXCEED-LABEL:Loop Unroll: F[user_count_exceed] Loop %for.body (depth=1)
-; USER-COUNT-EXCEED-NEXT:Loop Size = 5
-; USER-COUNT-EXCEED-NEXT: Computing unroll count: TripCount=16, MaxTripCount=0, TripMultiple=16
-; USER-COUNT-EXCEED-NEXT: Explicit unroll requested: user-count
-; USER-COUNT-EXCEED-NEXT: Trying pragma unroll...
-; USER-COUNT-EXCEED-NEXT:  Not unrolling with user count 8: exceeds threshold.
-; USER-COUNT-EXCEED-NEXT: Trying full unroll...
-; USER-COUNT-EXCEED-NEXT:  Unrolling: size 50 < threshold 16384.
-; USER-COUNT-EXCEED-NEXT:  Exiting block %for.body: TripCount=16, TripMultiple=0, BreakoutTrip=0
-; USER-COUNT-EXCEED-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 16!
-; USER-COUNT-EXCEED-NEXT:remark: <unknown>:0:0: completely unrolled loop with 16 iterations
+; ZERO-THRESH-LABEL:Loop Unroll: F[zero_thresh_unroll] Loop %for.body (depth=1)
+; ZERO-THRESH-NEXT: Not unrolling: all thresholds are zero.
+; ZERO-THRESH-NEXT:remark: <unknown>:0:0: unable to unroll loop: unroll threshold is zero
 
-define void @user_count_exceed(ptr %A) {
-entry:
-  br label %for.body
-
-for.body:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  store i32 %i, ptr %arrayidx
-  %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 16
-  br i1 %cmp, label %for.body, label %exit
-
-exit:
-  ret void
-}
-
-; CHECK-LABEL:Loop Unroll: F[inline_prevents_unroll] Loop %for.body (depth=1)
-; CHECK-NEXT:Loop Size = 8
-; CHECK-NEXT: Not unrolling loop with inlinable calls.
-; CHECK-NEXT:remark: <unknown>:0:0: unable to unroll loop: contains inlinable calls
-
-; Internal function with single use - this is an inline candidate
-define internal i32 @single_use_helper(i32 %x) {
-  %add = add i32 %x, 42
-  ret i32 %add
-}
-
-define i32 @inline_prevents_unroll(ptr %A) {
+define i32 @zero_thresh_unroll(ptr %A) {
 entry:
   br label %for.body
 
@@ -1081,104 +854,28 @@ for.body:
   %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
   %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
   %load = load i32, ptr %arrayidx
-  %helper_result = call i32 @single_use_helper(i32 %load)
-  %add = add i32 %sum, %helper_result
+  %add = add i32 %sum, %load
   %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 10
-  br i1 %cmp, label %for.body, label %exit, !llvm.loop !11
+  %cmp = icmp ult i32 %inc, 8
+  br i1 %cmp, label %for.body, label %exit, !llvm.loop !16
 
 exit:
   ret i32 %add
 }
 
-; NO-REMAINDER-LABEL:Loop Unroll: F[small_max_trip_count] Loop %for.body (depth=1)
-; NO-REMAINDER-NEXT:Loop Size = 5
-; NO-REMAINDER-NEXT: Computing unroll count: TripCount=0, MaxTripCount=5, TripMultiple=1
-; NO-REMAINDER-NEXT: Explicit unroll requested: pragma-count(4)
-; NO-REMAINDER-NEXT: Trying pragma unroll...
-; NO-REMAINDER-NEXT:  Not unrolling with pragma count 4: remainder not allowed, count does not divide trip multiple 1.
-; NO-REMAINDER-NEXT:remark: <unknown>:0:0: unable to unroll loop with count 4: remainder loop is restricted and count does not divide trip multiple 1
-; NO-REMAINDER-NEXT: Trying full unroll...
-; NO-REMAINDER-NEXT: Trying upper-bound unroll...
-; NO-REMAINDER-NEXT: Trying loop peeling...
-; NO-REMAINDER-NEXT: Trying partial unroll...
-; NO-REMAINDER-NEXT: Trying runtime unroll...
-; NO-REMAINDER-NEXT:  Not runtime unrolling: max trip count {{[0-9]+}} is small (< 8) and not forced.
-; NO-REMAINDER-NEXT:remark: <unknown>:0:0: unable to runtime unroll loop: max trip count {{[0-9]+}} is too small (< {{[0-9]+}})
-; NO-REMAINDER-NEXT: Not unrolling: no viable strategy found.
-
-define void @small_max_trip_count(ptr %A, i32 %n) {
-entry:
-  ; Clamp n to max of 5, so MaxTripCount will be 5 (< default MaxUpperBound of 8)
-  %clamped = call i32 @llvm.umin.i32(i32 %n, i32 5)
-  %cmp.entry = icmp ugt i32 %clamped, 0
-  br i1 %cmp.entry, label %for.body, label %exit
-
-for.body:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  store i32 %i, ptr %arrayidx
-  %inc = add nuw nsw i32 %i, 1
-  %cmp = icmp ult i32 %inc, %clamped
-  br i1 %cmp, label %for.body, label %exit, !llvm.loop !13
-
-exit:
-  ret void
-}
+; MAX-COUNT-LABEL:Loop Unroll: F[max_count_unroll] Loop %for.body (depth=1)
+; MAX-COUNT-NEXT:Loop Size = 6
+; MAX-COUNT-NEXT: Computing unroll count: TripCount=10, MaxTripCount=0, TripMultiple=10
+; MAX-COUNT-NEXT: Trying pragma unroll...
+; MAX-COUNT-NEXT: Trying full unroll...
+; MAX-COUNT-NEXT:  Not unrolling: trip count 10 exceeds max count 2.
+; MAX-COUNT-NEXT: Trying upper-bound unroll...
+; MAX-COUNT-NEXT: Trying loop peeling...
+; MAX-COUNT-NEXT: Trying partial unroll...
+; MAX-COUNT-NEXT:  Will not try to unroll partially because -unroll-allow-partial not given
+; MAX-COUNT-NEXT: Not unrolling: no viable strategy found.
 
-declare i32 @llvm.umin.i32(i32, i32)
-
-; NO-STRATEGY-LABEL:Loop Unroll: F[no_strategy_pragma] Loop %for.body (depth=1)
-; NO-STRATEGY-NEXT:Loop Size = 5
-; NO-STRATEGY-NEXT: Computing unroll count: TripCount=0, MaxTripCount={{[0-9]+}}, TripMultiple=1
-; NO-STRATEGY-NEXT: Explicit unroll requested: pragma-enable
-; NO-STRATEGY-NEXT: Trying pragma unroll...
-; NO-STRATEGY-NEXT: Trying full unroll...
-; NO-STRATEGY-NEXT: Trying upper-bound unroll...
-; NO-STRATEGY-NEXT: Trying loop peeling...
-; NO-STRATEGY-NEXT: Trying partial unroll...
-; NO-STRATEGY-NEXT: Trying runtime unroll...
-; NO-STRATEGY-NEXT: Not unrolling: no viable strategy found.
-; NO-STRATEGY-NEXT:remark: <unknown>:0:0: unable to unroll loop: no viable unroll count found
-
-define void @no_strategy_pragma(ptr %A, i32 %n) {
-entry:
-  %cmp.entry = icmp ugt i32 %n, 0
-  br i1 %cmp.entry, label %for.body, label %exit
-
-for.body:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  store i32 %i, ptr %arrayidx
-  %inc = add nuw nsw i32 %i, 1
-  %cmp = icmp ult i32 %inc, %n
-  br i1 %cmp, label %for.body, label %exit, !llvm.loop !12
-
-exit:
-  ret void
-}
-
-; We get contradictory remarks here: full unroll is blocked by -unroll-full-max-count=10,
-; but partial unroll picks count=20 (the full trip count) anyway. This is a test artifact.
-; MAX-COUNT-10-LABEL:Loop Unroll: F[tc_exceeds_max_ore] Loop %for.body (depth=1)
-; MAX-COUNT-10-NEXT:Loop Size = 6
-; MAX-COUNT-10-NEXT: Computing unroll count: TripCount=20, MaxTripCount=0, TripMultiple=20
-; MAX-COUNT-10-NEXT: Explicit unroll requested: pragma-full
-; MAX-COUNT-10-NEXT: Trying pragma unroll...
-; MAX-COUNT-10-NEXT:  Won't unroll; trip count is too large.
-; MAX-COUNT-10-NEXT:remark: <unknown>:0:0: unable to fully unroll loop: trip count 20 exceeds limit 10
-; MAX-COUNT-10-NEXT: Trying full unroll...
-; MAX-COUNT-10-NEXT:  Not unrolling: trip count 20 exceeds max count 10.
-; MAX-COUNT-10-NEXT:remark: <unknown>:0:0: unable to fully unroll loop: trip count 20 exceeds maximum full unroll count 10
-; MAX-COUNT-10-NEXT: Trying upper-bound unroll...
-; MAX-COUNT-10-NEXT: Trying loop peeling...
-; MAX-COUNT-10-NEXT: Trying partial unroll...
-; MAX-COUNT-10-NEXT:  Partially unrolling with count: 20
-; MAX-COUNT-10-NEXT:  Exiting block %for.body: TripCount=20, TripMultiple=0, BreakoutTrip=0
-; MAX-COUNT-10-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 20!
-; MAX-COUNT-10-NEXT:remark: <unknown>:0:0: completely unrolled loop with 20 iterations
-
-define i32 @tc_exceeds_max_ore(ptr %A) {
+define i32 @max_count_unroll(ptr %A) {
 entry:
   br label %for.body
 
@@ -1189,108 +886,32 @@ for.body:
   %load = load i32, ptr %arrayidx
   %add = add i32 %sum, %load
   %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 20
-  br i1 %cmp, label %for.body, label %exit, !llvm.loop !0
+  %cmp = icmp ult i32 %inc, 10
+  br i1 %cmp, label %for.body, label %exit
 
 exit:
   ret i32 %add
 }
 
-; CHECK-LABEL:Loop Unroll: F[caller_with_inlined_loop] Loop %for.body.i (depth=1)
-; CHECK-NEXT:remark: inlined.c:5:3: loop is from inlined function; call site is at caller.c:10
-; CHECK-NEXT:Loop Size = 6
-; CHECK-NEXT: Computing unroll count: TripCount=4, MaxTripCount=0, TripMultiple=4
-; CHECK-NEXT: Explicit unroll requested: pragma-enable
-; CHECK-NEXT: Trying pragma unroll...
-; CHECK-NEXT: Trying full unroll...
-; CHECK-NEXT:  Unrolling: size 18 < threshold 16384.
-; CHECK-NEXT:  Exiting block %for.body.i: TripCount=4, TripMultiple=0, BreakoutTrip=0
-; CHECK-NEXT:COMPLETELY UNROLLING loop %for.body.i with trip count 4!
-; CHECK-NEXT:remark: inlined.c:5:3: completely unrolled loop with 4 iterations
-
-define i32 @caller_with_inlined_loop(ptr %A) !dbg !20 {
-entry:
-  br label %for.body.i, !dbg !21
-
-for.body.i:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body.i ]
-  %sum = phi i32 [ 0, %entry ], [ %add, %for.body.i ]
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i, !dbg !22
-  %load = load i32, ptr %arrayidx, !dbg !22
-  %add = add i32 %sum, %load, !dbg !22
-  %inc = add i32 %i, 1, !dbg !22
-  %cmp = icmp ult i32 %inc, 4, !dbg !22
-  br i1 %cmp, label %for.body.i, label %exit, !dbg !22, !llvm.loop !23
-
-exit:
-  ret i32 %add, !dbg !21
-}
+; PARTIAL-NOPROFIT-LABEL:Loop Unroll: F[partial_no_profit] Loop %for.body (depth=1)
+; PARTIAL-NOPROFIT-NEXT:Loop Size = 6
+; PARTIAL-NOPROFIT-NEXT: Computing unroll count: TripCount=200, MaxTripCount=0, TripMultiple=200
+; PARTIAL-NOPROFIT-NEXT: Trying pragma unroll...
+; PARTIAL-NOPROFIT-NEXT: Trying full unroll...
+; PARTIAL-NOPROFIT-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit.
+; PARTIAL-NOPROFIT-NEXT:   Not analyzing loop cost: trip count too large.
+; PARTIAL-NOPROFIT-NEXT: Trying upper-bound unroll...
+; PARTIAL-NOPROFIT-NEXT: Trying loop peeling...
+; PARTIAL-NOPROFIT-NEXT: Trying partial unroll...
+; PARTIAL-NOPROFIT-NEXT:  Unrolled size exceeds threshold; reducing count from {{[0-9]+}} to {{[0-9]+}}.
+; PARTIAL-NOPROFIT-NEXT:  Will not partially unroll: no profitable count.
+; PARTIAL-NOPROFIT-NEXT:  Partially unrolling with count: 0
+; PARTIAL-NOPROFIT-NEXT: Not unrolling: no viable strategy found.
 
-; Same contradiction pattern as above: the low -unroll-threshold=20 -pragma-unroll-threshold=20
-; cause cost analysis to reject full unroll, but partial unroll uses different heuristics and
-; picks count=8 (the full trip count). Only happens with these artificial test flags.
-; COST-NOT-PROFITABLE-LABEL:Loop Unroll: F[cost_not_profitable] Loop %for.body (depth=1)
-; COST-NOT-PROFITABLE-NEXT:Loop Size = 14
-; COST-NOT-PROFITABLE-NEXT: Computing unroll count: TripCount=8, MaxTripCount=0, TripMultiple=8
-; COST-NOT-PROFITABLE-NEXT: Explicit unroll requested: pragma-enable
-; COST-NOT-PROFITABLE-NEXT: Trying pragma unroll...
-; COST-NOT-PROFITABLE-NEXT: Trying full unroll...
-; COST-NOT-PROFITABLE-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit.
-; COST-NOT-PROFITABLE-NEXT:   Starting LoopUnroll profitability analysis...
-; COST-NOT-PROFITABLE:   Analysis finished:
-; COST-NOT-PROFITABLE-NEXT:   UnrolledCost: {{[0-9]+}}, RolledDynamicCost: {{[0-9]+}}
-; COST-NOT-PROFITABLE-NEXT:  Not unrolling: cost {{[0-9]+}} >= boosted threshold {{[0-9]+}}.
-; COST-NOT-PROFITABLE-NEXT: Trying upper-bound unroll...
-; COST-NOT-PROFITABLE-NEXT: Trying loop peeling...
-; COST-NOT-PROFITABLE-NEXT: Trying partial unroll...
-; COST-NOT-PROFITABLE-NEXT:  Partially unrolling with count: 8
-; COST-NOT-PROFITABLE-NEXT:  Exiting block %for.body: TripCount=8, TripMultiple=0, BreakoutTrip=0
-; COST-NOT-PROFITABLE-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 8!
-; COST-NOT-PROFITABLE-NEXT:remark: <unknown>:0:0: completely unrolled loop with 8 iterations
-
-define i32 @cost_not_profitable(ptr %A, ptr %B, ptr %C) {
+define i32 @partial_no_profit(ptr %A) {
 entry:
   br label %for.body
 
-for.body:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %sum = phi i32 [ 0, %entry ], [ %add3, %for.body ]
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  %load1 = load i32, ptr %arrayidx
-  %arrayidx2 = getelementptr inbounds i32, ptr %B, i32 %i
-  %load2 = load i32, ptr %arrayidx2
-  %arrayidx3 = getelementptr inbounds i32, ptr %C, i32 %i
-  %load3 = load i32, ptr %arrayidx3
-  %mul1 = mul i32 %load1, %load2
-  %mul2 = mul i32 %mul1, %load3
-  %add1 = add i32 %sum, %mul2
-  %add2 = add i32 %add1, %load1
-  %add3 = add i32 %add2, %load2
-  %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 8
-  br i1 %cmp, label %for.body, label %exit, !llvm.loop !1
-
-exit:
-  ret i32 %add3
-}
-
-; UPPER-BOUND-HEURISTIC-LABEL:Loop Unroll: F[upper_bound_heuristic] Loop %for.body (depth=1)
-; UPPER-BOUND-HEURISTIC-NEXT:Loop Size = 6
-; UPPER-BOUND-HEURISTIC-NEXT: Computing unroll count: TripCount=0, MaxTripCount=3, TripMultiple=1
-; UPPER-BOUND-HEURISTIC-NEXT: Trying pragma unroll...
-; UPPER-BOUND-HEURISTIC-NEXT: Trying full unroll...
-; UPPER-BOUND-HEURISTIC-NEXT: Trying upper-bound unroll...
-; UPPER-BOUND-HEURISTIC-NEXT:  Unrolling: size {{[0-9]+}} < threshold {{[0-9]+}}.
-; UPPER-BOUND-HEURISTIC-NEXT:  Exiting block %for.body: TripCount=0, TripMultiple=1, BreakoutTrip=1
-; UPPER-BOUND-HEURISTIC-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 3!
-; UPPER-BOUND-HEURISTIC-NEXT:remark: <unknown>:0:0: completely unrolled loop with 3 iterations
-
-define i32 @upper_bound_heuristic(ptr %A, i32 %n) {
-entry:
-  ; Clamp n to max of 3, so MaxTripCount will be 3
-  %n.clamped = and i32 %n, 3
-  br label %for.body
-
 for.body:
   %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
@@ -1298,36 +919,28 @@ for.body:
   %load = load i32, ptr %arrayidx
   %add = add i32 %sum, %load
   %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, %n.clamped
+  %cmp = icmp ult i32 %inc, 200
   br i1 %cmp, label %for.body, label %exit
 
 exit:
   ret i32 %add
 }
 
-; FULL-COST-NOT-PROFITABLE-LABEL:Loop Unroll: F[pragma_full_cost_not_profitable] Loop %for.body (depth=1)
-; FULL-COST-NOT-PROFITABLE-NEXT:Loop Size = 9
-; FULL-COST-NOT-PROFITABLE-NEXT: Computing unroll count: TripCount=10, MaxTripCount=0, TripMultiple=10
-; FULL-COST-NOT-PROFITABLE-NEXT: Explicit unroll requested: pragma-full
-; FULL-COST-NOT-PROFITABLE-NEXT: Trying pragma unroll...
-; FULL-COST-NOT-PROFITABLE-NEXT:  Won't unroll; trip count is too large.
-; FULL-COST-NOT-PROFITABLE-NEXT:remark: <unknown>:0:0: unable to fully unroll loop: trip count 10 exceeds limit 8
-; FULL-COST-NOT-PROFITABLE-NEXT: Trying full unroll...
-; FULL-COST-NOT-PROFITABLE-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold 20; checking for cost benefit.
-; FULL-COST-NOT-PROFITABLE-NEXT:   Starting LoopUnroll profitability analysis...
-; FULL-COST-NOT-PROFITABLE:   Analysis finished:
-; FULL-COST-NOT-PROFITABLE-NEXT:   UnrolledCost: {{[0-9]+}}, RolledDynamicCost: {{[0-9]+}}
-; FULL-COST-NOT-PROFITABLE-NEXT:  Not unrolling: cost {{[0-9]+}} >= boosted threshold {{[0-9]+}}.
-; FULL-COST-NOT-PROFITABLE-NEXT:remark: <unknown>:0:0: unable to fully unroll loop: estimated unrolled cost {{[0-9]+}} exceeds boosted threshold {{[0-9]+}}
-; FULL-COST-NOT-PROFITABLE-NEXT: Trying upper-bound unroll...
-; FULL-COST-NOT-PROFITABLE-NEXT: Trying loop peeling...
-; FULL-COST-NOT-PROFITABLE-NEXT: Trying partial unroll...
-; FULL-COST-NOT-PROFITABLE-NEXT:  Partially unrolling with count: 10
-; FULL-COST-NOT-PROFITABLE-NEXT:  Exiting block %for.body: TripCount=10, TripMultiple=0, BreakoutTrip=0
-; FULL-COST-NOT-PROFITABLE-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 10!
-; FULL-COST-NOT-PROFITABLE-NEXT:remark: <unknown>:0:0: completely unrolled loop with 10 iterations
-
-define i32 @pragma_full_cost_not_profitable(ptr %A, ptr %B) {
+; PRAGMA-NOREMAINDER-LABEL:Loop Unroll: F[pragma_count_no_remainder] Loop %for.body (depth=1)
+; PRAGMA-NOREMAINDER-NEXT:Loop Size = 6
+; PRAGMA-NOREMAINDER-NEXT: Computing unroll count: TripCount=10, MaxTripCount=0, TripMultiple=10
+; PRAGMA-NOREMAINDER-NEXT: Explicit unroll requested: pragma-count(3)
+; PRAGMA-NOREMAINDER-NEXT: Trying pragma unroll...
+; PRAGMA-NOREMAINDER-NEXT:  Not unrolling with pragma count 3: remainder not allowed, count does not divide trip multiple 10.
+; PRAGMA-NOREMAINDER-NEXT:remark: <unknown>:0:0: may be unable to unroll loop with count 3: remainder loop is not allowed and count does not divide trip multiple 10
+; PRAGMA-NOREMAINDER-NEXT: Trying full unroll...
+; PRAGMA-NOREMAINDER-NEXT:  Unrolling: size {{[0-9]+}} < threshold {{[0-9]+}}.
+; PRAGMA-NOREMAINDER-NEXT:  Exiting block %for.body: TripCount=10, TripMultiple=0, BreakoutTrip=0
+; PRAGMA-NOREMAINDER-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 10!
+; PRAGMA-NOREMAINDER-NEXT:remark: <unknown>:0:0: completely unrolled loop with 10 iterations
+; PRAGMA-NOREMAINDER-NEXT:remark: <unknown>:0:0: unable to unroll loop with requested count 3; unrolled by factor 10
+
+define i32 @pragma_count_no_remainder(ptr %A) {
 entry:
   br label %for.body
 
@@ -1336,198 +949,59 @@ for.body:
   %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
   %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
   %load = load i32, ptr %arrayidx
-  %arrayidx2 = getelementptr inbounds i32, ptr %B, i32 %i
-  %load2 = load i32, ptr %arrayidx2
-  %mul = mul i32 %load, %load2
-  %add = add i32 %sum, %mul
+  %add = add i32 %sum, %load
   %inc = add i32 %i, 1
   %cmp = icmp ult i32 %inc, 10
-  br i1 %cmp, label %for.body, label %exit, !llvm.loop !0
+  br i1 %cmp, label %for.body, label %exit, !llvm.loop !14
 
 exit:
   ret i32 %add
 }
 
-; =============================================================================
-; Below are regression tests for edge cases in loop unrolling remarks.
-; =============================================================================
-
-; Test that a loop with multiple exits where one has a known trip count and
-; another has an unknown trip count is NOT labeled as upper-bound unroll.
-; CHECK-LABEL:Loop Unroll: F[multi_exit_known_and_unknown] Loop %for.header (depth=1)
-; CHECK-NEXT:Loop Size = 8
-; CHECK-NEXT: Computing unroll count: TripCount=5, MaxTripCount=0, TripMultiple=5
-; CHECK-NEXT: Trying pragma unroll...
-; CHECK-NEXT: Trying full unroll...
-; CHECK-NEXT:  Unrolling: size {{[0-9]+}} < threshold {{[0-9]+}}.
-; CHECK-NEXT:  Exiting block %for.header: TripCount=5, TripMultiple=0, BreakoutTrip=0
-; CHECK-NEXT:  Exiting block %for.body: TripCount=0, TripMultiple=1, BreakoutTrip=1
-; Note: This is a full unroll (not upper-bound) because we have a known trip count exit.
-; CHECK-NEXT:COMPLETELY UNROLLING loop %for.header with trip count 5!
-; CHECK-NEXT:remark: <unknown>:0:0: completely unrolled loop with 5 iterations
-
-define i32 @multi_exit_known_and_unknown(ptr %A, i1 %cond) {
-entry:
-  br label %for.header
-
-for.header:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.latch ]
-  %sum = phi i32 [ 0, %entry ], [ %add, %for.latch ]
-  ; This exit has known trip count = 5
-  %cmp = icmp ult i32 %i, 4
-  br i1 %cmp, label %for.body, label %exit
-
-for.body:
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  %load = load i32, ptr %arrayidx
-  %add = add i32 %sum, %load
-  ; This exit has unknown trip count (depends on runtime condition)
-  br i1 %cond, label %for.latch, label %exit
-
-for.latch:
-  %inc = add i32 %i, 1
-  br label %for.header
-
-exit:
-  %result = phi i32 [ %sum, %for.header ], [ %add, %for.body ]
-  ret i32 %result
-}
-
-; Test header-exiting while-style loop with partial unroll and remainder.
-; The latch is NOT the exiting block (unconditional branch), but the header is.
-; PARTIAL-UNROLL-LABEL:Loop Unroll: F[header_exit_with_remainder] Loop %while.header (depth=1)
-; PARTIAL-UNROLL-NEXT:Loop Size = 8
-; PARTIAL-UNROLL-NEXT: Computing unroll count: TripCount=11, MaxTripCount=0, TripMultiple=11
-; PARTIAL-UNROLL-NEXT: Explicit unroll requested: user-count
-; PARTIAL-UNROLL-NEXT: Trying pragma unroll...
-; PARTIAL-UNROLL-NEXT:  Unrolling with user-specified count: 4.
-; PARTIAL-UNROLL-NEXT:Attempting unroll by factor 4 with remainder loop (trip count 11)
-; PARTIAL-UNROLL-NEXT:remark: <unknown>:0:0: attempting unroll by factor 4 with remainder loop (trip count 11)
-; PARTIAL-UNROLL-NEXT:  Exiting block %while.header: TripCount=11, TripMultiple=0, BreakoutTrip=3
-; Note: Should say "with remainder" even though the latch is not the exiting block.
-; PARTIAL-UNROLL-NEXT:UNROLLING loop %while.header by 4!
-; PARTIAL-UNROLL-NEXT:remark: <unknown>:0:0: unrolled loop by a factor of 4
-
-define i32 @header_exit_with_remainder(ptr %A) {
-entry:
-  br label %while.header
-
-while.header:
-  %i = phi i32 [ 0, %entry ], [ %inc, %while.latch ]
-  %sum = phi i32 [ 0, %entry ], [ %add, %while.latch ]
-  ; Exit is in the header (while-style loop)
-  %cmp = icmp ult i32 %i, 10
-  br i1 %cmp, label %while.body, label %exit
-
-while.body:
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  %load = load i32, ptr %arrayidx
-  %add = add i32 %sum, %load
-  br label %while.latch
-
-while.latch:
-  ; Latch is NOT an exiting block - unconditional branch
-  %inc = add i32 %i, 1
-  br label %while.header
-
-exit:
-  ret i32 %sum
-}
-
-; CHECK-LABEL:Loop Unroll: F[switch_exit_full_unroll_bug] Loop %loop (depth=1)
+; CHECK-LABEL:Loop Unroll: F[header_address_taken] Loop %for.body (depth=1)
 ; CHECK-NEXT:Loop Size = 6
-; CHECK-NEXT: Computing unroll count: TripCount=5, MaxTripCount=0, TripMultiple=5
+; CHECK-NEXT: Computing unroll count: TripCount=4, MaxTripCount=0, TripMultiple=4
+; CHECK-NEXT: Explicit unroll requested: pragma-enable
 ; CHECK-NEXT: Trying pragma unroll...
 ; CHECK-NEXT: Trying full unroll...
 ; CHECK-NEXT:  Unrolling: size {{[0-9]+}} < threshold {{[0-9]+}}.
-; CHECK-NEXT:COMPLETELY UNROLLING loop %loop with trip count 5!
-; CHECK-NEXT:remark: <unknown>:0:0: completely unrolled loop with 5 iterations
-
-define i32 @switch_exit_full_unroll_bug(ptr %A) {
-entry:
-  br label %loop
-
-loop:
-  %i = phi i32 [ 0, %entry ], [ %inc, %loop.latch ]
-  %sum = phi i32 [ 0, %entry ], [ %add, %loop.latch ]
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  %load = load i32, ptr %arrayidx
-  %add = add i32 %sum, %load
-  %inc = add i32 %i, 1
-  ; Switch exit - not a branch, so won't be in ExitInfos
-  switch i32 %i, label %loop.latch [
-    i32 4, label %exit
-  ]
-
-loop.latch:
-  br label %loop
-
-exit:
-  ret i32 %add
-}
+; CHECK-NEXT:  Won't unroll loop: address of header block is taken.
+; CHECK-NEXT: Failed to unroll loop as explicitly requested.
+; CHECK-NEXT:remark: <unknown>:0:0: failed to unroll loop as explicitly requested
 
-; PARTIAL-UNROLL-LABEL:Loop Unroll: F[switch_exit_partial_remainder_bug] Loop %loop (depth=1)
-; PARTIAL-UNROLL-NEXT:Loop Size = 6
-; PARTIAL-UNROLL-NEXT: Computing unroll count: TripCount=11, MaxTripCount=0, TripMultiple=11
-; PARTIAL-UNROLL-NEXT: Explicit unroll requested: user-count
-; PARTIAL-UNROLL-NEXT: Trying pragma unroll...
-; PARTIAL-UNROLL-NEXT:  Unrolling with user-specified count: 4.
-; PARTIAL-UNROLL-NEXT:Attempting unroll by factor 4 with remainder loop (trip count 11)
-; PARTIAL-UNROLL-NEXT:remark: <unknown>:0:0: attempting unroll by factor 4 with remainder loop (trip count 11)
-; PARTIAL-UNROLL-NEXT:UNROLLING loop %loop by 4!
-; PARTIAL-UNROLL-NEXT:remark: <unknown>:0:0: unrolled loop by a factor of 4
-
-define i32 @switch_exit_partial_remainder_bug(ptr %A) {
+define i32 @header_address_taken(ptr %A) {
 entry:
-  br label %loop
+  store ptr blockaddress(@header_address_taken, %for.body), ptr %A
+  br label %for.body
 
-loop:
-  %i = phi i32 [ 0, %entry ], [ %inc, %loop.latch ]
-  %sum = phi i32 [ 0, %entry ], [ %add, %loop.latch ]
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
   %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
   %load = load i32, ptr %arrayidx
   %add = add i32 %sum, %load
   %inc = add i32 %i, 1
-  ; Switch exit with trip count 11
-  switch i32 %i, label %loop.latch [
-    i32 10, label %exit
-  ]
-
-loop.latch:
-  br label %loop
+  %cmp = icmp ult i32 %inc, 4
+  br i1 %cmp, label %for.body, label %exit, !llvm.loop !15
 
 exit:
   ret i32 %add
 }
 
-; Metadata definitions
-!0 = distinct !{!0, !5}
-!1 = distinct !{!1, !6}
-!2 = distinct !{!2, !7}
-!3 = distinct !{!3, !8}
-!4 = distinct !{!4, !9}
-!5 = !{!"llvm.loop.unroll.full"}
-!6 = !{!"llvm.loop.unroll.enable"}
-!7 = !{!"llvm.loop.unroll.count", i32 4}
+!0 = distinct !{!0, !3}
+!1 = distinct !{!1, !4}
+!2 = distinct !{!2, !4}
+!3 = !{!"llvm.loop.unroll.full"}
+!4 = !{!"llvm.loop.unroll.enable"}
+!5 = distinct !{!5, !6}
+!6 = !{!"llvm.loop.unroll.count", i32 3}
+!7 = distinct !{!7, !8}
 !8 = !{!"llvm.loop.unroll.disable"}
-!9 = !{!"llvm.loop.unroll.runtime.disable"}
-!10 = distinct !{!10, !7}
-!11 = distinct !{!11, !6}
-!12 = distinct !{!12, !6}
-!13 = distinct !{!13, !7}
-
-; Debug info for inlined loop test
-!llvm.dbg.cu = !{!15}
-!llvm.module.flags = !{!19}
-
-!15 = distinct !DICompileUnit(language: DW_LANG_C99, file: !16, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
-!16 = !DIFile(filename: "caller.c", directory: "/tmp")
-!17 = !DIFile(filename: "inlined.c", directory: "/tmp")
-!18 = distinct !DISubprogram(name: "inlined_func", scope: !17, file: !17, line: 1, type: !24, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !15)
-!19 = !{i32 2, !"Debug Info Version", i32 3}
-!20 = distinct !DISubprogram(name: "caller_with_inlined_loop", scope: !16, file: !16, line: 8, type: !24, isLocal: false, isDefinition: true, scopeLine: 8, isOptimized: true, unit: !15)
-!21 = !DILocation(line: 10, column: 3, scope: !20)
-!22 = !DILocation(line: 5, column: 3, scope: !18, inlinedAt: !21)
-!23 = distinct !{!23, !22, !6}
-!24 = !DISubroutineType(types: !25)
-!25 = !{null}
+!9 = distinct !{!9, !4}
+!10 = !{!"llvm.loop.unroll.runtime.disable"}
+!11 = distinct !{!11, !10}
+!12 = distinct !{!12, !3}
+!13 = distinct !{!13, !4}
+!14 = distinct !{!14, !6}
+!15 = distinct !{!15, !4}
+!16 = distinct !{!16, !4}
diff --git a/llvm/test/Transforms/LoopUnroll/debug.ll b/llvm/test/Transforms/LoopUnroll/debug.ll
deleted file mode 100644
index ba98003cc2af2..0000000000000
--- a/llvm/test/Transforms/LoopUnroll/debug.ll
+++ /dev/null
@@ -1,922 +0,0 @@
-; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll < %s 2>&1 | FileCheck %s --match-full-lines --strict-whitespace
-; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -unroll-allow-partial < %s 2>&1 | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=PARTIAL-ALLOW
-; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -unroll-count=4 < %s 2>&1 | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=USER-COUNT
-; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -unroll-count=9999 < %s 2>&1 | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=USER-COUNT-EXCEED
-; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -unroll-peel-count=2 < %s 2>&1 | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=EXPLICIT-PEEL
-; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -unroll-threshold=0 < %s 2>&1 | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=ZERO-THRESH
-; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -unroll-full-max-count=2 < %s 2>&1 | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=MAX-COUNT
-; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -unroll-allow-partial -unroll-partial-threshold=4 < %s 2>&1 | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=PARTIAL-NOPROFIT
-; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -unroll-allow-remainder=false < %s 2>&1 | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=PRAGMA-NOREMAINDER
-
-; REQUIRES: asserts
-
-; CHECK-LABEL:Loop Unroll: F[pragma_full_unroll_unknown_tc] Loop %for.body (depth=1)
-; CHECK-NEXT:Loop Size = 6
-; CHECK-NEXT: Computing unroll count: TripCount=0, MaxTripCount=2147483647, TripMultiple=1
-; CHECK-NEXT: Explicit unroll requested: pragma-full
-; CHECK-NEXT: Trying pragma unroll...
-; CHECK-NEXT:  Not fully unrolling: unknown trip count.
-; CHECK-NEXT: Trying full unroll...
-; CHECK-NEXT: Trying upper-bound unroll...
-; CHECK-NEXT: Trying loop peeling...
-; CHECK-NEXT: Trying partial unroll...
-; CHECK-NEXT: Not fully unrolling as directed: loop has runtime trip count.
-; CHECK-NEXT: Trying runtime unroll...
-; CHECK-NEXT:  Will not try to unroll loop with runtime trip count because -unroll-runtime not given
-; CHECK-NEXT: Not unrolling: no viable strategy found.
-
-define i32 @pragma_full_unroll_unknown_tc(ptr %A, i32 %n) {
-entry:
-  %cmp.entry = icmp sgt i32 %n, 0
-  br i1 %cmp.entry, label %for.body, label %exit
-
-for.body:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  %load = load i32, ptr %arrayidx
-  %add = add i32 %sum, %load
-  %inc = add i32 %i, 1
-  %cmp = icmp slt i32 %inc, %n
-  br i1 %cmp, label %for.body, label %exit, !llvm.loop !0
-
-exit:
-  %result = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  ret i32 %result
-}
-
-; CHECK-LABEL:Loop Unroll: F[full_unroll_cost_exceeds] Loop %for.body (depth=1)
-; CHECK-NEXT:Loop Size = 6
-; CHECK-NEXT: Computing unroll count: TripCount=100, MaxTripCount=0, TripMultiple=100
-; CHECK-NEXT: Trying pragma unroll...
-; CHECK-NEXT: Trying full unroll...
-; CHECK-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit.
-; CHECK-NEXT:   Not analyzing loop cost: trip count too large.
-; CHECK-NEXT:  Skipping: cost analysis unavailable.
-; CHECK-NEXT: Trying upper-bound unroll...
-; CHECK-NEXT: Trying loop peeling...
-; CHECK-NEXT: Trying partial unroll...
-; CHECK-NEXT:  Will not try to unroll partially because -unroll-allow-partial not given
-; CHECK-NEXT: Not unrolling: no viable strategy found.
-
-define i32 @full_unroll_cost_exceeds(ptr %A) {
-entry:
-  br label %for.body
-
-for.body:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  %load = load i32, ptr %arrayidx
-  %add = add i32 %sum, %load
-  %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 100
-  br i1 %cmp, label %for.body, label %exit
-
-exit:
-  ret i32 %add
-}
-
-; CHECK-LABEL:Loop Unroll: F[extended_convergence] Loop %for.body (depth=1)
-; CHECK-NEXT: Not unrolling: contains convergent operations.
-
-declare void @convergent_func() convergent
-declare token @llvm.experimental.convergence.anchor()
-
-define i32 @extended_convergence(ptr %A, i32 %n) {
-entry:
-  br label %for.body, !llvm.loop !1
-
-for.body:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %tok = call token @llvm.experimental.convergence.anchor()
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  %load = load i32, ptr %arrayidx
-  %add = add i32 %sum, %load
-  %inc = add i32 %i, 1
-  %cmp = icmp slt i32 %inc, %n
-  br i1 %cmp, label %for.body, label %exit, !llvm.loop !1
-
-exit:
-  call void @convergent_func() [ "convergencectrl"(token %tok) ]
-  ret i32 %add
-}
-
-; CHECK-LABEL:Loop Unroll: F[noduplicate_prevents_unroll] Loop %for.body (depth=1)
-; CHECK-NEXT: Not unrolling: contains non-duplicatable instructions.
-
-declare void @noduplicate_func() noduplicate
-
-define i32 @noduplicate_prevents_unroll(ptr %A) {
-entry:
-  br label %for.body
-
-for.body:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  %load = load i32, ptr %arrayidx
-  call void @noduplicate_func()
-  %add = add i32 %sum, %load
-  %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 8
-  br i1 %cmp, label %for.body, label %exit, !llvm.loop !1
-
-exit:
-  ret i32 %add
-}
-
-; CHECK-LABEL:Loop Unroll: F[indirectbr_loop] Loop %for.body (depth=1)
-; CHECK-NEXT: Not unrolling loop which is not in loop-simplify form.
-
-define i32 @indirectbr_loop(ptr %A, ptr %target) {
-entry:
-  indirectbr ptr %target, [label %for.body, label %exit]
-
-for.body:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  %load = load i32, ptr %arrayidx
-  %add = add i32 %sum, %load
-  %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 10
-  br i1 %cmp, label %for.body, label %exit, !llvm.loop !1
-
-exit:
-  %result = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  ret i32 %result
-}
-
-; CHECK-LABEL:Loop Unroll: F[inline_prevents_unroll] Loop %for.body (depth=1)
-; CHECK-NEXT:Loop Size = 8
-; CHECK-NEXT: Not unrolling loop with inlinable calls.
-
-define internal i32 @single_use_helper(i32 %x) {
-  %add = add i32 %x, 42
-  ret i32 %add
-}
-
-define i32 @inline_prevents_unroll(ptr %A) {
-entry:
-  br label %for.body
-
-for.body:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  %load = load i32, ptr %arrayidx
-  %helper_result = call i32 @single_use_helper(i32 %load)
-  %add = add i32 %sum, %helper_result
-  %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 10
-  br i1 %cmp, label %for.body, label %exit, !llvm.loop !2
-
-exit:
-  ret i32 %add
-}
-
-; CHECK-LABEL:Loop Unroll: F[full_unroll_profitability_analysis] Loop %for.body (depth=1)
-; CHECK-NEXT:Loop Size = {{[0-9]+}}
-; CHECK-NEXT: Computing unroll count: TripCount=10, MaxTripCount=0, TripMultiple=10
-; CHECK-NEXT: Trying pragma unroll...
-; CHECK-NEXT: Trying full unroll...
-; CHECK-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit.
-; CHECK-NEXT:   Starting LoopUnroll profitability analysis...
-; CHECK-NEXT:   Analyzing iteration 0
-; CHECK-NEXT:   Analyzing iteration 1
-; CHECK-NEXT:   Analyzing iteration 2
-; CHECK-NEXT:   Analyzing iteration 3
-; CHECK-NEXT:   Analyzing iteration 4
-; CHECK-NEXT:   Analyzing iteration 5
-; CHECK-NEXT:   Analyzing iteration 6
-; CHECK-NEXT:   Analyzing iteration 7
-; CHECK-NEXT:   Analyzing iteration 8
-; CHECK-NEXT:   Analyzing iteration 9
-; CHECK:   Analysis finished:
-; CHECK-NEXT:   UnrolledCost: {{[0-9]+}}, RolledDynamicCost: {{[0-9]+}}
-; CHECK-NEXT:  Profitable after cost analysis.
-; CHECK-NEXT:  Exiting block %for.body: TripCount=10, TripMultiple=0, BreakoutTrip=0
-; CHECK-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 10!
-
-define i32 @full_unroll_profitability_analysis(ptr %A, ptr %B) {
-entry:
-  br label %for.body
-
-for.body:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %sum = phi i32 [ 0, %entry ], [ %result, %for.body ]
-  %idxA = getelementptr inbounds i32, ptr %A, i32 %i
-  %loadA = load i32, ptr %idxA
-  %idxB = getelementptr inbounds i32, ptr %B, i32 %i
-  %loadB = load i32, ptr %idxB
-  %mul1 = mul i32 %loadA, %loadB
-  %add1 = add i32 %mul1, %loadA
-  %mul2 = mul i32 %add1, %loadB
-  %sub1 = sub i32 %mul2, %loadA
-  %add2 = add i32 %sub1, %loadB
-  %mul3 = mul i32 %add2, %loadA
-  %sub2 = sub i32 %mul3, %loadB
-  %xor1 = xor i32 %sub2, %loadA
-  %or1 = or i32 %xor1, %loadB
-  %result = add i32 %sum, %or1
-  %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 10
-  br i1 %cmp, label %for.body, label %exit
-
-exit:
-  ret i32 %result
-}
-
-; CHECK-LABEL:Loop Unroll: F[cost_exceed_boosted_threshold] Loop %for.body (depth=1)
-; CHECK-NEXT:Loop Size = {{[0-9]+}}
-; CHECK-NEXT: Computing unroll count: TripCount=10, MaxTripCount=0, TripMultiple=10
-; CHECK-NEXT: Trying pragma unroll...
-; CHECK-NEXT: Trying full unroll...
-; CHECK-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit.
-; CHECK-NEXT:   Starting LoopUnroll profitability analysis...
-; CHECK-NEXT:   Analyzing iteration 0
-; CHECK-NEXT:   Analyzing iteration 1
-; CHECK-NEXT:   Analyzing iteration 2
-; CHECK-NEXT:   Analyzing iteration 3
-; CHECK-NEXT:   Analyzing iteration 4
-; CHECK-NEXT:   Analyzing iteration 5
-; CHECK-NEXT:   Analyzing iteration 6
-; CHECK-NEXT:   Analyzing iteration 7
-; CHECK-NEXT:   Analyzing iteration 8
-; CHECK-NEXT:   Analyzing iteration 9
-; CHECK:   Analysis finished:
-; CHECK-NEXT:   UnrolledCost: {{[0-9]+}}, RolledDynamicCost: {{[0-9]+}}
-; CHECK-NEXT:  Not unrolling: cost {{[0-9]+}} >= boosted threshold {{[0-9]+}}.
-; CHECK-NEXT: Trying upper-bound unroll...
-; CHECK-NEXT: Trying loop peeling...
-; CHECK-NEXT: Trying partial unroll...
-; CHECK-NEXT:  Will not try to unroll partially because -unroll-allow-partial not given
-; CHECK-NEXT: Not unrolling: no viable strategy found.
-
-define i32 @cost_exceed_boosted_threshold(ptr %A, ptr %B) {
-entry:
-  br label %for.body
-
-for.body:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %sum = phi i32 [ 0, %entry ], [ %result, %for.body ]
-  %idxA = getelementptr inbounds i32, ptr %A, i32 %i
-  %loadA = load i32, ptr %idxA
-  %idxB = getelementptr inbounds i32, ptr %B, i32 %i
-  %loadB = load i32, ptr %idxB
-  %mul1 = mul i32 %loadA, %loadB
-  %add1 = add i32 %mul1, %loadA
-  %mul2 = mul i32 %add1, %loadB
-  %sub1 = sub i32 %mul2, %loadA
-  %add2 = add i32 %sub1, %loadB
-  %mul3 = mul i32 %add2, %loadA
-  %sub2 = sub i32 %mul3, %loadB
-  %xor1 = xor i32 %sub2, %loadA
-  %or1 = or i32 %xor1, %loadB
-  %and1 = and i32 %or1, %loadA
-  %shl1 = shl i32 %and1, 2
-  %ashr1 = ashr i32 %shl1, 1
-  %mul4 = mul i32 %ashr1, %loadB
-  %add3 = add i32 %mul4, %loadA
-  %xor2 = xor i32 %add3, %loadB
-  %result = add i32 %sum, %xor2
-  %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 10
-  br i1 %cmp, label %for.body, label %exit
-
-exit:
-  ret i32 %result
-}
-
-; CHECK-LABEL:Loop Unroll: F[full_unroll_size_under_threshold] Loop %for.body (depth=1)
-; CHECK-NEXT:Loop Size = 6
-; CHECK-NEXT: Computing unroll count: TripCount=4, MaxTripCount=0, TripMultiple=4
-; CHECK-NEXT: Trying pragma unroll...
-; CHECK-NEXT: Trying full unroll...
-; CHECK-NEXT:  Unrolling: size {{[0-9]+}} < threshold {{[0-9]+}}.
-; CHECK-NEXT:  Exiting block %for.body: TripCount=4, TripMultiple=0, BreakoutTrip=0
-; CHECK-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 4!
-
-define i32 @full_unroll_size_under_threshold(ptr %A) {
-entry:
-  br label %for.body
-
-for.body:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  %load = load i32, ptr %arrayidx
-  %add = add i32 %sum, %load
-  %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 4
-  br i1 %cmp, label %for.body, label %exit
-
-exit:
-  ret i32 %add
-}
-
-; CHECK-LABEL:Loop Unroll: F[pragma_full_known_tc] Loop %for.body (depth=1)
-; CHECK-NEXT:Loop Size = 6
-; CHECK-NEXT: Computing unroll count: TripCount=6, MaxTripCount=0, TripMultiple=6
-; CHECK-NEXT: Explicit unroll requested: pragma-full
-; CHECK-NEXT: Trying pragma unroll...
-; CHECK-NEXT:  Fully unrolling with trip count: 6.
-; CHECK-NEXT:  Exiting block %for.body: TripCount=6, TripMultiple=0, BreakoutTrip=0
-; CHECK-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 6!
-
-define i32 @pragma_full_known_tc(ptr %A) {
-entry:
-  br label %for.body
-
-for.body:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  %load = load i32, ptr %arrayidx
-  %add = add i32 %sum, %load
-  %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 6
-  br i1 %cmp, label %for.body, label %exit, !llvm.loop !0
-
-exit:
-  ret i32 %add
-}
-
-; CHECK-LABEL:Loop Unroll: F[pragma_count_unroll] Loop %for.body (depth=1)
-; CHECK-NEXT:Loop Size = 6
-; CHECK-NEXT: Computing unroll count: TripCount=12, MaxTripCount=0, TripMultiple=12
-; CHECK-NEXT: Explicit unroll requested: pragma-count(3)
-; CHECK-NEXT: Trying pragma unroll...
-; CHECK-NEXT:  Unrolling with pragma count: 3.
-; CHECK-NEXT:  Exiting block %for.body: TripCount=12, TripMultiple=0, BreakoutTrip=0
-; CHECK-NEXT:UNROLLING loop %for.body by 3!
-
-define i32 @pragma_count_unroll(ptr %A) {
-entry:
-  br label %for.body
-
-for.body:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  %load = load i32, ptr %arrayidx
-  %add = add i32 %sum, %load
-  %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 12
-  br i1 %cmp, label %for.body, label %exit, !llvm.loop !5
-
-exit:
-  ret i32 %add
-}
-
-; CHECK-LABEL:Loop Unroll: F[no_viable_strategy] Loop %for.body (depth=1)
-; CHECK-NEXT:Loop Size = 6
-; CHECK-NEXT: Computing unroll count: TripCount=0, MaxTripCount=2147483647, TripMultiple=1
-; CHECK-NEXT: Trying pragma unroll...
-; CHECK-NEXT: Trying full unroll...
-; CHECK-NEXT: Trying upper-bound unroll...
-; CHECK-NEXT: Trying loop peeling...
-; CHECK-NEXT: Trying partial unroll...
-; CHECK-NEXT: Trying runtime unroll...
-; CHECK-NEXT:  Will not try to unroll loop with runtime trip count because -unroll-runtime not given
-; CHECK-NEXT: Not unrolling: no viable strategy found.
-
-define i32 @no_viable_strategy(ptr %A, i32 %n) {
-entry:
-  %cmp.entry = icmp sgt i32 %n, 0
-  br i1 %cmp.entry, label %for.body, label %exit
-
-for.body:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  %load = load i32, ptr %arrayidx
-  %add = add i32 %sum, %load
-  %inc = add i32 %i, 1
-  %cmp = icmp slt i32 %inc, %n
-  br i1 %cmp, label %for.body, label %exit
-
-exit:
-  %result = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  ret i32 %result
-}
-
-; CHECK-LABEL:Loop Unroll: F[disabled_by_metadata] Loop %for.body (depth=1)
-; CHECK-NEXT: Not unrolling: transformation disabled by metadata.
-
-define i32 @disabled_by_metadata(ptr %A) {
-entry:
-  br label %for.body
-
-for.body:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  %load = load i32, ptr %arrayidx
-  %add = add i32 %sum, %load
-  %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 8
-  br i1 %cmp, label %for.body, label %exit, !llvm.loop !7
-
-exit:
-  ret i32 %add
-}
-
-; CHECK-LABEL:Loop Unroll: F[upper_bound_unroll] Loop %for.body (depth=1)
-; CHECK-NEXT:Loop Size = 6
-; CHECK-NEXT: Computing unroll count: TripCount=0, MaxTripCount=3, TripMultiple=1
-; CHECK-NEXT: Explicit unroll requested: pragma-enable
-; CHECK-NEXT: Trying pragma unroll...
-; CHECK-NEXT:  Unrolling with max trip count: 3.
-; CHECK-NEXT:  Exiting block %for.body: TripCount=0, TripMultiple=1, BreakoutTrip=1
-; CHECK-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 3!
-
-define i32 @upper_bound_unroll(ptr %A, i32 %n) {
-entry:
-  %masked = and i32 %n, 3
-  %cmp.entry = icmp sgt i32 %masked, 0
-  br i1 %cmp.entry, label %for.body, label %exit
-
-for.body:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  %load = load i32, ptr %arrayidx
-  %add = add i32 %sum, %load
-  %inc = add i32 %i, 1
-  %cmp = icmp slt i32 %inc, %masked
-  br i1 %cmp, label %for.body, label %exit, !llvm.loop !9
-
-exit:
-  %result = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  ret i32 %result
-}
-
-; CHECK-LABEL:Loop Unroll: F[runtime_small_max_tc] Loop %for.body (depth=1)
-; CHECK-NEXT:Loop Size = 6
-; CHECK-NEXT: Computing unroll count: TripCount=0, MaxTripCount=3, TripMultiple=1
-; CHECK-NEXT: Trying pragma unroll...
-; CHECK-NEXT: Trying full unroll...
-; CHECK-NEXT: Trying upper-bound unroll...
-; CHECK-NEXT: Trying loop peeling...
-; CHECK-NEXT: Trying partial unroll...
-; CHECK-NEXT: Trying runtime unroll...
-; CHECK-NEXT:  Not runtime unrolling: max trip count 3 is small (< 8) and not forced.
-; CHECK-NEXT: Not unrolling: no viable strategy found.
-
-define i32 @runtime_small_max_tc(ptr %A, i32 %n) {
-entry:
-  %masked = and i32 %n, 3
-  %cmp.entry = icmp sgt i32 %masked, 0
-  br i1 %cmp.entry, label %for.body, label %exit
-
-for.body:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  %load = load i32, ptr %arrayidx
-  %add = add i32 %sum, %load
-  %inc = add i32 %i, 1
-  %cmp = icmp slt i32 %inc, %masked
-  br i1 %cmp, label %for.body, label %exit
-
-exit:
-  %result = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  ret i32 %result
-}
-
-; CHECK-LABEL:Loop Unroll: F[runtime_unroll_disabled_pragma] Loop %for.body (depth=1)
-; CHECK-NEXT:Loop Size = 6
-; CHECK-NEXT: Computing unroll count: TripCount=0, MaxTripCount=2147483647, TripMultiple=1
-; CHECK-NEXT: Trying pragma unroll...
-; CHECK-NEXT: Trying full unroll...
-; CHECK-NEXT: Trying upper-bound unroll...
-; CHECK-NEXT: Trying loop peeling...
-; CHECK-NEXT: Trying partial unroll...
-; CHECK-NEXT: Trying runtime unroll...
-; CHECK-NEXT:  Not runtime unrolling: disabled by pragma.
-; CHECK-NEXT: Not unrolling: no viable strategy found.
-
-define i32 @runtime_unroll_disabled_pragma(ptr %A, i32 %n) {
-entry:
-  %cmp.entry = icmp sgt i32 %n, 0
-  br i1 %cmp.entry, label %for.body, label %exit
-
-for.body:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  %load = load i32, ptr %arrayidx
-  %add = add i32 %sum, %load
-  %inc = add i32 %i, 1
-  %cmp = icmp slt i32 %inc, %n
-  br i1 %cmp, label %for.body, label %exit, !llvm.loop !11
-
-exit:
-  %result = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  ret i32 %result
-}
-
-; CHECK-LABEL:Loop Unroll: F[heuristic_peel] Loop %for.header (depth=1)
-; CHECK-NEXT:Loop Size = 9
-; CHECK-NEXT: Computing unroll count: TripCount=0, MaxTripCount=2147483647, TripMultiple=1
-; CHECK-NEXT: Trying pragma unroll...
-; CHECK-NEXT: Trying full unroll...
-; CHECK-NEXT: Trying upper-bound unroll...
-; CHECK-NEXT: Trying loop peeling...
-; CHECK-NEXT:  Peeling with count: 1.
-; CHECK-NEXT:PEELING loop %for.header with iteration count 1!
-
-declare void @foo()
-
-define void @heuristic_peel(ptr %A, i32 %n) {
-entry:
-  %cmp.entry = icmp sgt i32 %n, 0
-  br i1 %cmp.entry, label %for.header, label %exit
-
-for.header:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.latch ]
-  %cmp.zero = icmp eq i32 %i, 0
-  br i1 %cmp.zero, label %then, label %for.latch
-
-then:
-  call void @foo()
-  br label %for.latch
-
-for.latch:
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  %load = load i32, ptr %arrayidx
-  %inc = add i32 %i, 1
-  %cmp = icmp slt i32 %inc, %n
-  br i1 %cmp, label %for.header, label %exit
-
-exit:
-  ret void
-}
-
-; CHECK-LABEL:Loop Unroll: F[runtime_unroll_simple] Loop %for.body (depth=1)
-; CHECK-NEXT:Loop Size = 6
-; CHECK-NEXT: Computing unroll count: TripCount=0, MaxTripCount=2147483647, TripMultiple=1
-; CHECK-NEXT: Explicit unroll requested: pragma-enable
-; CHECK-NEXT: Trying pragma unroll...
-; CHECK-NEXT: Trying full unroll...
-; CHECK-NEXT: Trying upper-bound unroll...
-; CHECK-NEXT: Trying loop peeling...
-; CHECK-NEXT: Trying partial unroll...
-; CHECK-NEXT: Trying runtime unroll...
-; CHECK-NEXT:  Runtime unrolling with count: 8
-; CHECK-NEXT:  Exiting block %for.body: TripCount=0, TripMultiple=1, BreakoutTrip=1
-; CHECK:UNROLLING loop %for.body by 8 with run-time trip count!
-
-define i32 @runtime_unroll_simple(ptr %A, i32 %n) {
-entry:
-  %cmp.entry = icmp sgt i32 %n, 0
-  br i1 %cmp.entry, label %for.body, label %exit
-
-for.body:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  %load = load i32, ptr %arrayidx
-  %add = add i32 %sum, %load
-  %inc = add i32 %i, 1
-  %cmp = icmp slt i32 %inc, %n
-  br i1 %cmp, label %for.body, label %exit, !llvm.loop !13
-
-exit:
-  %result = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  ret i32 %result
-}
-
-; PARTIAL-ALLOW-LABEL:Loop Unroll: F[partial_unroll_cost_analysis] Loop %for.body (depth=1)
-; PARTIAL-ALLOW-NEXT:Loop Size = 6
-; PARTIAL-ALLOW-NEXT: Computing unroll count: TripCount=200, MaxTripCount=0, TripMultiple=200
-; PARTIAL-ALLOW-NEXT: Trying pragma unroll...
-; PARTIAL-ALLOW-NEXT: Trying full unroll...
-; PARTIAL-ALLOW-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit.
-; PARTIAL-ALLOW-NEXT:   Not analyzing loop cost: trip count too large.
-; PARTIAL-ALLOW-NEXT:  Skipping: cost analysis unavailable.
-; PARTIAL-ALLOW-NEXT: Trying upper-bound unroll...
-; PARTIAL-ALLOW-NEXT: Trying loop peeling...
-; PARTIAL-ALLOW-NEXT: Trying partial unroll...
-; PARTIAL-ALLOW-NEXT:  Unrolled size exceeds threshold; reducing count from {{[0-9]+}} to {{[0-9]+}}.
-; PARTIAL-ALLOW-NEXT:  Partially unrolling with count: {{[0-9]+}}
-; PARTIAL-ALLOW-NEXT:  Exiting block %for.body: TripCount=200, TripMultiple=0, BreakoutTrip=0
-; PARTIAL-ALLOW-NEXT:UNROLLING loop %for.body by {{[0-9]+}}!
-
-define i32 @partial_unroll_cost_analysis(ptr %A) {
-entry:
-  br label %for.body
-
-for.body:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  %load = load i32, ptr %arrayidx
-  %add = add i32 %sum, %load
-  %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 200
-  br i1 %cmp, label %for.body, label %exit
-
-exit:
-  ret i32 %add
-}
-
-; CHECK-LABEL:Loop Unroll: F[pragma_full_tc_too_large] Loop %for.body (depth=1)
-; CHECK-NEXT:Loop Size = 6
-; CHECK-NEXT: Computing unroll count: TripCount=1000001, MaxTripCount=0, TripMultiple=1000001
-; CHECK-NEXT: Explicit unroll requested: pragma-full
-; CHECK-NEXT: Trying pragma unroll...
-; CHECK-NEXT:  Won't unroll; trip count is too large.
-; CHECK-NEXT: Trying full unroll...
-; CHECK-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit.
-; CHECK-NEXT:   Not analyzing loop cost: trip count too large.
-; CHECK-NEXT:  Skipping: cost analysis unavailable.
-; CHECK-NEXT: Trying upper-bound unroll...
-; CHECK-NEXT: Trying loop peeling...
-; CHECK-NEXT: Trying partial unroll...
-; CHECK-NEXT:  Unrolled size exceeds threshold; reducing count from {{[0-9]+}} to {{[0-9]+}}.
-; CHECK-NEXT:  Partially unrolling with count: {{[0-9]+}}
-; CHECK-NEXT: Partial unroll instead of full: unrolled size too large. Unrolling {{[0-9]+}} times instead of {{[0-9]+}}.
-; CHECK-NEXT:  Exiting block %for.body: TripCount=1000001, TripMultiple=0, BreakoutTrip=0
-; CHECK-NEXT:UNROLLING loop %for.body by {{[0-9]+}}!
-
-define i32 @pragma_full_tc_too_large(ptr %A) {
-entry:
-  br label %for.body
-
-for.body:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  %load = load i32, ptr %arrayidx
-  %add = add i32 %sum, %load
-  %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 1000001
-  br i1 %cmp, label %for.body, label %exit, !llvm.loop !12
-
-exit:
-  ret i32 %add
-}
-
-; CHECK-LABEL:Loop Unroll: F[nested_loop_cost] Loop %inner (depth=2)
-; CHECK-NEXT:Loop Size = 6
-; CHECK-NEXT: Computing unroll count: TripCount=0, MaxTripCount=2147483647, TripMultiple=1
-; CHECK-NEXT: Trying pragma unroll...
-; CHECK-NEXT: Trying full unroll...
-; CHECK-NEXT: Trying upper-bound unroll...
-; CHECK-NEXT: Trying loop peeling...
-; CHECK-NEXT: Trying partial unroll...
-; CHECK-NEXT: Trying runtime unroll...
-; CHECK-NEXT:  Will not try to unroll loop with runtime trip count because -unroll-runtime not given
-; CHECK-NEXT: Not unrolling: no viable strategy found.
-; CHECK-LABEL:Loop Unroll: F[nested_loop_cost] Loop %outer (depth=1)
-; CHECK-NEXT:Loop Size = {{[0-9]+}}
-; CHECK-NEXT: Computing unroll count: TripCount=10, MaxTripCount=0, TripMultiple=10
-; CHECK-NEXT: Trying pragma unroll...
-; CHECK-NEXT: Trying full unroll...
-; CHECK-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit.
-; CHECK-NEXT:   Not analyzing loop cost: not an innermost loop.
-; CHECK-NEXT:  Skipping: cost analysis unavailable.
-; CHECK-NEXT: Trying upper-bound unroll...
-; CHECK-NEXT: Trying loop peeling...
-; CHECK-NEXT: Trying partial unroll...
-; CHECK-NEXT:  Will not try to unroll partially because -unroll-allow-partial not given
-; CHECK-NEXT: Not unrolling: no viable strategy found.
-
-define i32 @nested_loop_cost(ptr %A, i32 %n) {
-entry:
-  br label %outer
-
-outer:
-  %i = phi i32 [ 0, %entry ], [ %i.next, %inner.exit ]
-  %sum.outer = phi i32 [ 0, %entry ], [ %sum.inner.lcssa, %inner.exit ]
-  %idxA = getelementptr inbounds i32, ptr %A, i32 %i
-  %loadA = load i32, ptr %idxA
-  %mul1 = mul i32 %loadA, %sum.outer
-  %add1 = add i32 %mul1, %loadA
-  %mul2 = mul i32 %add1, %loadA
-  %sub1 = sub i32 %mul2, %loadA
-  %add2 = add i32 %sub1, %loadA
-  %mul3 = mul i32 %add2, %loadA
-  %sub2 = sub i32 %mul3, %loadA
-  %xor1 = xor i32 %sub2, %loadA
-  %or1 = or i32 %xor1, %loadA
-  %outer.sum = add i32 %sum.outer, %or1
-  br label %inner
-
-inner:
-  %j = phi i32 [ 0, %outer ], [ %j.next, %inner ]
-  %sum.inner = phi i32 [ %outer.sum, %outer ], [ %inner.add, %inner ]
-  %idxB = getelementptr inbounds i32, ptr %A, i32 %j
-  %loadB = load i32, ptr %idxB
-  %inner.add = add i32 %sum.inner, %loadB
-  %j.next = add i32 %j, 1
-  %inner.cmp = icmp slt i32 %j.next, %n
-  br i1 %inner.cmp, label %inner, label %inner.exit
-
-inner.exit:
-  %sum.inner.lcssa = phi i32 [ %inner.add, %inner ]
-  %i.next = add i32 %i, 1
-  %outer.cmp = icmp ult i32 %i.next, 10
-  br i1 %outer.cmp, label %outer, label %exit
-
-exit:
-  ret i32 %sum.inner.lcssa
-}
-
-; USER-COUNT-LABEL:Loop Unroll: F[user_count_unroll] Loop %for.body (depth=1)
-; USER-COUNT-NEXT:Loop Size = 6
-; USER-COUNT-NEXT: Computing unroll count: TripCount=12, MaxTripCount=0, TripMultiple=12
-; USER-COUNT-NEXT: Explicit unroll requested: user-count
-; USER-COUNT-NEXT: Trying pragma unroll...
-; USER-COUNT-NEXT:  Unrolling with user-specified count: 4.
-; USER-COUNT-NEXT:  Exiting block %for.body: TripCount=12, TripMultiple=0, BreakoutTrip=0
-; USER-COUNT-NEXT:UNROLLING loop %for.body by 4!
-;
-; USER-COUNT-EXCEED-LABEL:Loop Unroll: F[user_count_unroll] Loop %for.body (depth=1)
-; USER-COUNT-EXCEED-NEXT:Loop Size = 6
-; USER-COUNT-EXCEED-NEXT: Computing unroll count: TripCount=12, MaxTripCount=0, TripMultiple=12
-; USER-COUNT-EXCEED-NEXT: Explicit unroll requested: user-count
-; USER-COUNT-EXCEED-NEXT: Trying pragma unroll...
-; USER-COUNT-EXCEED-NEXT:  Not unrolling with user count 9999: exceeds threshold.
-; USER-COUNT-EXCEED-NEXT: Trying full unroll...
-; USER-COUNT-EXCEED-NEXT:  Unrolling: size {{[0-9]+}} < threshold {{[0-9]+}}.
-; USER-COUNT-EXCEED-NEXT:  Exiting block %for.body: TripCount=12, TripMultiple=0, BreakoutTrip=0
-; USER-COUNT-EXCEED-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 12!
-
-define i32 @user_count_unroll(ptr %A) {
-entry:
-  br label %for.body
-
-for.body:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  %load = load i32, ptr %arrayidx
-  %add = add i32 %sum, %load
-  %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 12
-  br i1 %cmp, label %for.body, label %exit
-
-exit:
-  ret i32 %add
-}
-
-; EXPLICIT-PEEL-LABEL:Loop Unroll: F[explicit_peel_count] Loop %for.body (depth=1)
-; EXPLICIT-PEEL-NEXT:Loop Size = 6
-; EXPLICIT-PEEL-NEXT: Computing unroll count: TripCount=0, MaxTripCount=2147483647, TripMultiple=1
-; EXPLICIT-PEEL-NEXT:  Using explicit peel count: 2.
-; EXPLICIT-PEEL-NEXT:PEELING loop %for.body with iteration count 2!
-
-define i32 @explicit_peel_count(ptr %A, i32 %n) {
-entry:
-  %cmp.entry = icmp sgt i32 %n, 0
-  br i1 %cmp.entry, label %for.body, label %exit
-
-for.body:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  %load = load i32, ptr %arrayidx
-  %add = add i32 %sum, %load
-  %inc = add i32 %i, 1
-  %cmp = icmp slt i32 %inc, %n
-  br i1 %cmp, label %for.body, label %exit
-
-exit:
-  %result = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  ret i32 %result
-}
-
-; ZERO-THRESH-LABEL:Loop Unroll: F[zero_thresh_unroll] Loop %for.body (depth=1)
-; ZERO-THRESH-NEXT: Not unrolling: all thresholds are zero.
-
-define i32 @zero_thresh_unroll(ptr %A) {
-entry:
-  br label %for.body
-
-for.body:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  %load = load i32, ptr %arrayidx
-  %add = add i32 %sum, %load
-  %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 8
-  br i1 %cmp, label %for.body, label %exit
-
-exit:
-  ret i32 %add
-}
-
-; MAX-COUNT-LABEL:Loop Unroll: F[max_count_unroll] Loop %for.body (depth=1)
-; MAX-COUNT-NEXT:Loop Size = 6
-; MAX-COUNT-NEXT: Computing unroll count: TripCount=10, MaxTripCount=0, TripMultiple=10
-; MAX-COUNT-NEXT: Trying pragma unroll...
-; MAX-COUNT-NEXT: Trying full unroll...
-; MAX-COUNT-NEXT:  Not unrolling: trip count 10 exceeds max count 2.
-; MAX-COUNT-NEXT: Trying upper-bound unroll...
-; MAX-COUNT-NEXT: Trying loop peeling...
-; MAX-COUNT-NEXT: Trying partial unroll...
-; MAX-COUNT-NEXT:  Will not try to unroll partially because -unroll-allow-partial not given
-; MAX-COUNT-NEXT: Not unrolling: no viable strategy found.
-
-define i32 @max_count_unroll(ptr %A) {
-entry:
-  br label %for.body
-
-for.body:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  %load = load i32, ptr %arrayidx
-  %add = add i32 %sum, %load
-  %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 10
-  br i1 %cmp, label %for.body, label %exit
-
-exit:
-  ret i32 %add
-}
-
-; PARTIAL-NOPROFIT-LABEL:Loop Unroll: F[partial_no_profit] Loop %for.body (depth=1)
-; PARTIAL-NOPROFIT-NEXT:Loop Size = 6
-; PARTIAL-NOPROFIT-NEXT: Computing unroll count: TripCount=200, MaxTripCount=0, TripMultiple=200
-; PARTIAL-NOPROFIT-NEXT: Trying pragma unroll...
-; PARTIAL-NOPROFIT-NEXT: Trying full unroll...
-; PARTIAL-NOPROFIT-NEXT:  Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit.
-; PARTIAL-NOPROFIT-NEXT:   Not analyzing loop cost: trip count too large.
-; PARTIAL-NOPROFIT-NEXT:  Skipping: cost analysis unavailable.
-; PARTIAL-NOPROFIT-NEXT: Trying upper-bound unroll...
-; PARTIAL-NOPROFIT-NEXT: Trying loop peeling...
-; PARTIAL-NOPROFIT-NEXT: Trying partial unroll...
-; PARTIAL-NOPROFIT-NEXT:  Unrolled size exceeds threshold; reducing count from {{[0-9]+}} to {{[0-9]+}}.
-; PARTIAL-NOPROFIT-NEXT:  Will not partially unroll: no profitable count.
-; PARTIAL-NOPROFIT-NEXT:  Partially unrolling with count: 0
-; PARTIAL-NOPROFIT-NEXT: Not unrolling: no viable strategy found.
-
-define i32 @partial_no_profit(ptr %A) {
-entry:
-  br label %for.body
-
-for.body:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  %load = load i32, ptr %arrayidx
-  %add = add i32 %sum, %load
-  %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 200
-  br i1 %cmp, label %for.body, label %exit
-
-exit:
-  ret i32 %add
-}
-
-; PRAGMA-NOREMAINDER-LABEL:Loop Unroll: F[pragma_count_no_remainder] Loop %for.body (depth=1)
-; PRAGMA-NOREMAINDER-NEXT:Loop Size = 6
-; PRAGMA-NOREMAINDER-NEXT: Computing unroll count: TripCount=10, MaxTripCount=0, TripMultiple=10
-; PRAGMA-NOREMAINDER-NEXT: Explicit unroll requested: pragma-count(3)
-; PRAGMA-NOREMAINDER-NEXT: Trying pragma unroll...
-; PRAGMA-NOREMAINDER-NEXT:  Not unrolling with pragma count 3: remainder not allowed, count does not divide trip multiple 10.
-; PRAGMA-NOREMAINDER-NEXT: Trying full unroll...
-; PRAGMA-NOREMAINDER-NEXT:  Unrolling: size {{[0-9]+}} < threshold {{[0-9]+}}.
-; PRAGMA-NOREMAINDER-NEXT:  Exiting block %for.body: TripCount=10, TripMultiple=0, BreakoutTrip=0
-; PRAGMA-NOREMAINDER-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 10!
-
-define i32 @pragma_count_no_remainder(ptr %A) {
-entry:
-  br label %for.body
-
-for.body:
-  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i
-  %load = load i32, ptr %arrayidx
-  %add = add i32 %sum, %load
-  %inc = add i32 %i, 1
-  %cmp = icmp ult i32 %inc, 10
-  br i1 %cmp, label %for.body, label %exit, !llvm.loop !14
-
-exit:
-  ret i32 %add
-}
-
-!0 = distinct !{!0, !3}
-!1 = distinct !{!1, !4}
-!2 = distinct !{!2, !4}
-!3 = !{!"llvm.loop.unroll.full"}
-!4 = !{!"llvm.loop.unroll.enable"}
-!5 = distinct !{!5, !6}
-!6 = !{!"llvm.loop.unroll.count", i32 3}
-!7 = distinct !{!7, !8}
-!8 = !{!"llvm.loop.unroll.disable"}
-!9 = distinct !{!9, !4}
-!10 = !{!"llvm.loop.unroll.runtime.disable"}
-!11 = distinct !{!11, !10}
-!12 = distinct !{!12, !3}
-!13 = distinct !{!13, !4}
-!14 = distinct !{!14, !6}