[llvm] [LoopUnroll] Rotate loop to make it countable for runtime unrolling (PR #146540)

Marek Sedláček via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 7 05:22:35 PDT 2025


https://github.com/mark-sed updated https://github.com/llvm/llvm-project/pull/146540

>From e5aabbdfc9bb75d0ff8855cebc933d8057c64345 Mon Sep 17 00:00:00 2001
From: Marek Sedlacek <msedlacek at azul.com>
Date: Tue, 1 Jul 2025 14:45:50 +0000
Subject: [PATCH 1/3] Add loop rotation to runtime loop unrolling when rotating
 the loop makes it countable, which might then enable additional unrolling of
 the loop.

---
 .../llvm/Transforms/Utils/LoopRotationUtils.h |  14 +-
 .../Transforms/Utils/LoopRotationUtils.cpp    |  28 +-
 llvm/lib/Transforms/Utils/LoopUnroll.cpp      |  24 +-
 .../X86/runtime-unroll-after-rotate.ll        |  99 +++++++
 .../runtime-loop-multiexit-dom-verify.ll      | 272 ++++++++++++------
 5 files changed, 334 insertions(+), 103 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopUnroll/X86/runtime-unroll-after-rotate.ll

diff --git a/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h b/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h
index c3643e0f27f94..b1d3b9dd4792e 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h
@@ -13,6 +13,7 @@
 #ifndef LLVM_TRANSFORMS_UTILS_LOOPROTATIONUTILS_H
 #define LLVM_TRANSFORMS_UTILS_LOOPROTATIONUTILS_H
 
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Compiler.h"
 
 namespace llvm {
@@ -32,12 +33,13 @@ class TargetTransformInfo;
 /// header. If the loop header's size exceeds the threshold, the loop rotation
 /// will give up. The flag IsUtilMode controls the heuristic used in the
 /// LoopRotation. If it is true, the profitability heuristic will be ignored.
-LLVM_ABI bool LoopRotation(Loop *L, LoopInfo *LI,
-                           const TargetTransformInfo *TTI, AssumptionCache *AC,
-                           DominatorTree *DT, ScalarEvolution *SE,
-                           MemorySSAUpdater *MSSAU, const SimplifyQuery &SQ,
-                           bool RotationOnly, unsigned Threshold,
-                           bool IsUtilMode, bool PrepareForLTO = false);
+LLVM_ABI bool LoopRotation(
+    Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI, AssumptionCache *AC,
+    DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
+    const SimplifyQuery &SQ, bool RotationOnly, unsigned Threshold,
+    bool IsUtilMode, bool PrepareForLTO = false,
+    function_ref<bool(Loop *, ScalarEvolution *)> profitabilityCheck =
+        [](Loop *, ScalarEvolution *) { return false; });
 
 } // namespace llvm
 
diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
index 66d0573e83f65..d8fa24347f3a9 100644
--- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -69,16 +69,19 @@ class LoopRotate {
   bool RotationOnly;
   bool IsUtilMode;
   bool PrepareForLTO;
+  function_ref<bool(Loop *, ScalarEvolution *)> profitabilityCheck;
 
 public:
   LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI,
              const TargetTransformInfo *TTI, AssumptionCache *AC,
              DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
              const SimplifyQuery &SQ, bool RotationOnly, bool IsUtilMode,
-             bool PrepareForLTO)
+             bool PrepareForLTO,
+             function_ref<bool(Loop *, ScalarEvolution *)> profitabilityCheck)
       : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE),
         MSSAU(MSSAU), SQ(SQ), RotationOnly(RotationOnly),
-        IsUtilMode(IsUtilMode), PrepareForLTO(PrepareForLTO) {}
+        IsUtilMode(IsUtilMode), PrepareForLTO(PrepareForLTO),
+        profitabilityCheck(profitabilityCheck) {}
   bool processLoop(Loop *L);
 
 private:
@@ -440,9 +443,9 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
 
     // Rotate if either the loop latch does *not* exit the loop, or if the loop
     // latch was just simplified. Or if we think it will be profitable.
-    if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch && IsUtilMode == false &&
-        !profitableToRotateLoopExitingLatch(L) &&
-        !canRotateDeoptimizingLatchExit(L))
+    if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch &&
+        IsUtilMode == false && !profitableToRotateLoopExitingLatch(L) &&
+        !canRotateDeoptimizingLatchExit(L) && !profitabilityCheck(L, SE))
       return Rotated;
 
     // Check size of original header and reject loop if it is very big or we can't
@@ -1053,13 +1056,14 @@ bool LoopRotate::processLoop(Loop *L) {
 
 
 /// The utility to convert a loop into a loop with bottom test.
-bool llvm::LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI,
-                        AssumptionCache *AC, DominatorTree *DT,
-                        ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
-                        const SimplifyQuery &SQ, bool RotationOnly = true,
-                        unsigned Threshold = unsigned(-1),
-                        bool IsUtilMode = true, bool PrepareForLTO) {
+bool llvm::LoopRotation(
+    Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI, AssumptionCache *AC,
+    DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
+    const SimplifyQuery &SQ, bool RotationOnly = true,
+    unsigned Threshold = unsigned(-1), bool IsUtilMode = true,
+    bool PrepareForLTO,
+    function_ref<bool(Loop *, ScalarEvolution *)> profitabilityCheck) {
   LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, MSSAU, SQ, RotationOnly,
-                IsUtilMode, PrepareForLTO);
+                IsUtilMode, PrepareForLTO, profitabilityCheck);
   return LR.processLoop(L);
 }
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 86b268de43cf6..17bf8816c888a 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -58,6 +58,7 @@
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopRotationUtils.h"
 #include "llvm/Transforms/Utils/LoopSimplify.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
@@ -484,8 +485,27 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
 
   assert(ULO.Count > 0);
 
-  // All these values should be taken only after peeling because they might have
-  // changed.
+  if (ULO.Runtime && SE) {
+    BasicBlock *OrigHeader = L->getHeader();
+    BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
+    // Rotate the loop if doing so makes it countable (for later unrolling).
+    if (BI && !BI->isUnconditional() &&
+        isa<SCEVCouldNotCompute>(SE->getExitCount(L, L->getLoopLatch())) &&
+        !isa<SCEVCouldNotCompute>(SE->getExitCount(L, OrigHeader))) {
+      LLVM_DEBUG(dbgs() << "  Rotating loop to make the loop countable.\n");
+      SimplifyQuery SQ{OrigHeader->getDataLayout()};
+      SQ.TLI = nullptr;
+      SQ.DT = DT;
+      SQ.AC = AC;
+      llvm::LoopRotation(L, LI, TTI, AC, DT, SE, nullptr /*MemorySSAUpdater*/,
+                         SQ, false /*RotationOnly*/, 16 /*Threshold*/,
+                         false /*IsUtilMode*/, false /*PrepareForLTO*/,
+                         [](Loop *, ScalarEvolution *) { return true; });
+    }
+  }
+
+  // All these values should be taken only after peeling or loop rotation
+  // because they might have changed.
   BasicBlock *Preheader = L->getLoopPreheader();
   BasicBlock *Header = L->getHeader();
   BasicBlock *LatchBlock = L->getLoopLatch();
diff --git a/llvm/test/Transforms/LoopUnroll/X86/runtime-unroll-after-rotate.ll b/llvm/test/Transforms/LoopUnroll/X86/runtime-unroll-after-rotate.ll
new file mode 100644
index 0000000000000..20803f1c95c08
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/X86/runtime-unroll-after-rotate.ll
@@ -0,0 +1,99 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt --passes=loop-unroll -unroll-runtime-other-exit-predictable=1 -S %s | FileCheck %s
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @test(i64 %0) #0 {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: i64 [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[B1:%.*]] = icmp eq i64 [[TMP0]], 0
+; CHECK-NEXT:    br i1 [[B1]], label %[[AFTER:.*]], label %[[BODY_LR_PH:.*]]
+; CHECK:       [[BODY_LR_PH]]:
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i64 0, [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = freeze i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[TMP2]], -1
+; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP2]], 3
+; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label %[[BODY_PROL_PREHEADER:.*]], label %[[BODY_PROL_LOOPEXIT:.*]]
+; CHECK:       [[BODY_PROL_PREHEADER]]:
+; CHECK-NEXT:    br label %[[BODY_PROL:.*]]
+; CHECK:       [[BODY_PROL]]:
+; CHECK-NEXT:    [[A2_PROL:%.*]] = phi i64 [ [[TMP0]], %[[BODY_PROL_PREHEADER]] ], [ [[A_PROL:%.*]], %[[HEADER_PROL:.*]] ]
+; CHECK-NEXT:    [[PROL_ITER:%.*]] = phi i64 [ 0, %[[BODY_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], %[[HEADER_PROL]] ]
+; CHECK-NEXT:    [[C_PROL:%.*]] = add i64 [[A2_PROL]], 1
+; CHECK-NEXT:    [[D_PROL:%.*]] = load i32, ptr addrspace(1) null, align 4
+; CHECK-NEXT:    [[E_PROL:%.*]] = icmp eq i32 [[D_PROL]], 0
+; CHECK-NEXT:    br i1 [[E_PROL]], label %[[END_LOOPEXIT3:.*]], label %[[HEADER_PROL]]
+; CHECK:       [[HEADER_PROL]]:
+; CHECK-NEXT:    [[A_PROL]] = phi i64 [ [[C_PROL]], %[[BODY_PROL]] ]
+; CHECK-NEXT:    [[B_PROL:%.*]] = icmp eq i64 [[A_PROL]], 0
+; CHECK-NEXT:    [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
+; CHECK-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
+; CHECK-NEXT:    br i1 [[PROL_ITER_CMP]], label %[[BODY_PROL]], label %[[BODY_PROL_LOOPEXIT_UNR_LCSSA:.*]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[BODY_PROL_LOOPEXIT_UNR_LCSSA]]:
+; CHECK-NEXT:    [[A2_UNR_PH:%.*]] = phi i64 [ [[A_PROL]], %[[HEADER_PROL]] ]
+; CHECK-NEXT:    br label %[[BODY_PROL_LOOPEXIT]]
+; CHECK:       [[BODY_PROL_LOOPEXIT]]:
+; CHECK-NEXT:    [[A2_UNR:%.*]] = phi i64 [ [[TMP0]], %[[BODY_LR_PH]] ], [ [[A2_UNR_PH]], %[[BODY_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 3
+; CHECK-NEXT:    br i1 [[TMP4]], label %[[HEADER_AFTER_CRIT_EDGE:.*]], label %[[BODY_LR_PH_NEW:.*]]
+; CHECK:       [[BODY_LR_PH_NEW]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[HEADER:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[END_LOOPEXIT:.*]], label %[[HEADER_1:.*]]
+; CHECK:       [[HEADER_1]]:
+; CHECK-NEXT:    br i1 false, label %[[END_LOOPEXIT]], label %[[HEADER_2:.*]]
+; CHECK:       [[HEADER_2]]:
+; CHECK-NEXT:    [[C_7:%.*]] = add i64 [[A2:%.*]], 4
+; CHECK-NEXT:    br i1 false, label %[[END_LOOPEXIT]], label %[[HEADER_3:.*]]
+; CHECK:       [[HEADER_3]]:
+; CHECK-NEXT:    [[B_7:%.*]] = icmp eq i64 [[C_7]], 0
+; CHECK-NEXT:    br i1 [[B_7]], label %[[HEADER_AFTER_CRIT_EDGE_UNR_LCSSA:.*]], label %[[BODY]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[A2]] = phi i64 [ [[A2_UNR]], %[[BODY_LR_PH_NEW]] ], [ [[C_7]], %[[HEADER_3]] ]
+; CHECK-NEXT:    [[D:%.*]] = load i32, ptr addrspace(1) null, align 4
+; CHECK-NEXT:    [[E:%.*]] = icmp eq i32 [[D]], 0
+; CHECK-NEXT:    br i1 [[E]], label %[[END_LOOPEXIT]], label %[[HEADER]]
+; CHECK:       [[END_LOOPEXIT]]:
+; CHECK-NEXT:    br label %[[END:.*]]
+; CHECK:       [[END_LOOPEXIT3]]:
+; CHECK-NEXT:    br label %[[END]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret void
+; CHECK:       [[HEADER_AFTER_CRIT_EDGE_UNR_LCSSA]]:
+; CHECK-NEXT:    br label %[[HEADER_AFTER_CRIT_EDGE]]
+; CHECK:       [[HEADER_AFTER_CRIT_EDGE]]:
+; CHECK-NEXT:    br label %[[AFTER]]
+; CHECK:       [[AFTER]]:
+; CHECK-NEXT:    call void @foo(i32 0)
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %header
+
+header:
+  %a = phi i64 [ %0, %entry ], [ %c, %body ]
+  %b = icmp eq i64 %a, 0
+  br i1 %b, label %after, label %body
+
+body:
+  %c = add i64 %a, 1
+  %d = load i32, ptr addrspace(1) null, align 4
+  %e = icmp eq i32 %d, 0
+  br i1 %e, label %end, label %header
+
+end:
+  ret void
+
+after:
+  call void @foo(i32 0)
+  ret void
+}
+
+declare void @foo(i32)
+
+attributes #0 = { "tune-cpu"="generic" }
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.unroll.disable"}
+;.
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll
index de54852313456..b079abefaea65 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll
@@ -16,48 +16,86 @@ define i64 @test1() {
 ; CHECK-NEXT:    br label [[PREHEADER:%.*]]
 ; CHECK:       preheader:
 ; CHECK-NEXT:    [[TRIP:%.*]] = zext i32 undef to i64
+; CHECK-NEXT:    br i1 false, label [[LATCH_LR_PH:%.*]], label [[HEADEREXIT:%.*]]
+; CHECK:       latch.lr.ph:
+; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i64 [[TRIP]], -5
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = freeze i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[TMP3]], -1
+; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP3]], 3
+; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[LATCH_PROL_PREHEADER:%.*]], label [[LATCH_PROL_LOOPEXIT:%.*]]
+; CHECK:       latch.prol.preheader:
 ; CHECK-NEXT:    br label [[HEADER:%.*]]
-; CHECK:       header:
-; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 2, [[PREHEADER]] ], [ [[ADD_IV_3:%.*]], [[LATCH_3:%.*]] ]
-; CHECK-NEXT:    [[ADD_IV:%.*]] = add nuw nsw i64 [[IV]], 2
+; CHECK:       latch.prol:
+; CHECK-NEXT:    [[ADD_IV1_PROL:%.*]] = phi i64 [ 4, [[LATCH_PROL_PREHEADER]] ], [ [[ADD_IV:%.*]], [[HEADER_PROL:%.*]] ]
+; CHECK-NEXT:    [[PROL_ITER:%.*]] = phi i64 [ 0, [[LATCH_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], [[HEADER_PROL]] ]
+; CHECK-NEXT:    [[SHFT_PROL:%.*]] = ashr i64 [[ADD_IV1_PROL]], 1
+; CHECK-NEXT:    [[CMP2_PROL:%.*]] = icmp ult i64 [[SHFT_PROL]], [[TRIP]]
+; CHECK-NEXT:    br i1 [[CMP2_PROL]], label [[HEADER_PROL]], label [[LATCHEXIT_LOOPEXIT2:%.*]]
+; CHECK:       header.prol:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[ADD_IV1_PROL]], [[HEADER]] ]
+; CHECK-NEXT:    [[ADD_IV]] = add nuw nsw i64 [[IV]], 2
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult i64 [[ADD_IV]], [[TRIP]]
-; CHECK-NEXT:    br i1 [[CMP1]], label [[LATCH:%.*]], label [[HEADEREXIT:%.*]]
-; CHECK:       latch:
-; CHECK-NEXT:    [[SHFT:%.*]] = ashr i64 [[ADD_IV]], 1
+; CHECK-NEXT:    [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
+; CHECK-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
+; CHECK-NEXT:    br i1 [[PROL_ITER_CMP]], label [[HEADER]], label [[LATCH_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       latch.prol.loopexit.unr-lcssa:
+; CHECK-NEXT:    [[ADD_IV1_UNR_PH:%.*]] = phi i64 [ [[ADD_IV]], [[HEADER_PROL]] ]
+; CHECK-NEXT:    [[SPLIT_UNR_PH:%.*]] = phi i64 [ [[ADD_IV]], [[HEADER_PROL]] ]
+; CHECK-NEXT:    br label [[LATCH_PROL_LOOPEXIT]]
+; CHECK:       latch.prol.loopexit:
+; CHECK-NEXT:    [[ADD_IV1_UNR:%.*]] = phi i64 [ 4, [[LATCH_LR_PH]] ], [ [[ADD_IV1_UNR_PH]], [[LATCH_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[SPLIT_UNR:%.*]] = phi i64 [ poison, [[LATCH_LR_PH]] ], [ [[SPLIT_UNR_PH]], [[LATCH_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 3
+; CHECK-NEXT:    br i1 [[TMP5]], label [[HEADER_HEADEREXIT_CRIT_EDGE:%.*]], label [[LATCH_LR_PH_NEW:%.*]]
+; CHECK:       latch.lr.ph.new:
+; CHECK-NEXT:    br label [[LATCH:%.*]]
+; CHECK:       header:
+; CHECK-NEXT:    [[ADD_IV2:%.*]] = add nuw nsw i64 [[ADD_IV1:%.*]], 2
+; CHECK-NEXT:    [[SHFT:%.*]] = ashr i64 [[ADD_IV2]], 1
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult i64 [[SHFT]], [[TRIP]]
 ; CHECK-NEXT:    br i1 [[CMP2]], label [[HEADER_1:%.*]], label [[LATCHEXIT:%.*]]
 ; CHECK:       header.1:
-; CHECK-NEXT:    [[ADD_IV_1:%.*]] = add nuw nsw i64 [[IV]], 4
-; CHECK-NEXT:    [[CMP1_1:%.*]] = icmp ult i64 [[ADD_IV_1]], [[TRIP]]
-; CHECK-NEXT:    br i1 [[CMP1_1]], label [[LATCH_1:%.*]], label [[HEADEREXIT]]
-; CHECK:       latch.1:
+; CHECK-NEXT:    [[ADD_IV_1:%.*]] = add nuw nsw i64 [[ADD_IV1]], 4
 ; CHECK-NEXT:    [[SHFT_1:%.*]] = ashr i64 [[ADD_IV_1]], 1
 ; CHECK-NEXT:    [[CMP2_1:%.*]] = icmp ult i64 [[SHFT_1]], [[TRIP]]
 ; CHECK-NEXT:    br i1 [[CMP2_1]], label [[HEADER_2:%.*]], label [[LATCHEXIT]]
 ; CHECK:       header.2:
-; CHECK-NEXT:    [[ADD_IV_2:%.*]] = add nuw nsw i64 [[IV]], 6
-; CHECK-NEXT:    [[CMP1_2:%.*]] = icmp ult i64 [[ADD_IV_2]], [[TRIP]]
-; CHECK-NEXT:    br i1 [[CMP1_2]], label [[LATCH_2:%.*]], label [[HEADEREXIT]]
-; CHECK:       latch.2:
+; CHECK-NEXT:    [[ADD_IV_2:%.*]] = add nuw nsw i64 [[ADD_IV1]], 6
 ; CHECK-NEXT:    [[SHFT_2:%.*]] = ashr i64 [[ADD_IV_2]], 1
 ; CHECK-NEXT:    [[CMP2_2:%.*]] = icmp ult i64 [[SHFT_2]], [[TRIP]]
 ; CHECK-NEXT:    br i1 [[CMP2_2]], label [[HEADER_3:%.*]], label [[LATCHEXIT]]
 ; CHECK:       header.3:
-; CHECK-NEXT:    [[ADD_IV_3]] = add nuw nsw i64 [[IV]], 8
+; CHECK-NEXT:    [[ADD_IV_3:%.*]] = add nuw nsw i64 [[ADD_IV1]], 8
 ; CHECK-NEXT:    [[CMP1_3:%.*]] = icmp ult i64 [[ADD_IV_3]], [[TRIP]]
-; CHECK-NEXT:    br i1 [[CMP1_3]], label [[LATCH_3]], label [[HEADEREXIT]]
-; CHECK:       latch.3:
-; CHECK-NEXT:    [[SHFT_3:%.*]] = ashr i64 [[ADD_IV_3]], 1
+; CHECK-NEXT:    br i1 [[CMP1_3]], label [[LATCH]], label [[HEADER_HEADEREXIT_CRIT_EDGE_UNR_LCSSA:%.*]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK:       latch:
+; CHECK-NEXT:    [[ADD_IV1]] = phi i64 [ [[ADD_IV1_UNR]], [[LATCH_LR_PH_NEW]] ], [ [[ADD_IV_3]], [[HEADER_3]] ]
+; CHECK-NEXT:    [[SHFT_3:%.*]] = ashr i64 [[ADD_IV1]], 1
 ; CHECK-NEXT:    [[CMP2_3:%.*]] = icmp ult i64 [[SHFT_3]], [[TRIP]]
-; CHECK-NEXT:    br i1 [[CMP2_3]], label [[HEADER]], label [[LATCHEXIT]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    br i1 [[CMP2_3]], label [[HEADER1:%.*]], label [[LATCHEXIT]]
+; CHECK:       header.headerexit_crit_edge.unr-lcssa:
+; CHECK-NEXT:    [[SPLIT_PH:%.*]] = phi i64 [ [[ADD_IV_3]], [[HEADER_3]] ]
+; CHECK-NEXT:    br label [[HEADER_HEADEREXIT_CRIT_EDGE]]
+; CHECK:       header.headerexit_crit_edge:
+; CHECK-NEXT:    [[SPLIT:%.*]] = phi i64 [ [[SPLIT_UNR]], [[LATCH_PROL_LOOPEXIT]] ], [ [[SPLIT_PH]], [[HEADER_HEADEREXIT_CRIT_EDGE_UNR_LCSSA]] ]
+; CHECK-NEXT:    br label [[HEADEREXIT]]
 ; CHECK:       headerexit:
-; CHECK-NEXT:    [[ADDPHI:%.*]] = phi i64 [ [[ADD_IV]], [[HEADER]] ], [ [[ADD_IV_1]], [[HEADER_1]] ], [ [[ADD_IV_2]], [[HEADER_2]] ], [ [[ADD_IV_3]], [[HEADER_3]] ]
+; CHECK-NEXT:    [[ADDPHI:%.*]] = phi i64 [ [[SPLIT]], [[HEADER_HEADEREXIT_CRIT_EDGE]] ], [ 4, [[PREHEADER]] ]
+; CHECK-NEXT:    br label [[MERGEDEXIT1:%.*]]
+; CHECK:       latchexit.loopexit:
+; CHECK-NEXT:    [[SHFTPHI_PH:%.*]] = phi i64 [ [[SHFT_3]], [[LATCH]] ], [ [[SHFT]], [[HEADER1]] ], [ [[SHFT_1]], [[HEADER_1]] ], [ [[SHFT_2]], [[HEADER_2]] ]
 ; CHECK-NEXT:    br label [[MERGEDEXIT:%.*]]
-; CHECK:       latchexit:
-; CHECK-NEXT:    [[SHFTPHI:%.*]] = phi i64 [ [[SHFT]], [[LATCH]] ], [ [[SHFT_1]], [[LATCH_1]] ], [ [[SHFT_2]], [[LATCH_2]] ], [ [[SHFT_3]], [[LATCH_3]] ]
+; CHECK:       latchexit.loopexit2:
+; CHECK-NEXT:    [[SHFTPHI_PH3:%.*]] = phi i64 [ [[SHFT_PROL]], [[HEADER]] ]
 ; CHECK-NEXT:    br label [[MERGEDEXIT]]
+; CHECK:       latchexit:
+; CHECK-NEXT:    [[SHFTPHI:%.*]] = phi i64 [ [[SHFTPHI_PH]], [[LATCHEXIT]] ], [ [[SHFTPHI_PH3]], [[LATCHEXIT_LOOPEXIT2]] ]
+; CHECK-NEXT:    br label [[MERGEDEXIT1]]
 ; CHECK:       mergedexit:
-; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ [[ADDPHI]], [[HEADEREXIT]] ], [ [[SHFTPHI]], [[LATCHEXIT]] ]
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ [[ADDPHI]], [[HEADEREXIT]] ], [ [[SHFTPHI]], [[MERGEDEXIT]] ]
 ; CHECK-NEXT:    ret i64 [[RETVAL]]
 ;
 entry:
@@ -98,42 +136,75 @@ define  void @test2(i1 %cond, i32 %n) {
 ; CHECK-NEXT:    br i1 [[COND:%.*]], label [[PREHEADER:%.*]], label [[MERGEDEXIT:%.*]]
 ; CHECK:       preheader:
 ; CHECK-NEXT:    [[TRIP:%.*]] = zext i32 [[N:%.*]] to i64
+; CHECK-NEXT:    [[CMP11:%.*]] = icmp ult i64 4, [[TRIP]]
+; CHECK-NEXT:    br i1 [[CMP11]], label [[LATCH_LR_PH:%.*]], label [[HEADEREXIT:%.*]]
+; CHECK:       latch.lr.ph:
+; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i64 [[TRIP]], -5
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = freeze i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[TMP3]], -1
+; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP3]], 3
+; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[LATCH_PROL_PREHEADER:%.*]], label [[LATCH_PROL_LOOPEXIT:%.*]]
+; CHECK:       latch.prol.preheader:
 ; CHECK-NEXT:    br label [[HEADER:%.*]]
-; CHECK:       header:
-; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 2, [[PREHEADER]] ], [ [[ADD_IV_3:%.*]], [[LATCH_3:%.*]] ]
-; CHECK-NEXT:    [[ADD_IV:%.*]] = add nuw nsw i64 [[IV]], 2
+; CHECK:       latch.prol:
+; CHECK-NEXT:    [[ADD_IV2_PROL:%.*]] = phi i64 [ 4, [[LATCH_PROL_PREHEADER]] ], [ [[ADD_IV:%.*]], [[HEADER_PROL:%.*]] ]
+; CHECK-NEXT:    [[PROL_ITER:%.*]] = phi i64 [ 0, [[LATCH_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], [[HEADER_PROL]] ]
+; CHECK-NEXT:    [[SHFT_PROL:%.*]] = ashr i64 [[ADD_IV2_PROL]], 1
+; CHECK-NEXT:    [[CMP2_PROL:%.*]] = icmp ult i64 [[SHFT_PROL]], [[TRIP]]
+; CHECK-NEXT:    br i1 [[CMP2_PROL]], label [[HEADER_PROL]], label [[LATCHEXIT_LOOPEXIT3:%.*]]
+; CHECK:       header.prol:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[ADD_IV2_PROL]], [[HEADER]] ]
+; CHECK-NEXT:    [[ADD_IV]] = add nuw nsw i64 [[IV]], 2
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult i64 [[ADD_IV]], [[TRIP]]
-; CHECK-NEXT:    br i1 [[CMP1]], label [[LATCH:%.*]], label [[HEADEREXIT:%.*]]
-; CHECK:       latch:
-; CHECK-NEXT:    [[SHFT:%.*]] = ashr i64 [[ADD_IV]], 1
+; CHECK-NEXT:    [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
+; CHECK-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
+; CHECK-NEXT:    br i1 [[PROL_ITER_CMP]], label [[HEADER]], label [[LATCH_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       latch.prol.loopexit.unr-lcssa:
+; CHECK-NEXT:    [[ADD_IV2_UNR_PH:%.*]] = phi i64 [ [[ADD_IV]], [[HEADER_PROL]] ]
+; CHECK-NEXT:    br label [[LATCH_PROL_LOOPEXIT]]
+; CHECK:       latch.prol.loopexit:
+; CHECK-NEXT:    [[ADD_IV2_UNR:%.*]] = phi i64 [ 4, [[LATCH_LR_PH]] ], [ [[ADD_IV2_UNR_PH]], [[LATCH_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 3
+; CHECK-NEXT:    br i1 [[TMP5]], label [[HEADER_HEADEREXIT_CRIT_EDGE:%.*]], label [[LATCH_LR_PH_NEW:%.*]]
+; CHECK:       latch.lr.ph.new:
+; CHECK-NEXT:    br label [[LATCH:%.*]]
+; CHECK:       header:
+; CHECK-NEXT:    [[ADD_IV1:%.*]] = add nuw nsw i64 [[ADD_IV2:%.*]], 2
+; CHECK-NEXT:    [[SHFT:%.*]] = ashr i64 [[ADD_IV1]], 1
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult i64 [[SHFT]], [[TRIP]]
 ; CHECK-NEXT:    br i1 [[CMP2]], label [[HEADER_1:%.*]], label [[LATCHEXIT:%.*]]
 ; CHECK:       header.1:
-; CHECK-NEXT:    [[ADD_IV_1:%.*]] = add nuw nsw i64 [[IV]], 4
-; CHECK-NEXT:    [[CMP1_1:%.*]] = icmp ult i64 [[ADD_IV_1]], [[TRIP]]
-; CHECK-NEXT:    br i1 [[CMP1_1]], label [[LATCH_1:%.*]], label [[HEADEREXIT]]
-; CHECK:       latch.1:
+; CHECK-NEXT:    [[ADD_IV_1:%.*]] = add nuw nsw i64 [[ADD_IV2]], 4
 ; CHECK-NEXT:    [[SHFT_1:%.*]] = ashr i64 [[ADD_IV_1]], 1
 ; CHECK-NEXT:    [[CMP2_1:%.*]] = icmp ult i64 [[SHFT_1]], [[TRIP]]
 ; CHECK-NEXT:    br i1 [[CMP2_1]], label [[HEADER_2:%.*]], label [[LATCHEXIT]]
 ; CHECK:       header.2:
-; CHECK-NEXT:    [[ADD_IV_2:%.*]] = add nuw nsw i64 [[IV]], 6
-; CHECK-NEXT:    [[CMP1_2:%.*]] = icmp ult i64 [[ADD_IV_2]], [[TRIP]]
-; CHECK-NEXT:    br i1 [[CMP1_2]], label [[LATCH_2:%.*]], label [[HEADEREXIT]]
-; CHECK:       latch.2:
+; CHECK-NEXT:    [[ADD_IV_2:%.*]] = add nuw nsw i64 [[ADD_IV2]], 6
 ; CHECK-NEXT:    [[SHFT_2:%.*]] = ashr i64 [[ADD_IV_2]], 1
 ; CHECK-NEXT:    [[CMP2_2:%.*]] = icmp ult i64 [[SHFT_2]], [[TRIP]]
 ; CHECK-NEXT:    br i1 [[CMP2_2]], label [[HEADER_3:%.*]], label [[LATCHEXIT]]
 ; CHECK:       header.3:
-; CHECK-NEXT:    [[ADD_IV_3]] = add nuw nsw i64 [[IV]], 8
+; CHECK-NEXT:    [[ADD_IV_3:%.*]] = add nuw nsw i64 [[ADD_IV2]], 8
 ; CHECK-NEXT:    [[CMP1_3:%.*]] = icmp ult i64 [[ADD_IV_3]], [[TRIP]]
-; CHECK-NEXT:    br i1 [[CMP1_3]], label [[LATCH_3]], label [[HEADEREXIT]]
-; CHECK:       latch.3:
-; CHECK-NEXT:    [[SHFT_3:%.*]] = ashr i64 [[ADD_IV_3]], 1
+; CHECK-NEXT:    br i1 [[CMP1_3]], label [[LATCH]], label [[HEADER_HEADEREXIT_CRIT_EDGE_UNR_LCSSA:%.*]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK:       latch:
+; CHECK-NEXT:    [[ADD_IV2]] = phi i64 [ [[ADD_IV2_UNR]], [[LATCH_LR_PH_NEW]] ], [ [[ADD_IV_3]], [[HEADER_3]] ]
+; CHECK-NEXT:    [[SHFT_3:%.*]] = ashr i64 [[ADD_IV2]], 1
 ; CHECK-NEXT:    [[CMP2_3:%.*]] = icmp ult i64 [[SHFT_3]], [[TRIP]]
-; CHECK-NEXT:    br i1 [[CMP2_3]], label [[HEADER]], label [[LATCHEXIT]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK-NEXT:    br i1 [[CMP2_3]], label [[HEADER1:%.*]], label [[LATCHEXIT]]
+; CHECK:       header.headerexit_crit_edge.unr-lcssa:
+; CHECK-NEXT:    br label [[HEADER_HEADEREXIT_CRIT_EDGE]]
+; CHECK:       header.headerexit_crit_edge:
+; CHECK-NEXT:    br label [[HEADEREXIT]]
 ; CHECK:       headerexit:
 ; CHECK-NEXT:    br label [[MERGEDEXIT]]
+; CHECK:       latchexit.loopexit:
+; CHECK-NEXT:    br label [[LATCHEXIT1:%.*]]
+; CHECK:       latchexit.loopexit3:
+; CHECK-NEXT:    br label [[LATCHEXIT1]]
 ; CHECK:       latchexit:
 ; CHECK-NEXT:    br label [[MERGEDEXIT]]
 ; CHECK:       mergedexit:
@@ -175,44 +246,79 @@ define i64 @test3(i32 %n) {
 ; CHECK-NEXT:    br label [[PREHEADER:%.*]]
 ; CHECK:       preheader:
 ; CHECK-NEXT:    [[TRIP:%.*]] = zext i32 [[N:%.*]] to i64
+; CHECK-NEXT:    [[CMP11:%.*]] = icmp ult i64 4, [[TRIP]]
+; CHECK-NEXT:    br i1 [[CMP11]], label [[LATCH_LR_PH:%.*]], label [[HEADEREXIT:%.*]]
+; CHECK:       latch.lr.ph:
+; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i64 [[TRIP]], -5
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = freeze i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[TMP3]], -1
+; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP3]], 3
+; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[LATCH_PROL_PREHEADER:%.*]], label [[LATCH_PROL_LOOPEXIT:%.*]]
+; CHECK:       latch.prol.preheader:
 ; CHECK-NEXT:    br label [[HEADER:%.*]]
-; CHECK:       header:
-; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 2, [[PREHEADER]] ], [ [[ADD_IV_3:%.*]], [[LATCH_3:%.*]] ]
-; CHECK-NEXT:    [[ADD_IV:%.*]] = add nuw nsw i64 [[IV]], 2
+; CHECK:       latch.prol:
+; CHECK-NEXT:    [[ADD_IV2_PROL:%.*]] = phi i64 [ 4, [[LATCH_PROL_PREHEADER]] ], [ [[ADD_IV:%.*]], [[HEADER_PROL:%.*]] ]
+; CHECK-NEXT:    [[PROL_ITER:%.*]] = phi i64 [ 0, [[LATCH_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], [[HEADER_PROL]] ]
+; CHECK-NEXT:    [[SHFT_PROL:%.*]] = ashr i64 [[ADD_IV2_PROL]], 1
+; CHECK-NEXT:    [[CMP2_PROL:%.*]] = icmp ult i64 [[SHFT_PROL]], [[TRIP]]
+; CHECK-NEXT:    br i1 [[CMP2_PROL]], label [[HEADER_PROL]], label [[LATCHEXIT_LOOPEXIT3:%.*]]
+; CHECK:       header.prol:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[ADD_IV2_PROL]], [[HEADER]] ]
+; CHECK-NEXT:    [[ADD_IV]] = add nuw nsw i64 [[IV]], 2
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult i64 [[ADD_IV]], [[TRIP]]
-; CHECK-NEXT:    br i1 [[CMP1]], label [[LATCH:%.*]], label [[HEADEREXIT:%.*]]
-; CHECK:       latch:
-; CHECK-NEXT:    [[SHFT:%.*]] = ashr i64 [[ADD_IV]], 1
+; CHECK-NEXT:    [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
+; CHECK-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
+; CHECK-NEXT:    br i1 [[PROL_ITER_CMP]], label [[HEADER]], label [[LATCH_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK:       latch.prol.loopexit.unr-lcssa:
+; CHECK-NEXT:    [[ADD_IV2_UNR_PH:%.*]] = phi i64 [ [[ADD_IV]], [[HEADER_PROL]] ]
+; CHECK-NEXT:    br label [[LATCH_PROL_LOOPEXIT]]
+; CHECK:       latch.prol.loopexit:
+; CHECK-NEXT:    [[ADD_IV2_UNR:%.*]] = phi i64 [ 4, [[LATCH_LR_PH]] ], [ [[ADD_IV2_UNR_PH]], [[LATCH_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 3
+; CHECK-NEXT:    br i1 [[TMP5]], label [[HEADER_HEADEREXIT_CRIT_EDGE:%.*]], label [[LATCH_LR_PH_NEW:%.*]]
+; CHECK:       latch.lr.ph.new:
+; CHECK-NEXT:    br label [[LATCH:%.*]]
+; CHECK:       header:
+; CHECK-NEXT:    [[ADD_IV1:%.*]] = add nuw nsw i64 [[ADD_IV2:%.*]], 2
+; CHECK-NEXT:    [[SHFT:%.*]] = ashr i64 [[ADD_IV1]], 1
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult i64 [[SHFT]], [[TRIP]]
 ; CHECK-NEXT:    br i1 [[CMP2]], label [[HEADER_1:%.*]], label [[LATCHEXIT:%.*]]
 ; CHECK:       header.1:
-; CHECK-NEXT:    [[ADD_IV_1:%.*]] = add nuw nsw i64 [[IV]], 4
-; CHECK-NEXT:    [[CMP1_1:%.*]] = icmp ult i64 [[ADD_IV_1]], [[TRIP]]
-; CHECK-NEXT:    br i1 [[CMP1_1]], label [[LATCH_1:%.*]], label [[HEADEREXIT]]
-; CHECK:       latch.1:
+; CHECK-NEXT:    [[ADD_IV_1:%.*]] = add nuw nsw i64 [[ADD_IV2]], 4
 ; CHECK-NEXT:    [[SHFT_1:%.*]] = ashr i64 [[ADD_IV_1]], 1
 ; CHECK-NEXT:    [[CMP2_1:%.*]] = icmp ult i64 [[SHFT_1]], [[TRIP]]
 ; CHECK-NEXT:    br i1 [[CMP2_1]], label [[HEADER_2:%.*]], label [[LATCHEXIT]]
 ; CHECK:       header.2:
-; CHECK-NEXT:    [[ADD_IV_2:%.*]] = add nuw nsw i64 [[IV]], 6
-; CHECK-NEXT:    [[CMP1_2:%.*]] = icmp ult i64 [[ADD_IV_2]], [[TRIP]]
-; CHECK-NEXT:    br i1 [[CMP1_2]], label [[LATCH_2:%.*]], label [[HEADEREXIT]]
-; CHECK:       latch.2:
+; CHECK-NEXT:    [[ADD_IV_2:%.*]] = add nuw nsw i64 [[ADD_IV2]], 6
 ; CHECK-NEXT:    [[SHFT_2:%.*]] = ashr i64 [[ADD_IV_2]], 1
 ; CHECK-NEXT:    [[CMP2_2:%.*]] = icmp ult i64 [[SHFT_2]], [[TRIP]]
 ; CHECK-NEXT:    br i1 [[CMP2_2]], label [[HEADER_3:%.*]], label [[LATCHEXIT]]
 ; CHECK:       header.3:
-; CHECK-NEXT:    [[ADD_IV_3]] = add nuw nsw i64 [[IV]], 8
+; CHECK-NEXT:    [[ADD_IV_3:%.*]] = add nuw nsw i64 [[ADD_IV2]], 8
 ; CHECK-NEXT:    [[CMP1_3:%.*]] = icmp ult i64 [[ADD_IV_3]], [[TRIP]]
-; CHECK-NEXT:    br i1 [[CMP1_3]], label [[LATCH_3]], label [[HEADEREXIT]]
-; CHECK:       latch.3:
-; CHECK-NEXT:    [[SHFT_3:%.*]] = ashr i64 [[ADD_IV_3]], 1
+; CHECK-NEXT:    br i1 [[CMP1_3]], label [[LATCH]], label [[HEADER_HEADEREXIT_CRIT_EDGE_UNR_LCSSA:%.*]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK:       latch:
+; CHECK-NEXT:    [[ADD_IV2]] = phi i64 [ [[ADD_IV2_UNR]], [[LATCH_LR_PH_NEW]] ], [ [[ADD_IV_3]], [[HEADER_3]] ]
+; CHECK-NEXT:    [[SHFT_3:%.*]] = ashr i64 [[ADD_IV2]], 1
 ; CHECK-NEXT:    [[CMP2_3:%.*]] = icmp ult i64 [[SHFT_3]], [[TRIP]]
-; CHECK-NEXT:    br i1 [[CMP2_3]], label [[HEADER]], label [[LATCHEXIT]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT:    br i1 [[CMP2_3]], label [[HEADER1:%.*]], label [[LATCHEXIT]]
+; CHECK:       header.headerexit_crit_edge.unr-lcssa:
+; CHECK-NEXT:    br label [[HEADER_HEADEREXIT_CRIT_EDGE]]
+; CHECK:       header.headerexit_crit_edge:
+; CHECK-NEXT:    br label [[HEADEREXIT]]
 ; CHECK:       headerexit:
 ; CHECK-NEXT:    br label [[EXITSUCC:%.*]]
+; CHECK:       latchexit.loopexit:
+; CHECK-NEXT:    [[SHFTPHI_PH:%.*]] = phi i64 [ [[SHFT_3]], [[LATCH]] ], [ [[SHFT]], [[HEADER1]] ], [ [[SHFT_1]], [[HEADER_1]] ], [ [[SHFT_2]], [[HEADER_2]] ]
+; CHECK-NEXT:    br label [[LATCHEXIT1:%.*]]
+; CHECK:       latchexit.loopexit3:
+; CHECK-NEXT:    [[SHFTPHI_PH4:%.*]] = phi i64 [ [[SHFT_PROL]], [[HEADER]] ]
+; CHECK-NEXT:    br label [[LATCHEXIT1]]
 ; CHECK:       latchexit:
-; CHECK-NEXT:    [[SHFTPHI:%.*]] = phi i64 [ [[SHFT]], [[LATCH]] ], [ [[SHFT_1]], [[LATCH_1]] ], [ [[SHFT_2]], [[LATCH_2]] ], [ [[SHFT_3]], [[LATCH_3]] ]
+; CHECK-NEXT:    [[SHFTPHI:%.*]] = phi i64 [ [[SHFTPHI_PH]], [[LATCHEXIT]] ], [ [[SHFTPHI_PH4]], [[LATCHEXIT_LOOPEXIT3]] ]
 ; CHECK-NEXT:    ret i64 [[SHFTPHI]]
 ; CHECK:       exitsucc:
 ; CHECK-NEXT:    ret i64 96
@@ -265,15 +371,15 @@ define void @test4(i16 %c3) {
 ; CHECK-NEXT:    br label [[EXITING_PROL:%.*]]
 ; CHECK:       exiting.prol:
 ; CHECK-NEXT:    switch i16 [[C3:%.*]], label [[DEFAULT_LOOPEXIT_LOOPEXIT1:%.*]] [
-; CHECK-NEXT:    i16 45, label [[OTHEREXIT_LOOPEXIT2:%.*]]
-; CHECK-NEXT:    i16 95, label [[LATCH_PROL]]
+; CHECK-NEXT:      i16 45, label [[OTHEREXIT_LOOPEXIT2:%.*]]
+; CHECK-NEXT:      i16 95, label [[LATCH_PROL]]
 ; CHECK-NEXT:    ]
 ; CHECK:       latch.prol:
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_PROL]] = add nuw nsw i64 [[INDVARS_IV_PROL]], 1
 ; CHECK-NEXT:    [[C2_PROL:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT_PROL]], [[C1]]
 ; CHECK-NEXT:    [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
 ; CHECK-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
-; CHECK-NEXT:    br i1 [[PROL_ITER_CMP]], label [[HEADER_PROL]], label [[HEADER_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT:    br i1 [[PROL_ITER_CMP]], label [[HEADER_PROL]], label [[HEADER_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP7:![0-9]+]]
 ; CHECK:       header.prol.loopexit.unr-lcssa:
 ; CHECK-NEXT:    [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL]], [[LATCH_PROL]] ]
 ; CHECK-NEXT:    br label [[HEADER_PROL_LOOPEXIT]]
@@ -288,34 +394,34 @@ define void @test4(i16 %c3) {
 ; CHECK-NEXT:    br label [[EXITING:%.*]]
 ; CHECK:       exiting:
 ; CHECK-NEXT:    switch i16 [[C3]], label [[DEFAULT_LOOPEXIT_LOOPEXIT:%.*]] [
-; CHECK-NEXT:    i16 45, label [[OTHEREXIT_LOOPEXIT:%.*]]
-; CHECK-NEXT:    i16 95, label [[LATCH:%.*]]
+; CHECK-NEXT:      i16 45, label [[OTHEREXIT_LOOPEXIT:%.*]]
+; CHECK-NEXT:      i16 95, label [[LATCH:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       latch:
 ; CHECK-NEXT:    br label [[EXITING_1:%.*]]
 ; CHECK:       exiting.1:
 ; CHECK-NEXT:    switch i16 [[C3]], label [[DEFAULT_LOOPEXIT_LOOPEXIT]] [
-; CHECK-NEXT:    i16 45, label [[OTHEREXIT_LOOPEXIT]]
-; CHECK-NEXT:    i16 95, label [[LATCH_1:%.*]]
+; CHECK-NEXT:      i16 45, label [[OTHEREXIT_LOOPEXIT]]
+; CHECK-NEXT:      i16 95, label [[LATCH_1:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       latch.1:
 ; CHECK-NEXT:    br label [[EXITING_2:%.*]]
 ; CHECK:       exiting.2:
 ; CHECK-NEXT:    switch i16 [[C3]], label [[DEFAULT_LOOPEXIT_LOOPEXIT]] [
-; CHECK-NEXT:    i16 45, label [[OTHEREXIT_LOOPEXIT]]
-; CHECK-NEXT:    i16 95, label [[LATCH_2:%.*]]
+; CHECK-NEXT:      i16 45, label [[OTHEREXIT_LOOPEXIT]]
+; CHECK-NEXT:      i16 95, label [[LATCH_2:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       latch.2:
 ; CHECK-NEXT:    br label [[EXITING_3:%.*]]
 ; CHECK:       exiting.3:
 ; CHECK-NEXT:    switch i16 [[C3]], label [[DEFAULT_LOOPEXIT_LOOPEXIT]] [
-; CHECK-NEXT:    i16 45, label [[OTHEREXIT_LOOPEXIT]]
-; CHECK-NEXT:    i16 95, label [[LATCH_3]]
+; CHECK-NEXT:      i16 45, label [[OTHEREXIT_LOOPEXIT]]
+; CHECK-NEXT:      i16 95, label [[LATCH_3]]
 ; CHECK-NEXT:    ]
 ; CHECK:       latch.3:
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV]], 4
 ; CHECK-NEXT:    [[C2_3:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT_3]], [[C1]]
-; CHECK-NEXT:    br i1 [[C2_3]], label [[HEADER]], label [[LATCHEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT:    br i1 [[C2_3]], label [[HEADER]], label [[LATCHEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       latchexit.unr-lcssa:
 ; CHECK-NEXT:    br label [[LATCHEXIT]]
 ; CHECK:       latchexit:
@@ -414,13 +520,13 @@ define void @test5() {
 ; CHECK-NEXT:    [[C2_3_PROL:%.*]] = call i1 @unknown(i32 0)
 ; CHECK-NEXT:    br i1 [[C2_3_PROL]], label [[INNERLATCH_3_PROL:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT2]]
 ; CHECK:       innerLatch.3.prol:
-; CHECK-NEXT:    br i1 false, label [[INNERH_PROL]], label [[OUTERLATCH_PROL]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT:    br i1 false, label [[INNERH_PROL]], label [[OUTERLATCH_PROL]], !llvm.loop [[LOOP9:![0-9]+]]
 ; CHECK:       outerLatch.prol:
 ; CHECK-NEXT:    [[TMP6_PROL]] = add i32 [[TMP4_PROL]], 1
 ; CHECK-NEXT:    [[TMP7_PROL:%.*]] = icmp sgt i32 [[TMP6_PROL]], 79
 ; CHECK-NEXT:    [[PROL_ITER_NEXT]] = add i32 [[PROL_ITER]], 1
 ; CHECK-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp ne i32 [[PROL_ITER_NEXT]], [[XTRAITER]]
-; CHECK-NEXT:    br i1 [[PROL_ITER_CMP]], label [[OUTERH_PROL]], label [[OUTERH_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT:    br i1 [[PROL_ITER_CMP]], label [[OUTERH_PROL]], label [[OUTERH_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK:       outerH.prol.loopexit.unr-lcssa:
 ; CHECK-NEXT:    [[TMP4_UNR_PH:%.*]] = phi i32 [ [[TMP6_PROL]], [[OUTERLATCH_PROL]] ]
 ; CHECK-NEXT:    br label [[OUTERH_PROL_LOOPEXIT]]
@@ -464,7 +570,7 @@ define void @test5() {
 ; CHECK-NEXT:    [[C2_3:%.*]] = call i1 @unknown(i32 0)
 ; CHECK-NEXT:    br i1 [[C2_3]], label [[INNERLATCH_3:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT]]
 ; CHECK:       innerLatch.3:
-; CHECK-NEXT:    br i1 false, label [[INNERH]], label [[OUTERLATCH]], !llvm.loop [[LOOP6]]
+; CHECK-NEXT:    br i1 false, label [[INNERH]], label [[OUTERLATCH]], !llvm.loop [[LOOP9]]
 ; CHECK:       outerLatch:
 ; CHECK-NEXT:    br label [[INNERH_14:%.*]]
 ; CHECK:       innerH.14:
@@ -498,7 +604,7 @@ define void @test5() {
 ; CHECK-NEXT:    [[C2_3_1:%.*]] = call i1 @unknown(i32 0)
 ; CHECK-NEXT:    br i1 [[C2_3_1]], label [[INNERLATCH_3_1:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT19]]
 ; CHECK:       innerLatch.3.1:
-; CHECK-NEXT:    br i1 false, label [[INNERH_14]], label [[OUTERLATCH_1]], !llvm.loop [[LOOP6]]
+; CHECK-NEXT:    br i1 false, label [[INNERH_14]], label [[OUTERLATCH_1]], !llvm.loop [[LOOP9]]
 ; CHECK:       outerLatch.1:
 ; CHECK-NEXT:    br label [[INNERH_29:%.*]]
 ; CHECK:       innerH.29:
@@ -532,7 +638,7 @@ define void @test5() {
 ; CHECK-NEXT:    [[C2_3_2:%.*]] = call i1 @unknown(i32 0)
 ; CHECK-NEXT:    br i1 [[C2_3_2]], label [[INNERLATCH_3_2:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT21]]
 ; CHECK:       innerLatch.3.2:
-; CHECK-NEXT:    br i1 false, label [[INNERH_29]], label [[OUTERLATCH_2]], !llvm.loop [[LOOP6]]
+; CHECK-NEXT:    br i1 false, label [[INNERH_29]], label [[OUTERLATCH_2]], !llvm.loop [[LOOP9]]
 ; CHECK:       outerLatch.2:
 ; CHECK-NEXT:    br label [[INNERH_314:%.*]]
 ; CHECK:       innerH.314:
@@ -566,11 +672,11 @@ define void @test5() {
 ; CHECK-NEXT:    [[C2_3_3:%.*]] = call i1 @unknown(i32 0)
 ; CHECK-NEXT:    br i1 [[C2_3_3]], label [[INNERLATCH_3_3:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT23]]
 ; CHECK:       innerLatch.3.3:
-; CHECK-NEXT:    br i1 false, label [[INNERH_314]], label [[OUTERLATCH_3]], !llvm.loop [[LOOP6]]
+; CHECK-NEXT:    br i1 false, label [[INNERH_314]], label [[OUTERLATCH_3]], !llvm.loop [[LOOP9]]
 ; CHECK:       outerLatch.3:
 ; CHECK-NEXT:    [[TMP6_3]] = add i32 [[TMP4]], 4
 ; CHECK-NEXT:    [[TMP7_3:%.*]] = icmp sgt i32 [[TMP6_3]], 79
-; CHECK-NEXT:    br i1 [[TMP7_3]], label [[OUTERLATCHEXIT_LOOPEXIT_UNR_LCSSA:%.*]], label [[OUTERH]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP7_3]], label [[OUTERLATCHEXIT_LOOPEXIT_UNR_LCSSA:%.*]], label [[OUTERH]], !llvm.loop [[LOOP11:![0-9]+]]
 ; CHECK:       outerLatchExit.loopexit.unr-lcssa:
 ; CHECK-NEXT:    br label [[OUTERLATCHEXIT_LOOPEXIT]]
 ; CHECK:       outerLatchExit.loopexit:
@@ -676,7 +782,7 @@ define void @test6(i64 %start) {
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT_PROL]], 616
 ; CHECK-NEXT:    [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
 ; CHECK-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
-; CHECK-NEXT:    br i1 [[PROL_ITER_CMP]], label [[HEADER_PROL]], label [[HEADER_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK-NEXT:    br i1 [[PROL_ITER_CMP]], label [[HEADER_PROL]], label [[HEADER_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK:       header.prol.loopexit.unr-lcssa:
 ; CHECK-NEXT:    [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL]], [[LATCH_PROL]] ]
 ; CHECK-NEXT:    br label [[HEADER_PROL_LOOPEXIT]]
@@ -709,7 +815,7 @@ define void @test6(i64 %start) {
 ; CHECK:       latch.3:
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_3]] = add nsw i64 [[INDVARS_IV]], 8
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT_3]], 616
-; CHECK-NEXT:    br i1 [[TMP9]], label [[HEADER]], label [[LATCHEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP9]], label [[HEADER]], label [[LATCHEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP13:![0-9]+]]
 ; CHECK:       latchexit.unr-lcssa:
 ; CHECK-NEXT:    br label [[LATCHEXIT]]
 ; CHECK:       latchexit:

>From d49e403aefcbb1dbf794777d7d82ccc6ff4fe334 Mon Sep 17 00:00:00 2001
From: Marek Sedlacek <msedlacek at azul.com>
Date: Thu, 3 Jul 2025 14:23:24 +0000
Subject: [PATCH 2/3] Fixed wording of a comment and a debug message; removed
 UB and an unused parameter from a test

---
 llvm/lib/Transforms/Utils/LoopUnroll.cpp      |  6 ++++--
 .../X86/runtime-unroll-after-rotate.ll        | 20 +++++++++----------
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 17bf8816c888a..dacfaec56e541 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -488,11 +488,13 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
   if (ULO.Runtime && SE) {
     BasicBlock *OrigHeader = L->getHeader();
     BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
-    // Rotate loop if it makes it countable (for later unrolling)
+    // Rotate loop if it makes the exit count from the latch computable (for
+    // later unrolling).
     if (BI && !BI->isUnconditional() &&
         isa<SCEVCouldNotCompute>(SE->getExitCount(L, L->getLoopLatch())) &&
         !isa<SCEVCouldNotCompute>(SE->getExitCount(L, OrigHeader))) {
-      LLVM_DEBUG(dbgs() << "  Rotating loop to make the loop countable.\n");
+      LLVM_DEBUG(
+          dbgs() << "  Rotating loop to make the exit count computable.\n");
       SimplifyQuery SQ{OrigHeader->getDataLayout()};
       SQ.TLI = nullptr;
       SQ.DT = DT;
diff --git a/llvm/test/Transforms/LoopUnroll/X86/runtime-unroll-after-rotate.ll b/llvm/test/Transforms/LoopUnroll/X86/runtime-unroll-after-rotate.ll
index 20803f1c95c08..7feb96bd3b855 100644
--- a/llvm/test/Transforms/LoopUnroll/X86/runtime-unroll-after-rotate.ll
+++ b/llvm/test/Transforms/LoopUnroll/X86/runtime-unroll-after-rotate.ll
@@ -2,15 +2,15 @@
 ; RUN: opt --passes=loop-unroll -unroll-runtime-other-exit-predictable=1 -S %s | FileCheck %s
 target triple = "x86_64-unknown-linux-gnu"
 
-define void @test(i64 %0) #0 {
+define void @test(i64 %0, ptr %1) #0 {
 ; CHECK-LABEL: define void @test(
-; CHECK-SAME: i64 [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-SAME: i64 [[TMP0:%.*]], ptr [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[B1:%.*]] = icmp eq i64 [[TMP0]], 0
 ; CHECK-NEXT:    br i1 [[B1]], label %[[AFTER:.*]], label %[[BODY_LR_PH:.*]]
 ; CHECK:       [[BODY_LR_PH]]:
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i64 0, [[TMP0]]
-; CHECK-NEXT:    [[TMP2:%.*]] = freeze i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = sub i64 0, [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = freeze i64 [[TMP5]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[TMP2]], -1
 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP2]], 3
 ; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
@@ -21,7 +21,7 @@ define void @test(i64 %0) #0 {
 ; CHECK-NEXT:    [[A2_PROL:%.*]] = phi i64 [ [[TMP0]], %[[BODY_PROL_PREHEADER]] ], [ [[A_PROL:%.*]], %[[HEADER_PROL:.*]] ]
 ; CHECK-NEXT:    [[PROL_ITER:%.*]] = phi i64 [ 0, %[[BODY_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], %[[HEADER_PROL]] ]
 ; CHECK-NEXT:    [[C_PROL:%.*]] = add i64 [[A2_PROL]], 1
-; CHECK-NEXT:    [[D_PROL:%.*]] = load i32, ptr addrspace(1) null, align 4
+; CHECK-NEXT:    [[D_PROL:%.*]] = load i32, ptr [[TMP1]], align 4
 ; CHECK-NEXT:    [[E_PROL:%.*]] = icmp eq i32 [[D_PROL]], 0
 ; CHECK-NEXT:    br i1 [[E_PROL]], label %[[END_LOOPEXIT3:.*]], label %[[HEADER_PROL]]
 ; CHECK:       [[HEADER_PROL]]:
@@ -51,7 +51,7 @@ define void @test(i64 %0) #0 {
 ; CHECK-NEXT:    br i1 [[B_7]], label %[[HEADER_AFTER_CRIT_EDGE_UNR_LCSSA:.*]], label %[[BODY]]
 ; CHECK:       [[BODY]]:
 ; CHECK-NEXT:    [[A2]] = phi i64 [ [[A2_UNR]], %[[BODY_LR_PH_NEW]] ], [ [[C_7]], %[[HEADER_3]] ]
-; CHECK-NEXT:    [[D:%.*]] = load i32, ptr addrspace(1) null, align 4
+; CHECK-NEXT:    [[D:%.*]] = load i32, ptr [[TMP1]], align 4
 ; CHECK-NEXT:    [[E:%.*]] = icmp eq i32 [[D]], 0
 ; CHECK-NEXT:    br i1 [[E]], label %[[END_LOOPEXIT]], label %[[HEADER]]
 ; CHECK:       [[END_LOOPEXIT]]:
@@ -65,7 +65,7 @@ define void @test(i64 %0) #0 {
 ; CHECK:       [[HEADER_AFTER_CRIT_EDGE]]:
 ; CHECK-NEXT:    br label %[[AFTER]]
 ; CHECK:       [[AFTER]]:
-; CHECK-NEXT:    call void @foo(i32 0)
+; CHECK-NEXT:    call void @foo()
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -78,7 +78,7 @@ header:
 
 body:
   %c = add i64 %a, 1
-  %d = load i32, ptr addrspace(1) null, align 4
+  %d = load i32, ptr %1, align 4
   %e = icmp eq i32 %d, 0
   br i1 %e, label %end, label %header
 
@@ -86,11 +86,11 @@ end:
   ret void
 
 after:
-  call void @foo(i32 0)
+  call void @foo()
   ret void
 }
 
-declare void @foo(i32)
+declare void @foo()
 
 attributes #0 = { "tune-cpu"="generic" }
 ;.

>From 9d8cd0b913f24c69ce5761117601570ecec94281 Mon Sep 17 00:00:00 2001
From: Marek Sedlacek <msedlacek at azul.com>
Date: Mon, 7 Jul 2025 12:22:20 +0000
Subject: [PATCH 3/3] Added indication of loop rotation when no unroll happens

---
 llvm/include/llvm/Transforms/Utils/UnrollLoop.h |  3 +++
 llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp   |  5 +++--
 llvm/lib/Transforms/Utils/LoopUnroll.cpp        | 16 ++++++++++------
 3 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
index 765c613b04a44..3fe59cad879c9 100644
--- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
+++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
@@ -59,6 +59,9 @@ enum class LoopUnrollResult {
   /// The loop was not modified.
   Unmodified,
 
+  /// The loop was modified (e.g. rotated), but not unrolled.
+  Modified,
+
   /// The loop was partially unrolled -- we still have a loop, but with a
   /// smaller trip count.  We may also have emitted epilogue loop if the loop
   /// had a non-constant trip count.
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index a22d84dcf014d..8b1ab5a9e2181 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -1357,8 +1357,9 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
   ULO.RuntimeUnrollMultiExit = UP.RuntimeUnrollMultiExit;
   LoopUnrollResult UnrollResult = UnrollLoop(
       L, ULO, LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop, AA);
-  if (UnrollResult == LoopUnrollResult::Unmodified)
-    return LoopUnrollResult::Unmodified;
+  if (UnrollResult == LoopUnrollResult::Unmodified ||
+      UnrollResult == LoopUnrollResult::Modified)
+    return UnrollResult;
 
   if (RemainderLoop) {
     std::optional<MDNode *> RemainderLoopID =
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index dacfaec56e541..cfd422710b34a 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -485,6 +485,8 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
 
   assert(ULO.Count > 0);
 
+  LoopUnrollResult Result = LoopUnrollResult::Unmodified;
+
   if (ULO.Runtime && SE) {
     BasicBlock *OrigHeader = L->getHeader();
     BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
@@ -499,10 +501,12 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
       SQ.TLI = nullptr;
       SQ.DT = DT;
       SQ.AC = AC;
-      llvm::LoopRotation(L, LI, TTI, AC, DT, SE, nullptr /*MemorySSAUpdater*/,
-                         SQ, false /*RotationOnly*/, 16 /*Threshold*/,
-                         false /*IsUtilMode*/, false /*PrepareForLTO*/,
-                         [](Loop *, ScalarEvolution *) { return true; });
+      if (llvm::LoopRotation(L, LI, TTI, AC, DT, SE,
+                             nullptr /*MemorySSAUpdater*/, SQ,
+                             false /*RotationOnly*/, 16 /*Threshold*/,
+                             false /*IsUtilMode*/, false /*PrepareForLTO*/,
+                             [](Loop *, ScalarEvolution *) { return true; }))
+        Result = LoopUnrollResult::Modified;
     }
   }
 
@@ -599,7 +603,7 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
   if (!LatchBI || (LatchBI->isConditional() && !LatchIsExiting)) {
     LLVM_DEBUG(
         dbgs() << "Can't unroll; a conditional latch must exit the loop");
-    return LoopUnrollResult::Unmodified;
+    return Result;
   }
 
   assert((!ULO.Runtime || canHaveUnrollRemainder(L)) &&
@@ -620,7 +624,7 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
     else {
       LLVM_DEBUG(dbgs() << "Won't unroll; remainder loop could not be "
                            "generated when assuming runtime trip count\n");
-      return LoopUnrollResult::Unmodified;
+      return Result;
     }
   }
 



More information about the llvm-commits mailing list