[llvm] [LoopUnroll] Rotate loop to make it countable for runtime unrolling (PR #146540)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 1 08:01:31 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Marek Sedláček (mark-sed)

<details>
<summary>Changes</summary>

This patch adds loop rotation to runtime loop unrolling, if this makes the loop countable, which then might enable additional unrolling of the loop.

This patch and POC was co-authored by @<!-- -->annamthomas.

In this implementation I have chosen to use a "filter function" for loop rotate to override existing profitability check which is unaware of the profitability for loop unroll. With this approach the filter function can also be later on used by some other pass/transformation.
Alternatively I was thinking about refactoring the existing loop rotation function to separate the legality check, profitability check and the rotation. One thing which would make this little bit ugly is that the loop rotation can be done in a loop when `MultiRotate` is enabled and for each cycle the legality and profitability check needs to be done.
I am open to any suggestions and also okay with keeping the current approach.

---

Patch is 38.98 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/146540.diff


5 Files Affected:

- (modified) llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h (+8-6) 
- (modified) llvm/lib/Transforms/Utils/LoopRotationUtils.cpp (+16-12) 
- (modified) llvm/lib/Transforms/Utils/LoopUnroll.cpp (+22-2) 
- (added) llvm/test/Transforms/LoopUnroll/X86/runtime-unroll-after-rotate.ll (+99) 
- (modified) llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll (+189-83) 


``````````diff
diff --git a/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h b/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h
index c3643e0f27f94..b1d3b9dd4792e 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h
@@ -13,6 +13,7 @@
 #ifndef LLVM_TRANSFORMS_UTILS_LOOPROTATIONUTILS_H
 #define LLVM_TRANSFORMS_UTILS_LOOPROTATIONUTILS_H
 
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Compiler.h"
 
 namespace llvm {
@@ -32,12 +33,13 @@ class TargetTransformInfo;
 /// header. If the loop header's size exceeds the threshold, the loop rotation
 /// will give up. The flag IsUtilMode controls the heuristic used in the
 /// LoopRotation. If it is true, the profitability heuristic will be ignored.
-LLVM_ABI bool LoopRotation(Loop *L, LoopInfo *LI,
-                           const TargetTransformInfo *TTI, AssumptionCache *AC,
-                           DominatorTree *DT, ScalarEvolution *SE,
-                           MemorySSAUpdater *MSSAU, const SimplifyQuery &SQ,
-                           bool RotationOnly, unsigned Threshold,
-                           bool IsUtilMode, bool PrepareForLTO = false);
+LLVM_ABI bool LoopRotation(
+    Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI, AssumptionCache *AC,
+    DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
+    const SimplifyQuery &SQ, bool RotationOnly, unsigned Threshold,
+    bool IsUtilMode, bool PrepareForLTO = false,
+    function_ref<bool(Loop *, ScalarEvolution *)> profitabilityCheck =
+        [](Loop *, ScalarEvolution *) { return false; });
 
 } // namespace llvm
 
diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
index 66d0573e83f65..d8fa24347f3a9 100644
--- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -69,16 +69,19 @@ class LoopRotate {
   bool RotationOnly;
   bool IsUtilMode;
   bool PrepareForLTO;
+  function_ref<bool(Loop *, ScalarEvolution *)> profitabilityCheck;
 
 public:
   LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI,
              const TargetTransformInfo *TTI, AssumptionCache *AC,
              DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
              const SimplifyQuery &SQ, bool RotationOnly, bool IsUtilMode,
-             bool PrepareForLTO)
+             bool PrepareForLTO,
+             function_ref<bool(Loop *, ScalarEvolution *)> profitabilityCheck)
       : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE),
         MSSAU(MSSAU), SQ(SQ), RotationOnly(RotationOnly),
-        IsUtilMode(IsUtilMode), PrepareForLTO(PrepareForLTO) {}
+        IsUtilMode(IsUtilMode), PrepareForLTO(PrepareForLTO),
+        profitabilityCheck(profitabilityCheck) {}
   bool processLoop(Loop *L);
 
 private:
@@ -440,9 +443,9 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
 
     // Rotate if either the loop latch does *not* exit the loop, or if the loop
     // latch was just simplified. Or if we think it will be profitable.
-    if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch && IsUtilMode == false &&
-        !profitableToRotateLoopExitingLatch(L) &&
-        !canRotateDeoptimizingLatchExit(L))
+    if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch &&
+        IsUtilMode == false && !profitableToRotateLoopExitingLatch(L) &&
+        !canRotateDeoptimizingLatchExit(L) && !profitabilityCheck(L, SE))
       return Rotated;
 
     // Check size of original header and reject loop if it is very big or we can't
@@ -1053,13 +1056,14 @@ bool LoopRotate::processLoop(Loop *L) {
 
 
 /// The utility to convert a loop into a loop with bottom test.
-bool llvm::LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI,
-                        AssumptionCache *AC, DominatorTree *DT,
-                        ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
-                        const SimplifyQuery &SQ, bool RotationOnly = true,
-                        unsigned Threshold = unsigned(-1),
-                        bool IsUtilMode = true, bool PrepareForLTO) {
+bool llvm::LoopRotation(
+    Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI, AssumptionCache *AC,
+    DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
+    const SimplifyQuery &SQ, bool RotationOnly = true,
+    unsigned Threshold = unsigned(-1), bool IsUtilMode = true,
+    bool PrepareForLTO,
+    function_ref<bool(Loop *, ScalarEvolution *)> profitabilityCheck) {
   LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, MSSAU, SQ, RotationOnly,
-                IsUtilMode, PrepareForLTO);
+                IsUtilMode, PrepareForLTO, profitabilityCheck);
   return LR.processLoop(L);
 }
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 86b268de43cf6..17bf8816c888a 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -58,6 +58,7 @@
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopRotationUtils.h"
 #include "llvm/Transforms/Utils/LoopSimplify.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
@@ -484,8 +485,27 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
 
   assert(ULO.Count > 0);
 
-  // All these values should be taken only after peeling because they might have
-  // changed.
+  if (ULO.Runtime && SE) {
+    BasicBlock *OrigHeader = L->getHeader();
+    BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
+    // Rotate loop if it makes it countable (for later unrolling)
+    if (BI && !BI->isUnconditional() &&
+        isa<SCEVCouldNotCompute>(SE->getExitCount(L, L->getLoopLatch())) &&
+        !isa<SCEVCouldNotCompute>(SE->getExitCount(L, OrigHeader))) {
+      LLVM_DEBUG(dbgs() << "  Rotating loop to make the loop countable.\n");
+      SimplifyQuery SQ{OrigHeader->getDataLayout()};
+      SQ.TLI = nullptr;
+      SQ.DT = DT;
+      SQ.AC = AC;
+      llvm::LoopRotation(L, LI, TTI, AC, DT, SE, nullptr /*MemorySSAUpdater*/,
+                         SQ, false /*RotationOnly*/, 16 /*Threshold*/,
+                         false /*IsUtilMode*/, false /*PrepareForLTO*/,
+                         [](Loop *, ScalarEvolution *) { return true; });
+    }
+  }
+
+  // All these values should be taken only after peeling or loop rotation
+  // because they might have changed.
   BasicBlock *Preheader = L->getLoopPreheader();
   BasicBlock *Header = L->getHeader();
   BasicBlock *LatchBlock = L->getLoopLatch();
diff --git a/llvm/test/Transforms/LoopUnroll/X86/runtime-unroll-after-rotate.ll b/llvm/test/Transforms/LoopUnroll/X86/runtime-unroll-after-rotate.ll
new file mode 100644
index 0000000000000..20803f1c95c08
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/X86/runtime-unroll-after-rotate.ll
@@ -0,0 +1,99 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt --passes=loop-unroll -unroll-runtime-other-exit-predictable=1 -S %s | FileCheck %s
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @test(i64 %0) #0 {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: i64 [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[B1:%.*]] = icmp eq i64 [[TMP0]], 0
+; CHECK-NEXT:    br i1 [[B1]], label %[[AFTER:.*]], label %[[BODY_LR_PH:.*]]
+; CHECK:       [[BODY_LR_PH]]:
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i64 0, [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = freeze i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[TMP2]], -1
+; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP2]], 3
+; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label %[[BODY_PROL_PREHEADER:.*]], label %[[BODY_PROL_LOOPEXIT:.*]]
+; CHECK:       [[BODY_PROL_PREHEADER]]:
+; CHECK-NEXT:    br label %[[BODY_PROL:.*]]
+; CHECK:       [[BODY_PROL]]:
+; CHECK-NEXT:    [[A2_PROL:%.*]] = phi i64 [ [[TMP0]], %[[BODY_PROL_PREHEADER]] ], [ [[A_PROL:%.*]], %[[HEADER_PROL:.*]] ]
+; CHECK-NEXT:    [[PROL_ITER:%.*]] = phi i64 [ 0, %[[BODY_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], %[[HEADER_PROL]] ]
+; CHECK-NEXT:    [[C_PROL:%.*]] = add i64 [[A2_PROL]], 1
+; CHECK-NEXT:    [[D_PROL:%.*]] = load i32, ptr addrspace(1) null, align 4
+; CHECK-NEXT:    [[E_PROL:%.*]] = icmp eq i32 [[D_PROL]], 0
+; CHECK-NEXT:    br i1 [[E_PROL]], label %[[END_LOOPEXIT3:.*]], label %[[HEADER_PROL]]
+; CHECK:       [[HEADER_PROL]]:
+; CHECK-NEXT:    [[A_PROL]] = phi i64 [ [[C_PROL]], %[[BODY_PROL]] ]
+; CHECK-NEXT:    [[B_PROL:%.*]] = icmp eq i64 [[A_PROL]], 0
+; CHECK-NEXT:    [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
+; CHECK-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
+; CHECK-NEXT:    br i1 [[PROL_ITER_CMP]], label %[[BODY_PROL]], label %[[BODY_PROL_LOOPEXIT_UNR_LCSSA:.*]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[BODY_PROL_LOOPEXIT_UNR_LCSSA]]:
+; CHECK-NEXT:    [[A2_UNR_PH:%.*]] = phi i64 [ [[A_PROL]], %[[HEADER_PROL]] ]
+; CHECK-NEXT:    br label %[[BODY_PROL_LOOPEXIT]]
+; CHECK:       [[BODY_PROL_LOOPEXIT]]:
+; CHECK-NEXT:    [[A2_UNR:%.*]] = phi i64 [ [[TMP0]], %[[BODY_LR_PH]] ], [ [[A2_UNR_PH]], %[[BODY_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 3
+; CHECK-NEXT:    br i1 [[TMP4]], label %[[HEADER_AFTER_CRIT_EDGE:.*]], label %[[BODY_LR_PH_NEW:.*]]
+; CHECK:       [[BODY_LR_PH_NEW]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[HEADER:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[END_LOOPEXIT:.*]], label %[[HEADER_1:.*]]
+; CHECK:       [[HEADER_1]]:
+; CHECK-NEXT:    br i1 false, label %[[END_LOOPEXIT]], label %[[HEADER_2:.*]]
+; CHECK:       [[HEADER_2]]:
+; CHECK-NEXT:    [[C_7:%.*]] = add i64 [[A2:%.*]], 4
+; CHECK-NEXT:    br i1 false, label %[[END_LOOPEXIT]], label %[[HEADER_3:.*]]
+; CHECK:       [[HEADER_3]]:
+; CHECK-NEXT:    [[B_7:%.*]] = icmp eq i64 [[C_7]], 0
+; CHECK-NEXT:    br i1 [[B_7]], label %[[HEADER_AFTER_CRIT_EDGE_UNR_LCSSA:.*]], label %[[BODY]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[A2]] = phi i64 [ [[A2_UNR]], %[[BODY_LR_PH_NEW]] ], [ [[C_7]], %[[HEADER_3]] ]
+; CHECK-NEXT:    [[D:%.*]] = load i32, ptr addrspace(1) null, align 4
+; CHECK-NEXT:    [[E:%.*]] = icmp eq i32 [[D]], 0
+; CHECK-NEXT:    br i1 [[E]], label %[[END_LOOPEXIT]], label %[[HEADER]]
+; CHECK:       [[END_LOOPEXIT]]:
+; CHECK-NEXT:    br label %[[END:.*]]
+; CHECK:       [[END_LOOPEXIT3]]:
+; CHECK-NEXT:    br label %[[END]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret void
+; CHECK:       [[HEADER_AFTER_CRIT_EDGE_UNR_LCSSA]]:
+; CHECK-NEXT:    br label %[[HEADER_AFTER_CRIT_EDGE]]
+; CHECK:       [[HEADER_AFTER_CRIT_EDGE]]:
+; CHECK-NEXT:    br label %[[AFTER]]
+; CHECK:       [[AFTER]]:
+; CHECK-NEXT:    call void @foo(i32 0)
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %header
+
+header:
+  %a = phi i64 [ %0, %entry ], [ %c, %body ]
+  %b = icmp eq i64 %a, 0
+  br i1 %b, label %after, label %body
+
+body:
+  %c = add i64 %a, 1
+  %d = load i32, ptr addrspace(1) null, align 4
+  %e = icmp eq i32 %d, 0
+  br i1 %e, label %end, label %header
+
+end:
+  ret void
+
+after:
+  call void @foo(i32 0)
+  ret void
+}
+
+declare void @foo(i32)
+
+attributes #0 = { "tune-cpu"="generic" }
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.unroll.disable"}
+;.
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll
index de54852313456..b079abefaea65 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll
@@ -16,48 +16,86 @@ define i64 @test1() {
 ; CHECK-NEXT:    br label [[PREHEADER:%.*]]
 ; CHECK:       preheader:
 ; CHECK-NEXT:    [[TRIP:%.*]] = zext i32 undef to i64
+; CHECK-NEXT:    br i1 false, label [[LATCH_LR_PH:%.*]], label [[HEADEREXIT:%.*]]
+; CHECK:       latch.lr.ph:
+; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i64 [[TRIP]], -5
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = freeze i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[TMP3]], -1
+; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP3]], 3
+; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[LATCH_PROL_PREHEADER:%.*]], label [[LATCH_PROL_LOOPEXIT:%.*]]
+; CHECK:       latch.prol.preheader:
 ; CHECK-NEXT:    br label [[HEADER:%.*]]
-; CHECK:       header:
-; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 2, [[PREHEADER]] ], [ [[ADD_IV_3:%.*]], [[LATCH_3:%.*]] ]
-; CHECK-NEXT:    [[ADD_IV:%.*]] = add nuw nsw i64 [[IV]], 2
+; CHECK:       latch.prol:
+; CHECK-NEXT:    [[ADD_IV1_PROL:%.*]] = phi i64 [ 4, [[LATCH_PROL_PREHEADER]] ], [ [[ADD_IV:%.*]], [[HEADER_PROL:%.*]] ]
+; CHECK-NEXT:    [[PROL_ITER:%.*]] = phi i64 [ 0, [[LATCH_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], [[HEADER_PROL]] ]
+; CHECK-NEXT:    [[SHFT_PROL:%.*]] = ashr i64 [[ADD_IV1_PROL]], 1
+; CHECK-NEXT:    [[CMP2_PROL:%.*]] = icmp ult i64 [[SHFT_PROL]], [[TRIP]]
+; CHECK-NEXT:    br i1 [[CMP2_PROL]], label [[HEADER_PROL]], label [[LATCHEXIT_LOOPEXIT2:%.*]]
+; CHECK:       header.prol:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[ADD_IV1_PROL]], [[HEADER]] ]
+; CHECK-NEXT:    [[ADD_IV]] = add nuw nsw i64 [[IV]], 2
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult i64 [[ADD_IV]], [[TRIP]]
-; CHECK-NEXT:    br i1 [[CMP1]], label [[LATCH:%.*]], label [[HEADEREXIT:%.*]]
-; CHECK:       latch:
-; CHECK-NEXT:    [[SHFT:%.*]] = ashr i64 [[ADD_IV]], 1
+; CHECK-NEXT:    [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
+; CHECK-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
+; CHECK-NEXT:    br i1 [[PROL_ITER_CMP]], label [[HEADER]], label [[LATCH_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       latch.prol.loopexit.unr-lcssa:
+; CHECK-NEXT:    [[ADD_IV1_UNR_PH:%.*]] = phi i64 [ [[ADD_IV]], [[HEADER_PROL]] ]
+; CHECK-NEXT:    [[SPLIT_UNR_PH:%.*]] = phi i64 [ [[ADD_IV]], [[HEADER_PROL]] ]
+; CHECK-NEXT:    br label [[LATCH_PROL_LOOPEXIT]]
+; CHECK:       latch.prol.loopexit:
+; CHECK-NEXT:    [[ADD_IV1_UNR:%.*]] = phi i64 [ 4, [[LATCH_LR_PH]] ], [ [[ADD_IV1_UNR_PH]], [[LATCH_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[SPLIT_UNR:%.*]] = phi i64 [ poison, [[LATCH_LR_PH]] ], [ [[SPLIT_UNR_PH]], [[LATCH_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 3
+; CHECK-NEXT:    br i1 [[TMP5]], label [[HEADER_HEADEREXIT_CRIT_EDGE:%.*]], label [[LATCH_LR_PH_NEW:%.*]]
+; CHECK:       latch.lr.ph.new:
+; CHECK-NEXT:    br label [[LATCH:%.*]]
+; CHECK:       header:
+; CHECK-NEXT:    [[ADD_IV2:%.*]] = add nuw nsw i64 [[ADD_IV1:%.*]], 2
+; CHECK-NEXT:    [[SHFT:%.*]] = ashr i64 [[ADD_IV2]], 1
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult i64 [[SHFT]], [[TRIP]]
 ; CHECK-NEXT:    br i1 [[CMP2]], label [[HEADER_1:%.*]], label [[LATCHEXIT:%.*]]
 ; CHECK:       header.1:
-; CHECK-NEXT:    [[ADD_IV_1:%.*]] = add nuw nsw i64 [[IV]], 4
-; CHECK-NEXT:    [[CMP1_1:%.*]] = icmp ult i64 [[ADD_IV_1]], [[TRIP]]
-; CHECK-NEXT:    br i1 [[CMP1_1]], label [[LATCH_1:%.*]], label [[HEADEREXIT]]
-; CHECK:       latch.1:
+; CHECK-NEXT:    [[ADD_IV_1:%.*]] = add nuw nsw i64 [[ADD_IV1]], 4
 ; CHECK-NEXT:    [[SHFT_1:%.*]] = ashr i64 [[ADD_IV_1]], 1
 ; CHECK-NEXT:    [[CMP2_1:%.*]] = icmp ult i64 [[SHFT_1]], [[TRIP]]
 ; CHECK-NEXT:    br i1 [[CMP2_1]], label [[HEADER_2:%.*]], label [[LATCHEXIT]]
 ; CHECK:       header.2:
-; CHECK-NEXT:    [[ADD_IV_2:%.*]] = add nuw nsw i64 [[IV]], 6
-; CHECK-NEXT:    [[CMP1_2:%.*]] = icmp ult i64 [[ADD_IV_2]], [[TRIP]]
-; CHECK-NEXT:    br i1 [[CMP1_2]], label [[LATCH_2:%.*]], label [[HEADEREXIT]]
-; CHECK:       latch.2:
+; CHECK-NEXT:    [[ADD_IV_2:%.*]] = add nuw nsw i64 [[ADD_IV1]], 6
 ; CHECK-NEXT:    [[SHFT_2:%.*]] = ashr i64 [[ADD_IV_2]], 1
 ; CHECK-NEXT:    [[CMP2_2:%.*]] = icmp ult i64 [[SHFT_2]], [[TRIP]]
 ; CHECK-NEXT:    br i1 [[CMP2_2]], label [[HEADER_3:%.*]], label [[LATCHEXIT]]
 ; CHECK:       header.3:
-; CHECK-NEXT:    [[ADD_IV_3]] = add nuw nsw i64 [[IV]], 8
+; CHECK-NEXT:    [[ADD_IV_3:%.*]] = add nuw nsw i64 [[ADD_IV1]], 8
 ; CHECK-NEXT:    [[CMP1_3:%.*]] = icmp ult i64 [[ADD_IV_3]], [[TRIP]]
-; CHECK-NEXT:    br i1 [[CMP1_3]], label [[LATCH_3]], label [[HEADEREXIT]]
-; CHECK:       latch.3:
-; CHECK-NEXT:    [[SHFT_3:%.*]] = ashr i64 [[ADD_IV_3]], 1
+; CHECK-NEXT:    br i1 [[CMP1_3]], label [[LATCH]], label [[HEADER_HEADEREXIT_CRIT_EDGE_UNR_LCSSA:%.*]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK:       latch:
+; CHECK-NEXT:    [[ADD_IV1]] = phi i64 [ [[ADD_IV1_UNR]], [[LATCH_LR_PH_NEW]] ], [ [[ADD_IV_3]], [[HEADER_3]] ]
+; CHECK-NEXT:    [[SHFT_3:%.*]] = ashr i64 [[ADD_IV1]], 1
 ; CHECK-NEXT:    [[CMP2_3:%.*]] = icmp ult i64 [[SHFT_3]], [[TRIP]]
-; CHECK-NEXT:    br i1 [[CMP2_3]], label [[HEADER]], label [[LATCHEXIT]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    br i1 [[CMP2_3]], label [[HEADER1:%.*]], label [[LATCHEXIT]]
+; CHECK:       header.headerexit_crit_edge.unr-lcssa:
+; CHECK-NEXT:    [[SPLIT_PH:%.*]] = phi i64 [ [[ADD_IV_3]], [[HEADER_3]] ]
+; CHECK-NEXT:    br label [[HEADER_HEADEREXIT_CRIT_EDGE]]
+; CHECK:       header.headerexit_crit_edge:
+; CHECK-NEXT:    [[SPLIT:%.*]] = phi i64 [ [[SPLIT_UNR]], [[LATCH_PROL_LOOPEXIT]] ], [ [[SPLIT_PH]], [[HEADER_HEADEREXIT_CRIT_EDGE_UNR_LCSSA]] ]
+; CHECK-NEXT:    br label [[HEADEREXIT]]
 ; CHECK:       headerexit:
-; CHECK-NEXT:    [[ADDPHI:%.*]] = phi i64 [ [[ADD_IV]], [[HEADER]] ], [ [[ADD_IV_1]], [[HEADER_1]] ], [ [[ADD_IV_2]], [[HEADER_2]] ], [ [[ADD_IV_3]], [[HEADER_3]] ]
+; CHECK-NEXT:    [[ADDPHI:%.*]] = phi i64 [ [[SPLIT]], [[HEADER_HEADEREXIT_CRIT_EDGE]] ], [ 4, [[PREHEADER]] ]
+; CHECK-NEXT:    br label [[MERGEDEXIT1:%.*]]
+; CHECK:       latchexit.loopexit:
+; CHECK-NEXT:    [[SHFTPHI_PH:%.*]] = phi i64 [ [[SHFT_3]], [[LATCH]] ], [ [[SHFT]], [[HEADER1]] ], [ [[SHFT_1]], [[HEADER_1]] ], [ [[SHFT_2]], [[HEADER_2]] ]
 ; CHECK-NEXT:    br label [[MERGEDEXIT:%.*]]
-; CHECK:       latchexit:
-; CHECK-NEXT:    [[SHFTPHI:%.*]] = phi i64 [ [[SHFT]], [[LATCH]] ], [ [[SHFT_1]], [[LATCH_1]] ], [ [[SHFT_2]], [[LATCH_2]] ], [ [[SHFT_3]], [[LATCH_3]] ]
+; CHECK:       latchexit.loopexit2:
+; CHECK-NEXT:    [[SHFTPHI_PH3:%.*]] = phi i64 [ [[SHFT_PROL]], [[HEADER]] ]
 ; CHECK-NEXT:    br label [[MERGEDEXIT]]
+; CHECK:       latchexit:
+; CHECK-NEXT:    [[SHFTPHI:%.*]] = phi i64 [ [[SHFTPHI_PH]], [[LATCHEXIT]] ], [ [[SHFTPHI_PH3]], [[LATCHEXIT_LOOPEXIT2]] ]
+; CHECK-NEXT:    br label [[MERGEDEXIT1]]
 ; CHECK:       mergedexit:
-; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ [[ADDPHI]], [[HEADEREXIT]] ], [ [[SHFTPHI]], [[LATCHEXIT]] ]
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ [[ADDPHI]], [[HEADEREXIT]] ], [ [[SHFTPHI]], [[MERGEDEXIT]] ]
 ; CHECK-NEXT:    ret i64 [[RETVAL]]
 ;
 entry:
@@ -98,42 +136,75 @@ define  void @test2(i1 %cond, i32 %n) {
 ; CHECK-NEXT:    br i1 [[COND:%.*]], label [[PREHEADER:%.*]], label [[MERGEDEXIT:%.*]]
 ; CHECK:       preheader:
 ; CHECK-NEXT:    [[TRIP:%.*]] = zext i32 [[N:%.*]] to i64
+; CHECK-NEXT:    [[CMP11:%.*]] = icmp ult i64 4, [[TRIP]]
+; CHECK-NEXT:    br i1 [[CMP11]], label [[LATCH_LR_PH:%.*]], label [[HEADEREXIT:%.*]]
+; CHECK:       latch.lr.ph:
+; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i64 [[TRIP]], -5
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = freeze i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[TMP3]], -1
+; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP3]], 3
+; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[LATCH_PROL_PREHEADER:%.*]], label [[LATCH_PROL_LOOPEXIT:%.*]]
+; CHECK:       latch.prol.preheader:
 ; CHECK-NEXT:    br label [[HEADER:%.*]]
-; CHECK:       header:
-; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 2, [[PREHEADER]] ], [ [[ADD_IV_3:%.*]], [[LATCH_3:%.*]] ]
-; CHECK-NEXT:    [[ADD_IV:%.*]] = add nuw nsw i64 [[IV]], 2
+; CHECK:       latch.prol:
+; CHECK-NEXT:    [[ADD_IV2_PROL:%.*]] = phi i64 [ 4, [[LATCH_PROL_PREHEADER]] ], [ [[ADD_IV:%.*]...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/146540


More information about the llvm-commits mailing list