[llvm] [LoopUnroll] Rotate loop to make it countable for runtime unrolling (PR #146540)
Marek Sedláček via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 10 06:05:44 PDT 2025
https://github.com/mark-sed updated https://github.com/llvm/llvm-project/pull/146540
>From e5aabbdfc9bb75d0ff8855cebc933d8057c64345 Mon Sep 17 00:00:00 2001
From: Marek Sedlacek <msedlacek at azul.com>
Date: Tue, 1 Jul 2025 14:45:50 +0000
Subject: [PATCH 1/4] This patch adds loop rotation to runtime loop unrolling
if this makes the loop countable, which then might enable additional
unrolling of the loop.
---
.../llvm/Transforms/Utils/LoopRotationUtils.h | 14 +-
.../Transforms/Utils/LoopRotationUtils.cpp | 28 +-
llvm/lib/Transforms/Utils/LoopUnroll.cpp | 24 +-
.../X86/runtime-unroll-after-rotate.ll | 99 +++++++
.../runtime-loop-multiexit-dom-verify.ll | 272 ++++++++++++------
5 files changed, 334 insertions(+), 103 deletions(-)
create mode 100644 llvm/test/Transforms/LoopUnroll/X86/runtime-unroll-after-rotate.ll
diff --git a/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h b/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h
index c3643e0f27f94..b1d3b9dd4792e 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h
@@ -13,6 +13,7 @@
#ifndef LLVM_TRANSFORMS_UTILS_LOOPROTATIONUTILS_H
#define LLVM_TRANSFORMS_UTILS_LOOPROTATIONUTILS_H
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Compiler.h"
namespace llvm {
@@ -32,12 +33,13 @@ class TargetTransformInfo;
/// header. If the loop header's size exceeds the threshold, the loop rotation
/// will give up. The flag IsUtilMode controls the heuristic used in the
/// LoopRotation. If it is true, the profitability heuristic will be ignored.
-LLVM_ABI bool LoopRotation(Loop *L, LoopInfo *LI,
- const TargetTransformInfo *TTI, AssumptionCache *AC,
- DominatorTree *DT, ScalarEvolution *SE,
- MemorySSAUpdater *MSSAU, const SimplifyQuery &SQ,
- bool RotationOnly, unsigned Threshold,
- bool IsUtilMode, bool PrepareForLTO = false);
+LLVM_ABI bool LoopRotation(
+ Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI, AssumptionCache *AC,
+ DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
+ const SimplifyQuery &SQ, bool RotationOnly, unsigned Threshold,
+ bool IsUtilMode, bool PrepareForLTO = false,
+ function_ref<bool(Loop *, ScalarEvolution *)> profitabilityCheck =
+ [](Loop *, ScalarEvolution *) { return false; });
} // namespace llvm
diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
index 66d0573e83f65..d8fa24347f3a9 100644
--- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -69,16 +69,19 @@ class LoopRotate {
bool RotationOnly;
bool IsUtilMode;
bool PrepareForLTO;
+ function_ref<bool(Loop *, ScalarEvolution *)> profitabilityCheck;
public:
LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI,
const TargetTransformInfo *TTI, AssumptionCache *AC,
DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
const SimplifyQuery &SQ, bool RotationOnly, bool IsUtilMode,
- bool PrepareForLTO)
+ bool PrepareForLTO,
+ function_ref<bool(Loop *, ScalarEvolution *)> profitabilityCheck)
: MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE),
MSSAU(MSSAU), SQ(SQ), RotationOnly(RotationOnly),
- IsUtilMode(IsUtilMode), PrepareForLTO(PrepareForLTO) {}
+ IsUtilMode(IsUtilMode), PrepareForLTO(PrepareForLTO),
+ profitabilityCheck(profitabilityCheck) {}
bool processLoop(Loop *L);
private:
@@ -440,9 +443,9 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// Rotate if either the loop latch does *not* exit the loop, or if the loop
// latch was just simplified. Or if we think it will be profitable.
- if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch && IsUtilMode == false &&
- !profitableToRotateLoopExitingLatch(L) &&
- !canRotateDeoptimizingLatchExit(L))
+ if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch &&
+ IsUtilMode == false && !profitableToRotateLoopExitingLatch(L) &&
+ !canRotateDeoptimizingLatchExit(L) && !profitabilityCheck(L, SE))
return Rotated;
// Check size of original header and reject loop if it is very big or we can't
@@ -1053,13 +1056,14 @@ bool LoopRotate::processLoop(Loop *L) {
/// The utility to convert a loop into a loop with bottom test.
-bool llvm::LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI,
- AssumptionCache *AC, DominatorTree *DT,
- ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
- const SimplifyQuery &SQ, bool RotationOnly = true,
- unsigned Threshold = unsigned(-1),
- bool IsUtilMode = true, bool PrepareForLTO) {
+bool llvm::LoopRotation(
+ Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI, AssumptionCache *AC,
+ DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
+ const SimplifyQuery &SQ, bool RotationOnly = true,
+ unsigned Threshold = unsigned(-1), bool IsUtilMode = true,
+ bool PrepareForLTO,
+ function_ref<bool(Loop *, ScalarEvolution *)> profitabilityCheck) {
LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, MSSAU, SQ, RotationOnly,
- IsUtilMode, PrepareForLTO);
+ IsUtilMode, PrepareForLTO, profitabilityCheck);
return LR.processLoop(L);
}
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 86b268de43cf6..17bf8816c888a 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -58,6 +58,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopRotationUtils.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
@@ -484,8 +485,27 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
assert(ULO.Count > 0);
- // All these values should be taken only after peeling because they might have
- // changed.
+ if (ULO.Runtime && SE) {
+ BasicBlock *OrigHeader = L->getHeader();
+ BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
+ // Rotate the loop if this makes its latch exit countable for unrolling.
+ if (BI && !BI->isUnconditional() &&
+ isa<SCEVCouldNotCompute>(SE->getExitCount(L, L->getLoopLatch())) &&
+ !isa<SCEVCouldNotCompute>(SE->getExitCount(L, OrigHeader))) {
+ LLVM_DEBUG(dbgs() << " Rotating loop to make the loop countable.\n");
+ SimplifyQuery SQ{OrigHeader->getDataLayout()};
+ SQ.TLI = nullptr;
+ SQ.DT = DT;
+ SQ.AC = AC;
+ llvm::LoopRotation(L, LI, TTI, AC, DT, SE, nullptr /*MemorySSAUpdater*/,
+ SQ, false /*RotationOnly*/, 16 /*Threshold*/,
+ false /*IsUtilMode*/, false /*PrepareForLTO*/,
+ [](Loop *, ScalarEvolution *) { return true; });
+ }
+ }
+
+ // All these values should be taken only after peeling or loop rotation
+ // because they might have changed.
BasicBlock *Preheader = L->getLoopPreheader();
BasicBlock *Header = L->getHeader();
BasicBlock *LatchBlock = L->getLoopLatch();
diff --git a/llvm/test/Transforms/LoopUnroll/X86/runtime-unroll-after-rotate.ll b/llvm/test/Transforms/LoopUnroll/X86/runtime-unroll-after-rotate.ll
new file mode 100644
index 0000000000000..20803f1c95c08
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/X86/runtime-unroll-after-rotate.ll
@@ -0,0 +1,99 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt --passes=loop-unroll -unroll-runtime-other-exit-predictable=1 -S %s | FileCheck %s
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @test(i64 %0) #0 {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: i64 [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[B1:%.*]] = icmp eq i64 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[B1]], label %[[AFTER:.*]], label %[[BODY_LR_PH:.*]]
+; CHECK: [[BODY_LR_PH]]:
+; CHECK-NEXT: [[TMP1:%.*]] = sub i64 0, [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = freeze i64 [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], -1
+; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP2]], 3
+; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[BODY_PROL_PREHEADER:.*]], label %[[BODY_PROL_LOOPEXIT:.*]]
+; CHECK: [[BODY_PROL_PREHEADER]]:
+; CHECK-NEXT: br label %[[BODY_PROL:.*]]
+; CHECK: [[BODY_PROL]]:
+; CHECK-NEXT: [[A2_PROL:%.*]] = phi i64 [ [[TMP0]], %[[BODY_PROL_PREHEADER]] ], [ [[A_PROL:%.*]], %[[HEADER_PROL:.*]] ]
+; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ 0, %[[BODY_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], %[[HEADER_PROL]] ]
+; CHECK-NEXT: [[C_PROL:%.*]] = add i64 [[A2_PROL]], 1
+; CHECK-NEXT: [[D_PROL:%.*]] = load i32, ptr addrspace(1) null, align 4
+; CHECK-NEXT: [[E_PROL:%.*]] = icmp eq i32 [[D_PROL]], 0
+; CHECK-NEXT: br i1 [[E_PROL]], label %[[END_LOOPEXIT3:.*]], label %[[HEADER_PROL]]
+; CHECK: [[HEADER_PROL]]:
+; CHECK-NEXT: [[A_PROL]] = phi i64 [ [[C_PROL]], %[[BODY_PROL]] ]
+; CHECK-NEXT: [[B_PROL:%.*]] = icmp eq i64 [[A_PROL]], 0
+; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
+; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
+; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label %[[BODY_PROL]], label %[[BODY_PROL_LOOPEXIT_UNR_LCSSA:.*]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[BODY_PROL_LOOPEXIT_UNR_LCSSA]]:
+; CHECK-NEXT: [[A2_UNR_PH:%.*]] = phi i64 [ [[A_PROL]], %[[HEADER_PROL]] ]
+; CHECK-NEXT: br label %[[BODY_PROL_LOOPEXIT]]
+; CHECK: [[BODY_PROL_LOOPEXIT]]:
+; CHECK-NEXT: [[A2_UNR:%.*]] = phi i64 [ [[TMP0]], %[[BODY_LR_PH]] ], [ [[A2_UNR_PH]], %[[BODY_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 3
+; CHECK-NEXT: br i1 [[TMP4]], label %[[HEADER_AFTER_CRIT_EDGE:.*]], label %[[BODY_LR_PH_NEW:.*]]
+; CHECK: [[BODY_LR_PH_NEW]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[HEADER:.*]]:
+; CHECK-NEXT: br i1 false, label %[[END_LOOPEXIT:.*]], label %[[HEADER_1:.*]]
+; CHECK: [[HEADER_1]]:
+; CHECK-NEXT: br i1 false, label %[[END_LOOPEXIT]], label %[[HEADER_2:.*]]
+; CHECK: [[HEADER_2]]:
+; CHECK-NEXT: [[C_7:%.*]] = add i64 [[A2:%.*]], 4
+; CHECK-NEXT: br i1 false, label %[[END_LOOPEXIT]], label %[[HEADER_3:.*]]
+; CHECK: [[HEADER_3]]:
+; CHECK-NEXT: [[B_7:%.*]] = icmp eq i64 [[C_7]], 0
+; CHECK-NEXT: br i1 [[B_7]], label %[[HEADER_AFTER_CRIT_EDGE_UNR_LCSSA:.*]], label %[[BODY]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[A2]] = phi i64 [ [[A2_UNR]], %[[BODY_LR_PH_NEW]] ], [ [[C_7]], %[[HEADER_3]] ]
+; CHECK-NEXT: [[D:%.*]] = load i32, ptr addrspace(1) null, align 4
+; CHECK-NEXT: [[E:%.*]] = icmp eq i32 [[D]], 0
+; CHECK-NEXT: br i1 [[E]], label %[[END_LOOPEXIT]], label %[[HEADER]]
+; CHECK: [[END_LOOPEXIT]]:
+; CHECK-NEXT: br label %[[END:.*]]
+; CHECK: [[END_LOOPEXIT3]]:
+; CHECK-NEXT: br label %[[END]]
+; CHECK: [[END]]:
+; CHECK-NEXT: ret void
+; CHECK: [[HEADER_AFTER_CRIT_EDGE_UNR_LCSSA]]:
+; CHECK-NEXT: br label %[[HEADER_AFTER_CRIT_EDGE]]
+; CHECK: [[HEADER_AFTER_CRIT_EDGE]]:
+; CHECK-NEXT: br label %[[AFTER]]
+; CHECK: [[AFTER]]:
+; CHECK-NEXT: call void @foo(i32 0)
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %header
+
+header:
+ %a = phi i64 [ %0, %entry ], [ %c, %body ]
+ %b = icmp eq i64 %a, 0
+ br i1 %b, label %after, label %body
+
+body:
+ %c = add i64 %a, 1
+ %d = load i32, ptr addrspace(1) null, align 4
+ %e = icmp eq i32 %d, 0
+ br i1 %e, label %end, label %header
+
+end:
+ ret void
+
+after:
+ call void @foo(i32 0)
+ ret void
+}
+
+declare void @foo(i32)
+
+attributes #0 = { "tune-cpu"="generic" }
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.unroll.disable"}
+;.
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll
index de54852313456..b079abefaea65 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll
@@ -16,48 +16,86 @@ define i64 @test1() {
; CHECK-NEXT: br label [[PREHEADER:%.*]]
; CHECK: preheader:
; CHECK-NEXT: [[TRIP:%.*]] = zext i32 undef to i64
+; CHECK-NEXT: br i1 false, label [[LATCH_LR_PH:%.*]], label [[HEADEREXIT:%.*]]
+; CHECK: latch.lr.ph:
+; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[TRIP]], -5
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = freeze i64 [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP3]], -1
+; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP3]], 3
+; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[LATCH_PROL_PREHEADER:%.*]], label [[LATCH_PROL_LOOPEXIT:%.*]]
+; CHECK: latch.prol.preheader:
; CHECK-NEXT: br label [[HEADER:%.*]]
-; CHECK: header:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 2, [[PREHEADER]] ], [ [[ADD_IV_3:%.*]], [[LATCH_3:%.*]] ]
-; CHECK-NEXT: [[ADD_IV:%.*]] = add nuw nsw i64 [[IV]], 2
+; CHECK: latch.prol:
+; CHECK-NEXT: [[ADD_IV1_PROL:%.*]] = phi i64 [ 4, [[LATCH_PROL_PREHEADER]] ], [ [[ADD_IV:%.*]], [[HEADER_PROL:%.*]] ]
+; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ 0, [[LATCH_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], [[HEADER_PROL]] ]
+; CHECK-NEXT: [[SHFT_PROL:%.*]] = ashr i64 [[ADD_IV1_PROL]], 1
+; CHECK-NEXT: [[CMP2_PROL:%.*]] = icmp ult i64 [[SHFT_PROL]], [[TRIP]]
+; CHECK-NEXT: br i1 [[CMP2_PROL]], label [[HEADER_PROL]], label [[LATCHEXIT_LOOPEXIT2:%.*]]
+; CHECK: header.prol:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[ADD_IV1_PROL]], [[HEADER]] ]
+; CHECK-NEXT: [[ADD_IV]] = add nuw nsw i64 [[IV]], 2
; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i64 [[ADD_IV]], [[TRIP]]
-; CHECK-NEXT: br i1 [[CMP1]], label [[LATCH:%.*]], label [[HEADEREXIT:%.*]]
-; CHECK: latch:
-; CHECK-NEXT: [[SHFT:%.*]] = ashr i64 [[ADD_IV]], 1
+; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
+; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
+; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[HEADER]], label [[LATCH_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: latch.prol.loopexit.unr-lcssa:
+; CHECK-NEXT: [[ADD_IV1_UNR_PH:%.*]] = phi i64 [ [[ADD_IV]], [[HEADER_PROL]] ]
+; CHECK-NEXT: [[SPLIT_UNR_PH:%.*]] = phi i64 [ [[ADD_IV]], [[HEADER_PROL]] ]
+; CHECK-NEXT: br label [[LATCH_PROL_LOOPEXIT]]
+; CHECK: latch.prol.loopexit:
+; CHECK-NEXT: [[ADD_IV1_UNR:%.*]] = phi i64 [ 4, [[LATCH_LR_PH]] ], [ [[ADD_IV1_UNR_PH]], [[LATCH_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT: [[SPLIT_UNR:%.*]] = phi i64 [ poison, [[LATCH_LR_PH]] ], [ [[SPLIT_UNR_PH]], [[LATCH_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 3
+; CHECK-NEXT: br i1 [[TMP5]], label [[HEADER_HEADEREXIT_CRIT_EDGE:%.*]], label [[LATCH_LR_PH_NEW:%.*]]
+; CHECK: latch.lr.ph.new:
+; CHECK-NEXT: br label [[LATCH:%.*]]
+; CHECK: header:
+; CHECK-NEXT: [[ADD_IV2:%.*]] = add nuw nsw i64 [[ADD_IV1:%.*]], 2
+; CHECK-NEXT: [[SHFT:%.*]] = ashr i64 [[ADD_IV2]], 1
; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i64 [[SHFT]], [[TRIP]]
; CHECK-NEXT: br i1 [[CMP2]], label [[HEADER_1:%.*]], label [[LATCHEXIT:%.*]]
; CHECK: header.1:
-; CHECK-NEXT: [[ADD_IV_1:%.*]] = add nuw nsw i64 [[IV]], 4
-; CHECK-NEXT: [[CMP1_1:%.*]] = icmp ult i64 [[ADD_IV_1]], [[TRIP]]
-; CHECK-NEXT: br i1 [[CMP1_1]], label [[LATCH_1:%.*]], label [[HEADEREXIT]]
-; CHECK: latch.1:
+; CHECK-NEXT: [[ADD_IV_1:%.*]] = add nuw nsw i64 [[ADD_IV1]], 4
; CHECK-NEXT: [[SHFT_1:%.*]] = ashr i64 [[ADD_IV_1]], 1
; CHECK-NEXT: [[CMP2_1:%.*]] = icmp ult i64 [[SHFT_1]], [[TRIP]]
; CHECK-NEXT: br i1 [[CMP2_1]], label [[HEADER_2:%.*]], label [[LATCHEXIT]]
; CHECK: header.2:
-; CHECK-NEXT: [[ADD_IV_2:%.*]] = add nuw nsw i64 [[IV]], 6
-; CHECK-NEXT: [[CMP1_2:%.*]] = icmp ult i64 [[ADD_IV_2]], [[TRIP]]
-; CHECK-NEXT: br i1 [[CMP1_2]], label [[LATCH_2:%.*]], label [[HEADEREXIT]]
-; CHECK: latch.2:
+; CHECK-NEXT: [[ADD_IV_2:%.*]] = add nuw nsw i64 [[ADD_IV1]], 6
; CHECK-NEXT: [[SHFT_2:%.*]] = ashr i64 [[ADD_IV_2]], 1
; CHECK-NEXT: [[CMP2_2:%.*]] = icmp ult i64 [[SHFT_2]], [[TRIP]]
; CHECK-NEXT: br i1 [[CMP2_2]], label [[HEADER_3:%.*]], label [[LATCHEXIT]]
; CHECK: header.3:
-; CHECK-NEXT: [[ADD_IV_3]] = add nuw nsw i64 [[IV]], 8
+; CHECK-NEXT: [[ADD_IV_3:%.*]] = add nuw nsw i64 [[ADD_IV1]], 8
; CHECK-NEXT: [[CMP1_3:%.*]] = icmp ult i64 [[ADD_IV_3]], [[TRIP]]
-; CHECK-NEXT: br i1 [[CMP1_3]], label [[LATCH_3]], label [[HEADEREXIT]]
-; CHECK: latch.3:
-; CHECK-NEXT: [[SHFT_3:%.*]] = ashr i64 [[ADD_IV_3]], 1
+; CHECK-NEXT: br i1 [[CMP1_3]], label [[LATCH]], label [[HEADER_HEADEREXIT_CRIT_EDGE_UNR_LCSSA:%.*]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK: latch:
+; CHECK-NEXT: [[ADD_IV1]] = phi i64 [ [[ADD_IV1_UNR]], [[LATCH_LR_PH_NEW]] ], [ [[ADD_IV_3]], [[HEADER_3]] ]
+; CHECK-NEXT: [[SHFT_3:%.*]] = ashr i64 [[ADD_IV1]], 1
; CHECK-NEXT: [[CMP2_3:%.*]] = icmp ult i64 [[SHFT_3]], [[TRIP]]
-; CHECK-NEXT: br i1 [[CMP2_3]], label [[HEADER]], label [[LATCHEXIT]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: br i1 [[CMP2_3]], label [[HEADER1:%.*]], label [[LATCHEXIT]]
+; CHECK: header.headerexit_crit_edge.unr-lcssa:
+; CHECK-NEXT: [[SPLIT_PH:%.*]] = phi i64 [ [[ADD_IV_3]], [[HEADER_3]] ]
+; CHECK-NEXT: br label [[HEADER_HEADEREXIT_CRIT_EDGE]]
+; CHECK: header.headerexit_crit_edge:
+; CHECK-NEXT: [[SPLIT:%.*]] = phi i64 [ [[SPLIT_UNR]], [[LATCH_PROL_LOOPEXIT]] ], [ [[SPLIT_PH]], [[HEADER_HEADEREXIT_CRIT_EDGE_UNR_LCSSA]] ]
+; CHECK-NEXT: br label [[HEADEREXIT]]
; CHECK: headerexit:
-; CHECK-NEXT: [[ADDPHI:%.*]] = phi i64 [ [[ADD_IV]], [[HEADER]] ], [ [[ADD_IV_1]], [[HEADER_1]] ], [ [[ADD_IV_2]], [[HEADER_2]] ], [ [[ADD_IV_3]], [[HEADER_3]] ]
+; CHECK-NEXT: [[ADDPHI:%.*]] = phi i64 [ [[SPLIT]], [[HEADER_HEADEREXIT_CRIT_EDGE]] ], [ 4, [[PREHEADER]] ]
+; CHECK-NEXT: br label [[MERGEDEXIT1:%.*]]
+; CHECK: latchexit.loopexit:
+; CHECK-NEXT: [[SHFTPHI_PH:%.*]] = phi i64 [ [[SHFT_3]], [[LATCH]] ], [ [[SHFT]], [[HEADER1]] ], [ [[SHFT_1]], [[HEADER_1]] ], [ [[SHFT_2]], [[HEADER_2]] ]
; CHECK-NEXT: br label [[MERGEDEXIT:%.*]]
-; CHECK: latchexit:
-; CHECK-NEXT: [[SHFTPHI:%.*]] = phi i64 [ [[SHFT]], [[LATCH]] ], [ [[SHFT_1]], [[LATCH_1]] ], [ [[SHFT_2]], [[LATCH_2]] ], [ [[SHFT_3]], [[LATCH_3]] ]
+; CHECK: latchexit.loopexit2:
+; CHECK-NEXT: [[SHFTPHI_PH3:%.*]] = phi i64 [ [[SHFT_PROL]], [[HEADER]] ]
; CHECK-NEXT: br label [[MERGEDEXIT]]
+; CHECK: latchexit:
+; CHECK-NEXT: [[SHFTPHI:%.*]] = phi i64 [ [[SHFTPHI_PH]], [[LATCHEXIT]] ], [ [[SHFTPHI_PH3]], [[LATCHEXIT_LOOPEXIT2]] ]
+; CHECK-NEXT: br label [[MERGEDEXIT1]]
; CHECK: mergedexit:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[ADDPHI]], [[HEADEREXIT]] ], [ [[SHFTPHI]], [[LATCHEXIT]] ]
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[ADDPHI]], [[HEADEREXIT]] ], [ [[SHFTPHI]], [[MERGEDEXIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
@@ -98,42 +136,75 @@ define void @test2(i1 %cond, i32 %n) {
; CHECK-NEXT: br i1 [[COND:%.*]], label [[PREHEADER:%.*]], label [[MERGEDEXIT:%.*]]
; CHECK: preheader:
; CHECK-NEXT: [[TRIP:%.*]] = zext i32 [[N:%.*]] to i64
+; CHECK-NEXT: [[CMP11:%.*]] = icmp ult i64 4, [[TRIP]]
+; CHECK-NEXT: br i1 [[CMP11]], label [[LATCH_LR_PH:%.*]], label [[HEADEREXIT:%.*]]
+; CHECK: latch.lr.ph:
+; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[TRIP]], -5
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = freeze i64 [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP3]], -1
+; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP3]], 3
+; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[LATCH_PROL_PREHEADER:%.*]], label [[LATCH_PROL_LOOPEXIT:%.*]]
+; CHECK: latch.prol.preheader:
; CHECK-NEXT: br label [[HEADER:%.*]]
-; CHECK: header:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 2, [[PREHEADER]] ], [ [[ADD_IV_3:%.*]], [[LATCH_3:%.*]] ]
-; CHECK-NEXT: [[ADD_IV:%.*]] = add nuw nsw i64 [[IV]], 2
+; CHECK: latch.prol:
+; CHECK-NEXT: [[ADD_IV2_PROL:%.*]] = phi i64 [ 4, [[LATCH_PROL_PREHEADER]] ], [ [[ADD_IV:%.*]], [[HEADER_PROL:%.*]] ]
+; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ 0, [[LATCH_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], [[HEADER_PROL]] ]
+; CHECK-NEXT: [[SHFT_PROL:%.*]] = ashr i64 [[ADD_IV2_PROL]], 1
+; CHECK-NEXT: [[CMP2_PROL:%.*]] = icmp ult i64 [[SHFT_PROL]], [[TRIP]]
+; CHECK-NEXT: br i1 [[CMP2_PROL]], label [[HEADER_PROL]], label [[LATCHEXIT_LOOPEXIT3:%.*]]
+; CHECK: header.prol:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[ADD_IV2_PROL]], [[HEADER]] ]
+; CHECK-NEXT: [[ADD_IV]] = add nuw nsw i64 [[IV]], 2
; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i64 [[ADD_IV]], [[TRIP]]
-; CHECK-NEXT: br i1 [[CMP1]], label [[LATCH:%.*]], label [[HEADEREXIT:%.*]]
-; CHECK: latch:
-; CHECK-NEXT: [[SHFT:%.*]] = ashr i64 [[ADD_IV]], 1
+; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
+; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
+; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[HEADER]], label [[LATCH_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: latch.prol.loopexit.unr-lcssa:
+; CHECK-NEXT: [[ADD_IV2_UNR_PH:%.*]] = phi i64 [ [[ADD_IV]], [[HEADER_PROL]] ]
+; CHECK-NEXT: br label [[LATCH_PROL_LOOPEXIT]]
+; CHECK: latch.prol.loopexit:
+; CHECK-NEXT: [[ADD_IV2_UNR:%.*]] = phi i64 [ 4, [[LATCH_LR_PH]] ], [ [[ADD_IV2_UNR_PH]], [[LATCH_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 3
+; CHECK-NEXT: br i1 [[TMP5]], label [[HEADER_HEADEREXIT_CRIT_EDGE:%.*]], label [[LATCH_LR_PH_NEW:%.*]]
+; CHECK: latch.lr.ph.new:
+; CHECK-NEXT: br label [[LATCH:%.*]]
+; CHECK: header:
+; CHECK-NEXT: [[ADD_IV1:%.*]] = add nuw nsw i64 [[ADD_IV2:%.*]], 2
+; CHECK-NEXT: [[SHFT:%.*]] = ashr i64 [[ADD_IV1]], 1
; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i64 [[SHFT]], [[TRIP]]
; CHECK-NEXT: br i1 [[CMP2]], label [[HEADER_1:%.*]], label [[LATCHEXIT:%.*]]
; CHECK: header.1:
-; CHECK-NEXT: [[ADD_IV_1:%.*]] = add nuw nsw i64 [[IV]], 4
-; CHECK-NEXT: [[CMP1_1:%.*]] = icmp ult i64 [[ADD_IV_1]], [[TRIP]]
-; CHECK-NEXT: br i1 [[CMP1_1]], label [[LATCH_1:%.*]], label [[HEADEREXIT]]
-; CHECK: latch.1:
+; CHECK-NEXT: [[ADD_IV_1:%.*]] = add nuw nsw i64 [[ADD_IV2]], 4
; CHECK-NEXT: [[SHFT_1:%.*]] = ashr i64 [[ADD_IV_1]], 1
; CHECK-NEXT: [[CMP2_1:%.*]] = icmp ult i64 [[SHFT_1]], [[TRIP]]
; CHECK-NEXT: br i1 [[CMP2_1]], label [[HEADER_2:%.*]], label [[LATCHEXIT]]
; CHECK: header.2:
-; CHECK-NEXT: [[ADD_IV_2:%.*]] = add nuw nsw i64 [[IV]], 6
-; CHECK-NEXT: [[CMP1_2:%.*]] = icmp ult i64 [[ADD_IV_2]], [[TRIP]]
-; CHECK-NEXT: br i1 [[CMP1_2]], label [[LATCH_2:%.*]], label [[HEADEREXIT]]
-; CHECK: latch.2:
+; CHECK-NEXT: [[ADD_IV_2:%.*]] = add nuw nsw i64 [[ADD_IV2]], 6
; CHECK-NEXT: [[SHFT_2:%.*]] = ashr i64 [[ADD_IV_2]], 1
; CHECK-NEXT: [[CMP2_2:%.*]] = icmp ult i64 [[SHFT_2]], [[TRIP]]
; CHECK-NEXT: br i1 [[CMP2_2]], label [[HEADER_3:%.*]], label [[LATCHEXIT]]
; CHECK: header.3:
-; CHECK-NEXT: [[ADD_IV_3]] = add nuw nsw i64 [[IV]], 8
+; CHECK-NEXT: [[ADD_IV_3:%.*]] = add nuw nsw i64 [[ADD_IV2]], 8
; CHECK-NEXT: [[CMP1_3:%.*]] = icmp ult i64 [[ADD_IV_3]], [[TRIP]]
-; CHECK-NEXT: br i1 [[CMP1_3]], label [[LATCH_3]], label [[HEADEREXIT]]
-; CHECK: latch.3:
-; CHECK-NEXT: [[SHFT_3:%.*]] = ashr i64 [[ADD_IV_3]], 1
+; CHECK-NEXT: br i1 [[CMP1_3]], label [[LATCH]], label [[HEADER_HEADEREXIT_CRIT_EDGE_UNR_LCSSA:%.*]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: latch:
+; CHECK-NEXT: [[ADD_IV2]] = phi i64 [ [[ADD_IV2_UNR]], [[LATCH_LR_PH_NEW]] ], [ [[ADD_IV_3]], [[HEADER_3]] ]
+; CHECK-NEXT: [[SHFT_3:%.*]] = ashr i64 [[ADD_IV2]], 1
; CHECK-NEXT: [[CMP2_3:%.*]] = icmp ult i64 [[SHFT_3]], [[TRIP]]
-; CHECK-NEXT: br i1 [[CMP2_3]], label [[HEADER]], label [[LATCHEXIT]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK-NEXT: br i1 [[CMP2_3]], label [[HEADER1:%.*]], label [[LATCHEXIT]]
+; CHECK: header.headerexit_crit_edge.unr-lcssa:
+; CHECK-NEXT: br label [[HEADER_HEADEREXIT_CRIT_EDGE]]
+; CHECK: header.headerexit_crit_edge:
+; CHECK-NEXT: br label [[HEADEREXIT]]
; CHECK: headerexit:
; CHECK-NEXT: br label [[MERGEDEXIT]]
+; CHECK: latchexit.loopexit:
+; CHECK-NEXT: br label [[LATCHEXIT1:%.*]]
+; CHECK: latchexit.loopexit3:
+; CHECK-NEXT: br label [[LATCHEXIT1]]
; CHECK: latchexit:
; CHECK-NEXT: br label [[MERGEDEXIT]]
; CHECK: mergedexit:
@@ -175,44 +246,79 @@ define i64 @test3(i32 %n) {
; CHECK-NEXT: br label [[PREHEADER:%.*]]
; CHECK: preheader:
; CHECK-NEXT: [[TRIP:%.*]] = zext i32 [[N:%.*]] to i64
+; CHECK-NEXT: [[CMP11:%.*]] = icmp ult i64 4, [[TRIP]]
+; CHECK-NEXT: br i1 [[CMP11]], label [[LATCH_LR_PH:%.*]], label [[HEADEREXIT:%.*]]
+; CHECK: latch.lr.ph:
+; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[TRIP]], -5
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = freeze i64 [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP3]], -1
+; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP3]], 3
+; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[LATCH_PROL_PREHEADER:%.*]], label [[LATCH_PROL_LOOPEXIT:%.*]]
+; CHECK: latch.prol.preheader:
; CHECK-NEXT: br label [[HEADER:%.*]]
-; CHECK: header:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 2, [[PREHEADER]] ], [ [[ADD_IV_3:%.*]], [[LATCH_3:%.*]] ]
-; CHECK-NEXT: [[ADD_IV:%.*]] = add nuw nsw i64 [[IV]], 2
+; CHECK: latch.prol:
+; CHECK-NEXT: [[ADD_IV2_PROL:%.*]] = phi i64 [ 4, [[LATCH_PROL_PREHEADER]] ], [ [[ADD_IV:%.*]], [[HEADER_PROL:%.*]] ]
+; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ 0, [[LATCH_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], [[HEADER_PROL]] ]
+; CHECK-NEXT: [[SHFT_PROL:%.*]] = ashr i64 [[ADD_IV2_PROL]], 1
+; CHECK-NEXT: [[CMP2_PROL:%.*]] = icmp ult i64 [[SHFT_PROL]], [[TRIP]]
+; CHECK-NEXT: br i1 [[CMP2_PROL]], label [[HEADER_PROL]], label [[LATCHEXIT_LOOPEXIT3:%.*]]
+; CHECK: header.prol:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[ADD_IV2_PROL]], [[HEADER]] ]
+; CHECK-NEXT: [[ADD_IV]] = add nuw nsw i64 [[IV]], 2
; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i64 [[ADD_IV]], [[TRIP]]
-; CHECK-NEXT: br i1 [[CMP1]], label [[LATCH:%.*]], label [[HEADEREXIT:%.*]]
-; CHECK: latch:
-; CHECK-NEXT: [[SHFT:%.*]] = ashr i64 [[ADD_IV]], 1
+; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
+; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
+; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[HEADER]], label [[LATCH_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: latch.prol.loopexit.unr-lcssa:
+; CHECK-NEXT: [[ADD_IV2_UNR_PH:%.*]] = phi i64 [ [[ADD_IV]], [[HEADER_PROL]] ]
+; CHECK-NEXT: br label [[LATCH_PROL_LOOPEXIT]]
+; CHECK: latch.prol.loopexit:
+; CHECK-NEXT: [[ADD_IV2_UNR:%.*]] = phi i64 [ 4, [[LATCH_LR_PH]] ], [ [[ADD_IV2_UNR_PH]], [[LATCH_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 3
+; CHECK-NEXT: br i1 [[TMP5]], label [[HEADER_HEADEREXIT_CRIT_EDGE:%.*]], label [[LATCH_LR_PH_NEW:%.*]]
+; CHECK: latch.lr.ph.new:
+; CHECK-NEXT: br label [[LATCH:%.*]]
+; CHECK: header:
+; CHECK-NEXT: [[ADD_IV1:%.*]] = add nuw nsw i64 [[ADD_IV2:%.*]], 2
+; CHECK-NEXT: [[SHFT:%.*]] = ashr i64 [[ADD_IV1]], 1
; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i64 [[SHFT]], [[TRIP]]
; CHECK-NEXT: br i1 [[CMP2]], label [[HEADER_1:%.*]], label [[LATCHEXIT:%.*]]
; CHECK: header.1:
-; CHECK-NEXT: [[ADD_IV_1:%.*]] = add nuw nsw i64 [[IV]], 4
-; CHECK-NEXT: [[CMP1_1:%.*]] = icmp ult i64 [[ADD_IV_1]], [[TRIP]]
-; CHECK-NEXT: br i1 [[CMP1_1]], label [[LATCH_1:%.*]], label [[HEADEREXIT]]
-; CHECK: latch.1:
+; CHECK-NEXT: [[ADD_IV_1:%.*]] = add nuw nsw i64 [[ADD_IV2]], 4
; CHECK-NEXT: [[SHFT_1:%.*]] = ashr i64 [[ADD_IV_1]], 1
; CHECK-NEXT: [[CMP2_1:%.*]] = icmp ult i64 [[SHFT_1]], [[TRIP]]
; CHECK-NEXT: br i1 [[CMP2_1]], label [[HEADER_2:%.*]], label [[LATCHEXIT]]
; CHECK: header.2:
-; CHECK-NEXT: [[ADD_IV_2:%.*]] = add nuw nsw i64 [[IV]], 6
-; CHECK-NEXT: [[CMP1_2:%.*]] = icmp ult i64 [[ADD_IV_2]], [[TRIP]]
-; CHECK-NEXT: br i1 [[CMP1_2]], label [[LATCH_2:%.*]], label [[HEADEREXIT]]
-; CHECK: latch.2:
+; CHECK-NEXT: [[ADD_IV_2:%.*]] = add nuw nsw i64 [[ADD_IV2]], 6
; CHECK-NEXT: [[SHFT_2:%.*]] = ashr i64 [[ADD_IV_2]], 1
; CHECK-NEXT: [[CMP2_2:%.*]] = icmp ult i64 [[SHFT_2]], [[TRIP]]
; CHECK-NEXT: br i1 [[CMP2_2]], label [[HEADER_3:%.*]], label [[LATCHEXIT]]
; CHECK: header.3:
-; CHECK-NEXT: [[ADD_IV_3]] = add nuw nsw i64 [[IV]], 8
+; CHECK-NEXT: [[ADD_IV_3:%.*]] = add nuw nsw i64 [[ADD_IV2]], 8
; CHECK-NEXT: [[CMP1_3:%.*]] = icmp ult i64 [[ADD_IV_3]], [[TRIP]]
-; CHECK-NEXT: br i1 [[CMP1_3]], label [[LATCH_3]], label [[HEADEREXIT]]
-; CHECK: latch.3:
-; CHECK-NEXT: [[SHFT_3:%.*]] = ashr i64 [[ADD_IV_3]], 1
+; CHECK-NEXT: br i1 [[CMP1_3]], label [[LATCH]], label [[HEADER_HEADEREXIT_CRIT_EDGE_UNR_LCSSA:%.*]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK: latch:
+; CHECK-NEXT: [[ADD_IV2]] = phi i64 [ [[ADD_IV2_UNR]], [[LATCH_LR_PH_NEW]] ], [ [[ADD_IV_3]], [[HEADER_3]] ]
+; CHECK-NEXT: [[SHFT_3:%.*]] = ashr i64 [[ADD_IV2]], 1
; CHECK-NEXT: [[CMP2_3:%.*]] = icmp ult i64 [[SHFT_3]], [[TRIP]]
-; CHECK-NEXT: br i1 [[CMP2_3]], label [[HEADER]], label [[LATCHEXIT]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT: br i1 [[CMP2_3]], label [[HEADER1:%.*]], label [[LATCHEXIT]]
+; CHECK: header.headerexit_crit_edge.unr-lcssa:
+; CHECK-NEXT: br label [[HEADER_HEADEREXIT_CRIT_EDGE]]
+; CHECK: header.headerexit_crit_edge:
+; CHECK-NEXT: br label [[HEADEREXIT]]
; CHECK: headerexit:
; CHECK-NEXT: br label [[EXITSUCC:%.*]]
+; CHECK: latchexit.loopexit:
+; CHECK-NEXT: [[SHFTPHI_PH:%.*]] = phi i64 [ [[SHFT_3]], [[LATCH]] ], [ [[SHFT]], [[HEADER1]] ], [ [[SHFT_1]], [[HEADER_1]] ], [ [[SHFT_2]], [[HEADER_2]] ]
+; CHECK-NEXT: br label [[LATCHEXIT1:%.*]]
+; CHECK: latchexit.loopexit3:
+; CHECK-NEXT: [[SHFTPHI_PH4:%.*]] = phi i64 [ [[SHFT_PROL]], [[HEADER]] ]
+; CHECK-NEXT: br label [[LATCHEXIT1]]
; CHECK: latchexit:
-; CHECK-NEXT: [[SHFTPHI:%.*]] = phi i64 [ [[SHFT]], [[LATCH]] ], [ [[SHFT_1]], [[LATCH_1]] ], [ [[SHFT_2]], [[LATCH_2]] ], [ [[SHFT_3]], [[LATCH_3]] ]
+; CHECK-NEXT: [[SHFTPHI:%.*]] = phi i64 [ [[SHFTPHI_PH]], [[LATCHEXIT]] ], [ [[SHFTPHI_PH4]], [[LATCHEXIT_LOOPEXIT3]] ]
; CHECK-NEXT: ret i64 [[SHFTPHI]]
; CHECK: exitsucc:
; CHECK-NEXT: ret i64 96
@@ -265,15 +371,15 @@ define void @test4(i16 %c3) {
; CHECK-NEXT: br label [[EXITING_PROL:%.*]]
; CHECK: exiting.prol:
; CHECK-NEXT: switch i16 [[C3:%.*]], label [[DEFAULT_LOOPEXIT_LOOPEXIT1:%.*]] [
-; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT2:%.*]]
-; CHECK-NEXT: i16 95, label [[LATCH_PROL]]
+; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT2:%.*]]
+; CHECK-NEXT: i16 95, label [[LATCH_PROL]]
; CHECK-NEXT: ]
; CHECK: latch.prol:
; CHECK-NEXT: [[INDVARS_IV_NEXT_PROL]] = add nuw nsw i64 [[INDVARS_IV_PROL]], 1
; CHECK-NEXT: [[C2_PROL:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT_PROL]], [[C1]]
; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
-; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[HEADER_PROL]], label [[HEADER_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[HEADER_PROL]], label [[HEADER_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: header.prol.loopexit.unr-lcssa:
; CHECK-NEXT: [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL]], [[LATCH_PROL]] ]
; CHECK-NEXT: br label [[HEADER_PROL_LOOPEXIT]]
@@ -288,34 +394,34 @@ define void @test4(i16 %c3) {
; CHECK-NEXT: br label [[EXITING:%.*]]
; CHECK: exiting:
; CHECK-NEXT: switch i16 [[C3]], label [[DEFAULT_LOOPEXIT_LOOPEXIT:%.*]] [
-; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT:%.*]]
-; CHECK-NEXT: i16 95, label [[LATCH:%.*]]
+; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT:%.*]]
+; CHECK-NEXT: i16 95, label [[LATCH:%.*]]
; CHECK-NEXT: ]
; CHECK: latch:
; CHECK-NEXT: br label [[EXITING_1:%.*]]
; CHECK: exiting.1:
; CHECK-NEXT: switch i16 [[C3]], label [[DEFAULT_LOOPEXIT_LOOPEXIT]] [
-; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT]]
-; CHECK-NEXT: i16 95, label [[LATCH_1:%.*]]
+; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT]]
+; CHECK-NEXT: i16 95, label [[LATCH_1:%.*]]
; CHECK-NEXT: ]
; CHECK: latch.1:
; CHECK-NEXT: br label [[EXITING_2:%.*]]
; CHECK: exiting.2:
; CHECK-NEXT: switch i16 [[C3]], label [[DEFAULT_LOOPEXIT_LOOPEXIT]] [
-; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT]]
-; CHECK-NEXT: i16 95, label [[LATCH_2:%.*]]
+; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT]]
+; CHECK-NEXT: i16 95, label [[LATCH_2:%.*]]
; CHECK-NEXT: ]
; CHECK: latch.2:
; CHECK-NEXT: br label [[EXITING_3:%.*]]
; CHECK: exiting.3:
; CHECK-NEXT: switch i16 [[C3]], label [[DEFAULT_LOOPEXIT_LOOPEXIT]] [
-; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT]]
-; CHECK-NEXT: i16 95, label [[LATCH_3]]
+; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT]]
+; CHECK-NEXT: i16 95, label [[LATCH_3]]
; CHECK-NEXT: ]
; CHECK: latch.3:
; CHECK-NEXT: [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV]], 4
; CHECK-NEXT: [[C2_3:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT_3]], [[C1]]
-; CHECK-NEXT: br i1 [[C2_3]], label [[HEADER]], label [[LATCHEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT: br i1 [[C2_3]], label [[HEADER]], label [[LATCHEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: latchexit.unr-lcssa:
; CHECK-NEXT: br label [[LATCHEXIT]]
; CHECK: latchexit:
@@ -414,13 +520,13 @@ define void @test5() {
; CHECK-NEXT: [[C2_3_PROL:%.*]] = call i1 @unknown(i32 0)
; CHECK-NEXT: br i1 [[C2_3_PROL]], label [[INNERLATCH_3_PROL:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT2]]
; CHECK: innerLatch.3.prol:
-; CHECK-NEXT: br i1 false, label [[INNERH_PROL]], label [[OUTERLATCH_PROL]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: br i1 false, label [[INNERH_PROL]], label [[OUTERLATCH_PROL]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: outerLatch.prol:
; CHECK-NEXT: [[TMP6_PROL]] = add i32 [[TMP4_PROL]], 1
; CHECK-NEXT: [[TMP7_PROL:%.*]] = icmp sgt i32 [[TMP6_PROL]], 79
; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i32 [[PROL_ITER]], 1
; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i32 [[PROL_ITER_NEXT]], [[XTRAITER]]
-; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[OUTERH_PROL]], label [[OUTERH_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[OUTERH_PROL]], label [[OUTERH_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: outerH.prol.loopexit.unr-lcssa:
; CHECK-NEXT: [[TMP4_UNR_PH:%.*]] = phi i32 [ [[TMP6_PROL]], [[OUTERLATCH_PROL]] ]
; CHECK-NEXT: br label [[OUTERH_PROL_LOOPEXIT]]
@@ -464,7 +570,7 @@ define void @test5() {
; CHECK-NEXT: [[C2_3:%.*]] = call i1 @unknown(i32 0)
; CHECK-NEXT: br i1 [[C2_3]], label [[INNERLATCH_3:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT]]
; CHECK: innerLatch.3:
-; CHECK-NEXT: br i1 false, label [[INNERH]], label [[OUTERLATCH]], !llvm.loop [[LOOP6]]
+; CHECK-NEXT: br i1 false, label [[INNERH]], label [[OUTERLATCH]], !llvm.loop [[LOOP9]]
; CHECK: outerLatch:
; CHECK-NEXT: br label [[INNERH_14:%.*]]
; CHECK: innerH.14:
@@ -498,7 +604,7 @@ define void @test5() {
; CHECK-NEXT: [[C2_3_1:%.*]] = call i1 @unknown(i32 0)
; CHECK-NEXT: br i1 [[C2_3_1]], label [[INNERLATCH_3_1:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT19]]
; CHECK: innerLatch.3.1:
-; CHECK-NEXT: br i1 false, label [[INNERH_14]], label [[OUTERLATCH_1]], !llvm.loop [[LOOP6]]
+; CHECK-NEXT: br i1 false, label [[INNERH_14]], label [[OUTERLATCH_1]], !llvm.loop [[LOOP9]]
; CHECK: outerLatch.1:
; CHECK-NEXT: br label [[INNERH_29:%.*]]
; CHECK: innerH.29:
@@ -532,7 +638,7 @@ define void @test5() {
; CHECK-NEXT: [[C2_3_2:%.*]] = call i1 @unknown(i32 0)
; CHECK-NEXT: br i1 [[C2_3_2]], label [[INNERLATCH_3_2:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT21]]
; CHECK: innerLatch.3.2:
-; CHECK-NEXT: br i1 false, label [[INNERH_29]], label [[OUTERLATCH_2]], !llvm.loop [[LOOP6]]
+; CHECK-NEXT: br i1 false, label [[INNERH_29]], label [[OUTERLATCH_2]], !llvm.loop [[LOOP9]]
; CHECK: outerLatch.2:
; CHECK-NEXT: br label [[INNERH_314:%.*]]
; CHECK: innerH.314:
@@ -566,11 +672,11 @@ define void @test5() {
; CHECK-NEXT: [[C2_3_3:%.*]] = call i1 @unknown(i32 0)
; CHECK-NEXT: br i1 [[C2_3_3]], label [[INNERLATCH_3_3:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT23]]
; CHECK: innerLatch.3.3:
-; CHECK-NEXT: br i1 false, label [[INNERH_314]], label [[OUTERLATCH_3]], !llvm.loop [[LOOP6]]
+; CHECK-NEXT: br i1 false, label [[INNERH_314]], label [[OUTERLATCH_3]], !llvm.loop [[LOOP9]]
; CHECK: outerLatch.3:
; CHECK-NEXT: [[TMP6_3]] = add i32 [[TMP4]], 4
; CHECK-NEXT: [[TMP7_3:%.*]] = icmp sgt i32 [[TMP6_3]], 79
-; CHECK-NEXT: br i1 [[TMP7_3]], label [[OUTERLATCHEXIT_LOOPEXIT_UNR_LCSSA:%.*]], label [[OUTERH]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP7_3]], label [[OUTERLATCHEXIT_LOOPEXIT_UNR_LCSSA:%.*]], label [[OUTERH]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK: outerLatchExit.loopexit.unr-lcssa:
; CHECK-NEXT: br label [[OUTERLATCHEXIT_LOOPEXIT]]
; CHECK: outerLatchExit.loopexit:
@@ -676,7 +782,7 @@ define void @test6(i64 %start) {
; CHECK-NEXT: [[TMP7:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT_PROL]], 616
; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
-; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[HEADER_PROL]], label [[HEADER_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[HEADER_PROL]], label [[HEADER_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: header.prol.loopexit.unr-lcssa:
; CHECK-NEXT: [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL]], [[LATCH_PROL]] ]
; CHECK-NEXT: br label [[HEADER_PROL_LOOPEXIT]]
@@ -709,7 +815,7 @@ define void @test6(i64 %start) {
; CHECK: latch.3:
; CHECK-NEXT: [[INDVARS_IV_NEXT_3]] = add nsw i64 [[INDVARS_IV]], 8
; CHECK-NEXT: [[TMP9:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT_3]], 616
-; CHECK-NEXT: br i1 [[TMP9]], label [[HEADER]], label [[LATCHEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[HEADER]], label [[LATCHEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK: latchexit.unr-lcssa:
; CHECK-NEXT: br label [[LATCHEXIT]]
; CHECK: latchexit:
>From d49e403aefcbb1dbf794777d7d82ccc6ff4fe334 Mon Sep 17 00:00:00 2001
From: Marek Sedlacek <msedlacek at azul.com>
Date: Thu, 3 Jul 2025 14:23:24 +0000
Subject: [PATCH 2/4] Fixed wording of a comment and debug; Removed UB and
unused parameter from a test
---
llvm/lib/Transforms/Utils/LoopUnroll.cpp | 6 ++++--
.../X86/runtime-unroll-after-rotate.ll | 20 +++++++++----------
2 files changed, 14 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 17bf8816c888a..dacfaec56e541 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -488,11 +488,13 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
if (ULO.Runtime && SE) {
BasicBlock *OrigHeader = L->getHeader();
BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
- // Rotate loop if it makes it countable (for later unrolling)
+ // Rotate loop if it makes the exit count from the latch computable (for
+ // later unrolling).
if (BI && !BI->isUnconditional() &&
isa<SCEVCouldNotCompute>(SE->getExitCount(L, L->getLoopLatch())) &&
!isa<SCEVCouldNotCompute>(SE->getExitCount(L, OrigHeader))) {
- LLVM_DEBUG(dbgs() << " Rotating loop to make the loop countable.\n");
+ LLVM_DEBUG(
+ dbgs() << " Rotating loop to make the exit count computable.\n");
SimplifyQuery SQ{OrigHeader->getDataLayout()};
SQ.TLI = nullptr;
SQ.DT = DT;
diff --git a/llvm/test/Transforms/LoopUnroll/X86/runtime-unroll-after-rotate.ll b/llvm/test/Transforms/LoopUnroll/X86/runtime-unroll-after-rotate.ll
index 20803f1c95c08..7feb96bd3b855 100644
--- a/llvm/test/Transforms/LoopUnroll/X86/runtime-unroll-after-rotate.ll
+++ b/llvm/test/Transforms/LoopUnroll/X86/runtime-unroll-after-rotate.ll
@@ -2,15 +2,15 @@
; RUN: opt --passes=loop-unroll -unroll-runtime-other-exit-predictable=1 -S %s | FileCheck %s
target triple = "x86_64-unknown-linux-gnu"
-define void @test(i64 %0) #0 {
+define void @test(i64 %0, ptr %1) #0 {
; CHECK-LABEL: define void @test(
-; CHECK-SAME: i64 [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-SAME: i64 [[TMP0:%.*]], ptr [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[B1:%.*]] = icmp eq i64 [[TMP0]], 0
; CHECK-NEXT: br i1 [[B1]], label %[[AFTER:.*]], label %[[BODY_LR_PH:.*]]
; CHECK: [[BODY_LR_PH]]:
-; CHECK-NEXT: [[TMP1:%.*]] = sub i64 0, [[TMP0]]
-; CHECK-NEXT: [[TMP2:%.*]] = freeze i64 [[TMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = sub i64 0, [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = freeze i64 [[TMP5]]
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], -1
; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP2]], 3
; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
@@ -21,7 +21,7 @@ define void @test(i64 %0) #0 {
; CHECK-NEXT: [[A2_PROL:%.*]] = phi i64 [ [[TMP0]], %[[BODY_PROL_PREHEADER]] ], [ [[A_PROL:%.*]], %[[HEADER_PROL:.*]] ]
; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ 0, %[[BODY_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], %[[HEADER_PROL]] ]
; CHECK-NEXT: [[C_PROL:%.*]] = add i64 [[A2_PROL]], 1
-; CHECK-NEXT: [[D_PROL:%.*]] = load i32, ptr addrspace(1) null, align 4
+; CHECK-NEXT: [[D_PROL:%.*]] = load i32, ptr [[TMP1]], align 4
; CHECK-NEXT: [[E_PROL:%.*]] = icmp eq i32 [[D_PROL]], 0
; CHECK-NEXT: br i1 [[E_PROL]], label %[[END_LOOPEXIT3:.*]], label %[[HEADER_PROL]]
; CHECK: [[HEADER_PROL]]:
@@ -51,7 +51,7 @@ define void @test(i64 %0) #0 {
; CHECK-NEXT: br i1 [[B_7]], label %[[HEADER_AFTER_CRIT_EDGE_UNR_LCSSA:.*]], label %[[BODY]]
; CHECK: [[BODY]]:
; CHECK-NEXT: [[A2]] = phi i64 [ [[A2_UNR]], %[[BODY_LR_PH_NEW]] ], [ [[C_7]], %[[HEADER_3]] ]
-; CHECK-NEXT: [[D:%.*]] = load i32, ptr addrspace(1) null, align 4
+; CHECK-NEXT: [[D:%.*]] = load i32, ptr [[TMP1]], align 4
; CHECK-NEXT: [[E:%.*]] = icmp eq i32 [[D]], 0
; CHECK-NEXT: br i1 [[E]], label %[[END_LOOPEXIT]], label %[[HEADER]]
; CHECK: [[END_LOOPEXIT]]:
@@ -65,7 +65,7 @@ define void @test(i64 %0) #0 {
; CHECK: [[HEADER_AFTER_CRIT_EDGE]]:
; CHECK-NEXT: br label %[[AFTER]]
; CHECK: [[AFTER]]:
-; CHECK-NEXT: call void @foo(i32 0)
+; CHECK-NEXT: call void @foo()
; CHECK-NEXT: ret void
;
entry:
@@ -78,7 +78,7 @@ header:
body:
%c = add i64 %a, 1
- %d = load i32, ptr addrspace(1) null, align 4
+ %d = load i32, ptr %1, align 4
%e = icmp eq i32 %d, 0
br i1 %e, label %end, label %header
@@ -86,11 +86,11 @@ end:
ret void
after:
- call void @foo(i32 0)
+ call void @foo()
ret void
}
-declare void @foo(i32)
+declare void @foo()
attributes #0 = { "tune-cpu"="generic" }
;.
>From 9d8cd0b913f24c69ce5761117601570ecec94281 Mon Sep 17 00:00:00 2001
From: Marek Sedlacek <msedlacek at azul.com>
Date: Mon, 7 Jul 2025 12:22:20 +0000
Subject: [PATCH 3/4] Added indication of loop rotation when no unroll happens
---
llvm/include/llvm/Transforms/Utils/UnrollLoop.h | 3 +++
llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 5 +++--
llvm/lib/Transforms/Utils/LoopUnroll.cpp | 16 ++++++++++------
3 files changed, 16 insertions(+), 8 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
index 765c613b04a44..3fe59cad879c9 100644
--- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
+++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
@@ -59,6 +59,9 @@ enum class LoopUnrollResult {
/// The loop was not modified.
Unmodified,
+ /// The loop was modified, but not unrolled.
+ Modified,
+
/// The loop was partially unrolled -- we still have a loop, but with a
/// smaller trip count. We may also have emitted epilogue loop if the loop
/// had a non-constant trip count.
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index a22d84dcf014d..8b1ab5a9e2181 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -1357,8 +1357,9 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
ULO.RuntimeUnrollMultiExit = UP.RuntimeUnrollMultiExit;
LoopUnrollResult UnrollResult = UnrollLoop(
L, ULO, LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop, AA);
- if (UnrollResult == LoopUnrollResult::Unmodified)
- return LoopUnrollResult::Unmodified;
+ if (UnrollResult == LoopUnrollResult::Unmodified ||
+ UnrollResult == LoopUnrollResult::Modified)
+ return UnrollResult;
if (RemainderLoop) {
std::optional<MDNode *> RemainderLoopID =
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index dacfaec56e541..cfd422710b34a 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -485,6 +485,8 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
assert(ULO.Count > 0);
+ LoopUnrollResult Result = LoopUnrollResult::Unmodified;
+
if (ULO.Runtime && SE) {
BasicBlock *OrigHeader = L->getHeader();
BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
@@ -499,10 +501,12 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
SQ.TLI = nullptr;
SQ.DT = DT;
SQ.AC = AC;
- llvm::LoopRotation(L, LI, TTI, AC, DT, SE, nullptr /*MemorySSAUpdater*/,
- SQ, false /*RotationOnly*/, 16 /*Threshold*/,
- false /*IsUtilMode*/, false /*PrepareForLTO*/,
- [](Loop *, ScalarEvolution *) { return true; });
+ if (llvm::LoopRotation(L, LI, TTI, AC, DT, SE,
+ nullptr /*MemorySSAUpdater*/, SQ,
+ false /*RotationOnly*/, 16 /*Threshold*/,
+ false /*IsUtilMode*/, false /*PrepareForLTO*/,
+ [](Loop *, ScalarEvolution *) { return true; }))
+ Result = LoopUnrollResult::Modified;
}
}
@@ -599,7 +603,7 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
if (!LatchBI || (LatchBI->isConditional() && !LatchIsExiting)) {
LLVM_DEBUG(
dbgs() << "Can't unroll; a conditional latch must exit the loop");
- return LoopUnrollResult::Unmodified;
+ return Result;
}
assert((!ULO.Runtime || canHaveUnrollRemainder(L)) &&
@@ -620,7 +624,7 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
else {
LLVM_DEBUG(dbgs() << "Won't unroll; remainder loop could not be "
"generated when assuming runtime trip count\n");
- return LoopUnrollResult::Unmodified;
+ return Result;
}
}
>From 7a41a6f0bbd7e312cc27c551faa2e520358cae57 Mon Sep 17 00:00:00 2001
From: Marek Sedlacek <msedlacek at azul.com>
Date: Thu, 10 Jul 2025 13:05:23 +0000
Subject: [PATCH 4/4] Removed cpu specification from .ll test; Added check for
simplify form of loop before rotation
---
llvm/lib/Transforms/Utils/LoopUnroll.cpp | 9 +++++---
.../{X86 => }/runtime-unroll-after-rotate.ll | 23 ++++++++++++-------
2 files changed, 21 insertions(+), 11 deletions(-)
rename llvm/test/Transforms/LoopUnroll/{X86 => }/runtime-unroll-after-rotate.ll (82%)
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index cfd422710b34a..89f77232a13f9 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -487,11 +487,14 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
LoopUnrollResult Result = LoopUnrollResult::Unmodified;
- if (ULO.Runtime && SE) {
+ // Rotate loop if it makes the exit count from the latch computable (for
+ // later unrolling).
+ // LoopSimplify form is checked up front because UnrollRuntimeLoopRemainder
+ // performs the same check; rotating a loop that it would then reject for
+ // not being in this form would be wasted work.
+ if (ULO.Runtime && SE && L->isLoopSimplifyForm()) {
BasicBlock *OrigHeader = L->getHeader();
BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
- // Rotate loop if it makes the exit count from the latch computable (for
- // later unrolling).
if (BI && !BI->isUnconditional() &&
isa<SCEVCouldNotCompute>(SE->getExitCount(L, L->getLoopLatch())) &&
!isa<SCEVCouldNotCompute>(SE->getExitCount(L, OrigHeader))) {
diff --git a/llvm/test/Transforms/LoopUnroll/X86/runtime-unroll-after-rotate.ll b/llvm/test/Transforms/LoopUnroll/runtime-unroll-after-rotate.ll
similarity index 82%
rename from llvm/test/Transforms/LoopUnroll/X86/runtime-unroll-after-rotate.ll
rename to llvm/test/Transforms/LoopUnroll/runtime-unroll-after-rotate.ll
index 7feb96bd3b855..adb9005304807 100644
--- a/llvm/test/Transforms/LoopUnroll/X86/runtime-unroll-after-rotate.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-unroll-after-rotate.ll
@@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt --passes=loop-unroll -unroll-runtime-other-exit-predictable=1 -S %s | FileCheck %s
+; RUN: opt --passes=loop-unroll -unroll-runtime=true -unroll-runtime-other-exit-predictable=1 -S %s | FileCheck %s
target triple = "x86_64-unknown-linux-gnu"
define void @test(i64 %0, ptr %1) #0 {
; CHECK-LABEL: define void @test(
-; CHECK-SAME: i64 [[TMP0:%.*]], ptr [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-SAME: i64 [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[B1:%.*]] = icmp eq i64 [[TMP0]], 0
; CHECK-NEXT: br i1 [[B1]], label %[[AFTER:.*]], label %[[BODY_LR_PH:.*]]
@@ -12,7 +12,7 @@ define void @test(i64 %0, ptr %1) #0 {
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 0, [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = freeze i64 [[TMP5]]
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], -1
-; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP2]], 3
+; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP2]], 7
; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[BODY_PROL_PREHEADER:.*]], label %[[BODY_PROL_LOOPEXIT:.*]]
; CHECK: [[BODY_PROL_PREHEADER]]:
@@ -35,8 +35,8 @@ define void @test(i64 %0, ptr %1) #0 {
; CHECK-NEXT: br label %[[BODY_PROL_LOOPEXIT]]
; CHECK: [[BODY_PROL_LOOPEXIT]]:
; CHECK-NEXT: [[A2_UNR:%.*]] = phi i64 [ [[TMP0]], %[[BODY_LR_PH]] ], [ [[A2_UNR_PH]], %[[BODY_PROL_LOOPEXIT_UNR_LCSSA]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 3
-; CHECK-NEXT: br i1 [[TMP4]], label %[[HEADER_AFTER_CRIT_EDGE:.*]], label %[[BODY_LR_PH_NEW:.*]]
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], 7
+; CHECK-NEXT: br i1 [[TMP6]], label %[[HEADER_AFTER_CRIT_EDGE:.*]], label %[[BODY_LR_PH_NEW:.*]]
; CHECK: [[BODY_LR_PH_NEW]]:
; CHECK-NEXT: br label %[[BODY:.*]]
; CHECK: [[HEADER:.*]]:
@@ -44,13 +44,21 @@ define void @test(i64 %0, ptr %1) #0 {
; CHECK: [[HEADER_1]]:
; CHECK-NEXT: br i1 false, label %[[END_LOOPEXIT]], label %[[HEADER_2:.*]]
; CHECK: [[HEADER_2]]:
-; CHECK-NEXT: [[C_7:%.*]] = add i64 [[A2:%.*]], 4
; CHECK-NEXT: br i1 false, label %[[END_LOOPEXIT]], label %[[HEADER_3:.*]]
; CHECK: [[HEADER_3]]:
+; CHECK-NEXT: br i1 false, label %[[END_LOOPEXIT]], label %[[HEADER_4:.*]]
+; CHECK: [[HEADER_4]]:
+; CHECK-NEXT: br i1 false, label %[[END_LOOPEXIT]], label %[[HEADER_5:.*]]
+; CHECK: [[HEADER_5]]:
+; CHECK-NEXT: br i1 false, label %[[END_LOOPEXIT]], label %[[HEADER_6:.*]]
+; CHECK: [[HEADER_6]]:
+; CHECK-NEXT: [[C_7:%.*]] = add i64 [[A2:%.*]], 8
+; CHECK-NEXT: br i1 false, label %[[END_LOOPEXIT]], label %[[HEADER_7:.*]]
+; CHECK: [[HEADER_7]]:
; CHECK-NEXT: [[B_7:%.*]] = icmp eq i64 [[C_7]], 0
; CHECK-NEXT: br i1 [[B_7]], label %[[HEADER_AFTER_CRIT_EDGE_UNR_LCSSA:.*]], label %[[BODY]]
; CHECK: [[BODY]]:
-; CHECK-NEXT: [[A2]] = phi i64 [ [[A2_UNR]], %[[BODY_LR_PH_NEW]] ], [ [[C_7]], %[[HEADER_3]] ]
+; CHECK-NEXT: [[A2]] = phi i64 [ [[A2_UNR]], %[[BODY_LR_PH_NEW]] ], [ [[C_7]], %[[HEADER_7]] ]
; CHECK-NEXT: [[D:%.*]] = load i32, ptr [[TMP1]], align 4
; CHECK-NEXT: [[E:%.*]] = icmp eq i32 [[D]], 0
; CHECK-NEXT: br i1 [[E]], label %[[END_LOOPEXIT]], label %[[HEADER]]
@@ -92,7 +100,6 @@ after:
declare void @foo()
-attributes #0 = { "tune-cpu"="generic" }
;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.unroll.disable"}
More information about the llvm-commits
mailing list