[llvm] [LoopUnroll] Use branch probability in multi-exit loop unrolling (PR #164799)
Marek Sedláček via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 23 04:19:47 PST 2026
https://github.com/mark-sed updated https://github.com/llvm/llvm-project/pull/164799
>From 564930674bb45aa7c46f871d8fe7b635a9a48aa0 Mon Sep 17 00:00:00 2001
From: Marek Sedlacek <msedlacek at azul.com>
Date: Thu, 23 Oct 2025 10:49:32 +0000
Subject: [PATCH 1/5] This patch improves multi-exit loop unrolling by taking
into account branch probability, not only whether the other exit is a
deoptimizing one. This implementation uses branch metadata directly because
of the unstable state of BPI in this part of the code.
---
.../Transforms/Utils/LoopUnrollRuntime.cpp | 75 +-
.../LoopUnroll/multi-exit-loop-unroll.ll | 655 ++++++++++++++++++
2 files changed, 723 insertions(+), 7 deletions(-)
create mode 100644 llvm/test/Transforms/LoopUnroll/multi-exit-loop-unroll.ll
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 7ff3f0b28f4e2..5bfdc185b642b 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -536,10 +536,53 @@ static Loop *CloneLoopBlocks(Loop *L, Value *NewIter,
return NewLoop;
}
+// Calculates the probability of the edge from Src to Dst, where Dst has to be
+// a successor of Src.
+// This reads the branch_weights (!prof) metadata of Src's terminator directly.
+// Returns std::nullopt if the metadata is missing or the probability cannot
+// be computed, and a zero probability if Src has no successors at all.
+// This does not use BranchProbabilityInfo, so the result will differ from BPI,
+// which has its own more advanced heuristics for branches without metadata.
+static std::optional<BranchProbability>
+computeBranchProbabilityUsingMetadata(BasicBlock *Src, BasicBlock *Dst) {
+  assert(Src != Dst && "Passed in same source as destination");
+
+  Instruction *TI = Src->getTerminator();
+  if (!TI || TI->getNumSuccessors() == 0)
+    return BranchProbability::getZero();
+
+  SmallVector<uint32_t, 4> Weights;
+  if (!extractBranchWeights(*TI, Weights)) {
+    // No metadata
+    return std::nullopt;
+  }
+  // Checked inline so no local is left unused in builds without assertions.
+  assert(TI->getNumSuccessors() == Weights.size() &&
+         "Missing weights in branch_weights");
+
+  uint64_t Total = 0;
+  uint32_t Numerator = 0;
+  for (auto [i, Weight] : llvm::enumerate(Weights)) {
+    if (TI->getSuccessor(i) == Dst)
+      Numerator += Weight;
+    Total += Weight;
+  }
+
+  // The sum of the weights might be 0 if the metadata is incorrect or was set
+  // by hand; bail out to avoid a division by zero later on. Also bail out if
+  // the sum does not fit into uint32_t. Past this check Numerator <= Total
+  // holds, so Numerator cannot have wrapped around.
+  if (Total == 0 || Total > std::numeric_limits<uint32_t>::max())
+    return std::nullopt;
+
+  return BranchProbability(Numerator, static_cast<uint32_t>(Total));
+}
+
/// Returns true if we can profitably unroll the multi-exit loop L. Currently,
/// we return true only if UnrollRuntimeMultiExit is set to true.
static bool canProfitablyRuntimeUnrollMultiExitLoop(
- Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit,
+ Loop *L, const TargetTransformInfo *TTI,
+ SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit,
bool UseEpilogRemainder) {
// The main pain point with multi-exit loop unrolling is that once unrolled,
@@ -567,11 +610,28 @@ static bool canProfitablyRuntimeUnrollMultiExitLoop(
return true;
// The second heuristic is that L has one exit other than the latchexit and
- // that exit is a deoptimize block. We know that deoptimize blocks are rarely
- // taken, which also implies the branch leading to the deoptimize block is
- // highly predictable. When UnrollRuntimeOtherExitPredictable is specified, we
- // assume the other exit branch is predictable even if it has no deoptimize
- // call.
+ // that exit is highly predictable.
+ if (TTI) {
+ if (OtherExits.size() != 1)
+ return false;
+ BasicBlock *LatchBB = L->getLoopLatch();
+ assert(LatchBB && "Expected loop to have a latch");
+ BasicBlock *NonLatchExitingBlock =
+ (ExitingBlocks[0] == LatchBB) ? ExitingBlocks[1] : ExitingBlocks[0];
+ auto BranchProb = computeBranchProbabilityUsingMetadata(
+ NonLatchExitingBlock, OtherExits[0]);
+ // If BranchProbability could not be extracted (returns nullopt), then
+ // don't return and do the check for deopt block.
+ if (BranchProb) {
+ auto Threshold = TTI->getPredictableBranchThreshold().getCompl();
+ return UnrollRuntimeOtherExitPredictable || *BranchProb < Threshold;
+ }
+ }
+
+ // We know that deoptimize blocks are rarely taken, which also implies the
+ // branch leading to the deoptimize block is highly predictable. When
+ // UnrollRuntimeOtherExitPredictable is specified, we assume the other exit
+ // branch is predictable even if it has no deoptimize call.
return (OtherExits.size() == 1 &&
(UnrollRuntimeOtherExitPredictable ||
OtherExits[0]->getPostdominatingDeoptimizeCall()));
@@ -715,7 +775,8 @@ bool llvm::UnrollRuntimeLoopRemainder(
// Otherwise perform multi-exit unrolling, if either the target indicates
// it is profitable or the general profitability heuristics apply.
if (!RuntimeUnrollMultiExit &&
- !canProfitablyRuntimeUnrollMultiExitLoop(L, OtherExits, LatchExit,
+ !canProfitablyRuntimeUnrollMultiExitLoop(L, TTI, OtherExits,
+ LatchExit,
UseEpilogRemainder)) {
LLVM_DEBUG(dbgs() << "Multiple exit/exiting blocks in loop and "
"multi-exit unrolling not enabled!\n");
diff --git a/llvm/test/Transforms/LoopUnroll/multi-exit-loop-unroll.ll b/llvm/test/Transforms/LoopUnroll/multi-exit-loop-unroll.ll
new file mode 100644
index 0000000000000..f006a2dccda26
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/multi-exit-loop-unroll.ll
@@ -0,0 +1,655 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=loop-unroll -unroll-runtime=true -verify-dom-info -verify-loop-info -S | FileCheck %s
+; RUN: opt < %s -passes=loop-unroll -unroll-runtime=true -verify-dom-info -verify-loop-info -unroll-runtime-multi-exit=false -S | FileCheck %s -check-prefix=NOUNROLL
+
+define i32 @test1(ptr nocapture %a, i64 %n) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = freeze i64 [[N:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], -1
+; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP0]], 7
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 7
+; CHECK-NEXT: br i1 [[TMP2]], label [[LATCHEXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
+; CHECK: entry.new:
+; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[TMP0]], [[XTRAITER]]
+; CHECK-NEXT: br label [[HEADER:%.*]]
+; CHECK: header:
+; CHECK-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[LATCH_7:%.*]] ]
+; CHECK-NEXT: [[SUM_02_EPIL:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[ADD_7:%.*]], [[LATCH_7]] ]
+; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[LATCH_7]] ]
+; CHECK-NEXT: br label [[FOR_EXITING_BLOCK:%.*]]
+; CHECK: for.exiting_block:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT: br i1 [[CMP]], label [[OTHEREXIT_LOOPEXIT:%.*]], label [[LATCH:%.*]], !prof [[PROF0:![0-9]+]]
+; CHECK: latch:
+; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV_EPIL]]
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX_EPIL]], align 4
+; CHECK-NEXT: [[ADD_EPIL:%.*]] = add nsw i32 [[TMP11]], [[SUM_02_EPIL]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV_EPIL]], 1
+; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_1:%.*]]
+; CHECK: for.exiting_block.1:
+; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT: br i1 [[CMP_1]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_1:%.*]], !prof [[PROF0]]
+; CHECK: latch.1:
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP4]], [[ADD_EPIL]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_EPIL]], 2
+; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_2:%.*]]
+; CHECK: for.exiting_block.2:
+; CHECK-NEXT: [[CMP_2:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT: br i1 [[CMP_2]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_2:%.*]], !prof [[PROF0]]
+; CHECK: latch.2:
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_1]]
+; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP5]], [[ADD_1]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV_EPIL]], 3
+; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_3:%.*]]
+; CHECK: for.exiting_block.3:
+; CHECK-NEXT: [[CMP_3:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT: br i1 [[CMP_3]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_3:%.*]], !prof [[PROF0]]
+; CHECK: latch.3:
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_2]]
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP6]], [[ADD_2]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV_EPIL]], 4
+; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_4:%.*]]
+; CHECK: for.exiting_block.4:
+; CHECK-NEXT: [[CMP_4:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT: br i1 [[CMP_4]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_4:%.*]], !prof [[PROF0]]
+; CHECK: latch.4:
+; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_3]]
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_4]], align 4
+; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP7]], [[ADD_3]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV_EPIL]], 5
+; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_5:%.*]]
+; CHECK: for.exiting_block.5:
+; CHECK-NEXT: [[CMP_5:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT: br i1 [[CMP_5]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_5:%.*]], !prof [[PROF0]]
+; CHECK: latch.5:
+; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_4]]
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX_5]], align 4
+; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP8]], [[ADD_4]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV_EPIL]], 6
+; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_6:%.*]]
+; CHECK: for.exiting_block.6:
+; CHECK-NEXT: [[CMP_6:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT: br i1 [[CMP_6]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_6:%.*]], !prof [[PROF0]]
+; CHECK: latch.6:
+; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_5]]
+; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX_6]], align 4
+; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP9]], [[ADD_5]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV_EPIL]], 7
+; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_7:%.*]]
+; CHECK: for.exiting_block.7:
+; CHECK-NEXT: [[CMP_7:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT: br i1 [[CMP_7]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_7]], !prof [[PROF0]]
+; CHECK: latch.7:
+; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_6]]
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX_7]], align 4
+; CHECK-NEXT: [[ADD_7]] = add nsw i32 [[TMP10]], [[ADD_6]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV_EPIL]], 8
+; CHECK-NEXT: [[NITER_NEXT_7]] = add i64 [[NITER]], 8
+; CHECK-NEXT: [[EXITCOND_EPIL:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
+; CHECK-NEXT: br i1 [[EXITCOND_EPIL]], label [[LATCHEXIT:%.*]], label [[HEADER]]
+; CHECK: latchexit.unr-lcssa:
+; CHECK-NEXT: [[SUM_0_LCSSA_PH_PH:%.*]] = phi i32 [ [[ADD_7]], [[LATCH_7]] ]
+; CHECK-NEXT: [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_7]], [[LATCH_7]] ]
+; CHECK-NEXT: [[SUM_02_UNR_PH:%.*]] = phi i32 [ [[ADD_7]], [[LATCH_7]] ]
+; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[LATCHEXIT_UNR_LCSSA]], label [[LATCHEXIT1:%.*]]
+; CHECK: header.epil.preheader:
+; CHECK-NEXT: [[INDVARS_IV_EPIL_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_UNR_PH]], [[LATCHEXIT]] ]
+; CHECK-NEXT: [[SUM_02_EPIL_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_02_UNR_PH]], [[LATCHEXIT]] ]
+; CHECK-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT: call void @llvm.assume(i1 [[LCMP_MOD2]])
+; CHECK-NEXT: br label [[HEADER_EPIL:%.*]]
+; CHECK: header.epil:
+; CHECK-NEXT: [[INDVARS_IV_EPIL1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[LATCH_EPIL:%.*]] ], [ [[INDVARS_IV_EPIL_INIT]], [[LATCHEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT: [[SUM_02_EPIL1:%.*]] = phi i32 [ [[ADD_EPIL1:%.*]], [[LATCH_EPIL]] ], [ [[SUM_02_EPIL_INIT]], [[LATCHEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ 0, [[LATCHEXIT_UNR_LCSSA]] ], [ [[EPIL_ITER_NEXT:%.*]], [[LATCH_EPIL]] ]
+; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_EPIL:%.*]]
+; CHECK: for.exiting_block.epil:
+; CHECK-NEXT: [[CMP_EPIL:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT: br i1 [[CMP_EPIL]], label [[OTHEREXIT_LOOPEXIT3:%.*]], label [[LATCH_EPIL]], !prof [[PROF0]]
+; CHECK: latch.epil:
+; CHECK-NEXT: [[ARRAYIDX_EPIL1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_EPIL1]]
+; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX_EPIL1]], align 4
+; CHECK-NEXT: [[ADD_EPIL1]] = add nsw i32 [[TMP12]], [[SUM_02_EPIL1]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_EPIL]] = add i64 [[INDVARS_IV_EPIL1]], 1
+; CHECK-NEXT: [[EXITCOND_EPIL1:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_EPIL]], [[N]]
+; CHECK-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
+; CHECK-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
+; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label [[HEADER_EPIL]], label [[LATCHEXIT_EPILOG_LCSSA:%.*]], !llvm.loop [[LOOP1:![0-9]+]]
+; CHECK: latchexit.epilog-lcssa:
+; CHECK-NEXT: [[SUM_0_LCSSA_PH1:%.*]] = phi i32 [ [[ADD_EPIL1]], [[LATCH_EPIL]] ]
+; CHECK-NEXT: br label [[LATCHEXIT1]]
+; CHECK: latchexit:
+; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA_PH_PH]], [[LATCHEXIT]] ], [ [[SUM_0_LCSSA_PH1]], [[LATCHEXIT_EPILOG_LCSSA]] ]
+; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
+; CHECK: otherexit.loopexit:
+; CHECK-NEXT: br label [[OTHEREXIT:%.*]]
+; CHECK: otherexit.loopexit3:
+; CHECK-NEXT: br label [[OTHEREXIT]]
+; CHECK: otherexit:
+; CHECK-NEXT: [[RVAL:%.*]] = call i32 @foo()
+; CHECK-NEXT: ret i32 [[RVAL]]
+;
+; NOUNROLL-LABEL: @test1(
+; NOUNROLL-NEXT: entry:
+; NOUNROLL-NEXT: br label [[HEADER:%.*]]
+; NOUNROLL: header:
+; NOUNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; NOUNROLL-NEXT: [[SUM_02:%.*]] = phi i32 [ [[ADD:%.*]], [[LATCH]] ], [ 0, [[ENTRY]] ]
+; NOUNROLL-NEXT: br label [[FOR_EXITING_BLOCK:%.*]]
+; NOUNROLL: for.exiting_block:
+; NOUNROLL-NEXT: [[CMP:%.*]] = icmp eq i64 [[N:%.*]], 42
+; NOUNROLL-NEXT: br i1 [[CMP]], label [[OTHEREXIT:%.*]], label [[LATCH]], !prof [[PROF0:![0-9]+]]
+; NOUNROLL: latch:
+; NOUNROLL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; NOUNROLL-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; NOUNROLL-NEXT: [[ADD]] = add nsw i32 [[TMP0]], [[SUM_02]]
+; NOUNROLL-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
+; NOUNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; NOUNROLL-NEXT: br i1 [[EXITCOND]], label [[LATCHEXIT:%.*]], label [[HEADER]]
+; NOUNROLL: latchexit:
+; NOUNROLL-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[ADD]], [[LATCH]] ]
+; NOUNROLL-NEXT: ret i32 [[SUM_0_LCSSA]]
+; NOUNROLL: otherexit:
+; NOUNROLL-NEXT: [[RVAL:%.*]] = call i32 @foo()
+; NOUNROLL-NEXT: ret i32 [[RVAL]]
+;
+entry:
+ br label %header
+
+header:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %latch ], [ 0, %entry ]
+ %sum.02 = phi i32 [ %add, %latch ], [ 0, %entry ]
+ br label %for.exiting_block
+
+for.exiting_block:
+ %cmp = icmp eq i64 %n, 42
+ br i1 %cmp, label %otherexit, label %latch, !prof !0
+
+latch:
+ %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+ %0 = load i32, ptr %arrayidx, align 4
+ %add = add nsw i32 %0, %sum.02
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, %n
+ br i1 %exitcond, label %latchexit, label %header
+
+latchexit: ; preds = %latch
+ %sum.0.lcssa = phi i32 [ %add, %latch ]
+ ret i32 %sum.0.lcssa
+
+otherexit:
+ %rval = call i32 @foo()
+ ret i32 %rval
+}
+
+declare i32 @foo()
+
+!0 = !{!"branch_weights", i32 1, i32 100}
+
+; The other exit is a deoptimize call, so the loop should be unrolled.
+define i32 @test2(ptr nocapture %a, i64 %n) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = freeze i64 [[N:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], -1
+; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP0]], 7
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 7
+; CHECK-NEXT: br i1 [[TMP2]], label [[LATCHEXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
+; CHECK: entry.new:
+; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[TMP0]], [[XTRAITER]]
+; CHECK-NEXT: br label [[HEADER:%.*]]
+; CHECK: header:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[LATCH_7:%.*]] ]
+; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[ADD_7:%.*]], [[LATCH_7]] ]
+; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[LATCH_7]] ]
+; CHECK-NEXT: br label [[FOR_EXITING_BLOCK:%.*]]
+; CHECK: for.exiting_block:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT: br i1 [[CMP]], label [[OTHEREXIT_LOOPEXIT:%.*]], label [[LATCH:%.*]]
+; CHECK: latch:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], [[SUM_02]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_1:%.*]]
+; CHECK: for.exiting_block.1:
+; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT: br i1 [[CMP_1]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_1:%.*]]
+; CHECK: latch.1:
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP4]], [[ADD]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
+; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_2:%.*]]
+; CHECK: for.exiting_block.2:
+; CHECK-NEXT: [[CMP_2:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT: br i1 [[CMP_2]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_2:%.*]]
+; CHECK: latch.2:
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_1]]
+; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP5]], [[ADD_1]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3
+; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_3:%.*]]
+; CHECK: for.exiting_block.3:
+; CHECK-NEXT: [[CMP_3:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT: br i1 [[CMP_3]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_3:%.*]]
+; CHECK: latch.3:
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_2]]
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP6]], [[ADD_2]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 4
+; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_4:%.*]]
+; CHECK: for.exiting_block.4:
+; CHECK-NEXT: [[CMP_4:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT: br i1 [[CMP_4]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_4:%.*]]
+; CHECK: latch.4:
+; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_3]]
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_4]], align 4
+; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP7]], [[ADD_3]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 5
+; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_5:%.*]]
+; CHECK: for.exiting_block.5:
+; CHECK-NEXT: [[CMP_5:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT: br i1 [[CMP_5]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_5:%.*]]
+; CHECK: latch.5:
+; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_4]]
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX_5]], align 4
+; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP8]], [[ADD_4]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 6
+; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_6:%.*]]
+; CHECK: for.exiting_block.6:
+; CHECK-NEXT: [[CMP_6:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT: br i1 [[CMP_6]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_6:%.*]]
+; CHECK: latch.6:
+; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_5]]
+; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX_6]], align 4
+; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP9]], [[ADD_5]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 7
+; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_7:%.*]]
+; CHECK: for.exiting_block.7:
+; CHECK-NEXT: [[CMP_7:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT: br i1 [[CMP_7]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_7]]
+; CHECK: latch.7:
+; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_6]]
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX_7]], align 4
+; CHECK-NEXT: [[ADD_7]] = add nsw i32 [[TMP10]], [[ADD_6]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV]], 8
+; CHECK-NEXT: [[NITER_NEXT_7]] = add i64 [[NITER]], 8
+; CHECK-NEXT: [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
+; CHECK-NEXT: br i1 [[NITER_NCMP_7]], label [[LATCHEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[HEADER]]
+; CHECK: latchexit.unr-lcssa:
+; CHECK-NEXT: [[SUM_0_LCSSA_PH_PH:%.*]] = phi i32 [ [[ADD_7]], [[LATCH_7]] ]
+; CHECK-NEXT: [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_7]], [[LATCH_7]] ]
+; CHECK-NEXT: [[SUM_02_UNR_PH:%.*]] = phi i32 [ [[ADD_7]], [[LATCH_7]] ]
+; CHECK-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT: br i1 [[LCMP_MOD_NOT]], label [[LATCHEXIT_UNR_LCSSA]], label [[HEADER_EPIL_PREHEADER:%.*]]
+; CHECK: header.epil.preheader:
+; CHECK-NEXT: [[INDVARS_IV_EPIL_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_UNR_PH]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT: [[SUM_02_EPIL_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_02_UNR_PH]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT: [[LCMP_MOD3:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT: call void @llvm.assume(i1 [[LCMP_MOD3]])
+; CHECK-NEXT: br label [[HEADER_EPIL:%.*]]
+; CHECK: header.epil:
+; CHECK-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[LATCH_EPIL:%.*]] ], [ [[INDVARS_IV_EPIL_INIT]], [[LATCHEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT: [[SUM_02_EPIL:%.*]] = phi i32 [ [[ADD_EPIL:%.*]], [[LATCH_EPIL]] ], [ [[SUM_02_EPIL_INIT]], [[LATCHEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ 0, [[LATCHEXIT_UNR_LCSSA]] ], [ [[EPIL_ITER_NEXT:%.*]], [[LATCH_EPIL]] ]
+; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_EPIL:%.*]]
+; CHECK: for.exiting_block.epil:
+; CHECK-NEXT: [[CMP_EPIL:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT: br i1 [[CMP_EPIL]], label [[OTHEREXIT_LOOPEXIT3:%.*]], label [[LATCH_EPIL]]
+; CHECK: latch.epil:
+; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_EPIL]]
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX_EPIL]], align 4
+; CHECK-NEXT: [[ADD_EPIL]] = add nsw i32 [[TMP11]], [[SUM_02_EPIL]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_EPIL]] = add i64 [[INDVARS_IV_EPIL]], 1
+; CHECK-NEXT: [[EXITCOND_EPIL:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_EPIL]], [[N]]
+; CHECK-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
+; CHECK-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
+; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label [[HEADER_EPIL]], label [[LATCHEXIT_EPILOG_LCSSA:%.*]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: latchexit.epilog-lcssa:
+; CHECK-NEXT: [[SUM_0_LCSSA_PH1:%.*]] = phi i32 [ [[ADD_EPIL]], [[LATCH_EPIL]] ]
+; CHECK-NEXT: br label [[HEADER_EPIL_PREHEADER]]
+; CHECK: latchexit:
+; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA_PH_PH]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ], [ [[SUM_0_LCSSA_PH1]], [[LATCHEXIT_EPILOG_LCSSA]] ]
+; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
+; CHECK: otherexit.loopexit:
+; CHECK-NEXT: [[SUM_02_LCSSA_PH:%.*]] = phi i32 [ [[SUM_02]], [[FOR_EXITING_BLOCK]] ], [ [[ADD]], [[FOR_EXITING_BLOCK_1]] ], [ [[ADD_1]], [[FOR_EXITING_BLOCK_2]] ], [ [[ADD_2]], [[FOR_EXITING_BLOCK_3]] ], [ [[ADD_3]], [[FOR_EXITING_BLOCK_4]] ], [ [[ADD_4]], [[FOR_EXITING_BLOCK_5]] ], [ [[ADD_5]], [[FOR_EXITING_BLOCK_6]] ], [ [[ADD_6]], [[FOR_EXITING_BLOCK_7]] ]
+; CHECK-NEXT: br label [[OTHEREXIT:%.*]]
+; CHECK: otherexit.loopexit4:
+; CHECK-NEXT: [[SUM_02_LCSSA_PH4:%.*]] = phi i32 [ [[SUM_02_EPIL]], [[FOR_EXITING_BLOCK_EPIL]] ]
+; CHECK-NEXT: br label [[OTHEREXIT]]
+; CHECK: otherexit:
+; CHECK-NEXT: [[SUM_02_LCSSA:%.*]] = phi i32 [ [[SUM_02_LCSSA_PH]], [[OTHEREXIT_LOOPEXIT]] ], [ [[SUM_02_LCSSA_PH4]], [[OTHEREXIT_LOOPEXIT3]] ]
+; CHECK-NEXT: [[RVAL:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 [[SUM_02_LCSSA]]) ]
+; CHECK-NEXT: ret i32 [[RVAL]]
+;
+; NOUNROLL-LABEL: @test2(
+; NOUNROLL-NEXT: entry:
+; NOUNROLL-NEXT: br label [[HEADER:%.*]]
+; NOUNROLL: header:
+; NOUNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; NOUNROLL-NEXT: [[SUM_02:%.*]] = phi i32 [ [[ADD:%.*]], [[LATCH]] ], [ 0, [[ENTRY]] ]
+; NOUNROLL-NEXT: br label [[FOR_EXITING_BLOCK:%.*]]
+; NOUNROLL: for.exiting_block:
+; NOUNROLL-NEXT: [[CMP:%.*]] = icmp eq i64 [[N:%.*]], 42
+; NOUNROLL-NEXT: br i1 [[CMP]], label [[OTHEREXIT:%.*]], label [[LATCH]]
+; NOUNROLL: latch:
+; NOUNROLL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; NOUNROLL-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; NOUNROLL-NEXT: [[ADD]] = add nsw i32 [[TMP0]], [[SUM_02]]
+; NOUNROLL-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
+; NOUNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; NOUNROLL-NEXT: br i1 [[EXITCOND]], label [[LATCHEXIT:%.*]], label [[HEADER]]
+; NOUNROLL: latchexit:
+; NOUNROLL-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[ADD]], [[LATCH]] ]
+; NOUNROLL-NEXT: ret i32 [[SUM_0_LCSSA]]
+; NOUNROLL: otherexit:
+; NOUNROLL-NEXT: [[SUM_02_LCSSA:%.*]] = phi i32 [ [[SUM_02]], [[FOR_EXITING_BLOCK]] ]
+; NOUNROLL-NEXT: [[RVAL:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 [[SUM_02_LCSSA]]) ]
+; NOUNROLL-NEXT: ret i32 [[RVAL]]
+;
+entry:
+ br label %header
+
+header:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %latch ], [ 0, %entry ]
+ %sum.02 = phi i32 [ %add, %latch ], [ 0, %entry ]
+ br label %for.exiting_block
+
+for.exiting_block:
+ %cmp = icmp eq i64 %n, 42
+ br i1 %cmp, label %otherexit, label %latch
+
+latch:
+ %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+ %0 = load i32, ptr %arrayidx, align 4
+ %add = add nsw i32 %0, %sum.02
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, %n
+ br i1 %exitcond, label %latchexit, label %header
+
+latchexit: ; preds = %latch
+ %sum.0.lcssa = phi i32 [ %add, %latch ]
+ ret i32 %sum.0.lcssa
+
+otherexit:
+ %rval = call i32(...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 %sum.02) ]
+ ret i32 %rval
+}
+
+declare i32 @llvm.experimental.deoptimize.i32(...)
+
+define i32 @test3(ptr nocapture %a, i64 %n) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[HEADER:%.*]]
+; CHECK: header:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[ADD:%.*]], [[LATCH]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: br label [[FOR_EXITING_BLOCK:%.*]]
+; CHECK: for.exiting_block:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[N:%.*]], 42
+; CHECK-NEXT: br i1 [[CMP]], label [[OTHEREXIT:%.*]], label [[LATCH]], !prof [[PROF4:![0-9]+]]
+; CHECK: latch:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP0]], [[SUM_02]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LATCHEXIT:%.*]], label [[HEADER]]
+; CHECK: latchexit:
+; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[ADD]], [[LATCH]] ]
+; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
+; CHECK: otherexit:
+; CHECK-NEXT: [[RVAL:%.*]] = call i32 @foo()
+; CHECK-NEXT: ret i32 [[RVAL]]
+;
+; NOUNROLL-LABEL: @test3(
+; NOUNROLL-NEXT: entry:
+; NOUNROLL-NEXT: br label [[HEADER:%.*]]
+; NOUNROLL: header:
+; NOUNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; NOUNROLL-NEXT: [[SUM_02:%.*]] = phi i32 [ [[ADD:%.*]], [[LATCH]] ], [ 0, [[ENTRY]] ]
+; NOUNROLL-NEXT: br label [[FOR_EXITING_BLOCK:%.*]]
+; NOUNROLL: for.exiting_block:
+; NOUNROLL-NEXT: [[CMP:%.*]] = icmp eq i64 [[N:%.*]], 42
+; NOUNROLL-NEXT: br i1 [[CMP]], label [[OTHEREXIT:%.*]], label [[LATCH]], !prof [[PROF1:![0-9]+]]
+; NOUNROLL: latch:
+; NOUNROLL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; NOUNROLL-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; NOUNROLL-NEXT: [[ADD]] = add nsw i32 [[TMP0]], [[SUM_02]]
+; NOUNROLL-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
+; NOUNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; NOUNROLL-NEXT: br i1 [[EXITCOND]], label [[LATCHEXIT:%.*]], label [[HEADER]]
+; NOUNROLL: latchexit:
+; NOUNROLL-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[ADD]], [[LATCH]] ]
+; NOUNROLL-NEXT: ret i32 [[SUM_0_LCSSA]]
+; NOUNROLL: otherexit:
+; NOUNROLL-NEXT: [[RVAL:%.*]] = call i32 @foo()
+; NOUNROLL-NEXT: ret i32 [[RVAL]]
+;
+entry:
+ br label %header
+
+header:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %latch ], [ 0, %entry ]
+ %sum.02 = phi i32 [ %add, %latch ], [ 0, %entry ]
+ br label %for.exiting_block
+
+for.exiting_block:
+ %cmp = icmp eq i64 %n, 42
+ br i1 %cmp, label %otherexit, label %latch, !prof !2
+
+latch:
+ %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+ %0 = load i32, ptr %arrayidx, align 4
+ %add = add nsw i32 %0, %sum.02
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, %n
+ br i1 %exitcond, label %latchexit, label %header
+
+latchexit: ; preds = %latch
+ %sum.0.lcssa = phi i32 [ %add, %latch ]
+ ret i32 %sum.0.lcssa
+
+otherexit:
+ %rval = call i32 @foo()
+ ret i32 %rval
+}
+
+!2 = !{!"branch_weights", i32 1, i32 2}
+
+
+define i32 @test4(ptr nocapture %a, i64 %n) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP3:%.*]] = freeze i64 [[N1:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP3]], -1
+; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP3]], 7
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 7
+; CHECK-NEXT: br i1 [[TMP2]], label [[LATCHEXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
+; CHECK: entry.new:
+; CHECK-NEXT: [[N:%.*]] = sub i64 [[TMP3]], [[XTRAITER]]
+; CHECK-NEXT: br label [[HEADER:%.*]]
+; CHECK: header:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[CONTINUE_7:%.*]] ]
+; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[ADD_7:%.*]], [[CONTINUE_7]] ]
+; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT:%.*]], [[CONTINUE_7]] ]
+; CHECK-NEXT: br label [[FOR_EXITING_BLOCK:%.*]]
+; CHECK: otherexitingblock:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[SUM_02]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[N1]], 42
+; CHECK-NEXT: br i1 [[CMP]], label [[OTHEREXIT_LOOPEXIT:%.*]], label [[LATCH:%.*]], !prof [[PROF5:![0-9]+]]
+; CHECK: latch:
+; CHECK-NEXT: br label [[LATCH_1:%.*]]
+; CHECK: otherexitingblock.1:
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT1]]
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP4]], [[ADD]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
+; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[N1]], 42
+; CHECK-NEXT: br i1 [[CMP_1]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_8:%.*]], !prof [[PROF5]]
+; CHECK: latch.1:
+; CHECK-NEXT: br label [[LATCH_2:%.*]]
+; CHECK: otherexitingblock.2:
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_1]]
+; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP5]], [[ADD_1]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3
+; CHECK-NEXT: [[CMP_2:%.*]] = icmp eq i64 [[N1]], 42
+; CHECK-NEXT: br i1 [[CMP_2]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_9:%.*]], !prof [[PROF5]]
+; CHECK: latch.2:
+; CHECK-NEXT: br label [[LATCH_3:%.*]]
+; CHECK: otherexitingblock.3:
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_2]]
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP6]], [[ADD_2]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 4
+; CHECK-NEXT: [[CMP_3:%.*]] = icmp eq i64 [[N1]], 42
+; CHECK-NEXT: br i1 [[CMP_3]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_10:%.*]], !prof [[PROF5]]
+; CHECK: latch.3:
+; CHECK-NEXT: br label [[LATCH_4:%.*]]
+; CHECK: otherexitingblock.4:
+; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_3]]
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_4]], align 4
+; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP7]], [[ADD_3]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 5
+; CHECK-NEXT: [[CMP_4:%.*]] = icmp eq i64 [[N1]], 42
+; CHECK-NEXT: br i1 [[CMP_4]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_11:%.*]], !prof [[PROF5]]
+; CHECK: latch.4:
+; CHECK-NEXT: br label [[LATCH_5:%.*]]
+; CHECK: otherexitingblock.5:
+; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_4]]
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX_5]], align 4
+; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP8]], [[ADD_4]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 6
+; CHECK-NEXT: [[CMP_5:%.*]] = icmp eq i64 [[N1]], 42
+; CHECK-NEXT: br i1 [[CMP_5]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_12:%.*]], !prof [[PROF5]]
+; CHECK: latch.5:
+; CHECK-NEXT: br label [[LATCH_6:%.*]]
+; CHECK: otherexitingblock.6:
+; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_5]]
+; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX_6]], align 4
+; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP9]], [[ADD_5]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 7
+; CHECK-NEXT: [[CMP_6:%.*]] = icmp eq i64 [[N1]], 42
+; CHECK-NEXT: br i1 [[CMP_6]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_13:%.*]], !prof [[PROF5]]
+; CHECK: latch.6:
+; CHECK-NEXT: br label [[LATCH_7:%.*]]
+; CHECK: otherexitingblock.7:
+; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_6]]
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX_7]], align 4
+; CHECK-NEXT: [[ADD_7]] = add nsw i32 [[TMP10]], [[ADD_6]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV]], 8
+; CHECK-NEXT: [[CMP_7:%.*]] = icmp eq i64 [[N1]], 42
+; CHECK-NEXT: br i1 [[CMP_7]], label [[OTHEREXIT_LOOPEXIT]], label [[CONTINUE_7]], !prof [[PROF5]]
+; CHECK: latch.7:
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[NITER]], 8
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LATCHEXIT:%.*]], label [[HEADER]]
+; CHECK: latchexit.unr-lcssa:
+; CHECK-NEXT: [[SUM_0_LCSSA_PH_PH:%.*]] = phi i32 [ [[ADD_7]], [[CONTINUE_7]] ]
+; CHECK-NEXT: [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_7]], [[CONTINUE_7]] ]
+; CHECK-NEXT: [[SUM_02_UNR_PH:%.*]] = phi i32 [ [[ADD_7]], [[CONTINUE_7]] ]
+; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[LATCHEXIT_UNR_LCSSA]], label [[LATCHEXIT1:%.*]]
+; CHECK: header.epil.preheader:
+; CHECK-NEXT: [[INDVARS_IV_EPIL_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_UNR_PH]], [[LATCHEXIT]] ]
+; CHECK-NEXT: [[SUM_02_EPIL_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_02_UNR_PH]], [[LATCHEXIT]] ]
+; CHECK-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT: call void @llvm.assume(i1 [[LCMP_MOD2]])
+; CHECK-NEXT: br label [[HEADER_EPIL:%.*]]
+; CHECK: header.epil:
+; CHECK-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_EPIL_INIT]], [[LATCHEXIT_UNR_LCSSA]] ], [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[CONTINUE_EPIL:%.*]] ]
+; CHECK-NEXT: [[SUM_02_EPIL:%.*]] = phi i32 [ [[SUM_02_EPIL_INIT]], [[LATCHEXIT_UNR_LCSSA]] ], [ [[ADD_EPIL:%.*]], [[CONTINUE_EPIL]] ]
+; CHECK-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ 0, [[LATCHEXIT_UNR_LCSSA]] ], [ [[EPIL_ITER_NEXT:%.*]], [[CONTINUE_EPIL]] ]
+; CHECK-NEXT: br label [[LATCH_EPIL:%.*]]
+; CHECK: otherexitingblock.epil:
+; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_EPIL]]
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX_EPIL]], align 4
+; CHECK-NEXT: [[ADD_EPIL]] = add nsw i32 [[TMP11]], [[SUM_02_EPIL]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_EPIL]] = add i64 [[INDVARS_IV_EPIL]], 1
+; CHECK-NEXT: [[CMP_EPIL:%.*]] = icmp eq i64 [[N1]], 42
+; CHECK-NEXT: br i1 [[CMP_EPIL]], label [[OTHEREXIT_LOOPEXIT3:%.*]], label [[CONTINUE_EPIL]], !prof [[PROF5]]
+; CHECK: latch.epil:
+; CHECK-NEXT: [[EXITCOND_EPIL:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_EPIL]], [[N1]]
+; CHECK-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
+; CHECK-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
+; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label [[HEADER_EPIL]], label [[LATCHEXIT_EPILOG_LCSSA:%.*]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK: latchexit.epilog-lcssa:
+; CHECK-NEXT: [[SUM_0_LCSSA_PH1:%.*]] = phi i32 [ [[ADD_EPIL]], [[CONTINUE_EPIL]] ]
+; CHECK-NEXT: br label [[LATCHEXIT1]]
+; CHECK: latchexit:
+; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA_PH_PH]], [[LATCHEXIT]] ], [ [[SUM_0_LCSSA_PH1]], [[LATCHEXIT_EPILOG_LCSSA]] ]
+; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
+; CHECK: otherexit.loopexit:
+; CHECK-NEXT: br label [[OTHEREXIT:%.*]]
+; CHECK: otherexit.loopexit3:
+; CHECK-NEXT: br label [[OTHEREXIT]]
+; CHECK: otherexit:
+; CHECK-NEXT: [[RVAL:%.*]] = call i32 @foo()
+; CHECK-NEXT: ret i32 [[RVAL]]
+;
+; NOUNROLL-LABEL: @test4(
+; NOUNROLL-NEXT: entry:
+; NOUNROLL-NEXT: br label [[HEADER:%.*]]
+; NOUNROLL: header:
+; NOUNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LATCH:%.*]] ]
+; NOUNROLL-NEXT: [[SUM_02:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[LATCH]] ]
+; NOUNROLL-NEXT: br label [[FOR_EXITING_BLOCK:%.*]]
+; NOUNROLL: otherexitingblock:
+; NOUNROLL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; NOUNROLL-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; NOUNROLL-NEXT: [[ADD]] = add nsw i32 [[TMP0]], [[SUM_02]]
+; NOUNROLL-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
+; NOUNROLL-NEXT: [[CMP:%.*]] = icmp eq i64 [[N:%.*]], 42
+; NOUNROLL-NEXT: br i1 [[CMP]], label [[OTHEREXIT:%.*]], label [[LATCH]], !prof [[PROF2:![0-9]+]]
+; NOUNROLL: latch:
+; NOUNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; NOUNROLL-NEXT: br i1 [[EXITCOND]], label [[LATCHEXIT:%.*]], label [[HEADER]]
+; NOUNROLL: latchexit:
+; NOUNROLL-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[ADD]], [[LATCH]] ]
+; NOUNROLL-NEXT: ret i32 [[SUM_0_LCSSA]]
+; NOUNROLL: otherexit:
+; NOUNROLL-NEXT: [[RVAL:%.*]] = call i32 @foo()
+; NOUNROLL-NEXT: ret i32 [[RVAL]]
+;
+entry:
+ br label %header
+
+header:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %latch ]
+ %sum.02 = phi i32 [ 0, %entry ], [ %add, %latch ]
+ br label %otherexitingblock
+
+otherexitingblock:
+ %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+ %0 = load i32, ptr %arrayidx, align 4
+ %add = add nsw i32 %0, %sum.02
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %cmp = icmp eq i64 %n, 42
+ br i1 %cmp, label %otherexit, label %latch, !prof !3
+
+latch:
+ %exitcond = icmp eq i64 %indvars.iv.next, %n
+ br i1 %exitcond, label %latchexit, label %header
+
+latchexit:
+ %sum.0.lcssa = phi i32 [ %add, %latch ]
+ ret i32 %sum.0.lcssa
+
+otherexit:
+ %rval = call i32 @foo()
+ ret i32 %rval
+}
+
+!3 = !{!"branch_weights", i32 1, i32 200}
>From 5a3afa30a2ed126c54bd7aa76773d49892cf727e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Sedl=C3=A1=C4=8Dek?= <mr.mareksedlacek at gmail.com>
Date: Thu, 15 Jan 2026 21:01:48 +0000
Subject: [PATCH 2/5] Moved computation into LoopUtils and optimized conditions
---
.../include/llvm/Transforms/Utils/LoopUtils.h | 9 +++
.../Transforms/Utils/LoopUnrollRuntime.cpp | 76 +++++--------------
llvm/lib/Transforms/Utils/LoopUtils.cpp | 35 +++++++++
.../LoopUnroll/multi-exit-loop-unroll.ll | 4 +-
4 files changed, 64 insertions(+), 60 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index 0f7ee82a6ff64..cc6948ea8936c 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -402,6 +402,15 @@ BranchProbability getBranchProbability(BranchInst *B, bool ForFirstTarget);
bool setBranchProbability(BranchInst *B, BranchProbability P,
bool ForFirstTarget);
+/// Calculates the edge probability from Src to Dst.
+/// Dst has to be a successor to Src.
+/// This uses branch_probability metadata directly. If data are missing or
+/// probability cannot be computed, then unknown probability is returned.
+/// This does not use BranchProbabilityInfo and the values computed by this
+/// will vary from BPI because BPI has its own more advanced heuristics to
+/// determine probabilities without metadata.
+BranchProbability getBranchProbability(BasicBlock *Src, BasicBlock *Dst);
+
/// Check inner loop (L) backedge count is known to be invariant on all
/// iterations of its outer loop. If the loop has no parent, this is trivially
/// true.
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 5bfdc185b642b..d886e5f92b914 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -536,50 +536,7 @@ static Loop *CloneLoopBlocks(Loop *L, Value *NewIter,
return NewLoop;
}
-// Calculates the edge probability from Src to Dst.
-// Dst has to be a successor to Src.
-// This uses branch_probability metadata directly. If data are missing or
-// probability cannot be computed, then std::nullopt is returned.
-// This does not use BranchProbabilityInfo and the values computed by this
-// will vary from BPI because BPI has its own more advanced heuristics to
-// determine probabilities without metadata.
-static std::optional<BranchProbability>
-computeBranchProbabilityUsingMetadata(BasicBlock *Src, BasicBlock *Dst) {
- assert(Src != Dst && "Passed in same source as destination");
-
- Instruction *TI = Src->getTerminator();
- if (!TI || TI->getNumSuccessors() == 0)
- return BranchProbability::getZero();
-
- auto NumSucc = TI->getNumSuccessors();
- SmallVector<uint32_t, 4> Weights;
-
- if (!extractBranchWeights(*TI, Weights)) {
- // No metadata
- return std::nullopt;
- }
- assert(NumSucc == Weights.size() && "Missing weights in branch_probability");
-
- uint64_t Total = 0;
- uint32_t Numerator = 0;
- for (auto [i, Weight] : llvm::enumerate(Weights)) {
- if (TI->getSuccessor(i) == Dst)
- Numerator += Weight;
- Total += Weight;
- }
-
- // Total of edges might be 0 if the metadata is incorrect/set by hand
- // or missing. In such case return here to avoid division by 0 later on.
- // There might also be a case where the value of Total cannot fit into
- // uint32_t, in such case, just bail out.
- if (Total == 0 || Total > std::numeric_limits<uint32_t>::max())
- return std::nullopt;
-
- return BranchProbability(Numerator, Total);
-}
-
-/// Returns true if we can profitably unroll the multi-exit loop L. Currently,
-/// we return true only if UnrollRuntimeMultiExit is set to true.
+/// Returns true if we can profitably unroll the multi-exit loop L.
static bool canProfitablyRuntimeUnrollMultiExitLoop(
Loop *L, const TargetTransformInfo *TTI,
SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit,
@@ -600,41 +557,42 @@ static bool canProfitablyRuntimeUnrollMultiExitLoop(
// We avoid unrolling loops that have more than two exiting blocks. This
// limits the total number of branches in the unrolled loop to be atmost
// the unroll factor (since one of the exiting blocks is the latch block).
+
+ // Allow unrolling of loops with no non latch exit blocks.
+ if (OtherExits.size() == 0)
+ return true;
+
SmallVector<BasicBlock*, 4> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
- if (ExitingBlocks.size() > 2)
+ if (ExitingBlocks.size() > 2 || OtherExits.size() != 1)
return false;
- // Allow unrolling of loops with no non latch exit blocks.
- if (OtherExits.size() == 0)
+ // When UnrollRuntimeOtherExitPredictable is specified, we assume the other
+ // exit branch is predictable even if it has no deoptimize call.
+ if (UnrollRuntimeOtherExitPredictable)
return true;
// The second heuristic is that L has one exit other than the latchexit and
// that exit is highly predictable.
if (TTI) {
- if (OtherExits.size() != 1)
- return false;
BasicBlock *LatchBB = L->getLoopLatch();
assert(LatchBB && "Expected loop to have a latch");
BasicBlock *NonLatchExitingBlock =
(ExitingBlocks[0] == LatchBB) ? ExitingBlocks[1] : ExitingBlocks[0];
- auto BranchProb = computeBranchProbabilityUsingMetadata(
- NonLatchExitingBlock, OtherExits[0]);
- // If BranchProbability could not be extracted (returns nullopt), then
+ auto BranchProb =
+ llvm::getBranchProbability(NonLatchExitingBlock, OtherExits[0]);
+ // If the branch probability could not be extracted (unknown is returned),
+ // don't return here; fall through to the deoptimize-block check below.
- if (BranchProb) {
+ if (!BranchProb.isUnknown()) {
auto Threshold = TTI->getPredictableBranchThreshold().getCompl();
- return UnrollRuntimeOtherExitPredictable || *BranchProb < Threshold;
+ return BranchProb < Threshold;
}
}
// We know that deoptimize blocks are rarely taken, which also implies the
- // branch leading to the deoptimize block is highly predictable. When
- // UnrollRuntimeOtherExitPredictable is specified, we assume the other exit
- // branch is predictable even if it has no deoptimize call.
+ // branch leading to the deoptimize block is highly predictable.
return (OtherExits.size() == 1 &&
- (UnrollRuntimeOtherExitPredictable ||
- OtherExits[0]->getPostdominatingDeoptimizeCall()));
+ OtherExits[0]->getPostdominatingDeoptimizeCall());
// TODO: These can be fine-tuned further to consider code size or deopt states
// that are captured by the deoptimize exit block.
// Also, we can extend this to support more cases, if we actually
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 9c2710a22a0db..42112a78adb22 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -1020,6 +1020,41 @@ BranchProbability llvm::getBranchProbability(BranchInst *B,
return BranchProbability::getBranchProbability(Weight0, Denominator);
}
+BranchProbability llvm::getBranchProbability(BasicBlock *Src, BasicBlock *Dst) {
+ assert(Src != Dst && "Passed in same source as destination");
+
+ Instruction *TI = Src->getTerminator();
+ if (!TI || TI->getNumSuccessors() == 0)
+ return BranchProbability::getZero();
+
+ auto NumSucc = TI->getNumSuccessors();
+ SmallVector<uint32_t, 4> Weights;
+
+ if (!extractBranchWeights(*TI, Weights)) {
+ // No metadata
+ return BranchProbability::getUnknown();
+ }
+ assert(NumSucc == Weights.size() && "Missing weights in branch_probability");
+
+ uint64_t Total = 0;
+ uint32_t Numerator = 0;
+ for (auto [i, Weight] : llvm::enumerate(Weights)) {
+ if (TI->getSuccessor(i) == Dst)
+ Numerator += Weight;
+ Total += Weight;
+ }
+
+ // The sum of the edge weights might be 0 if the metadata is incorrect, set
+ // by hand, or missing. In that case return here to avoid a division by 0
+ // later on. The sum might also not fit into uint32_t; in that case just
+ // bail out as well.
+ if (Total == 0 || Total > std::numeric_limits<uint32_t>::max())
+ return BranchProbability::getUnknown();
+
+ return BranchProbability(Numerator, Total);
+}
+
+
bool llvm::setBranchProbability(BranchInst *B, BranchProbability P,
bool ForFirstTarget) {
if (B->getNumSuccessors() != 2)
diff --git a/llvm/test/Transforms/LoopUnroll/multi-exit-loop-unroll.ll b/llvm/test/Transforms/LoopUnroll/multi-exit-loop-unroll.ll
index f006a2dccda26..4d9e9888f131a 100644
--- a/llvm/test/Transforms/LoopUnroll/multi-exit-loop-unroll.ll
+++ b/llvm/test/Transforms/LoopUnroll/multi-exit-loop-unroll.ll
@@ -2,6 +2,7 @@
; RUN: opt < %s -passes=loop-unroll -unroll-runtime=true -verify-dom-info -verify-loop-info -S | FileCheck %s
; RUN: opt < %s -passes=loop-unroll -unroll-runtime=true -verify-dom-info -verify-loop-info -unroll-runtime-multi-exit=false -S | FileCheck %s -check-prefix=NOUNROLL
+; Multi exit loop with predictable exit -- unroll
define i32 @test1(ptr nocapture %a, i64 %n) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: entry:
@@ -385,6 +386,7 @@ otherexit:
declare i32 @llvm.experimental.deoptimize.i32(...)
+; multi exit loop where the exits are not predictable -- no unroll
define i32 @test3(ptr nocapture %a, i64 %n) {
; CHECK-LABEL: @test3(
; CHECK-NEXT: entry:
@@ -465,7 +467,7 @@ otherexit:
!2 = !{!"branch_weights", i32 1, i32 2}
-
+; multi exit loop with high predictability of exits -- unroll
define i32 @test4(ptr nocapture %a, i64 %n) {
; CHECK-LABEL: @test4(
; CHECK-NEXT: entry:
>From a27a8dbffbfe9158deb12c0fe2e5e2f7893325b4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Sedl=C3=A1=C4=8Dek?= <mr.mareksedlacek at gmail.com>
Date: Tue, 20 Jan 2026 17:59:10 +0000
Subject: [PATCH 3/5] Fixed formatting and early exit conditions
---
.../include/llvm/Transforms/Utils/LoopUtils.h | 18 +++++------
.../Transforms/Utils/LoopUnrollRuntime.cpp | 30 +++++++++----------
llvm/lib/Transforms/Utils/LoopUtils.cpp | 1 -
3 files changed, 24 insertions(+), 25 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index cc6948ea8936c..f41fe51762ff0 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -393,6 +393,15 @@ bool setLoopProbability(Loop *L, BranchProbability P);
/// second target label, or vice-versa if \p ForFirstTarget is false.
BranchProbability getBranchProbability(BranchInst *B, bool ForFirstTarget);
+/// Calculates the edge probability from Src to Dst.
+/// Dst has to be a successor to Src.
+/// This uses branch_weights metadata directly. If data are missing or
+/// probability cannot be computed, then unknown probability is returned.
+/// This does not use BranchProbabilityInfo and the values computed by this
+/// will vary from BPI because BPI has its own more advanced heuristics to
+/// determine probabilities even without branch_weights metadata.
+BranchProbability getBranchProbability(BasicBlock *Src, BasicBlock *Dst);
+
/// Set branch weight metadata for \p B to indicate that \p P and `1 - P` are
/// the probabilities of control flowing to its first and second target labels,
/// respectively, or vice-versa if \p ForFirstTarget is false. Return false if
@@ -402,15 +411,6 @@ BranchProbability getBranchProbability(BranchInst *B, bool ForFirstTarget);
bool setBranchProbability(BranchInst *B, BranchProbability P,
bool ForFirstTarget);
-/// Calculates the edge probability from Src to Dst.
-/// Dst has to be a successor to Src.
-/// This uses branch_probability metadata directly. If data are missing or
-/// probability cannot be computed, then unknown probability is returned.
-/// This does not use BranchProbabilityInfo and the values computed by this
-/// will vary from BPI because BPI has its own more advanced heuristics to
-/// determine probabilities without metadata.
-BranchProbability getBranchProbability(BasicBlock *Src, BasicBlock *Dst);
-
/// Check inner loop (L) backedge count is known to be invariant on all
/// iterations of its outer loop. If the loop has no parent, this is trivially
/// true.
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index d886e5f92b914..0cfd4a59bb4e0 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -538,7 +538,7 @@ static Loop *CloneLoopBlocks(Loop *L, Value *NewIter,
/// Returns true if we can profitably unroll the multi-exit loop L.
static bool canProfitablyRuntimeUnrollMultiExitLoop(
- Loop *L, const TargetTransformInfo *TTI,
+ Loop *L, const TargetTransformInfo *TTI,
SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit,
bool UseEpilogRemainder) {
@@ -557,14 +557,16 @@ static bool canProfitablyRuntimeUnrollMultiExitLoop(
// We avoid unrolling loops that have more than two exiting blocks. This
// limits the total number of branches in the unrolled loop to be atmost
// the unroll factor (since one of the exiting blocks is the latch block).
-
+ SmallVector<BasicBlock*, 4> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ if (ExitingBlocks.size() > 2)
+ return false;
+
// Allow unrolling of loops with no non latch exit blocks.
if (OtherExits.size() == 0)
return true;
-
- SmallVector<BasicBlock*, 4> ExitingBlocks;
- L->getExitingBlocks(ExitingBlocks);
- if (ExitingBlocks.size() > 2 || OtherExits.size() != 1)
+
+ if (OtherExits.size() != 1)
return false;
// When UnrollRuntimeOtherExitPredictable is specified, we assume the other
@@ -573,7 +575,7 @@ static bool canProfitablyRuntimeUnrollMultiExitLoop(
return true;
// The second heuristic is that L has one exit other than the latchexit and
- // that exit is highly predictable.
+ // that exit is highly unlikely.
if (TTI) {
BasicBlock *LatchBB = L->getLoopLatch();
assert(LatchBB && "Expected loop to have a latch");
@@ -588,11 +590,10 @@ static bool canProfitablyRuntimeUnrollMultiExitLoop(
return BranchProb < Threshold;
}
}
-
- // We know that deoptimize blocks are rarely taken, which also implies the
- // branch leading to the deoptimize block is highly predictable.
- return (OtherExits.size() == 1 &&
- OtherExits[0]->getPostdominatingDeoptimizeCall());
+
+ // We know that deoptimize blocks are rarely taken, which also implies the
+ // branch leading to the deoptimize block is highly unlikely.
+ return OtherExits[0]->getPostdominatingDeoptimizeCall();
// TODO: These can be fine-tuned further to consider code size or deopt states
// that are captured by the deoptimize exit block.
// Also, we can extend this to support more cases, if we actually
@@ -733,9 +734,8 @@ bool llvm::UnrollRuntimeLoopRemainder(
// Otherwise perform multi-exit unrolling, if either the target indicates
// it is profitable or the general profitability heuristics apply.
if (!RuntimeUnrollMultiExit &&
- !canProfitablyRuntimeUnrollMultiExitLoop(L, TTI, OtherExits,
- LatchExit,
- UseEpilogRemainder)) {
+ !canProfitablyRuntimeUnrollMultiExitLoop(
+ L, TTI, OtherExits, LatchExit, UseEpilogRemainder)) {
LLVM_DEBUG(dbgs() << "Multiple exit/exiting blocks in loop and "
"multi-exit unrolling not enabled!\n");
return false;
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 42112a78adb22..19afd1f126705 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -1054,7 +1054,6 @@ BranchProbability llvm::getBranchProbability(BasicBlock *Src, BasicBlock *Dst) {
return BranchProbability(Numerator, Total);
}
-
bool llvm::setBranchProbability(BranchInst *B, BranchProbability P,
bool ForFirstTarget) {
if (B->getNumSuccessors() != 2)
>From 1d38d3b4b6e17b41a9e1b0561efb3ac1ee329e67 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Sedl=C3=A1=C4=8Dek?= <mr.mareksedlacek at gmail.com>
Date: Fri, 23 Jan 2026 12:15:49 +0000
Subject: [PATCH 4/5] Fixed typos and simplified test checks
---
llvm/lib/Transforms/Utils/LoopUtils.cpp | 2 +-
.../LoopUnroll/multi-exit-loop-unroll.ll | 515 +-----------------
2 files changed, 10 insertions(+), 507 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 19afd1f126705..1ff260b1bf4d4 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -1034,7 +1034,7 @@ BranchProbability llvm::getBranchProbability(BasicBlock *Src, BasicBlock *Dst) {
// No metadata
return BranchProbability::getUnknown();
}
- assert(NumSucc == Weights.size() && "Missing weights in branch_probability");
+ assert(NumSucc == Weights.size() && "Missing weights in branch_weights");
uint64_t Total = 0;
uint32_t Numerator = 0;
diff --git a/llvm/test/Transforms/LoopUnroll/multi-exit-loop-unroll.ll b/llvm/test/Transforms/LoopUnroll/multi-exit-loop-unroll.ll
index 4d9e9888f131a..a115483b4f1ba 100644
--- a/llvm/test/Transforms/LoopUnroll/multi-exit-loop-unroll.ll
+++ b/llvm/test/Transforms/LoopUnroll/multi-exit-loop-unroll.ll
@@ -5,160 +5,10 @@
; Multi exit loop with predictable exit -- unroll
define i32 @test1(ptr nocapture %a, i64 %n) {
; CHECK-LABEL: @test1(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = freeze i64 [[N:%.*]]
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], -1
-; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP0]], 7
-; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 7
-; CHECK-NEXT: br i1 [[TMP2]], label [[LATCHEXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
-; CHECK: entry.new:
-; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[TMP0]], [[XTRAITER]]
-; CHECK-NEXT: br label [[HEADER:%.*]]
-; CHECK: header:
-; CHECK-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[LATCH_7:%.*]] ]
-; CHECK-NEXT: [[SUM_02_EPIL:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[ADD_7:%.*]], [[LATCH_7]] ]
-; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[LATCH_7]] ]
-; CHECK-NEXT: br label [[FOR_EXITING_BLOCK:%.*]]
-; CHECK: for.exiting_block:
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[N]], 42
-; CHECK-NEXT: br i1 [[CMP]], label [[OTHEREXIT_LOOPEXIT:%.*]], label [[LATCH:%.*]], !prof [[PROF0:![0-9]+]]
-; CHECK: latch:
-; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV_EPIL]]
-; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX_EPIL]], align 4
-; CHECK-NEXT: [[ADD_EPIL:%.*]] = add nsw i32 [[TMP11]], [[SUM_02_EPIL]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV_EPIL]], 1
-; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_1:%.*]]
-; CHECK: for.exiting_block.1:
-; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[N]], 42
-; CHECK-NEXT: br i1 [[CMP_1]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_1:%.*]], !prof [[PROF0]]
-; CHECK: latch.1:
-; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
-; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
-; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP4]], [[ADD_EPIL]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_EPIL]], 2
-; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_2:%.*]]
-; CHECK: for.exiting_block.2:
-; CHECK-NEXT: [[CMP_2:%.*]] = icmp eq i64 [[N]], 42
-; CHECK-NEXT: br i1 [[CMP_2]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_2:%.*]], !prof [[PROF0]]
-; CHECK: latch.2:
-; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_1]]
-; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4
-; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP5]], [[ADD_1]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV_EPIL]], 3
-; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_3:%.*]]
-; CHECK: for.exiting_block.3:
-; CHECK-NEXT: [[CMP_3:%.*]] = icmp eq i64 [[N]], 42
-; CHECK-NEXT: br i1 [[CMP_3]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_3:%.*]], !prof [[PROF0]]
-; CHECK: latch.3:
-; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_2]]
-; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
-; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP6]], [[ADD_2]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV_EPIL]], 4
-; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_4:%.*]]
-; CHECK: for.exiting_block.4:
-; CHECK-NEXT: [[CMP_4:%.*]] = icmp eq i64 [[N]], 42
-; CHECK-NEXT: br i1 [[CMP_4]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_4:%.*]], !prof [[PROF0]]
-; CHECK: latch.4:
-; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_3]]
-; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_4]], align 4
-; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP7]], [[ADD_3]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV_EPIL]], 5
-; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_5:%.*]]
-; CHECK: for.exiting_block.5:
-; CHECK-NEXT: [[CMP_5:%.*]] = icmp eq i64 [[N]], 42
-; CHECK-NEXT: br i1 [[CMP_5]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_5:%.*]], !prof [[PROF0]]
-; CHECK: latch.5:
-; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_4]]
-; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX_5]], align 4
-; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP8]], [[ADD_4]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV_EPIL]], 6
-; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_6:%.*]]
-; CHECK: for.exiting_block.6:
-; CHECK-NEXT: [[CMP_6:%.*]] = icmp eq i64 [[N]], 42
-; CHECK-NEXT: br i1 [[CMP_6]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_6:%.*]], !prof [[PROF0]]
-; CHECK: latch.6:
-; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_5]]
-; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX_6]], align 4
-; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP9]], [[ADD_5]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV_EPIL]], 7
-; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_7:%.*]]
-; CHECK: for.exiting_block.7:
-; CHECK-NEXT: [[CMP_7:%.*]] = icmp eq i64 [[N]], 42
-; CHECK-NEXT: br i1 [[CMP_7]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_7]], !prof [[PROF0]]
-; CHECK: latch.7:
-; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_6]]
-; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX_7]], align 4
-; CHECK-NEXT: [[ADD_7]] = add nsw i32 [[TMP10]], [[ADD_6]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV_EPIL]], 8
-; CHECK-NEXT: [[NITER_NEXT_7]] = add i64 [[NITER]], 8
-; CHECK-NEXT: [[EXITCOND_EPIL:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
-; CHECK-NEXT: br i1 [[EXITCOND_EPIL]], label [[LATCHEXIT:%.*]], label [[HEADER]]
-; CHECK: latchexit.unr-lcssa:
-; CHECK-NEXT: [[SUM_0_LCSSA_PH_PH:%.*]] = phi i32 [ [[ADD_7]], [[LATCH_7]] ]
-; CHECK-NEXT: [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_7]], [[LATCH_7]] ]
-; CHECK-NEXT: [[SUM_02_UNR_PH:%.*]] = phi i32 [ [[ADD_7]], [[LATCH_7]] ]
-; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[LATCHEXIT_UNR_LCSSA]], label [[LATCHEXIT1:%.*]]
-; CHECK: header.epil.preheader:
-; CHECK-NEXT: [[INDVARS_IV_EPIL_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_UNR_PH]], [[LATCHEXIT]] ]
-; CHECK-NEXT: [[SUM_02_EPIL_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_02_UNR_PH]], [[LATCHEXIT]] ]
-; CHECK-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; CHECK-NEXT: call void @llvm.assume(i1 [[LCMP_MOD2]])
-; CHECK-NEXT: br label [[HEADER_EPIL:%.*]]
-; CHECK: header.epil:
-; CHECK-NEXT: [[INDVARS_IV_EPIL1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[LATCH_EPIL:%.*]] ], [ [[INDVARS_IV_EPIL_INIT]], [[LATCHEXIT_UNR_LCSSA]] ]
-; CHECK-NEXT: [[SUM_02_EPIL1:%.*]] = phi i32 [ [[ADD_EPIL1:%.*]], [[LATCH_EPIL]] ], [ [[SUM_02_EPIL_INIT]], [[LATCHEXIT_UNR_LCSSA]] ]
-; CHECK-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ 0, [[LATCHEXIT_UNR_LCSSA]] ], [ [[EPIL_ITER_NEXT:%.*]], [[LATCH_EPIL]] ]
-; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_EPIL:%.*]]
-; CHECK: for.exiting_block.epil:
-; CHECK-NEXT: [[CMP_EPIL:%.*]] = icmp eq i64 [[N]], 42
-; CHECK-NEXT: br i1 [[CMP_EPIL]], label [[OTHEREXIT_LOOPEXIT3:%.*]], label [[LATCH_EPIL]], !prof [[PROF0]]
-; CHECK: latch.epil:
-; CHECK-NEXT: [[ARRAYIDX_EPIL1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_EPIL1]]
-; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX_EPIL1]], align 4
-; CHECK-NEXT: [[ADD_EPIL1]] = add nsw i32 [[TMP12]], [[SUM_02_EPIL1]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_EPIL]] = add i64 [[INDVARS_IV_EPIL1]], 1
-; CHECK-NEXT: [[EXITCOND_EPIL1:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_EPIL]], [[N]]
-; CHECK-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
-; CHECK-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
-; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label [[HEADER_EPIL]], label [[LATCHEXIT_EPILOG_LCSSA:%.*]], !llvm.loop [[LOOP1:![0-9]+]]
-; CHECK: latchexit.epilog-lcssa:
-; CHECK-NEXT: [[SUM_0_LCSSA_PH1:%.*]] = phi i32 [ [[ADD_EPIL1]], [[LATCH_EPIL]] ]
-; CHECK-NEXT: br label [[LATCHEXIT1]]
-; CHECK: latchexit:
-; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA_PH_PH]], [[LATCHEXIT]] ], [ [[SUM_0_LCSSA_PH1]], [[LATCHEXIT_EPILOG_LCSSA]] ]
-; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
-; CHECK: otherexit.loopexit:
-; CHECK-NEXT: br label [[OTHEREXIT:%.*]]
-; CHECK: otherexit.loopexit3:
-; CHECK-NEXT: br label [[OTHEREXIT]]
-; CHECK: otherexit:
-; CHECK-NEXT: [[RVAL:%.*]] = call i32 @foo()
-; CHECK-NEXT: ret i32 [[RVAL]]
+; CHECK: epil
;
; NOUNROLL-LABEL: @test1(
-; NOUNROLL-NEXT: entry:
-; NOUNROLL-NEXT: br label [[HEADER:%.*]]
-; NOUNROLL: header:
-; NOUNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
-; NOUNROLL-NEXT: [[SUM_02:%.*]] = phi i32 [ [[ADD:%.*]], [[LATCH]] ], [ 0, [[ENTRY]] ]
-; NOUNROLL-NEXT: br label [[FOR_EXITING_BLOCK:%.*]]
-; NOUNROLL: for.exiting_block:
-; NOUNROLL-NEXT: [[CMP:%.*]] = icmp eq i64 [[N:%.*]], 42
-; NOUNROLL-NEXT: br i1 [[CMP]], label [[OTHEREXIT:%.*]], label [[LATCH]], !prof [[PROF0:![0-9]+]]
-; NOUNROLL: latch:
-; NOUNROLL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
-; NOUNROLL-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; NOUNROLL-NEXT: [[ADD]] = add nsw i32 [[TMP0]], [[SUM_02]]
-; NOUNROLL-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
-; NOUNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
-; NOUNROLL-NEXT: br i1 [[EXITCOND]], label [[LATCHEXIT:%.*]], label [[HEADER]]
-; NOUNROLL: latchexit:
-; NOUNROLL-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[ADD]], [[LATCH]] ]
-; NOUNROLL-NEXT: ret i32 [[SUM_0_LCSSA]]
-; NOUNROLL: otherexit:
-; NOUNROLL-NEXT: [[RVAL:%.*]] = call i32 @foo()
-; NOUNROLL-NEXT: ret i32 [[RVAL]]
+; NOUNROLL-NOT: epil
;
entry:
br label %header
@@ -196,164 +46,10 @@ declare i32 @foo()
; exit is a deopt call so it should unroll
define i32 @test2(ptr nocapture %a, i64 %n) {
; CHECK-LABEL: @test2(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = freeze i64 [[N:%.*]]
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], -1
-; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP0]], 7
-; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 7
-; CHECK-NEXT: br i1 [[TMP2]], label [[LATCHEXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
-; CHECK: entry.new:
-; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[TMP0]], [[XTRAITER]]
-; CHECK-NEXT: br label [[HEADER:%.*]]
-; CHECK: header:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[LATCH_7:%.*]] ]
-; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[ADD_7:%.*]], [[LATCH_7]] ]
-; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[LATCH_7]] ]
-; CHECK-NEXT: br label [[FOR_EXITING_BLOCK:%.*]]
-; CHECK: for.exiting_block:
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[N]], 42
-; CHECK-NEXT: br i1 [[CMP]], label [[OTHEREXIT_LOOPEXIT:%.*]], label [[LATCH:%.*]]
-; CHECK: latch:
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], [[SUM_02]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_1:%.*]]
-; CHECK: for.exiting_block.1:
-; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[N]], 42
-; CHECK-NEXT: br i1 [[CMP_1]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_1:%.*]]
-; CHECK: latch.1:
-; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
-; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
-; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP4]], [[ADD]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
-; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_2:%.*]]
-; CHECK: for.exiting_block.2:
-; CHECK-NEXT: [[CMP_2:%.*]] = icmp eq i64 [[N]], 42
-; CHECK-NEXT: br i1 [[CMP_2]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_2:%.*]]
-; CHECK: latch.2:
-; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_1]]
-; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4
-; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP5]], [[ADD_1]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3
-; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_3:%.*]]
-; CHECK: for.exiting_block.3:
-; CHECK-NEXT: [[CMP_3:%.*]] = icmp eq i64 [[N]], 42
-; CHECK-NEXT: br i1 [[CMP_3]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_3:%.*]]
-; CHECK: latch.3:
-; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_2]]
-; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
-; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP6]], [[ADD_2]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 4
-; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_4:%.*]]
-; CHECK: for.exiting_block.4:
-; CHECK-NEXT: [[CMP_4:%.*]] = icmp eq i64 [[N]], 42
-; CHECK-NEXT: br i1 [[CMP_4]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_4:%.*]]
-; CHECK: latch.4:
-; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_3]]
-; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_4]], align 4
-; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP7]], [[ADD_3]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 5
-; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_5:%.*]]
-; CHECK: for.exiting_block.5:
-; CHECK-NEXT: [[CMP_5:%.*]] = icmp eq i64 [[N]], 42
-; CHECK-NEXT: br i1 [[CMP_5]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_5:%.*]]
-; CHECK: latch.5:
-; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_4]]
-; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX_5]], align 4
-; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP8]], [[ADD_4]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 6
-; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_6:%.*]]
-; CHECK: for.exiting_block.6:
-; CHECK-NEXT: [[CMP_6:%.*]] = icmp eq i64 [[N]], 42
-; CHECK-NEXT: br i1 [[CMP_6]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_6:%.*]]
-; CHECK: latch.6:
-; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_5]]
-; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX_6]], align 4
-; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP9]], [[ADD_5]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 7
-; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_7:%.*]]
-; CHECK: for.exiting_block.7:
-; CHECK-NEXT: [[CMP_7:%.*]] = icmp eq i64 [[N]], 42
-; CHECK-NEXT: br i1 [[CMP_7]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_7]]
-; CHECK: latch.7:
-; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_6]]
-; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX_7]], align 4
-; CHECK-NEXT: [[ADD_7]] = add nsw i32 [[TMP10]], [[ADD_6]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV]], 8
-; CHECK-NEXT: [[NITER_NEXT_7]] = add i64 [[NITER]], 8
-; CHECK-NEXT: [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
-; CHECK-NEXT: br i1 [[NITER_NCMP_7]], label [[LATCHEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[HEADER]]
-; CHECK: latchexit.unr-lcssa:
-; CHECK-NEXT: [[SUM_0_LCSSA_PH_PH:%.*]] = phi i32 [ [[ADD_7]], [[LATCH_7]] ]
-; CHECK-NEXT: [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_7]], [[LATCH_7]] ]
-; CHECK-NEXT: [[SUM_02_UNR_PH:%.*]] = phi i32 [ [[ADD_7]], [[LATCH_7]] ]
-; CHECK-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; CHECK-NEXT: br i1 [[LCMP_MOD_NOT]], label [[LATCHEXIT_UNR_LCSSA]], label [[HEADER_EPIL_PREHEADER:%.*]]
-; CHECK: header.epil.preheader:
-; CHECK-NEXT: [[INDVARS_IV_EPIL_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_UNR_PH]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT: [[SUM_02_EPIL_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_02_UNR_PH]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT: [[LCMP_MOD3:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; CHECK-NEXT: call void @llvm.assume(i1 [[LCMP_MOD3]])
-; CHECK-NEXT: br label [[HEADER_EPIL:%.*]]
-; CHECK: header.epil:
-; CHECK-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[LATCH_EPIL:%.*]] ], [ [[INDVARS_IV_EPIL_INIT]], [[LATCHEXIT_UNR_LCSSA]] ]
-; CHECK-NEXT: [[SUM_02_EPIL:%.*]] = phi i32 [ [[ADD_EPIL:%.*]], [[LATCH_EPIL]] ], [ [[SUM_02_EPIL_INIT]], [[LATCHEXIT_UNR_LCSSA]] ]
-; CHECK-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ 0, [[LATCHEXIT_UNR_LCSSA]] ], [ [[EPIL_ITER_NEXT:%.*]], [[LATCH_EPIL]] ]
-; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_EPIL:%.*]]
-; CHECK: for.exiting_block.epil:
-; CHECK-NEXT: [[CMP_EPIL:%.*]] = icmp eq i64 [[N]], 42
-; CHECK-NEXT: br i1 [[CMP_EPIL]], label [[OTHEREXIT_LOOPEXIT3:%.*]], label [[LATCH_EPIL]]
-; CHECK: latch.epil:
-; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_EPIL]]
-; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX_EPIL]], align 4
-; CHECK-NEXT: [[ADD_EPIL]] = add nsw i32 [[TMP11]], [[SUM_02_EPIL]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_EPIL]] = add i64 [[INDVARS_IV_EPIL]], 1
-; CHECK-NEXT: [[EXITCOND_EPIL:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_EPIL]], [[N]]
-; CHECK-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
-; CHECK-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
-; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label [[HEADER_EPIL]], label [[LATCHEXIT_EPILOG_LCSSA:%.*]], !llvm.loop [[LOOP3:![0-9]+]]
-; CHECK: latchexit.epilog-lcssa:
-; CHECK-NEXT: [[SUM_0_LCSSA_PH1:%.*]] = phi i32 [ [[ADD_EPIL]], [[LATCH_EPIL]] ]
-; CHECK-NEXT: br label [[HEADER_EPIL_PREHEADER]]
-; CHECK: latchexit:
-; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA_PH_PH]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ], [ [[SUM_0_LCSSA_PH1]], [[LATCHEXIT_EPILOG_LCSSA]] ]
-; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
-; CHECK: otherexit.loopexit:
-; CHECK-NEXT: [[SUM_02_LCSSA_PH:%.*]] = phi i32 [ [[SUM_02]], [[FOR_EXITING_BLOCK]] ], [ [[ADD]], [[FOR_EXITING_BLOCK_1]] ], [ [[ADD_1]], [[FOR_EXITING_BLOCK_2]] ], [ [[ADD_2]], [[FOR_EXITING_BLOCK_3]] ], [ [[ADD_3]], [[FOR_EXITING_BLOCK_4]] ], [ [[ADD_4]], [[FOR_EXITING_BLOCK_5]] ], [ [[ADD_5]], [[FOR_EXITING_BLOCK_6]] ], [ [[ADD_6]], [[FOR_EXITING_BLOCK_7]] ]
-; CHECK-NEXT: br label [[OTHEREXIT:%.*]]
-; CHECK: otherexit.loopexit4:
-; CHECK-NEXT: [[SUM_02_LCSSA_PH4:%.*]] = phi i32 [ [[SUM_02_EPIL]], [[FOR_EXITING_BLOCK_EPIL]] ]
-; CHECK-NEXT: br label [[OTHEREXIT]]
-; CHECK: otherexit:
-; CHECK-NEXT: [[SUM_02_LCSSA:%.*]] = phi i32 [ [[SUM_02_LCSSA_PH]], [[OTHEREXIT_LOOPEXIT]] ], [ [[SUM_02_LCSSA_PH4]], [[OTHEREXIT_LOOPEXIT3]] ]
-; CHECK-NEXT: [[RVAL:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 [[SUM_02_LCSSA]]) ]
-; CHECK-NEXT: ret i32 [[RVAL]]
+; CHECK: epil
;
; NOUNROLL-LABEL: @test2(
-; NOUNROLL-NEXT: entry:
-; NOUNROLL-NEXT: br label [[HEADER:%.*]]
-; NOUNROLL: header:
-; NOUNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
-; NOUNROLL-NEXT: [[SUM_02:%.*]] = phi i32 [ [[ADD:%.*]], [[LATCH]] ], [ 0, [[ENTRY]] ]
-; NOUNROLL-NEXT: br label [[FOR_EXITING_BLOCK:%.*]]
-; NOUNROLL: for.exiting_block:
-; NOUNROLL-NEXT: [[CMP:%.*]] = icmp eq i64 [[N:%.*]], 42
-; NOUNROLL-NEXT: br i1 [[CMP]], label [[OTHEREXIT:%.*]], label [[LATCH]]
-; NOUNROLL: latch:
-; NOUNROLL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
-; NOUNROLL-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; NOUNROLL-NEXT: [[ADD]] = add nsw i32 [[TMP0]], [[SUM_02]]
-; NOUNROLL-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
-; NOUNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
-; NOUNROLL-NEXT: br i1 [[EXITCOND]], label [[LATCHEXIT:%.*]], label [[HEADER]]
-; NOUNROLL: latchexit:
-; NOUNROLL-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[ADD]], [[LATCH]] ]
-; NOUNROLL-NEXT: ret i32 [[SUM_0_LCSSA]]
-; NOUNROLL: otherexit:
-; NOUNROLL-NEXT: [[SUM_02_LCSSA:%.*]] = phi i32 [ [[SUM_02]], [[FOR_EXITING_BLOCK]] ]
-; NOUNROLL-NEXT: [[RVAL:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 [[SUM_02_LCSSA]]) ]
-; NOUNROLL-NEXT: ret i32 [[RVAL]]
+; NOUNROLL-NOT: epil
;
entry:
br label %header
@@ -389,52 +85,10 @@ declare i32 @llvm.experimental.deoptimize.i32(...)
; multi exit loop where the exits are not predictable -- no unroll
define i32 @test3(ptr nocapture %a, i64 %n) {
; CHECK-LABEL: @test3(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br label [[HEADER:%.*]]
-; CHECK: header:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[ADD:%.*]], [[LATCH]] ], [ 0, [[ENTRY]] ]
-; CHECK-NEXT: br label [[FOR_EXITING_BLOCK:%.*]]
-; CHECK: for.exiting_block:
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[N:%.*]], 42
-; CHECK-NEXT: br i1 [[CMP]], label [[OTHEREXIT:%.*]], label [[LATCH]], !prof [[PROF4:![0-9]+]]
-; CHECK: latch:
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP0]], [[SUM_02]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[LATCHEXIT:%.*]], label [[HEADER]]
-; CHECK: latchexit:
-; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[ADD]], [[LATCH]] ]
-; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
-; CHECK: otherexit:
-; CHECK-NEXT: [[RVAL:%.*]] = call i32 @foo()
-; CHECK-NEXT: ret i32 [[RVAL]]
+; CHECK-NOT: epil
;
; NOUNROLL-LABEL: @test3(
-; NOUNROLL-NEXT: entry:
-; NOUNROLL-NEXT: br label [[HEADER:%.*]]
-; NOUNROLL: header:
-; NOUNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
-; NOUNROLL-NEXT: [[SUM_02:%.*]] = phi i32 [ [[ADD:%.*]], [[LATCH]] ], [ 0, [[ENTRY]] ]
-; NOUNROLL-NEXT: br label [[FOR_EXITING_BLOCK:%.*]]
-; NOUNROLL: for.exiting_block:
-; NOUNROLL-NEXT: [[CMP:%.*]] = icmp eq i64 [[N:%.*]], 42
-; NOUNROLL-NEXT: br i1 [[CMP]], label [[OTHEREXIT:%.*]], label [[LATCH]], !prof [[PROF1:![0-9]+]]
-; NOUNROLL: latch:
-; NOUNROLL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
-; NOUNROLL-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; NOUNROLL-NEXT: [[ADD]] = add nsw i32 [[TMP0]], [[SUM_02]]
-; NOUNROLL-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
-; NOUNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
-; NOUNROLL-NEXT: br i1 [[EXITCOND]], label [[LATCHEXIT:%.*]], label [[HEADER]]
-; NOUNROLL: latchexit:
-; NOUNROLL-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[ADD]], [[LATCH]] ]
-; NOUNROLL-NEXT: ret i32 [[SUM_0_LCSSA]]
-; NOUNROLL: otherexit:
-; NOUNROLL-NEXT: [[RVAL:%.*]] = call i32 @foo()
-; NOUNROLL-NEXT: ret i32 [[RVAL]]
+; NOUNROLL-NOT: epil
;
entry:
br label %header
@@ -467,164 +121,13 @@ otherexit:
!2 = !{!"branch_weights", i32 1, i32 2}
-; multi exit loop with high predictability of exists -- unroll
+; multi exit loop with high predictability of exits -- unroll
define i32 @test4(ptr nocapture %a, i64 %n) {
; CHECK-LABEL: @test4(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP3:%.*]] = freeze i64 [[N1:%.*]]
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP3]], -1
-; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP3]], 7
-; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 7
-; CHECK-NEXT: br i1 [[TMP2]], label [[LATCHEXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
-; CHECK: entry.new:
-; CHECK-NEXT: [[N:%.*]] = sub i64 [[TMP3]], [[XTRAITER]]
-; CHECK-NEXT: br label [[HEADER:%.*]]
-; CHECK: header:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[CONTINUE_7:%.*]] ]
-; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[ADD_7:%.*]], [[CONTINUE_7]] ]
-; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT:%.*]], [[CONTINUE_7]] ]
-; CHECK-NEXT: br label [[FOR_EXITING_BLOCK:%.*]]
-; CHECK: otherexitingblock:
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[SUM_02]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[N1]], 42
-; CHECK-NEXT: br i1 [[CMP]], label [[OTHEREXIT_LOOPEXIT:%.*]], label [[LATCH:%.*]], !prof [[PROF5:![0-9]+]]
-; CHECK: latch:
-; CHECK-NEXT: br label [[LATCH_1:%.*]]
-; CHECK: otherexitingblock.1:
-; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT1]]
-; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
-; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP4]], [[ADD]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
-; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[N1]], 42
-; CHECK-NEXT: br i1 [[CMP_1]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_8:%.*]], !prof [[PROF5]]
-; CHECK: latch.1:
-; CHECK-NEXT: br label [[LATCH_2:%.*]]
-; CHECK: otherexitingblock.2:
-; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_1]]
-; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4
-; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP5]], [[ADD_1]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3
-; CHECK-NEXT: [[CMP_2:%.*]] = icmp eq i64 [[N1]], 42
-; CHECK-NEXT: br i1 [[CMP_2]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_9:%.*]], !prof [[PROF5]]
-; CHECK: latch.2:
-; CHECK-NEXT: br label [[LATCH_3:%.*]]
-; CHECK: otherexitingblock.3:
-; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_2]]
-; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
-; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP6]], [[ADD_2]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 4
-; CHECK-NEXT: [[CMP_3:%.*]] = icmp eq i64 [[N1]], 42
-; CHECK-NEXT: br i1 [[CMP_3]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_10:%.*]], !prof [[PROF5]]
-; CHECK: latch.3:
-; CHECK-NEXT: br label [[LATCH_4:%.*]]
-; CHECK: otherexitingblock.4:
-; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_3]]
-; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_4]], align 4
-; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP7]], [[ADD_3]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 5
-; CHECK-NEXT: [[CMP_4:%.*]] = icmp eq i64 [[N1]], 42
-; CHECK-NEXT: br i1 [[CMP_4]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_11:%.*]], !prof [[PROF5]]
-; CHECK: latch.4:
-; CHECK-NEXT: br label [[LATCH_5:%.*]]
-; CHECK: otherexitingblock.5:
-; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_4]]
-; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX_5]], align 4
-; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP8]], [[ADD_4]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 6
-; CHECK-NEXT: [[CMP_5:%.*]] = icmp eq i64 [[N1]], 42
-; CHECK-NEXT: br i1 [[CMP_5]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_12:%.*]], !prof [[PROF5]]
-; CHECK: latch.5:
-; CHECK-NEXT: br label [[LATCH_6:%.*]]
-; CHECK: otherexitingblock.6:
-; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_5]]
-; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX_6]], align 4
-; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP9]], [[ADD_5]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 7
-; CHECK-NEXT: [[CMP_6:%.*]] = icmp eq i64 [[N1]], 42
-; CHECK-NEXT: br i1 [[CMP_6]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_13:%.*]], !prof [[PROF5]]
-; CHECK: latch.6:
-; CHECK-NEXT: br label [[LATCH_7:%.*]]
-; CHECK: otherexitingblock.7:
-; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_6]]
-; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX_7]], align 4
-; CHECK-NEXT: [[ADD_7]] = add nsw i32 [[TMP10]], [[ADD_6]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV]], 8
-; CHECK-NEXT: [[CMP_7:%.*]] = icmp eq i64 [[N1]], 42
-; CHECK-NEXT: br i1 [[CMP_7]], label [[OTHEREXIT_LOOPEXIT]], label [[CONTINUE_7]], !prof [[PROF5]]
-; CHECK: latch.7:
-; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[NITER]], 8
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[LATCHEXIT:%.*]], label [[HEADER]]
-; CHECK: latchexit.unr-lcssa:
-; CHECK-NEXT: [[SUM_0_LCSSA_PH_PH:%.*]] = phi i32 [ [[ADD_7]], [[CONTINUE_7]] ]
-; CHECK-NEXT: [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_7]], [[CONTINUE_7]] ]
-; CHECK-NEXT: [[SUM_02_UNR_PH:%.*]] = phi i32 [ [[ADD_7]], [[CONTINUE_7]] ]
-; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[LATCHEXIT_UNR_LCSSA]], label [[LATCHEXIT1:%.*]]
-; CHECK: header.epil.preheader:
-; CHECK-NEXT: [[INDVARS_IV_EPIL_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_UNR_PH]], [[LATCHEXIT]] ]
-; CHECK-NEXT: [[SUM_02_EPIL_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_02_UNR_PH]], [[LATCHEXIT]] ]
-; CHECK-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; CHECK-NEXT: call void @llvm.assume(i1 [[LCMP_MOD2]])
-; CHECK-NEXT: br label [[HEADER_EPIL:%.*]]
-; CHECK: header.epil:
-; CHECK-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_EPIL_INIT]], [[LATCHEXIT_UNR_LCSSA]] ], [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[CONTINUE_EPIL:%.*]] ]
-; CHECK-NEXT: [[SUM_02_EPIL:%.*]] = phi i32 [ [[SUM_02_EPIL_INIT]], [[LATCHEXIT_UNR_LCSSA]] ], [ [[ADD_EPIL:%.*]], [[CONTINUE_EPIL]] ]
-; CHECK-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ 0, [[LATCHEXIT_UNR_LCSSA]] ], [ [[EPIL_ITER_NEXT:%.*]], [[CONTINUE_EPIL]] ]
-; CHECK-NEXT: br label [[LATCH_EPIL:%.*]]
-; CHECK: otherexitingblock.epil:
-; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_EPIL]]
-; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX_EPIL]], align 4
-; CHECK-NEXT: [[ADD_EPIL]] = add nsw i32 [[TMP11]], [[SUM_02_EPIL]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_EPIL]] = add i64 [[INDVARS_IV_EPIL]], 1
-; CHECK-NEXT: [[CMP_EPIL:%.*]] = icmp eq i64 [[N1]], 42
-; CHECK-NEXT: br i1 [[CMP_EPIL]], label [[OTHEREXIT_LOOPEXIT3:%.*]], label [[CONTINUE_EPIL]], !prof [[PROF5]]
-; CHECK: latch.epil:
-; CHECK-NEXT: [[EXITCOND_EPIL:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_EPIL]], [[N1]]
-; CHECK-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
-; CHECK-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
-; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label [[HEADER_EPIL]], label [[LATCHEXIT_EPILOG_LCSSA:%.*]], !llvm.loop [[LOOP6:![0-9]+]]
-; CHECK: latchexit.epilog-lcssa:
-; CHECK-NEXT: [[SUM_0_LCSSA_PH1:%.*]] = phi i32 [ [[ADD_EPIL]], [[CONTINUE_EPIL]] ]
-; CHECK-NEXT: br label [[LATCHEXIT1]]
-; CHECK: latchexit:
-; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA_PH_PH]], [[LATCHEXIT]] ], [ [[SUM_0_LCSSA_PH1]], [[LATCHEXIT_EPILOG_LCSSA]] ]
-; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
-; CHECK: otherexit.loopexit:
-; CHECK-NEXT: br label [[OTHEREXIT:%.*]]
-; CHECK: otherexit.loopexit3:
-; CHECK-NEXT: br label [[OTHEREXIT]]
-; CHECK: otherexit:
-; CHECK-NEXT: [[RVAL:%.*]] = call i32 @foo()
-; CHECK-NEXT: ret i32 [[RVAL]]
+; CHECK: epil
;
; NOUNROLL-LABEL: @test4(
-; NOUNROLL-NEXT: entry:
-; NOUNROLL-NEXT: br label [[HEADER:%.*]]
-; NOUNROLL: header:
-; NOUNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LATCH:%.*]] ]
-; NOUNROLL-NEXT: [[SUM_02:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[LATCH]] ]
-; NOUNROLL-NEXT: br label [[FOR_EXITING_BLOCK:%.*]]
-; NOUNROLL: otherexitingblock:
-; NOUNROLL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
-; NOUNROLL-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; NOUNROLL-NEXT: [[ADD]] = add nsw i32 [[TMP0]], [[SUM_02]]
-; NOUNROLL-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
-; NOUNROLL-NEXT: [[CMP:%.*]] = icmp eq i64 [[N:%.*]], 42
-; NOUNROLL-NEXT: br i1 [[CMP]], label [[OTHEREXIT:%.*]], label [[LATCH]], !prof [[PROF2:![0-9]+]]
-; NOUNROLL: latch:
-; NOUNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
-; NOUNROLL-NEXT: br i1 [[EXITCOND]], label [[LATCHEXIT:%.*]], label [[HEADER]]
-; NOUNROLL: latchexit:
-; NOUNROLL-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[ADD]], [[LATCH]] ]
-; NOUNROLL-NEXT: ret i32 [[SUM_0_LCSSA]]
-; NOUNROLL: otherexit:
-; NOUNROLL-NEXT: [[RVAL:%.*]] = call i32 @foo()
-; NOUNROLL-NEXT: ret i32 [[RVAL]]
-;
+; NOUNROLL-NOT: epil
entry:
br label %header
>From 973242b3400e5b429908dd17a5e5f36b4e8079e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Sedl=C3=A1=C4=8Dek?= <mr.mareksedlacek at gmail.com>
Date: Fri, 23 Jan 2026 12:18:19 +0000
Subject: [PATCH 5/5] Removed (now) untrue comment about autogenerated test.
---
llvm/test/Transforms/LoopUnroll/multi-exit-loop-unroll.ll | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/test/Transforms/LoopUnroll/multi-exit-loop-unroll.ll b/llvm/test/Transforms/LoopUnroll/multi-exit-loop-unroll.ll
index a115483b4f1ba..9e029caa4bbda 100644
--- a/llvm/test/Transforms/LoopUnroll/multi-exit-loop-unroll.ll
+++ b/llvm/test/Transforms/LoopUnroll/multi-exit-loop-unroll.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=loop-unroll -unroll-runtime=true -verify-dom-info -verify-loop-info -S | FileCheck %s
; RUN: opt < %s -passes=loop-unroll -unroll-runtime=true -verify-dom-info -verify-loop-info -unroll-runtime-multi-exit=false -S | FileCheck %s -check-prefix=NOUNROLL
More information about the llvm-commits mailing list