[llvm] [LoopUnroll] Use branch probability in multi-exit loop unrolling (PR #164799)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 23 03:59:13 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Marek Sedláček (mark-sed)
<details>
<summary>Changes</summary>
This patch improves multi-exit loop unrolling by taking branch probability into account, rather than only checking whether the other exit is a deoptimizing one.
This implementation reads the branch weight metadata directly because BranchProbabilityInfo (BPI) is in an unstable state in this part of the code (runtime unrolling invalidates the state of its map, and using BPI in my tests caused errors).
If branch probability metadata is not present, the current deopt heuristic is still used.
---
Patch is 42.05 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/164799.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp (+68-7)
- (added) llvm/test/Transforms/LoopUnroll/multi-exit-loop-unroll.ll (+655)
``````````diff
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 6312831cf0ee0..520fd741ef0f4 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -482,10 +482,53 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder,
return NewLoop;
}
+// Calculates the edge probability from Src to Dst.
+// Dst has to be a successor to Src.
+// This uses branch_probability metadata directly. If data are missing or
+// probability cannot be computed, then std::nullopt is returned.
+// This does not use BranchProbabilityInfo and the values computed by this
+// will vary from BPI because BPI has its own more advanced heuristics to
+// determine probabilities without metadata.
+static std::optional<BranchProbability>
+computeBranchProbabilityUsingMetadata(BasicBlock *Src, BasicBlock *Dst) {
+ assert(Src != Dst && "Passed in same source as destination");
+
+ Instruction *TI = Src->getTerminator();
+ if (!TI || TI->getNumSuccessors() == 0)
+ return BranchProbability::getZero();
+
+ auto NumSucc = TI->getNumSuccessors();
+ SmallVector<uint32_t, 4> Weights;
+
+ if (!extractBranchWeights(*TI, Weights)) {
+ // No metadata
+ return std::nullopt;
+ }
+ assert(NumSucc == Weights.size() && "Missing weights in branch_probability");
+
+ uint64_t Total = 0;
+ uint32_t Numerator = 0;
+ for (auto [i, Weight] : llvm::enumerate(Weights)) {
+ if (TI->getSuccessor(i) == Dst)
+ Numerator += Weight;
+ Total += Weight;
+ }
+
+ // Total of edges might be 0 if the metadata is incorrect/set by hand
+ // or missing. In such case return here to avoid division by 0 later on.
+ // There might also be a case where the value of Total cannot fit into
+ // uint32_t, in such case, just bail out.
+ if (Total == 0 || Total > std::numeric_limits<uint32_t>::max())
+ return std::nullopt;
+
+ return BranchProbability(Numerator, Total);
+}
+
/// Returns true if we can profitably unroll the multi-exit loop L. Currently,
/// we return true only if UnrollRuntimeMultiExit is set to true.
static bool canProfitablyRuntimeUnrollMultiExitLoop(
- Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit,
+ Loop *L, const TargetTransformInfo *TTI,
+ SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit,
bool UseEpilogRemainder) {
// The main pain point with multi-exit loop unrolling is that once unrolled,
@@ -513,11 +556,28 @@ static bool canProfitablyRuntimeUnrollMultiExitLoop(
return true;
// The second heuristic is that L has one exit other than the latchexit and
- // that exit is a deoptimize block. We know that deoptimize blocks are rarely
- // taken, which also implies the branch leading to the deoptimize block is
- // highly predictable. When UnrollRuntimeOtherExitPredictable is specified, we
- // assume the other exit branch is predictable even if it has no deoptimize
- // call.
+ // that exit is highly predictable.
+ if (TTI) {
+ if (OtherExits.size() != 1)
+ return false;
+ BasicBlock *LatchBB = L->getLoopLatch();
+ assert(LatchBB && "Expected loop to have a latch");
+ BasicBlock *NonLatchExitingBlock =
+ (ExitingBlocks[0] == LatchBB) ? ExitingBlocks[1] : ExitingBlocks[0];
+ auto BranchProb = computeBranchProbabilityUsingMetadata(
+ NonLatchExitingBlock, OtherExits[0]);
+ // If BranchProbability could not be extracted (returns nullopt), then
+ // don't return and do the check for deopt block.
+ if (BranchProb) {
+ auto Threshold = TTI->getPredictableBranchThreshold().getCompl();
+ return UnrollRuntimeOtherExitPredictable || *BranchProb < Threshold;
+ }
+ }
+
+ // We know that deoptimize blocks are rarely taken, which also implies the
+ // branch leading to the deoptimize block is highly predictable. When
+ // UnrollRuntimeOtherExitPredictable is specified, we assume the other exit
+ // branch is predictable even if it has no deoptimize call.
return (OtherExits.size() == 1 &&
(UnrollRuntimeOtherExitPredictable ||
OtherExits[0]->getPostdominatingDeoptimizeCall()));
@@ -660,7 +720,8 @@ bool llvm::UnrollRuntimeLoopRemainder(
// Otherwise perform multi-exit unrolling, if either the target indicates
// it is profitable or the general profitability heuristics apply.
if (!RuntimeUnrollMultiExit &&
- !canProfitablyRuntimeUnrollMultiExitLoop(L, OtherExits, LatchExit,
+ !canProfitablyRuntimeUnrollMultiExitLoop(L, TTI, OtherExits,
+ LatchExit,
UseEpilogRemainder)) {
LLVM_DEBUG(dbgs() << "Multiple exit/exiting blocks in loop and "
"multi-exit unrolling not enabled!\n");
diff --git a/llvm/test/Transforms/LoopUnroll/multi-exit-loop-unroll.ll b/llvm/test/Transforms/LoopUnroll/multi-exit-loop-unroll.ll
new file mode 100644
index 0000000000000..f006a2dccda26
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/multi-exit-loop-unroll.ll
@@ -0,0 +1,655 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=loop-unroll -unroll-runtime=true -verify-dom-info -verify-loop-info -S | FileCheck %s
+; RUN: opt < %s -passes=loop-unroll -unroll-runtime=true -verify-dom-info -verify-loop-info -unroll-runtime-multi-exit=false -S | FileCheck %s -check-prefix=NOUNROLL
+
+define i32 @test1(ptr nocapture %a, i64 %n) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = freeze i64 [[N:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], -1
+; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP0]], 7
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 7
+; CHECK-NEXT: br i1 [[TMP2]], label [[LATCHEXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
+; CHECK: entry.new:
+; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[TMP0]], [[XTRAITER]]
+; CHECK-NEXT: br label [[HEADER:%.*]]
+; CHECK: header:
+; CHECK-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[LATCH_7:%.*]] ]
+; CHECK-NEXT: [[SUM_02_EPIL:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[ADD_7:%.*]], [[LATCH_7]] ]
+; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[LATCH_7]] ]
+; CHECK-NEXT: br label [[FOR_EXITING_BLOCK:%.*]]
+; CHECK: for.exiting_block:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT: br i1 [[CMP]], label [[OTHEREXIT_LOOPEXIT:%.*]], label [[LATCH:%.*]], !prof [[PROF0:![0-9]+]]
+; CHECK: latch:
+; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV_EPIL]]
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX_EPIL]], align 4
+; CHECK-NEXT: [[ADD_EPIL:%.*]] = add nsw i32 [[TMP11]], [[SUM_02_EPIL]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV_EPIL]], 1
+; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_1:%.*]]
+; CHECK: for.exiting_block.1:
+; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT: br i1 [[CMP_1]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_1:%.*]], !prof [[PROF0]]
+; CHECK: latch.1:
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP4]], [[ADD_EPIL]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_EPIL]], 2
+; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_2:%.*]]
+; CHECK: for.exiting_block.2:
+; CHECK-NEXT: [[CMP_2:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT: br i1 [[CMP_2]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_2:%.*]], !prof [[PROF0]]
+; CHECK: latch.2:
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_1]]
+; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP5]], [[ADD_1]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV_EPIL]], 3
+; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_3:%.*]]
+; CHECK: for.exiting_block.3:
+; CHECK-NEXT: [[CMP_3:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT: br i1 [[CMP_3]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_3:%.*]], !prof [[PROF0]]
+; CHECK: latch.3:
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_2]]
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP6]], [[ADD_2]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV_EPIL]], 4
+; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_4:%.*]]
+; CHECK: for.exiting_block.4:
+; CHECK-NEXT: [[CMP_4:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT: br i1 [[CMP_4]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_4:%.*]], !prof [[PROF0]]
+; CHECK: latch.4:
+; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_3]]
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_4]], align 4
+; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP7]], [[ADD_3]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV_EPIL]], 5
+; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_5:%.*]]
+; CHECK: for.exiting_block.5:
+; CHECK-NEXT: [[CMP_5:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT: br i1 [[CMP_5]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_5:%.*]], !prof [[PROF0]]
+; CHECK: latch.5:
+; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_4]]
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX_5]], align 4
+; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP8]], [[ADD_4]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV_EPIL]], 6
+; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_6:%.*]]
+; CHECK: for.exiting_block.6:
+; CHECK-NEXT: [[CMP_6:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT: br i1 [[CMP_6]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_6:%.*]], !prof [[PROF0]]
+; CHECK: latch.6:
+; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_5]]
+; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX_6]], align 4
+; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP9]], [[ADD_5]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV_EPIL]], 7
+; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_7:%.*]]
+; CHECK: for.exiting_block.7:
+; CHECK-NEXT: [[CMP_7:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT: br i1 [[CMP_7]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_7]], !prof [[PROF0]]
+; CHECK: latch.7:
+; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_6]]
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX_7]], align 4
+; CHECK-NEXT: [[ADD_7]] = add nsw i32 [[TMP10]], [[ADD_6]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV_EPIL]], 8
+; CHECK-NEXT: [[NITER_NEXT_7]] = add i64 [[NITER]], 8
+; CHECK-NEXT: [[EXITCOND_EPIL:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
+; CHECK-NEXT: br i1 [[EXITCOND_EPIL]], label [[LATCHEXIT:%.*]], label [[HEADER]]
+; CHECK: latchexit.unr-lcssa:
+; CHECK-NEXT: [[SUM_0_LCSSA_PH_PH:%.*]] = phi i32 [ [[ADD_7]], [[LATCH_7]] ]
+; CHECK-NEXT: [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_7]], [[LATCH_7]] ]
+; CHECK-NEXT: [[SUM_02_UNR_PH:%.*]] = phi i32 [ [[ADD_7]], [[LATCH_7]] ]
+; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[LATCHEXIT_UNR_LCSSA]], label [[LATCHEXIT1:%.*]]
+; CHECK: header.epil.preheader:
+; CHECK-NEXT: [[INDVARS_IV_EPIL_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_UNR_PH]], [[LATCHEXIT]] ]
+; CHECK-NEXT: [[SUM_02_EPIL_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_02_UNR_PH]], [[LATCHEXIT]] ]
+; CHECK-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT: call void @llvm.assume(i1 [[LCMP_MOD2]])
+; CHECK-NEXT: br label [[HEADER_EPIL:%.*]]
+; CHECK: header.epil:
+; CHECK-NEXT: [[INDVARS_IV_EPIL1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[LATCH_EPIL:%.*]] ], [ [[INDVARS_IV_EPIL_INIT]], [[LATCHEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT: [[SUM_02_EPIL1:%.*]] = phi i32 [ [[ADD_EPIL1:%.*]], [[LATCH_EPIL]] ], [ [[SUM_02_EPIL_INIT]], [[LATCHEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ 0, [[LATCHEXIT_UNR_LCSSA]] ], [ [[EPIL_ITER_NEXT:%.*]], [[LATCH_EPIL]] ]
+; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_EPIL:%.*]]
+; CHECK: for.exiting_block.epil:
+; CHECK-NEXT: [[CMP_EPIL:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT: br i1 [[CMP_EPIL]], label [[OTHEREXIT_LOOPEXIT3:%.*]], label [[LATCH_EPIL]], !prof [[PROF0]]
+; CHECK: latch.epil:
+; CHECK-NEXT: [[ARRAYIDX_EPIL1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_EPIL1]]
+; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX_EPIL1]], align 4
+; CHECK-NEXT: [[ADD_EPIL1]] = add nsw i32 [[TMP12]], [[SUM_02_EPIL1]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_EPIL]] = add i64 [[INDVARS_IV_EPIL1]], 1
+; CHECK-NEXT: [[EXITCOND_EPIL1:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_EPIL]], [[N]]
+; CHECK-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
+; CHECK-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
+; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label [[HEADER_EPIL]], label [[LATCHEXIT_EPILOG_LCSSA:%.*]], !llvm.loop [[LOOP1:![0-9]+]]
+; CHECK: latchexit.epilog-lcssa:
+; CHECK-NEXT: [[SUM_0_LCSSA_PH1:%.*]] = phi i32 [ [[ADD_EPIL1]], [[LATCH_EPIL]] ]
+; CHECK-NEXT: br label [[LATCHEXIT1]]
+; CHECK: latchexit:
+; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA_PH_PH]], [[LATCHEXIT]] ], [ [[SUM_0_LCSSA_PH1]], [[LATCHEXIT_EPILOG_LCSSA]] ]
+; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
+; CHECK: otherexit.loopexit:
+; CHECK-NEXT: br label [[OTHEREXIT:%.*]]
+; CHECK: otherexit.loopexit3:
+; CHECK-NEXT: br label [[OTHEREXIT]]
+; CHECK: otherexit:
+; CHECK-NEXT: [[RVAL:%.*]] = call i32 @foo()
+; CHECK-NEXT: ret i32 [[RVAL]]
+;
+; NOUNROLL-LABEL: @test1(
+; NOUNROLL-NEXT: entry:
+; NOUNROLL-NEXT: br label [[HEADER:%.*]]
+; NOUNROLL: header:
+; NOUNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; NOUNROLL-NEXT: [[SUM_02:%.*]] = phi i32 [ [[ADD:%.*]], [[LATCH]] ], [ 0, [[ENTRY]] ]
+; NOUNROLL-NEXT: br label [[FOR_EXITING_BLOCK:%.*]]
+; NOUNROLL: for.exiting_block:
+; NOUNROLL-NEXT: [[CMP:%.*]] = icmp eq i64 [[N:%.*]], 42
+; NOUNROLL-NEXT: br i1 [[CMP]], label [[OTHEREXIT:%.*]], label [[LATCH]], !prof [[PROF0:![0-9]+]]
+; NOUNROLL: latch:
+; NOUNROLL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; NOUNROLL-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; NOUNROLL-NEXT: [[ADD]] = add nsw i32 [[TMP0]], [[SUM_02]]
+; NOUNROLL-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
+; NOUNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; NOUNROLL-NEXT: br i1 [[EXITCOND]], label [[LATCHEXIT:%.*]], label [[HEADER]]
+; NOUNROLL: latchexit:
+; NOUNROLL-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[ADD]], [[LATCH]] ]
+; NOUNROLL-NEXT: ret i32 [[SUM_0_LCSSA]]
+; NOUNROLL: otherexit:
+; NOUNROLL-NEXT: [[RVAL:%.*]] = call i32 @foo()
+; NOUNROLL-NEXT: ret i32 [[RVAL]]
+;
+entry:
+ br label %header
+
+header:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %latch ], [ 0, %entry ]
+ %sum.02 = phi i32 [ %add, %latch ], [ 0, %entry ]
+ br label %for.exiting_block
+
+for.exiting_block:
+ %cmp = icmp eq i64 %n, 42
+ br i1 %cmp, label %otherexit, label %latch, !prof !0
+
+latch:
+ %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+ %0 = load i32, ptr %arrayidx, align 4
+ %add = add nsw i32 %0, %sum.02
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, %n
+ br i1 %exitcond, label %latchexit, label %header
+
+latchexit: ; preds = %latch
+ %sum.0.lcssa = phi i32 [ %add, %latch ]
+ ret i32 %sum.0.lcssa
+
+otherexit:
+ %rval = call i32 @foo()
+ ret i32 %rval
+}
+
+declare i32 @foo()
+
+!0 = !{!"branch_weights", i32 1, i32 100}
+
+; exit is a deopt call so it should unroll
+define i32 @test2(ptr nocapture %a, i64 %n) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = freeze i64 [[N:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], -1
+; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP0]], 7
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 7
+; CHECK-NEXT: br i1 [[TMP2]], label [[LATCHEXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
+; CHECK: entry.new:
+; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[TMP0]], [[XTRAITER]]
+; CHECK-NEXT: br label [[HEADER:%.*]]
+; CHECK: header:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[LATCH_7:%.*]] ]
+; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[ADD_7:%.*]], [[LATCH_7]] ]
+; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[LATCH_7]] ]
+; CHECK-NEXT: br label [[FOR_EXITING_BLOCK:%.*]]
+; CHECK: for.exiting_block:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT: br i1 [[CMP]], label [[OTHEREXIT_LOOPEXIT:%.*]], label [[LATCH:%.*]]
+; CHECK: latch:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], [[SUM_02]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_1:%.*]]
+; CHECK: for.exiting_block.1:
+; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT: br i1 [[CMP_1]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_1:%.*]]
+; CHECK: latch.1:
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP4]], [[ADD]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
+; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_2:%.*]]
+; CHECK: for.exiting_block.2:
+; CHECK-NEXT: [[CMP_2:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT: br i1 [[CMP_2]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_2:%.*]]
+; CHECK: latch.2:
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_1]]
+; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP5]], [[ADD_1]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3
+; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_3:%.*]]
+; CHECK: for.exiting_block.3:
+; CHECK-NEXT: [[CMP_3:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT: br i1 [[CMP_3]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_3:%.*]]
+; CHECK: latch.3:
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_2]]
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP6]], [[ADD_2]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 4
+; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_4:%.*]]
+; CHECK: for.exiting_block.4:
+; CHECK-NEXT: [[CMP_4:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT: br i1 [[CMP_4]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_4:%.*]]
+; CHECK: latch.4:
+; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_3]]
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_4]], align 4
+; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP7]], [[ADD_3]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/164799
More information about the llvm-commits
mailing list