[llvm] [LP] Assign weights when peeling last iteration. (PR #166858)
Mircea Trofin via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 6 16:00:12 PST 2025
https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/166858
>From 1da3c16e7284a946e4582dfb8b8705a92f02bc47 Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin at google.com>
Date: Thu, 6 Nov 2025 14:16:13 -0800
Subject: [PATCH] [LP] Assign weights when peeling last iteration.
---
llvm/lib/Transforms/Utils/LoopPeel.cpp | 18 ++++-
.../LoopUnroll/peel-last-iteration-bfi.ll | 66 +++++++++++++++++++
.../peel-last-iteration-expansion-cost.ll | 43 +++++++-----
llvm/test/lit.cfg.py | 1 +
4 files changed, 112 insertions(+), 16 deletions(-)
create mode 100644 llvm/test/Transforms/LoopUnroll/peel-last-iteration-bfi.ll
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index e1dcaa85a5780..3c3ce7b73f305 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -54,6 +54,7 @@ using namespace llvm::SCEVPatternMatch;
STATISTIC(NumPeeled, "Number of loops peeled");
STATISTIC(NumPeeledEnd, "Number of loops peeled from end");
+namespace llvm {
static cl::opt<unsigned> UnrollPeelCount(
"unroll-peel-count", cl::Hidden,
cl::desc("Set the unroll peeling count, for testing purposes"));
@@ -87,6 +88,9 @@ static cl::opt<bool> EnablePeelingForIV(
static const char *PeeledCountMetaData = "llvm.loop.peeled.count";
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+} // namespace llvm
+
// Check whether we are capable of peeling this loop.
bool llvm::canPeel(const Loop *L) {
// Make sure the loop is in simplified form
@@ -1190,7 +1194,19 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
IRBuilder<> B(PreHeaderBR);
Value *Cond =
B.CreateICmpNE(BTCValue, ConstantInt::get(BTCValue->getType(), 0));
- B.CreateCondBr(Cond, NewPreHeader, InsertTop);
+ auto *BI = B.CreateCondBr(Cond, NewPreHeader, InsertTop);
+ SmallVector<uint32_t> Weights;
+ auto *OrigLatchBr = Latch->getTerminator();
+ auto HasBranchWeights = !ProfcheckDisableMetadataFixes &&
+ extractBranchWeights(*OrigLatchBr, Weights);
+ if (HasBranchWeights) {
+ // Keep the latch's probability of staying in the loop vs. exiting, but
+ // the weights may need flipping: for BI, successor 0 (NewPreHeader)
+ // enters the loop and successor 1 (InsertTop) is towards the exit.
+ if (L->getExitBlock() == OrigLatchBr->getSuccessor(0))
+ std::swap(Weights[0], Weights[1]);
+ setBranchWeights(*BI, Weights, /*IsExpected=*/false);
+ }
PreHeaderBR->eraseFromParent();
// PreHeader now dominates InsertTop.
diff --git a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-bfi.ll b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-bfi.ll
new file mode 100644
index 0000000000000..43e2cd8dcd89c
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-bfi.ll
@@ -0,0 +1,66 @@
+; Disable this test in profcheck because the first RUN line (which passes -profcheck-disable-metadata-fixes) would cause profcheck to fail.
+; REQUIRES: !profcheck
+; RUN: opt -p "print<block-freq>,loop-unroll,print<block-freq>" -scev-cheap-expansion-budget=3 -S %s -profcheck-disable-metadata-fixes 2>&1 | FileCheck %s --check-prefixes=COMMON,BAD
+; RUN: opt -p "print<block-freq>,loop-unroll,print<block-freq>" -scev-cheap-expansion-budget=3 -S %s 2>&1 | FileCheck %s --check-prefixes=COMMON,GOOD
+
+define i32 @test_expansion_cost_2(i32 %start, i32 %end) !prof !0 {
+entry:
+ %sub = add i32 %end, -1
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop.latch ]
+ %c = icmp eq i32 %iv, %sub
+ br i1 %c, label %then, label %loop.latch, !prof !1
+
+then:
+ br label %loop.latch
+
+loop.latch:
+ %iv.next = add nsw i32 %iv, 1
+ %ec = icmp eq i32 %iv.next, %end
+ br i1 %ec, label %exit, label %loop.header, !prof !2
+
+exit:
+ ret i32 0
+}
+
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"branch_weights", i32 2, i32 3}
+!2 = !{!"branch_weights", i32 1, i32 50}
+
+; COMMON: block-frequency-info: test_expansion_cost_2
+; COMMON-NEXT: entry: float = 1.0
+; COMMON-NEXT: loop.header: float = 51.0
+; COMMON-NEXT: then: float = 20.4
+; COMMON-NEXT: loop.latch: float = 51.0
+; COMMON-NEXT: exit: float = 1.0
+
+; COMMON: block-frequency-info: test_expansion_cost_2
+; GOOD-NEXT: entry: float = 1.0
+; GOOD-NEXT: entry.split: float = 0.98039
+; GOOD-NEXT: loop.header: float = 50.0
+; GOOD-NEXT: then: float = 20.0
+; GOOD-NEXT: loop.latch: float = 50.0
+; GOOD-NEXT: exit.peel.begin.loopexit: float = 0.98039
+; GOOD-NEXT: exit.peel.begin: float = 1.0
+; GOOD-NEXT: loop.header.peel: float = 1.0
+; GOOD-NEXT: then.peel: float = 0.4
+; GOOD-NEXT: loop.latch.peel: float = 1.0
+; GOOD-NEXT: exit.peel.next: float = 1.0
+; GOOD-NEXT: loop.header.peel.next: float = 1.0
+; GOOD-NEXT: exit: float = 1.0
+
+; BAD-NEXT: entry: float = 1.0
+; BAD-NEXT: entry.split: float = 0.625
+; BAD-NEXT: loop.header: float = 31.875
+; BAD-NEXT: then: float = 12.75
+; BAD-NEXT: loop.latch: float = 31.875
+; BAD-NEXT: exit.peel.begin.loopexit: float = 0.625
+; BAD-NEXT: exit.peel.begin: float = 1.0
+; BAD-NEXT: loop.header.peel: float = 1.0
+; BAD-NEXT: then.peel: float = 0.4
+; BAD-NEXT: loop.latch.peel: float = 1.0
+; BAD-NEXT: exit.peel.next: float = 1.0
+; BAD-NEXT: loop.header.peel.next: float = 1.0
+; BAD-NEXT: exit: float = 1.0
\ No newline at end of file
diff --git a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll
index f3910f9bfc399..9b1e08c8ca526 100644
--- a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll
+++ b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll
@@ -1,46 +1,46 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
; RUN: opt -p loop-unroll -scev-cheap-expansion-budget=2 -S %s | FileCheck --check-prefix=BUDGET2 %s
; RUN: opt -p loop-unroll -scev-cheap-expansion-budget=3 -S %s | FileCheck --check-prefix=BUDGET3 %s
-define i32 @test_expansion_cost_2(i32 %start, i32 %end) {
+define i32 @test_expansion_cost_2(i32 %start, i32 %end) !prof !0 {
; BUDGET2-LABEL: define i32 @test_expansion_cost_2(
-; BUDGET2-SAME: i32 [[START:%.*]], i32 [[END:%.*]]) {
+; BUDGET2-SAME: i32 [[START:%.*]], i32 [[END:%.*]]) !prof [[PROF0:![0-9]+]] {
; BUDGET2-NEXT: [[ENTRY:.*]]:
; BUDGET2-NEXT: [[SUB:%.*]] = add i32 [[END]], -1
; BUDGET2-NEXT: br label %[[LOOP_HEADER:.*]]
; BUDGET2: [[LOOP_HEADER]]:
; BUDGET2-NEXT: [[IV:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; BUDGET2-NEXT: [[C:%.*]] = icmp eq i32 [[IV]], [[SUB]]
-; BUDGET2-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
+; BUDGET2-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]], !prof [[PROF1:![0-9]+]]
; BUDGET2: [[THEN]]:
; BUDGET2-NEXT: br label %[[LOOP_LATCH]]
; BUDGET2: [[LOOP_LATCH]]:
; BUDGET2-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1
; BUDGET2-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[END]]
-; BUDGET2-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
+; BUDGET2-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]], !prof [[PROF2:![0-9]+]]
; BUDGET2: [[EXIT]]:
; BUDGET2-NEXT: ret i32 0
;
; BUDGET3-LABEL: define i32 @test_expansion_cost_2(
-; BUDGET3-SAME: i32 [[START:%.*]], i32 [[END:%.*]]) {
+; BUDGET3-SAME: i32 [[START:%.*]], i32 [[END:%.*]]) !prof [[PROF0:![0-9]+]] {
; BUDGET3-NEXT: [[ENTRY:.*]]:
; BUDGET3-NEXT: [[SUB:%.*]] = add i32 [[END]], -1
; BUDGET3-NEXT: [[TMP0:%.*]] = sub i32 [[SUB]], [[START]]
; BUDGET3-NEXT: [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
-; BUDGET3-NEXT: br i1 [[TMP1]], label %[[ENTRY_SPLIT:.*]], label %[[EXIT_PEEL_BEGIN:.*]]
+; BUDGET3-NEXT: br i1 [[TMP1]], label %[[ENTRY_SPLIT:.*]], label %[[EXIT_PEEL_BEGIN:.*]], !prof [[PROF1:![0-9]+]]
; BUDGET3: [[ENTRY_SPLIT]]:
; BUDGET3-NEXT: br label %[[LOOP_HEADER:.*]]
; BUDGET3: [[LOOP_HEADER]]:
; BUDGET3-NEXT: [[IV:%.*]] = phi i32 [ [[START]], %[[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; BUDGET3-NEXT: [[C:%.*]] = icmp eq i32 [[IV]], [[SUB]]
-; BUDGET3-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
+; BUDGET3-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]], !prof [[PROF2:![0-9]+]]
; BUDGET3: [[THEN]]:
; BUDGET3-NEXT: br label %[[LOOP_LATCH]]
; BUDGET3: [[LOOP_LATCH]]:
; BUDGET3-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1
; BUDGET3-NEXT: [[TMP2:%.*]] = sub i32 [[END]], 1
; BUDGET3-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[TMP2]]
-; BUDGET3-NEXT: br i1 [[EC]], label %[[EXIT_PEEL_BEGIN_LOOPEXIT:.*]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP0:![0-9]+]]
+; BUDGET3-NEXT: br i1 [[EC]], label %[[EXIT_PEEL_BEGIN_LOOPEXIT:.*]], label %[[LOOP_HEADER]], !prof [[PROF3:![0-9]+]], !llvm.loop [[LOOP4:![0-9]+]]
; BUDGET3: [[EXIT_PEEL_BEGIN_LOOPEXIT]]:
; BUDGET3-NEXT: [[DOTPH:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP_LATCH]] ]
; BUDGET3-NEXT: br label %[[EXIT_PEEL_BEGIN]]
@@ -49,13 +49,13 @@ define i32 @test_expansion_cost_2(i32 %start, i32 %end) {
; BUDGET3-NEXT: br label %[[LOOP_HEADER_PEEL:.*]]
; BUDGET3: [[LOOP_HEADER_PEEL]]:
; BUDGET3-NEXT: [[C_PEEL:%.*]] = icmp eq i32 [[TMP3]], [[SUB]]
-; BUDGET3-NEXT: br i1 [[C_PEEL]], label %[[THEN_PEEL:.*]], label %[[LOOP_LATCH_PEEL:.*]]
+; BUDGET3-NEXT: br i1 [[C_PEEL]], label %[[THEN_PEEL:.*]], label %[[LOOP_LATCH_PEEL:.*]], !prof [[PROF2]]
; BUDGET3: [[THEN_PEEL]]:
; BUDGET3-NEXT: br label %[[LOOP_LATCH_PEEL]]
; BUDGET3: [[LOOP_LATCH_PEEL]]:
; BUDGET3-NEXT: [[IV_NEXT_PEEL:%.*]] = add nsw i32 [[TMP3]], 1
; BUDGET3-NEXT: [[EC_PEEL:%.*]] = icmp eq i32 [[IV_NEXT_PEEL]], [[END]]
-; BUDGET3-NEXT: br i1 [[EC_PEEL]], label %[[EXIT_PEEL_NEXT:.*]], label %[[EXIT_PEEL_NEXT]]
+; BUDGET3-NEXT: br i1 [[EC_PEEL]], label %[[EXIT_PEEL_NEXT:.*]], label %[[EXIT_PEEL_NEXT]], !prof [[PROF3]]
; BUDGET3: [[EXIT_PEEL_NEXT]]:
; BUDGET3-NEXT: br label %[[LOOP_HEADER_PEEL_NEXT:.*]]
; BUDGET3: [[LOOP_HEADER_PEEL_NEXT]]:
@@ -70,7 +70,7 @@ entry:
loop.header:
%iv = phi i32 [ %start, %entry ], [ %iv.next, %loop.latch ]
%c = icmp eq i32 %iv, %sub
- br i1 %c, label %then, label %loop.latch
+ br i1 %c, label %then, label %loop.latch, !prof !1
then:
br label %loop.latch
@@ -78,12 +78,25 @@ then:
loop.latch:
%iv.next = add nsw i32 %iv, 1
%ec = icmp eq i32 %iv.next, %end
- br i1 %ec, label %exit, label %loop.header
+ br i1 %ec, label %exit, label %loop.header, !prof !2
exit:
ret i32 0
}
+
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"branch_weights", i32 2, i32 3}
+!2 = !{!"branch_weights", i32 1, i32 10}
+;.
+; BUDGET2: [[PROF0]] = !{!"function_entry_count", i32 10}
+; BUDGET2: [[PROF1]] = !{!"branch_weights", i32 2, i32 3}
+; BUDGET2: [[PROF2]] = !{!"branch_weights", i32 1, i32 10}
;.
-; BUDGET3: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
-; BUDGET3: [[META1]] = !{!"llvm.loop.peeled.count", i32 1}
+; BUDGET3: [[PROF0]] = !{!"function_entry_count", i32 10}
+; BUDGET3: [[PROF1]] = !{!"branch_weights", i32 10, i32 1}
+; BUDGET3: [[PROF2]] = !{!"branch_weights", i32 2, i32 3}
+; BUDGET3: [[PROF3]] = !{!"branch_weights", i32 1, i32 10}
+; BUDGET3: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]]}
+; BUDGET3: [[META5]] = !{!"llvm.loop.peeled.count", i32 1}
+; BUDGET3: [[META6]] = !{!"llvm.loop.estimated_trip_count", i32 10}
;.
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index 35ea8b84d7ec1..2d1622e502e59 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -49,6 +49,7 @@
config.excludes = ["Inputs", "CMakeLists.txt", "README.txt", "LICENSE.txt"]
if config.enable_profcheck:
+ config.available_features.add("profcheck")
# Exclude llvm-reduce tests for profcheck because we substitute the FileCheck
# binary with a no-op command for profcheck, but llvm-reduce tests have RUN
# commands of the form llvm-reduce --test FileCheck, which explode if we
More information about the llvm-commits
mailing list