[llvm] [LoopUnroll] Add flag to disable PGO usage (PR #102950)
Ellis Hoag via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 12 11:36:35 PDT 2024
https://github.com/ellishg created https://github.com/llvm/llvm-project/pull/102950
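Create the `-loop-unroll-use-branch-weights` LLVM flag to enable or disable the use of `getLoopEstimatedTripCount()`, which estimates a loop's trip count from branch weights. Loop unrolling uses this estimate to determine the peel count; the flag also gates the profile-based check that rejects runtime unrolling when the estimated trip count is too small.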
https://github.com/llvm/llvm-project/blob/dc21cb5cc74fdff18418092570230cd980cafa27/llvm/include/llvm/Transforms/Utils/LoopUtils.h#L318-L324
When building with `-Oz`, consuming profiles can drastically increase binary size. We found that `-loop-unroll-use-branch-weights=false` gives us a 1.6% text size reduction when profiles are used, which mitigates some of this regression.
From 9a0e480777fc85bd8125a7324f2f1133b8dd41af Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellis.sparky.hoag at gmail.com>
Date: Mon, 12 Aug 2024 11:29:10 -0700
Subject: [PATCH] [LoopUnroll] Add flag to disable PGO usage
---
llvm/include/llvm/Transforms/Utils/LoopPeel.h | 3 ++-
llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 11 +++++++++--
llvm/lib/Transforms/Utils/LoopPeel.cpp | 6 +++---
.../LoopUnroll/peel-loop-conditions-pgo-1.ll | 7 ++++---
4 files changed, 18 insertions(+), 9 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Utils/LoopPeel.h b/llvm/include/llvm/Transforms/Utils/LoopPeel.h
index 0b78700ca71bb9..987c21b7ca5610 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopPeel.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopPeel.h
@@ -37,7 +37,8 @@ gatherPeelingPreferences(Loop *L, ScalarEvolution &SE,
void computePeelCount(Loop *L, unsigned LoopSize,
TargetTransformInfo::PeelingPreferences &PP,
unsigned TripCount, DominatorTree &DT,
- ScalarEvolution &SE, AssumptionCache *AC = nullptr,
+ ScalarEvolution &SE, bool UseBranchWeights,
+ AssumptionCache *AC = nullptr,
unsigned Threshold = UINT_MAX);
} // end namespace llvm
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index cbc35b6dd4292a..0a446851acf2d3 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -179,6 +179,12 @@ static cl::opt<unsigned> PragmaUnrollFullMaxIterations(
"pragma-unroll-full-max-iterations", cl::init(1'000'000), cl::Hidden,
cl::desc("Maximum allowed iterations to unroll under pragma unroll full."));
+static cl::opt<bool>
+ UseBranchWeights("loop-unroll-use-branch-weights", cl::init(true),
+ cl::Hidden,
+ cl::desc("Estimate loop trip counts with branch weight "
+ "metadata to help determine the peel count"));
+
/// A magic value for use with the Threshold parameter to indicate
/// that the loop unroll should be performed regardless of how much
/// code expansion would result.
@@ -1012,7 +1018,8 @@ bool llvm::computeUnrollCount(
}
// 5th priority is loop peeling.
- computePeelCount(L, LoopSize, PP, TripCount, DT, SE, AC, UP.Threshold);
+ computePeelCount(L, LoopSize, PP, TripCount, DT, SE, UseBranchWeights, AC,
+ UP.Threshold);
if (PP.PeelCount) {
UP.Runtime = false;
UP.Count = 1;
@@ -1081,7 +1088,7 @@ bool llvm::computeUnrollCount(
}
// Check if the runtime trip count is too small when profile is available.
- if (L->getHeader()->getParent()->hasProfileData()) {
+ if (UseBranchWeights && L->getHeader()->getParent()->hasProfileData()) {
if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
if (*ProfileTripCount < FlatLoopTripCountThreshold)
return false;
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index 5d7c0d947facc4..9557d31a122a63 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -538,8 +538,8 @@ static bool violatesLegacyMultiExitLoopCheck(Loop *L) {
void llvm::computePeelCount(Loop *L, unsigned LoopSize,
TargetTransformInfo::PeelingPreferences &PP,
unsigned TripCount, DominatorTree &DT,
- ScalarEvolution &SE, AssumptionCache *AC,
- unsigned Threshold) {
+ ScalarEvolution &SE, bool UseBranchWeights,
+ AssumptionCache *AC, unsigned Threshold) {
assert(LoopSize > 0 && "Zero loop size is not allowed!");
// Save the PP.PeelCount value set by the target in
// TTI.getPeelingPreferences or by the flag -unroll-peel-count.
@@ -632,7 +632,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
// hit the peeled section.
// We only do this in the presence of profile information, since otherwise
// our estimates of the trip count are not reliable enough.
- if (L->getHeader()->getParent()->hasProfileData()) {
+ if (UseBranchWeights && L->getHeader()->getParent()->hasProfileData()) {
if (violatesLegacyMultiExitLoopCheck(L))
return;
std::optional<unsigned> EstimatedTripCount = getLoopEstimatedTripCount(L);
diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-1.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-1.ll
index e3cfe53950f572..c7fb389c635957 100644
--- a/llvm/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-1.ll
+++ b/llvm/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-1.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -S -passes=loop-unroll,loop-unroll -verify-dom-info -debug-only=loop-unroll -unroll-peel-max-count=7 2>&1 | FileCheck %s
+; RUN: opt < %s -S -passes=loop-unroll,loop-unroll -verify-dom-info -debug-only=loop-unroll -unroll-peel-max-count=7 2>&1 | FileCheck %s --check-prefixes=CHECK,PGO
+; RUN: opt < %s -S -passes=loop-unroll,loop-unroll -verify-dom-info -debug-only=loop-unroll -unroll-peel-max-count=7 -loop-unroll-use-branch-weights=false 2>&1 | FileCheck %s
; REQUIRES: asserts
declare void @f1()
@@ -11,8 +12,8 @@ declare void @f2()
define void @test1(i32 %k) !prof !4 {
; CHECK: Loop Unroll: F[test1] Loop %for.body
; CHECK: PEELING loop %for.body with iteration count 2!
-; CHECK: PEELING loop %for.body with iteration count 5!
-; CHECK: llvm.loop.unroll.disable
+; PGO: PEELING loop %for.body with iteration count 5!
+; PGO: llvm.loop.unroll.disable
for.body.lr.ph:
br label %for.body
More information about the llvm-commits mailing list