[llvm] [LoopUnroll] Add flag to disable PGO usage (PR #102950)

Ellis Hoag via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 12 11:36:35 PDT 2024


https://github.com/ellishg created https://github.com/llvm/llvm-project/pull/102950

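Add the hidden `-loop-unroll-use-branch-weights` LLVM flag (default: `true`) to enable/disable the use of `getLoopEstimatedTripCount()`, which estimates a loop's trip count from branch-weight metadata. That estimate is used to determine the peel count in `computePeelCount()` and to reject unrolling of loops with a small profiled trip count in `computeUnrollCount()`; both uses are gated by the new flag.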

https://github.com/llvm/llvm-project/blob/dc21cb5cc74fdff18418092570230cd980cafa27/llvm/include/llvm/Transforms/Utils/LoopUtils.h#L318-L324

When building with `-Oz`, consuming profiles can drastically increase binary size. We found that passing `-loop-unroll-use-branch-weights=false` yields a 1.6% text-size reduction when profiles are used, which mitigates some of this regression.

>From 9a0e480777fc85bd8125a7324f2f1133b8dd41af Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellis.sparky.hoag at gmail.com>
Date: Mon, 12 Aug 2024 11:29:10 -0700
Subject: [PATCH] [LoopUnroll] Add flag to disable PGO usage

---
 llvm/include/llvm/Transforms/Utils/LoopPeel.h         |  3 ++-
 llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp         | 11 +++++++++--
 llvm/lib/Transforms/Utils/LoopPeel.cpp                |  6 +++---
 .../LoopUnroll/peel-loop-conditions-pgo-1.ll          |  7 ++++---
 4 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/LoopPeel.h b/llvm/include/llvm/Transforms/Utils/LoopPeel.h
index 0b78700ca71bb9..987c21b7ca5610 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopPeel.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopPeel.h
@@ -37,7 +37,8 @@ gatherPeelingPreferences(Loop *L, ScalarEvolution &SE,
 void computePeelCount(Loop *L, unsigned LoopSize,
                       TargetTransformInfo::PeelingPreferences &PP,
                       unsigned TripCount, DominatorTree &DT,
-                      ScalarEvolution &SE, AssumptionCache *AC = nullptr,
+                      ScalarEvolution &SE, bool UseBranchWeights,
+                      AssumptionCache *AC = nullptr,
                       unsigned Threshold = UINT_MAX);
 
 } // end namespace llvm
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index cbc35b6dd4292a..0a446851acf2d3 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -179,6 +179,12 @@ static cl::opt<unsigned> PragmaUnrollFullMaxIterations(
     "pragma-unroll-full-max-iterations", cl::init(1'000'000), cl::Hidden,
     cl::desc("Maximum allowed iterations to unroll under pragma unroll full."));
 
+static cl::opt<bool>
+    UseBranchWeights("loop-unroll-use-branch-weights", cl::init(true),
+                     cl::Hidden,
+                     cl::desc("Estimate loop trip counts with branch weight "
+                              "metadata to help determine the peel count"));
+
 /// A magic value for use with the Threshold parameter to indicate
 /// that the loop unroll should be performed regardless of how much
 /// code expansion would result.
@@ -1012,7 +1018,8 @@ bool llvm::computeUnrollCount(
   }
 
   // 5th priority is loop peeling.
-  computePeelCount(L, LoopSize, PP, TripCount, DT, SE, AC, UP.Threshold);
+  computePeelCount(L, LoopSize, PP, TripCount, DT, SE, UseBranchWeights, AC,
+                   UP.Threshold);
   if (PP.PeelCount) {
     UP.Runtime = false;
     UP.Count = 1;
@@ -1081,7 +1088,7 @@ bool llvm::computeUnrollCount(
   }
 
   // Check if the runtime trip count is too small when profile is available.
-  if (L->getHeader()->getParent()->hasProfileData()) {
+  if (UseBranchWeights && L->getHeader()->getParent()->hasProfileData()) {
     if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
       if (*ProfileTripCount < FlatLoopTripCountThreshold)
         return false;
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index 5d7c0d947facc4..9557d31a122a63 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -538,8 +538,8 @@ static bool violatesLegacyMultiExitLoopCheck(Loop *L) {
 void llvm::computePeelCount(Loop *L, unsigned LoopSize,
                             TargetTransformInfo::PeelingPreferences &PP,
                             unsigned TripCount, DominatorTree &DT,
-                            ScalarEvolution &SE, AssumptionCache *AC,
-                            unsigned Threshold) {
+                            ScalarEvolution &SE, bool UseBranchWeights,
+                            AssumptionCache *AC, unsigned Threshold) {
   assert(LoopSize > 0 && "Zero loop size is not allowed!");
   // Save the PP.PeelCount value set by the target in
   // TTI.getPeelingPreferences or by the flag -unroll-peel-count.
@@ -632,7 +632,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
   // hit the peeled section.
   // We only do this in the presence of profile information, since otherwise
   // our estimates of the trip count are not reliable enough.
-  if (L->getHeader()->getParent()->hasProfileData()) {
+  if (UseBranchWeights && L->getHeader()->getParent()->hasProfileData()) {
     if (violatesLegacyMultiExitLoopCheck(L))
       return;
     std::optional<unsigned> EstimatedTripCount = getLoopEstimatedTripCount(L);
diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-1.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-1.ll
index e3cfe53950f572..c7fb389c635957 100644
--- a/llvm/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-1.ll
+++ b/llvm/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-1.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -S -passes=loop-unroll,loop-unroll -verify-dom-info -debug-only=loop-unroll -unroll-peel-max-count=7 2>&1 | FileCheck %s
+; RUN: opt < %s -S -passes=loop-unroll,loop-unroll -verify-dom-info -debug-only=loop-unroll -unroll-peel-max-count=7 2>&1 | FileCheck %s --check-prefixes=CHECK,PGO
+; RUN: opt < %s -S -passes=loop-unroll,loop-unroll -verify-dom-info -debug-only=loop-unroll -unroll-peel-max-count=7 -loop-unroll-use-branch-weights=false 2>&1 | FileCheck %s
 ; REQUIRES: asserts
 
 declare void @f1()
@@ -11,8 +12,8 @@ declare void @f2()
 define void @test1(i32 %k) !prof !4 {
 ; CHECK: Loop Unroll: F[test1] Loop %for.body
 ; CHECK: PEELING loop %for.body with iteration count 2!
-; CHECK: PEELING loop %for.body with iteration count 5!
-; CHECK: llvm.loop.unroll.disable
+; PGO: PEELING loop %for.body with iteration count 5!
+; PGO: llvm.loop.unroll.disable
 for.body.lr.ph:
   br label %for.body
 



More information about the llvm-commits mailing list