[llvm-branch-commits] [llvm] [BOLT] Add profile density computation (PR #101094)

Amir Ayupov via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu Oct 24 16:09:14 PDT 2024


https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/101094

>From f598510001859a29f6f1ff6362fb9950ab6340cd Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Mon, 29 Jul 2024 16:14:08 -0700
Subject: [PATCH 1/7] Update test to check the option with llvm-bolt with
 fdata, YAML, and pre-aggregated profile

Created using spr 1.3.4
---
 bolt/test/X86/pre-aggregated-perf.test | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/bolt/test/X86/pre-aggregated-perf.test b/bolt/test/X86/pre-aggregated-perf.test
index fc6f332d53dfb8..0f5137309e85d1 100644
--- a/bolt/test/X86/pre-aggregated-perf.test
+++ b/bolt/test/X86/pre-aggregated-perf.test
@@ -15,9 +15,15 @@ RUN:   --show-density --profile-density-threshold=9 \
 RUN:   --profile-density-cutoff-hot=970000 \
 RUN:   --profile-use-dfs | FileCheck %s
 
-RUN: llvm-bolt %t.exe -data %t -o %t.null | FileCheck %s
-RUN: llvm-bolt %t.exe -data %t.new -o %t.null | FileCheck %s
-RUN: llvm-bolt %t.exe -p %p/Inputs/pre-aggregated.txt --pa -o %t.null | FileCheck %s
+RUN: llvm-bolt %t.exe -data %t -o %t.null \
+RUN:   --show-density --profile-density-threshold=9 \
+RUN:   --profile-density-cutoff-hot=970000 | FileCheck %s
+RUN: llvm-bolt %t.exe -data %t.new -o %t.null \
+RUN:   --show-density --profile-density-threshold=9 \
+RUN:   --profile-density-cutoff-hot=970000 | FileCheck %s
+RUN: llvm-bolt %t.exe -p %p/Inputs/pre-aggregated.txt --pa -o %t.null \
+RUN:   --show-density --profile-density-threshold=9 \
+RUN:   --profile-density-cutoff-hot=970000 | FileCheck %s
 
 CHECK: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile
 CHECK: BOLT-INFO: Functions with density >= 9.4 account for 97.00% total sample counts.

>From e91907e57b39c8c79eb58b4d28d78fa253b130cb Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Mon, 29 Jul 2024 20:09:08 -0700
Subject: [PATCH 2/7] show-density init(true)

Created using spr 1.3.4
---
 bolt/lib/Passes/BinaryPasses.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp
index 23009bf74e0773..83fd6b2562eca8 100644
--- a/bolt/lib/Passes/BinaryPasses.cpp
+++ b/bolt/lib/Passes/BinaryPasses.cpp
@@ -224,7 +224,7 @@ static cl::opt<unsigned> TopCalledLimit(
     cl::init(100), cl::Hidden, cl::cat(BoltCategory));
 
 // Profile density options, synced with llvm-profgen/ProfileGenerator.cpp
-static cl::opt<bool> ShowDensity("show-density", cl::init(false),
+static cl::opt<bool> ShowDensity("show-density", cl::init(true),
                                  cl::desc("show profile density details"),
                                  cl::Optional);
 

>From 0d5291b01264a5387f8afd9fb69baf55fdc409a7 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Fri, 9 Aug 2024 11:17:57 -0700
Subject: [PATCH 3/7] show-density off by default

Created using spr 1.3.4
---
 bolt/lib/Passes/BinaryPasses.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp
index e0ad2af63a384a..0dc4a37e0ba946 100644
--- a/bolt/lib/Passes/BinaryPasses.cpp
+++ b/bolt/lib/Passes/BinaryPasses.cpp
@@ -224,7 +224,7 @@ static cl::opt<unsigned> TopCalledLimit(
     cl::init(100), cl::Hidden, cl::cat(BoltCategory));
 
 // Profile density options, synced with llvm-profgen/ProfileGenerator.cpp
-static cl::opt<bool> ShowDensity("show-density", cl::init(true),
+static cl::opt<bool> ShowDensity("show-density", cl::init(false),
                                  cl::desc("show profile density details"),
                                  cl::Optional);
 

>From f20e9618d0ab9602ebde9ec518ae194fbc323382 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Mon, 12 Aug 2024 14:46:21 -0700
Subject: [PATCH 4/7] s/ExecutedBytes/SampleCountInBytes

Created using spr 1.3.4
---
 bolt/include/bolt/Core/BinaryFunction.h | 4 ++--
 bolt/lib/Passes/BinaryPasses.cpp        | 4 ++--
 bolt/lib/Profile/DataAggregator.cpp     | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index 73d0d48c907e38..54ee4748b54c22 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -388,7 +388,7 @@ class BinaryFunction {
   uint64_t RawBranchCount{0};
 
   /// Dynamically executed function bytes, used for density computation.
-  uint64_t ExecutedBytes{0};
+  uint64_t SampleCountInBytes{0};
 
   /// Indicates the type of profile the function is using.
   uint16_t ProfileFlags{PF_NONE};
@@ -1847,7 +1847,7 @@ class BinaryFunction {
   void setRawBranchCount(uint64_t Count) { RawBranchCount = Count; }
 
   /// Return the number of dynamically executed bytes, from raw perf data.
-  uint64_t getExecutedBytes() const { return ExecutedBytes; }
+  uint64_t getSampleCountInBytes() const { return SampleCountInBytes; }
 
   /// Return the execution count for functions with known profile.
   /// Return 0 if the function has no profile.
diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp
index 0dc4a37e0ba946..a9935f02862f87 100644
--- a/bolt/lib/Passes/BinaryPasses.cpp
+++ b/bolt/lib/Passes/BinaryPasses.cpp
@@ -1467,10 +1467,10 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
       if (IsHotParentOfBOLTSplitFunction)
         for (const BinaryFunction *Fragment : Function.getFragments())
           Size += Fragment->getSize();
-      double Density = (double)1.0 * Function.getExecutedBytes() / Size;
+      double Density = (double)1.0 * Function.getSampleCountInBytes() / Size;
       FuncDensityList.emplace_back(Density, SampleCount);
       LLVM_DEBUG(BC.outs() << Function << ": executed bytes "
-                           << Function.getExecutedBytes() << ", size (b) "
+                           << Function.getSampleCountInBytes() << ", size (b) "
                            << Size << ", density " << Density
                            << ", sample count " << SampleCount << '\n');
     }
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index c26705e49774c0..a9c55c98f54a92 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -853,7 +853,7 @@ bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
   BinaryFunction *ParentFunc = getBATParentFunction(*FromFunc);
   if (!ParentFunc)
     ParentFunc = FromFunc;
-  ParentFunc->ExecutedBytes += Count * (Second.From - First.To);
+  ParentFunc->SampleCountInBytes += Count * (Second.From - First.To);
 
   std::optional<BoltAddressTranslation::FallthroughListTy> FTs =
       BAT ? BAT->getFallthroughsInTrace(FromFunc->getAddress(), First.To,

>From f0b70141d43fc49c158467040c2b01f32a6a5e0e Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Thu, 24 Oct 2024 13:37:10 -0700
Subject: [PATCH 5/7] Only enable show-density for perf2bolt

Created using spr 1.3.4
---
 bolt/include/bolt/Utils/CommandLineOpts.h | 1 +
 bolt/lib/Passes/BinaryPasses.cpp          | 5 +----
 bolt/lib/Utils/CommandLineOpts.cpp        | 4 ++++
 bolt/tools/driver/llvm-bolt.cpp           | 1 +
 4 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h
index baabeab577fb5e..04bf7db5de9527 100644
--- a/bolt/include/bolt/Utils/CommandLineOpts.h
+++ b/bolt/include/bolt/Utils/CommandLineOpts.h
@@ -55,6 +55,7 @@ extern llvm::cl::opt<bool> PrintSections;
 enum ProfileFormatKind { PF_Fdata, PF_YAML };
 
 extern llvm::cl::opt<ProfileFormatKind> ProfileFormat;
+extern llvm::cl::opt<bool> ShowDensity;
 extern llvm::cl::opt<bool> SplitEH;
 extern llvm::cl::opt<bool> StrictMode;
 extern llvm::cl::opt<bool> TimeOpts;
diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp
index c2c248abf8e264..179fe67caf5249 100644
--- a/bolt/lib/Passes/BinaryPasses.cpp
+++ b/bolt/lib/Passes/BinaryPasses.cpp
@@ -15,6 +15,7 @@
 #include "bolt/Core/ParallelUtilities.h"
 #include "bolt/Passes/ReorderAlgorithm.h"
 #include "bolt/Passes/ReorderFunctions.h"
+#include "bolt/Utils/CommandLineOpts.h"
 #include "llvm/Support/CommandLine.h"
 #include <atomic>
 #include <mutex>
@@ -224,10 +225,6 @@ static cl::opt<unsigned> TopCalledLimit(
     cl::init(100), cl::Hidden, cl::cat(BoltCategory));
 
 // Profile density options, synced with llvm-profgen/ProfileGenerator.cpp
-static cl::opt<bool> ShowDensity("show-density", cl::init(true),
-                                 cl::desc("show profile density details"),
-                                 cl::Optional);
-
 static cl::opt<int> ProfileDensityCutOffHot(
     "profile-density-cutoff-hot", cl::init(990000),
     cl::desc("Total samples cutoff for functions used to calculate "
diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp
index 435a8fa9cafcae..de82420a167131 100644
--- a/bolt/lib/Utils/CommandLineOpts.cpp
+++ b/bolt/lib/Utils/CommandLineOpts.cpp
@@ -175,6 +175,10 @@ cl::opt<std::string> SaveProfile("w",
                                  cl::desc("save recorded profile to a file"),
                                  cl::cat(BoltOutputCategory));
 
+cl::opt<bool> ShowDensity("show-density",
+                          cl::desc("show profile density details"),
+                          cl::Optional, cl::cat(AggregatorCategory));
+
 cl::opt<bool> SplitEH("split-eh", cl::desc("split C++ exception handling code"),
                       cl::Hidden, cl::cat(BoltOptCategory));
 
diff --git a/bolt/tools/driver/llvm-bolt.cpp b/bolt/tools/driver/llvm-bolt.cpp
index a8d1ac64808930..efa06cd68cb997 100644
--- a/bolt/tools/driver/llvm-bolt.cpp
+++ b/bolt/tools/driver/llvm-bolt.cpp
@@ -129,6 +129,7 @@ void perf2boltMode(int argc, char **argv) {
     exit(1);
   }
   opts::AggregateOnly = true;
+  opts::ShowDensity = true;
 }
 
 void boltDiffMode(int argc, char **argv) {

>From 34f91248e09e1586bf87e7abb8a59e8ad34ec2ef Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Thu, 24 Oct 2024 15:50:39 -0700
Subject: [PATCH 6/7] default threshold=60

Created using spr 1.3.4
---
 bolt/lib/Passes/BinaryPasses.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp
index 179fe67caf5249..5a676185227ec1 100644
--- a/bolt/lib/Passes/BinaryPasses.cpp
+++ b/bolt/lib/Passes/BinaryPasses.cpp
@@ -231,7 +231,7 @@ static cl::opt<int> ProfileDensityCutOffHot(
              "profile density."));
 
 static cl::opt<double> ProfileDensityThreshold(
-    "profile-density-threshold", cl::init(50),
+    "profile-density-threshold", cl::init(60),
     cl::desc("If the profile density is below the given threshold, it "
              "will be suggested to increase the sampling rate."),
     cl::Optional);

>From a06c4bbae2ba4c588297e1df540a6b53578f7aa3 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Thu, 24 Oct 2024 16:09:02 -0700
Subject: [PATCH 7/7] Add test with warning

Created using spr 1.3.4
---
 bolt/test/X86/pre-aggregated-perf.test | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/bolt/test/X86/pre-aggregated-perf.test b/bolt/test/X86/pre-aggregated-perf.test
index ecdc613a8d76b8..3242ba22f59164 100644
--- a/bolt/test/X86/pre-aggregated-perf.test
+++ b/bolt/test/X86/pre-aggregated-perf.test
@@ -11,13 +11,20 @@ REQUIRES: system-linux
 
 RUN: yaml2obj %p/Inputs/blarge.yaml &> %t.exe
 RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated.txt -w %t.new \
-RUN:   --show-density --profile-density-threshold=9 \
-RUN:   --profile-density-cutoff-hot=970000 \
+RUN:   --profile-density-threshold=9 --profile-density-cutoff-hot=970000 \
 RUN:   --profile-use-dfs | FileCheck %s --check-prefix=CHECK-P2B
 
 CHECK-P2B: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile
 CHECK-P2B: BOLT-INFO: Functions with density >= 21.7 account for 97.00% total sample counts.
 
+RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated.txt -w %t.new \
+RUN:   --profile-density-cutoff-hot=970000 \
+RUN:   --profile-use-dfs 2>&1 | FileCheck %s --check-prefix=CHECK-WARNING
+
+CHECK-WARNING: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile
+CHECK-WARNING: BOLT-WARNING: BOLT is estimated to optimize better with 2.8x more samples.
+CHECK-WARNING: BOLT-INFO: Functions with density >= 21.7 account for 97.00% total sample counts.
+
 RUN: llvm-bolt %t.exe -data %t -o %t.null | FileCheck %s
 RUN: llvm-bolt %t.exe -data %t.new -o %t.null | FileCheck %s
 RUN: llvm-bolt %t.exe -p %p/Inputs/pre-aggregated.txt --pa -o %t.null | FileCheck %s



More information about the llvm-branch-commits mailing list