[llvm] [llvm-profgen] Improve sample profile density (PR #92144)

Lei Wang via llvm-commits llvm-commits at lists.llvm.org
Tue May 14 17:46:01 PDT 2024


https://github.com/wlei-llvm updated https://github.com/llvm/llvm-project/pull/92144

>From 441a16d95c2deb4b50641241e283891d7765c50b Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Mon, 13 May 2024 13:57:02 -0700
Subject: [PATCH] improve profile density

---
 .../tools/llvm-profgen/profile-density.test   |  2 +-
 llvm/tools/llvm-profgen/ProfileGenerator.cpp  | 90 +++++++++++++++++--
 llvm/tools/llvm-profgen/ProfileGenerator.h    |  5 +-
 3 files changed, 89 insertions(+), 8 deletions(-)

diff --git a/llvm/test/tools/llvm-profgen/profile-density.test b/llvm/test/tools/llvm-profgen/profile-density.test
index 0eb83838d16e7..f22c6f04914aa 100644
--- a/llvm/test/tools/llvm-profgen/profile-density.test
+++ b/llvm/test/tools/llvm-profgen/profile-density.test
@@ -7,7 +7,7 @@
 ;CHECK-DENSITY: Sample PGO is estimated to optimize better with 3.1x more samples. Please consider increasing sampling rate or profiling for longer duration to get more samples.
 ;CHECK-DENSITY: Minimum profile density for hot functions with top 99.00% total samples: 3.2
 
-;CHECK-DENSITY-CS: Minimum profile density for hot functions with top 99.00% total samples: 128.3
+;CHECK-DENSITY-CS: Minimum profile density for hot functions with top 99.00% total samples: 619.0
 
 ; original code:
 ; clang -O3 -g -fno-optimize-sibling-calls -fdebug-info-for-profiling qsort.c -o a.out
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 5aa44108f9660..ecbc6763e56f1 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -83,6 +83,10 @@ static cl::opt<double> HotFunctionDensityThreshold(
 static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false),
                                  llvm::cl::desc("show profile density details"),
                                  llvm::cl::Optional);
+static cl::opt<int> ProfileDensityHotFuncCutOff(
+    "profile-density-hot-func-cutoff", llvm::cl::init(990000),
+    llvm::cl::desc("Total sample cutoff for hot functions used to calculate "
+                   "the profile density."));
 
 static cl::opt<bool> UpdateTotalSamples(
     "update-total-samples", llvm::cl::init(false),
@@ -177,7 +181,8 @@ void ProfileGeneratorBase::write() {
   write(std::move(WriterOrErr.get()), ProfileMap);
 }
 
-void ProfileGeneratorBase::showDensitySuggestion(double Density) {
+void ProfileGeneratorBase::showDensitySuggestion(double Density,
+                                                 int DensityCutoffHot) {
   if (Density == 0.0)
     WithColor::warning() << "The --profile-summary-cutoff-hot option may be "
                             "set too low. Please check your command.\n";
@@ -190,9 +195,7 @@ void ProfileGeneratorBase::showDensitySuggestion(double Density) {
 
   if (ShowDensity)
     outs() << "Minimum profile density for hot functions with top "
-           << format("%.2f",
-                     static_cast<double>(ProfileSummaryCutoffHot.getValue()) /
-                         10000)
+           << format("%.2f", static_cast<double>(DensityCutoffHot) / 10000)
            << "% total samples: " << format("%.1f", Density) << "\n";
 }
 
@@ -771,7 +774,7 @@ void ProfileGenerator::populateBoundarySamplesForAllFunctions(
 void ProfileGeneratorBase::calculateAndShowDensity(
     const SampleProfileMap &Profiles) {
   double Density = calculateDensity(Profiles, HotCountThreshold);
-  showDensitySuggestion(Density);
+  showDensitySuggestion(Density, ProfileSummaryCutoffHot);
 }
 
 FunctionSamples *
@@ -1032,6 +1035,78 @@ void CSProfileGenerator::convertToProfileMap() {
   IsProfileValidOnTrie = false;
 }
 
+void CSProfileGenerator::calculateAndShowDensity(
+    SampleContextTracker &CTracker) {
+  showDensitySuggestion(calculateDensity(CTracker),
+                        ProfileDensityHotFuncCutOff);
+}
+
+// Calculate the profile-density of a context-sensitive profile:
+// Sort the functions by function-density in descending order and walk them
+// until their accumulated total samples reach the percentage_threshold of the
+// total profile samples. The profile-density is the last (minimum)
+// function-density among the walked functions. If it is good, every function
+// significant to perf has good density; if it is bad, the functions with bad
+// density hold more than (100% - percentage_threshold) of the samples.
+// The percentage_threshold (--profile-density-hot-func-cutoff, in units of
+// 1/1000000) is configurable depending on how much regression the system wants
+// to tolerate.
+double CSProfileGenerator::calculateDensity(SampleContextTracker &CTracker) {
+  uint64_t TotalProfileSamples = 0;
+  // A list of the function profile density and total samples.
+  std::vector<std::pair<double, uint64_t>> DensityList;
+  for (const auto *Node : CTracker) {
+    const auto *FSamples = Node->getFunctionSamples();
+    if (!FSamples)
+      continue;
+
+    uint64_t TotalBodySamples = 0;
+    uint64_t FuncBodySize = 0;
+    for (const auto &I : FSamples->getBodySamples()) {
+      TotalBodySamples += I.second.getSamples();
+      FuncBodySize++;
+    }
+    // The whole function could be inlined and optimized out; in that case use
+    // the callsite head samples to estimate the body count instead.
+    if (FuncBodySize == 0) {
+      for (const auto &CallsiteSamples : FSamples->getCallsiteSamples()) {
+        FuncBodySize++;
+        for (const auto &Callee : CallsiteSamples.second)
+          TotalBodySamples += Callee.second.getHeadSamplesEstimate();
+      }
+    }
+
+    if (FuncBodySize == 0)
+      continue;
+
+    double FuncDensity = static_cast<double>(TotalBodySamples) / FuncBodySize;
+    TotalProfileSamples += TotalBodySamples;
+    DensityList.emplace_back(FuncDensity, TotalBodySamples);
+  }
+
+  // Sort by density in descending order; ties put fewer samples first.
+  llvm::stable_sort(DensityList, [](const auto &A, const auto &B) {
+    if (A.first != B.first)
+      return A.first > B.first;
+    return A.second < B.second;
+  });
+
+  // Compute the cutoff once, in double: float loses large sample counts.
+  const double SampleCutoff =
+      TotalProfileSamples * static_cast<double>(ProfileDensityHotFuncCutOff) /
+      1000000;
+  double ProfileDensity = 0.0;
+  uint64_t AccumulatedSamples = 0;
+  for (const auto &P : DensityList) {
+    AccumulatedSamples += P.second;
+    ProfileDensity = P.first;
+    if (AccumulatedSamples >= SampleCutoff)
+      break;
+  }
+
+  return ProfileDensity;
+}
+
 void CSProfileGenerator::postProcessProfiles() {
   // Compute hot/cold threshold based on profile. This will be used for cold
   // context profile merging/trimming.
@@ -1041,6 +1116,7 @@ void CSProfileGenerator::postProcessProfiles() {
   // inline decisions.
   if (EnableCSPreInliner) {
     ContextTracker.populateFuncToCtxtMap();
+    calculateAndShowDensity(ContextTracker);
     CSPreInliner(ContextTracker, *Binary, Summary.get()).run();
     // Turn off the profile merger by default unless it is explicitly enabled.
     if (!CSProfMergeColdContext.getNumOccurrences())
@@ -1061,7 +1137,9 @@ void CSProfileGenerator::postProcessProfiles() {
   sampleprof::SampleProfileMap ContextLessProfiles;
   ProfileConverter::flattenProfile(ProfileMap, ContextLessProfiles, true);
 
-  calculateAndShowDensity(ContextLessProfiles);
+  if (!EnableCSPreInliner)
+    ProfileGeneratorBase::calculateAndShowDensity(ContextLessProfiles);
+
   if (GenCSNestedProfile) {
     ProfileConverter CSConverter(ProfileMap);
     CSConverter.convertCSProfiles();
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h
index d258fb78bfb11..cf451f9d1a1a4 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.h
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.h
@@ -121,7 +121,7 @@ class ProfileGeneratorBase {
   double calculateDensity(const SampleProfileMap &Profiles,
                           uint64_t HotCntThreshold);
 
-  void showDensitySuggestion(double Density);
+  void showDensitySuggestion(double Density, int DensityCutoffHot);
 
   void collectProfiledFunctions();
 
@@ -363,6 +363,9 @@ class CSProfileGenerator : public ProfileGeneratorBase {
 
   void computeSummaryAndThreshold();
 
+  void calculateAndShowDensity(SampleContextTracker &CTracker);
+  double calculateDensity(SampleContextTracker &CTracker);
+
   bool collectFunctionsFromLLVMProfile(
       std::unordered_set<const BinaryFunction *> &ProfiledFunctions) override;
 



More information about the llvm-commits mailing list