[llvm] [llvm-profgen] Improve sample profile density (PR #92144)

Lei Wang via llvm-commits llvm-commits at lists.llvm.org
Thu May 23 12:21:39 PDT 2024


https://github.com/wlei-llvm updated https://github.com/llvm/llvm-project/pull/92144

>From 441a16d95c2deb4b50641241e283891d7765c50b Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Mon, 13 May 2024 13:57:02 -0700
Subject: [PATCH 1/8] improve profile density

---
 .../tools/llvm-profgen/profile-density.test   |  2 +-
 llvm/tools/llvm-profgen/ProfileGenerator.cpp  | 90 +++++++++++++++++--
 llvm/tools/llvm-profgen/ProfileGenerator.h    |  5 +-
 3 files changed, 89 insertions(+), 8 deletions(-)

diff --git a/llvm/test/tools/llvm-profgen/profile-density.test b/llvm/test/tools/llvm-profgen/profile-density.test
index 0eb83838d16e7..f22c6f04914aa 100644
--- a/llvm/test/tools/llvm-profgen/profile-density.test
+++ b/llvm/test/tools/llvm-profgen/profile-density.test
@@ -7,7 +7,7 @@
 ;CHECK-DENSITY: Sample PGO is estimated to optimize better with 3.1x more samples. Please consider increasing sampling rate or profiling for longer duration to get more samples.
 ;CHECK-DENSITY: Minimum profile density for hot functions with top 99.00% total samples: 3.2
 
-;CHECK-DENSITY-CS: Minimum profile density for hot functions with top 99.00% total samples: 128.3
+;CHECK-DENSITY-CS: Minimum profile density for hot functions with top 99.00% total samples: 619.0
 
 ; original code:
 ; clang -O3 -g -fno-optimize-sibling-calls -fdebug-info-for-profiling qsort.c -o a.out
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 5aa44108f9660..ecbc6763e56f1 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -83,6 +83,10 @@ static cl::opt<double> HotFunctionDensityThreshold(
 static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false),
                                  llvm::cl::desc("show profile density details"),
                                  llvm::cl::Optional);
+static cl::opt<int> ProfileDensityHotFuncCutOff(
+    "profile-density-hot-func-cutoff", llvm::cl::init(990000),
+    llvm::cl::desc("Total sample cutoff for hot functions used to calculate "
+                   "the profile density."));
 
 static cl::opt<bool> UpdateTotalSamples(
     "update-total-samples", llvm::cl::init(false),
@@ -177,7 +181,8 @@ void ProfileGeneratorBase::write() {
   write(std::move(WriterOrErr.get()), ProfileMap);
 }
 
-void ProfileGeneratorBase::showDensitySuggestion(double Density) {
+void ProfileGeneratorBase::showDensitySuggestion(double Density,
+                                                 int DensityCutoffHot) {
   if (Density == 0.0)
     WithColor::warning() << "The --profile-summary-cutoff-hot option may be "
                             "set too low. Please check your command.\n";
@@ -190,9 +195,7 @@ void ProfileGeneratorBase::showDensitySuggestion(double Density) {
 
   if (ShowDensity)
     outs() << "Minimum profile density for hot functions with top "
-           << format("%.2f",
-                     static_cast<double>(ProfileSummaryCutoffHot.getValue()) /
-                         10000)
+           << format("%.2f", static_cast<double>(DensityCutoffHot) / 10000)
            << "% total samples: " << format("%.1f", Density) << "\n";
 }
 
@@ -771,7 +774,7 @@ void ProfileGenerator::populateBoundarySamplesForAllFunctions(
 void ProfileGeneratorBase::calculateAndShowDensity(
     const SampleProfileMap &Profiles) {
   double Density = calculateDensity(Profiles, HotCountThreshold);
-  showDensitySuggestion(Density);
+  showDensitySuggestion(Density, ProfileSummaryCutoffHot);
 }
 
 FunctionSamples *
@@ -1032,6 +1035,78 @@ void CSProfileGenerator::convertToProfileMap() {
   IsProfileValidOnTrie = false;
 }
 
+void CSProfileGenerator::calculateAndShowDensity(
+    SampleContextTracker &CTracker) {
+  double Density = calculateDensity(CTracker);
+  showDensitySuggestion(Density, ProfileDensityHotFuncCutOff);
+}
+
+// Calculate Profile-density:
+// Sort the list of function-density in descending order and iterate them once
+// their accumulated total samples exceeds the percentage_threshold of total
+// profile samples, the profile-density is the last(minimum) function-density of
+// the processed functions, which means all the functions significant to perf
+// are on good density if the profile-density is good, or in other words, if the
+// profile-density is bad, the accumulated samples for all the bad density
+// profile exceeds the (100% - percentage_threshold).
+// The percentage_threshold(--profile-density-hot-func-cutoff) is configurable
+// depending on how much regression the system want to tolerate.
+double CSProfileGenerator::calculateDensity(SampleContextTracker &CTracker) {
+  double ProfileDensity = 0.0;
+
+  uint64_t TotalProfileSamples = 0;
+  // A list of the function profile density and total samples.
+  std::vector<std::pair<double, uint64_t>> DensityList;
+  for (const auto *Node : CTracker) {
+    const auto *FSamples = Node->getFunctionSamples();
+    if (!FSamples)
+      continue;
+
+    uint64_t TotalBodySamples = 0;
+    uint64_t FuncBodySize = 0;
+    for (const auto &I : FSamples->getBodySamples()) {
+      TotalBodySamples += I.second.getSamples();
+      FuncBodySize++;
+    }
+    // The whole function could be inlined and optimized out, use the callsite
+    // head samples instead to estimate the body count.
+    if (FuncBodySize == 0) {
+      for (const auto &CallsiteSamples : FSamples->getCallsiteSamples()) {
+        FuncBodySize++;
+        for (const auto &Callee : CallsiteSamples.second)
+          TotalBodySamples += Callee.second.getHeadSamplesEstimate();
+      }
+    }
+
+    if (FuncBodySize == 0)
+      continue;
+
+    double FuncDensity = static_cast<double>(TotalBodySamples) / FuncBodySize;
+    TotalProfileSamples += TotalBodySamples;
+    DensityList.emplace_back(FuncDensity, TotalBodySamples);
+  }
+
+  // Sorted by the density in descending order.
+  llvm::stable_sort(DensityList, [&](const std::pair<double, uint64_t> &A,
+                                     const std::pair<double, uint64_t> &B) {
+    if (A.first != B.first)
+      return A.first > B.first;
+    return A.second < B.second;
+  });
+
+  uint64_t AccumulatedSamples = 0;
+  for (const auto &P : DensityList) {
+    AccumulatedSamples += P.second;
+    ProfileDensity = P.first;
+    if (AccumulatedSamples >=
+        TotalProfileSamples * static_cast<float>(ProfileDensityHotFuncCutOff) /
+            1000000)
+      break;
+  }
+
+  return ProfileDensity;
+}
+
 void CSProfileGenerator::postProcessProfiles() {
   // Compute hot/cold threshold based on profile. This will be used for cold
   // context profile merging/trimming.
@@ -1041,6 +1116,7 @@ void CSProfileGenerator::postProcessProfiles() {
   // inline decisions.
   if (EnableCSPreInliner) {
     ContextTracker.populateFuncToCtxtMap();
+    calculateAndShowDensity(ContextTracker);
     CSPreInliner(ContextTracker, *Binary, Summary.get()).run();
     // Turn off the profile merger by default unless it is explicitly enabled.
     if (!CSProfMergeColdContext.getNumOccurrences())
@@ -1061,7 +1137,9 @@ void CSProfileGenerator::postProcessProfiles() {
   sampleprof::SampleProfileMap ContextLessProfiles;
   ProfileConverter::flattenProfile(ProfileMap, ContextLessProfiles, true);
 
-  calculateAndShowDensity(ContextLessProfiles);
+  if (!EnableCSPreInliner)
+    ProfileGeneratorBase::calculateAndShowDensity(ContextLessProfiles);
+
   if (GenCSNestedProfile) {
     ProfileConverter CSConverter(ProfileMap);
     CSConverter.convertCSProfiles();
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h
index d258fb78bfb11..cf451f9d1a1a4 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.h
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.h
@@ -121,7 +121,7 @@ class ProfileGeneratorBase {
   double calculateDensity(const SampleProfileMap &Profiles,
                           uint64_t HotCntThreshold);
 
-  void showDensitySuggestion(double Density);
+  void showDensitySuggestion(double Density, int DensityCutoffHot);
 
   void collectProfiledFunctions();
 
@@ -363,6 +363,9 @@ class CSProfileGenerator : public ProfileGeneratorBase {
 
   void computeSummaryAndThreshold();
 
+  void calculateAndShowDensity(SampleContextTracker &CTracker);
+  double calculateDensity(SampleContextTracker &CTracker);
+
   bool collectFunctionsFromLLVMProfile(
       std::unordered_set<const BinaryFunction *> &ProfiledFunctions) override;
 

>From 9082e49bad782088c8f7da0057027c6367f8d927 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Fri, 17 May 2024 11:50:25 -0700
Subject: [PATCH 2/8] change to base on final profile and addressing other
 comments

---
 .../tools/llvm-profgen/profile-density.test   |   6 +-
 llvm/tools/llvm-profgen/ProfileGenerator.cpp  | 205 +++++++-----------
 llvm/tools/llvm-profgen/ProfileGenerator.h    |  14 +-
 3 files changed, 94 insertions(+), 131 deletions(-)

diff --git a/llvm/test/tools/llvm-profgen/profile-density.test b/llvm/test/tools/llvm-profgen/profile-density.test
index f22c6f04914aa..e8bcc9a3a5028 100644
--- a/llvm/test/tools/llvm-profgen/profile-density.test
+++ b/llvm/test/tools/llvm-profgen/profile-density.test
@@ -4,10 +4,10 @@
 ; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/profile-density-cs.raw.prof --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t3 --show-density -hot-function-density-threshold=1 &> %t4
 ; RUN: FileCheck %s --input-file %t4 --check-prefix=CHECK-DENSITY-CS
 
-;CHECK-DENSITY: Sample PGO is estimated to optimize better with 3.1x more samples. Please consider increasing sampling rate or profiling for longer duration to get more samples.
-;CHECK-DENSITY: Minimum profile density for hot functions with top 99.00% total samples: 3.2
+;CHECK-DENSITY: Sample PGO is estimated to optimize better with 2.9x more samples. Please consider increasing sampling rate or profiling for longer duration to get more samples.
+;CHECK-DENSITY: Functions with density >= 3.5 account for 99.00% total sample counts.
 
-;CHECK-DENSITY-CS: Minimum profile density for hot functions with top 99.00% total samples: 619.0
+;CHECK-DENSITY-CS: Functions with density >= 800.1 account for 99.00% total sample counts.
 
 ; original code:
 ; clang -O3 -g -fno-optimize-sibling-calls -fdebug-info-for-profiling qsort.c -o a.out
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index ecbc6763e56f1..e3e856ead918c 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -76,17 +76,16 @@ static cl::opt<int, true> CSProfMaxContextDepth(
     cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth));
 
 static cl::opt<double> HotFunctionDensityThreshold(
-    "hot-function-density-threshold", llvm::cl::init(1000),
-    llvm::cl::desc(
-        "specify density threshold for hot functions (default: 1000)"),
+    "hot-function-density-threshold", llvm::cl::init(20),
+    llvm::cl::desc("specify density threshold for hot functions (default: 20)"),
     llvm::cl::Optional);
 static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false),
                                  llvm::cl::desc("show profile density details"),
                                  llvm::cl::Optional);
-static cl::opt<int> ProfileDensityHotFuncCutOff(
-    "profile-density-hot-func-cutoff", llvm::cl::init(990000),
-    llvm::cl::desc("Total sample cutoff for hot functions used to calculate "
-                   "the profile density."));
+static cl::opt<int> ProfileDensityCutOffHot(
+    "profile-density-cutoff-hot", llvm::cl::init(990000),
+    llvm::cl::desc("Total samples cutoff for functions used to calculate "
+                   "profile density."));
 
 static cl::opt<bool> UpdateTotalSamples(
     "update-total-samples", llvm::cl::init(false),
@@ -181,10 +180,9 @@ void ProfileGeneratorBase::write() {
   write(std::move(WriterOrErr.get()), ProfileMap);
 }
 
-void ProfileGeneratorBase::showDensitySuggestion(double Density,
-                                                 int DensityCutoffHot) {
+void ProfileGeneratorBase::showDensitySuggestion(double Density) {
   if (Density == 0.0)
-    WithColor::warning() << "The --profile-summary-cutoff-hot option may be "
+    WithColor::warning() << "The --profile-density-cutoff-hot option may be "
                             "set too low. Please check your command.\n";
   else if (Density < HotFunctionDensityThreshold)
     WithColor::warning()
@@ -194,9 +192,11 @@ void ProfileGeneratorBase::showDensitySuggestion(double Density,
            "profiling for longer duration to get more samples.\n";
 
   if (ShowDensity)
-    outs() << "Minimum profile density for hot functions with top "
-           << format("%.2f", static_cast<double>(DensityCutoffHot) / 10000)
-           << "% total samples: " << format("%.1f", Density) << "\n";
+    outs() << "Functions with density >= " << format("%.1f", Density)
+           << " account for "
+           << format("%.2f",
+                     static_cast<double>(ProfileDensityCutOffHot) / 10000)
+           << "% total sample counts.\n";
 }
 
 bool ProfileGeneratorBase::filterAmbiguousProfile(FunctionSamples &FS) {
@@ -241,32 +241,6 @@ void ProfileGeneratorBase::filterAmbiguousProfile(SampleProfileMap &Profiles) {
   }
 }
 
-double ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles,
-                                              uint64_t HotCntThreshold) {
-  double Density = DBL_MAX;
-  std::vector<const FunctionSamples *> HotFuncs;
-  for (auto &I : Profiles) {
-    auto &FuncSamples = I.second;
-    if (FuncSamples.getTotalSamples() < HotCntThreshold)
-      continue;
-    HotFuncs.emplace_back(&FuncSamples);
-  }
-
-  for (auto *FuncSamples : HotFuncs) {
-    auto *Func = Binary->getBinaryFunction(FuncSamples->getFunction());
-    if (!Func)
-      continue;
-    uint64_t FuncSize = Func->getFuncSize();
-    if (FuncSize == 0)
-      continue;
-    Density =
-        std::min(Density, static_cast<double>(FuncSamples->getTotalSamples()) /
-                              FuncSize);
-  }
-
-  return Density == DBL_MAX ? 0.0 : Density;
-}
-
 void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges,
                                               const RangeSample &Ranges) {
 
@@ -771,10 +745,78 @@ void ProfileGenerator::populateBoundarySamplesForAllFunctions(
   }
 }
 
+void ProfileGeneratorBase::calculateDensity(
+    const FunctionSamples &FSamples,
+    std::vector<std::pair<double, uint64_t>> &DensityList,
+    uint64_t &TotalProfileSamples) {
+  uint64_t TotalBodySamples = 0;
+  uint64_t FuncBodySize = 0;
+  for (const auto &I : FSamples.getBodySamples()) {
+    TotalBodySamples += I.second.getSamples();
+    FuncBodySize++;
+  }
+
+  // The whole function could be inlined and optimized out, use the callsite
+  // head samples instead to estimate the body count.
+  if (FuncBodySize == 0) {
+    for (const auto &CallsiteSamples : FSamples.getCallsiteSamples()) {
+      FuncBodySize++;
+      for (const auto &Callee : CallsiteSamples.second)
+        TotalBodySamples += Callee.second.getHeadSamplesEstimate();
+    }
+  }
+
+  if (FuncBodySize == 0)
+    return;
+
+  double FuncDensity = static_cast<double>(TotalBodySamples) / FuncBodySize;
+  TotalProfileSamples += TotalBodySamples;
+  DensityList.emplace_back(FuncDensity, TotalBodySamples);
+}
+
+// Calculate Profile-density:
+// Calculate the density for each function and sort them in descending order,
+// iterate them once their accumulated total samples exceeds the
+// percentage_threshold(cut-off) of total profile samples, the profile-density
+// is the last(minimum) function-density of the processed functions, which means
+// all the functions hot to perf are on good density if the profile-density is
+// good. The percentage_threshold(--profile-density-cutoff-hot) is configurable
+// depending on how much regression the system want to tolerate.
+double
+ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles) {
+  double ProfileDensity = 0.0;
+
+  uint64_t TotalProfileSamples = 0;
+  // A list of the function profile density and its total samples.
+  std::vector<std::pair<double, uint64_t>> FuncDensityList;
+  for (const auto &I : Profiles)
+    calculateDensity(I.second, FuncDensityList, TotalProfileSamples);
+
+  // Sorted by the density in descending order.
+  llvm::stable_sort(FuncDensityList, [&](const std::pair<double, uint64_t> &A,
+                                         const std::pair<double, uint64_t> &B) {
+    if (A.first != B.first)
+      return A.first > B.first;
+    return A.second < B.second;
+  });
+
+  uint64_t AccumulatedSamples = 0;
+  for (const auto &P : FuncDensityList) {
+    AccumulatedSamples += P.second;
+    ProfileDensity = P.first;
+    if (AccumulatedSamples >= TotalProfileSamples *
+                                  static_cast<float>(ProfileDensityCutOffHot) /
+                                  1000000)
+      break;
+  }
+
+  return ProfileDensity;
+}
+
 void ProfileGeneratorBase::calculateAndShowDensity(
     const SampleProfileMap &Profiles) {
-  double Density = calculateDensity(Profiles, HotCountThreshold);
-  showDensitySuggestion(Density, ProfileSummaryCutoffHot);
+  double Density = calculateDensity(Profiles);
+  showDensitySuggestion(Density);
 }
 
 FunctionSamples *
@@ -1035,78 +1077,6 @@ void CSProfileGenerator::convertToProfileMap() {
   IsProfileValidOnTrie = false;
 }
 
-void CSProfileGenerator::calculateAndShowDensity(
-    SampleContextTracker &CTracker) {
-  double Density = calculateDensity(CTracker);
-  showDensitySuggestion(Density, ProfileDensityHotFuncCutOff);
-}
-
-// Calculate Profile-density:
-// Sort the list of function-density in descending order and iterate them once
-// their accumulated total samples exceeds the percentage_threshold of total
-// profile samples, the profile-density is the last(minimum) function-density of
-// the processed functions, which means all the functions significant to perf
-// are on good density if the profile-density is good, or in other words, if the
-// profile-density is bad, the accumulated samples for all the bad density
-// profile exceeds the (100% - percentage_threshold).
-// The percentage_threshold(--profile-density-hot-func-cutoff) is configurable
-// depending on how much regression the system want to tolerate.
-double CSProfileGenerator::calculateDensity(SampleContextTracker &CTracker) {
-  double ProfileDensity = 0.0;
-
-  uint64_t TotalProfileSamples = 0;
-  // A list of the function profile density and total samples.
-  std::vector<std::pair<double, uint64_t>> DensityList;
-  for (const auto *Node : CTracker) {
-    const auto *FSamples = Node->getFunctionSamples();
-    if (!FSamples)
-      continue;
-
-    uint64_t TotalBodySamples = 0;
-    uint64_t FuncBodySize = 0;
-    for (const auto &I : FSamples->getBodySamples()) {
-      TotalBodySamples += I.second.getSamples();
-      FuncBodySize++;
-    }
-    // The whole function could be inlined and optimized out, use the callsite
-    // head samples instead to estimate the body count.
-    if (FuncBodySize == 0) {
-      for (const auto &CallsiteSamples : FSamples->getCallsiteSamples()) {
-        FuncBodySize++;
-        for (const auto &Callee : CallsiteSamples.second)
-          TotalBodySamples += Callee.second.getHeadSamplesEstimate();
-      }
-    }
-
-    if (FuncBodySize == 0)
-      continue;
-
-    double FuncDensity = static_cast<double>(TotalBodySamples) / FuncBodySize;
-    TotalProfileSamples += TotalBodySamples;
-    DensityList.emplace_back(FuncDensity, TotalBodySamples);
-  }
-
-  // Sorted by the density in descending order.
-  llvm::stable_sort(DensityList, [&](const std::pair<double, uint64_t> &A,
-                                     const std::pair<double, uint64_t> &B) {
-    if (A.first != B.first)
-      return A.first > B.first;
-    return A.second < B.second;
-  });
-
-  uint64_t AccumulatedSamples = 0;
-  for (const auto &P : DensityList) {
-    AccumulatedSamples += P.second;
-    ProfileDensity = P.first;
-    if (AccumulatedSamples >=
-        TotalProfileSamples * static_cast<float>(ProfileDensityHotFuncCutOff) /
-            1000000)
-      break;
-  }
-
-  return ProfileDensity;
-}
-
 void CSProfileGenerator::postProcessProfiles() {
   // Compute hot/cold threshold based on profile. This will be used for cold
   // context profile merging/trimming.
@@ -1116,7 +1086,6 @@ void CSProfileGenerator::postProcessProfiles() {
   // inline decisions.
   if (EnableCSPreInliner) {
     ContextTracker.populateFuncToCtxtMap();
-    calculateAndShowDensity(ContextTracker);
     CSPreInliner(ContextTracker, *Binary, Summary.get()).run();
     // Turn off the profile merger by default unless it is explicitly enabled.
     if (!CSProfMergeColdContext.getNumOccurrences())
@@ -1133,19 +1102,13 @@ void CSProfileGenerator::postProcessProfiles() {
             CSProfMaxColdContextDepth, EnableCSPreInliner);
   }
 
-  // Merge function samples of CS profile to calculate profile density.
-  sampleprof::SampleProfileMap ContextLessProfiles;
-  ProfileConverter::flattenProfile(ProfileMap, ContextLessProfiles, true);
-
-  if (!EnableCSPreInliner)
-    ProfileGeneratorBase::calculateAndShowDensity(ContextLessProfiles);
-
   if (GenCSNestedProfile) {
     ProfileConverter CSConverter(ProfileMap);
     CSConverter.convertCSProfiles();
     FunctionSamples::ProfileIsCS = false;
   }
   filterAmbiguousProfile(ProfileMap);
+  ProfileGeneratorBase::calculateAndShowDensity(ProfileMap);
 }
 
 void ProfileGeneratorBase::computeSummaryAndThreshold(
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h
index cf451f9d1a1a4..d40a37d658829 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.h
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.h
@@ -116,12 +116,15 @@ class ProfileGeneratorBase {
 
   void computeSummaryAndThreshold(SampleProfileMap &ProfileMap);
 
-  void calculateAndShowDensity(const SampleProfileMap &Profiles);
+  void calculateDensity(const FunctionSamples &FSamples,
+                        std::vector<std::pair<double, uint64_t>> &DensityList,
+                        uint64_t &TotalProfileSamples);
+
+  double calculateDensity(const SampleProfileMap &Profiles);
 
-  double calculateDensity(const SampleProfileMap &Profiles,
-                          uint64_t HotCntThreshold);
+  void calculateAndShowDensity(const SampleProfileMap &Profiles);
 
-  void showDensitySuggestion(double Density, int DensityCutoffHot);
+  void showDensitySuggestion(double Density);
 
   void collectProfiledFunctions();
 
@@ -363,9 +366,6 @@ class CSProfileGenerator : public ProfileGeneratorBase {
 
   void computeSummaryAndThreshold();
 
-  void calculateAndShowDensity(SampleContextTracker &CTracker);
-  double calculateDensity(SampleContextTracker &CTracker);
-
   bool collectFunctionsFromLLVMProfile(
       std::unordered_set<const BinaryFunction *> &ProfiledFunctions) override;
 

>From 1a4679a9128a2f60bcd2158634326da6fe223821 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Fri, 17 May 2024 13:48:47 -0700
Subject: [PATCH 3/8] fix missing callee sample

---
 llvm/tools/llvm-profgen/ProfileGenerator.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index e3e856ead918c..0bdf543d2f39a 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -761,8 +761,10 @@ void ProfileGeneratorBase::calculateDensity(
   if (FuncBodySize == 0) {
     for (const auto &CallsiteSamples : FSamples.getCallsiteSamples()) {
       FuncBodySize++;
-      for (const auto &Callee : CallsiteSamples.second)
+      for (const auto &Callee : CallsiteSamples.second) {
+        calculateDensity(Callee.second, DensityList, TotalProfileSamples);
         TotalBodySamples += Callee.second.getHeadSamplesEstimate();
+      }
     }
   }
 

>From 9cef0d9ab287a2b3561a5656a4f7ac3872e492f8 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Wed, 22 May 2024 10:21:30 -0700
Subject: [PATCH 4/8] change to binary-level density

---
 .../tools/llvm-profgen/profile-density.test   |  9 +-
 llvm/tools/llvm-profgen/ProfileGenerator.cpp  | 83 +++++++++++--------
 llvm/tools/llvm-profgen/ProfileGenerator.h    |  6 +-
 3 files changed, 57 insertions(+), 41 deletions(-)

diff --git a/llvm/test/tools/llvm-profgen/profile-density.test b/llvm/test/tools/llvm-profgen/profile-density.test
index e8bcc9a3a5028..05190c949597a 100644
--- a/llvm/test/tools/llvm-profgen/profile-density.test
+++ b/llvm/test/tools/llvm-profgen/profile-density.test
@@ -1,14 +1,17 @@
-; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/profile-density.raw.prof --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t1 --use-offset=0 --show-density -hot-function-density-threshold=10  --trim-cold-profile=0 &> %t2
+; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/profile-density.raw.prof --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t1 --use-offset=0 --show-density -profile-density-threshold=10  --trim-cold-profile=0 &> %t2
 ; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-DENSITY
-
-; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/profile-density-cs.raw.prof --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t3 --show-density -hot-function-density-threshold=1 &> %t4
+; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/profile-density-cs.raw.prof --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t3 --show-density -profile-density-threshold=1 &> %t4
 ; RUN: FileCheck %s --input-file %t4 --check-prefix=CHECK-DENSITY-CS
+; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/profile-density-cs.raw.prof --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t5 --show-density -profile-density-threshold=1 -profile-density-cutoff-hot=800000  &> %t6
+; RUN: FileCheck %s --input-file %t6 --check-prefix=CHECK-DENSITY-CS-80
 
 ;CHECK-DENSITY: Sample PGO is estimated to optimize better with 2.9x more samples. Please consider increasing sampling rate or profiling for longer duration to get more samples.
 ;CHECK-DENSITY: Functions with density >= 3.5 account for 99.00% total sample counts.
 
 ;CHECK-DENSITY-CS: Functions with density >= 800.1 account for 99.00% total sample counts.
 
+;CHECK-DENSITY-CS-80: Functions with density >= 1860.5 account for 80.00% total sample counts.
+
 ; original code:
 ; clang -O3 -g -fno-optimize-sibling-calls -fdebug-info-for-profiling qsort.c -o a.out
 #include <stdio.h>
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 0bdf543d2f39a..c7b6f859d881c 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -75,9 +75,11 @@ static cl::opt<int, true> CSProfMaxContextDepth(
              "depth limit."),
     cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth));
 
-static cl::opt<double> HotFunctionDensityThreshold(
-    "hot-function-density-threshold", llvm::cl::init(20),
-    llvm::cl::desc("specify density threshold for hot functions (default: 20)"),
+static cl::opt<double> ProfileDensityThreshold(
+    "profile-density-threshold", llvm::cl::init(20),
+    llvm::cl::desc(
+        "Set the profile density threshold(default: 20), which is used to "
+        "provide suggestions for user to increase the sampling rate.\n"),
     llvm::cl::Optional);
 static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false),
                                  llvm::cl::desc("show profile density details"),
@@ -182,12 +184,13 @@ void ProfileGeneratorBase::write() {
 
 void ProfileGeneratorBase::showDensitySuggestion(double Density) {
   if (Density == 0.0)
-    WithColor::warning() << "The --profile-density-cutoff-hot option may be "
+    WithColor::warning() << "The output profile is empty or the "
+                            "--profile-density-cutoff-hot option is "
                             "set too low. Please check your command.\n";
-  else if (Density < HotFunctionDensityThreshold)
+  else if (Density < ProfileDensityThreshold)
     WithColor::warning()
         << "Sample PGO is estimated to optimize better with "
-        << format("%.1f", HotFunctionDensityThreshold / Density)
+        << format("%.1f", ProfileDensityThreshold / Density)
         << "x more samples. Please consider increasing sampling rate or "
            "profiling for longer duration to get more samples.\n";
 
@@ -745,12 +748,15 @@ void ProfileGenerator::populateBoundarySamplesForAllFunctions(
   }
 }
 
-void ProfileGeneratorBase::calculateDensity(
-    const FunctionSamples &FSamples,
-    std::vector<std::pair<double, uint64_t>> &DensityList,
-    uint64_t &TotalProfileSamples) {
-  uint64_t TotalBodySamples = 0;
-  uint64_t FuncBodySize = 0;
+// Note that ideally the size should be the number of function's instructions.
+// However, for probe-based profile, we don't have the accurate instruction
+// count for each probe. Instead, the probe sample is the sample count for the
+// block, which is equivalent to total_instruction_samples/num_instruction in
+// one block. Hence, we use the number of probes as a proxy for the function's
+// size.
+void ProfileGeneratorBase::calculateBodySamplesAndSize(
+    const FunctionSamples &FSamples, uint64_t &TotalBodySamples,
+    uint64_t &FuncBodySize) {
   for (const auto &I : FSamples.getBodySamples()) {
     TotalBodySamples += I.second.getSamples();
     FuncBodySize++;
@@ -758,27 +764,21 @@ void ProfileGeneratorBase::calculateDensity(
 
   // The whole function could be inlined and optimized out, use the callsite
   // head samples instead to estimate the body count.
-  if (FuncBodySize == 0) {
-    for (const auto &CallsiteSamples : FSamples.getCallsiteSamples()) {
-      FuncBodySize++;
-      for (const auto &Callee : CallsiteSamples.second) {
-        calculateDensity(Callee.second, DensityList, TotalProfileSamples);
-        TotalBodySamples += Callee.second.getHeadSamplesEstimate();
-      }
+  for (const auto &CallsiteSamples : FSamples.getCallsiteSamples()) {
+    FuncBodySize++;
+    for (const auto &Callee : CallsiteSamples.second) {
+      // This is used for calculating the binary-level density, so the
+      // inlinees' samples and size should be included in the calculation.
+      calculateBodySamplesAndSize(Callee.second, TotalBodySamples,
+                                  FuncBodySize);
+      TotalBodySamples += Callee.second.getHeadSamplesEstimate();
     }
   }
-
-  if (FuncBodySize == 0)
-    return;
-
-  double FuncDensity = static_cast<double>(TotalBodySamples) / FuncBodySize;
-  TotalProfileSamples += TotalBodySamples;
-  DensityList.emplace_back(FuncDensity, TotalBodySamples);
 }
 
 // Calculate Profile-density:
 // Calculate the density for each function and sort them in descending order,
-// iterate them once their accumulated total samples exceeds the
+// keep accumulating their total samples until it exceeds the
 // percentage_threshold(cut-off) of total profile samples, the profile-density
 // is the last(minimum) function-density of the processed functions, which means
 // all the functions hot to perf are on good density if the profile-density is
@@ -791,8 +791,18 @@ ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles) {
   uint64_t TotalProfileSamples = 0;
   // A list of the function profile density and its total samples.
   std::vector<std::pair<double, uint64_t>> FuncDensityList;
-  for (const auto &I : Profiles)
-    calculateDensity(I.second, FuncDensityList, TotalProfileSamples);
+  for (const auto &I : Profiles) {
+    uint64_t TotalBodySamples = 0;
+    uint64_t FuncBodySize = 0;
+    calculateBodySamplesAndSize(I.second, TotalBodySamples, FuncBodySize);
+
+    if (FuncBodySize == 0)
+      continue;
+
+    double FuncDensity = static_cast<double>(TotalBodySamples) / FuncBodySize;
+    TotalProfileSamples += TotalBodySamples;
+    FuncDensityList.emplace_back(FuncDensity, TotalBodySamples);
+  }
 
   // Sorted by the density in descending order.
   llvm::stable_sort(FuncDensityList, [&](const std::pair<double, uint64_t> &A,
@@ -803,13 +813,16 @@ ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles) {
   });
 
   uint64_t AccumulatedSamples = 0;
-  for (const auto &P : FuncDensityList) {
-    AccumulatedSamples += P.second;
-    ProfileDensity = P.first;
-    if (AccumulatedSamples >= TotalProfileSamples *
+  uint32_t I = 0;
+  assert(ProfileDensityCutOffHot <= 1000000 &&
+         "The cutoff value is greater than 1000000(100%)");
+  while (AccumulatedSamples < TotalProfileSamples *
                                   static_cast<float>(ProfileDensityCutOffHot) /
-                                  1000000)
-      break;
+                                  1000000 &&
+         I < FuncDensityList.size()) {
+    AccumulatedSamples += FuncDensityList[I].second;
+    ProfileDensity = FuncDensityList[I].first;
+    I++;
   }
 
   return ProfileDensity;
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h
index d40a37d658829..5e36128530cd9 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.h
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.h
@@ -116,9 +116,9 @@ class ProfileGeneratorBase {
 
   void computeSummaryAndThreshold(SampleProfileMap &ProfileMap);
 
-  void calculateDensity(const FunctionSamples &FSamples,
-                        std::vector<std::pair<double, uint64_t>> &DensityList,
-                        uint64_t &TotalProfileSamples);
+  void calculateBodySamplesAndSize(const FunctionSamples &FSamples,
+                                   uint64_t &TotalBodySamples,
+                                   uint64_t &FuncBodySize);
 
   double calculateDensity(const SampleProfileMap &Profiles);
 

>From a42f48051a9c9edabbfee8aa599564fb36bc7c0e Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Wed, 22 May 2024 13:56:21 -0700
Subject: [PATCH 5/8] set profile density threshold to 50 and address other
 comments

---
 .../tools/llvm-profgen/profile-density.test   |  3 ++-
 llvm/tools/llvm-profgen/ProfileGenerator.cpp  | 24 +++++++++----------
 2 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/llvm/test/tools/llvm-profgen/profile-density.test b/llvm/test/tools/llvm-profgen/profile-density.test
index 05190c949597a..14a6a77a5983b 100644
--- a/llvm/test/tools/llvm-profgen/profile-density.test
+++ b/llvm/test/tools/llvm-profgen/profile-density.test
@@ -1,6 +1,6 @@
 ; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/profile-density.raw.prof --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t1 --use-offset=0 --show-density -profile-density-threshold=10  --trim-cold-profile=0 &> %t2
 ; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-DENSITY
-; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/profile-density-cs.raw.prof --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t3 --show-density -profile-density-threshold=1 &> %t4
+; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/profile-density-cs.raw.prof --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t3 --show-density -profile-density-threshold=1 -profile-density-threshold=10000 &> %t4
 ; RUN: FileCheck %s --input-file %t4 --check-prefix=CHECK-DENSITY-CS
 ; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/profile-density-cs.raw.prof --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t5 --show-density -profile-density-threshold=1 -profile-density-cutoff-hot=800000  &> %t6
 ; RUN: FileCheck %s --input-file %t6 --check-prefix=CHECK-DENSITY-CS-80
@@ -8,6 +8,7 @@
 ;CHECK-DENSITY: Sample PGO is estimated to optimize better with 2.9x more samples. Please consider increasing sampling rate or profiling for longer duration to get more samples.
 ;CHECK-DENSITY: Functions with density >= 3.5 account for 99.00% total sample counts.
 
+;CHECK-DENSITY-CS: Sample PGO is estimated to optimize better with 12.5x more samples. Please consider increasing sampling rate or profiling for longer duration to get more samples.
 ;CHECK-DENSITY-CS: Functions with density >= 800.1 account for 99.00% total sample counts.
 
 ;CHECK-DENSITY-CS-80: Functions with density >= 1860.5 account for 80.00% total sample counts.
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index c7b6f859d881c..8ee0ccff22a5f 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -76,10 +76,9 @@ static cl::opt<int, true> CSProfMaxContextDepth(
     cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth));
 
 static cl::opt<double> ProfileDensityThreshold(
-    "profile-density-threshold", llvm::cl::init(20),
-    llvm::cl::desc(
-        "Set the profile density threshold(default: 20), which is used to "
-        "provide suggestions for user to increase the sampling rate.\n"),
+    "profile-density-threshold", llvm::cl::init(50),
+    llvm::cl::desc("If the profile density is below the given threshold, it "
+                   "will be suggested to increase the sampling rate."),
     llvm::cl::Optional);
 static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false),
                                  llvm::cl::desc("show profile density details"),
@@ -748,24 +747,24 @@ void ProfileGenerator::populateBoundarySamplesForAllFunctions(
   }
 }
 
-// Note taht ideally the size should be the number of function's instruction.
+// Note that ideally the size should be the number of function instruction.
 // However, for probe-based profile, we don't have the accurate instruction
-// count for each probe, Instead, the probe sample is the samples count for the
-// block, which is equivelant to total_instruction_samples/num_instruction in
+// count for each probe, instead, the probe sample is the samples count for the
+// block, which is equivelant to total_instruction_samples/num_of_instruction in
 // one block. Hence, we use the number of probe as a proxy for the function's
 // size.
 void ProfileGeneratorBase::calculateBodySamplesAndSize(
     const FunctionSamples &FSamples, uint64_t &TotalBodySamples,
     uint64_t &FuncBodySize) {
-  for (const auto &I : FSamples.getBodySamples()) {
+  FuncBodySize +=
+      FSamples.getBodySamples().size() + FSamples.getCallsiteSamples().size();
+
+  for (const auto &I : FSamples.getBodySamples())
     TotalBodySamples += I.second.getSamples();
-    FuncBodySize++;
-  }
 
   // The whole function could be inlined and optimized out, use the callsite
   // head samples instead to estimate the body count.
-  for (const auto &CallsiteSamples : FSamples.getCallsiteSamples()) {
-    FuncBodySize++;
+  for (const auto &CallsiteSamples : FSamples.getCallsiteSamples())
     for (const auto &Callee : CallsiteSamples.second) {
       // This is used for caluculating the binary-level density, so the
       // inlinees' samples and size should be included in the calculation.
@@ -773,7 +772,6 @@ void ProfileGeneratorBase::calculateBodySamplesAndSize(
                                   FuncBodySize);
       TotalBodySamples += Callee.second.getHeadSamplesEstimate();
     }
-  }
 }
 
 // Calculate Profile-density:

>From 5974082a7060ebfe910fc6b958ef01603e755527 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Wed, 22 May 2024 16:20:38 -0700
Subject: [PATCH 6/8] fix comment

---
 llvm/tools/llvm-profgen/ProfileGenerator.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 8ee0ccff22a5f..98b12de1fd13a 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -766,8 +766,8 @@ void ProfileGeneratorBase::calculateBodySamplesAndSize(
   // head samples instead to estimate the body count.
   for (const auto &CallsiteSamples : FSamples.getCallsiteSamples())
     for (const auto &Callee : CallsiteSamples.second) {
-      // This is used for caluculating the binary-level density, so the
-      // inlinees' samples and size should be included in the calculation.
+      // For binary-level density, the inlinees' samples and size should be
+      // included in the calculation.
       calculateBodySamplesAndSize(Callee.second, TotalBodySamples,
                                   FuncBodySize);
       TotalBodySamples += Callee.second.getHeadSamplesEstimate();

>From 7769d0fa8b0cb4724d7362e9667fea1dd4f366ea Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Thu, 23 May 2024 10:34:24 -0700
Subject: [PATCH 7/8] fix body samples and update comments

---
 .../tools/llvm-profgen/profile-density.test   |  2 +-
 llvm/tools/llvm-profgen/ProfileGenerator.cpp  | 30 +++++++++++--------
 2 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/llvm/test/tools/llvm-profgen/profile-density.test b/llvm/test/tools/llvm-profgen/profile-density.test
index 14a6a77a5983b..086697e8da0a5 100644
--- a/llvm/test/tools/llvm-profgen/profile-density.test
+++ b/llvm/test/tools/llvm-profgen/profile-density.test
@@ -11,7 +11,7 @@
 ;CHECK-DENSITY-CS: Sample PGO is estimated to optimize better with 12.5x more samples. Please consider increasing sampling rate or profiling for longer duration to get more samples.
 ;CHECK-DENSITY-CS: Functions with density >= 800.1 account for 99.00% total sample counts.
 
-;CHECK-DENSITY-CS-80: Functions with density >= 1860.5 account for 80.00% total sample counts.
+;CHECK-DENSITY-CS-80: Functions with density >= 1886.2 account for 80.00% total sample counts.
 
 ; original code:
 ; clang -O3 -g -fno-optimize-sibling-calls -fdebug-info-for-profiling qsort.c -o a.out
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 98b12de1fd13a..fdc1f1e9e79aa 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -747,30 +747,36 @@ void ProfileGenerator::populateBoundarySamplesForAllFunctions(
   }
 }
 
-// Note that ideally the size should be the number of function instruction.
-// However, for probe-based profile, we don't have the accurate instruction
-// count for each probe, instead, the probe sample is the samples count for the
-// block, which is equivelant to total_instruction_samples/num_of_instruction in
-// one block. Hence, we use the number of probe as a proxy for the function's
-// size.
 void ProfileGeneratorBase::calculateBodySamplesAndSize(
     const FunctionSamples &FSamples, uint64_t &TotalBodySamples,
     uint64_t &FuncBodySize) {
-  FuncBodySize +=
-      FSamples.getBodySamples().size() + FSamples.getCallsiteSamples().size();
-
+  // Note that ideally the size should be the number of function instruction.
+  // However, for probe-based profile, we don't have the accurate instruction
+  // count for each probe, instead, the probe sample is the samples count for
+  // the block, which is equivalent to
+  // total_instruction_samples/num_of_instruction in one block. Hence, we use
+  // the number of probe as a proxy for the function's size.
+  FuncBodySize += FSamples.getBodySamples().size();
+
+  // The accumulated body samples re-calculated here could be different from the
+  // TotalSamples(getTotalSamples) field of FunctionSamples for line-number
+  // based profile. The reason is that TotalSamples is the sum of all the
+  // samples of the machine instruction in one source-code line, however, the
+  // entry of BodySamples is only the max number of them, so the TotalSamples is
+  // usually much bigger than the accumulated body samples as one source-code
+  // line can emit many machine instructions. We observed a regression when we
+  // switched to use the accumulated body samples(by using
+  // -update-total-samples). Hence, it's safer to re-calculate here to avoid
+  // such discrepancy.
   for (const auto &I : FSamples.getBodySamples())
     TotalBodySamples += I.second.getSamples();
 
-  // The whole function could be inlined and optimized out, use the callsite
-  // head samples instead to estimate the body count.
   for (const auto &CallsiteSamples : FSamples.getCallsiteSamples())
     for (const auto &Callee : CallsiteSamples.second) {
       // For binary-level density, the inlinees' samples and size should be
       // included in the calculation.
       calculateBodySamplesAndSize(Callee.second, TotalBodySamples,
                                   FuncBodySize);
-      TotalBodySamples += Callee.second.getHeadSamplesEstimate();
     }
 }
 

>From 4539a39060549a370f3e1cb31e78a01877de2975 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Thu, 23 May 2024 12:20:09 -0700
Subject: [PATCH 8/8] update comments

---
 llvm/tools/llvm-profgen/ProfileGenerator.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index fdc1f1e9e79aa..2118e954fe543 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -767,7 +767,8 @@ void ProfileGeneratorBase::calculateBodySamplesAndSize(
   // line can emit many machine instructions. We observed a regression when we
   // switched to use the accumulated body samples(by using
   // -update-total-samples). Hence, it's safer to re-calculate here to avoid
-  // such discrepancy.
+  // such discrepancy. There is no problem for probe-based profile, as the
+  // TotalSamples is exactly the same as the accumulated body samples.
   for (const auto &I : FSamples.getBodySamples())
     TotalBodySamples += I.second.getSamples();
 



More information about the llvm-commits mailing list