[llvm] [llvm-profgen] Improve sample profile density (PR #92144)
Lei Wang via llvm-commits
llvm-commits at lists.llvm.org
Tue May 14 17:46:01 PDT 2024
https://github.com/wlei-llvm updated https://github.com/llvm/llvm-project/pull/92144
>From 441a16d95c2deb4b50641241e283891d7765c50b Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Mon, 13 May 2024 13:57:02 -0700
Subject: [PATCH] improve profile density
---
.../tools/llvm-profgen/profile-density.test | 2 +-
llvm/tools/llvm-profgen/ProfileGenerator.cpp | 90 +++++++++++++++++--
llvm/tools/llvm-profgen/ProfileGenerator.h | 5 +-
3 files changed, 89 insertions(+), 8 deletions(-)
diff --git a/llvm/test/tools/llvm-profgen/profile-density.test b/llvm/test/tools/llvm-profgen/profile-density.test
index 0eb83838d16e7..f22c6f04914aa 100644
--- a/llvm/test/tools/llvm-profgen/profile-density.test
+++ b/llvm/test/tools/llvm-profgen/profile-density.test
@@ -7,7 +7,7 @@
;CHECK-DENSITY: Sample PGO is estimated to optimize better with 3.1x more samples. Please consider increasing sampling rate or profiling for longer duration to get more samples.
;CHECK-DENSITY: Minimum profile density for hot functions with top 99.00% total samples: 3.2
-;CHECK-DENSITY-CS: Minimum profile density for hot functions with top 99.00% total samples: 128.3
+;CHECK-DENSITY-CS: Minimum profile density for hot functions with top 99.00% total samples: 619.0
; original code:
; clang -O3 -g -fno-optimize-sibling-calls -fdebug-info-for-profiling qsort.c -o a.out
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 5aa44108f9660..ecbc6763e56f1 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -83,6 +83,10 @@ static cl::opt<double> HotFunctionDensityThreshold(
static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false),
llvm::cl::desc("show profile density details"),
llvm::cl::Optional);
+static cl::opt<int> ProfileDensityHotFuncCutOff(
+ "profile-density-hot-func-cutoff", llvm::cl::init(990000),
+ llvm::cl::desc("Total sample cutoff for hot functions used to calculate "
+ "the profile density."));
static cl::opt<bool> UpdateTotalSamples(
"update-total-samples", llvm::cl::init(false),
@@ -177,7 +181,8 @@ void ProfileGeneratorBase::write() {
write(std::move(WriterOrErr.get()), ProfileMap);
}
-void ProfileGeneratorBase::showDensitySuggestion(double Density) {
+void ProfileGeneratorBase::showDensitySuggestion(double Density,
+ int DensityCutoffHot) {
if (Density == 0.0)
WithColor::warning() << "The --profile-summary-cutoff-hot option may be "
"set too low. Please check your command.\n";
@@ -190,9 +195,7 @@ void ProfileGeneratorBase::showDensitySuggestion(double Density) {
if (ShowDensity)
outs() << "Minimum profile density for hot functions with top "
- << format("%.2f",
- static_cast<double>(ProfileSummaryCutoffHot.getValue()) /
- 10000)
+ << format("%.2f", static_cast<double>(DensityCutoffHot) / 10000)
<< "% total samples: " << format("%.1f", Density) << "\n";
}
@@ -771,7 +774,7 @@ void ProfileGenerator::populateBoundarySamplesForAllFunctions(
void ProfileGeneratorBase::calculateAndShowDensity(
const SampleProfileMap &Profiles) {
double Density = calculateDensity(Profiles, HotCountThreshold);
- showDensitySuggestion(Density);
+ showDensitySuggestion(Density, ProfileSummaryCutoffHot);
}
FunctionSamples *
@@ -1032,6 +1035,78 @@ void CSProfileGenerator::convertToProfileMap() {
IsProfileValidOnTrie = false;
}
+void CSProfileGenerator::calculateAndShowDensity(
+ SampleContextTracker &CTracker) {
+ double Density = calculateDensity(CTracker);
+ showDensitySuggestion(Density, ProfileDensityHotFuncCutOff);
+}
+
+// Calculate profile density:
+// Sort the per-function densities in descending order and walk the list,
+// accumulating each function's total samples, until the accumulated samples
+// reach the percentage_threshold of the total profile samples. The profile
+// density is the last (i.e. minimum) function density visited. If that value
+// is good, every function significant to performance has good density;
+// conversely, if it is bad, the functions with bad density account for more
+// than (100% - percentage_threshold) of the total samples.
+// The percentage_threshold (--profile-density-hot-func-cutoff) is configurable
+// depending on how much regression the system wants to tolerate.
+double CSProfileGenerator::calculateDensity(SampleContextTracker &CTracker) {
+ double ProfileDensity = 0.0;
+
+ uint64_t TotalProfileSamples = 0;
+ // A list of the function profile density and total samples.
+ std::vector<std::pair<double, uint64_t>> DensityList;
+ for (const auto *Node : CTracker) {
+ const auto *FSamples = Node->getFunctionSamples();
+ if (!FSamples)
+ continue;
+
+ uint64_t TotalBodySamples = 0;
+ uint64_t FuncBodySize = 0;
+ for (const auto &I : FSamples->getBodySamples()) {
+ TotalBodySamples += I.second.getSamples();
+ FuncBodySize++;
+ }
+ // The whole function could be inlined and optimized out; use the callsite
+ // head samples instead to estimate the body count.
+ if (FuncBodySize == 0) {
+ for (const auto &CallsiteSamples : FSamples->getCallsiteSamples()) {
+ FuncBodySize++;
+ for (const auto &Callee : CallsiteSamples.second)
+ TotalBodySamples += Callee.second.getHeadSamplesEstimate();
+ }
+ }
+
+ if (FuncBodySize == 0)
+ continue;
+
+ double FuncDensity = static_cast<double>(TotalBodySamples) / FuncBodySize;
+ TotalProfileSamples += TotalBodySamples;
+ DensityList.emplace_back(FuncDensity, TotalBodySamples);
+ }
+
+ // Sort by density in descending order; ties go to fewer total samples.
+ llvm::stable_sort(DensityList, [&](const std::pair<double, uint64_t> &A,
+ const std::pair<double, uint64_t> &B) {
+ if (A.first != B.first)
+ return A.first > B.first;
+ return A.second < B.second;
+ });
+
+ uint64_t AccumulatedSamples = 0;
+ for (const auto &P : DensityList) {
+ AccumulatedSamples += P.second;
+ ProfileDensity = P.first;
+ if (AccumulatedSamples >=
+ TotalProfileSamples * static_cast<float>(ProfileDensityHotFuncCutOff) /
+ 1000000)
+ break;
+ }
+
+ return ProfileDensity;
+}
+
void CSProfileGenerator::postProcessProfiles() {
// Compute hot/cold threshold based on profile. This will be used for cold
// context profile merging/trimming.
@@ -1041,6 +1116,7 @@ void CSProfileGenerator::postProcessProfiles() {
// inline decisions.
if (EnableCSPreInliner) {
ContextTracker.populateFuncToCtxtMap();
+ calculateAndShowDensity(ContextTracker);
CSPreInliner(ContextTracker, *Binary, Summary.get()).run();
// Turn off the profile merger by default unless it is explicitly enabled.
if (!CSProfMergeColdContext.getNumOccurrences())
@@ -1061,7 +1137,9 @@ void CSProfileGenerator::postProcessProfiles() {
sampleprof::SampleProfileMap ContextLessProfiles;
ProfileConverter::flattenProfile(ProfileMap, ContextLessProfiles, true);
- calculateAndShowDensity(ContextLessProfiles);
+ if (!EnableCSPreInliner)
+ ProfileGeneratorBase::calculateAndShowDensity(ContextLessProfiles);
+
if (GenCSNestedProfile) {
ProfileConverter CSConverter(ProfileMap);
CSConverter.convertCSProfiles();
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h
index d258fb78bfb11..cf451f9d1a1a4 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.h
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.h
@@ -121,7 +121,7 @@ class ProfileGeneratorBase {
double calculateDensity(const SampleProfileMap &Profiles,
uint64_t HotCntThreshold);
- void showDensitySuggestion(double Density);
+ void showDensitySuggestion(double Density, int DensityCutoffHot);
void collectProfiledFunctions();
@@ -363,6 +363,9 @@ class CSProfileGenerator : public ProfileGeneratorBase {
void computeSummaryAndThreshold();
+ void calculateAndShowDensity(SampleContextTracker &CTracker);
+ double calculateDensity(SampleContextTracker &CTracker);
+
bool collectFunctionsFromLLVMProfile(
std::unordered_set<const BinaryFunction *> &ProfiledFunctions) override;
More information about the llvm-commits
mailing list