[llvm] [BOLT][NFCI] Emit uniform diagnostics in DataAggregator (PR #136530)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 21 05:37:17 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-bolt
Author: Amir Ayupov (aaupov)
<details>
<summary>Changes</summary>
DataAggregator supports reading different kinds of profile data:
- perf data: branch records or IP samples,
- pre-aggregated branch data.
Make profile quality reporting uniform across all kinds of input:
- out-of-range and mismatching samples,
- samples in cold code in BAT mode (profiled BOLTed binary).
Test Plan: NFCI
---
Full diff: https://github.com/llvm/llvm-project/pull/136530.diff
2 Files Affected:
- (modified) bolt/include/bolt/Profile/DataAggregator.h (+12-3)
- (modified) bolt/lib/Profile/DataAggregator.cpp (+86-122)
``````````diff
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index 56eb463fc98fc..79a91861554df 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -223,7 +223,8 @@ class DataAggregator : public DataReader {
bool recordExit(BinaryFunction &BF, uint64_t From, bool Mispred,
uint64_t Count = 1) const;
- /// Aggregation statistics
+ /// Branch stacks aggregation statistics
+ uint64_t NumTraces{0};
uint64_t NumInvalidTraces{0};
uint64_t NumLongRangeTraces{0};
/// Specifies how many samples were recorded in cold areas if we are dealing
@@ -231,6 +232,7 @@ class DataAggregator : public DataReader {
/// for the source of the branch to avoid counting cold activity twice (one
/// for source and another for destination).
uint64_t NumColdSamples{0};
+ uint64_t NumTotalSamples{0};
/// Looks into system PATH for Linux Perf and set up the aggregator to use it
void findPerfExecutable();
@@ -327,8 +329,8 @@ class DataAggregator : public DataReader {
/// Parse a single LBR entry as output by perf script -Fbrstack
ErrorOr<LBREntry> parseLBREntry();
- /// Parse LBR sample, returns the number of traces.
- uint64_t parseLBRSample(const PerfBranchSample &Sample, bool NeedsSkylakeFix);
+ /// Parse LBR sample.
+ void parseLBRSample(const PerfBranchSample &Sample, bool NeedsSkylakeFix);
/// Parse and pre-aggregate branch events.
std::error_code parseBranchEvents();
@@ -487,6 +489,13 @@ class DataAggregator : public DataReader {
void dump(const PerfBranchSample &Sample) const;
void dump(const PerfMemSample &Sample) const;
+ /// Profile diagnostics print methods
+ void printColdSamplesDiagnostic() const;
+ void printLongRangeTracesDiagnostic() const;
+ void printBranchSamplesDiagnostics() const;
+ void printBasicSamplesDiagnostics(uint64_t OutOfRangeSamples) const;
+ void printBranchStacksDiagnostics(uint64_t IgnoredSamples) const;
+
public:
/// If perf.data was collected without build ids, the buildid-list may contain
/// incomplete entries. Return true if the buffer containing
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index f016576ff61af..1127cc1f9331a 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -1422,9 +1422,8 @@ std::error_code DataAggregator::printLBRHeatMap() {
return std::error_code();
}
-uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
- bool NeedsSkylakeFix) {
- uint64_t NumTraces{0};
+void DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
+ bool NeedsSkylakeFix) {
// LBRs are stored in reverse execution order. NextLBR refers to the next
// executed branch record.
const LBREntry *NextLBR = nullptr;
@@ -1487,7 +1486,83 @@ uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
++Info.TakenCount;
Info.MispredCount += LBR.Mispred;
}
- return NumTraces;
+}
+
+void DataAggregator::printColdSamplesDiagnostic() const {
+ if (NumColdSamples > 0) {
+ const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples;
+ outs() << "PERF2BOLT: " << NumColdSamples
+ << format(" (%.1f%%)", ColdSamples)
+ << " samples recorded in cold regions of split functions.\n";
+ if (ColdSamples > 5.0f)
+ outs()
+ << "WARNING: The BOLT-processed binary where samples were collected "
+ "likely used bad data or your service observed a large shift in "
+ "profile. You may want to audit this.\n";
+ }
+}
+
+void DataAggregator::printLongRangeTracesDiagnostic() const {
+ outs() << "PERF2BOLT: out of range traces involving unknown regions: "
+ << NumLongRangeTraces;
+ if (NumTraces > 0)
+ outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
+ outs() << "\n";
+}
+
+static float printColoredPct(uint64_t Numerator, uint64_t Denominator, float T1,
+ float T2) {
+ if (Denominator == 0) {
+ outs() << "\n";
+ return 0;
+ }
+ float Percent = Numerator * 100.0f / Denominator;
+ outs() << " (";
+ if (outs().has_colors()) {
+ if (Percent > T2)
+ outs().changeColor(raw_ostream::RED);
+ else if (Percent > T1)
+ outs().changeColor(raw_ostream::YELLOW);
+ else
+ outs().changeColor(raw_ostream::GREEN);
+ }
+ outs() << format("%.1f%%", Percent);
+ if (outs().has_colors())
+ outs().resetColor();
+ outs() << ")\n";
+ return Percent;
+}
+
+void DataAggregator::printBranchSamplesDiagnostics() const {
+ outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
+ << NumInvalidTraces;
+ if (printColoredPct(NumInvalidTraces, NumTraces, 5, 10) > 10)
+ outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
+ "binary is probably not the same binary used during profiling "
+ "collection. The generated data may be ineffective for improving "
+ "performance.\n\n";
+ printLongRangeTracesDiagnostic();
+ printColdSamplesDiagnostic();
+}
+
+void DataAggregator::printBasicSamplesDiagnostics(
+ uint64_t OutOfRangeSamples) const {
+ outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
+ << OutOfRangeSamples;
+ if (printColoredPct(OutOfRangeSamples, NumTotalSamples, 40, 60) > 80)
+ outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
+ "binary is probably not the same binary used during profiling "
+ "collection. The generated data may be ineffective for improving "
+ "performance.\n\n";
+ printColdSamplesDiagnostic();
+}
+
+void DataAggregator::printBranchStacksDiagnostics(
+ uint64_t IgnoredSamples) const {
+ outs() << "PERF2BOLT: ignored samples: " << IgnoredSamples;
+ if (printColoredPct(IgnoredSamples, NumTotalSamples, 20, 50) > 50)
+ errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
+ "were attributed to the input binary\n";
}
std::error_code DataAggregator::parseBranchEvents() {
@@ -1495,11 +1570,9 @@ std::error_code DataAggregator::parseBranchEvents() {
NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
TimerGroupDesc, opts::TimeAggregator);
- uint64_t NumTotalSamples = 0;
uint64_t NumEntries = 0;
uint64_t NumSamples = 0;
uint64_t NumSamplesNoLBR = 0;
- uint64_t NumTraces = 0;
bool NeedsSkylakeFix = false;
while (hasData() && NumTotalSamples < opts::MaxSamples) {
@@ -1526,7 +1599,7 @@ std::error_code DataAggregator::parseBranchEvents() {
NeedsSkylakeFix = true;
}
- NumTraces += parseLBRSample(Sample, NeedsSkylakeFix);
+ parseLBRSample(Sample, NeedsSkylakeFix);
}
for (const Trace &Trace : llvm::make_first_range(BranchLBRs))
@@ -1534,22 +1607,6 @@ std::error_code DataAggregator::parseBranchEvents() {
if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Addr))
BF->setHasProfileAvailable();
- auto printColored = [](raw_ostream &OS, float Percent, float T1, float T2) {
- OS << " (";
- if (OS.has_colors()) {
- if (Percent > T2)
- OS.changeColor(raw_ostream::RED);
- else if (Percent > T1)
- OS.changeColor(raw_ostream::YELLOW);
- else
- OS.changeColor(raw_ostream::GREEN);
- }
- OS << format("%.1f%%", Percent);
- if (OS.has_colors())
- OS.resetColor();
- OS << ")";
- };
-
outs() << "PERF2BOLT: read " << NumSamples << " samples and " << NumEntries
<< " LBR entries\n";
if (NumTotalSamples) {
@@ -1561,47 +1618,10 @@ std::error_code DataAggregator::parseBranchEvents() {
"in no-LBR mode with -nl (the performance improvement in -nl "
"mode may be limited)\n";
} else {
- const uint64_t IgnoredSamples = NumTotalSamples - NumSamples;
- const float PercentIgnored = 100.0f * IgnoredSamples / NumTotalSamples;
- outs() << "PERF2BOLT: " << IgnoredSamples << " samples";
- printColored(outs(), PercentIgnored, 20, 50);
- outs() << " were ignored\n";
- if (PercentIgnored > 50.0f)
- errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
- "were attributed to the input binary\n";
+ printBranchStacksDiagnostics(NumTotalSamples - NumSamples);
}
}
- outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
- << NumInvalidTraces;
- float Perc = 0.0f;
- if (NumTraces > 0) {
- Perc = NumInvalidTraces * 100.0f / NumTraces;
- printColored(outs(), Perc, 5, 10);
- }
- outs() << "\n";
- if (Perc > 10.0f)
- outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
- "binary is probably not the same binary used during profiling "
- "collection. The generated data may be ineffective for improving "
- "performance.\n\n";
-
- outs() << "PERF2BOLT: out of range traces involving unknown regions: "
- << NumLongRangeTraces;
- if (NumTraces > 0)
- outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
- outs() << "\n";
-
- if (NumColdSamples > 0) {
- const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples;
- outs() << "PERF2BOLT: " << NumColdSamples
- << format(" (%.1f%%)", ColdSamples)
- << " samples recorded in cold regions of split functions.\n";
- if (ColdSamples > 5.0f)
- outs()
- << "WARNING: The BOLT-processed binary where samples were collected "
- "likely used bad data or your service observed a large shift in "
- "profile. You may want to audit this.\n";
- }
+ printBranchSamplesDiagnostics();
return std::error_code();
}
@@ -1658,11 +1678,10 @@ void DataAggregator::processBasicEvents() {
NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName,
TimerGroupDesc, opts::TimeAggregator);
uint64_t OutOfRangeSamples = 0;
- uint64_t NumSamples = 0;
for (auto &Sample : BasicSamples) {
const uint64_t PC = Sample.first;
const uint64_t HitCount = Sample.second;
- NumSamples += HitCount;
+ NumTotalSamples += HitCount;
BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
if (!Func) {
OutOfRangeSamples += HitCount;
@@ -1671,33 +1690,9 @@ void DataAggregator::processBasicEvents() {
doSample(*Func, PC, HitCount);
}
- outs() << "PERF2BOLT: read " << NumSamples << " samples\n";
+ outs() << "PERF2BOLT: read " << NumTotalSamples << " samples\n";
- outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
- << OutOfRangeSamples;
- float Perc = 0.0f;
- if (NumSamples > 0) {
- outs() << " (";
- Perc = OutOfRangeSamples * 100.0f / NumSamples;
- if (outs().has_colors()) {
- if (Perc > 60.0f)
- outs().changeColor(raw_ostream::RED);
- else if (Perc > 40.0f)
- outs().changeColor(raw_ostream::YELLOW);
- else
- outs().changeColor(raw_ostream::GREEN);
- }
- outs() << format("%.1f%%", Perc);
- if (outs().has_colors())
- outs().resetColor();
- outs() << ")";
- }
- outs() << "\n";
- if (Perc > 80.0f)
- outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
- "binary is probably not the same binary used during profiling "
- "collection. The generated data may be ineffective for improving "
- "performance.\n\n";
+ printBasicSamplesDiagnostics(OutOfRangeSamples);
}
std::error_code DataAggregator::parseMemEvents() {
@@ -1775,7 +1770,6 @@ void DataAggregator::processPreAggregated() {
NamedRegionTimer T("processAggregated", "Processing aggregated branch events",
TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
- uint64_t NumTraces = 0;
for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) {
switch (AggrEntry.EntryType) {
case AggregatedLBREntry::BRANCH:
@@ -1799,37 +1793,7 @@ void DataAggregator::processPreAggregated() {
outs() << "PERF2BOLT: read " << AggregatedLBRs.size()
<< " aggregated LBR entries\n";
- outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
- << NumInvalidTraces;
- float Perc = 0.0f;
- if (NumTraces > 0) {
- outs() << " (";
- Perc = NumInvalidTraces * 100.0f / NumTraces;
- if (outs().has_colors()) {
- if (Perc > 10.0f)
- outs().changeColor(raw_ostream::RED);
- else if (Perc > 5.0f)
- outs().changeColor(raw_ostream::YELLOW);
- else
- outs().changeColor(raw_ostream::GREEN);
- }
- outs() << format("%.1f%%", Perc);
- if (outs().has_colors())
- outs().resetColor();
- outs() << ")";
- }
- outs() << "\n";
- if (Perc > 10.0f)
- outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
- "binary is probably not the same binary used during profiling "
- "collection. The generated data may be ineffective for improving "
- "performance.\n\n";
-
- outs() << "PERF2BOLT: Out of range traces involving unknown regions: "
- << NumLongRangeTraces;
- if (NumTraces > 0)
- outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
- outs() << "\n";
+ printBranchSamplesDiagnostics();
}
std::optional<int32_t> DataAggregator::parseCommExecEvent() {
``````````
</details>
https://github.com/llvm/llvm-project/pull/136530
More information about the llvm-commits
mailing list