[llvm] [BOLT][NFCI] Emit uniform diagnostics in DataAggregator (PR #136530)
Amir Ayupov via llvm-commits
llvm-commits at lists.llvm.org
Sun Apr 20 20:04:11 PDT 2025
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/136530
>From 1f319f789b8d6f5321a89425699d0e55d099565a Mon Sep 17 00:00:00 2001
From: Amir Aupov <amir.aupov at gmail.com>
Date: Sun, 20 Apr 2025 19:48:39 -0700
Subject: [PATCH 1/2] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?=
=?UTF-8?q?itial=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.4
---
bolt/include/bolt/Profile/DataAggregator.h | 1 +
bolt/lib/Profile/DataAggregator.cpp | 194 +++++++++------------
2 files changed, 81 insertions(+), 114 deletions(-)
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index 56eb463fc98fc..1aa05f4121874 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -231,6 +231,7 @@ class DataAggregator : public DataReader {
/// for the source of the branch to avoid counting cold activity twice (one
/// for source and another for destination).
uint64_t NumColdSamples{0};
+ uint64_t NumTotalSamples{0};
/// Looks into system PATH for Linux Perf and set up the aggregator to use it
void findPerfExecutable();
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index f016576ff61af..734b3a1244e11 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -1490,12 +1490,86 @@ uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
return NumTraces;
}
+static void printColdSamplesDiagnostic() {
+ if (NumColdSamples > 0) {
+ const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples;
+ outs() << "PERF2BOLT: " << NumColdSamples
+ << format(" (%.1f%%)", ColdSamples)
+ << " samples recorded in cold regions of split functions.\n";
+ if (ColdSamples > 5.0f)
+ outs()
+ << "WARNING: The BOLT-processed binary where samples were collected "
+ "likely used bad data or your service observed a large shift in "
+ "profile. You may want to audit this.\n";
+ }
+}
+
+static void printLongRangeTracesDiagnostic() {
+ outs() << "PERF2BOLT: out of range traces involving unknown regions: "
+ << NumLongRangeTraces;
+ if (NumTraces > 0)
+ outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
+ outs() << "\n";
+}
+
+static float printColoredPct(uint64_t Numerator, uint64_t Denominator, float T1,
+ float T2) {
+ if (Denominator == 0) {
+ outs() << "\n";
+ return 0;
+ }
+ float Percent = Numerator * 100.0f / Denominator;
+ outs() << " (";
+ if (outs().has_colors()) {
+ if (Percent > T2)
+ outs().changeColor(raw_ostream::RED);
+ else if (Percent > T1)
+ outs().changeColor(raw_ostream::YELLOW);
+ else
+ outs().changeColor(raw_ostream::GREEN);
+ }
+ outs() << format("%.1f%%", Percent);
+ if (outs().has_colors())
+ outs().resetColor();
+ outs() << ")\n";
+ return Perc;
+}
+
+static void printBranchSamplesDiagnostics() {
+ outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
+ << NumInvalidTraces;
+ if (printColoredPct(NumInvalidTraces, NumTraces, 5, 10) > 10)
+ outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
+ "binary is probably not the same binary used during profiling "
+ "collection. The generated data may be ineffective for improving "
+ "performance.\n\n";
+ printLongRangeTracesDiagnostic();
+ printColdSamplesDiagnostic();
+}
+
+static void printBasicSamplesDiagnostics(uint64_t OutOfRangeSamples) {
+ outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
+ << OutOfRangeSamples;
+ if (printColoredPct(OutOfRangeSamples, NumTotalSamples, 40, 60) > 80)
+ outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
+ "binary is probably not the same binary used during profiling "
+ "collection. The generated data may be ineffective for improving "
+ "performance.\n\n";
+ printColdSamplesDiagnostic();
+}
+
+static void printBranchStacksDiagnostics(uint64_t IgnoredSamples) {
+ outs() << "PERF2BOLT: ignored samples: " << IgnoredSamples;
+ if (printColoredPct(IgnoredSamples, NumTotalSamples, 20, 50) > 50)
+ errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
+ "were attributed to the input binary\n";
+}
+
std::error_code DataAggregator::parseBranchEvents() {
outs() << "PERF2BOLT: parse branch events...\n";
NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
TimerGroupDesc, opts::TimeAggregator);
- uint64_t NumTotalSamples = 0;
uint64_t NumEntries = 0;
uint64_t NumSamples = 0;
uint64_t NumSamplesNoLBR = 0;
@@ -1534,22 +1608,6 @@ std::error_code DataAggregator::parseBranchEvents() {
if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Addr))
BF->setHasProfileAvailable();
- auto printColored = [](raw_ostream &OS, float Percent, float T1, float T2) {
- OS << " (";
- if (OS.has_colors()) {
- if (Percent > T2)
- OS.changeColor(raw_ostream::RED);
- else if (Percent > T1)
- OS.changeColor(raw_ostream::YELLOW);
- else
- OS.changeColor(raw_ostream::GREEN);
- }
- OS << format("%.1f%%", Percent);
- if (OS.has_colors())
- OS.resetColor();
- OS << ")";
- };
-
outs() << "PERF2BOLT: read " << NumSamples << " samples and " << NumEntries
<< " LBR entries\n";
if (NumTotalSamples) {
@@ -1561,47 +1619,10 @@ std::error_code DataAggregator::parseBranchEvents() {
"in no-LBR mode with -nl (the performance improvement in -nl "
"mode may be limited)\n";
} else {
- const uint64_t IgnoredSamples = NumTotalSamples - NumSamples;
- const float PercentIgnored = 100.0f * IgnoredSamples / NumTotalSamples;
- outs() << "PERF2BOLT: " << IgnoredSamples << " samples";
- printColored(outs(), PercentIgnored, 20, 50);
- outs() << " were ignored\n";
- if (PercentIgnored > 50.0f)
- errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
- "were attributed to the input binary\n";
+ printBranchStacksDiagnostics(NumTotalSamples - NumSamples);
}
}
- outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
- << NumInvalidTraces;
- float Perc = 0.0f;
- if (NumTraces > 0) {
- Perc = NumInvalidTraces * 100.0f / NumTraces;
- printColored(outs(), Perc, 5, 10);
- }
- outs() << "\n";
- if (Perc > 10.0f)
- outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
- "binary is probably not the same binary used during profiling "
- "collection. The generated data may be ineffective for improving "
- "performance.\n\n";
-
- outs() << "PERF2BOLT: out of range traces involving unknown regions: "
- << NumLongRangeTraces;
- if (NumTraces > 0)
- outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
- outs() << "\n";
-
- if (NumColdSamples > 0) {
- const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples;
- outs() << "PERF2BOLT: " << NumColdSamples
- << format(" (%.1f%%)", ColdSamples)
- << " samples recorded in cold regions of split functions.\n";
- if (ColdSamples > 5.0f)
- outs()
- << "WARNING: The BOLT-processed binary where samples were collected "
- "likely used bad data or your service observed a large shift in "
- "profile. You may want to audit this.\n";
- }
+ printBranchSamplesDiagnostics();
return std::error_code();
}
@@ -1658,11 +1679,10 @@ void DataAggregator::processBasicEvents() {
NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName,
TimerGroupDesc, opts::TimeAggregator);
uint64_t OutOfRangeSamples = 0;
- uint64_t NumSamples = 0;
for (auto &Sample : BasicSamples) {
const uint64_t PC = Sample.first;
const uint64_t HitCount = Sample.second;
- NumSamples += HitCount;
+ NumTotalSamples += HitCount;
BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
if (!Func) {
OutOfRangeSamples += HitCount;
@@ -1673,31 +1693,7 @@ void DataAggregator::processBasicEvents() {
}
outs() << "PERF2BOLT: read " << NumSamples << " samples\n";
- outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
- << OutOfRangeSamples;
- float Perc = 0.0f;
- if (NumSamples > 0) {
- outs() << " (";
- Perc = OutOfRangeSamples * 100.0f / NumSamples;
- if (outs().has_colors()) {
- if (Perc > 60.0f)
- outs().changeColor(raw_ostream::RED);
- else if (Perc > 40.0f)
- outs().changeColor(raw_ostream::YELLOW);
- else
- outs().changeColor(raw_ostream::GREEN);
- }
- outs() << format("%.1f%%", Perc);
- if (outs().has_colors())
- outs().resetColor();
- outs() << ")";
- }
- outs() << "\n";
- if (Perc > 80.0f)
- outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
- "binary is probably not the same binary used during profiling "
- "collection. The generated data may be ineffective for improving "
- "performance.\n\n";
+ printBasicSamplesDiagnostics(OutOfRangeSamples);
}
std::error_code DataAggregator::parseMemEvents() {
@@ -1799,37 +1795,7 @@ void DataAggregator::processPreAggregated() {
outs() << "PERF2BOLT: read " << AggregatedLBRs.size()
<< " aggregated LBR entries\n";
- outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
- << NumInvalidTraces;
- float Perc = 0.0f;
- if (NumTraces > 0) {
- outs() << " (";
- Perc = NumInvalidTraces * 100.0f / NumTraces;
- if (outs().has_colors()) {
- if (Perc > 10.0f)
- outs().changeColor(raw_ostream::RED);
- else if (Perc > 5.0f)
- outs().changeColor(raw_ostream::YELLOW);
- else
- outs().changeColor(raw_ostream::GREEN);
- }
- outs() << format("%.1f%%", Perc);
- if (outs().has_colors())
- outs().resetColor();
- outs() << ")";
- }
- outs() << "\n";
- if (Perc > 10.0f)
- outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
- "binary is probably not the same binary used during profiling "
- "collection. The generated data may be ineffective for improving "
- "performance.\n\n";
-
- outs() << "PERF2BOLT: Out of range traces involving unknown regions: "
- << NumLongRangeTraces;
- if (NumTraces > 0)
- outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
- outs() << "\n";
+ printBranchSamplesDiagnostics();
}
std::optional<int32_t> DataAggregator::parseCommExecEvent() {
>From 1fe232095bb590257cc73a511b3ab119bac4cbad Mon Sep 17 00:00:00 2001
From: Amir Aupov <amir.aupov at gmail.com>
Date: Sun, 20 Apr 2025 20:04:01 -0700
Subject: [PATCH 2/2] fix build
Created using spr 1.3.4
---
bolt/include/bolt/Profile/DataAggregator.h | 14 +++++++++---
bolt/lib/Profile/DataAggregator.cpp | 26 ++++++++++------------
2 files changed, 23 insertions(+), 17 deletions(-)
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index 1aa05f4121874..79a91861554df 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -223,7 +223,8 @@ class DataAggregator : public DataReader {
bool recordExit(BinaryFunction &BF, uint64_t From, bool Mispred,
uint64_t Count = 1) const;
- /// Aggregation statistics
+ /// Branch stacks aggregation statistics
+ uint64_t NumTraces{0};
uint64_t NumInvalidTraces{0};
uint64_t NumLongRangeTraces{0};
/// Specifies how many samples were recorded in cold areas if we are dealing
@@ -328,8 +329,8 @@ class DataAggregator : public DataReader {
/// Parse a single LBR entry as output by perf script -Fbrstack
ErrorOr<LBREntry> parseLBREntry();
- /// Parse LBR sample, returns the number of traces.
- uint64_t parseLBRSample(const PerfBranchSample &Sample, bool NeedsSkylakeFix);
+ /// Parse LBR sample.
+ void parseLBRSample(const PerfBranchSample &Sample, bool NeedsSkylakeFix);
/// Parse and pre-aggregate branch events.
std::error_code parseBranchEvents();
@@ -488,6 +489,13 @@ class DataAggregator : public DataReader {
void dump(const PerfBranchSample &Sample) const;
void dump(const PerfMemSample &Sample) const;
+ /// Profile diagnostics print methods
+ void printColdSamplesDiagnostic() const;
+ void printLongRangeTracesDiagnostic() const;
+ void printBranchSamplesDiagnostics() const;
+ void printBasicSamplesDiagnostics(uint64_t OutOfRangeSamples) const;
+ void printBranchStacksDiagnostics(uint64_t IgnoredSamples) const;
+
public:
/// If perf.data was collected without build ids, the buildid-list may contain
/// incomplete entries. Return true if the buffer containing
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 734b3a1244e11..1127cc1f9331a 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -1422,9 +1422,8 @@ std::error_code DataAggregator::printLBRHeatMap() {
return std::error_code();
}
-uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
- bool NeedsSkylakeFix) {
- uint64_t NumTraces{0};
+void DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
+ bool NeedsSkylakeFix) {
// LBRs are stored in reverse execution order. NextLBR refers to the next
// executed branch record.
const LBREntry *NextLBR = nullptr;
@@ -1487,10 +1486,9 @@ uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
++Info.TakenCount;
Info.MispredCount += LBR.Mispred;
}
- return NumTraces;
}
-static void printColdSamplesDiagnostic() {
+void DataAggregator::printColdSamplesDiagnostic() const {
if (NumColdSamples > 0) {
const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples;
outs() << "PERF2BOLT: " << NumColdSamples
@@ -1504,7 +1502,7 @@ static void printColdSamplesDiagnostic() {
}
}
-static void printLongRangeTracesDiagnostic() {
+void DataAggregator::printLongRangeTracesDiagnostic() const {
outs() << "PERF2BOLT: out of range traces involving unknown regions: "
<< NumLongRangeTraces;
if (NumTraces > 0)
@@ -1532,10 +1530,10 @@ static float printColoredPct(uint64_t Numerator, uint64_t Denominator, float T1,
if (outs().has_colors())
outs().resetColor();
outs() << ")\n";
- return Perc;
+ return Percent;
}
-static void printBranchSamplesDiagnostics() {
+void DataAggregator::printBranchSamplesDiagnostics() const {
outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
<< NumInvalidTraces;
if (printColoredPct(NumInvalidTraces, NumTraces, 5, 10) > 10)
@@ -1547,7 +1545,8 @@ static void printBranchSamplesDiagnostics() {
printColdSamplesDiagnostic();
}
-static void printBasicSamplesDiagnostics(uint64_t OutOfRangeSamples) {
+void DataAggregator::printBasicSamplesDiagnostics(
+ uint64_t OutOfRangeSamples) const {
outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
<< OutOfRangeSamples;
if (printColoredPct(OutOfRangeSamples, NumTotalSamples, 40, 60) > 80)
@@ -1558,7 +1557,8 @@ static void printBasicSamplesDiagnostics(uint64_t OutOfRangeSamples) {
printColdSamplesDiagnostic();
}
-static void printBranchStacksDiagnostics(uint64_t IgnoredSamples) {
+void DataAggregator::printBranchStacksDiagnostics(
+ uint64_t IgnoredSamples) const {
outs() << "PERF2BOLT: ignored samples: " << IgnoredSamples;
if (printColoredPct(IgnoredSamples, NumTotalSamples, 20, 50) > 50)
errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
@@ -1573,7 +1573,6 @@ std::error_code DataAggregator::parseBranchEvents() {
uint64_t NumEntries = 0;
uint64_t NumSamples = 0;
uint64_t NumSamplesNoLBR = 0;
- uint64_t NumTraces = 0;
bool NeedsSkylakeFix = false;
while (hasData() && NumTotalSamples < opts::MaxSamples) {
@@ -1600,7 +1599,7 @@ std::error_code DataAggregator::parseBranchEvents() {
NeedsSkylakeFix = true;
}
- NumTraces += parseLBRSample(Sample, NeedsSkylakeFix);
+ parseLBRSample(Sample, NeedsSkylakeFix);
}
for (const Trace &Trace : llvm::make_first_range(BranchLBRs))
@@ -1691,7 +1690,7 @@ void DataAggregator::processBasicEvents() {
doSample(*Func, PC, HitCount);
}
- outs() << "PERF2BOLT: read " << NumSamples << " samples\n";
+ outs() << "PERF2BOLT: read " << NumTotalSamples << " samples\n";
printBasicSamplesDiagnostics(OutOfRangeSamples);
}
@@ -1771,7 +1770,6 @@ void DataAggregator::processPreAggregated() {
NamedRegionTimer T("processAggregated", "Processing aggregated branch events",
TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
- uint64_t NumTraces = 0;
for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) {
switch (AggrEntry.EntryType) {
case AggregatedLBREntry::BRANCH:
More information about the llvm-commits
mailing list