[llvm-branch-commits] [BOLT] Print .text scores in perf2bolt (PR #139194)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu May 8 18:46:10 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-bolt
Author: Amir Ayupov (aaupov)
<details>
<summary>Changes</summary>
Expose the heatmap's profile score computation for the text section
under a new option, `--print-heatmap-stats`.
This option collects and prints the following stats:
- hotness: fraction of samples attributed to the section,
- utilization: fraction of the section's buckets that were executed,
- partition score: hotness times utilization, higher is better.
Test Plan:
updated perf2bolt tests
- pre-aggregated-perf.test: pre-aggregated data
- bolt-address-translation-yaml.test: pre-aggregated + BOLTed input
- perf_test.test: no-LBR perf data
---
Full diff: https://github.com/llvm/llvm-project/pull/139194.diff
9 Files Affected:
- (modified) bolt/include/bolt/Profile/DataAggregator.h (+6-1)
- (modified) bolt/include/bolt/Profile/Heatmap.h (+2)
- (modified) bolt/include/bolt/Utils/CommandLineOpts.h (+1)
- (modified) bolt/lib/Profile/DataAggregator.cpp (+50-27)
- (modified) bolt/lib/Profile/Heatmap.cpp (+9)
- (modified) bolt/lib/Utils/CommandLineOpts.cpp (+5)
- (modified) bolt/test/X86/bolt-address-translation-yaml.test (+2-1)
- (modified) bolt/test/X86/pre-aggregated-perf.test (+2-1)
- (modified) bolt/test/perf2bolt/perf_test.test (+5-2)
``````````diff
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index d66d198e37d61..ac036fe167eed 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -15,6 +15,7 @@
#define BOLT_PROFILE_DATA_AGGREGATOR_H
#include "bolt/Profile/DataReader.h"
+#include "bolt/Profile/Heatmap.h"
#include "bolt/Profile/YAMLProfileWriter.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
@@ -270,8 +271,10 @@ class DataAggregator : public DataReader {
/// everything
bool hasData() const { return !ParsingBuf.empty(); }
+ /// Build heat map based on LBR samples.
+ Expected<Heatmap> buildHeatMap();
/// Print heat map based on LBR samples.
- std::error_code printLBRHeatMap();
+ void printHeatMap(const Heatmap::SectionStatsMap &, const Heatmap &) const;
/// Parse a single perf sample containing a PID associated with a sequence of
/// LBR entries. If the PID does not correspond to the binary we are looking
@@ -473,6 +476,8 @@ class DataAggregator : public DataReader {
void printBranchSamplesDiagnostics() const;
void printBasicSamplesDiagnostics(uint64_t OutOfRangeSamples) const;
void printBranchStacksDiagnostics(uint64_t IgnoredSamples) const;
+ void printHeatmapTextStats(const Heatmap &,
+ const Heatmap::SectionStatsMap &) const;
public:
/// If perf.data was collected without build ids, the buildid-list may contain
diff --git a/bolt/include/bolt/Profile/Heatmap.h b/bolt/include/bolt/Profile/Heatmap.h
index c7b3d45fa5cc2..bb073833ec9f7 100644
--- a/bolt/include/bolt/Profile/Heatmap.h
+++ b/bolt/include/bolt/Profile/Heatmap.h
@@ -88,6 +88,8 @@ class Heatmap {
uint64_t Buckets{0};
};
+ uint64_t getNumBuckets(StringRef Name) const;
+
/// Mapping from section name to associated \p SectionStats. Special entries:
/// - [total] for total stats,
/// - [unmapped] for samples outside any section, if non-zero.
diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h
index 3de945f6a1507..b5a7be53e4189 100644
--- a/bolt/include/bolt/Utils/CommandLineOpts.h
+++ b/bolt/include/bolt/Utils/CommandLineOpts.h
@@ -44,6 +44,7 @@ extern llvm::cl::opt<unsigned> HeatmapBlock;
extern llvm::cl::opt<unsigned long long> HeatmapMaxAddress;
extern llvm::cl::opt<unsigned long long> HeatmapMinAddress;
extern llvm::cl::opt<bool> HeatmapPrintMappings;
+extern llvm::cl::opt<bool> HeatmapStats;
extern llvm::cl::opt<bool> HotData;
extern llvm::cl::opt<bool> HotFunctionsAtEnd;
extern llvm::cl::opt<bool> HotText;
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 11850fab28bb8..b0ad4c69e2334 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -508,21 +508,27 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
errs() << "PERF2BOLT: failed to parse samples\n";
// Special handling for memory events
- if (prepareToParse("mem events", MemEventsPPI, MemEventsErrorCallback))
- return Error::success();
-
- if (const std::error_code EC = parseMemEvents())
- errs() << "PERF2BOLT: failed to parse memory events: " << EC.message()
- << '\n';
+ if (!prepareToParse("mem events", MemEventsPPI, MemEventsErrorCallback))
+ if (const std::error_code EC = parseMemEvents())
+ errs() << "PERF2BOLT: failed to parse memory events: " << EC.message()
+ << '\n';
deleteTempFiles();
heatmap:
+ if (!opts::HeatmapMode && !opts::HeatmapStats)
+ return Error::success();
+
+ Expected<Heatmap> HM = buildHeatMap();
+ if (!HM)
+ return HM.takeError();
+ Heatmap::SectionStatsMap Stats = HM->computeSectionStats();
if (opts::HeatmapMode) {
- if (std::error_code EC = printLBRHeatMap())
- return errorCodeToError(EC);
+ printHeatMap(Stats, *HM);
exit(0);
}
+ // opts::HeatmapStats
+ printHeatmapTextStats(*HM, Stats);
return Error::success();
}
@@ -1310,7 +1316,7 @@ bool DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) const {
(LBR.From >= KernelBaseAddr || LBR.To >= KernelBaseAddr);
}
-std::error_code DataAggregator::printLBRHeatMap() {
+Expected<Heatmap> DataAggregator::buildHeatMap() {
outs() << "PERF2BOLT: parse branch events...\n";
NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
TimerGroupDesc, opts::TimeAggregator);
@@ -1323,15 +1329,12 @@ std::error_code DataAggregator::printLBRHeatMap() {
opts::HeatmapMaxAddress, getTextSections(BC));
if (!NumTotalSamples) {
- if (opts::BasicAggregation) {
- errs() << "HEATMAP-ERROR: no basic event samples detected in profile. "
- "Cannot build heatmap.";
- } else {
- errs() << "HEATMAP-ERROR: no LBR traces detected in profile. "
- "Cannot build heatmap. Use -nl for building heatmap from "
- "basic events.\n";
- }
- exit(1);
+ if (opts::BasicAggregation)
+ return createStringError(
+ "no basic event samples detected in profile. Cannot build heatmap");
+ return createStringError(
+ "no LBR traces detected in profile. Cannot build heatmap. Use -nl for "
+ "building heatmap from basic events");
}
outs() << "HEATMAP: building heat map...\n";
@@ -1347,24 +1350,44 @@ std::error_code DataAggregator::printLBRHeatMap() {
if (HM.getNumInvalidRanges())
outs() << "HEATMAP: invalid traces: " << HM.getNumInvalidRanges() << '\n';
- if (!HM.size()) {
- errs() << "HEATMAP-ERROR: no valid traces registered\n";
- exit(1);
- }
+ if (!HM.size())
+ return createStringError("no valid traces registered");
+ return HM;
+}
+void DataAggregator::printHeatMap(const Heatmap::SectionStatsMap &Stats,
+ const Heatmap &HM) const {
HM.print(opts::OutputFilename);
if (opts::OutputFilename == "-")
HM.printCDF(opts::OutputFilename);
else
HM.printCDF(opts::OutputFilename + ".csv");
- Heatmap::SectionStatsMap Stats = HM.computeSectionStats();
if (opts::OutputFilename == "-")
HM.printSectionHotness(Stats, opts::OutputFilename);
else
HM.printSectionHotness(Stats,
opts::OutputFilename + "-section-hotness.csv");
+}
- return std::error_code();
+void DataAggregator::printHeatmapTextStats(
+ const Heatmap &HM, const Heatmap::SectionStatsMap &Stats) const {
+ Heatmap::SectionStatsMap::const_iterator TotalStatsIt = Stats.find("[total]");
+ assert(TotalStatsIt != Stats.end() && "Malformed SectionStatsMap");
+ Heatmap::SectionStatsMap::const_iterator TextStatsIt =
+ Stats.find(BC->getMainCodeSectionName());
+ if (TextStatsIt == Stats.end())
+ return;
+
+ const Heatmap::SectionStats &TextStats = TextStatsIt->second;
+ const Heatmap::SectionStats &TotalStats = TotalStatsIt->second;
+
+ const float TextHotness = 1. * TextStats.Samples / TotalStats.Samples;
+ const float TextUtilization =
+ 1. * TextStats.Buckets / HM.getNumBuckets(BC->getMainCodeSectionName());
+ const float TextPartitionScore = TextHotness * TextUtilization;
+ outs() << "HEATMAP: " << BC->getMainCodeSectionName() << " scores: "
+ << formatv("hotness: {0:f4}, utilization: {1:f4}, partition: {2:f4}\n",
+ TextHotness, TextUtilization, TextPartitionScore);
}
void DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
@@ -1389,7 +1412,7 @@ void DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
const uint64_t TraceTo = NextLBR->From;
const BinaryFunction *TraceBF =
getBinaryFunctionContainingAddress(TraceFrom);
- if (opts::HeatmapMode) {
+ if (opts::HeatmapMode || opts::HeatmapStats) {
FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)];
++Info.InternCount;
} else if (TraceBF && TraceBF->containsAddress(TraceTo)) {
@@ -1426,7 +1449,7 @@ void DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
}
NextLBR = &LBR;
- if (opts::HeatmapMode) {
+ if (opts::HeatmapMode || opts::HeatmapStats) {
TakenBranchInfo &Info = BranchLBRs[Trace(LBR.From, LBR.To)];
++Info.TakenCount;
continue;
@@ -1439,7 +1462,7 @@ void DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
++Info.TakenCount;
Info.MispredCount += LBR.Mispred;
}
- if (opts::HeatmapMode && !Sample.LBR.empty()) {
+ if ((opts::HeatmapMode || opts::HeatmapStats) && !Sample.LBR.empty()) {
++BasicSamples[Sample.LBR.front().To];
++BasicSamples[Sample.LBR.back().From];
}
diff --git a/bolt/lib/Profile/Heatmap.cpp b/bolt/lib/Profile/Heatmap.cpp
index d3ff74f664046..09e4cdd7f4cd8 100644
--- a/bolt/lib/Profile/Heatmap.cpp
+++ b/bolt/lib/Profile/Heatmap.cpp
@@ -369,5 +369,14 @@ void Heatmap::printSectionHotness(const StringMap<SectionStats> &Stats,
const float UnmappedPct = 100. * UnmappedIt->second.Samples / NumTotalCounts;
OS << formatv("[unmapped], 0x0, 0x0, {0:f4}, 0\n", UnmappedPct);
}
+
+uint64_t Heatmap::getNumBuckets(StringRef Name) const {
+ auto It = llvm::find_if(TextSections, [Name](const SectionNameAndRange &Sec) {
+ return Sec.Name == Name;
+ });
+ if (It == TextSections.end())
+ return 0;
+ return getNumBuckets(It->BeginAddress, It->EndAddress);
+}
} // namespace bolt
} // namespace llvm
diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp
index ad714371436e0..98ae00b34652a 100644
--- a/bolt/lib/Utils/CommandLineOpts.cpp
+++ b/bolt/lib/Utils/CommandLineOpts.cpp
@@ -119,6 +119,11 @@ cl::opt<bool> HeatmapPrintMappings(
"sections (default false)"),
cl::Optional, cl::cat(HeatmapCategory));
+cl::opt<bool> HeatmapStats(
+ "print-heatmap-stats",
+ cl::desc("print heatmap statistics without producing the heatmap"),
+ cl::Optional, cl::cat(HeatmapCategory));
+
cl::opt<bool> HotData("hot-data",
cl::desc("hot data symbols support (relocation mode)"),
cl::cat(BoltCategory));
diff --git a/bolt/test/X86/bolt-address-translation-yaml.test b/bolt/test/X86/bolt-address-translation-yaml.test
index a6a212d9c1b38..443702dd6c4fb 100644
--- a/bolt/test/X86/bolt-address-translation-yaml.test
+++ b/bolt/test/X86/bolt-address-translation-yaml.test
@@ -28,7 +28,7 @@ ORDER-YAML-CHECK-NEXT: calls: [ { off: 0x26, fid: [[#]], cnt: 20 } ]
ORDER-YAML-CHECK-NEXT: succ: [ { bid: 5, cnt: 7 }
## Large profile test
RUN: perf2bolt %t.out --pa -p %p/Inputs/blarge_new_bat.preagg.txt -w %t.yaml -o %t.fdata \
-RUN: 2>&1 | FileCheck --check-prefix READ-BAT-CHECK %s
+RUN: --print-heatmap-stats 2>&1 | FileCheck --check-prefix READ-BAT-CHECK %s
RUN: FileCheck --input-file %t.yaml --check-prefix YAML-BAT-CHECK %s
## Check that YAML converted from fdata matches YAML created directly with BAT.
RUN: llvm-bolt %t.exe -data %t.fdata -w %t.yaml-fdata -o /dev/null \
@@ -46,6 +46,7 @@ WRITE-BAT-CHECK: BOLT-INFO: BAT section size (bytes): 404
READ-BAT-CHECK-NOT: BOLT-ERROR: unable to save profile in YAML format for input file processed by BOLT
READ-BAT-CHECK: BOLT-INFO: Parsed 5 BAT entries
READ-BAT-CHECK: PERF2BOLT: read 79 aggregated LBR entries
+READ-BAT-CHECK: HEATMAP: .text scores: hotness: 0.3876, utilization: 0.9167, partition: 0.3553
READ-BAT-CHECK: BOLT-INFO: 5 out of 21 functions in the binary (23.8%) have non-empty execution profile
READ-BAT-FDATA-CHECK: BOLT-INFO: 5 out of 16 functions in the binary (31.2%) have non-empty execution profile
diff --git a/bolt/test/X86/pre-aggregated-perf.test b/bolt/test/X86/pre-aggregated-perf.test
index cf745ca7bf7b6..926b0cd44b8b4 100644
--- a/bolt/test/X86/pre-aggregated-perf.test
+++ b/bolt/test/X86/pre-aggregated-perf.test
@@ -11,10 +11,11 @@ REQUIRES: system-linux
RUN: yaml2obj %p/Inputs/blarge.yaml &> %t.exe
RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated.txt -w %t.new \
-RUN: --show-density \
+RUN: --show-density --print-heatmap-stats \
RUN: --profile-density-threshold=9 --profile-density-cutoff-hot=970000 \
RUN: --profile-use-dfs | FileCheck %s --check-prefix=CHECK-P2B
+CHECK-P2B: HEATMAP: .text scores: hotness: 1.0000, utilization: 0.0426, partition: 0.0426
CHECK-P2B: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile
CHECK-P2B: BOLT-INFO: Functions with density >= 21.7 account for 97.00% total sample counts.
diff --git a/bolt/test/perf2bolt/perf_test.test b/bolt/test/perf2bolt/perf_test.test
index 44111de89a4ea..f55e09191c3d6 100644
--- a/bolt/test/perf2bolt/perf_test.test
+++ b/bolt/test/perf2bolt/perf_test.test
@@ -4,12 +4,15 @@ REQUIRES: system-linux, perf
RUN: %clang %S/Inputs/perf_test.c -fuse-ld=lld -Wl,--script=%S/Inputs/perf_test.lds -o %t
RUN: perf record -Fmax -e cycles:u -o %t2 -- %t
-RUN: perf2bolt %t -p=%t2 -o %t3 -nl -ignore-build-id 2>&1 | FileCheck %s
+RUN: perf2bolt %t -p=%t2 -o %t3 -nl -ignore-build-id --print-heatmap-stats \
+RUN: 2>&1 | FileCheck %s
CHECK-NOT: PERF2BOLT-ERROR
CHECK-NOT: !! WARNING !! This high mismatch ratio indicates the input binary is probably not the same binary used during profiling collection.
+CHECK: HEATMAP: .text scores: hotness: {{.*}}, utilization: {{.*}}, partition: {{.*}}
CHECK: BOLT-INFO: Functions with density >= {{.*}} account for 99.00% total sample counts.
RUN: %clang %S/Inputs/perf_test.c -no-pie -fuse-ld=lld -o %t4
RUN: perf record -Fmax -e cycles:u -o %t5 -- %t4
-RUN: perf2bolt %t4 -p=%t5 -o %t6 -nl -ignore-build-id 2>&1 | FileCheck %s
+RUN: perf2bolt %t4 -p=%t5 -o %t6 -nl -ignore-build-id --print-heatmap-stats \
+RUN: 2>&1 | FileCheck %s
``````````
</details>
https://github.com/llvm/llvm-project/pull/139194
More information about the llvm-branch-commits
mailing list