[llvm-branch-commits] [llvm] [BOLT] Support multiple perf data inputs (PR #199324)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri May 22 22:12:11 PDT 2026
llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-bolt
Author: Amir Ayupov (aaupov)
<details>
<summary>Changes</summary>
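Allow -p/-perfdata to accept multiple files, passed either as a
comma-separated list or as repeated arguments. The files are processed in
parallel and merged into a single profile; `-perfdata-jobs` (default 4)
sets the number of files processed in parallel.
YAML/DataReader inputs remain single-profile only.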
Test Plan: Updated pre-aggregated-perf.test and perf_test.test.
---
Full diff: https://github.com/llvm/llvm-project/pull/199324.diff
9 Files Affected:
- (modified) bolt/include/bolt/Profile/DataAggregator.h (+12-3)
- (modified) bolt/include/bolt/Utils/CommandLineOpts.h (+1-1)
- (modified) bolt/lib/Profile/DataAggregator.cpp (+49-5)
- (modified) bolt/lib/Rewrite/RewriteInstance.cpp (+7)
- (modified) bolt/lib/Utils/CommandLineOpts.cpp (+8-9)
- (modified) bolt/test/X86/pre-aggregated-perf.test (+5)
- (modified) bolt/test/perf2bolt/perf_test.test (+11)
- (modified) bolt/tools/driver/llvm-bolt.cpp (+3-2)
- (modified) bolt/tools/heatmap/heatmap.cpp (+3-2)
``````````diff
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index 5f63087ec7409..c5133e90d07a6 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -51,9 +51,7 @@ class BoltAddressTranslation;
/// specified by the user.
class DataAggregator : public DataReader {
public:
- explicit DataAggregator(StringRef Filename) : DataReader(Filename) {
- start();
- }
+ explicit DataAggregator(StringRef Filename) : DataReader(Filename) {}
~DataAggregator();
@@ -69,6 +67,10 @@ class DataAggregator : public DataReader {
Error readProfile(BinaryContext &BC) override;
+ /// Add an additional perf.data or pre-aggregated profile input to be merged
+ /// into this aggregation job.
+ void addInputFile(StringRef Filename);
+
bool mayHaveProfileData(const BinaryFunction &BF) override;
/// Set Bolt Address Translation Table when processing samples collected in
@@ -147,6 +149,10 @@ class DataAggregator : public DataReader {
std::unordered_map<uint64_t, uint64_t> BasicSamples;
std::vector<PerfMemSample> MemSamples;
+ /// Perf.data or pre-aggregated inputs to aggregate and merge into this
+ /// reader.
+ std::vector<std::string> InputFilenames;
+
/// Filter pre-aggregated entries belonging to a DSO with this buildid.
/// Set when processing a shared library, empty implies main binary.
StringRef FilterBuildID;
@@ -394,6 +400,9 @@ class DataAggregator : public DataReader {
/// Parse this aggregator's input file.
void parseInput();
+ /// Merge parsed profile data from another aggregation job.
+ void mergeFrom(const DataAggregator &Other);
+
/// Mark binary functions covered by parsed profile data.
void markFunctionsWithProfile();
diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h
index fcea952919f11..dc193477023d7 100644
--- a/bolt/include/bolt/Utils/CommandLineOpts.h
+++ b/bolt/include/bolt/Utils/CommandLineOpts.h
@@ -94,7 +94,7 @@ extern llvm::cl::opt<bool> HotText;
extern llvm::cl::opt<bool> Hugify;
extern llvm::cl::opt<bool> Instrument;
extern llvm::cl::opt<std::string> OutputFilename;
-extern llvm::cl::opt<std::string> PerfData;
+extern llvm::cl::list<std::string> PerfData;
extern llvm::cl::opt<bool> PrintCacheMetrics;
extern llvm::cl::opt<bool> PrintSections;
extern llvm::cl::opt<bool> UpdateBranchProtection;
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 33bfda160ae58..701054c325bf0 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -28,6 +28,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Parallel.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/Regex.h"
@@ -114,6 +115,11 @@ MaxSamples("max-samples",
cl::Hidden,
cl::cat(AggregatorCategory));
+cl::opt<unsigned>
+ PerfDataJobs("perfdata-jobs",
+ cl::desc("number of perf data files to process in parallel"),
+ cl::init(4), cl::cat(AggregatorCategory),
+ cl::sub(cl::SubCommand::getAll()));
extern cl::opt<opts::ProfileFormatKind> ProfileFormat;
extern cl::opt<bool> ProfileWritePseudoProbes;
extern cl::opt<std::string> SaveProfile;
@@ -169,6 +175,27 @@ std::vector<SectionNameAndRange> getTextSections(const BinaryContext *BC) {
DataAggregator::~DataAggregator() { deleteTempFiles(); }
+void DataAggregator::addInputFile(StringRef Filename) {
+ InputFilenames.emplace_back(Filename);
+}
+
+void DataAggregator::mergeFrom(const DataAggregator &Other) {
+ Traces.insert(Traces.end(), Other.Traces.begin(), Other.Traces.end());
+
+ for (const auto &[PC, Count] : Other.BasicSamples)
+ BasicSamples[PC] += Count;
+
+ MemSamples.insert(MemSamples.end(), Other.MemSamples.begin(),
+ Other.MemSamples.end());
+ Returns.insert(Other.Returns.begin(), Other.Returns.end());
+ EventNames.insert(Other.EventNames.begin(), Other.EventNames.end());
+
+ NumTraces += Other.NumTraces;
+ NumInvalidTraces += Other.NumInvalidTraces;
+ NumLongRangeTraces += Other.NumLongRangeTraces;
+ NumTotalSamples += Other.NumTotalSamples;
+}
+
void DataAggregator::markFunctionsWithProfile() {
std::unordered_set<uint64_t> Samples;
std::unordered_set<BinaryFunction *> Funcs;
@@ -228,10 +255,6 @@ void DataAggregator::findPerfExecutable() {
void DataAggregator::start() {
outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n";
- // Turn on heatmap building if requested by --heatmap flag.
- if (!opts::HeatmapMode && opts::HeatmapOutput.getNumOccurrences())
- opts::HeatmapMode = opts::HeatmapModeKind::HM_Optional;
-
// Don't launch perf for pre-aggregated files or when perf input is specified
// by the user.
if (opts::ReadPreAggregated || !opts::ReadPerfEvents.empty())
@@ -707,6 +730,8 @@ void DataAggregator::imputeFallThroughs() {
}
void DataAggregator::parseInput() {
+ start();
+
if (opts::ReadPreAggregated)
parsePreAggregated();
else
@@ -714,9 +739,14 @@ void DataAggregator::parseInput() {
}
Error DataAggregator::preprocessProfile(BinaryContext &BC) {
+ // Turn on heatmap building if requested by --heatmap flag.
+ if (!opts::HeatmapMode && opts::HeatmapOutput.getNumOccurrences())
+ opts::HeatmapMode = opts::HeatmapModeKind::HM_Optional;
+
this->BC = &BC;
if (opts::GeneratePerfTextProfile) {
+ start();
if (Error E = generatePerfTextData()) {
deleteTempFiles();
exit(1);
@@ -724,7 +754,21 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
exit(0);
}
- parseInput();
+ SmallVector<DataAggregator *, 1> Aggregators(1, this);
+ for (StringRef InputFilename : InputFilenames) {
+ auto *DA = Aggregators.emplace_back(new DataAggregator(InputFilename));
+ DA->BC = &BC;
+ }
+
+ ThreadPoolStrategy SavedStrategy = parallel::strategy;
+ parallel::strategy = hardware_concurrency(opts::PerfDataJobs);
+ parallelForEach(Aggregators, [](DataAggregator *DA) { DA->parseInput(); });
+ parallel::strategy = SavedStrategy;
+
+ for (DataAggregator *DA : llvm::drop_begin(Aggregators)) {
+ mergeFrom(*DA);
+ delete DA;
+ }
markFunctionsWithProfile();
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 4a2d83cbf8706..d47ea41a51780 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -473,6 +473,13 @@ Error RewriteInstance::setProfile(StringRef Filename) {
return errorCodeToError(make_error_code(errc::no_such_file_or_directory));
if (ProfileReader) {
+ if (DataAggregator::checkPerfDataMagic(Filename) &&
+ // Poor man's RTTI
+ ProfileReader->getReaderName() == StringRef("perf data aggregator")) {
+ static_cast<DataAggregator *>(ProfileReader.get())
+ ->addInputFile(Filename);
+ return Error::success();
+ }
// Already exists
return make_error<StringError>(Twine("multiple profiles specified: ") +
ProfileReader->getFilename() + " and " +
diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp
index 49f9ee7403c50..cbd0be4a806ae 100644
--- a/bolt/lib/Utils/CommandLineOpts.cpp
+++ b/bolt/lib/Utils/CommandLineOpts.cpp
@@ -243,15 +243,14 @@ OutputFilename("o",
cl::Optional,
cl::cat(BoltOutputCategory));
-cl::opt<std::string> PerfData("perfdata", cl::desc("<data file>"), cl::Optional,
- cl::cat(AggregatorCategory),
- cl::sub(cl::SubCommand::getAll()));
-
-static cl::alias
-PerfDataA("p",
- cl::desc("alias for -perfdata"),
- cl::aliasopt(PerfData),
- cl::cat(AggregatorCategory));
+cl::list<std::string> PerfData("perfdata", cl::CommaSeparated,
+ cl::desc("<data file>"),
+ cl::cat(AggregatorCategory),
+ cl::sub(cl::SubCommand::getAll()));
+
+static cl::alias PerfDataA("p", cl::CommaSeparated,
+ cl::desc("alias for -perfdata"),
+ cl::aliasopt(PerfData), cl::cat(AggregatorCategory));
cl::opt<bool> PrintCacheMetrics(
"print-cache-metrics",
diff --git a/bolt/test/X86/pre-aggregated-perf.test b/bolt/test/X86/pre-aggregated-perf.test
index 6951d09db3de6..50a0d6f9d6f71 100644
--- a/bolt/test/X86/pre-aggregated-perf.test
+++ b/bolt/test/X86/pre-aggregated-perf.test
@@ -33,8 +33,13 @@ CHECK-WARNING: BOLT-INFO: Functions with density >= 21.7 account for 97.00% tota
RUN: llvm-bolt %t.exe -data %t -o %t.null | FileCheck %s
RUN: llvm-bolt %t.exe -data %t.new -o %t.null | FileCheck %s
RUN: llvm-bolt %t.exe -p %p/Inputs/pre-aggregated.txt --pa -o %t.null | FileCheck %s
+RUN: llvm-bolt %t.exe --pa -perfdata %p/Inputs/pre-aggregated.txt -perfdata %p/Inputs/pre-aggregated.txt -o %t.multi-perfdata.null | FileCheck %s --check-prefix=CHECK-MULTI
+RUN: llvm-bolt %t.exe --pa -p %p/Inputs/pre-aggregated.txt,%p/Inputs/pre-aggregated.txt -o %t.multi-perfdata-comma.null | FileCheck %s --check-prefix=CHECK-MULTI
+RUN: not llvm-bolt %t.exe --pa -p %p/Inputs/pre-aggregated.txt -p %t.missing -o %t.multi-perfdata-missing.null 2>&1 | FileCheck %s --check-prefix=CHECK-MISSING
CHECK: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile
+CHECK-MULTI: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile
+CHECK-MISSING: {{.*}}missing': No such file or directory.
RUN: FileCheck %s -check-prefix=PERF2BOLT --input-file %t
RUN: FileCheck %s -check-prefix=NEWFORMAT --input-file %t.new
diff --git a/bolt/test/perf2bolt/perf_test.test b/bolt/test/perf2bolt/perf_test.test
index 984432716992a..e34ac76632113 100644
--- a/bolt/test/perf2bolt/perf_test.test
+++ b/bolt/test/perf2bolt/perf_test.test
@@ -7,11 +7,22 @@ RUN: perf record -Fmax -e cycles:u -o %t2 -- %t
RUN: perf2bolt %t -p=%t2 -o %t3 -ba -ignore-build-id --show-density \
RUN: --heatmap %t.hm 2>&1 | FileCheck %s
RUN: FileCheck %s --input-file %t.hm-section-hotness.csv --check-prefix=CHECK-HM
+# Multiple perf.data files as input
+RUN: perf2bolt %t -p=%t2 -p %t2 -o %t3.multi -ba -ignore-build-id \
+RUN: | FileCheck %s --check-prefix=CHECK-MULTI
+RUN: perf2bolt %t -p=%t2,%t2 -o %t3.comma -ba -ignore-build-id \
+RUN: | FileCheck %s --check-prefix=CHECK-MULTI
+RUN: cmp %t3.multi %t3.comma
+# Check counts are 2x the original
+RUN: merge-fdata %t3 %t3 | sort > %t3.x2
+RUN: sort %t3.multi > %t3.multi.x2
+RUN: cmp %t3.x2 %t3.multi.x2
CHECK-NOT: PERF2BOLT-ERROR
CHECK-NOT: !! WARNING !! This high mismatch ratio indicates the input binary is probably not the same binary used during profiling collection.
CHECK: HEATMAP: building heat map
CHECK: BOLT-INFO: Functions with density >= {{.*}} account for 99.00% total sample counts.
+CHECK-MULTI: BOLT-INFO
RUN: %clang %S/Inputs/perf_test.c -no-pie -fuse-ld=lld -o %t4
RUN: perf record -Fmax -e cycles:u -o %t5 -- %t4
diff --git a/bolt/tools/driver/llvm-bolt.cpp b/bolt/tools/driver/llvm-bolt.cpp
index 2cd5151bed49a..cc8e6d70001eb 100644
--- a/bolt/tools/driver/llvm-bolt.cpp
+++ b/bolt/tools/driver/llvm-bolt.cpp
@@ -234,8 +234,9 @@ int main(int argc, char **argv) {
}
if (!opts::PerfData.empty()) {
- if (Error E = RI.setProfile(opts::PerfData))
- report_error(opts::PerfData, std::move(E));
+ for (StringRef Filename : opts::PerfData)
+ if (Error E = RI.setProfile(Filename))
+ report_error(Filename, std::move(E));
} else if (opts::AggregateOnly) {
errs() << ToolName << ": missing required -perfdata option.\n";
exit(1);
diff --git a/bolt/tools/heatmap/heatmap.cpp b/bolt/tools/heatmap/heatmap.cpp
index 17a969e0c8598..682eb60779025 100644
--- a/bolt/tools/heatmap/heatmap.cpp
+++ b/bolt/tools/heatmap/heatmap.cpp
@@ -121,8 +121,9 @@ int main(int argc, char **argv) {
report_error("RewriteInstance", std::move(E));
RewriteInstance &RI = *RIOrErr.get();
- if (Error E = RI.setProfile(opts::PerfData))
- report_error(opts::PerfData, std::move(E));
+ for (StringRef Filename : opts::PerfData)
+ if (Error E = RI.setProfile(Filename))
+ report_error(Filename, std::move(E));
if (Error E = RI.run())
report_error(opts::InputFilename, std::move(E));
``````````
</details>
https://github.com/llvm/llvm-project/pull/199324
More information about the llvm-branch-commits
mailing list