[llvm-branch-commits] [llvm] [BOLT] Support multiple perf data inputs (PR #199324)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Fri May 22 22:12:11 PDT 2026


llvmorg-github-actions[bot] wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-bolt

Author: Amir Ayupov (aaupov)

<details>
<summary>Changes</summary>

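Allow -p/-perfdata to accept multiple files, given either as a
comma-separated list or as repeated arguments. The files are processed in
parallel, with the number of concurrent jobs set by `-perfdata-jobs`
(default 4), and the parsed results are merged into a single aggregation.
Keep YAML/DataReader inputs single-profile only.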

Test Plan: Updated pre-aggregated-perf.test and perf_test.test


---
Full diff: https://github.com/llvm/llvm-project/pull/199324.diff


9 Files Affected:

- (modified) bolt/include/bolt/Profile/DataAggregator.h (+12-3) 
- (modified) bolt/include/bolt/Utils/CommandLineOpts.h (+1-1) 
- (modified) bolt/lib/Profile/DataAggregator.cpp (+49-5) 
- (modified) bolt/lib/Rewrite/RewriteInstance.cpp (+7) 
- (modified) bolt/lib/Utils/CommandLineOpts.cpp (+8-9) 
- (modified) bolt/test/X86/pre-aggregated-perf.test (+5) 
- (modified) bolt/test/perf2bolt/perf_test.test (+11) 
- (modified) bolt/tools/driver/llvm-bolt.cpp (+3-2) 
- (modified) bolt/tools/heatmap/heatmap.cpp (+3-2) 


``````````diff
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index 5f63087ec7409..c5133e90d07a6 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -51,9 +51,7 @@ class BoltAddressTranslation;
 /// specified by the user.
 class DataAggregator : public DataReader {
 public:
-  explicit DataAggregator(StringRef Filename) : DataReader(Filename) {
-    start();
-  }
+  explicit DataAggregator(StringRef Filename) : DataReader(Filename) {}
 
   ~DataAggregator();
 
@@ -69,6 +67,10 @@ class DataAggregator : public DataReader {
 
   Error readProfile(BinaryContext &BC) override;
 
+  /// Add an additional perf.data or pre-aggregated profile input to be merged
+  /// into this aggregation job.
+  void addInputFile(StringRef Filename);
+
   bool mayHaveProfileData(const BinaryFunction &BF) override;
 
   /// Set Bolt Address Translation Table when processing samples collected in
@@ -147,6 +149,10 @@ class DataAggregator : public DataReader {
   std::unordered_map<uint64_t, uint64_t> BasicSamples;
   std::vector<PerfMemSample> MemSamples;
 
+  /// Perf.data or pre-aggregated inputs to aggregate and merge into this
+  /// reader.
+  std::vector<std::string> InputFilenames;
+
   /// Filter pre-aggregated entries belonging to a DSO with this buildid.
   /// Set when processing a shared library, empty implies main binary.
   StringRef FilterBuildID;
@@ -394,6 +400,9 @@ class DataAggregator : public DataReader {
   /// Parse this aggregator's input file.
   void parseInput();
 
+  /// Merge parsed profile data from another aggregation job.
+  void mergeFrom(const DataAggregator &Other);
+
   /// Mark binary functions covered by parsed profile data.
   void markFunctionsWithProfile();
 
diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h
index fcea952919f11..dc193477023d7 100644
--- a/bolt/include/bolt/Utils/CommandLineOpts.h
+++ b/bolt/include/bolt/Utils/CommandLineOpts.h
@@ -94,7 +94,7 @@ extern llvm::cl::opt<bool> HotText;
 extern llvm::cl::opt<bool> Hugify;
 extern llvm::cl::opt<bool> Instrument;
 extern llvm::cl::opt<std::string> OutputFilename;
-extern llvm::cl::opt<std::string> PerfData;
+extern llvm::cl::list<std::string> PerfData;
 extern llvm::cl::opt<bool> PrintCacheMetrics;
 extern llvm::cl::opt<bool> PrintSections;
 extern llvm::cl::opt<bool> UpdateBranchProtection;
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 33bfda160ae58..701054c325bf0 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -28,6 +28,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Errc.h"
 #include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Parallel.h"
 #include "llvm/Support/Process.h"
 #include "llvm/Support/Program.h"
 #include "llvm/Support/Regex.h"
@@ -114,6 +115,11 @@ MaxSamples("max-samples",
   cl::Hidden,
   cl::cat(AggregatorCategory));
 
+cl::opt<unsigned>
+    PerfDataJobs("perfdata-jobs",
+                 cl::desc("number of perf data files to process in parallel"),
+                 cl::init(4), cl::cat(AggregatorCategory),
+                 cl::sub(cl::SubCommand::getAll()));
 extern cl::opt<opts::ProfileFormatKind> ProfileFormat;
 extern cl::opt<bool> ProfileWritePseudoProbes;
 extern cl::opt<std::string> SaveProfile;
@@ -169,6 +175,27 @@ std::vector<SectionNameAndRange> getTextSections(const BinaryContext *BC) {
 
 DataAggregator::~DataAggregator() { deleteTempFiles(); }
 
+void DataAggregator::addInputFile(StringRef Filename) {
+  InputFilenames.emplace_back(Filename);
+}
+
+void DataAggregator::mergeFrom(const DataAggregator &Other) {
+  Traces.insert(Traces.end(), Other.Traces.begin(), Other.Traces.end());
+
+  for (const auto &[PC, Count] : Other.BasicSamples)
+    BasicSamples[PC] += Count;
+
+  MemSamples.insert(MemSamples.end(), Other.MemSamples.begin(),
+                    Other.MemSamples.end());
+  Returns.insert(Other.Returns.begin(), Other.Returns.end());
+  EventNames.insert(Other.EventNames.begin(), Other.EventNames.end());
+
+  NumTraces += Other.NumTraces;
+  NumInvalidTraces += Other.NumInvalidTraces;
+  NumLongRangeTraces += Other.NumLongRangeTraces;
+  NumTotalSamples += Other.NumTotalSamples;
+}
+
 void DataAggregator::markFunctionsWithProfile() {
   std::unordered_set<uint64_t> Samples;
   std::unordered_set<BinaryFunction *> Funcs;
@@ -228,10 +255,6 @@ void DataAggregator::findPerfExecutable() {
 void DataAggregator::start() {
   outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n";
 
-  // Turn on heatmap building if requested by --heatmap flag.
-  if (!opts::HeatmapMode && opts::HeatmapOutput.getNumOccurrences())
-    opts::HeatmapMode = opts::HeatmapModeKind::HM_Optional;
-
   // Don't launch perf for pre-aggregated files or when perf input is specified
   // by the user.
   if (opts::ReadPreAggregated || !opts::ReadPerfEvents.empty())
@@ -707,6 +730,8 @@ void DataAggregator::imputeFallThroughs() {
 }
 
 void DataAggregator::parseInput() {
+  start();
+
   if (opts::ReadPreAggregated)
     parsePreAggregated();
   else
@@ -714,9 +739,14 @@ void DataAggregator::parseInput() {
 }
 
 Error DataAggregator::preprocessProfile(BinaryContext &BC) {
+  // Turn on heatmap building if requested by --heatmap flag.
+  if (!opts::HeatmapMode && opts::HeatmapOutput.getNumOccurrences())
+    opts::HeatmapMode = opts::HeatmapModeKind::HM_Optional;
+
   this->BC = &BC;
 
   if (opts::GeneratePerfTextProfile) {
+    start();
     if (Error E = generatePerfTextData()) {
       deleteTempFiles();
       exit(1);
@@ -724,7 +754,21 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
     exit(0);
   }
 
-  parseInput();
+  SmallVector<DataAggregator *, 1> Aggregators(1, this);
+  for (StringRef InputFilename : InputFilenames) {
+    auto *DA = Aggregators.emplace_back(new DataAggregator(InputFilename));
+    DA->BC = &BC;
+  }
+
+  ThreadPoolStrategy SavedStrategy = parallel::strategy;
+  parallel::strategy = hardware_concurrency(opts::PerfDataJobs);
+  parallelForEach(Aggregators, [](DataAggregator *DA) { DA->parseInput(); });
+  parallel::strategy = SavedStrategy;
+
+  for (DataAggregator *DA : llvm::drop_begin(Aggregators)) {
+    mergeFrom(*DA);
+    delete DA;
+  }
 
   markFunctionsWithProfile();
 
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 4a2d83cbf8706..d47ea41a51780 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -473,6 +473,13 @@ Error RewriteInstance::setProfile(StringRef Filename) {
     return errorCodeToError(make_error_code(errc::no_such_file_or_directory));
 
   if (ProfileReader) {
+    if (DataAggregator::checkPerfDataMagic(Filename) &&
+        // Poor man's RTTI
+        ProfileReader->getReaderName() == StringRef("perf data aggregator")) {
+      static_cast<DataAggregator *>(ProfileReader.get())
+          ->addInputFile(Filename);
+      return Error::success();
+    }
     // Already exists
     return make_error<StringError>(Twine("multiple profiles specified: ") +
                                        ProfileReader->getFilename() + " and " +
diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp
index 49f9ee7403c50..cbd0be4a806ae 100644
--- a/bolt/lib/Utils/CommandLineOpts.cpp
+++ b/bolt/lib/Utils/CommandLineOpts.cpp
@@ -243,15 +243,14 @@ OutputFilename("o",
   cl::Optional,
   cl::cat(BoltOutputCategory));
 
-cl::opt<std::string> PerfData("perfdata", cl::desc("<data file>"), cl::Optional,
-                              cl::cat(AggregatorCategory),
-                              cl::sub(cl::SubCommand::getAll()));
-
-static cl::alias
-PerfDataA("p",
-  cl::desc("alias for -perfdata"),
-  cl::aliasopt(PerfData),
-  cl::cat(AggregatorCategory));
+cl::list<std::string> PerfData("perfdata", cl::CommaSeparated,
+                               cl::desc("<data file>"),
+                               cl::cat(AggregatorCategory),
+                               cl::sub(cl::SubCommand::getAll()));
+
+static cl::alias PerfDataA("p", cl::CommaSeparated,
+                           cl::desc("alias for -perfdata"),
+                           cl::aliasopt(PerfData), cl::cat(AggregatorCategory));
 
 cl::opt<bool> PrintCacheMetrics(
     "print-cache-metrics",
diff --git a/bolt/test/X86/pre-aggregated-perf.test b/bolt/test/X86/pre-aggregated-perf.test
index 6951d09db3de6..50a0d6f9d6f71 100644
--- a/bolt/test/X86/pre-aggregated-perf.test
+++ b/bolt/test/X86/pre-aggregated-perf.test
@@ -33,8 +33,13 @@ CHECK-WARNING: BOLT-INFO: Functions with density >= 21.7 account for 97.00% tota
 RUN: llvm-bolt %t.exe -data %t -o %t.null | FileCheck %s
 RUN: llvm-bolt %t.exe -data %t.new -o %t.null | FileCheck %s
 RUN: llvm-bolt %t.exe -p %p/Inputs/pre-aggregated.txt --pa -o %t.null | FileCheck %s
+RUN: llvm-bolt %t.exe --pa -perfdata %p/Inputs/pre-aggregated.txt -perfdata %p/Inputs/pre-aggregated.txt -o %t.multi-perfdata.null | FileCheck %s --check-prefix=CHECK-MULTI
+RUN: llvm-bolt %t.exe --pa -p %p/Inputs/pre-aggregated.txt,%p/Inputs/pre-aggregated.txt -o %t.multi-perfdata-comma.null | FileCheck %s --check-prefix=CHECK-MULTI
+RUN: not llvm-bolt %t.exe --pa -p %p/Inputs/pre-aggregated.txt -p %t.missing -o %t.multi-perfdata-missing.null 2>&1 | FileCheck %s --check-prefix=CHECK-MISSING
 
 CHECK: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile
+CHECK-MULTI: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile
+CHECK-MISSING: {{.*}}missing': No such file or directory.
 
 RUN: FileCheck %s -check-prefix=PERF2BOLT --input-file %t
 RUN: FileCheck %s -check-prefix=NEWFORMAT --input-file %t.new
diff --git a/bolt/test/perf2bolt/perf_test.test b/bolt/test/perf2bolt/perf_test.test
index 984432716992a..e34ac76632113 100644
--- a/bolt/test/perf2bolt/perf_test.test
+++ b/bolt/test/perf2bolt/perf_test.test
@@ -7,11 +7,22 @@ RUN: perf record -Fmax -e cycles:u -o %t2 -- %t
 RUN: perf2bolt %t -p=%t2 -o %t3 -ba -ignore-build-id --show-density \
 RUN:   --heatmap %t.hm 2>&1 | FileCheck %s
 RUN: FileCheck %s --input-file %t.hm-section-hotness.csv --check-prefix=CHECK-HM
+# Multiple perf.data files as input
+RUN: perf2bolt %t -p=%t2 -p %t2 -o %t3.multi -ba -ignore-build-id \
+RUN:   | FileCheck %s --check-prefix=CHECK-MULTI
+RUN: perf2bolt %t -p=%t2,%t2 -o %t3.comma -ba -ignore-build-id \
+RUN:   | FileCheck %s --check-prefix=CHECK-MULTI
+RUN: cmp %t3.multi %t3.comma
+# Check counts are 2x the original
+RUN: merge-fdata %t3 %t3 | sort > %t3.x2
+RUN: sort %t3.multi > %t3.multi.x2
+RUN: cmp %t3.x2 %t3.multi.x2
 
 CHECK-NOT: PERF2BOLT-ERROR
 CHECK-NOT: !! WARNING !! This high mismatch ratio indicates the input binary is probably not the same binary used during profiling collection.
 CHECK: HEATMAP: building heat map
 CHECK: BOLT-INFO: Functions with density >= {{.*}} account for 99.00% total sample counts.
+CHECK-MULTI: BOLT-INFO
 
 RUN: %clang %S/Inputs/perf_test.c -no-pie -fuse-ld=lld -o %t4
 RUN: perf record -Fmax -e cycles:u -o %t5 -- %t4
diff --git a/bolt/tools/driver/llvm-bolt.cpp b/bolt/tools/driver/llvm-bolt.cpp
index 2cd5151bed49a..cc8e6d70001eb 100644
--- a/bolt/tools/driver/llvm-bolt.cpp
+++ b/bolt/tools/driver/llvm-bolt.cpp
@@ -234,8 +234,9 @@ int main(int argc, char **argv) {
       }
 
       if (!opts::PerfData.empty()) {
-        if (Error E = RI.setProfile(opts::PerfData))
-          report_error(opts::PerfData, std::move(E));
+        for (StringRef Filename : opts::PerfData)
+          if (Error E = RI.setProfile(Filename))
+            report_error(Filename, std::move(E));
       } else if (opts::AggregateOnly) {
         errs() << ToolName << ": missing required -perfdata option.\n";
         exit(1);
diff --git a/bolt/tools/heatmap/heatmap.cpp b/bolt/tools/heatmap/heatmap.cpp
index 17a969e0c8598..682eb60779025 100644
--- a/bolt/tools/heatmap/heatmap.cpp
+++ b/bolt/tools/heatmap/heatmap.cpp
@@ -121,8 +121,9 @@ int main(int argc, char **argv) {
       report_error("RewriteInstance", std::move(E));
 
     RewriteInstance &RI = *RIOrErr.get();
-    if (Error E = RI.setProfile(opts::PerfData))
-      report_error(opts::PerfData, std::move(E));
+    for (StringRef Filename : opts::PerfData)
+      if (Error E = RI.setProfile(Filename))
+        report_error(Filename, std::move(E));
 
     if (Error E = RI.run())
       report_error(opts::InputFilename, std::move(E));

``````````

</details>


https://github.com/llvm/llvm-project/pull/199324


More information about the llvm-branch-commits mailing list