[llvm] [BOLT][Perf2bolt] Add support to generate pre-parsed perf data (PR #171144)

via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 10 09:28:52 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-bolt

Author: Ádám Kallai (kaadam)

<details>
<summary>Changes</summary>

Adding a generator to Perf2bolt is the initial step toward supporting large end-to-end tests. This functionality provides a unified format for pre-parsed profiles that Perf2bolt is able to consume.

The generator relies on the aggregator to spawn the required perf-script jobs based on the aggregation type, and merges the results of the perf-script jobs into a single file.
This hybrid profile will contain all required events such as BuildID, MMAP, TASK, BRSTACK, or MEM event for the aggregation.
The generator also creates a file header, where these events are listed along with the length information of their contents.
This makes it easier for Perf2bolt to read and parse this type of profile.

The two examples below show how to generate pre-parsed perf data. As input for ARM SPE aggregation:

 `perf2bolt -p perf.data -o perf.text --spe --generate-perf-text-data`

Or for basic aggregation:

 `perf2bolt -p perf.data -o perf.text --ba --generate-perf-text-data`

The results are placed into the perf.text output file.

---
Full diff: https://github.com/llvm/llvm-project/pull/171144.diff


2 Files Affected:

- (modified) bolt/include/bolt/Profile/DataAggregator.h (+71) 
- (modified) bolt/lib/Profile/DataAggregator.cpp (+70-3) 


``````````diff
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index db0f6903185b7..ad965a39402a9 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -149,8 +149,24 @@ class DataAggregator : public DataReader {
   /// Perf utility full path name
   std::string PerfPath;
 
+  enum PerfProcessType {
+    BUILDIDS = 0,
+    MAIN_EVENTS,
+    MEN_EVENTS,
+    MMAP_EVENTS,
+    TASK_EVENTS
+  };
+  friend raw_ostream &operator<<(raw_ostream &OS, const PerfProcessType &T);
+
   /// Perf process spawning bookkeeping
   struct PerfProcessInfo {
+    static constexpr StringLiteral BuildIDEventStr = "BUILDIDS";
+    static constexpr StringLiteral MainEventStr = "MAIN";
+    static constexpr StringLiteral MemEventStr = "MEM";
+    static constexpr StringLiteral MMapEventStr = "MMAP";
+    static constexpr StringLiteral TaskEventsStr = "TASK";
+
+    enum PerfProcessType Type;
     bool IsFinished{false};
     sys::ProcessInfo PI;
     SmallVector<char, 256> StdoutPath;
@@ -158,6 +174,7 @@ class DataAggregator : public DataReader {
   };
 
   /// Process info for spawned processes
+  PerfProcessInfo BuildIDProcessInfo;
   PerfProcessInfo MainEventsPPI;
   PerfProcessInfo MemEventsPPI;
   PerfProcessInfo MMapEventsPPI;
@@ -238,6 +255,9 @@ class DataAggregator : public DataReader {
   /// parsing.
   void launchPerfProcess(StringRef Name, PerfProcessInfo &PPI, StringRef Args);
 
+  /// Helps to generate pre-parsed perf text profile.
+  uint64_t getFileSize(const StringRef File);
+
   /// Delete all temporary files created to hold the output generated by spawned
   /// subprocesses during the aggregation job
   void deleteTempFiles();
@@ -444,6 +464,35 @@ class DataAggregator : public DataReader {
   /// an external tool.
   std::error_code parsePreAggregatedLBRSamples();
 
+  /// Dump pre-parsed perf profile data into a single file.
+  /// The generator relies on the aggregator work to spawn the required
+  /// perf-script jobs based on the aggregation type, and merges
+  /// the results of the perf-script jobs into a single file.
+  /// This hybrid profile contains all required events such as BuildID,
+  /// MMAP, TASK, Branch/BrStack, or Memory for the aggregation.
+  /// The generator also creates a file header, where these events
+  /// are listed along with the length information of their contents.
+  /// This is what pre-parsed profile data looks like for Basic Aggregation:
+  ///
+  /// perf2bolt -p perf.data -o perf.text --ba --generate-perf-text-data
+  ///
+  /// PERFTEXT;BUILDIDS=55;MMAP=2523121;MAIN=6426;TASK=352203;
+  /// 68c3da33ca43d5a74d501b5ea0012f782e04096e /example/bin1
+  /// c3a8496f2347b468a54a21072dc6cde7f0d88c6c /example/bin2
+  /// ...
+  /// bin1   20470 ... PERF_RECORD_MMAP2 20470/20470: ... r-xp /example/bin1
+  /// bin1   20470 ... PERF_RECORD_MMAP2 20470/20470: ... r-xp [vdso]
+  /// ...
+  /// bin1   20470 ... PERF_RECORD_COMM exec: bin1:20470/20470
+  /// bin1   20470 ... PERF_RECORD_EXIT(20470:20470):(20469:20469)
+  /// ...
+  /// 20470 branch: ffffffd1a4764d04 ffffffd1a4764cfc
+  /// 20470 branch: ffffffd1a44777f4 ffffffd1a4fc8af0
+  /// 20470 branch: ffffffd1a477cd14 ffffffd1a477cd00
+  /// 20470 branch: ffffffd1a4400f58 ffffffd1a4400f7c
+  /// ...
+  void generatePerfTextData();
+
   /// If \p Address falls into the binary address space based on memory
   /// mapping info \p MMI, then adjust it for further processing by subtracting
   /// the base load address. External addresses, i.e. addresses that do not
@@ -594,6 +643,28 @@ inline raw_ostream &operator<<(raw_ostream &OS,
     OS << " ... " << Twine::utohexstr(T.To);
   return OS;
 }
+
+inline raw_ostream &operator<<(raw_ostream &OS,
+                               const DataAggregator::PerfProcessType &T) {
+  switch (T) {
+  case DataAggregator::PerfProcessType::BUILDIDS:
+    OS << DataAggregator::PerfProcessInfo::BuildIDEventStr;
+    break;
+  case DataAggregator::PerfProcessType::MAIN_EVENTS:
+    OS << DataAggregator::PerfProcessInfo::MainEventStr;
+    break;
+  case DataAggregator::PerfProcessType::MEN_EVENTS:
+    OS << DataAggregator::PerfProcessInfo::MemEventStr;
+    break;
+  case DataAggregator::PerfProcessType::MMAP_EVENTS:
+    OS << DataAggregator::PerfProcessInfo::MMapEventStr;
+    break;
+  case DataAggregator::PerfProcessType::TASK_EVENTS:
+    OS << DataAggregator::PerfProcessInfo::TaskEventsStr;
+    break;
+  }
+  return OS;
+}
 } // namespace bolt
 } // namespace llvm
 
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 4e062038a3e4c..bb8bc62a8fae0 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -127,6 +127,11 @@ cl::opt<std::string>
                             "perf-script output in a textual format"),
                    cl::ReallyHidden, cl::init(""), cl::cat(AggregatorCategory));
 
+cl::opt<bool> GeneratePerfTextProfile(
+    "generate-perf-text-data",
+    cl::desc("Dump perf-script jobs' output into output file"), cl::Hidden,
+    cl::cat(AggregatorCategory));
+
 static cl::opt<bool>
 TimeAggregator("time-aggr",
   cl::desc("time BOLT aggregator"),
@@ -141,6 +146,8 @@ namespace {
 const char TimerGroupName[] = "aggregator";
 const char TimerGroupDesc[] = "Aggregator";
 
+constexpr const StringLiteral PerfTextMagicStr = "PERFTEXT";
+
 std::vector<SectionNameAndRange> getTextSections(const BinaryContext *BC) {
   std::vector<SectionNameAndRange> sections;
   for (BinarySection &Section : BC->sections()) {
@@ -171,6 +178,17 @@ void deleteTempFile(const std::string &FileName) {
 }
 }
 
+uint64_t DataAggregator::getFileSize(const StringRef File) {
+  uint64_t Size;
+  std::error_code EC = sys::fs::file_size(File, Size);
+  if (EC) {
+    errs() << "unable to obtain file size: " << EC.message() << "\n";
+    deleteTempFiles();
+    exit(1);
+  }
+  return Size;
+}
+
 void DataAggregator::deleteTempFiles() {
   for (std::string &FileName : TempFiles)
     deleteTempFile(FileName);
@@ -235,6 +253,8 @@ void DataAggregator::start() {
 
   launchPerfProcess("task events", TaskEventsPPI,
                     "script --show-task-events --no-itrace");
+
+  launchPerfProcess("buildid list", BuildIDProcessInfo, "buildid-list");
 }
 
 void DataAggregator::abort() {
@@ -305,8 +325,6 @@ void DataAggregator::processFileBuildID(StringRef FileBuildID) {
     errs() << "PERF-ERROR: return code " << ReturnCode << "\n" << ErrBuf;
   };
 
-  PerfProcessInfo BuildIDProcessInfo;
-  launchPerfProcess("buildid list", BuildIDProcessInfo, "buildid-list");
   if (prepareToParse("buildid", BuildIDProcessInfo, WarningCallback))
     return;
 
@@ -384,6 +402,53 @@ void DataAggregator::parsePreAggregated() {
   }
 }
 
+void DataAggregator::generatePerfTextData() {
+  std::error_code EC;
+  raw_fd_ostream OutFile(opts::OutputFilename, EC, sys::fs::OpenFlags::OF_None);
+  if (EC) {
+    errs() << "error opening output file: " << EC.message() << "\n";
+    deleteTempFiles();
+    exit(1);
+  }
+
+  SmallVector<PerfProcessInfo *, 5> ProcessInfos = {
+      &BuildIDProcessInfo, &MMapEventsPPI, &MainEventsPPI, &TaskEventsPPI};
+  if (opts::ParseMemProfile)
+    ProcessInfos.push_back(&MemEventsPPI);
+
+  // Create a file header as a table of the contents
+  // PERFTEXT;EVENT1={$SIZE};EVENT2={$SIZE}...
+  OutFile << PerfTextMagicStr << ";";
+  for (const auto PPI : ProcessInfos) {
+    std::string Error;
+    sys::Wait(PPI->PI, std::nullopt, &Error);
+    if (!Error.empty()) {
+      errs() << "PERF-ERROR: " << PerfPath << ": " << Error << "\n";
+      deleteTempFiles();
+      exit(1);
+    }
+    uint64_t FS = getFileSize(PPI->StdoutPath.data());
+    OutFile << PPI->Type << "=" << FS << ";";
+  }
+  OutFile << "\n";
+
+  // Merge all perf-scripts jobs' output into the single OutputFile
+  for (const auto PPI : ProcessInfos) {
+    ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
+        MemoryBuffer::getFileOrSTDIN(PPI->StdoutPath.data());
+    if (std::error_code EC = MB.getError()) {
+      errs() << "Cannot open " << PPI->StdoutPath.data() << ": " << EC.message()
+             << "\n";
+      deleteTempFiles();
+      exit(1);
+    }
+    OutFile << (*MB)->getBuffer();
+  }
+  OutFile.close();
+  deleteTempFiles();
+  exit(0);
+}
+
 void DataAggregator::filterBinaryMMapInfo() {
   if (opts::FilterPID) {
     auto MMapInfoIter = BinaryMMapInfo.find(opts::FilterPID);
@@ -596,7 +661,9 @@ void DataAggregator::imputeFallThroughs() {
 Error DataAggregator::preprocessProfile(BinaryContext &BC) {
   this->BC = &BC;
 
-  if (opts::ReadPreAggregated) {
+  if (opts::GeneratePerfTextProfile) {
+    generatePerfTextData();
+  } else if (opts::ReadPreAggregated) {
     parsePreAggregated();
   } else {
     parsePerfData(BC);

``````````

</details>


https://github.com/llvm/llvm-project/pull/171144


More information about the llvm-commits mailing list