[llvm] [BOLT] Drop parsing sample PC when processing perf data with LBR (PR #123420)

Amir Ayupov via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 17 15:10:25 PST 2025


https://github.com/aaupov created https://github.com/llvm/llvm-project/pull/123420

Remove options to generate autofdo data (unused) and `use-event-pc`
(not beneficial).

For 80MB perf.data, makes perf script parsing 2.8±0.8x faster.


>From 9af0e9a9d4d540aa885f311a8d77cdbb43cfabd4 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Fri, 17 Jan 2025 15:10:14 -0800
Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?=
 =?UTF-8?q?l=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4
---
 bolt/include/bolt/Profile/DataAggregator.h |   4 -
 bolt/lib/Profile/DataAggregator.cpp        | 117 +++------------------
 2 files changed, 14 insertions(+), 107 deletions(-)

diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index 320623cfa15af1..aa83d7f9b13ab5 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -80,7 +80,6 @@ class DataAggregator : public DataReader {
 private:
   struct PerfBranchSample {
     SmallVector<LBREntry, 32> LBR;
-    uint64_t PC;
   };
 
   struct PerfBasicSample {
@@ -334,9 +333,6 @@ class DataAggregator : public DataReader {
   /// Process all branch events.
   void processBranchEvents();
 
-  /// This member function supports generating data for AutoFDO LLVM tools.
-  std::error_code writeAutoFDOData(StringRef OutputFilename);
-
   /// Parse the full output generated by perf script to report non-LBR samples.
   std::error_code parseBasicEvents();
 
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 2b02086e3e0c99..a5252e18a0fdd6 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -108,15 +108,6 @@ TimeAggregator("time-aggr",
   cl::ZeroOrMore,
   cl::cat(AggregatorCategory));
 
-static cl::opt<bool>
-    UseEventPC("use-event-pc",
-               cl::desc("use event PC in combination with LBR sampling"),
-               cl::cat(AggregatorCategory));
-
-static cl::opt<bool> WriteAutoFDOData(
-    "autofdo", cl::desc("generate autofdo textual data instead of bolt data"),
-    cl::cat(AggregatorCategory));
-
 } // namespace opts
 
 namespace {
@@ -185,16 +176,16 @@ void DataAggregator::start() {
                       MainEventsPPI,
                       "script -F pid,event,ip",
                       /*Wait = */false);
-  } else if (!opts::ITraceAggregation.empty()) {
-    std::string ItracePerfScriptArgs = llvm::formatv(
-        "script -F pid,ip,brstack --itrace={0}", opts::ITraceAggregation);
-    launchPerfProcess("branch events with itrace", MainEventsPPI,
-                      ItracePerfScriptArgs.c_str(),
-                      /*Wait = */ false);
   } else {
-    launchPerfProcess("branch events",
+    std::string Name = "branch events";
+    std::string PerfScriptArgs = "script -F pid,brstack";
+    if (!opts::ITraceAggregation.empty()) {
+      Name += " with itrace";
+      PerfScriptArgs += " --itrace=" + opts::ITraceAggregation;
+    }
+    launchPerfProcess(Name,
                       MainEventsPPI,
-                      "script -F pid,ip,brstack",
+                      PerfScriptArgs.c_str(),
                       /*Wait = */false);
   }
 
@@ -381,67 +372,6 @@ void DataAggregator::parsePreAggregated() {
   }
 }
 
-std::error_code DataAggregator::writeAutoFDOData(StringRef OutputFilename) {
-  outs() << "PERF2BOLT: writing data for autofdo tools...\n";
-  NamedRegionTimer T("writeAutoFDO", "Processing branch events", TimerGroupName,
-                     TimerGroupDesc, opts::TimeAggregator);
-
-  std::error_code EC;
-  raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
-  if (EC)
-    return EC;
-
-  // Format:
-  // number of unique traces
-  // from_1-to_1:count_1
-  // from_2-to_2:count_2
-  // ......
-  // from_n-to_n:count_n
-  // number of unique sample addresses
-  // addr_1:count_1
-  // addr_2:count_2
-  // ......
-  // addr_n:count_n
-  // number of unique LBR entries
-  // src_1->dst_1:count_1
-  // src_2->dst_2:count_2
-  // ......
-  // src_n->dst_n:count_n
-
-  const uint64_t FirstAllocAddress = this->BC->FirstAllocAddress;
-
-  // AutoFDO addresses are relative to the first allocated loadable program
-  // segment
-  auto filterAddress = [&FirstAllocAddress](uint64_t Address) -> uint64_t {
-    if (Address < FirstAllocAddress)
-      return 0;
-    return Address - FirstAllocAddress;
-  };
-
-  OutFile << FallthroughLBRs.size() << "\n";
-  for (const auto &[Trace, Info] : FallthroughLBRs) {
-    OutFile << formatv("{0:x-}-{1:x-}:{2}\n", filterAddress(Trace.From),
-                       filterAddress(Trace.To),
-                       Info.InternCount + Info.ExternCount);
-  }
-
-  OutFile << BasicSamples.size() << "\n";
-  for (const auto [PC, HitCount] : BasicSamples)
-    OutFile << formatv("{0:x-}:{1}\n", filterAddress(PC), HitCount);
-
-  OutFile << BranchLBRs.size() << "\n";
-  for (const auto &[Trace, Info] : BranchLBRs) {
-    OutFile << formatv("{0:x-}->{1:x-}:{2}\n", filterAddress(Trace.From),
-                       filterAddress(Trace.To), Info.TakenCount);
-  }
-
-  outs() << "PERF2BOLT: wrote " << FallthroughLBRs.size() << " unique traces, "
-         << BasicSamples.size() << " sample addresses and " << BranchLBRs.size()
-         << " unique branches to " << OutputFilename << "\n";
-
-  return std::error_code();
-}
-
 void DataAggregator::filterBinaryMMapInfo() {
   if (opts::FilterPID) {
     auto MMapInfoIter = BinaryMMapInfo.find(opts::FilterPID);
@@ -583,15 +513,6 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
       (opts::BasicAggregation && parseBasicEvents()))
     errs() << "PERF2BOLT: failed to parse samples\n";
 
-  // We can finish early if the goal is just to generate data for autofdo
-  if (opts::WriteAutoFDOData) {
-    if (std::error_code EC = writeAutoFDOData(opts::OutputFilename))
-      errs() << "Error writing autofdo data to file: " << EC.message() << "\n";
-
-    deleteTempFiles();
-    exit(0);
-  }
-
   // Special handling for memory events
   if (prepareToParse("mem events", MemEventsPPI, MemEventsErrorCallback))
     return Error::success();
@@ -1158,14 +1079,6 @@ ErrorOr<DataAggregator::PerfBranchSample> DataAggregator::parseBranchSample() {
     return make_error_code(errc::no_such_process);
   }
 
-  while (checkAndConsumeFS()) {
-  }
-
-  ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true);
-  if (std::error_code EC = PCRes.getError())
-    return EC;
-  Res.PC = PCRes.get();
-
   if (checkAndConsumeNewLine())
     return Res;
 
@@ -1472,9 +1385,9 @@ std::error_code DataAggregator::printLBRHeatMap() {
 uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
                                         bool NeedsSkylakeFix) {
   uint64_t NumTraces{0};
-  // LBRs are stored in reverse execution order. NextPC refers to the next
-  // recorded executed PC.
-  uint64_t NextPC = opts::UseEventPC ? Sample.PC : 0;
+  // LBRs are stored in reverse execution order. NextLBR refers to the next
+  // executed branch record.
+  const LBREntry *NextLBR{nullptr};
   uint32_t NumEntry = 0;
   for (const LBREntry &LBR : Sample.LBR) {
     ++NumEntry;
@@ -1486,10 +1399,10 @@ uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
     // chronological order)
     if (NeedsSkylakeFix && NumEntry <= 2)
       continue;
-    if (NextPC) {
+    if (NextLBR) {
       // Record fall-through trace.
       const uint64_t TraceFrom = LBR.To;
-      const uint64_t TraceTo = NextPC;
+      const uint64_t TraceTo = NextLBR->From;
       const BinaryFunction *TraceBF =
           getBinaryFunctionContainingAddress(TraceFrom);
       if (TraceBF && TraceBF->containsAddress(TraceTo)) {
@@ -1524,7 +1437,7 @@ uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
       }
       ++NumTraces;
     }
-    NextPC = LBR.From;
+    NextLBR = &LBR;
 
     uint64_t From = getBinaryFunctionContainingAddress(LBR.From) ? LBR.From : 0;
     uint64_t To = getBinaryFunctionContainingAddress(LBR.To) ? LBR.To : 0;
@@ -1561,8 +1474,6 @@ std::error_code DataAggregator::parseBranchEvents() {
     ++NumSamples;
 
     PerfBranchSample &Sample = SampleRes.get();
-    if (opts::WriteAutoFDOData)
-      ++BasicSamples[Sample.PC];
 
     if (Sample.LBR.empty()) {
       ++NumSamplesNoLBR;



More information about the llvm-commits mailing list