[llvm] [BOLT] Drop parsing sample PC when processing perf data with LBR (PR #123420)
Amir Ayupov via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 17 15:10:25 PST 2025
https://github.com/aaupov created https://github.com/llvm/llvm-project/pull/123420
Remove options to generate autofdo data (unused) and `use-event-pc`
(not beneficial).
For an 80MB perf.data file, this makes perf script parsing 2.8±0.8x faster.
From 9af0e9a9d4d540aa885f311a8d77cdbb43cfabd4 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Fri, 17 Jan 2025 15:10:14 -0800
Subject: [PATCH] [𝘀𝗽𝗿] initial version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.4
---
bolt/include/bolt/Profile/DataAggregator.h | 4 -
bolt/lib/Profile/DataAggregator.cpp | 117 +++------------------
2 files changed, 14 insertions(+), 107 deletions(-)
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index 320623cfa15af1..aa83d7f9b13ab5 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -80,7 +80,6 @@ class DataAggregator : public DataReader {
private:
struct PerfBranchSample {
SmallVector<LBREntry, 32> LBR;
- uint64_t PC;
};
struct PerfBasicSample {
@@ -334,9 +333,6 @@ class DataAggregator : public DataReader {
/// Process all branch events.
void processBranchEvents();
- /// This member function supports generating data for AutoFDO LLVM tools.
- std::error_code writeAutoFDOData(StringRef OutputFilename);
-
/// Parse the full output generated by perf script to report non-LBR samples.
std::error_code parseBasicEvents();
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 2b02086e3e0c99..a5252e18a0fdd6 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -108,15 +108,6 @@ TimeAggregator("time-aggr",
cl::ZeroOrMore,
cl::cat(AggregatorCategory));
-static cl::opt<bool>
- UseEventPC("use-event-pc",
- cl::desc("use event PC in combination with LBR sampling"),
- cl::cat(AggregatorCategory));
-
-static cl::opt<bool> WriteAutoFDOData(
- "autofdo", cl::desc("generate autofdo textual data instead of bolt data"),
- cl::cat(AggregatorCategory));
-
} // namespace opts
namespace {
@@ -185,16 +176,16 @@ void DataAggregator::start() {
MainEventsPPI,
"script -F pid,event,ip",
/*Wait = */false);
- } else if (!opts::ITraceAggregation.empty()) {
- std::string ItracePerfScriptArgs = llvm::formatv(
- "script -F pid,ip,brstack --itrace={0}", opts::ITraceAggregation);
- launchPerfProcess("branch events with itrace", MainEventsPPI,
- ItracePerfScriptArgs.c_str(),
- /*Wait = */ false);
} else {
- launchPerfProcess("branch events",
+ std::string Name = "branch events";
+ std::string PerfScriptArgs = "script -F pid,brstack";
+ if (!opts::ITraceAggregation.empty()) {
+ Name += " with itrace";
+ PerfScriptArgs += " --itrace=" + opts::ITraceAggregation;
+ }
+ launchPerfProcess(Name,
MainEventsPPI,
- "script -F pid,ip,brstack",
+ PerfScriptArgs.c_str(),
/*Wait = */false);
}
@@ -381,67 +372,6 @@ void DataAggregator::parsePreAggregated() {
}
}
-std::error_code DataAggregator::writeAutoFDOData(StringRef OutputFilename) {
- outs() << "PERF2BOLT: writing data for autofdo tools...\n";
- NamedRegionTimer T("writeAutoFDO", "Processing branch events", TimerGroupName,
- TimerGroupDesc, opts::TimeAggregator);
-
- std::error_code EC;
- raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
- if (EC)
- return EC;
-
- // Format:
- // number of unique traces
- // from_1-to_1:count_1
- // from_2-to_2:count_2
- // ......
- // from_n-to_n:count_n
- // number of unique sample addresses
- // addr_1:count_1
- // addr_2:count_2
- // ......
- // addr_n:count_n
- // number of unique LBR entries
- // src_1->dst_1:count_1
- // src_2->dst_2:count_2
- // ......
- // src_n->dst_n:count_n
-
- const uint64_t FirstAllocAddress = this->BC->FirstAllocAddress;
-
- // AutoFDO addresses are relative to the first allocated loadable program
- // segment
- auto filterAddress = [&FirstAllocAddress](uint64_t Address) -> uint64_t {
- if (Address < FirstAllocAddress)
- return 0;
- return Address - FirstAllocAddress;
- };
-
- OutFile << FallthroughLBRs.size() << "\n";
- for (const auto &[Trace, Info] : FallthroughLBRs) {
- OutFile << formatv("{0:x-}-{1:x-}:{2}\n", filterAddress(Trace.From),
- filterAddress(Trace.To),
- Info.InternCount + Info.ExternCount);
- }
-
- OutFile << BasicSamples.size() << "\n";
- for (const auto [PC, HitCount] : BasicSamples)
- OutFile << formatv("{0:x-}:{1}\n", filterAddress(PC), HitCount);
-
- OutFile << BranchLBRs.size() << "\n";
- for (const auto &[Trace, Info] : BranchLBRs) {
- OutFile << formatv("{0:x-}->{1:x-}:{2}\n", filterAddress(Trace.From),
- filterAddress(Trace.To), Info.TakenCount);
- }
-
- outs() << "PERF2BOLT: wrote " << FallthroughLBRs.size() << " unique traces, "
- << BasicSamples.size() << " sample addresses and " << BranchLBRs.size()
- << " unique branches to " << OutputFilename << "\n";
-
- return std::error_code();
-}
-
void DataAggregator::filterBinaryMMapInfo() {
if (opts::FilterPID) {
auto MMapInfoIter = BinaryMMapInfo.find(opts::FilterPID);
@@ -583,15 +513,6 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
(opts::BasicAggregation && parseBasicEvents()))
errs() << "PERF2BOLT: failed to parse samples\n";
- // We can finish early if the goal is just to generate data for autofdo
- if (opts::WriteAutoFDOData) {
- if (std::error_code EC = writeAutoFDOData(opts::OutputFilename))
- errs() << "Error writing autofdo data to file: " << EC.message() << "\n";
-
- deleteTempFiles();
- exit(0);
- }
-
// Special handling for memory events
if (prepareToParse("mem events", MemEventsPPI, MemEventsErrorCallback))
return Error::success();
@@ -1158,14 +1079,6 @@ ErrorOr<DataAggregator::PerfBranchSample> DataAggregator::parseBranchSample() {
return make_error_code(errc::no_such_process);
}
- while (checkAndConsumeFS()) {
- }
-
- ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true);
- if (std::error_code EC = PCRes.getError())
- return EC;
- Res.PC = PCRes.get();
-
if (checkAndConsumeNewLine())
return Res;
@@ -1472,9 +1385,9 @@ std::error_code DataAggregator::printLBRHeatMap() {
uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
bool NeedsSkylakeFix) {
uint64_t NumTraces{0};
- // LBRs are stored in reverse execution order. NextPC refers to the next
- // recorded executed PC.
- uint64_t NextPC = opts::UseEventPC ? Sample.PC : 0;
+ // LBRs are stored in reverse execution order. NextLBR refers to the next
+ // executed branch record.
+ const LBREntry *NextLBR{nullptr};
uint32_t NumEntry = 0;
for (const LBREntry &LBR : Sample.LBR) {
++NumEntry;
@@ -1486,10 +1399,10 @@ uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
// chronological order)
if (NeedsSkylakeFix && NumEntry <= 2)
continue;
- if (NextPC) {
+ if (NextLBR) {
// Record fall-through trace.
const uint64_t TraceFrom = LBR.To;
- const uint64_t TraceTo = NextPC;
+ const uint64_t TraceTo = NextLBR->From;
const BinaryFunction *TraceBF =
getBinaryFunctionContainingAddress(TraceFrom);
if (TraceBF && TraceBF->containsAddress(TraceTo)) {
@@ -1524,7 +1437,7 @@ uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
}
++NumTraces;
}
- NextPC = LBR.From;
+ NextLBR = &LBR;
uint64_t From = getBinaryFunctionContainingAddress(LBR.From) ? LBR.From : 0;
uint64_t To = getBinaryFunctionContainingAddress(LBR.To) ? LBR.To : 0;
@@ -1561,8 +1474,6 @@ std::error_code DataAggregator::parseBranchEvents() {
++NumSamples;
PerfBranchSample &Sample = SampleRes.get();
- if (opts::WriteAutoFDOData)
- ++BasicSamples[Sample.PC];
if (Sample.LBR.empty()) {
++NumSamplesNoLBR;
More information about the llvm-commits mailing list