[llvm-branch-commits] [BOLT] Add perf2bolt pre-aggregated profile output (PR #199465)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sun May 24 17:05:45 PDT 2026
llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-bolt
Author: Amir Ayupov (aaupov)
<details>
<summary>Changes</summary>
Add a pre-aggregated profile output format (`--profile-format=preagg`)
so perf.data can be pre-parsed/aggregated and used as input with -pa.
Supports branch and basic profiles, emits traces (T/R) and S records.
Currently only covers the main binary; can be extended to cover multi-DSO.
Test Plan: Updated perf_test.test, added perf_brstack.test
---
Full diff: https://github.com/llvm/llvm-project/pull/199465.diff
8 Files Affected:
- (modified) bolt/docs/profiles.md (+7)
- (modified) bolt/include/bolt/Profile/DataAggregator.h (+5-2)
- (modified) bolt/include/bolt/Utils/CommandLineOpts.h (+1-1)
- (modified) bolt/lib/Profile/DataAggregator.cpp (+30-2)
- (modified) bolt/lib/Utils/CommandLineOpts.cpp (+3-1)
- (modified) bolt/test/perf2bolt/lit.local.cfg (+8-1)
- (added) bolt/test/perf2bolt/perf_brstack.test (+12)
- (modified) bolt/test/perf2bolt/perf_test.test (+4)
``````````diff
diff --git a/bolt/docs/profiles.md b/bolt/docs/profiles.md
index f208620be85ce..4900c2fb008d0 100644
--- a/bolt/docs/profiles.md
+++ b/bolt/docs/profiles.md
@@ -16,6 +16,8 @@ $ perf2bolt executable \
-o perf.fdata [-w perf.yaml]
# the output format for `-o` can be switched with `--profile-format`:
-o perf.yaml --profile-format=yaml
+# perf.data can also be cached as pre-aggregated trace data:
+ -o perf.preagg --profile-format=preagg
```
# Unsymbolized profiles
@@ -153,6 +155,11 @@ Pre-aggregated profiles can be generated by external tools. See
[ebpf-bolt](https://github.com/aaupov/ebpf-bolt) for a reference
implementation using eBPF-based collection.
+`perf2bolt` can generate a pre-aggregated profile from `perf.data`:
+```
+perf2bolt ./binary -p perf.data -o perf.preagg --profile-format=preagg
+```
+
# Symbolized profiles
The profiles accepted by llvm-bolt. fdata is the legacy format, YAML is the rich (metadata-enabled) format.
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index c5133e90d07a6..dca24b9c57983 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -555,8 +555,11 @@ class DataAggregator : public DataReader {
/// Force all subprocesses to stop and cancel aggregation
void abort();
- /// Dump data structures into a file readable by llvm-bolt
- std::error_code writeAggregatedFile(StringRef OutputFilename) const;
+ /// Dump data structures into an fdata file readable by llvm-bolt.
+ std::error_code writeFdataFile(StringRef OutputFilename) const;
+
+ /// Dump TraceMap into a pre-aggregated file readable by perf2bolt -pa.
+ std::error_code writePreAggregatedFile(StringRef OutputFilename) const;
/// Dump translated data structures into YAML
std::error_code writeBATYAML(BinaryContext &BC,
diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h
index dc193477023d7..5a6440034350f 100644
--- a/bolt/include/bolt/Utils/CommandLineOpts.h
+++ b/bolt/include/bolt/Utils/CommandLineOpts.h
@@ -101,7 +101,7 @@ extern llvm::cl::opt<bool> UpdateBranchProtection;
extern llvm::cl::opt<SplitFunctionsStrategy> SplitStrategy;
// The format to use with -o in aggregation mode (perf2bolt)
-enum ProfileFormatKind { PF_Fdata, PF_YAML };
+enum ProfileFormatKind { PF_Fdata, PF_YAML, PF_PreAgg };
extern llvm::cl::opt<ProfileFormatKind> ProfileFormat;
extern llvm::cl::opt<bool> ShowDensity;
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 5b46e8a30729b..b5017ccdcdb7f 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -790,6 +790,13 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
exit(0);
}
+ if (opts::AggregateOnly &&
+ opts::ProfileFormat == opts::ProfileFormatKind::PF_PreAgg) {
+ if (std::error_code EC = writePreAggregatedFile(opts::OutputFilename))
+ report_error("cannot create output data file", EC);
+ exit(0);
+ }
+
return Error::success();
}
@@ -801,7 +808,7 @@ Error DataAggregator::readProfile(BinaryContext &BC) {
if (opts::AggregateOnly) {
if (opts::ProfileFormat == opts::ProfileFormatKind::PF_Fdata)
- if (std::error_code EC = writeAggregatedFile(opts::OutputFilename))
+ if (std::error_code EC = writeFdataFile(opts::OutputFilename))
report_error("cannot create output data file", EC);
// BAT YAML is handled by DataAggregator since normal YAML output requires
@@ -2325,7 +2332,28 @@ DataAggregator::getFileNameForBuildID(StringRef FileBuildID) {
}
std::error_code
-DataAggregator::writeAggregatedFile(StringRef OutputFilename) const {
+DataAggregator::writePreAggregatedFile(StringRef OutputFilename) const {
+ std::error_code EC;
+ raw_fd_ostream OS(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
+ if (EC)
+ return EC;
+
+ for (const auto &[Trace, Info] : Traces) {
+ const bool IsReturn = Returns.find(Trace.Branch) != Returns.end();
+ OS << formatv("{0} {1:x-} {2:x-} {3:x-} {4}\n", IsReturn ? 'R' : 'T',
+ Trace.Branch, Trace.From, Trace.To, Info.TakenCount);
+ }
+ OS << formatv("E {0:$[,]}\n", EventNames.keys());
+ for (const auto &[PC, Count] : BasicSamples)
+ OS << formatv("S {0:x-} {1}\n", PC, Count);
+
+ outs() << "PERF2BOLT: wrote " << Traces.size() + BasicSamples.size()
+ << " pre-aggregated objects to " << OutputFilename << "\n";
+
+ return std::error_code();
+}
+
+std::error_code DataAggregator::writeFdataFile(StringRef OutputFilename) const {
std::error_code EC;
raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
if (EC)
diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp
index cbd0be4a806ae..36a55d7a9d283 100644
--- a/bolt/lib/Utils/CommandLineOpts.cpp
+++ b/bolt/lib/Utils/CommandLineOpts.cpp
@@ -277,7 +277,9 @@ cl::opt<ProfileFormatKind> ProfileFormat(
"format to dump profile output in aggregation mode, default is fdata"),
cl::init(PF_Fdata),
cl::values(clEnumValN(PF_Fdata, "fdata", "offset-based plaintext format"),
- clEnumValN(PF_YAML, "yaml", "dense YAML representation")),
+ clEnumValN(PF_YAML, "yaml", "dense YAML representation"),
+ clEnumValN(PF_PreAgg, "preagg",
+ "pre-aggregated profile format")),
cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory));
cl::opt<std::string> SaveProfile("w",
diff --git a/bolt/test/perf2bolt/lit.local.cfg b/bolt/test/perf2bolt/lit.local.cfg
index 0fecf913aa98b..acad9786bd8d3 100644
--- a/bolt/test/perf2bolt/lit.local.cfg
+++ b/bolt/test/perf2bolt/lit.local.cfg
@@ -1,5 +1,12 @@
import shutil
import subprocess
-if shutil.which("perf") is not None and subprocess.run(["perf", "record", "-e", "cycles:u", "-o", "/dev/null", "--", "perf", "--version"], capture_output=True).returncode == 0:
+cmd = "perf record -e cycles:u -o /dev/null {} -- perf --version"
+cmd_basic = cmd.format("").split()
+cmd_brstack = cmd.format("-j any,u").split()
+
+if shutil.which("perf") is not None:
+ if subprocess.run(cmd_basic, capture_output=True).returncode == 0:
config.available_features.add("perf")
+ if subprocess.run(cmd_brstack, capture_output=True).returncode == 0:
+ config.available_features.add("perf-brstack")
diff --git a/bolt/test/perf2bolt/perf_brstack.test b/bolt/test/perf2bolt/perf_brstack.test
new file mode 100644
index 0000000000000..e93f440650006
--- /dev/null
+++ b/bolt/test/perf2bolt/perf_brstack.test
@@ -0,0 +1,12 @@
+# Check perf2bolt pre-aggregated profile emission from branch-stack perf data.
+
+REQUIRES: system-linux, perf-brstack
+
+RUN: %clang %S/Inputs/perf_test.c -no-pie -fuse-ld=lld -o %t
+RUN: perf record -Fmax -j any,u -e cycles:u -o %t.perf.data -- %t
+RUN: perf2bolt %t -p=%t.perf.data -o %t.fdata -ignore-build-id
+RUN: perf2bolt %t -p=%t.perf.data -o %t.preagg -ignore-build-id --profile-format=preagg
+RUN: perf2bolt %t -pa -p=%t.preagg -o %t.roundtrip.fdata -ignore-build-id
+RUN: sort %t.fdata > %t.fdata.sorted
+RUN: sort %t.roundtrip.fdata > %t.roundtrip.fdata.sorted
+RUN: diff %t.fdata.sorted %t.roundtrip.fdata.sorted
diff --git a/bolt/test/perf2bolt/perf_test.test b/bolt/test/perf2bolt/perf_test.test
index e34ac76632113..03b13398c1055 100644
--- a/bolt/test/perf2bolt/perf_test.test
+++ b/bolt/test/perf2bolt/perf_test.test
@@ -17,6 +17,10 @@ RUN: cmp %t3.multi %t3.comma
RUN: merge-fdata %t3 %t3 | sort > %t3.x2
RUN: sort %t3.multi > %t3.multi.x2
RUN: cmp %t3.x2 %t3.multi.x2
+# Pre-aggregated output: compare perf->preagg->fdata vs perf->fdata
+RUN: perf2bolt %t -p=%t2 -o %t2.pa -ba -ignore-build-id --profile-format=preagg
+RUN: perf2bolt %t -p=%t2.pa -o %t2.pa.fdata -ba -pa
+RUN: cmp %t2.pa.fdata %t3
CHECK-NOT: PERF2BOLT-ERROR
CHECK-NOT: !! WARNING !! This high mismatch ratio indicates the input binary is probably not the same binary used during profiling collection.
``````````
</details>
https://github.com/llvm/llvm-project/pull/199465
More information about the llvm-branch-commits mailing list