[llvm-branch-commits] [BOLT] Support pre-aggregated basic sample profile (PR #140196)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu May 15 23:15:33 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-bolt
Author: Amir Ayupov (aaupov)
<details>
<summary>Changes</summary>
Define a pre-aggregated basic sample format:
```
S <location> <count>
```
Test Plan: update pre-aggregated-perf.test
---
Full diff: https://github.com/llvm/llvm-project/pull/140196.diff
5 Files Affected:
- (modified) bolt/include/bolt/Profile/DataAggregator.h (+20-14)
- (modified) bolt/lib/Profile/DataAggregator.cpp (+49-39)
- (added) bolt/test/X86/Inputs/pre-aggregated-basic.txt (+18)
- (modified) bolt/test/X86/pre-aggregated-perf.test (+8)
- (modified) bolt/test/link_fdata.py (+3-3)
``````````diff
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index 6d918134137d5..7d60b4689fb04 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -370,26 +370,25 @@ class DataAggregator : public DataReader {
/// memory.
///
/// File format syntax:
- /// {B|F|f|T} [<start_id>:]<start_offset> [<end_id>:]<end_offset> [<ft_end>]
- /// <count> [<mispred_count>]
+ /// {S|B|F|f|T} <start> [<end>] [<ft_end>] <count> [<mispred_count>]
///
- /// B - indicates an aggregated branch
- /// F - an aggregated fall-through
+ /// where <start>, <end>, <ft_end> have the format [<id>:]<offset>
+ ///
+ /// S - indicates an aggregated basic sample at <start>
+ /// B - indicates an aggregated branch from <start> to <end>
+ /// F - an aggregated fall-through from <start> to <end>
/// f - an aggregated fall-through with external origin - used to disambiguate
/// between a return hitting a basic block head and a regular internal
/// jump to the block
- /// T - an aggregated trace: branch with a fall-through (from, to, ft_end)
- ///
- /// <start_id> - build id of the object containing the start address. We can
- /// skip it for the main binary and use "X" for an unknown object. This will
- /// save some space and facilitate human parsing.
- ///
- /// <start_offset> - hex offset from the object base load address (0 for the
- /// main executable unless it's PIE) to the start address.
+ /// T - an aggregated trace: branch from <start> to <end> with a fall-through
+ /// to <ft_end>
///
- /// <end_id>, <end_offset> - same for the end address.
+ /// <id> - build id of the object containing the address. We can skip it for
+ /// the main binary and use "X" for an unknown object. This will save some
+ /// space and facilitate human parsing.
///
- /// <ft_end> - same for the fallthrough_end address.
+ /// <offset> - hex offset from the object base load address (0 for the
+ /// main executable unless it's PIE) to the address.
///
/// <count> - total aggregated count of the branch or a fall-through.
///
@@ -397,10 +396,17 @@ class DataAggregator : public DataReader {
/// Omitted for fall-throughs.
///
/// Example:
+ /// Basic samples profile:
+ /// S 41be50 3
+ ///
+ /// Soft-deprecated branch profile with separate branches and fall-throughs:
/// F 41be50 41be50 3
/// F 41be90 41be90 4
/// B 4b1942 39b57f0 3 0
/// B 4b196f 4b19e0 2 0
+ ///
+ /// Recommended branch profile with pre-aggregated traces:
+ /// T 4b196f 4b19e0 4b19ef 2
void parsePreAggregated();
/// Parse the full output of pre-aggregated LBR samples generated by
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index aa681e633c0d8..6e889908003da 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -1216,54 +1216,54 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator);
if (std::error_code EC = TypeOrErr.getError())
return EC;
- enum AggregatedLBREntry { TRACE, BRANCH, FT, FT_EXTERNAL_ORIGIN, INVALID };
+ enum AggregatedLBREntry {
+ TRACE,
+ SAMPLE,
+ BRANCH,
+ FT,
+ FT_EXTERNAL_ORIGIN,
+ INVALID
+ };
auto Type = StringSwitch<AggregatedLBREntry>(TypeOrErr.get())
.Case("T", TRACE)
+ .Case("S", SAMPLE)
.Case("B", BRANCH)
.Case("F", FT)
.Case("f", FT_EXTERNAL_ORIGIN)
.Default(INVALID);
if (Type == INVALID) {
- reportError("expected T, B, F or f");
+ reportError("expected T, S, B, F or f");
return make_error_code(llvm::errc::io_error);
}
- while (checkAndConsumeFS()) {
- }
- ErrorOr<Location> From = parseLocationOrOffset();
- if (std::error_code EC = From.getError())
- return EC;
+ std::optional<Location> Addrs[3];
+ int AddrNum = 2;
+ if (Type == TRACE)
+ AddrNum = 3;
+ else if (Type == SAMPLE)
+ AddrNum = 1;
- while (checkAndConsumeFS()) {
- }
- ErrorOr<Location> To = parseLocationOrOffset();
- if (std::error_code EC = To.getError())
- return EC;
+ int64_t Counters[2];
+ int CounterNum = 1;
+ if (Type == BRANCH)
+ CounterNum = 2;
- ErrorOr<Location> TraceFtEnd = std::error_code();
- if (Type == AggregatedLBREntry::TRACE) {
+ for (int I = 0; I < AddrNum; ++I) {
while (checkAndConsumeFS()) {
}
- TraceFtEnd = parseLocationOrOffset();
- if (std::error_code EC = TraceFtEnd.getError())
- return EC;
- }
-
- while (checkAndConsumeFS()) {
+ if (ErrorOr<Location> Addr = parseLocationOrOffset())
+ Addrs[I] = Addr.get();
+ else
+ return Addr.getError();
}
- ErrorOr<int64_t> Frequency =
- parseNumberField(FieldSeparator, Type != AggregatedLBREntry::BRANCH);
- if (std::error_code EC = Frequency.getError())
- return EC;
- uint64_t Mispreds = 0;
- if (Type == AggregatedLBREntry::BRANCH) {
+ for (int I = 0; I < CounterNum; ++I) {
while (checkAndConsumeFS()) {
}
- ErrorOr<int64_t> MispredsOrErr = parseNumberField(FieldSeparator, true);
- if (std::error_code EC = MispredsOrErr.getError())
- return EC;
- Mispreds = static_cast<uint64_t>(MispredsOrErr.get());
+ if (ErrorOr<int64_t> Count = parseNumberField(FieldSeparator, I + 1 == CounterNum))
+ Counters[I] = Count.get();
+ else
+ return Count.getError();
}
if (!checkAndConsumeNewLine()) {
@@ -1271,16 +1271,25 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
return make_error_code(llvm::errc::io_error);
}
- BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From->Offset);
- BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To->Offset);
+ const uint64_t FromOffset = Addrs[0]->Offset;
+ BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(FromOffset);
+ if (FromFunc)
+ FromFunc->setHasProfileAvailable();
- for (BinaryFunction *BF : {FromFunc, ToFunc})
- if (BF)
- BF->setHasProfileAvailable();
+ if (Type == SAMPLE) {
+ BasicSamples[FromOffset] += Counters[0];
+ return std::error_code();
+ }
+
+ const uint64_t ToOffset = Addrs[1]->Offset;
+ BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(ToOffset);
+ if (ToFunc)
+ ToFunc->setHasProfileAvailable();
- uint64_t Count = static_cast<uint64_t>(Frequency.get());
+ int64_t Count = Counters[0];
+ int64_t Mispreds = Counters[1];
- Trace Trace(From->Offset, To->Offset);
+ Trace Trace(FromOffset, ToOffset);
// Taken trace
if (Type == TRACE || Type == BRANCH) {
TakenBranchInfo &Info = BranchLBRs[Trace];
@@ -1291,8 +1300,9 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
}
// Construct fallthrough part of the trace
if (Type == TRACE) {
- Trace.From = To->Offset;
- Trace.To = TraceFtEnd->Offset;
+ const uint64_t TraceFtEndOffset = Addrs[2]->Offset;
+ Trace.From = ToOffset;
+ Trace.To = TraceFtEndOffset;
Type = FromFunc == ToFunc ? FT : FT_EXTERNAL_ORIGIN;
}
// Add fallthrough trace
diff --git a/bolt/test/X86/Inputs/pre-aggregated-basic.txt b/bolt/test/X86/Inputs/pre-aggregated-basic.txt
new file mode 100644
index 0000000000000..28bcacca70ee1
--- /dev/null
+++ b/bolt/test/X86/Inputs/pre-aggregated-basic.txt
@@ -0,0 +1,18 @@
+S 4005f0 1
+S 4005f0 1
+S 400610 1
+S 400ad1 2
+S 400b10 1
+S 400bb7 1
+S 400bbc 2
+S 400d90 1
+S 400dae 1
+S 400e00 2
+S 401170 22
+S 401180 58
+S 4011a0 33
+S 4011a9 33
+S 4011ad 58
+S 4011b2 22
+S X:7f36d18d60c0 2
+S X:7f36d18f2ce0 1
diff --git a/bolt/test/X86/pre-aggregated-perf.test b/bolt/test/X86/pre-aggregated-perf.test
index c05a06bf74945..63488a9062bdd 100644
--- a/bolt/test/X86/pre-aggregated-perf.test
+++ b/bolt/test/X86/pre-aggregated-perf.test
@@ -57,6 +57,14 @@ RUN: llvm-bolt %t.exe -o %t.bolt.yaml --pa -p %p/Inputs/pre-aggregated.txt \
RUN: --aggregate-only --profile-format=yaml --profile-use-dfs
RUN: cat %t.bolt.yaml | FileCheck %s -check-prefix=NEWFORMAT
+## Test pre-aggregated basic profile
+RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated-basic.txt -o %t.ba \
+RUN: 2>&1 | FileCheck %s --check-prefix=BASIC-ERROR
+RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated-basic.txt -o %t.ba \
+RUN: -nl 2>&1 | FileCheck %s --check-prefix=BASIC-SUCCESS
+BASIC-ERROR: BOLT-INFO: 0 out of 7 functions in the binary (0.0%) have non-empty execution profile
+BASIC-SUCCESS: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile
+
PERF2BOLT: 0 [unknown] 7f36d18d60c0 1 main 53c 0 2
PERF2BOLT: 1 main 451 1 SolveCubic 0 0 2
PERF2BOLT: 1 main 490 0 [unknown] 4005f0 0 1
diff --git a/bolt/test/link_fdata.py b/bolt/test/link_fdata.py
index bcf9a777922d5..6a391c10b9481 100755
--- a/bolt/test/link_fdata.py
+++ b/bolt/test/link_fdata.py
@@ -36,9 +36,9 @@
fdata_pat = re.compile(r"([01].*) (?P<exec>\d+) (?P<mispred>\d+)")
# Pre-aggregated profile:
-# {T|B|F|f} [<start_id>:]<start_offset> [<end_id>:]<end_offset> [<ft_end>]
-# <count> [<mispred_count>]
-preagg_pat = re.compile(r"(?P<type>[TBFf]) (?P<offsets_count>.*)")
+# {T|S|B|F|f} <start> [<end>] [<ft_end>] <count> [<mispred_count>]
+# where <start>, <end>, <ft_end> have the format [<id>:]<offset>
+preagg_pat = re.compile(r"(?P<type>[TSBFf]) (?P<offsets_count>.*)")
# No-LBR profile:
# <is symbol?> <closest elf symbol or DSO name> <relative address> <count>
``````````
</details>
https://github.com/llvm/llvm-project/pull/140196
More information about the llvm-branch-commits mailing list