[llvm] [BOLT][NFCI] Simplify DataAggregator using traces (PR #143289)
Amir Ayupov via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 9 17:12:45 PDT 2025
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/143289
>From c846650dab434c3eb570bbc4cb5773833f5325d2 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Sat, 7 Jun 2025 15:58:09 -0700
Subject: [PATCH 1/5] [𝘀𝗽𝗿] changes to main this commit is based on
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.4
[skip ci]
---
bolt/include/bolt/Core/BinaryFunction.h | 12 ++++
bolt/include/bolt/Profile/DataAggregator.h | 14 +++-
bolt/include/bolt/Profile/DataReader.h | 15 +----
.../include/bolt/Profile/ProfileYAMLMapping.h | 2 +
bolt/lib/Core/BinaryFunction.cpp | 2 +
bolt/lib/Passes/ProfileQualityStats.cpp | 3 +
bolt/lib/Profile/BoltAddressTranslation.cpp | 4 +-
bolt/lib/Profile/DataAggregator.cpp | 66 +++----------------
bolt/lib/Profile/DataReader.cpp | 6 ++
bolt/lib/Profile/YAMLProfileReader.cpp | 1 +
bolt/lib/Profile/YAMLProfileWriter.cpp | 1 +
bolt/test/X86/shrinkwrapping.test | 2 +
12 files changed, 56 insertions(+), 72 deletions(-)
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index 14957cba50174..ca8b786f4ab69 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -388,6 +388,10 @@ class BinaryFunction {
/// The profile data for the number of times the function was executed.
uint64_t ExecutionCount{COUNT_NO_PROFILE};
+ /// Profile data for the number of times this function was entered from
+ /// external code (DSO, JIT, etc).
+ uint64_t ExternEntryCount{0};
+
/// Profile match ratio.
float ProfileMatchRatio{0.0f};
@@ -1877,6 +1881,10 @@ class BinaryFunction {
return *this;
}
+ /// Set the profile data for the number of times the function was entered from
+ /// external code (DSO/JIT).
+ void setExternEntryCount(uint64_t Count) { ExternEntryCount = Count; }
+
/// Adjust execution count for the function by a given \p Count. The value
/// \p Count will be subtracted from the current function count.
///
@@ -1904,6 +1912,10 @@ class BinaryFunction {
/// Return COUNT_NO_PROFILE if there's no profile info.
uint64_t getExecutionCount() const { return ExecutionCount; }
+ /// Return the profile information about the number of times the function was
+ /// entered from external code (DSO/JIT).
+ uint64_t getExternEntryCount() const { return ExternEntryCount; }
+
/// Return the raw profile information about the number of branch
/// executions corresponding to this function.
uint64_t getRawSampleCount() const { return RawSampleCount; }
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index cb8e81b829a09..3f07a6dc03a4f 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -78,6 +78,13 @@ class DataAggregator : public DataReader {
static bool checkPerfDataMagic(StringRef FileName);
private:
+ struct LBREntry {
+ uint64_t From;
+ uint64_t To;
+ bool Mispred;
+ };
+ friend raw_ostream &operator<<(raw_ostream &OS, const LBREntry &);
+
struct PerfBranchSample {
SmallVector<LBREntry, 32> LBR;
};
@@ -476,7 +483,6 @@ class DataAggregator : public DataReader {
/// Debugging dump methods
void dump() const;
- void dump(const LBREntry &LBR) const;
void dump(const PerfBranchSample &Sample) const;
void dump(const PerfMemSample &Sample) const;
@@ -504,6 +510,12 @@ class DataAggregator : public DataReader {
friend class YAMLProfileWriter;
};
+
+inline raw_ostream &operator<<(raw_ostream &OS,
+ const DataAggregator::LBREntry &L) {
+ OS << formatv("{0:x} -> {1:x}/{2}", L.From, L.To, L.Mispred ? 'M' : 'P');
+ return OS;
+}
} // namespace bolt
} // namespace llvm
diff --git a/bolt/include/bolt/Profile/DataReader.h b/bolt/include/bolt/Profile/DataReader.h
index 5df1b5a8f4a00..6f527ba3931d4 100644
--- a/bolt/include/bolt/Profile/DataReader.h
+++ b/bolt/include/bolt/Profile/DataReader.h
@@ -32,18 +32,6 @@ namespace bolt {
class BinaryFunction;
-struct LBREntry {
- uint64_t From;
- uint64_t To;
- bool Mispred;
-};
-
-inline raw_ostream &operator<<(raw_ostream &OS, const LBREntry &LBR) {
- OS << "0x" << Twine::utohexstr(LBR.From) << " -> 0x"
- << Twine::utohexstr(LBR.To);
- return OS;
-}
-
struct Location {
bool IsSymbol;
StringRef Name;
@@ -109,6 +97,9 @@ struct FuncBranchData {
/// Total execution count for the function.
int64_t ExecutionCount{0};
+ /// Total entry count from external code for the function.
+ uint64_t ExternEntryCount{0};
+
/// Indicate if the data was used.
bool Used{false};
diff --git a/bolt/include/bolt/Profile/ProfileYAMLMapping.h b/bolt/include/bolt/Profile/ProfileYAMLMapping.h
index a8d9a15311d94..41e2bd1651efd 100644
--- a/bolt/include/bolt/Profile/ProfileYAMLMapping.h
+++ b/bolt/include/bolt/Profile/ProfileYAMLMapping.h
@@ -206,6 +206,7 @@ struct BinaryFunctionProfile {
uint32_t Id{0};
llvm::yaml::Hex64 Hash{0};
uint64_t ExecCount{0};
+ uint64_t ExternEntryCount{0};
std::vector<BinaryBasicBlockProfile> Blocks;
std::vector<InlineTreeNode> InlineTree;
bool Used{false};
@@ -218,6 +219,7 @@ template <> struct MappingTraits<bolt::BinaryFunctionProfile> {
YamlIO.mapRequired("fid", BFP.Id);
YamlIO.mapRequired("hash", BFP.Hash);
YamlIO.mapRequired("exec", BFP.ExecCount);
+ YamlIO.mapOptional("extern", BFP.ExternEntryCount, 0);
YamlIO.mapRequired("nblocks", BFP.NumBasicBlocks);
YamlIO.mapOptional("blocks", BFP.Blocks,
std::vector<bolt::BinaryBasicBlockProfile>());
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 6d1969f5c6c30..b998d7160aae7 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -471,6 +471,8 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation) {
OS << "\n Sample Count: " << RawSampleCount;
OS << "\n Profile Acc : " << format("%.1f%%", ProfileMatchRatio * 100.0f);
}
+ if (ExternEntryCount)
+ OS << "\n Extern Entry Count: " << ExternEntryCount;
if (opts::PrintDynoStats && !getLayout().block_empty()) {
OS << '\n';
diff --git a/bolt/lib/Passes/ProfileQualityStats.cpp b/bolt/lib/Passes/ProfileQualityStats.cpp
index dfd74d3dd5719..64cc662c3ab29 100644
--- a/bolt/lib/Passes/ProfileQualityStats.cpp
+++ b/bolt/lib/Passes/ProfileQualityStats.cpp
@@ -532,6 +532,9 @@ void computeFlowMappings(const BinaryContext &BC, FlowInfo &TotalFlowMap) {
std::vector<uint64_t> &MaxCountMap = TotalMaxCountMaps[FunctionNum];
std::vector<uint64_t> &MinCountMap = TotalMinCountMaps[FunctionNum];
+ // Record external entry count into CallGraphIncomingFlows
+ CallGraphIncomingFlows[FunctionNum] += Function->getExternEntryCount();
+
// Update MaxCountMap, MinCountMap, and CallGraphIncomingFlows
auto recordCall = [&](const BinaryBasicBlock *SourceBB,
const MCSymbol *DestSymbol, uint64_t Count,
diff --git a/bolt/lib/Profile/BoltAddressTranslation.cpp b/bolt/lib/Profile/BoltAddressTranslation.cpp
index a253522e4fb15..7ad4e6a2e1411 100644
--- a/bolt/lib/Profile/BoltAddressTranslation.cpp
+++ b/bolt/lib/Profile/BoltAddressTranslation.cpp
@@ -546,7 +546,7 @@ BoltAddressTranslation::getFallthroughsInTrace(uint64_t FuncAddress,
return Res;
for (auto Iter = FromIter; Iter != ToIter;) {
- const uint32_t Src = Iter->first;
+ const uint32_t Src = Iter->second >> 1;
if (Iter->second & BRANCHENTRY) {
++Iter;
continue;
@@ -557,7 +557,7 @@ BoltAddressTranslation::getFallthroughsInTrace(uint64_t FuncAddress,
++Iter;
if (Iter->second & BRANCHENTRY)
break;
- Res.emplace_back(Src, Iter->first);
+ Res.emplace_back(Src, Iter->second >> 1);
}
return Res;
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 2527b5bfe38d2..addff196f4f5b 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -827,13 +827,8 @@ bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
<< FromFunc->getPrintName() << ":"
<< Twine::utohexstr(First.To) << " to "
<< Twine::utohexstr(Second.From) << ".\n");
- for (auto [From, To] : *FTs) {
- if (BAT) {
- From = BAT->translate(FromFunc->getAddress(), From, /*IsBranchSrc=*/true);
- To = BAT->translate(FromFunc->getAddress(), To, /*IsBranchSrc=*/false);
- }
+ for (auto [From, To] : *FTs)
doIntraBranch(*ParentFunc, From, To, Count, false);
- }
return true;
}
@@ -972,7 +967,7 @@ bool DataAggregator::recordExit(BinaryFunction &BF, uint64_t From, bool Mispred,
return true;
}
-ErrorOr<LBREntry> DataAggregator::parseLBREntry() {
+ErrorOr<DataAggregator::LBREntry> DataAggregator::parseLBREntry() {
LBREntry Res;
ErrorOr<StringRef> FromStrRes = parseString('/');
if (std::error_code EC = FromStrRes.getError())
@@ -1430,54 +1425,16 @@ void DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
const uint64_t TraceTo = NextLBR->From;
const BinaryFunction *TraceBF =
getBinaryFunctionContainingAddress(TraceFrom);
- if (opts::HeatmapMode == opts::HeatmapModeKind::HM_Exclusive) {
- FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)];
+ FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)];
+ if (TraceBF && TraceBF->containsAddress(LBR.From))
++Info.InternCount;
- } else if (TraceBF && TraceBF->containsAddress(TraceTo)) {
- FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)];
- if (TraceBF->containsAddress(LBR.From))
- ++Info.InternCount;
- else
- ++Info.ExternCount;
- } else {
- const BinaryFunction *ToFunc =
- getBinaryFunctionContainingAddress(TraceTo);
- if (TraceBF && ToFunc) {
- LLVM_DEBUG({
- dbgs() << "Invalid trace starting in " << TraceBF->getPrintName()
- << formatv(" @ {0:x}", TraceFrom - TraceBF->getAddress())
- << formatv(" and ending @ {0:x}\n", TraceTo);
- });
- ++NumInvalidTraces;
- } else {
- LLVM_DEBUG({
- dbgs() << "Out of range trace starting in "
- << (TraceBF ? TraceBF->getPrintName() : "None")
- << formatv(" @ {0:x}",
- TraceFrom - (TraceBF ? TraceBF->getAddress() : 0))
- << " and ending in "
- << (ToFunc ? ToFunc->getPrintName() : "None")
- << formatv(" @ {0:x}\n",
- TraceTo - (ToFunc ? ToFunc->getAddress() : 0));
- });
- ++NumLongRangeTraces;
- }
- }
+ else
+ ++Info.ExternCount;
++NumTraces;
}
NextLBR = &LBR;
- // Record branches outside binary functions for heatmap.
- if (opts::HeatmapMode == opts::HeatmapModeKind::HM_Exclusive) {
- TakenBranchInfo &Info = BranchLBRs[Trace(LBR.From, LBR.To)];
- ++Info.TakenCount;
- continue;
- }
- uint64_t From = getBinaryFunctionContainingAddress(LBR.From) ? LBR.From : 0;
- uint64_t To = getBinaryFunctionContainingAddress(LBR.To) ? LBR.To : 0;
- if (!From && !To)
- continue;
- TakenBranchInfo &Info = BranchLBRs[Trace(From, To)];
+ TakenBranchInfo &Info = BranchLBRs[Trace(LBR.From, LBR.To)];
++Info.TakenCount;
Info.MispredCount += LBR.Mispred;
}
@@ -2289,6 +2246,7 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
YamlBF.Id = BF->getFunctionNumber();
YamlBF.Hash = BAT->getBFHash(FuncAddress);
YamlBF.ExecCount = BF->getKnownExecutionCount();
+ YamlBF.ExternEntryCount = BF->getExternEntryCount();
YamlBF.NumBasicBlocks = BAT->getNumBasicBlocks(FuncAddress);
const BoltAddressTranslation::BBHashMapTy &BlockMap =
BAT->getBBHashMap(FuncAddress);
@@ -2398,16 +2356,10 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
void DataAggregator::dump() const { DataReader::dump(); }
-void DataAggregator::dump(const LBREntry &LBR) const {
- Diag << "From: " << Twine::utohexstr(LBR.From)
- << " To: " << Twine::utohexstr(LBR.To) << " Mispred? " << LBR.Mispred
- << "\n";
-}
-
void DataAggregator::dump(const PerfBranchSample &Sample) const {
Diag << "Sample LBR entries: " << Sample.LBR.size() << "\n";
for (const LBREntry &LBR : Sample.LBR)
- dump(LBR);
+ Diag << LBR << '\n';
}
void DataAggregator::dump(const PerfMemSample &Sample) const {
diff --git a/bolt/lib/Profile/DataReader.cpp b/bolt/lib/Profile/DataReader.cpp
index c512394f26a3b..afe24216d7f5d 100644
--- a/bolt/lib/Profile/DataReader.cpp
+++ b/bolt/lib/Profile/DataReader.cpp
@@ -85,6 +85,7 @@ void FuncBranchData::appendFrom(const FuncBranchData &FBD, uint64_t Offset) {
}
llvm::stable_sort(Data);
ExecutionCount += FBD.ExecutionCount;
+ ExternEntryCount += FBD.ExternEntryCount;
for (auto I = FBD.EntryData.begin(), E = FBD.EntryData.end(); I != E; ++I) {
assert(I->To.Name == FBD.Name);
auto NewElmt = EntryData.insert(EntryData.end(), *I);
@@ -269,6 +270,7 @@ Error DataReader::preprocessProfile(BinaryContext &BC) {
if (FuncBranchData *FuncData = getBranchDataForNames(Function.getNames())) {
setBranchData(Function, FuncData);
Function.ExecutionCount = FuncData->ExecutionCount;
+ Function.ExternEntryCount = FuncData->ExternEntryCount;
FuncData->Used = true;
}
}
@@ -419,6 +421,7 @@ void DataReader::matchProfileData(BinaryFunction &BF) {
if (fetchProfileForOtherEntryPoints(BF)) {
BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD);
BF.ExecutionCount = FBD->ExecutionCount;
+ BF.ExternEntryCount = FBD->ExternEntryCount;
BF.RawSampleCount = FBD->getNumExecutedBranches();
}
return;
@@ -449,6 +452,7 @@ void DataReader::matchProfileData(BinaryFunction &BF) {
setBranchData(BF, NewBranchData);
NewBranchData->Used = true;
BF.ExecutionCount = NewBranchData->ExecutionCount;
+ BF.ExternEntryCount = NewBranchData->ExternEntryCount;
BF.ProfileMatchRatio = 1.0f;
break;
}
@@ -1190,6 +1194,8 @@ std::error_code DataReader::parse() {
if (BI.To.IsSymbol && BI.To.Offset == 0) {
I = GetOrCreateFuncEntry(BI.To.Name);
I->second.ExecutionCount += BI.Branches;
+ if (!BI.From.IsSymbol)
+ I->second.ExternEntryCount += BI.Branches;
}
}
diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 33ce40ac2eeec..086e47b661e10 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -176,6 +176,7 @@ bool YAMLProfileReader::parseFunctionProfile(
uint64_t FunctionExecutionCount = 0;
BF.setExecutionCount(YamlBF.ExecCount);
+ BF.setExternEntryCount(YamlBF.ExternEntryCount);
uint64_t FuncRawBranchCount = 0;
for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks)
diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp
index f1fe45f21a0f6..f4308d6fc1992 100644
--- a/bolt/lib/Profile/YAMLProfileWriter.cpp
+++ b/bolt/lib/Profile/YAMLProfileWriter.cpp
@@ -226,6 +226,7 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
YamlBF.Hash = BF.getHash();
YamlBF.NumBasicBlocks = BF.size();
YamlBF.ExecCount = BF.getKnownExecutionCount();
+ YamlBF.ExternEntryCount = BF.getExternEntryCount();
DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId;
if (PseudoProbeDecoder && BF.getGUID()) {
std::tie(YamlBF.InlineTree, InlineTreeNodeId) =
diff --git a/bolt/test/X86/shrinkwrapping.test b/bolt/test/X86/shrinkwrapping.test
index 8581d7e0c0f7b..521b4561b3ba6 100644
--- a/bolt/test/X86/shrinkwrapping.test
+++ b/bolt/test/X86/shrinkwrapping.test
@@ -8,6 +8,7 @@ REQUIRES: shell
RUN: %clangxx %cxxflags -no-pie %S/Inputs/exc4sw.S -o %t.exe -Wl,-q
RUN: llvm-bolt %t.exe -o %t --relocs --frame-opt=all \
+RUN: --print-only=main --print-cfg \
RUN: --data=%p/Inputs/exc4sw.fdata --reorder-blocks=cache 2>&1 | \
RUN: FileCheck %s --check-prefix=CHECK-BOLT
@@ -19,6 +20,7 @@ RUN: llvm-objdump --dwarf=frames %t | grep -A20 -e \
RUN: `llvm-nm --numeric-sort %t | grep main | tail -n 1 | cut -f1 -d' ' | \
RUN: tail -c9` 2>&1 | FileCheck %s --check-prefix=CHECK-OUTPUT
+CHECK-BOLT: Extern Entry Count: 100
CHECK-BOLT: Shrink wrapping moved 2 spills inserting load/stores and 0 spills inserting push/pops
CHECK-INPUT: DW_CFA_advance_loc: 2
>From 81f92265327a8cf6e730ad1d01fd02f4ef76ed86 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Sat, 7 Jun 2025 21:11:24 -0700
Subject: [PATCH 2/5] [𝘀𝗽𝗿] changes introduced through rebase
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.4
[skip ci]
---
bolt/lib/Profile/DataAggregator.cpp | 4 +++-
bolt/test/X86/pre-aggregated-perf.test | 6 +++---
2 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index addff196f4f5b..0e6abdb2052af 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -733,8 +733,10 @@ bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
// corresponds to a return (if \p IsFrom) or a call continuation (otherwise).
auto handleAddress = [&](uint64_t &Addr, bool IsFrom) {
BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr);
- if (!Func)
+ if (!Func) {
+ Addr = 0;
return std::pair{Func, false};
+ }
Addr -= Func->getAddress();
diff --git a/bolt/test/X86/pre-aggregated-perf.test b/bolt/test/X86/pre-aggregated-perf.test
index 92e093c238e00..cc79cbd339505 100644
--- a/bolt/test/X86/pre-aggregated-perf.test
+++ b/bolt/test/X86/pre-aggregated-perf.test
@@ -67,10 +67,10 @@ BASIC-ERROR: BOLT-INFO: 0 out of 7 functions in the binary (0.0%) have non-empty
BASIC-SUCCESS: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile
CHECK-BASIC-NL: no_lbr cycles
-PERF2BOLT: 0 [unknown] 7f36d18d60c0 1 main 53c 0 2
+PERF2BOLT: 0 [unknown] 0 1 main 53c 0 2
PERF2BOLT: 1 main 451 1 SolveCubic 0 0 2
-PERF2BOLT: 1 main 490 0 [unknown] 4005f0 0 1
-PERF2BOLT: 1 main 537 0 [unknown] 400610 0 1
+PERF2BOLT: 1 main 490 0 [unknown] 0 0 1
+PERF2BOLT: 1 main 537 0 [unknown] 0 0 1
PERF2BOLT: 1 usqrt 30 1 usqrt 32 0 22
PERF2BOLT: 1 usqrt 30 1 usqrt 39 4 33
PERF2BOLT: 1 usqrt 35 1 usqrt 39 0 22
>From 867bac6dfba4b68746775aa1ebfca0667b5ee7c7 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Mon, 9 Jun 2025 15:57:14 -0700
Subject: [PATCH 3/5] cleanup
Created using spr 1.3.4
---
bolt/include/bolt/Profile/DataAggregator.h | 19 +++++++++----------
bolt/lib/Profile/DataAggregator.cpp | 14 +++++++++-----
2 files changed, 18 insertions(+), 15 deletions(-)
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index 1e115b0231055..10d96fbeca3e2 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -101,27 +101,26 @@ class DataAggregator : public DataReader {
/// Container for the unit of branch data.
/// Backwards compatible with legacy use for branches and fall-throughs:
- /// - if \p Branch is FT_ONLY or FT_EXTERNAL_ORIGIN, the trace only contains
- /// fall-through data,
- /// - if \p To is EXTERNAL, the trace only contains branch data.
+ /// - if \p Branch is FT_ONLY or FT_EXTERNAL_ORIGIN, the trace only
+ /// contains fall-through data,
+ /// - if \p To is BR_ONLY, the trace only contains branch data.
struct Trace {
static constexpr const uint64_t EXTERNAL = 0ULL;
+ static constexpr const uint64_t BR_ONLY = -1ULL;
static constexpr const uint64_t FT_ONLY = -1ULL;
static constexpr const uint64_t FT_EXTERNAL_ORIGIN = -2ULL;
uint64_t Branch;
uint64_t From;
uint64_t To;
- bool operator==(const Trace &Other) const {
- return Branch == Other.Branch && From == Other.From && To == Other.To;
- }
+ auto tie() const { return std::tie(Branch, From, To); }
+ bool operator==(const Trace &Other) const { return tie() == Other.tie(); }
+ bool operator<(const Trace &Other) const { return tie() < Other.tie(); }
};
friend raw_ostream &operator<<(raw_ostream &OS, const Trace &);
struct TraceHash {
- size_t operator()(const Trace &L) const {
- return llvm::hash_combine(L.Branch, L.From, L.To);
- }
+ size_t operator()(const Trace &L) const { return hash_combine(L.tie()); }
};
struct TakenBranchInfo {
@@ -531,7 +530,7 @@ inline raw_ostream &operator<<(raw_ostream &OS,
OS << Twine::utohexstr(T.Branch) << " -> ";
}
OS << Twine::utohexstr(T.From);
- if (T.To)
+ if (T.To != DataAggregator::Trace::BR_ONLY)
OS << " ... " << Twine::utohexstr(T.To);
return OS;
}
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index bd7e550569140..8e92c7ba1668e 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -514,6 +514,10 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
deleteTempFiles();
heatmap:
+ // Sort parsed traces for faster processing.
+ if (!opts::BasicAggregation)
+ llvm::sort(Traces, llvm::less_first());
+
if (!opts::HeatmapMode)
return Error::success();
@@ -1283,7 +1287,7 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
}
if (Type == BRANCH) {
- Addr[2] = Location(Trace::EXTERNAL);
+ Addr[2] = Location(Trace::BR_ONLY);
}
Trace T{Addr[0]->Offset, Addr[1]->Offset, Addr[2]->Offset};
@@ -1291,7 +1295,7 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
Traces.emplace_back(T, TI);
- if (Addr[2]->Offset)
+ if (Addr[2]->Offset != Trace::BR_ONLY)
NumTraces += Count;
NumTotalSamples += Count;
@@ -1305,7 +1309,7 @@ bool DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) const {
std::error_code DataAggregator::printLBRHeatMap() {
outs() << "PERF2BOLT: parse branch events...\n";
- NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
+ NamedRegionTimer T("buildHeatmap", "Building heatmap", TimerGroupName,
TimerGroupDesc, opts::TimeAggregator);
if (BC->IsLinuxKernel) {
@@ -1342,7 +1346,7 @@ std::error_code DataAggregator::printLBRHeatMap() {
for (const auto &[PC, Hits] : BasicSamples)
HM.registerAddress(PC, Hits);
for (const auto &[Trace, Info] : Traces)
- if (Trace.To)
+ if (Trace.To != Trace::BR_ONLY)
HM.registerAddressRange(Trace.From, Trace.To, Info.TakenCount);
if (HM.getNumInvalidRanges())
@@ -1540,7 +1544,7 @@ void DataAggregator::processBranchEvents() {
TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
for (const auto &[Trace, Info] : Traces) {
- if (Trace.To)
+ if (Trace.To != Trace::BR_ONLY)
doTrace(Trace, Info.TakenCount);
if (Trace.Branch != Trace::FT_ONLY &&
Trace.Branch != Trace::FT_EXTERNAL_ORIGIN)
>From 137df98ba094b648b4e185a10cae8b7c0b5581c7 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Mon, 9 Jun 2025 17:07:43 -0700
Subject: [PATCH 4/5] drop use of external
Created using spr 1.3.4
---
bolt/lib/Profile/DataAggregator.cpp | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 8e92c7ba1668e..5f384e7f60f25 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -1286,17 +1286,15 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
Addr[0] = Location(Type == FT ? Trace::FT_ONLY : Trace::FT_EXTERNAL_ORIGIN);
}
- if (Type == BRANCH) {
+ if (Type == BRANCH)
Addr[2] = Location(Trace::BR_ONLY);
- }
+ else
+ NumTraces += Count;
Trace T{Addr[0]->Offset, Addr[1]->Offset, Addr[2]->Offset};
TakenBranchInfo TI{(uint64_t)Count, (uint64_t)Mispreds};
-
Traces.emplace_back(T, TI);
- if (Addr[2]->Offset != Trace::BR_ONLY)
- NumTraces += Count;
NumTotalSamples += Count;
return std::error_code();
@@ -1393,7 +1391,7 @@ void DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
// chronological order)
if (NeedsSkylakeFix && NumEntry <= 2)
continue;
- uint64_t TraceTo = Trace::EXTERNAL;
+ uint64_t TraceTo = Trace::BR_ONLY;
if (NextLBR) {
TraceTo = NextLBR->From;
++NumTraces;
>From 5d99da3ce969a1f1ccf0922cbc978ff5a725ee66 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Mon, 9 Jun 2025 17:12:34 -0700
Subject: [PATCH 5/5] drop accidental change
Created using spr 1.3.4
---
bolt/lib/Profile/DataAggregator.cpp | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 5f384e7f60f25..949014e8cc1e2 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -1286,15 +1286,18 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
Addr[0] = Location(Type == FT ? Trace::FT_ONLY : Trace::FT_EXTERNAL_ORIGIN);
}
- if (Type == BRANCH)
+ if (Type == BRANCH) {
Addr[2] = Location(Trace::BR_ONLY);
- else
- NumTraces += Count;
+ }
Trace T{Addr[0]->Offset, Addr[1]->Offset, Addr[2]->Offset};
TakenBranchInfo TI{(uint64_t)Count, (uint64_t)Mispreds};
+
Traces.emplace_back(T, TI);
+ if (Addr[2]->Offset != Trace::BR_ONLY)
+ NumTraces += Count;
+
NumTotalSamples += Count;
return std::error_code();
More information about the llvm-commits
mailing list