[llvm] [BOLT] Support profile density with basic samples (PR #137644)
Amir Ayupov via llvm-commits
llvm-commits at lists.llvm.org
Fri May 9 17:03:47 PDT 2025
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/137644
From 58548480e97d6c177ce3ef735bed188308699eb5 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Mon, 28 Apr 2025 07:56:03 -0700
Subject: [PATCH] [𝘀𝗽𝗿] initial version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.4
---
bolt/include/bolt/Core/BinaryFunction.h | 15 +++++++--------
bolt/include/bolt/Profile/DataReader.h | 3 +++
bolt/lib/Core/BinaryFunction.cpp | 2 +-
bolt/lib/Passes/BinaryPasses.cpp | 2 +-
bolt/lib/Profile/DataAggregator.cpp | 23 +++++++++++++++--------
bolt/lib/Profile/DataReader.cpp | 11 +++++++++--
bolt/lib/Profile/YAMLProfileReader.cpp | 6 +++---
bolt/test/perf2bolt/perf_test.test | 1 +
8 files changed, 40 insertions(+), 23 deletions(-)
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index a52998564ee1b..e82b857446ce2 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -386,8 +386,8 @@ class BinaryFunction {
/// Profile match ratio.
float ProfileMatchRatio{0.0f};
- /// Raw branch count for this function in the profile.
- uint64_t RawBranchCount{0};
+ /// Raw sample/branch count for this function in the profile.
+ uint64_t RawSampleCount{0};
/// Dynamically executed function bytes, used for density computation.
uint64_t SampleCountInBytes{0};
@@ -1880,13 +1880,12 @@ class BinaryFunction {
/// Return COUNT_NO_PROFILE if there's no profile info.
uint64_t getExecutionCount() const { return ExecutionCount; }
- /// Return the raw profile information about the number of branch
- /// executions corresponding to this function.
- uint64_t getRawBranchCount() const { return RawBranchCount; }
+ /// Return the raw profile information about the number of samples (basic
+ /// profile) or branch executions (branch profile) recorded in this function.
+ uint64_t getRawSampleCount() const { return RawSampleCount; }
- /// Set the profile data about the number of branch executions corresponding
- /// to this function.
- void setRawBranchCount(uint64_t Count) { RawBranchCount = Count; }
+ /// Set raw count of samples or branches recorded in this function.
+ void setRawSampleCount(uint64_t Count) { RawSampleCount = Count; }
/// Return the number of dynamically executed bytes, from raw perf data.
uint64_t getSampleCountInBytes() const { return SampleCountInBytes; }
diff --git a/bolt/include/bolt/Profile/DataReader.h b/bolt/include/bolt/Profile/DataReader.h
index 314dcc9115586..a7a0933bd4f03 100644
--- a/bolt/include/bolt/Profile/DataReader.h
+++ b/bolt/include/bolt/Profile/DataReader.h
@@ -252,6 +252,9 @@ struct FuncSampleData {
/// Get the number of samples recorded in [Start, End)
uint64_t getSamples(uint64_t Start, uint64_t End) const;
+ /// Returns the total number of samples recorded in this function.
+ uint64_t getSamples() const;
+
/// Aggregation helper
DenseMap<uint64_t, size_t> Index;
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 4624abadc701a..fea5101b16cd7 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -471,7 +471,7 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation) {
OS << "\n Image : 0x" << Twine::utohexstr(getImageAddress());
if (ExecutionCount != COUNT_NO_PROFILE) {
OS << "\n Exec Count : " << ExecutionCount;
- OS << "\n Branch Count: " << RawBranchCount;
+ OS << "\n Branch Count: " << RawSampleCount;
OS << "\n Profile Acc : " << format("%.1f%%", ProfileMatchRatio * 100.0f);
}
diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp
index d8628c62d8654..420ffc8e01c5c 100644
--- a/bolt/lib/Passes/BinaryPasses.cpp
+++ b/bolt/lib/Passes/BinaryPasses.cpp
@@ -1445,7 +1445,7 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
if (!Function.hasProfile())
continue;
- uint64_t SampleCount = Function.getRawBranchCount();
+ uint64_t SampleCount = Function.getRawSampleCount();
TotalSampleCount += SampleCount;
if (Function.hasValidProfile()) {
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index a8a187974418d..a622abd450cf6 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -567,15 +567,14 @@ void DataAggregator::processProfile(BinaryContext &BC) {
processMemEvents();
// Mark all functions with registered events as having a valid profile.
- const auto Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
- : BinaryFunction::PF_LBR;
for (auto &BFI : BC.getBinaryFunctions()) {
BinaryFunction &BF = BFI.second;
- FuncBranchData *FBD = getBranchData(BF);
- if (FBD || getFuncSampleData(BF.getNames())) {
- BF.markProfiled(Flags);
- if (FBD)
- BF.RawBranchCount = FBD->getNumExecutedBranches();
+ if (FuncBranchData *FBD = getBranchData(BF)) {
+ BF.markProfiled(BinaryFunction::PF_LBR);
+ BF.RawSampleCount = FBD->getNumExecutedBranches();
+ } else if (FuncSampleData *FSD = getFuncSampleData(BF.getNames())) {
+ BF.markProfiled(BinaryFunction::PF_SAMPLE);
+ BF.RawSampleCount = FSD->getSamples();
}
}
@@ -632,10 +631,18 @@ StringRef DataAggregator::getLocationName(const BinaryFunction &Func,
bool DataAggregator::doSample(BinaryFunction &OrigFunc, uint64_t Address,
uint64_t Count) {
+ // To record executed bytes, use basic block size as is regardless of BAT.
+ uint64_t BlockSize = 0;
+ if (BinaryBasicBlock *BB = OrigFunc.getBasicBlockContainingOffset(
+ Address - OrigFunc.getAddress()))
+ BlockSize = BB->getOriginalSize();
+
BinaryFunction *ParentFunc = getBATParentFunction(OrigFunc);
BinaryFunction &Func = ParentFunc ? *ParentFunc : OrigFunc;
- if (ParentFunc || (BAT && !BAT->isBATFunction(OrigFunc.getAddress())))
+ if (ParentFunc || (BAT && !BAT->isBATFunction(Func.getAddress())))
NumColdSamples += Count;
+ // Attach executed bytes to parent function in case of cold fragment.
+ Func.SampleCountInBytes += Count * BlockSize;
auto I = NamesToSamples.find(Func.getOneName());
if (I == NamesToSamples.end()) {
diff --git a/bolt/lib/Profile/DataReader.cpp b/bolt/lib/Profile/DataReader.cpp
index f2e999bbfdc6d..4a92c9eb0a912 100644
--- a/bolt/lib/Profile/DataReader.cpp
+++ b/bolt/lib/Profile/DataReader.cpp
@@ -128,6 +128,13 @@ uint64_t FuncSampleData::getSamples(uint64_t Start, uint64_t End) const {
return Result;
}
+uint64_t FuncSampleData::getSamples() const {
+  uint64_t TotalHits = 0; // Running sum of Hits over every entry in Data.
+  for (const SampleInfo &Sample : Data)
+    TotalHits += Sample.Hits;
+  return TotalHits;
+}
+
void FuncSampleData::bumpCount(uint64_t Offset, uint64_t Count) {
auto Iter = Index.find(Offset);
if (Iter == Index.end()) {
@@ -407,12 +414,12 @@ void DataReader::matchProfileData(BinaryFunction &BF) {
FuncBranchData *FBD = getBranchData(BF);
if (FBD) {
BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD);
- BF.RawBranchCount = FBD->getNumExecutedBranches();
+ BF.RawSampleCount = FBD->getNumExecutedBranches();
if (BF.ProfileMatchRatio == 1.0f) {
if (fetchProfileForOtherEntryPoints(BF)) {
BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD);
BF.ExecutionCount = FBD->ExecutionCount;
- BF.RawBranchCount = FBD->getNumExecutedBranches();
+ BF.RawSampleCount = FBD->getNumExecutedBranches();
}
return;
}
diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index f5636bfe3e1f1..88b806c7a9ca2 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -177,11 +177,11 @@ bool YAMLProfileReader::parseFunctionProfile(
BF.setExecutionCount(YamlBF.ExecCount);
- uint64_t FuncRawBranchCount = 0;
+ uint64_t FuncRawSampleCount = 0;
for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks)
for (const yaml::bolt::SuccessorInfo &YamlSI : YamlBB.Successors)
- FuncRawBranchCount += YamlSI.Count;
- BF.setRawBranchCount(FuncRawBranchCount);
+ FuncRawSampleCount += YamlSI.Count;
+ BF.setRawSampleCount(FuncRawSampleCount);
if (BF.empty())
return true;
diff --git a/bolt/test/perf2bolt/perf_test.test b/bolt/test/perf2bolt/perf_test.test
index 7bec4420214d6..44111de89a4ea 100644
--- a/bolt/test/perf2bolt/perf_test.test
+++ b/bolt/test/perf2bolt/perf_test.test
@@ -8,6 +8,7 @@ RUN: perf2bolt %t -p=%t2 -o %t3 -nl -ignore-build-id 2>&1 | FileCheck %s
CHECK-NOT: PERF2BOLT-ERROR
CHECK-NOT: !! WARNING !! This high mismatch ratio indicates the input binary is probably not the same binary used during profiling collection.
+CHECK: BOLT-INFO: Functions with density >= {{.*}} account for 99.00% total sample counts.
RUN: %clang %S/Inputs/perf_test.c -no-pie -fuse-ld=lld -o %t4
RUN: perf record -Fmax -e cycles:u -o %t5 -- %t4
More information about the llvm-commits mailing list