[llvm] [BOLT] Support profile density with basic samples (PR #137644)

via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 28 14:07:53 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-bolt

Author: Amir Ayupov (aaupov)

<details>
<summary>Changes</summary>

For profile with LBR samples, binary function profile density is
computed as a ratio of executed fallthrough bytes to function size (b).

For profile with IP samples, use the size of basic block containing the
sample IP as a numerator.

Also rename RawBranchCount to RawSampleCount so it's used for both
branch profile and basic profile.

Test Plan: updated perf_test.test


---
Full diff: https://github.com/llvm/llvm-project/pull/137644.diff


8 Files Affected:

- (modified) bolt/include/bolt/Core/BinaryFunction.h (+7-8) 
- (modified) bolt/include/bolt/Profile/DataReader.h (+3) 
- (modified) bolt/lib/Core/BinaryFunction.cpp (+1-1) 
- (modified) bolt/lib/Passes/BinaryPasses.cpp (+1-1) 
- (modified) bolt/lib/Profile/DataAggregator.cpp (+15-8) 
- (modified) bolt/lib/Profile/DataReader.cpp (+9-2) 
- (modified) bolt/lib/Profile/YAMLProfileReader.cpp (+3-3) 
- (modified) bolt/test/perf2bolt/perf_test.test (+1) 


``````````diff
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index a52998564ee1b..e82b857446ce2 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -386,8 +386,8 @@ class BinaryFunction {
   /// Profile match ratio.
   float ProfileMatchRatio{0.0f};
 
-  /// Raw branch count for this function in the profile.
-  uint64_t RawBranchCount{0};
+  /// Raw sample/branch count for this function in the profile.
+  uint64_t RawSampleCount{0};
 
   /// Dynamically executed function bytes, used for density computation.
   uint64_t SampleCountInBytes{0};
@@ -1880,13 +1880,12 @@ class BinaryFunction {
   /// Return COUNT_NO_PROFILE if there's no profile info.
   uint64_t getExecutionCount() const { return ExecutionCount; }
 
-  /// Return the raw profile information about the number of branch
-  /// executions corresponding to this function.
-  uint64_t getRawBranchCount() const { return RawBranchCount; }
+  /// Return the raw profile information about the number of samples (basic
+  /// profile) or branch executions (branch profile) recorded in this function.
+  uint64_t getRawSampleCount() const { return RawSampleCount; }
 
-  /// Set the profile data about the number of branch executions corresponding
-  /// to this function.
-  void setRawBranchCount(uint64_t Count) { RawBranchCount = Count; }
+  /// Set raw count of samples or branches recorded in this function.
+  void setRawSampleCount(uint64_t Count) { RawSampleCount = Count; }
 
   /// Return the number of dynamically executed bytes, from raw perf data.
   uint64_t getSampleCountInBytes() const { return SampleCountInBytes; }
diff --git a/bolt/include/bolt/Profile/DataReader.h b/bolt/include/bolt/Profile/DataReader.h
index 314dcc9115586..a7a0933bd4f03 100644
--- a/bolt/include/bolt/Profile/DataReader.h
+++ b/bolt/include/bolt/Profile/DataReader.h
@@ -252,6 +252,9 @@ struct FuncSampleData {
   /// Get the number of samples recorded in [Start, End)
   uint64_t getSamples(uint64_t Start, uint64_t End) const;
 
+  /// Returns the total number of samples recorded in this function.
+  uint64_t getSamples() const;
+
   /// Aggregation helper
   DenseMap<uint64_t, size_t> Index;
 
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 4624abadc701a..fea5101b16cd7 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -471,7 +471,7 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation) {
     OS << "\n  Image       : 0x" << Twine::utohexstr(getImageAddress());
   if (ExecutionCount != COUNT_NO_PROFILE) {
     OS << "\n  Exec Count  : " << ExecutionCount;
-    OS << "\n  Branch Count: " << RawBranchCount;
+    OS << "\n  Branch Count: " << RawSampleCount;
     OS << "\n  Profile Acc : " << format("%.1f%%", ProfileMatchRatio * 100.0f);
   }
 
diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp
index d8628c62d8654..420ffc8e01c5c 100644
--- a/bolt/lib/Passes/BinaryPasses.cpp
+++ b/bolt/lib/Passes/BinaryPasses.cpp
@@ -1445,7 +1445,7 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
     if (!Function.hasProfile())
       continue;
 
-    uint64_t SampleCount = Function.getRawBranchCount();
+    uint64_t SampleCount = Function.getRawSampleCount();
     TotalSampleCount += SampleCount;
 
     if (Function.hasValidProfile()) {
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index a8a187974418d..a622abd450cf6 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -567,15 +567,14 @@ void DataAggregator::processProfile(BinaryContext &BC) {
   processMemEvents();
 
   // Mark all functions with registered events as having a valid profile.
-  const auto Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
-                                            : BinaryFunction::PF_LBR;
   for (auto &BFI : BC.getBinaryFunctions()) {
     BinaryFunction &BF = BFI.second;
-    FuncBranchData *FBD = getBranchData(BF);
-    if (FBD || getFuncSampleData(BF.getNames())) {
-      BF.markProfiled(Flags);
-      if (FBD)
-        BF.RawBranchCount = FBD->getNumExecutedBranches();
+    if (FuncBranchData *FBD = getBranchData(BF)) {
+      BF.markProfiled(BinaryFunction::PF_LBR);
+      BF.RawSampleCount = FBD->getNumExecutedBranches();
+    } else if (FuncSampleData *FSD = getFuncSampleData(BF.getNames())) {
+      BF.markProfiled(BinaryFunction::PF_SAMPLE);
+      BF.RawSampleCount = FSD->getSamples();
     }
   }
 
@@ -632,10 +631,18 @@ StringRef DataAggregator::getLocationName(const BinaryFunction &Func,
 
 bool DataAggregator::doSample(BinaryFunction &OrigFunc, uint64_t Address,
                               uint64_t Count) {
+  // To record executed bytes, use basic block size as is regardless of BAT.
+  uint64_t BlockSize = 0;
+  if (BinaryBasicBlock *BB = OrigFunc.getBasicBlockContainingOffset(
+          Address - OrigFunc.getAddress()))
+    BlockSize = BB->getOriginalSize();
+
   BinaryFunction *ParentFunc = getBATParentFunction(OrigFunc);
   BinaryFunction &Func = ParentFunc ? *ParentFunc : OrigFunc;
-  if (ParentFunc || (BAT && !BAT->isBATFunction(OrigFunc.getAddress())))
+  if (ParentFunc || (BAT && !BAT->isBATFunction(Func.getAddress())))
     NumColdSamples += Count;
+  // Attach executed bytes to parent function in case of cold fragment.
+  Func.SampleCountInBytes += Count * BlockSize;
 
   auto I = NamesToSamples.find(Func.getOneName());
   if (I == NamesToSamples.end()) {
diff --git a/bolt/lib/Profile/DataReader.cpp b/bolt/lib/Profile/DataReader.cpp
index f2e999bbfdc6d..4a92c9eb0a912 100644
--- a/bolt/lib/Profile/DataReader.cpp
+++ b/bolt/lib/Profile/DataReader.cpp
@@ -128,6 +128,13 @@ uint64_t FuncSampleData::getSamples(uint64_t Start, uint64_t End) const {
   return Result;
 }
 
+uint64_t FuncSampleData::getSamples() const {
+  uint64_t Result = 0;
+  for (const SampleInfo &I : Data)
+    Result += I.Hits;
+  return Result;
+}
+
 void FuncSampleData::bumpCount(uint64_t Offset, uint64_t Count) {
   auto Iter = Index.find(Offset);
   if (Iter == Index.end()) {
@@ -407,12 +414,12 @@ void DataReader::matchProfileData(BinaryFunction &BF) {
   FuncBranchData *FBD = getBranchData(BF);
   if (FBD) {
     BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD);
-    BF.RawBranchCount = FBD->getNumExecutedBranches();
+    BF.RawSampleCount = FBD->getNumExecutedBranches();
     if (BF.ProfileMatchRatio == 1.0f) {
       if (fetchProfileForOtherEntryPoints(BF)) {
         BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD);
         BF.ExecutionCount = FBD->ExecutionCount;
-        BF.RawBranchCount = FBD->getNumExecutedBranches();
+        BF.RawSampleCount = FBD->getNumExecutedBranches();
       }
       return;
     }
diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index f5636bfe3e1f1..88b806c7a9ca2 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -177,11 +177,11 @@ bool YAMLProfileReader::parseFunctionProfile(
 
   BF.setExecutionCount(YamlBF.ExecCount);
 
-  uint64_t FuncRawBranchCount = 0;
+  uint64_t FuncRawSampleCount = 0;
   for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks)
     for (const yaml::bolt::SuccessorInfo &YamlSI : YamlBB.Successors)
-      FuncRawBranchCount += YamlSI.Count;
-  BF.setRawBranchCount(FuncRawBranchCount);
+      FuncRawSampleCount += YamlSI.Count;
+  BF.setRawSampleCount(FuncRawSampleCount);
 
   if (BF.empty())
     return true;
diff --git a/bolt/test/perf2bolt/perf_test.test b/bolt/test/perf2bolt/perf_test.test
index 7bec4420214d6..44111de89a4ea 100644
--- a/bolt/test/perf2bolt/perf_test.test
+++ b/bolt/test/perf2bolt/perf_test.test
@@ -8,6 +8,7 @@ RUN: perf2bolt %t -p=%t2 -o %t3 -nl -ignore-build-id 2>&1 | FileCheck %s
 
 CHECK-NOT: PERF2BOLT-ERROR
 CHECK-NOT: !! WARNING !! This high mismatch ratio indicates the input binary is probably not the same binary used during profiling collection.
+CHECK: BOLT-INFO: Functions with density >= {{.*}} account for 99.00% total sample counts.
 
 RUN: %clang %S/Inputs/perf_test.c -no-pie -fuse-ld=lld -o %t4
 RUN: perf record -Fmax -e cycles:u -o %t5 -- %t4

``````````

</details>


https://github.com/llvm/llvm-project/pull/137644


More information about the llvm-commits mailing list