[llvm-branch-commits] [BOLT] Support pre-aggregated basic sample profile (PR #140196)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu May 15 23:15:33 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-bolt

Author: Amir Ayupov (aaupov)

<details>
<summary>Changes</summary>

Define a pre-aggregated basic sample format:
```
S <location> <count>
```

Test Plan: update pre-aggregated-perf.test


---
Full diff: https://github.com/llvm/llvm-project/pull/140196.diff


5 Files Affected:

- (modified) bolt/include/bolt/Profile/DataAggregator.h (+20-14) 
- (modified) bolt/lib/Profile/DataAggregator.cpp (+49-39) 
- (added) bolt/test/X86/Inputs/pre-aggregated-basic.txt (+18) 
- (modified) bolt/test/X86/pre-aggregated-perf.test (+8) 
- (modified) bolt/test/link_fdata.py (+3-3) 


``````````diff
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index 6d918134137d5..7d60b4689fb04 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -370,26 +370,25 @@ class DataAggregator : public DataReader {
   /// memory.
   ///
   /// File format syntax:
-  /// {B|F|f|T} [<start_id>:]<start_offset> [<end_id>:]<end_offset> [<ft_end>]
-  ///       <count> [<mispred_count>]
+  /// {S|B|F|f|T} <start> [<end>] [<ft_end>] <count> [<mispred_count>]
   ///
-  /// B - indicates an aggregated branch
-  /// F - an aggregated fall-through
+  /// where <start>, <end>, <ft_end> have the format [<id>:]<offset>
+  ///
+  /// S - indicates an aggregated basic sample at <start>
+  /// B - indicates an aggregated branch from <start> to <end>
+  /// F - an aggregated fall-through from <start> to <end>
   /// f - an aggregated fall-through with external origin - used to disambiguate
   ///       between a return hitting a basic block head and a regular internal
   ///       jump to the block
-  /// T - an aggregated trace: branch with a fall-through (from, to, ft_end)
-  ///
-  /// <start_id> - build id of the object containing the start address. We can
-  /// skip it for the main binary and use "X" for an unknown object. This will
-  /// save some space and facilitate human parsing.
-  ///
-  /// <start_offset> - hex offset from the object base load address (0 for the
-  /// main executable unless it's PIE) to the start address.
+  /// T - an aggregated trace: branch from <start> to <end> with a fall-through
+  ///       to <ft_end>
   ///
-  /// <end_id>, <end_offset> - same for the end address.
+  /// <id> - build id of the object containing the address. We can skip it for
+  /// the main binary and use "X" for an unknown object. This will save some
+  /// space and facilitate human parsing.
   ///
-  /// <ft_end> - same for the fallthrough_end address.
+  /// <offset> - hex offset from the object base load address (0 for the
+  /// main executable unless it's PIE) to the address.
   ///
   /// <count> - total aggregated count of the branch or a fall-through.
   ///
@@ -397,10 +396,17 @@ class DataAggregator : public DataReader {
   /// Omitted for fall-throughs.
   ///
   /// Example:
+  /// Basic samples profile:
+  /// S 41be50 3
+  ///
+  /// Soft-deprecated branch profile with separate branches and fall-throughs:
   /// F 41be50 41be50 3
   /// F 41be90 41be90 4
   /// B 4b1942 39b57f0 3 0
   /// B 4b196f 4b19e0 2 0
+  ///
+  /// Recommended branch profile with pre-aggregated traces:
+  /// T 4b196f 4b19e0 4b19ef 2
   void parsePreAggregated();
 
   /// Parse the full output of pre-aggregated LBR samples generated by
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index aa681e633c0d8..6e889908003da 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -1216,54 +1216,54 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
   ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator);
   if (std::error_code EC = TypeOrErr.getError())
     return EC;
-  enum AggregatedLBREntry { TRACE, BRANCH, FT, FT_EXTERNAL_ORIGIN, INVALID };
+  enum AggregatedLBREntry {
+    TRACE,
+    SAMPLE,
+    BRANCH,
+    FT,
+    FT_EXTERNAL_ORIGIN,
+    INVALID
+  };
   auto Type = StringSwitch<AggregatedLBREntry>(TypeOrErr.get())
                   .Case("T", TRACE)
+                  .Case("S", SAMPLE)
                   .Case("B", BRANCH)
                   .Case("F", FT)
                   .Case("f", FT_EXTERNAL_ORIGIN)
                   .Default(INVALID);
   if (Type == INVALID) {
-    reportError("expected T, B, F or f");
+    reportError("expected T, S, B, F or f");
     return make_error_code(llvm::errc::io_error);
   }
 
-  while (checkAndConsumeFS()) {
-  }
-  ErrorOr<Location> From = parseLocationOrOffset();
-  if (std::error_code EC = From.getError())
-    return EC;
+  std::optional<Location> Addrs[3];
+  int AddrNum = 2;
+  if (Type == TRACE)
+    AddrNum = 3;
+  else if (Type == SAMPLE)
+    AddrNum = 1;
 
-  while (checkAndConsumeFS()) {
-  }
-  ErrorOr<Location> To = parseLocationOrOffset();
-  if (std::error_code EC = To.getError())
-    return EC;
+  int64_t Counters[2];
+  int CounterNum = 1;
+  if (Type == BRANCH)
+    CounterNum = 2;
 
-  ErrorOr<Location> TraceFtEnd = std::error_code();
-  if (Type == AggregatedLBREntry::TRACE) {
+  for (int I = 0; I < AddrNum; ++I) {
     while (checkAndConsumeFS()) {
     }
-    TraceFtEnd = parseLocationOrOffset();
-    if (std::error_code EC = TraceFtEnd.getError())
-      return EC;
-  }
-
-  while (checkAndConsumeFS()) {
+    if (ErrorOr<Location> Addr = parseLocationOrOffset())
+      Addrs[I] = Addr.get();
+    else
+      return Addr.getError();
   }
-  ErrorOr<int64_t> Frequency =
-      parseNumberField(FieldSeparator, Type != AggregatedLBREntry::BRANCH);
-  if (std::error_code EC = Frequency.getError())
-    return EC;
 
-  uint64_t Mispreds = 0;
-  if (Type == AggregatedLBREntry::BRANCH) {
+  for (int I = 0; I < CounterNum; ++I) {
     while (checkAndConsumeFS()) {
     }
-    ErrorOr<int64_t> MispredsOrErr = parseNumberField(FieldSeparator, true);
-    if (std::error_code EC = MispredsOrErr.getError())
-      return EC;
-    Mispreds = static_cast<uint64_t>(MispredsOrErr.get());
+    if (ErrorOr<int64_t> Count = parseNumberField(FieldSeparator, I + 1 == CounterNum))
+      Counters[I] = Count.get();
+    else
+      return Count.getError();
   }
 
   if (!checkAndConsumeNewLine()) {
@@ -1271,16 +1271,25 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
     return make_error_code(llvm::errc::io_error);
   }
 
-  BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From->Offset);
-  BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To->Offset);
+  const uint64_t FromOffset = Addrs[0]->Offset;
+  BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(FromOffset);
+  if (FromFunc)
+    FromFunc->setHasProfileAvailable();
 
-  for (BinaryFunction *BF : {FromFunc, ToFunc})
-    if (BF)
-      BF->setHasProfileAvailable();
+  if (Type == SAMPLE) {
+    BasicSamples[FromOffset] += Counters[0];
+    return std::error_code();
+  }
+
+  const uint64_t ToOffset = Addrs[1]->Offset;
+  BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(ToOffset);
+  if (ToFunc)
+    ToFunc->setHasProfileAvailable();
 
-  uint64_t Count = static_cast<uint64_t>(Frequency.get());
+  int64_t Count = Counters[0];
+  int64_t Mispreds = Counters[1];
 
-  Trace Trace(From->Offset, To->Offset);
+  Trace Trace(FromOffset, ToOffset);
   // Taken trace
   if (Type == TRACE || Type == BRANCH) {
     TakenBranchInfo &Info = BranchLBRs[Trace];
@@ -1291,8 +1300,9 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
   }
   // Construct fallthrough part of the trace
   if (Type == TRACE) {
-    Trace.From = To->Offset;
-    Trace.To = TraceFtEnd->Offset;
+    const uint64_t TraceFtEndOffset = Addrs[2]->Offset;
+    Trace.From = ToOffset;
+    Trace.To = TraceFtEndOffset;
     Type = FromFunc == ToFunc ? FT : FT_EXTERNAL_ORIGIN;
   }
   // Add fallthrough trace
diff --git a/bolt/test/X86/Inputs/pre-aggregated-basic.txt b/bolt/test/X86/Inputs/pre-aggregated-basic.txt
new file mode 100644
index 0000000000000..28bcacca70ee1
--- /dev/null
+++ b/bolt/test/X86/Inputs/pre-aggregated-basic.txt
@@ -0,0 +1,18 @@
+S 4005f0 1
+S 4005f0 1
+S 400610 1
+S 400ad1 2
+S 400b10 1
+S 400bb7 1
+S 400bbc 2
+S 400d90 1
+S 400dae 1
+S 400e00 2
+S 401170 22
+S 401180 58
+S 4011a0 33
+S 4011a9 33
+S 4011ad 58
+S 4011b2 22
+S X:7f36d18d60c0 2
+S X:7f36d18f2ce0 1
diff --git a/bolt/test/X86/pre-aggregated-perf.test b/bolt/test/X86/pre-aggregated-perf.test
index c05a06bf74945..63488a9062bdd 100644
--- a/bolt/test/X86/pre-aggregated-perf.test
+++ b/bolt/test/X86/pre-aggregated-perf.test
@@ -57,6 +57,14 @@ RUN: llvm-bolt %t.exe -o %t.bolt.yaml --pa -p %p/Inputs/pre-aggregated.txt \
 RUN:   --aggregate-only --profile-format=yaml --profile-use-dfs
 RUN: cat %t.bolt.yaml | FileCheck %s -check-prefix=NEWFORMAT
 
+## Test pre-aggregated basic profile
+RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated-basic.txt -o %t.ba \
+RUN:   2>&1 | FileCheck %s --check-prefix=BASIC-ERROR
+RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated-basic.txt -o %t.ba \
+RUN:   -nl 2>&1 | FileCheck %s --check-prefix=BASIC-SUCCESS
+BASIC-ERROR: BOLT-INFO: 0 out of 7 functions in the binary (0.0%) have non-empty execution profile
+BASIC-SUCCESS: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile
+
 PERF2BOLT: 0 [unknown] 7f36d18d60c0 1 main 53c 0 2
 PERF2BOLT: 1 main 451 1 SolveCubic 0 0 2
 PERF2BOLT: 1 main 490 0 [unknown] 4005f0 0 1
diff --git a/bolt/test/link_fdata.py b/bolt/test/link_fdata.py
index bcf9a777922d5..6a391c10b9481 100755
--- a/bolt/test/link_fdata.py
+++ b/bolt/test/link_fdata.py
@@ -36,9 +36,9 @@
 fdata_pat = re.compile(r"([01].*) (?P<exec>\d+) (?P<mispred>\d+)")
 
 # Pre-aggregated profile:
-# {T|B|F|f} [<start_id>:]<start_offset> [<end_id>:]<end_offset> [<ft_end>]
-# <count> [<mispred_count>]
-preagg_pat = re.compile(r"(?P<type>[TBFf]) (?P<offsets_count>.*)")
+# {T|S|B|F|f} <start> [<end>] [<ft_end>] <count> [<mispred_count>]
+# where <start>, <end>, <ft_end> have the format [<id>:]<offset>
+preagg_pat = re.compile(r"(?P<type>[TSBFf]) (?P<offsets_count>.*)")
 
 # No-LBR profile:
 # <is symbol?> <closest elf symbol or DSO name> <relative address> <count>

``````````

</details>


https://github.com/llvm/llvm-project/pull/140196


More information about the llvm-branch-commits mailing list