[llvm] [BOLT][NFCI] Emit uniform diagnostics in DataAggregator (PR #136530)

Amir Ayupov via llvm-commits llvm-commits at lists.llvm.org
Sun Apr 20 20:04:11 PDT 2025


https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/136530

>From 1f319f789b8d6f5321a89425699d0e55d099565a Mon Sep 17 00:00:00 2001
From: Amir Aupov <amir.aupov at gmail.com>
Date: Sun, 20 Apr 2025 19:48:39 -0700
Subject: [PATCH 1/2] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?=
 =?UTF-8?q?itial=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4
---
 bolt/include/bolt/Profile/DataAggregator.h |   1 +
 bolt/lib/Profile/DataAggregator.cpp        | 194 +++++++++------------
 2 files changed, 81 insertions(+), 114 deletions(-)

diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index 56eb463fc98fc..1aa05f4121874 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -231,6 +231,7 @@ class DataAggregator : public DataReader {
   /// for the source of the branch to avoid counting cold activity twice (one
   /// for source and another for destination).
   uint64_t NumColdSamples{0};
+  uint64_t NumTotalSamples{0};
 
   /// Looks into system PATH for Linux Perf and set up the aggregator to use it
   void findPerfExecutable();
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index f016576ff61af..734b3a1244e11 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -1490,12 +1490,86 @@ uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
   return NumTraces;
 }
 
+static void printColdSamplesDiagnostic() {
+  if (NumColdSamples > 0) {
+    const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples;
+    outs() << "PERF2BOLT: " << NumColdSamples
+           << format(" (%.1f%%)", ColdSamples)
+           << " samples recorded in cold regions of split functions.\n";
+    if (ColdSamples > 5.0f)
+      outs()
+          << "WARNING: The BOLT-processed binary where samples were collected "
+             "likely used bad data or your service observed a large shift in "
+             "profile. You may want to audit this.\n";
+  }
+}
+
+static void printLongRangeTracesDiagnostic() {
+  outs() << "PERF2BOLT: out of range traces involving unknown regions: "
+         << NumLongRangeTraces;
+  if (NumTraces > 0)
+    outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
+  outs() << "\n";
+}
+
+static float printColoredPct(uint64_t Numerator, uint64_t Denominator, float T1,
+                             float T2) {
+  if (Denominator == 0) {
+    outs() << "\n";
+    return 0;
+  }
+  float Percent = Numerator * 100.0f / Denominator;
+  outs() << " (";
+  if (outs().has_colors()) {
+    if (Percent > T2)
+      outs().changeColor(raw_ostream::RED);
+    else if (Percent > T1)
+      outs().changeColor(raw_ostream::YELLOW);
+    else
+      outs().changeColor(raw_ostream::GREEN);
+  }
+  outs() << format("%.1f%%", Percent);
+  if (outs().has_colors())
+    outs().resetColor();
+  outs() << ")\n";
+  return Perc;
+}
+
+static void printBranchSamplesDiagnostics() {
+  outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
+         << NumInvalidTraces;
+  if (printColoredPct(NumInvalidTraces, NumTraces, 5, 10) > 10)
+    outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
+              "binary is probably not the same binary used during profiling "
+              "collection. The generated data may be ineffective for improving "
+              "performance.\n\n";
+  printLongRangeTracesDiagnostic();
+  printColdSamplesDiagnostic();
+}
+
+static void printBasicSamplesDiagnostics(uint64_t OutOfRangeSamples) {
+  outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
+         << OutOfRangeSamples;
+  if (printColoredPct(OutOfRangeSamples, NumTotalSamples, 40, 60) > 80)
+    outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
+              "binary is probably not the same binary used during profiling "
+              "collection. The generated data may be ineffective for improving "
+              "performance.\n\n";
+  printColdSamplesDiagnostic();
+}
+
+static void printBranchStacksDiagnostics(uint64_t IgnoredSamples) {
+  outs() << "PERF2BOLT: ignored samples: " << IgnoredSamples;
+  if (printColoredPct(IgnoredSamples, NumTotalSamples, 20, 50) > 50)
+    errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
+              "were attributed to the input binary\n";
+}
+
 std::error_code DataAggregator::parseBranchEvents() {
   outs() << "PERF2BOLT: parse branch events...\n";
   NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
                      TimerGroupDesc, opts::TimeAggregator);
 
-  uint64_t NumTotalSamples = 0;
   uint64_t NumEntries = 0;
   uint64_t NumSamples = 0;
   uint64_t NumSamplesNoLBR = 0;
@@ -1534,22 +1608,6 @@ std::error_code DataAggregator::parseBranchEvents() {
       if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Addr))
         BF->setHasProfileAvailable();
 
-  auto printColored = [](raw_ostream &OS, float Percent, float T1, float T2) {
-    OS << " (";
-    if (OS.has_colors()) {
-      if (Percent > T2)
-        OS.changeColor(raw_ostream::RED);
-      else if (Percent > T1)
-        OS.changeColor(raw_ostream::YELLOW);
-      else
-        OS.changeColor(raw_ostream::GREEN);
-    }
-    OS << format("%.1f%%", Percent);
-    if (OS.has_colors())
-      OS.resetColor();
-    OS << ")";
-  };
-
   outs() << "PERF2BOLT: read " << NumSamples << " samples and " << NumEntries
          << " LBR entries\n";
   if (NumTotalSamples) {
@@ -1561,47 +1619,10 @@ std::error_code DataAggregator::parseBranchEvents() {
                 "in no-LBR mode with -nl (the performance improvement in -nl "
                 "mode may be limited)\n";
     } else {
-      const uint64_t IgnoredSamples = NumTotalSamples - NumSamples;
-      const float PercentIgnored = 100.0f * IgnoredSamples / NumTotalSamples;
-      outs() << "PERF2BOLT: " << IgnoredSamples << " samples";
-      printColored(outs(), PercentIgnored, 20, 50);
-      outs() << " were ignored\n";
-      if (PercentIgnored > 50.0f)
-        errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
-                  "were attributed to the input binary\n";
+      printBranchStacksDiagnostics(NumTotalSamples - NumSamples);
     }
   }
-  outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
-         << NumInvalidTraces;
-  float Perc = 0.0f;
-  if (NumTraces > 0) {
-    Perc = NumInvalidTraces * 100.0f / NumTraces;
-    printColored(outs(), Perc, 5, 10);
-  }
-  outs() << "\n";
-  if (Perc > 10.0f)
-    outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
-              "binary is probably not the same binary used during profiling "
-              "collection. The generated data may be ineffective for improving "
-              "performance.\n\n";
-
-  outs() << "PERF2BOLT: out of range traces involving unknown regions: "
-         << NumLongRangeTraces;
-  if (NumTraces > 0)
-    outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
-  outs() << "\n";
-
-  if (NumColdSamples > 0) {
-    const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples;
-    outs() << "PERF2BOLT: " << NumColdSamples
-           << format(" (%.1f%%)", ColdSamples)
-           << " samples recorded in cold regions of split functions.\n";
-    if (ColdSamples > 5.0f)
-      outs()
-          << "WARNING: The BOLT-processed binary where samples were collected "
-             "likely used bad data or your service observed a large shift in "
-             "profile. You may want to audit this.\n";
-  }
+  printBranchSamplesDiagnostics();
 
   return std::error_code();
 }
@@ -1658,11 +1679,10 @@ void DataAggregator::processBasicEvents() {
   NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName,
                      TimerGroupDesc, opts::TimeAggregator);
   uint64_t OutOfRangeSamples = 0;
-  uint64_t NumSamples = 0;
   for (auto &Sample : BasicSamples) {
     const uint64_t PC = Sample.first;
     const uint64_t HitCount = Sample.second;
-    NumSamples += HitCount;
+    NumTotalSamples += HitCount;
     BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
     if (!Func) {
       OutOfRangeSamples += HitCount;
@@ -1673,31 +1693,7 @@ void DataAggregator::processBasicEvents() {
   }
   outs() << "PERF2BOLT: read " << NumSamples << " samples\n";
 
-  outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
-         << OutOfRangeSamples;
-  float Perc = 0.0f;
-  if (NumSamples > 0) {
-    outs() << " (";
-    Perc = OutOfRangeSamples * 100.0f / NumSamples;
-    if (outs().has_colors()) {
-      if (Perc > 60.0f)
-        outs().changeColor(raw_ostream::RED);
-      else if (Perc > 40.0f)
-        outs().changeColor(raw_ostream::YELLOW);
-      else
-        outs().changeColor(raw_ostream::GREEN);
-    }
-    outs() << format("%.1f%%", Perc);
-    if (outs().has_colors())
-      outs().resetColor();
-    outs() << ")";
-  }
-  outs() << "\n";
-  if (Perc > 80.0f)
-    outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
-              "binary is probably not the same binary used during profiling "
-              "collection. The generated data may be ineffective for improving "
-              "performance.\n\n";
+  printBasicSamplesDiagnostics(OutOfRangeSamples);
 }
 
 std::error_code DataAggregator::parseMemEvents() {
@@ -1799,37 +1795,7 @@ void DataAggregator::processPreAggregated() {
 
   outs() << "PERF2BOLT: read " << AggregatedLBRs.size()
          << " aggregated LBR entries\n";
-  outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
-         << NumInvalidTraces;
-  float Perc = 0.0f;
-  if (NumTraces > 0) {
-    outs() << " (";
-    Perc = NumInvalidTraces * 100.0f / NumTraces;
-    if (outs().has_colors()) {
-      if (Perc > 10.0f)
-        outs().changeColor(raw_ostream::RED);
-      else if (Perc > 5.0f)
-        outs().changeColor(raw_ostream::YELLOW);
-      else
-        outs().changeColor(raw_ostream::GREEN);
-    }
-    outs() << format("%.1f%%", Perc);
-    if (outs().has_colors())
-      outs().resetColor();
-    outs() << ")";
-  }
-  outs() << "\n";
-  if (Perc > 10.0f)
-    outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
-              "binary is probably not the same binary used during profiling "
-              "collection. The generated data may be ineffective for improving "
-              "performance.\n\n";
-
-  outs() << "PERF2BOLT: Out of range traces involving unknown regions: "
-         << NumLongRangeTraces;
-  if (NumTraces > 0)
-    outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
-  outs() << "\n";
+  printBranchSamplesDiagnostics();
 }
 
 std::optional<int32_t> DataAggregator::parseCommExecEvent() {

>From 1fe232095bb590257cc73a511b3ab119bac4cbad Mon Sep 17 00:00:00 2001
From: Amir Aupov <amir.aupov at gmail.com>
Date: Sun, 20 Apr 2025 20:04:01 -0700
Subject: [PATCH 2/2] fix build

Created using spr 1.3.4
---
 bolt/include/bolt/Profile/DataAggregator.h | 14 +++++++++---
 bolt/lib/Profile/DataAggregator.cpp        | 26 ++++++++++------------
 2 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index 1aa05f4121874..79a91861554df 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -223,7 +223,8 @@ class DataAggregator : public DataReader {
   bool recordExit(BinaryFunction &BF, uint64_t From, bool Mispred,
                   uint64_t Count = 1) const;
 
-  /// Aggregation statistics
+  /// Branch stacks aggregation statistics
+  uint64_t NumTraces{0};
   uint64_t NumInvalidTraces{0};
   uint64_t NumLongRangeTraces{0};
   /// Specifies how many samples were recorded in cold areas if we are dealing
@@ -328,8 +329,8 @@ class DataAggregator : public DataReader {
   /// Parse a single LBR entry as output by perf script -Fbrstack
   ErrorOr<LBREntry> parseLBREntry();
 
-  /// Parse LBR sample, returns the number of traces.
-  uint64_t parseLBRSample(const PerfBranchSample &Sample, bool NeedsSkylakeFix);
+  /// Parse LBR sample.
+  void parseLBRSample(const PerfBranchSample &Sample, bool NeedsSkylakeFix);
 
   /// Parse and pre-aggregate branch events.
   std::error_code parseBranchEvents();
@@ -488,6 +489,13 @@ class DataAggregator : public DataReader {
   void dump(const PerfBranchSample &Sample) const;
   void dump(const PerfMemSample &Sample) const;
 
+  /// Profile diagnostics print methods
+  void printColdSamplesDiagnostic() const;
+  void printLongRangeTracesDiagnostic() const;
+  void printBranchSamplesDiagnostics() const;
+  void printBasicSamplesDiagnostics(uint64_t OutOfRangeSamples) const;
+  void printBranchStacksDiagnostics(uint64_t IgnoredSamples) const;
+
 public:
   /// If perf.data was collected without build ids, the buildid-list may contain
   /// incomplete entries. Return true if the buffer containing
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 734b3a1244e11..1127cc1f9331a 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -1422,9 +1422,8 @@ std::error_code DataAggregator::printLBRHeatMap() {
   return std::error_code();
 }
 
-uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
-                                        bool NeedsSkylakeFix) {
-  uint64_t NumTraces{0};
+void DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
+                                    bool NeedsSkylakeFix) {
   // LBRs are stored in reverse execution order. NextLBR refers to the next
   // executed branch record.
   const LBREntry *NextLBR = nullptr;
@@ -1487,10 +1486,9 @@ uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
     ++Info.TakenCount;
     Info.MispredCount += LBR.Mispred;
   }
-  return NumTraces;
 }
 
-static void printColdSamplesDiagnostic() {
+void DataAggregator::printColdSamplesDiagnostic() const {
   if (NumColdSamples > 0) {
     const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples;
     outs() << "PERF2BOLT: " << NumColdSamples
@@ -1504,7 +1502,7 @@ static void printColdSamplesDiagnostic() {
   }
 }
 
-static void printLongRangeTracesDiagnostic() {
+void DataAggregator::printLongRangeTracesDiagnostic() const {
   outs() << "PERF2BOLT: out of range traces involving unknown regions: "
          << NumLongRangeTraces;
   if (NumTraces > 0)
@@ -1532,10 +1530,10 @@ static float printColoredPct(uint64_t Numerator, uint64_t Denominator, float T1,
   if (outs().has_colors())
     outs().resetColor();
   outs() << ")\n";
-  return Perc;
+  return Percent;
 }
 
-static void printBranchSamplesDiagnostics() {
+void DataAggregator::printBranchSamplesDiagnostics() const {
   outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
          << NumInvalidTraces;
   if (printColoredPct(NumInvalidTraces, NumTraces, 5, 10) > 10)
@@ -1547,7 +1545,8 @@ static void printBranchSamplesDiagnostics() {
   printColdSamplesDiagnostic();
 }
 
-static void printBasicSamplesDiagnostics(uint64_t OutOfRangeSamples) {
+void DataAggregator::printBasicSamplesDiagnostics(
+    uint64_t OutOfRangeSamples) const {
   outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
          << OutOfRangeSamples;
   if (printColoredPct(OutOfRangeSamples, NumTotalSamples, 40, 60) > 80)
@@ -1558,7 +1557,8 @@ static void printBasicSamplesDiagnostics(uint64_t OutOfRangeSamples) {
   printColdSamplesDiagnostic();
 }
 
-static void printBranchStacksDiagnostics(uint64_t IgnoredSamples) {
+void DataAggregator::printBranchStacksDiagnostics(
+    uint64_t IgnoredSamples) const {
   outs() << "PERF2BOLT: ignored samples: " << IgnoredSamples;
   if (printColoredPct(IgnoredSamples, NumTotalSamples, 20, 50) > 50)
     errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
@@ -1573,7 +1573,6 @@ std::error_code DataAggregator::parseBranchEvents() {
   uint64_t NumEntries = 0;
   uint64_t NumSamples = 0;
   uint64_t NumSamplesNoLBR = 0;
-  uint64_t NumTraces = 0;
   bool NeedsSkylakeFix = false;
 
   while (hasData() && NumTotalSamples < opts::MaxSamples) {
@@ -1600,7 +1599,7 @@ std::error_code DataAggregator::parseBranchEvents() {
       NeedsSkylakeFix = true;
     }
 
-    NumTraces += parseLBRSample(Sample, NeedsSkylakeFix);
+    parseLBRSample(Sample, NeedsSkylakeFix);
   }
 
   for (const Trace &Trace : llvm::make_first_range(BranchLBRs))
@@ -1691,7 +1690,7 @@ void DataAggregator::processBasicEvents() {
 
     doSample(*Func, PC, HitCount);
   }
-  outs() << "PERF2BOLT: read " << NumSamples << " samples\n";
+  outs() << "PERF2BOLT: read " << NumTotalSamples << " samples\n";
 
   printBasicSamplesDiagnostics(OutOfRangeSamples);
 }
@@ -1771,7 +1770,6 @@ void DataAggregator::processPreAggregated() {
   NamedRegionTimer T("processAggregated", "Processing aggregated branch events",
                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
 
-  uint64_t NumTraces = 0;
   for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) {
     switch (AggrEntry.EntryType) {
     case AggregatedLBREntry::BRANCH:



More information about the llvm-commits mailing list