[llvm] [BOLT]Fix profile quality reporting for small binaries (PR #130810)

via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 3 08:01:59 PDT 2025


https://github.com/ShatianWang updated https://github.com/llvm/llvm-project/pull/130810

>From 39112aed99fb6342d2e821896fa35485569ab7e2 Mon Sep 17 00:00:00 2001
From: Shatian Wang <shatian at meta.com>
Date: Tue, 11 Mar 2025 09:05:08 -0700
Subject: [PATCH 1/2] Report 0.00% profile quality gaps under empty eligible
 functions / blocks

---
 bolt/lib/Passes/ProfileQualityStats.cpp       | 13 +++++--
 .../profile-quality-reporting-small-binary.s  | 35 +++++++++++++++++++
 2 files changed, 45 insertions(+), 3 deletions(-)
 create mode 100644 bolt/test/X86/profile-quality-reporting-small-binary.s

diff --git a/bolt/lib/Passes/ProfileQualityStats.cpp b/bolt/lib/Passes/ProfileQualityStats.cpp
index 332c78da8a1e3..61d67b4f9068e 100644
--- a/bolt/lib/Passes/ProfileQualityStats.cpp
+++ b/bolt/lib/Passes/ProfileQualityStats.cpp
@@ -157,8 +157,10 @@ void printCFGContinuityStats(raw_ostream &OS,
     FractionECUnreachables.push_back(FractionECUnreachable);
   }
 
-  if (FractionECUnreachables.empty())
+  if (FractionECUnreachables.empty()) {
+    OS << "function CFG discontinuity 0.00%; ";
     return;
+  }
 
   llvm::sort(FractionECUnreachables);
   const int Rank = int(FractionECUnreachables.size() *
@@ -251,8 +253,10 @@ void printCallGraphFlowConservationStats(
     }
   }
 
-  if (CallGraphGaps.empty())
+  if (CallGraphGaps.empty()) {
+    OS << "call graph flow conservation gap 0.00%; ";
     return;
+  }
 
   llvm::sort(CallGraphGaps);
   const int Rank =
@@ -340,8 +344,11 @@ void printCFGFlowConservationStats(raw_ostream &OS,
     }
   }
 
-  if (CFGGapsWeightedAvg.empty())
+  if (CFGGapsWeightedAvg.empty()) {
+    OS << "CFG flow conservation gap 0.00% (weighted) 0.00% (worst)\n";
     return;
+  }
+
   llvm::sort(CFGGapsWeightedAvg);
   const int RankWA = int(CFGGapsWeightedAvg.size() *
                          opts::PercentileForProfileQualityCheck / 100);
diff --git a/bolt/test/X86/profile-quality-reporting-small-binary.s b/bolt/test/X86/profile-quality-reporting-small-binary.s
new file mode 100644
index 0000000000000..603d5c3218bc3
--- /dev/null
+++ b/bolt/test/X86/profile-quality-reporting-small-binary.s
@@ -0,0 +1,35 @@
+## Test that BOLT-INFO is correctly formatted after profile quality reporting for
+## a small binary.
+
+# RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %s -o %t.o
+# RUN: link_fdata %s %t.o %t.fdata
+# RUN: llvm-strip --strip-unneeded %t.o
+# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q
+# RUN: llvm-bolt %t.exe -o %t.bolt --data=%t.fdata \
+# RUN:     2>&1 | FileCheck %s
+
+# CHECK: BOLT-INFO: profile quality metrics for the hottest 2 functions (reporting top 5% values): function CFG discontinuity 0.00%; call graph flow conservation gap 0.00%; CFG flow conservation gap 0.00% (weighted) 0.00% (worst)
+# CHECK-NEXT: BOLT-INFO:
+
+        .text
+        .globl  func
+        .type   func, @function
+func:
+        pushq   %rbp
+        ret
+LLfunc_end:
+        .size   func, LLfunc_end-func
+
+
+        .globl  main
+        .type   main, @function
+main:
+        pushq   %rbp
+        movq    %rsp, %rbp
+LLmain_func:
+        call    func
+# FDATA: 1 main #LLmain_func# 1 func 0 0 500
+        movl    $4, %edi
+        retq
+.Lmain_end:
+        .size   main, .Lmain_end-main

>From 8121b86517884ef5fd8ae4f6595365577f0aeb34 Mon Sep 17 00:00:00 2001
From: Shatian Wang <shatian at meta.com>
Date: Wed, 19 Mar 2025 12:36:31 -0700
Subject: [PATCH 2/2] Fixed format; added EH stats; skipped selected blocks in
 CFG flow conservation score calculation

---
 bolt/lib/Passes/ProfileQualityStats.cpp       | 181 +++++++++++++-----
 .../profile-quality-reporting-small-binary.s  |   2 +-
 bolt/test/X86/profile-quality-reporting.test  |   2 +-
 3 files changed, 137 insertions(+), 48 deletions(-)

diff --git a/bolt/lib/Passes/ProfileQualityStats.cpp b/bolt/lib/Passes/ProfileQualityStats.cpp
index 61d67b4f9068e..c19ba76d10c93 100644
--- a/bolt/lib/Passes/ProfileQualityStats.cpp
+++ b/bolt/lib/Passes/ProfileQualityStats.cpp
@@ -91,8 +91,12 @@ void printCFGContinuityStats(raw_ostream &OS,
   std::vector<double> FractionECUnreachables;
 
   for (const BinaryFunction *Function : Functions) {
-    if (Function->size() <= 1)
+    if (Function->size() <= 1) {
+      NumUnreachables.push_back(0);
+      SumECUnreachables.push_back(0);
+      FractionECUnreachables.push_back(0.0);
       continue;
+    }
 
     // Compute the sum of all BB execution counts (ECs).
     size_t NumPosECBBs = 0;
@@ -157,11 +161,6 @@ void printCFGContinuityStats(raw_ostream &OS,
     FractionECUnreachables.push_back(FractionECUnreachable);
   }
 
-  if (FractionECUnreachables.empty()) {
-    OS << "function CFG discontinuity 0.00%; ";
-    return;
-  }
-
   llvm::sort(FractionECUnreachables);
   const int Rank = int(FractionECUnreachables.size() *
                        opts::PercentileForProfileQualityCheck / 100);
@@ -189,8 +188,10 @@ void printCallGraphFlowConservationStats(
   std::vector<double> CallGraphGaps;
 
   for (const BinaryFunction *Function : Functions) {
-    if (Function->size() <= 1 || !Function->isSimple())
+    if (Function->size() <= 1 || !Function->isSimple()) {
+      CallGraphGaps.push_back(0.0);
       continue;
+    }
 
     const uint64_t FunctionNum = Function->getFunctionNumber();
     std::vector<uint64_t> &IncomingFlows =
@@ -253,11 +254,6 @@ void printCallGraphFlowConservationStats(
     }
   }
 
-  if (CallGraphGaps.empty()) {
-    OS << "call graph flow conservation gap 0.00%; ";
-    return;
-  }
-
   llvm::sort(CallGraphGaps);
   const int Rank =
       int(CallGraphGaps.size() * opts::PercentileForProfileQualityCheck / 100);
@@ -269,7 +265,7 @@ void printCallGraphFlowConservationStats(
   }
 }
 
-void printCFGFlowConservationStats(raw_ostream &OS,
+void printCFGFlowConservationStats(const BinaryContext &BC, raw_ostream &OS,
                                    iterator_range<function_iterator> &Functions,
                                    FlowInfo &TotalFlowMap) {
   std::vector<double> CFGGapsWeightedAvg;
@@ -279,8 +275,12 @@ void printCFGFlowConservationStats(raw_ostream &OS,
   // reporting the distribution of worst gaps.
   const uint16_t MinBlockCount = 500;
   for (const BinaryFunction *Function : Functions) {
-    if (Function->size() <= 1 || !Function->isSimple())
+    if (Function->size() <= 1 || !Function->isSimple()) {
+      CFGGapsWeightedAvg.push_back(0.0);
+      CFGGapsWorst.push_back(0.0);
+      CFGGapsWorstAbs.push_back(0);
       continue;
+    }
 
     const uint64_t FunctionNum = Function->getFunctionNumber();
     std::vector<uint64_t> &MaxCountMaps =
@@ -299,12 +299,38 @@ void printCFGFlowConservationStats(raw_ostream &OS,
       if (BB.isEntryPoint() || BB.succ_size() == 0)
         continue;
 
+      if (BB.getKnownExecutionCount() == 0 || BB.getNumNonPseudos() == 0)
+        continue;
+
+      // We don't consider blocks that are landing pads or that have a
+      // positive-execution-count landing pad
+      if (BB.isLandingPad())
+        continue;
+
+      bool HasPosECLP = false;
+      for (const BinaryBasicBlock *LP : BB.landing_pads()) {
+        if (LP->getKnownExecutionCount() > 0) {
+          HasPosECLP = true;
+          break;
+        }
+      }
+      if (HasPosECLP)
+        continue;
+
+      // We don't consider blocks that end with a recursive call instruction
+      const MCInst *Inst = BB.getLastNonPseudoInstr();
+      if (BC.MIB->isCall(*Inst)) {
+        const MCSymbol *DstSym = BC.MIB->getTargetSymbol(*Inst);
+        const BinaryFunction *DstFunc =
+            DstSym ? BC.getFunctionForSymbol(DstSym) : nullptr;
+        if (DstFunc == Function)
+          continue;
+      }
+
       const uint64_t Max = MaxCountMaps[BB.getLayoutIndex()];
       const uint64_t Min = MinCountMaps[BB.getLayoutIndex()];
       const double Gap = 1 - (double)Min / Max;
       double Weight = BB.getKnownExecutionCount() * BB.getNumNonPseudos();
-      if (Weight == 0)
-        continue;
       // We use log to prevent the stats from being dominated by extremely hot
       // blocks
       Weight = log(Weight);
@@ -320,33 +346,27 @@ void printCFGFlowConservationStats(raw_ostream &OS,
         BBWorstGapAbs = &BB;
       }
     }
-    if (WeightSum > 0) {
-      const double WeightedGap = WeightedGapSum / WeightSum;
-      if (opts::Verbosity >= 2 && (WeightedGap >= 0.1 || WorstGap >= 0.9)) {
-        OS << "Nontrivial CFG gap observed in function "
-           << Function->getPrintName() << "\n"
-           << "Weighted gap: " << formatv("{0:P}", WeightedGap) << "\n";
-        if (BBWorstGap)
-          OS << "Worst gap: " << formatv("{0:P}", WorstGap)
-             << " at BB with input offset: 0x"
-             << Twine::utohexstr(BBWorstGap->getInputOffset()) << "\n";
-        if (BBWorstGapAbs)
-          OS << "Worst gap (absolute value): " << WorstGapAbs << " at BB with "
-             << "input offset 0x"
-             << Twine::utohexstr(BBWorstGapAbs->getInputOffset()) << "\n";
-        if (opts::Verbosity >= 3)
-          Function->dump();
-      }
-
-      CFGGapsWeightedAvg.push_back(WeightedGap);
-      CFGGapsWorst.push_back(WorstGap);
-      CFGGapsWorstAbs.push_back(WorstGapAbs);
+    double WeightedGap = WeightedGapSum;
+    if (WeightSum > 0) 
+      WeightedGap /= WeightSum;
+    if (opts::Verbosity >= 2 && WorstGap >= 0.9) {
+      OS << "Nontrivial CFG gap observed in function "
+          << Function->getPrintName() << "\n"
+          << "Weighted gap: " << formatv("{0:P}", WeightedGap) << "\n";
+      if (BBWorstGap)
+        OS << "Worst gap: " << formatv("{0:P}", WorstGap)
+            << " at BB with input offset: 0x"
+            << Twine::utohexstr(BBWorstGap->getInputOffset()) << "\n";
+      if (BBWorstGapAbs)
+        OS << "Worst gap (absolute value): " << WorstGapAbs << " at BB with "
+            << "input offset 0x"
+            << Twine::utohexstr(BBWorstGapAbs->getInputOffset()) << "\n";
+      if (opts::Verbosity >= 3)
+        Function->dump();
     }
-  }
-
-  if (CFGGapsWeightedAvg.empty()) {
-    OS << "CFG flow conservation gap 0.00% (weighted) 0.00% (worst)\n";
-    return;
+    CFGGapsWeightedAvg.push_back(WeightedGap);
+    CFGGapsWorst.push_back(WorstGap);
+    CFGGapsWorstAbs.push_back(WorstGapAbs);
   }
 
   llvm::sort(CFGGapsWeightedAvg);
@@ -355,7 +375,7 @@ void printCFGFlowConservationStats(raw_ostream &OS,
   llvm::sort(CFGGapsWorst);
   const int RankW =
       int(CFGGapsWorst.size() * opts::PercentileForProfileQualityCheck / 100);
-  OS << formatv("CFG flow conservation gap {0:P} (weighted) {1:P} (worst)\n",
+  OS << formatv("CFG flow conservation gap {0:P} (weighted) {1:P} (worst); ",
                 CFGGapsWeightedAvg[RankWA], CFGGapsWorst[RankW]);
   if (opts::Verbosity >= 1) {
     OS << "distribution of weighted CFG flow conservation gaps\n";
@@ -372,6 +392,74 @@ void printCFGFlowConservationStats(raw_ostream &OS,
   }
 }
 
+/// Print landing-pad execution counts as fractions of block and invoke counts.
+void printExceptionHandlingStats(const BinaryContext &BC, raw_ostream &OS,
+                                 iterator_range<function_iterator> &Functions) {
+  std::vector<double> LPCountFractionsOfTotalBBEC;
+  std::vector<double> LPCountFractionsOfTotalInvokeEC;
+  for (const BinaryFunction *Function : Functions) {
+    size_t LPECSum = 0;
+    size_t BBECSum = 0;
+    size_t InvokeECSum = 0;
+    for (BinaryBasicBlock &BB : *Function) {
+      const size_t BBEC = BB.getKnownExecutionCount();
+      BBECSum += BBEC;
+      if (BB.isLandingPad())
+        LPECSum += BBEC;
+      for (const MCInst &Inst : BB) {
+        if (!BC.MIB->isCall(Inst) || !BC.MIB->isInvoke(Inst))
+          continue;
+        const std::optional<MCPlus::MCLandingPad> EHInfo =
+            BC.MIB->getEHInfo(Inst);
+        if (EHInfo && EHInfo->first)
+          InvokeECSum += BBEC;
+      }
+    }
+    // We only consider functions with more than MinLPECSum counts in landing
+    // pads to avoid false positives due to sampling noise
+    const uint16_t MinLPECSum = 50;
+    if (LPECSum <= MinLPECSum) {
+      LPCountFractionsOfTotalBBEC.push_back(0.0);
+      LPCountFractionsOfTotalInvokeEC.push_back(0.0);
+      continue;
+    }
+    const double FracTotalBBEC = (double)LPECSum / BBECSum;
+    // Landing pads can be hot while no invoke has counts; avoid dividing by 0.
+    const double FracTotalInvokeEC =
+        InvokeECSum > 0 ? (double)LPECSum / InvokeECSum : 0.0;
+    LPCountFractionsOfTotalBBEC.push_back(FracTotalBBEC);
+    LPCountFractionsOfTotalInvokeEC.push_back(FracTotalInvokeEC);
+    if (opts::Verbosity >= 2 && FracTotalInvokeEC >= 0.05) {
+      OS << "Non-trivial usage of exception handling observed in function "
+         << Function->getPrintName() << "\n"
+         << formatv(
+                "Fraction of total InvokeEC that goes to landing pads: {0:P}\n",
+                FracTotalInvokeEC);
+      if (opts::Verbosity >= 3)
+        Function->dump();
+    }
+  }
+
+  llvm::sort(LPCountFractionsOfTotalBBEC);
+  const int RankBBEC = int(LPCountFractionsOfTotalBBEC.size() *
+                           opts::PercentileForProfileQualityCheck / 100);
+  llvm::sort(LPCountFractionsOfTotalInvokeEC);
+  const int RankInvoke = int(LPCountFractionsOfTotalInvokeEC.size() *
+                             opts::PercentileForProfileQualityCheck / 100);
+  OS << formatv("exception handling usage {0:P} (of total BBEC) {1:P} (of "
+                "total InvokeEC)\n",
+                LPCountFractionsOfTotalBBEC[RankBBEC],
+                LPCountFractionsOfTotalInvokeEC[RankInvoke]);
+  if (opts::Verbosity >= 1) {
+    OS << "distribution of exception handling usage as a fraction of total "
+          "BBEC of each function\n";
+    printDistribution(OS, LPCountFractionsOfTotalBBEC, /*Fraction=*/true);
+    OS << "distribution of exception handling usage as a fraction of total "
+          "InvokeEC of each function\n";
+    printDistribution(OS, LPCountFractionsOfTotalInvokeEC, /*Fraction=*/true);
+  }
+}
+
 void computeFlowMappings(const BinaryContext &BC, FlowInfo &TotalFlowMap) {
   // Increment block inflow and outflow with CFG jump counts.
   TotalFlowMapTy &TotalIncomingFlows = TotalFlowMap.TotalIncomingFlows;
@@ -526,8 +614,8 @@ void printAll(BinaryContext &BC, FunctionListType &ValidFunctions,
                       100 - opts::PercentileForProfileQualityCheck);
   printCFGContinuityStats(BC.outs(), Functions);
   printCallGraphFlowConservationStats(BC.outs(), Functions, TotalFlowMap);
-  printCFGFlowConservationStats(BC.outs(), Functions, TotalFlowMap);
-
+  printCFGFlowConservationStats(BC, BC.outs(), Functions, TotalFlowMap);
+  printExceptionHandlingStats(BC, BC.outs(), Functions);
   // Print more detailed bucketed stats if requested.
   if (opts::Verbosity >= 1 && RealNumTopFunctions >= 5) {
     const size_t PerBucketSize = RealNumTopFunctions / 5;
@@ -557,7 +645,8 @@ void printAll(BinaryContext &BC, FunctionListType &ValidFunctions,
                        MaxFunctionExecutionCount);
       printCFGContinuityStats(BC.outs(), Functions);
       printCallGraphFlowConservationStats(BC.outs(), Functions, TotalFlowMap);
-      printCFGFlowConservationStats(BC.outs(), Functions, TotalFlowMap);
+      printCFGFlowConservationStats(BC, BC.outs(), Functions, TotalFlowMap);
+      printExceptionHandlingStats(BC, BC.outs(), Functions);
     }
   }
 }
diff --git a/bolt/test/X86/profile-quality-reporting-small-binary.s b/bolt/test/X86/profile-quality-reporting-small-binary.s
index 603d5c3218bc3..2b147c5eca81e 100644
--- a/bolt/test/X86/profile-quality-reporting-small-binary.s
+++ b/bolt/test/X86/profile-quality-reporting-small-binary.s
@@ -8,7 +8,7 @@
 # RUN: llvm-bolt %t.exe -o %t.bolt --data=%t.fdata \
 # RUN:     2>&1 | FileCheck %s
 
-# CHECK: BOLT-INFO: profile quality metrics for the hottest 2 functions (reporting top 5% values): function CFG discontinuity 0.00%; call graph flow conservation gap 0.00%; CFG flow conservation gap 0.00% (weighted) 0.00% (worst)
+# CHECK: BOLT-INFO: profile quality metrics for the hottest 2 functions (reporting top 5% values): function CFG discontinuity 0.00%; call graph flow conservation gap 0.00%; CFG flow conservation gap 0.00% (weighted) 0.00% (worst); exception handling usage 0.00% (of total BBEC) 0.00% (of total InvokeEC)
 # CHECK-NEXT: BOLT-INFO:
 
         .text
diff --git a/bolt/test/X86/profile-quality-reporting.test b/bolt/test/X86/profile-quality-reporting.test
index 2e15a6b245afa..210d3e10a3890 100644
--- a/bolt/test/X86/profile-quality-reporting.test
+++ b/bolt/test/X86/profile-quality-reporting.test
@@ -1,4 +1,4 @@
 ## Check profile quality stats reporting
 RUN: yaml2obj %p/Inputs/blarge_new.yaml &> %t.exe
 RUN: llvm-bolt %t.exe -o %t.out --pa -p %p/Inputs/blarge_new.preagg.txt | FileCheck %s
-CHECK: profile quality metrics for the hottest 5 functions (reporting top 5% values): function CFG discontinuity 100.00%; call graph flow conservation gap 60.00%; CFG flow conservation gap 45.53% (weighted) 96.87% (worst)
+CHECK: profile quality metrics for the hottest 5 functions (reporting top 5% values): function CFG discontinuity 100.00%; call graph flow conservation gap 60.00%; CFG flow conservation gap 45.53% (weighted) 96.87% (worst); exception handling usage 0.00% (of total BBEC) 0.00% (of total InvokeEC)



More information about the llvm-commits mailing list