[llvm] [BOLT] Flow conservation scores (PR #127954)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 19 21:07:00 PST 2025
https://github.com/ShatianWang created https://github.com/llvm/llvm-project/pull/127954
None
>From b18bb596c9345eb85231490436498ab434539c4b Mon Sep 17 00:00:00 2001
From: Shatian Wang <shatian at meta.com>
Date: Mon, 13 Jan 2025 11:17:55 -0800
Subject: [PATCH] [BOLT] Flow conservation scores
---
bolt/include/bolt/Passes/ContinuityStats.h | 61 --
.../include/bolt/Passes/ProfileQualityStats.h | 98 +++
bolt/lib/Passes/CMakeLists.txt | 2 +-
bolt/lib/Passes/ContinuityStats.cpp | 250 --------
bolt/lib/Passes/ProfileQualityStats.cpp | 579 ++++++++++++++++++
bolt/lib/Rewrite/BinaryPassManager.cpp | 46 +-
.../test/X86/cfg-discontinuity-reporting.test | 4 -
bolt/test/X86/profile-quality-reporting.test | 6 +
8 files changed, 707 insertions(+), 339 deletions(-)
delete mode 100644 bolt/include/bolt/Passes/ContinuityStats.h
create mode 100644 bolt/include/bolt/Passes/ProfileQualityStats.h
delete mode 100644 bolt/lib/Passes/ContinuityStats.cpp
create mode 100644 bolt/lib/Passes/ProfileQualityStats.cpp
delete mode 100644 bolt/test/X86/cfg-discontinuity-reporting.test
create mode 100644 bolt/test/X86/profile-quality-reporting.test
diff --git a/bolt/include/bolt/Passes/ContinuityStats.h b/bolt/include/bolt/Passes/ContinuityStats.h
deleted file mode 100644
index bd4d491ad4a55..0000000000000
--- a/bolt/include/bolt/Passes/ContinuityStats.h
+++ /dev/null
@@ -1,61 +0,0 @@
-//===- bolt/Passes/ContinuityStats.h ----------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass checks how well the BOLT input profile satisfies the following
-// "CFG continuity" property of a perfect profile:
-//
-// Each positive-execution-count block in the function’s CFG
-// should be *reachable* from a positive-execution-count function
-// entry block through a positive-execution-count path.
-//
-// More specifically, for each of the hottest 1000 functions, the pass
-// calculates the function’s fraction of basic block execution counts
-// that is *unreachable*. It then reports the 95th percentile of the
-// distribution of the 1000 unreachable fractions in a single BOLT-INFO line.
-// The smaller the reported value is, the better the BOLT profile
-// satisfies the CFG continuity property.
-
-// The default value of 1000 above can be changed via the hidden BOLT option
-// `-num-functions-for-continuity-check=[N]`.
-// If more detailed stats are needed, `-v=1` can be used: the hottest N
-// functions will be grouped into 5 equally-sized buckets, from the hottest
-// to the coldest; for each bucket, various summary statistics of the
-// distribution of the unreachable fractions and the raw unreachable execution
-// counts will be reported.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef BOLT_PASSES_CONTINUITYSTATS_H
-#define BOLT_PASSES_CONTINUITYSTATS_H
-
-#include "bolt/Passes/BinaryPasses.h"
-#include <vector>
-
-namespace llvm {
-
-class raw_ostream;
-
-namespace bolt {
-class BinaryContext;
-
-/// Compute and report to the user the function CFG continuity quality
-class PrintContinuityStats : public BinaryFunctionPass {
-public:
- explicit PrintContinuityStats(const cl::opt<bool> &PrintPass)
- : BinaryFunctionPass(PrintPass) {}
-
- bool shouldOptimize(const BinaryFunction &BF) const override;
- const char *getName() const override { return "continuity-stats"; }
- bool shouldPrint(const BinaryFunction &) const override { return false; }
- Error runOnFunctions(BinaryContext &BC) override;
-};
-
-} // namespace bolt
-} // namespace llvm
-
-#endif // BOLT_PASSES_CONTINUITYSTATS_H
diff --git a/bolt/include/bolt/Passes/ProfileQualityStats.h b/bolt/include/bolt/Passes/ProfileQualityStats.h
new file mode 100644
index 0000000000000..761fd33431a46
--- /dev/null
+++ b/bolt/include/bolt/Passes/ProfileQualityStats.h
@@ -0,0 +1,98 @@
+//===- bolt/Passes/ProfileQualityStats.h ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass checks the BOLT input profile quality.
+//
+// Check 1: how well the input profile satisfies the following
+// "CFG continuity" property of a perfect profile:
+//
+// Each positive-execution-count block in the function’s CFG
+// is *reachable* from a positive-execution-count function
+// entry block through a positive-execution-count path.
+//
+// More specifically, for each of the hottest 1000 functions, the pass
+// calculates the function’s fraction of basic block execution counts
+// that is *unreachable*. It then reports the 95th percentile of the
+// distribution of the 1000 unreachable fractions in a single BOLT-INFO line.
+// The smaller the reported value is, the better the BOLT profile
+// satisfies the CFG continuity property.
+//
+// Check 2: how well the input profile satisfies the "call graph flow
+// conservation" property of a perfect profile:
+//
+// For each function that is not a program entry, the number of times the
+// function is called is equal to the net CFG outflow of the
+// function's entry block(s).
+//
+// More specifically, for each of the hottest 1000 functions, the pass obtains
+// A = number of times the function is called, B = the function's entry blocks'
+// inflow, C = the function's entry blocks' outflow, where B and C are computed
+// using the function's weighted CFG. It then computes gap = 1 - MIN(A,C-B) /
+// MAX(A, C-B). The pass reports the 95th percentile of the distribution of the
+// 1000 gaps in a single BOLT-INFO line. The smaller the reported value is, the
+// better the BOLT profile satisfies the call graph flow conservation property.
+//
+// Check 3: how well the input profile satisfies the "function CFG flow
+// conservation property" of a perfect profile:
+//
+// A non-entry non-exit basic block's inflow is equal to its outflow.
+//
+// More specifically, for each of the hottest 1000 functions, the pass loops
+// over its basic blocks that are non-entry and non-exit, and for each block
+// obtains a block gap = 1 - MIN(block inflow, block outflow, block call count
+// if any) / MAX(block inflow, block outflow, block call count if any). It then
+// aggregates the block gaps into 2 values for the function: "weighted" is the
+// weighted average of the block conservation gaps, where the weights depend on
+// each block's execution count and instruction count; "worst" is the worst
+// (biggest) block gap acorss all basic blocks in the function with an execution
+// count of > 500. The pass then reports the 95th percentile of the weighted and
+// worst values of the 1000 functions in a single BOLT-INFO line. The smaller
+// the reported values are, the better the BOLT profile satisfies the function
+// CFG flow conservation property.
+//
+// The default value of 1000 above can be changed via the hidden BOLT option
+// `-num-functions-for-profile-quality-check=[N]`.
+// The default reporting of the 95th percentile can be changed via the hidden
+// BOLT option `-percentile-for-profile-quality-check=[M]`.
+//
+// If more detailed stats are needed, `-v=1` can be used: the hottest N
+// functions will be grouped into 5 equally-sized buckets, from the hottest
+// to the coldest; for each bucket, various summary statistics of the
+// profile quality will be reported.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BOLT_PASSES_PROFILEQUALITYSTATS_H
+#define BOLT_PASSES_PROFILEQUALITYSTATS_H
+
+#include "bolt/Passes/BinaryPasses.h"
+#include <vector>
+
+namespace llvm {
+
+class raw_ostream;
+
+namespace bolt {
+class BinaryContext;
+
+/// Compute and report to the user the profile quality
+class PrintProfileQualityStats : public BinaryFunctionPass {
+public:
+ explicit PrintProfileQualityStats(const cl::opt<bool> &PrintPass)
+ : BinaryFunctionPass(PrintPass) {}
+
+ bool shouldOptimize(const BinaryFunction &BF) const override;
+ const char *getName() const override { return "profile-quality-stats"; }
+ bool shouldPrint(const BinaryFunction &) const override { return false; }
+ Error runOnFunctions(BinaryContext &BC) override;
+};
+
+} // namespace bolt
+} // namespace llvm
+
+#endif // BOLT_PASSES_PROFILEQUALITYSTATS_H
diff --git a/bolt/lib/Passes/CMakeLists.txt b/bolt/lib/Passes/CMakeLists.txt
index 1e3289484a5ba..692c82f2a5336 100644
--- a/bolt/lib/Passes/CMakeLists.txt
+++ b/bolt/lib/Passes/CMakeLists.txt
@@ -26,7 +26,7 @@ add_llvm_library(LLVMBOLTPasses
PatchEntries.cpp
PettisAndHansen.cpp
PLTCall.cpp
- ContinuityStats.cpp
+ ProfileQualityStats.cpp
RegAnalysis.cpp
RegReAssign.cpp
ReorderAlgorithm.cpp
diff --git a/bolt/lib/Passes/ContinuityStats.cpp b/bolt/lib/Passes/ContinuityStats.cpp
deleted file mode 100644
index b32365b59065d..0000000000000
--- a/bolt/lib/Passes/ContinuityStats.cpp
+++ /dev/null
@@ -1,250 +0,0 @@
-//===- bolt/Passes/ContinuityStats.cpp --------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the continuity stats calculation pass.
-//
-//===----------------------------------------------------------------------===//
-
-#include "bolt/Passes/ContinuityStats.h"
-#include "bolt/Core/BinaryBasicBlock.h"
-#include "bolt/Core/BinaryFunction.h"
-#include "bolt/Utils/CommandLineOpts.h"
-#include "llvm/Support/CommandLine.h"
-#include <queue>
-#include <unordered_map>
-#include <unordered_set>
-
-#define DEBUG_TYPE "bolt-opts"
-
-using namespace llvm;
-using namespace bolt;
-
-namespace opts {
-extern cl::opt<unsigned> Verbosity;
-cl::opt<unsigned> NumFunctionsForContinuityCheck(
- "num-functions-for-continuity-check",
- cl::desc("number of hottest functions to print aggregated "
- "CFG discontinuity stats of."),
- cl::init(1000), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
-} // namespace opts
-
-namespace {
-using FunctionListType = std::vector<const BinaryFunction *>;
-using function_iterator = FunctionListType::iterator;
-
-template <typename T>
-void printDistribution(raw_ostream &OS, std::vector<T> &values,
- bool Fraction = false) {
- if (values.empty())
- return;
- // Sort values from largest to smallest and print the MAX, TOP 1%, 5%, 10%,
- // 20%, 50%, 80%, MIN. If Fraction is true, then values are printed as
- // fractions instead of integers.
- std::sort(values.begin(), values.end());
-
- auto printLine = [&](std::string Text, double Percent) {
- int Rank = int(values.size() * (1.0 - Percent / 100));
- if (Percent == 0)
- Rank = values.size() - 1;
- if (Fraction)
- OS << " " << Text << std::string(9 - Text.length(), ' ') << ": "
- << format("%.2lf%%", values[Rank] * 100) << "\n";
- else
- OS << " " << Text << std::string(9 - Text.length(), ' ') << ": "
- << values[Rank] << "\n";
- };
-
- printLine("MAX", 0);
- const int percentages[] = {1, 5, 10, 20, 50, 80};
- for (size_t i = 0; i < sizeof(percentages) / sizeof(percentages[0]); ++i) {
- printLine("TOP " + std::to_string(percentages[i]) + "%", percentages[i]);
- }
- printLine("MIN", 100);
-}
-
-void printCFGContinuityStats(raw_ostream &OS,
- iterator_range<function_iterator> &Functions) {
- // Given a perfect profile, every positive-execution-count BB should be
- // connected to an entry of the function through a positive-execution-count
- // directed path in the control flow graph.
- std::vector<size_t> NumUnreachables;
- std::vector<size_t> SumECUnreachables;
- std::vector<double> FractionECUnreachables;
-
- for (auto it = Functions.begin(); it != Functions.end(); ++it) {
- const BinaryFunction *Function = *it;
- if (Function->size() <= 1)
- continue;
-
- // Compute the sum of all BB execution counts (ECs).
- size_t NumPosECBBs = 0;
- size_t SumAllBBEC = 0;
- for (const BinaryBasicBlock &BB : *Function) {
- const size_t BBEC = BB.getKnownExecutionCount();
- NumPosECBBs += BBEC > 0 ? 1 : 0;
- SumAllBBEC += BBEC;
- }
-
- // Perform BFS on subgraph of CFG induced by positive weight edges.
- // Compute the number of BBs reachable from the entry(s) of the function and
- // the sum of their execution counts (ECs).
- std::unordered_map<unsigned, const BinaryBasicBlock *> IndexToBB;
- std::unordered_set<unsigned> Visited;
- std::queue<unsigned> Queue;
- for (const BinaryBasicBlock &BB : *Function) {
- // Make sure BB.getIndex() is not already in IndexToBB.
- assert(IndexToBB.find(BB.getIndex()) == IndexToBB.end());
- IndexToBB[BB.getIndex()] = &BB;
- if (BB.isEntryPoint() && BB.getKnownExecutionCount() > 0) {
- Queue.push(BB.getIndex());
- Visited.insert(BB.getIndex());
- }
- }
- while (!Queue.empty()) {
- const unsigned BBIndex = Queue.front();
- const BinaryBasicBlock *BB = IndexToBB[BBIndex];
- Queue.pop();
- auto SuccBIIter = BB->branch_info_begin();
- for (const BinaryBasicBlock *Succ : BB->successors()) {
- const uint64_t Count = SuccBIIter->Count;
- if (Count == BinaryBasicBlock::COUNT_NO_PROFILE || Count == 0) {
- ++SuccBIIter;
- continue;
- }
- if (!Visited.insert(Succ->getIndex()).second) {
- ++SuccBIIter;
- continue;
- }
- Queue.push(Succ->getIndex());
- ++SuccBIIter;
- }
- }
-
- const size_t NumReachableBBs = Visited.size();
-
- // Loop through Visited, and sum the corresponding BBs' execution counts
- // (ECs).
- size_t SumReachableBBEC = 0;
- for (const unsigned BBIndex : Visited) {
- const BinaryBasicBlock *BB = IndexToBB[BBIndex];
- SumReachableBBEC += BB->getKnownExecutionCount();
- }
-
- const size_t NumPosECBBsUnreachableFromEntry =
- NumPosECBBs - NumReachableBBs;
- const size_t SumUnreachableBBEC = SumAllBBEC - SumReachableBBEC;
- const double FractionECUnreachable =
- (double)SumUnreachableBBEC / SumAllBBEC;
-
- if (opts::Verbosity >= 2 && FractionECUnreachable >= 0.05) {
- OS << "Non-trivial CFG discontinuity observed in function "
- << Function->getPrintName() << "\n";
- LLVM_DEBUG(Function->dump());
- }
-
- NumUnreachables.push_back(NumPosECBBsUnreachableFromEntry);
- SumECUnreachables.push_back(SumUnreachableBBEC);
- FractionECUnreachables.push_back(FractionECUnreachable);
- }
-
- if (FractionECUnreachables.empty())
- return;
-
- std::sort(FractionECUnreachables.begin(), FractionECUnreachables.end());
- const int Rank = int(FractionECUnreachables.size() * 0.95);
- OS << format("top 5%% function CFG discontinuity is %.2lf%%\n",
- FractionECUnreachables[Rank] * 100);
-
- if (opts::Verbosity >= 1) {
- OS << "abbreviations: EC = execution count, POS BBs = positive EC BBs\n"
- << "distribution of NUM(unreachable POS BBs) among all focal "
- "functions\n";
- printDistribution(OS, NumUnreachables);
-
- OS << "distribution of SUM_EC(unreachable POS BBs) among all focal "
- "functions\n";
- printDistribution(OS, SumECUnreachables);
-
- OS << "distribution of [(SUM_EC(unreachable POS BBs) / SUM_EC(all "
- "POS BBs))] among all focal functions\n";
- printDistribution(OS, FractionECUnreachables, /*Fraction=*/true);
- }
-}
-
-void printAll(BinaryContext &BC, FunctionListType &ValidFunctions,
- size_t NumTopFunctions) {
- // Sort the list of functions by execution counts (reverse).
- llvm::sort(ValidFunctions,
- [&](const BinaryFunction *A, const BinaryFunction *B) {
- return A->getKnownExecutionCount() > B->getKnownExecutionCount();
- });
-
- const size_t RealNumTopFunctions =
- std::min(NumTopFunctions, ValidFunctions.size());
-
- iterator_range<function_iterator> Functions(
- ValidFunctions.begin(), ValidFunctions.begin() + RealNumTopFunctions);
-
- BC.outs() << format("BOLT-INFO: among the hottest %zu functions ",
- RealNumTopFunctions);
- printCFGContinuityStats(BC.outs(), Functions);
-
- // Print more detailed bucketed stats if requested.
- if (opts::Verbosity >= 1 && RealNumTopFunctions >= 5) {
- const size_t PerBucketSize = RealNumTopFunctions / 5;
- BC.outs() << format(
- "Detailed stats for 5 buckets, each with %zu functions:\n",
- PerBucketSize);
-
- // For each bucket, print the CFG continuity stats of the functions in the
- // bucket.
- for (size_t BucketIndex = 0; BucketIndex < 5; ++BucketIndex) {
- const size_t StartIndex = BucketIndex * PerBucketSize;
- const size_t EndIndex = StartIndex + PerBucketSize;
- iterator_range<function_iterator> Functions(
- ValidFunctions.begin() + StartIndex,
- ValidFunctions.begin() + EndIndex);
- const size_t MaxFunctionExecutionCount =
- ValidFunctions[StartIndex]->getKnownExecutionCount();
- const size_t MinFunctionExecutionCount =
- ValidFunctions[EndIndex - 1]->getKnownExecutionCount();
- BC.outs() << format("----------------\n| Bucket %zu: "
- "|\n----------------\n",
- BucketIndex + 1)
- << format(
- "execution counts of the %zu functions in the bucket: "
- "%zu-%zu\n",
- EndIndex - StartIndex, MinFunctionExecutionCount,
- MaxFunctionExecutionCount);
- printCFGContinuityStats(BC.outs(), Functions);
- }
- }
-}
-} // namespace
-
-bool PrintContinuityStats::shouldOptimize(const BinaryFunction &BF) const {
- if (BF.empty() || !BF.hasValidProfile())
- return false;
-
- return BinaryFunctionPass::shouldOptimize(BF);
-}
-
-Error PrintContinuityStats::runOnFunctions(BinaryContext &BC) {
- // Create a list of functions with valid profiles.
- FunctionListType ValidFunctions;
- for (const auto &BFI : BC.getBinaryFunctions()) {
- const BinaryFunction *Function = &BFI.second;
- if (PrintContinuityStats::shouldOptimize(*Function))
- ValidFunctions.push_back(Function);
- }
- if (ValidFunctions.empty() || opts::NumFunctionsForContinuityCheck == 0)
- return Error::success();
-
- printAll(BC, ValidFunctions, opts::NumFunctionsForContinuityCheck);
- return Error::success();
-}
diff --git a/bolt/lib/Passes/ProfileQualityStats.cpp b/bolt/lib/Passes/ProfileQualityStats.cpp
new file mode 100644
index 0000000000000..a42b4dc27947a
--- /dev/null
+++ b/bolt/lib/Passes/ProfileQualityStats.cpp
@@ -0,0 +1,579 @@
+//===- bolt/Passes/ProfileQualityStats.cpp ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the profile quality stats calculation pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Passes/ProfileQualityStats.h"
+#include "bolt/Core/BinaryBasicBlock.h"
+#include "bolt/Core/BinaryFunction.h"
+#include "bolt/Utils/CommandLineOpts.h"
+#include "llvm/Support/CommandLine.h"
+#include <queue>
+#include <unordered_map>
+#include <unordered_set>
+
+#define DEBUG_TYPE "bolt-opts"
+
+using namespace llvm;
+using namespace bolt;
+
+namespace opts {
+extern cl::opt<unsigned> Verbosity;
+cl::opt<unsigned> NumFunctionsForProfileQualityCheck(
+ "num-functions-for-profile-quality-check",
+ cl::desc("number of hottest functions to print aggregated "
+ "profile quality stats of."),
+ cl::init(1000), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
+cl::opt<unsigned> PercentileForProfileQualityCheck(
+ "percentile-for-profile-quality-check",
+ cl::desc("Percentile of profile quality distributions over hottest "
+ "functions to display."),
+ cl::init(95), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
+} // namespace opts
+
+namespace {
+using FunctionListType = std::vector<const BinaryFunction *>;
+using function_iterator = FunctionListType::iterator;
+
+// BB index -> flow count
+using FlowMapTy = std::unordered_map<unsigned, uint64_t>;
+// Function number -> FlowMapTy
+using TotalFlowMapTy = std::unordered_map<uint64_t, FlowMapTy>;
+// Function number -> flow count
+using FunctionFlowMapTy = std::unordered_map<uint64_t, uint64_t>;
+struct FlowInfo {
+ TotalFlowMapTy TotalIncomingMaps;
+ TotalFlowMapTy TotalOutgoingMaps;
+ TotalFlowMapTy TotalMaxCountMaps;
+ TotalFlowMapTy TotalMinCountMaps;
+ FunctionFlowMapTy CallGraphIncomingMap;
+};
+
+template <typename T>
+void printDistribution(raw_ostream &OS, std::vector<T> &values,
+ bool Fraction = false) {
+ // Assume values are sorted.
+ if (values.empty())
+ return;
+
+ OS << " Length : " << values.size() << "\n";
+
+ auto printLine = [&](std::string Text, double Percent) {
+ int Rank = int(values.size() * (100 - Percent) / 100);
+ if (Percent == 0)
+ Rank = values.size() - 1;
+ if (Fraction)
+ OS << " " << Text << std::string(11 - Text.length(), ' ') << ": "
+ << format("%.2lf%%", values[Rank] * 100) << "\n";
+ else
+ OS << " " << Text << std::string(11 - Text.length(), ' ') << ": "
+ << values[Rank] << "\n";
+ };
+
+ printLine("MAX", 0);
+ const int percentages[] = {1, 5, 10, 20, 50, 80};
+ for (size_t i = 0; i < sizeof(percentages) / sizeof(percentages[0]); ++i) {
+ printLine("TOP " + std::to_string(percentages[i]) + "%", percentages[i]);
+ }
+ printLine("MIN", 100);
+}
+
+void printCFGContinuityStats(raw_ostream &OS,
+ iterator_range<function_iterator> &Functions) {
+ // Given a perfect profile, every positive-execution-count BB should be
+ // connected to an entry of the function through a positive-execution-count
+ // directed path in the control flow graph.
+ std::vector<size_t> NumUnreachables;
+ std::vector<size_t> SumECUnreachables;
+ std::vector<double> FractionECUnreachables;
+
+ for (auto it = Functions.begin(); it != Functions.end(); ++it) {
+ const BinaryFunction *Function = *it;
+ if (Function->size() <= 1)
+ continue;
+
+ // Compute the sum of all BB execution counts (ECs).
+ size_t NumPosECBBs = 0;
+ size_t SumAllBBEC = 0;
+ for (const BinaryBasicBlock &BB : *Function) {
+ const size_t BBEC = BB.getKnownExecutionCount();
+ NumPosECBBs += BBEC > 0 ? 1 : 0;
+ SumAllBBEC += BBEC;
+ }
+
+ // Perform BFS on subgraph of CFG induced by positive weight edges.
+ // Compute the number of BBs reachable from the entry(s) of the function and
+ // the sum of their execution counts (ECs).
+ std::unordered_map<unsigned, const BinaryBasicBlock *> IndexToBB;
+ std::unordered_set<unsigned> Visited;
+ std::queue<unsigned> Queue;
+ for (const BinaryBasicBlock &BB : *Function) {
+ // Make sure BB.getIndex() is not already in IndexToBB.
+ assert(IndexToBB.find(BB.getIndex()) == IndexToBB.end());
+ IndexToBB[BB.getIndex()] = &BB;
+ if (BB.isEntryPoint() && BB.getKnownExecutionCount() > 0) {
+ Queue.push(BB.getIndex());
+ Visited.insert(BB.getIndex());
+ }
+ }
+ while (!Queue.empty()) {
+ const unsigned BBIndex = Queue.front();
+ const BinaryBasicBlock *BB = IndexToBB[BBIndex];
+ Queue.pop();
+ auto SuccBIIter = BB->branch_info_begin();
+ for (const BinaryBasicBlock *Succ : BB->successors()) {
+ const uint64_t Count = SuccBIIter->Count;
+ if (Count == BinaryBasicBlock::COUNT_NO_PROFILE || Count == 0) {
+ ++SuccBIIter;
+ continue;
+ }
+ if (!Visited.insert(Succ->getIndex()).second) {
+ ++SuccBIIter;
+ continue;
+ }
+ Queue.push(Succ->getIndex());
+ ++SuccBIIter;
+ }
+ }
+
+ const size_t NumReachableBBs = Visited.size();
+
+ // Loop through Visited, and sum the corresponding BBs' execution counts
+ // (ECs).
+ size_t SumReachableBBEC = 0;
+ for (const unsigned BBIndex : Visited) {
+ const BinaryBasicBlock *BB = IndexToBB[BBIndex];
+ SumReachableBBEC += BB->getKnownExecutionCount();
+ }
+
+ const size_t NumPosECBBsUnreachableFromEntry =
+ NumPosECBBs - NumReachableBBs;
+ const size_t SumUnreachableBBEC = SumAllBBEC - SumReachableBBEC;
+ const double FractionECUnreachable =
+ (double)SumUnreachableBBEC / SumAllBBEC;
+
+ if (opts::Verbosity >= 2 && FractionECUnreachable >= 0.05) {
+ OS << "Non-trivial CFG discontinuity observed in function "
+ << Function->getPrintName() << "\n";
+ LLVM_DEBUG(Function->dump());
+ }
+
+ NumUnreachables.push_back(NumPosECBBsUnreachableFromEntry);
+ SumECUnreachables.push_back(SumUnreachableBBEC);
+ FractionECUnreachables.push_back(FractionECUnreachable);
+ }
+
+ if (FractionECUnreachables.empty())
+ return;
+
+ std::sort(FractionECUnreachables.begin(), FractionECUnreachables.end());
+ const int Rank = int(FractionECUnreachables.size() *
+ opts::PercentileForProfileQualityCheck / 100);
+ OS << format("top %zu%% function CFG discontinuity is %.2lf%%\n",
+ 100 - opts::PercentileForProfileQualityCheck,
+ FractionECUnreachables[Rank] * 100);
+ if (opts::Verbosity >= 1) {
+ OS << "abbreviations: EC = execution count, POS BBs = positive EC BBs\n"
+ << "distribution of NUM(unreachable POS BBs) per function\n";
+ std::sort(NumUnreachables.begin(), NumUnreachables.end());
+ printDistribution(OS, NumUnreachables);
+
+ OS << "distribution of SUM_EC(unreachable POS BBs) per function\n";
+ std::sort(SumECUnreachables.begin(), SumECUnreachables.end());
+ printDistribution(OS, SumECUnreachables);
+
+ OS << "distribution of [(SUM_EC(unreachable POS BBs) / SUM_EC(all "
+ "POS BBs))] per function\n";
+ printDistribution(OS, FractionECUnreachables, /*Fraction=*/true);
+ }
+}
+
+void printCallGraphFlowConservationStats(
+ raw_ostream &OS, iterator_range<function_iterator> &Functions,
+ FlowInfo &TotalFlowMap) {
+ std::vector<double> CallGraphGaps;
+
+ for (auto it = Functions.begin(); it != Functions.end(); ++it) {
+ const BinaryFunction *Function = *it;
+ if (Function->size() <= 1 || !Function->isSimple())
+ continue;
+
+ const uint64_t FunctionNum = Function->getFunctionNumber();
+ FlowMapTy &IncomingMap = TotalFlowMap.TotalIncomingMaps[FunctionNum];
+ FlowMapTy &OutgoingMap = TotalFlowMap.TotalOutgoingMaps[FunctionNum];
+ FunctionFlowMapTy &CallGraphIncomingMap = TotalFlowMap.CallGraphIncomingMap;
+
+ // Only consider functions that are not a program entry.
+ if (CallGraphIncomingMap.find(FunctionNum) != CallGraphIncomingMap.end()) {
+ uint64_t EntryInflow = 0;
+ uint64_t EntryOutflow = 0;
+ uint32_t NumConsideredEntryBlocks = 0;
+ for (const BinaryBasicBlock &BB : *Function) {
+ if (BB.isEntryPoint()) {
+ // If entry is an exit, then we don't consider it for flow
+ // conservation
+ if (BB.succ_size() == 0)
+ continue;
+ NumConsideredEntryBlocks++;
+
+ EntryInflow += IncomingMap[BB.getIndex()];
+ EntryOutflow += OutgoingMap[BB.getIndex()];
+ }
+ }
+ uint64_t NetEntryOutflow = 0;
+ if (EntryOutflow < EntryInflow) {
+ if (opts::Verbosity >= 1) {
+ // We expect entry blocks' CFG outflow >= inflow, i.e., it has a
+ // non-negative net outflow. If this is not the case, then raise a
+ // warning if requested.
+ OS << "BOLT WARNING: unexpected entry block CFG outflow < inflow in "
+ "function "
+ << Function->getPrintName() << "\n";
+ LLVM_DEBUG(Function->dump());
+ }
+ } else {
+ NetEntryOutflow = EntryOutflow - EntryInflow;
+ }
+ if (NumConsideredEntryBlocks > 0) {
+ const uint64_t CallGraphInflow =
+ TotalFlowMap.CallGraphIncomingMap[Function->getFunctionNumber()];
+ const uint64_t Min = std::min(NetEntryOutflow, CallGraphInflow);
+ const uint64_t Max = std::max(NetEntryOutflow, CallGraphInflow);
+ const double CallGraphGap = 1 - (double)Min / Max;
+
+ if (opts::Verbosity >= 2 && CallGraphGap >= 0.5) {
+ OS << "Nontrivial call graph gap of size "
+ << format("%.2lf%%", 100 * CallGraphGap)
+ << " observed in function " << Function->getPrintName() << "\n";
+ LLVM_DEBUG(Function->dump());
+ }
+
+ CallGraphGaps.push_back(CallGraphGap);
+ }
+ }
+ }
+
+ if (!CallGraphGaps.empty()) {
+ std::sort(CallGraphGaps.begin(), CallGraphGaps.end());
+ const int Rank = int(CallGraphGaps.size() *
+ opts::PercentileForProfileQualityCheck / 100);
+ OS << format("top %zu%% call graph flow conservation gap is %.2lf%%\n",
+ 100 - opts::PercentileForProfileQualityCheck,
+ CallGraphGaps[Rank] * 100);
+ if (opts::Verbosity >= 1) {
+ OS << "distribution of function entry flow conservation gaps\n";
+ printDistribution(OS, CallGraphGaps, /*Fraction=*/true);
+ }
+ }
+}
+
+void printCFGFlowConservationStats(raw_ostream &OS,
+ iterator_range<function_iterator> &Functions,
+ FlowInfo &TotalFlowMap) {
+ std::vector<double> CFGGapsWeightedAvg;
+ std::vector<double> CFGGapsWorst;
+ std::vector<uint64_t> CFGGapsWorstAbs;
+
+ for (auto it = Functions.begin(); it != Functions.end(); ++it) {
+ const BinaryFunction *Function = *it;
+ if (Function->size() <= 1 || !Function->isSimple())
+ continue;
+
+ const uint64_t FunctionNum = Function->getFunctionNumber();
+ FlowMapTy &MaxCountMaps = TotalFlowMap.TotalMaxCountMaps[FunctionNum];
+ FlowMapTy &MinCountMaps = TotalFlowMap.TotalMinCountMaps[FunctionNum];
+ double WeightedGapSum = 0.0;
+ double WeightSum = 0.0;
+ double WorstGap = 0.0;
+ uint64_t WorstGapAbs = 0;
+ BinaryBasicBlock *BBWorstGap = nullptr;
+ BinaryBasicBlock *BBWorstGapAbs = nullptr;
+ for (BinaryBasicBlock &BB : *Function) {
+ // We don't consider function entry or exit blocks for CFG flow
+ // conservation
+ if (BB.isEntryPoint() || BB.succ_size() == 0)
+ continue;
+
+ const uint64_t Max = MaxCountMaps[BB.getIndex()];
+ const uint64_t Min = MinCountMaps[BB.getIndex()];
+ const double Gap = 1 - (double)Min / Max;
+ double Weight = BB.getKnownExecutionCount() * BB.getNumNonPseudos();
+ if (Weight == 0)
+ continue;
+ // We use log to prevent the stats from being dominated by extremely hot
+ // blocks
+ Weight = log(Weight);
+ WeightedGapSum += Gap * Weight;
+ WeightSum += Weight;
+ if (BB.getKnownExecutionCount() > 500 && Gap > WorstGap) {
+ WorstGap = Gap;
+ BBWorstGap = &BB;
+ }
+ if (BB.getKnownExecutionCount() > 500 && Max - Min > WorstGapAbs) {
+ WorstGapAbs = Max - Min;
+ BBWorstGapAbs = &BB;
+ }
+ }
+ if (WeightSum > 0) {
+ const double WeightedGap = WeightedGapSum / WeightSum;
+ if (opts::Verbosity >= 2 && (WeightedGap >= 0.1 || WorstGap >= 0.9)) {
+ OS << "Nontrivial CFG gap observed in function "
+ << Function->getPrintName() << "\n"
+ << "Weighted gap: " << format("%.2lf%%", 100 * WeightedGap) << "\n";
+ if (BBWorstGap)
+ OS << "Worst gap: " << format("%.2lf%%", 100 * WorstGap)
+ << " at BB with input offset: 0x"
+ << Twine::utohexstr(BBWorstGap->getInputOffset()) << "\n";
+ if (BBWorstGapAbs)
+ OS << "Worst gap (absolute value): " << WorstGapAbs << " at BB with "
+ << "input offset 0x"
+ << Twine::utohexstr(BBWorstGapAbs->getInputOffset()) << "\n";
+ LLVM_DEBUG(Function->dump());
+ }
+
+ CFGGapsWeightedAvg.push_back(WeightedGap);
+ CFGGapsWorst.push_back(WorstGap);
+ CFGGapsWorstAbs.push_back(WorstGapAbs);
+ }
+ }
+
+ if (!CFGGapsWeightedAvg.empty()) {
+ std::sort(CFGGapsWeightedAvg.begin(), CFGGapsWeightedAvg.end());
+ const int RankWA = int(CFGGapsWeightedAvg.size() *
+ opts::PercentileForProfileQualityCheck / 100);
+ std::sort(CFGGapsWorst.begin(), CFGGapsWorst.end());
+ const int RankW =
+ int(CFGGapsWorst.size() * opts::PercentileForProfileQualityCheck / 100);
+ OS << format(
+ "top %zu%% CFG flow conservation gap is %.2lf%% (weighted) and "
+ "%.2lf%% (worst)\n",
+ 100 - opts::PercentileForProfileQualityCheck,
+ CFGGapsWeightedAvg[RankWA] * 100, CFGGapsWorst[RankW] * 100);
+ if (opts::Verbosity >= 1) {
+ OS << "distribution of weighted CFG flow conservation gaps\n";
+ printDistribution(OS, CFGGapsWeightedAvg, /*Fraction=*/true);
+ OS << "Consider only blocks with execution counts > 500:\n"
+ << "distribution of worst block flow conservation gap per "
+ "function \n";
+ printDistribution(OS, CFGGapsWorst, /*Fraction=*/true);
+ OS << "distribution of worst block flow conservation gap (absolute "
+ "value) per function\n";
+ std::sort(CFGGapsWorstAbs.begin(), CFGGapsWorstAbs.end());
+ printDistribution(OS, CFGGapsWorstAbs, /*Fraction=*/false);
+ }
+ }
+}
+
+void computeFlowMappings(const BinaryContext &BC, FlowInfo &TotalFlowMap) {
+ // Increment block inflow and outflow with CFG jump counts.
+ TotalFlowMapTy &TotalIncomingMaps = TotalFlowMap.TotalIncomingMaps;
+ TotalFlowMapTy &TotalOutgoingMaps = TotalFlowMap.TotalOutgoingMaps;
+ for (const auto &BFI : BC.getBinaryFunctions()) {
+ const BinaryFunction *Function = &BFI.second;
+ if (Function->empty() || !Function->hasValidProfile())
+ continue;
+ FlowMapTy &IncomingMap = TotalIncomingMaps[Function->getFunctionNumber()];
+ FlowMapTy &OutgoingMap = TotalOutgoingMaps[Function->getFunctionNumber()];
+ for (const BinaryBasicBlock &BB : *Function) {
+ uint64_t TotalOutgoing = 0ULL;
+ auto SuccBIIter = BB.branch_info_begin();
+ for (BinaryBasicBlock *Succ : BB.successors()) {
+ const uint64_t Count = SuccBIIter->Count;
+ if (Count == BinaryBasicBlock::COUNT_NO_PROFILE || Count == 0) {
+ ++SuccBIIter;
+ continue;
+ }
+ TotalOutgoing += Count;
+ IncomingMap[Succ->getIndex()] += Count;
+ ++SuccBIIter;
+ }
+ OutgoingMap[BB.getIndex()] = TotalOutgoing;
+ }
+ }
+
+ // Initialize TotalMaxCountMaps and TotalMinCountMaps using
+ // TotalIncomingMaps and TotalOutgoingMaps
+ TotalFlowMapTy &TotalMaxCountMaps = TotalFlowMap.TotalMaxCountMaps;
+ TotalFlowMapTy &TotalMinCountMaps = TotalFlowMap.TotalMinCountMaps;
+ for (const auto &BFI : BC.getBinaryFunctions()) {
+ const BinaryFunction *Function = &BFI.second;
+ if (Function->empty() || !Function->hasValidProfile())
+ continue;
+ uint64_t FunctionNum = Function->getFunctionNumber();
+ FlowMapTy &IncomingMap = TotalIncomingMaps[FunctionNum];
+ FlowMapTy &OutgoingMap = TotalOutgoingMaps[FunctionNum];
+ FlowMapTy &MaxCountMap = TotalMaxCountMaps[FunctionNum];
+ FlowMapTy &MinCountMap = TotalMinCountMaps[FunctionNum];
+ for (const BinaryBasicBlock &BB : *Function) {
+ uint64_t BBNum = BB.getIndex();
+ MaxCountMap[BBNum] = std::max(IncomingMap[BBNum], OutgoingMap[BBNum]);
+ MinCountMap[BBNum] = std::min(IncomingMap[BBNum], OutgoingMap[BBNum]);
+ }
+ }
+
+ // Modify TotalMaxCountMaps and TotalMinCountMaps using call counts and
+ // fill out CallGraphIncomingMap
+ FunctionFlowMapTy &CallGraphIncomingMap = TotalFlowMap.CallGraphIncomingMap;
+ for (const auto &BFI : BC.getBinaryFunctions()) {
+ const BinaryFunction *Function = &BFI.second;
+ uint64_t FunctionNum = Function->getFunctionNumber();
+ FlowMapTy &MaxCountMap = TotalMaxCountMaps[FunctionNum];
+ FlowMapTy &MinCountMap = TotalMinCountMaps[FunctionNum];
+
+ // Update MaxCountMap, MinCountMap, and CallGraphIncomingMap
+ auto recordCall = [&](const BinaryBasicBlock *SourceBB,
+ const MCSymbol *DestSymbol, uint64_t Count) {
+ if (Count == BinaryBasicBlock::COUNT_NO_PROFILE)
+ Count = 0;
+ const BinaryFunction *DstFunc =
+ DestSymbol ? BC.getFunctionForSymbol(DestSymbol) : nullptr;
+ if (DstFunc)
+ CallGraphIncomingMap[DstFunc->getFunctionNumber()] += Count;
+ if (SourceBB) {
+ unsigned BlockIndex = SourceBB->getIndex();
+ MaxCountMap[BlockIndex] = std::max(MaxCountMap[BlockIndex], Count);
+ MinCountMap[BlockIndex] = std::min(MinCountMap[BlockIndex], Count);
+ }
+ };
+
+ // Get pairs of (symbol, count) for each target at this callsite.
+ // If the call is to an unknown function the symbol will be nullptr.
+ // If there is no profiling data the count will be COUNT_NO_PROFILE.
+ using TargetDesc = std::pair<const MCSymbol *, uint64_t>;
+ using CallInfoTy = std::vector<TargetDesc>;
+ auto getCallInfo = [&](const BinaryBasicBlock *BB, const MCInst &Inst) {
+ CallInfoTy Counts;
+ const MCSymbol *DstSym = BC.MIB->getTargetSymbol(Inst);
+
+ // If this is an indirect call use perf data directly.
+ if (!DstSym && BC.MIB->hasAnnotation(Inst, "CallProfile")) {
+ const auto &ICSP = BC.MIB->getAnnotationAs<IndirectCallSiteProfile>(
+ Inst, "CallProfile");
+ for (const IndirectCallProfile &CSI : ICSP)
+ if (CSI.Symbol)
+ Counts.emplace_back(CSI.Symbol, CSI.Count);
+ } else {
+ const uint64_t Count = BB->getExecutionCount();
+ Counts.emplace_back(DstSym, Count);
+ }
+
+ return Counts;
+ };
+
+ // If the function has an invalid profile, try to use the perf data
+ // directly. The call EC is only used to update CallGraphIncomingMap.
+ if (!Function->hasValidProfile() && !Function->getAllCallSites().empty()) {
+ for (const IndirectCallProfile &CSI : Function->getAllCallSites()) {
+ if (!CSI.Symbol)
+ continue;
+ recordCall(nullptr, CSI.Symbol, CSI.Count);
+ }
+ continue;
+ } else {
+ // If the function has a valid profile
+ for (BinaryBasicBlock &BB : *Function) {
+ for (MCInst &Inst : BB) {
+ if (!BC.MIB->isCall(Inst))
+ continue;
+ // Find call instructions and extract target symbols from each
+ // one.
+ const CallInfoTy CallInfo = getCallInfo(&BB, Inst);
+ for (const TargetDesc &CI : CallInfo) {
+ recordCall(&BB, CI.first, CI.second);
+ }
+ }
+ }
+ }
+ }
+}
+
+void printAll(BinaryContext &BC, FunctionListType &ValidFunctions,
+ size_t NumTopFunctions) {
+ // Sort the list of functions by execution counts (reverse).
+ llvm::sort(ValidFunctions,
+ [&](const BinaryFunction *A, const BinaryFunction *B) {
+ return A->getKnownExecutionCount() > B->getKnownExecutionCount();
+ });
+
+ const size_t RealNumTopFunctions =
+ std::min(NumTopFunctions, ValidFunctions.size());
+
+ iterator_range<function_iterator> Functions(
+ ValidFunctions.begin(), ValidFunctions.begin() + RealNumTopFunctions);
+
+ FlowInfo TotalFlowMap;
+ computeFlowMappings(BC, TotalFlowMap);
+
+ BC.outs() << format("BOLT-INFO: among the hottest %zu functions ",
+ RealNumTopFunctions);
+ printCFGContinuityStats(BC.outs(), Functions);
+ BC.outs() << format("BOLT-INFO: among the hottest %zu functions ",
+ RealNumTopFunctions);
+ printCallGraphFlowConservationStats(BC.outs(), Functions, TotalFlowMap);
+ BC.outs() << format("BOLT-INFO: among the hottest %zu functions ",
+ RealNumTopFunctions);
+ printCFGFlowConservationStats(BC.outs(), Functions, TotalFlowMap);
+
+ // Print more detailed bucketed stats if requested.
+ if (opts::Verbosity >= 1 && RealNumTopFunctions >= 5) {
+ const size_t PerBucketSize = RealNumTopFunctions / 5;
+ BC.outs() << format(
+ "Detailed stats for 5 buckets, each with %zu functions:\n",
+ PerBucketSize);
+
+ // For each bucket, print the CFG continuity stats of the functions in
+ // the bucket.
+ for (size_t BucketIndex = 0; BucketIndex < 5; ++BucketIndex) {
+ const size_t StartIndex = BucketIndex * PerBucketSize;
+ const size_t EndIndex = StartIndex + PerBucketSize;
+ iterator_range<function_iterator> Functions(
+ ValidFunctions.begin() + StartIndex,
+ ValidFunctions.begin() + EndIndex);
+ const size_t MaxFunctionExecutionCount =
+ ValidFunctions[StartIndex]->getKnownExecutionCount();
+ const size_t MinFunctionExecutionCount =
+ ValidFunctions[EndIndex - 1]->getKnownExecutionCount();
+ BC.outs() << format("----------------\n| Bucket %zu: "
+ "|\n----------------\n",
+ BucketIndex + 1)
+ << format(
+ "execution counts of the %zu functions in the bucket: "
+ "%zu-%zu\n",
+ EndIndex - StartIndex, MinFunctionExecutionCount,
+ MaxFunctionExecutionCount);
+ printCFGContinuityStats(BC.outs(), Functions);
+ printCallGraphFlowConservationStats(BC.outs(), Functions, TotalFlowMap);
+ printCFGFlowConservationStats(BC.outs(), Functions, TotalFlowMap);
+ }
+ }
+}
+} // namespace
+
+bool PrintProfileQualityStats::shouldOptimize(const BinaryFunction &BF) const {
+ if (BF.empty() || !BF.hasValidProfile())
+ return false;
+
+ return BinaryFunctionPass::shouldOptimize(BF);
+}
+
+Error PrintProfileQualityStats::runOnFunctions(BinaryContext &BC) {
+ // Create a list of functions with valid profiles.
+ FunctionListType ValidFunctions;
+ for (const auto &BFI : BC.getBinaryFunctions()) {
+ const BinaryFunction *Function = &BFI.second;
+ if (PrintProfileQualityStats::shouldOptimize(*Function))
+ ValidFunctions.push_back(Function);
+ }
+ if (ValidFunctions.empty() || opts::NumFunctionsForProfileQualityCheck == 0)
+ return Error::success();
+
+ printAll(BC, ValidFunctions, opts::NumFunctionsForProfileQualityCheck);
+ return Error::success();
+}
diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp
index 2d851c751ae10..6f750b95adf9a 100644
--- a/bolt/lib/Rewrite/BinaryPassManager.cpp
+++ b/bolt/lib/Rewrite/BinaryPassManager.cpp
@@ -12,7 +12,6 @@
#include "bolt/Passes/AllocCombiner.h"
#include "bolt/Passes/AsmDump.h"
#include "bolt/Passes/CMOVConversion.h"
-#include "bolt/Passes/ContinuityStats.h"
#include "bolt/Passes/FixRISCVCallsPass.h"
#include "bolt/Passes/FixRelaxationPass.h"
#include "bolt/Passes/FrameOptimizer.h"
@@ -27,6 +26,7 @@
#include "bolt/Passes/MCF.h"
#include "bolt/Passes/PLTCall.h"
#include "bolt/Passes/PatchEntries.h"
+#include "bolt/Passes/ProfileQualityStats.h"
#include "bolt/Passes/RegReAssign.h"
#include "bolt/Passes/ReorderData.h"
#include "bolt/Passes/ReorderFunctions.h"
@@ -58,10 +58,10 @@ extern cl::opt<bolt::IdenticalCodeFolding::ICFLevel, false,
llvm::bolt::DeprecatedICFNumericOptionParser>
ICF;
-static cl::opt<bool>
-DynoStatsAll("dyno-stats-all",
- cl::desc("print dyno stats after each stage"),
- cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory));
+static cl::opt<bool> DynoStatsAll("dyno-stats-all",
+ cl::desc("print dyno stats after each stage"),
+ cl::ZeroOrMore, cl::Hidden,
+ cl::cat(BoltCategory));
static cl::opt<bool>
EliminateUnreachable("eliminate-unreachable",
@@ -82,25 +82,24 @@ cl::opt<bool>
cl::opt<bool> NeverPrint("never-print", cl::desc("never print"),
cl::ReallyHidden, cl::cat(BoltOptCategory));
-cl::opt<bool>
-PrintAfterBranchFixup("print-after-branch-fixup",
- cl::desc("print function after fixing local branches"),
- cl::Hidden, cl::cat(BoltOptCategory));
+cl::opt<bool> PrintAfterBranchFixup(
+ "print-after-branch-fixup",
+ cl::desc("print function after fixing local branches"), cl::Hidden,
+ cl::cat(BoltOptCategory));
static cl::opt<bool>
-PrintAfterLowering("print-after-lowering",
- cl::desc("print function after instruction lowering"),
- cl::Hidden, cl::cat(BoltOptCategory));
+ PrintAfterLowering("print-after-lowering",
+ cl::desc("print function after instruction lowering"),
+ cl::Hidden, cl::cat(BoltOptCategory));
static cl::opt<bool> PrintEstimateEdgeCounts(
"print-estimate-edge-counts",
cl::desc("print function after edge counts are set for no-LBR profile"),
cl::Hidden, cl::cat(BoltOptCategory));
-cl::opt<bool>
-PrintFinalized("print-finalized",
- cl::desc("print function after CFG is finalized"),
- cl::Hidden, cl::cat(BoltOptCategory));
+cl::opt<bool> PrintFinalized("print-finalized",
+ cl::desc("print function after CFG is finalized"),
+ cl::Hidden, cl::cat(BoltOptCategory));
static cl::opt<bool>
PrintFOP("print-fop",
@@ -235,12 +234,13 @@ static cl::opt<bool> SimplifyRODataLoads(
"operand with the constant found in the corresponding section"),
cl::cat(BoltOptCategory));
-static cl::list<std::string>
-SpecializeMemcpy1("memcpy1-spec",
- cl::desc("list of functions with call sites for which to specialize memcpy() "
- "for size 1"),
- cl::value_desc("func1,func2:cs1:cs2,func3:cs1,..."),
- cl::ZeroOrMore, cl::cat(BoltOptCategory));
+static cl::list<std::string> SpecializeMemcpy1(
+ "memcpy1-spec",
+ cl::desc(
+ "list of functions with call sites for which to specialize memcpy() "
+ "for size 1"),
+ cl::value_desc("func1,func2:cs1:cs2,func3:cs1,..."), cl::ZeroOrMore,
+ cl::cat(BoltOptCategory));
static cl::opt<bool> Stoke("stoke", cl::desc("turn on the stoke analysis"),
cl::cat(BoltOptCategory));
@@ -379,7 +379,7 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
if (opts::PrintProfileStats)
Manager.registerPass(std::make_unique<PrintProfileStats>(NeverPrint));
- Manager.registerPass(std::make_unique<PrintContinuityStats>(NeverPrint));
+ Manager.registerPass(std::make_unique<PrintProfileQualityStats>(NeverPrint));
Manager.registerPass(std::make_unique<ValidateInternalCalls>(NeverPrint));
diff --git a/bolt/test/X86/cfg-discontinuity-reporting.test b/bolt/test/X86/cfg-discontinuity-reporting.test
deleted file mode 100644
index 4d7d3305cdb75..0000000000000
--- a/bolt/test/X86/cfg-discontinuity-reporting.test
+++ /dev/null
@@ -1,4 +0,0 @@
-## Check profile discontinuity reporting
-RUN: yaml2obj %p/Inputs/blarge_new.yaml &> %t.exe
-RUN: llvm-bolt %t.exe -o %t.out --pa -p %p/Inputs/blarge_new.preagg.txt | FileCheck %s
-CHECK: among the hottest 5 functions top 5% function CFG discontinuity is 100.00%
diff --git a/bolt/test/X86/profile-quality-reporting.test b/bolt/test/X86/profile-quality-reporting.test
new file mode 100644
index 0000000000000..ee4ecb9853dff
--- /dev/null
+++ b/bolt/test/X86/profile-quality-reporting.test
@@ -0,0 +1,6 @@
+## Check profile quality stats reporting
+RUN: yaml2obj %p/Inputs/blarge_new.yaml &> %t.exe
+RUN: llvm-bolt %t.exe -o %t.out --pa -p %p/Inputs/blarge_new.preagg.txt | FileCheck %s
+CHECK: among the hottest 5 functions top 5% function CFG discontinuity is 100.00%
+CHECK: among the hottest 5 functions top 5% call graph flow conservation gap is 60.00%
+CHECK: among the hottest 5 functions top 5% CFG flow conservation gap is 45.53% (weighted) and 96.87% (worst)
More information about the llvm-commits
mailing list