[llvm] [BOLT] Report flow conservation scores (PR #127954)
Thu Feb 27 20:06:53 PST 2025
@@ -0,0 +1,575 @@
+//===- bolt/Passes/ProfileQualityStats.cpp ----------------------*- C++ -*-===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// This file implements the profile quality stats calculation pass.
+#include "bolt/Passes/ProfileQualityStats.h"
+#include "bolt/Core/BinaryBasicBlock.h"
+#include "bolt/Core/BinaryFunction.h"
+#include "bolt/Utils/CommandLineOpts.h"
+#include "llvm/Support/CommandLine.h"
+#include <queue>
+#include <unordered_map>
+#include <unordered_set>
+using namespace llvm;
+using namespace bolt;
+namespace opts { // Command-line options consulted by the profile quality stats code.
+extern cl::opt<unsigned> Verbosity; // Declaration only; thresholds of 1, 2 and 3 gate the optional detail output.
+cl::opt<unsigned> NumFunctionsForProfileQualityCheck( // Hidden option; defaults to 1000.
+ "num-functions-for-profile-quality-check",
+ cl::desc("number of hottest functions to print aggregated "
+ "profile quality stats of."),
+ cl::init(1000), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
+cl::opt<unsigned> PercentileForProfileQualityCheck( // Hidden option; defaults to 95. Used as size * value / 100 to pick a rank in the sorted per-function stats.
+ "percentile-for-profile-quality-check",
+ cl::desc("Percentile of profile quality distributions over hottest "
+ "functions to report."),
+ cl::init(95), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
+} // namespace opts
+namespace {
+using FunctionListType = std::vector<const BinaryFunction *>; // Vector of read-only function pointers.
+using function_iterator = FunctionListType::iterator; // Iterator type of the ranges taken by the print* helpers.
+// Maps a basic block's layout index to a flow count.
+using FlowMapTy = std::unordered_map<unsigned, uint64_t>;
+// Maps a function number to that function's per-block FlowMapTy.
+using TotalFlowMapTy = std::unordered_map<uint64_t, FlowMapTy>;
+// Maps a function number to a single flow count for the whole function.
+using FunctionFlowMapTy = std::unordered_map<uint64_t, uint64_t>;
+struct FlowInfo { // Bundles the flow maps shared by the flow conservation reports.
+ TotalFlowMapTy TotalIncomingMaps; // Per function, per block: inflow (summed over entry blocks as EntryInflow).
+ TotalFlowMapTy TotalOutgoingMaps; // Per function, per block: outflow (summed over entry blocks as EntryOutflow).
+ TotalFlowMapTy TotalMaxCountMaps; // Per function, per block: value read as Max when computing a block's gap.
+ TotalFlowMapTy TotalMinCountMaps; // Per function, per block: value read as Min when computing a block's gap.
+ FunctionFlowMapTy CallGraphIncomingMap; // Per function: value read as the call graph inflow.
+/// Prints a short summary of \p values to \p OS: the element count, the
+/// maximum, the elements at the top 1/5/10/20/50/80 percent marks, and the
+/// minimum. The caller is expected to pass \p values sorted in ascending
+/// order; nothing is printed for an empty vector. When \p Fraction is set,
+/// each element is multiplied by 100 and printed as a percentage.
+template <typename T>
+void printDistribution(raw_ostream &OS, std::vector<T> &values,
+                       bool Fraction = false) {
+  if (values.empty())
+    return;
+  const size_t NumValues = values.size();
+  OS << " Length : " << NumValues << "\n";
+  // Emits one row. A TopPercent of 0 selects the last (largest) element;
+  // any other value selects index N * (100 - TopPercent) / 100, truncated.
+  auto EmitRow = [&](std::string Label, double TopPercent) {
+    const int Index = TopPercent == 0
+                          ? int(NumValues - 1)
+                          : int(NumValues * (100 - TopPercent) / 100);
+    OS << " " << Label << std::string(11 - Label.length(), ' ') << ": ";
+    if (Fraction)
+      OS << format("%.2lf%%", values[Index] * 100);
+    else
+      OS << values[Index];
+    OS << "\n";
+  };
+  EmitRow("MAX", 0);
+  const int TopPercents[] = {1, 5, 10, 20, 50, 80};
+  for (const int TopPercent : TopPercents)
+    EmitRow("TOP " + std::to_string(TopPercent) + "%", TopPercent);
+  EmitRow("MIN", 100);
+}
+/// Reports, for the given \p Functions, how much of each function's profile
+/// is disconnected from its entry points.
+///
+/// For every function with more than one block, a BFS is run from the entry
+/// blocks that have a positive execution count, following only CFG edges
+/// with a known, positive count. Blocks with a positive execution count that
+/// the BFS does not reach are "unreachable". The summary line printed to
+/// \p OS is the unreachable execution-count fraction at the rank selected by
+/// opts::PercentileForProfileQualityCheck; full distributions follow when
+/// opts::Verbosity >= 1.
+void printCFGContinuityStats(raw_ostream &OS,
+                             iterator_range<function_iterator> &Functions) {
+  // Given a perfect profile, every positive-execution-count BB should be
+  // connected to an entry of the function through a positive-execution-count
+  // directed path in the control flow graph.
+  std::vector<size_t> NumUnreachables;
+  std::vector<size_t> SumECUnreachables;
+  std::vector<double> FractionECUnreachables;
+  for (const BinaryFunction *Function : Functions) {
+    if (Function->size() <= 1)
+      continue;
+    // Count the positive-EC blocks and sum all BB execution counts (ECs).
+    size_t NumPosECBBs = 0;
+    size_t SumAllBBEC = 0;
+    for (const BinaryBasicBlock &BB : *Function) {
+      const size_t BBEC = BB.getKnownExecutionCount();
+      NumPosECBBs += !!BBEC;
+      SumAllBBEC += BBEC;
+    }
+    // Perform BFS on subgraph of CFG induced by positive weight edges.
+    // Compute the number of positive-EC BBs reachable from the entry(s) of
+    // the function and the sum of the reachable BBs' execution counts.
+    std::unordered_set<unsigned> Visited;
+    std::queue<unsigned> Queue;
+    size_t NumReachablePosECBBs = 0;
+    size_t SumReachableBBEC = 0;
+    Function->forEachEntryPoint([&](uint64_t Offset, const MCSymbol *Label) {
+      const BinaryBasicBlock *EntryBB = Function->getBasicBlockAtOffset(Offset);
+      // Only seed from entries with a positive count, and only once per
+      // block so that no execution count is added twice.
+      if (EntryBB && EntryBB->getKnownExecutionCount() > 0 &&
+          Visited.insert(EntryBB->getLayoutIndex()).second) {
+        Queue.push(EntryBB->getLayoutIndex());
+        ++NumReachablePosECBBs;
+        SumReachableBBEC += EntryBB->getKnownExecutionCount();
+      }
+      return true;
+    });
+    const FunctionLayout &Layout = Function->getLayout();
+    while (!Queue.empty()) {
+      const BinaryBasicBlock *BB = Layout.getBlock(Queue.front());
+      Queue.pop();
+      // Branch info is walked in lockstep with the successor list; the
+      // post-increment keeps both in sync on every path through the loop.
+      auto SuccBIIter = BB->branch_info_begin();
+      for (const BinaryBasicBlock *Succ : BB->successors()) {
+        const uint64_t Count = (SuccBIIter++)->Count;
+        if (Count == BinaryBasicBlock::COUNT_NO_PROFILE || Count == 0)
+          continue;
+        if (!Visited.insert(Succ->getLayoutIndex()).second)
+          continue;
+        // A block reached over a positive edge may itself have a zero
+        // execution count; it must not be counted as a reachable
+        // positive-EC block, or the subtraction below could wrap around.
+        const uint64_t SuccEC = Succ->getKnownExecutionCount();
+        NumReachablePosECBBs += !!SuccEC;
+        SumReachableBBEC += SuccEC;
+        Queue.push(Succ->getLayoutIndex());
+      }
+    }
+    const size_t NumPosECBBsUnreachableFromEntry =
+        NumPosECBBs - NumReachablePosECBBs;
+    const size_t SumUnreachableBBEC = SumAllBBEC - SumReachableBBEC;
+    // With no execution counts at all there is nothing unreachable; avoid
+    // 0/0, whose NaN result would break the sort below.
+    const double FractionECUnreachable =
+        SumAllBBEC == 0
+            ? 0.0
+            : static_cast<double>(SumUnreachableBBEC) / SumAllBBEC;
+    if (opts::Verbosity >= 2 && FractionECUnreachable >= 0.05) {
+      OS << "Non-trivial CFG discontinuity observed in function "
+         << Function->getPrintName() << "\n";
+      if (opts::Verbosity >= 3)
+        Function->dump();
+    }
+    NumUnreachables.push_back(NumPosECBBsUnreachableFromEntry);
+    SumECUnreachables.push_back(SumUnreachableBBEC);
+    FractionECUnreachables.push_back(FractionECUnreachable);
+  }
+  if (FractionECUnreachables.empty())
+    return;
+  std::sort(FractionECUnreachables.begin(), FractionECUnreachables.end());
+  // Clamp the rank: a percentile of 100 (or more) would otherwise index one
+  // past the last element.
+  const size_t NumSamples = FractionECUnreachables.size();
+  const size_t Rank = std::min<size_t>(
+      NumSamples * opts::PercentileForProfileQualityCheck / 100,
+      NumSamples - 1);
+  OS << format("function CFG discontinuity %.2lf%%; ",
+               FractionECUnreachables[Rank] * 100);
+  if (opts::Verbosity >= 1) {
+    OS << "\nabbreviations: EC = execution count, POS BBs = positive EC BBs\n"
+       << "distribution of NUM(unreachable POS BBs) per function\n";
+    std::sort(NumUnreachables.begin(), NumUnreachables.end());
+    printDistribution(OS, NumUnreachables);
+    OS << "distribution of SUM_EC(unreachable POS BBs) per function\n";
+    std::sort(SumECUnreachables.begin(), SumECUnreachables.end());
+    printDistribution(OS, SumECUnreachables);
+    OS << "distribution of [(SUM_EC(unreachable POS BBs) / SUM_EC(all "
+          "POS BBs))] per function\n";
+    printDistribution(OS, FractionECUnreachables, /*Fraction=*/true);
+  }
+}
+/// Reports how well each function's call graph inflow matches the net CFG
+/// outflow of its entry blocks.
+///
+/// Only simple functions with more than one block and an entry in
+/// \p TotalFlowMap.CallGraphIncomingMap are considered. For each, the gap is
+/// 1 - min/max of (net entry outflow, call graph inflow). The summary line
+/// printed to \p OS is the gap at the rank selected by
+/// opts::PercentileForProfileQualityCheck; the full distribution follows
+/// when opts::Verbosity >= 1.
+void printCallGraphFlowConservationStats(
+    raw_ostream &OS, iterator_range<function_iterator> &Functions,
+    FlowInfo &TotalFlowMap) {
+  std::vector<double> CallGraphGaps;
+  for (const BinaryFunction *Function : Functions) {
+    if (Function->size() <= 1 || !Function->isSimple())
+      continue;
+    const uint64_t FunctionNum = Function->getFunctionNumber();
+    FlowMapTy &IncomingMap = TotalFlowMap.TotalIncomingMaps[FunctionNum];
+    FlowMapTy &OutgoingMap = TotalFlowMap.TotalOutgoingMaps[FunctionNum];
+    FunctionFlowMapTy &CallGraphIncomingMap = TotalFlowMap.CallGraphIncomingMap;
+    // Only consider functions that are not a program entry.
+    const auto CallGraphIt = CallGraphIncomingMap.find(FunctionNum);
+    if (CallGraphIt == CallGraphIncomingMap.end())
+      continue;
+    uint64_t EntryInflow = 0;
+    uint64_t EntryOutflow = 0;
+    uint32_t NumConsideredEntryBlocks = 0;
+    for (const BinaryBasicBlock &BB : *Function) {
+      // If entry is an exit, then we don't consider it for flow
+      // conservation.
+      if (!BB.isEntryPoint() || BB.succ_size() == 0)
+        continue;
+      NumConsideredEntryBlocks++;
+      EntryInflow += IncomingMap[BB.getLayoutIndex()];
+      EntryOutflow += OutgoingMap[BB.getLayoutIndex()];
+    }
+    uint64_t NetEntryOutflow = 0;
+    if (EntryOutflow >= EntryInflow) {
+      NetEntryOutflow = EntryOutflow - EntryInflow;
+    } else if (opts::Verbosity >= 2) {
+      // We expect entry blocks' CFG outflow >= inflow, i.e., it has a
+      // non-negative net outflow. If this is not the case, then raise a
+      // warning if requested.
+      OS << "BOLT WARNING: unexpected entry block CFG outflow < inflow "
+            "in function "
+         << Function->getPrintName() << "\n";
+      if (opts::Verbosity >= 3)
+        Function->dump();
+    }
+    if (NumConsideredEntryBlocks == 0)
+      continue;
+    // Reuse the iterator from the lookup above instead of indexing the map
+    // a second time.
+    const uint64_t CallGraphInflow = CallGraphIt->second;
+    const uint64_t Min = std::min(NetEntryOutflow, CallGraphInflow);
+    const uint64_t Max = std::max(NetEntryOutflow, CallGraphInflow);
+    // Both flows being zero means they agree; avoid 0/0, whose NaN result
+    // would break the sort below.
+    const double CallGraphGap =
+        Max == 0 ? 0.0 : 1 - static_cast<double>(Min) / Max;
+    if (opts::Verbosity >= 2 && CallGraphGap >= 0.5) {
+      OS << "Nontrivial call graph gap of size "
+         << format("%.2lf%%", 100 * CallGraphGap)
+         << " observed in function " << Function->getPrintName() << "\n";
+      if (opts::Verbosity >= 3)
+        Function->dump();
+    }
+    CallGraphGaps.push_back(CallGraphGap);
+  }
+  if (CallGraphGaps.empty())
+    return;
+  std::sort(CallGraphGaps.begin(), CallGraphGaps.end());
+  // Clamp the rank: a percentile of 100 (or more) would otherwise index one
+  // past the last element.
+  const size_t NumSamples = CallGraphGaps.size();
+  const size_t Rank = std::min<size_t>(
+      NumSamples * opts::PercentileForProfileQualityCheck / 100,
+      NumSamples - 1);
+  OS << format("call graph flow conservation gap %.2lf%%; ",
+               CallGraphGaps[Rank] * 100);
+  if (opts::Verbosity >= 1) {
+    OS << "\ndistribution of function entry flow conservation gaps\n";
+    printDistribution(OS, CallGraphGaps, /*Fraction=*/true);
+  }
+}
+/// Reports per-block CFG flow conservation gaps for the given \p Functions.
+///
+/// For every simple function with more than one block, each block that is
+/// neither an entry point nor without successors contributes a gap of
+/// 1 - Min/Max, using the values stored for it in
+/// \p TotalFlowMap.TotalMinCountMaps and TotalMaxCountMaps. Gaps are
+/// averaged per function with log-scaled weights; the worst relative and
+/// absolute gaps are tracked over blocks whose execution count exceeds 500.
+/// The summary line printed to \p OS uses the rank selected by
+/// opts::PercentileForProfileQualityCheck; distributions follow when
+/// opts::Verbosity >= 1.
+void printCFGFlowConservationStats(raw_ostream &OS,
+                                   iterator_range<function_iterator> &Functions,
+                                   FlowInfo &TotalFlowMap) {
+  std::vector<double> CFGGapsWeightedAvg;
+  std::vector<double> CFGGapsWorst;
+  std::vector<uint64_t> CFGGapsWorstAbs;
+  for (const BinaryFunction *Function : Functions) {
+    if (Function->size() <= 1 || !Function->isSimple())
+      continue;
+    const uint64_t FunctionNum = Function->getFunctionNumber();
+    FlowMapTy &MaxCountMaps = TotalFlowMap.TotalMaxCountMaps[FunctionNum];
+    FlowMapTy &MinCountMaps = TotalFlowMap.TotalMinCountMaps[FunctionNum];
+    double WeightedGapSum = 0.0;
+    double WeightSum = 0.0;
+    double WorstGap = 0.0;
+    uint64_t WorstGapAbs = 0;
+    BinaryBasicBlock *BBWorstGap = nullptr;
+    BinaryBasicBlock *BBWorstGapAbs = nullptr;
+    for (BinaryBasicBlock &BB : *Function) {
+      // We don't consider function entry or exit blocks for CFG flow
+      // conservation
+      if (BB.isEntryPoint() || BB.succ_size() == 0)
+        continue;
+      const uint64_t Max = MaxCountMaps[BB.getLayoutIndex()];
+      const uint64_t Min = MinCountMaps[BB.getLayoutIndex()];
+      // A zero Max leaves nothing to compare; avoid 0/0, whose NaN result
+      // would poison the weighted sum and break the sorts below.
+      const double Gap = Max == 0 ? 0.0 : 1 - static_cast<double>(Min) / Max;
+      double Weight = BB.getKnownExecutionCount() * BB.getNumNonPseudos();
+      if (Weight == 0)
+        continue;
+      // We use log to prevent the stats from being dominated by extremely hot
+      // blocks
+      Weight = log(Weight);
+      WeightedGapSum += Gap * Weight;
+      WeightSum += Weight;
+      // Worst-case tracking only looks at blocks with counts above 500.
+      if (BB.getKnownExecutionCount() <= 500)
+        continue;
+      if (Gap > WorstGap) {
+        WorstGap = Gap;
+        BBWorstGap = &BB;
+      }
+      if (Max - Min > WorstGapAbs) {
+        WorstGapAbs = Max - Min;
+        BBWorstGapAbs = &BB;
+      }
+    }
+    if (WeightSum <= 0)
+      continue;
+    const double WeightedGap = WeightedGapSum / WeightSum;
+    if (opts::Verbosity >= 2 && (WeightedGap >= 0.1 || WorstGap >= 0.9)) {
+      OS << "Nontrivial CFG gap observed in function "
+         << Function->getPrintName() << "\n"
+         << "Weighted gap: " << format("%.2lf%%", 100 * WeightedGap) << "\n";
+      if (BBWorstGap)
+        OS << "Worst gap: " << format("%.2lf%%", 100 * WorstGap)
+           << " at BB with input offset: 0x"
+           << Twine::utohexstr(BBWorstGap->getInputOffset()) << "\n";
+      if (BBWorstGapAbs)
+        OS << "Worst gap (absolute value): " << WorstGapAbs << " at BB with "
+           << "input offset 0x"
+           << Twine::utohexstr(BBWorstGapAbs->getInputOffset()) << "\n";
+      if (opts::Verbosity >= 3)
+        Function->dump();
+    }
+    CFGGapsWeightedAvg.push_back(WeightedGap);
+    CFGGapsWorst.push_back(WorstGap);
+    CFGGapsWorstAbs.push_back(WorstGapAbs);
+  }
+  if (CFGGapsWeightedAvg.empty())
+    return;
+  // Rank for the configured percentile, clamped so that a percentile of 100
+  // (or more) does not index one past the last element.
+  auto PercentileRank = [](size_t NumSamples) {
+    return std::min<size_t>(
+        NumSamples * opts::PercentileForProfileQualityCheck / 100,
+        NumSamples - 1);
+  };
+  std::sort(CFGGapsWeightedAvg.begin(), CFGGapsWeightedAvg.end());
+  const size_t RankWA = PercentileRank(CFGGapsWeightedAvg.size());
+  std::sort(CFGGapsWorst.begin(), CFGGapsWorst.end());
+  const size_t RankW = PercentileRank(CFGGapsWorst.size());
+  OS << format("CFG flow conservation gap %.2lf%% (weighted) %.2lf%% (worst)\n",
+               CFGGapsWeightedAvg[RankWA] * 100, CFGGapsWorst[RankW] * 100);
+  if (opts::Verbosity >= 1) {
+    OS << "distribution of weighted CFG flow conservation gaps\n";
+    printDistribution(OS, CFGGapsWeightedAvg, /*Fraction=*/true);
+    OS << "Consider only blocks with execution counts > 500:\n"
+       << "distribution of worst block flow conservation gap per "
+          "function \n";
+    printDistribution(OS, CFGGapsWorst, /*Fraction=*/true);
+    OS << "distribution of worst block flow conservation gap (absolute "
+          "value) per function\n";
+    std::sort(CFGGapsWorstAbs.begin(), CFGGapsWorstAbs.end());
+    printDistribution(OS, CFGGapsWorstAbs, /*Fraction=*/false);
+  }
+}
+void computeFlowMappings(const BinaryContext &BC, FlowInfo &TotalFlowMap) {
+ // Increment block inflow and outflow with CFG jump counts.
+ TotalFlowMapTy &TotalIncomingMaps = TotalFlowMap.TotalIncomingMaps;
+ TotalFlowMapTy &TotalOutgoingMaps = TotalFlowMap.TotalOutgoingMaps;
+ for (const auto &BFI : BC.getBinaryFunctions()) {
+ const BinaryFunction *Function = &BFI.second;
+ if (Function->empty() || !Function->hasValidProfile())
ShatianWang wrote:
Ah, sorry, I wasn't being very clear. I meant including functions with stale profiles whose profiles are later inferred by stale profile matching (and hence are set valid), and excluding functions with stale profiles that stale profile matching failed to infer (which hence remain invalid). I.e., leaving the filter as it is.
