[llvm] [BOLT] Report flow conservation scores (PR #127954)
Amir Ayupov via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 27 20:19:38 PST 2025
================
@@ -0,0 +1,575 @@
+//===- bolt/Passes/ProfileQualityStats.cpp ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the profile quality stats calculation pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Passes/ProfileQualityStats.h"
+#include "bolt/Core/BinaryBasicBlock.h"
+#include "bolt/Core/BinaryFunction.h"
+#include "bolt/Utils/CommandLineOpts.h"
+#include "llvm/Support/CommandLine.h"
+#include <algorithm>
+#include <queue>
+#include <unordered_map>
+#include <unordered_set>
+
+using namespace llvm;
+using namespace bolt;
+
+namespace opts {
+// Defined in another translation unit. In this file it gates diagnostics:
+// >= 1 prints full distributions, >= 2 prints per-function warnings, and
+// >= 3 additionally dumps the offending function.
+extern cl::opt<unsigned> Verbosity;
+// Hidden option, default 1000. Per its description it bounds how many of the
+// hottest functions contribute to the aggregated profile quality stats.
+cl::opt<unsigned> NumFunctionsForProfileQualityCheck(
+ "num-functions-for-profile-quality-check",
+ cl::desc("number of hottest functions to print aggregated "
+ "profile quality stats of."),
+ cl::init(1000), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
+// Hidden option, default 95. The stats printers turn it into an index
+// `size * percentile / 100` into an ascending-sorted vector of per-function
+// scores, so values of 100 or more would select an index past the last element
+// unless the consumer clamps it.
+cl::opt<unsigned> PercentileForProfileQualityCheck(
+ "percentile-for-profile-quality-check",
+ cl::desc("Percentile of profile quality distributions over hottest "
+ "functions to report."),
+ cl::init(95), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
+} // namespace opts
+
+namespace {
+// List of the functions whose profile quality is evaluated, and the iterator
+// type used to hand ranges of that list to the stats printers below.
+using FunctionListType = std::vector<const BinaryFunction *>;
+using function_iterator = FunctionListType::iterator;
+
+// BB index -> flow count
+// (the stats printers key these maps by BinaryBasicBlock::getLayoutIndex())
+using FlowMapTy = std::unordered_map<unsigned, uint64_t>;
+// Function number -> FlowMapTy
+// (keyed by BinaryFunction::getFunctionNumber())
+using TotalFlowMapTy = std::unordered_map<uint64_t, FlowMapTy>;
+// Function number -> flow count
+using FunctionFlowMapTy = std::unordered_map<uint64_t, uint64_t>;
+// Bundle of all flow maps shared between the flow computation and the stats
+// printers in this file.
+struct FlowInfo {
+ // Per function, per block: total flow entering the block (read as the entry
+ // blocks' CFG inflow by the call graph stats).
+ TotalFlowMapTy TotalIncomingMaps;
+ // Per function, per block: total flow leaving the block (read as the entry
+ // blocks' CFG outflow by the call graph stats).
+ TotalFlowMapTy TotalOutgoingMaps;
+ // Per function, per block: the Max and Min counts whose relative difference
+ // is reported as the block's flow conservation gap.
+ // NOTE(review): presumably max/min of the block's inflow and outflow; confirm
+ // against the code that fills these maps.
+ TotalFlowMapTy TotalMaxCountMaps;
+ TotalFlowMapTy TotalMinCountMaps;
+ // Per function: inflow coming from the call graph. A function without an
+ // entry here is treated as a program entry and skipped by the call graph
+ // stats.
+ FunctionFlowMapTy CallGraphIncomingMap;
+};
+
+/// Print a summary of the distribution of \p values to \p OS: the number of
+/// samples, the maximum, the values at the top 1/5/10/20/50/80 percent marks,
+/// and the minimum.
+///
+/// \param OS       stream receiving the report.
+/// \param values   samples, which the caller must have sorted in ascending
+///                 order; nothing is printed when it is empty.
+/// \param Fraction when true, samples are treated as ratios and printed as
+///                 percentages with two decimals; otherwise printed verbatim.
+template <typename T>
+void printDistribution(raw_ostream &OS, const std::vector<T> &values,
+                       bool Fraction = false) {
+  // Assume values are sorted.
+  if (values.empty())
+    return;
+
+  OS << " Length : " << values.size() << "\n";
+
+  // Column at which the ':' separator is aligned.
+  constexpr size_t LabelWidth = 11;
+
+  // Percent is measured from the top of the distribution: 0 selects the
+  // largest sample (last element) and 100 the smallest (first element).
+  auto printLine = [&](const std::string &Text, double Percent) {
+    size_t Rank = values.size() - 1;
+    if (Percent != 0)
+      Rank = static_cast<size_t>(values.size() * (100 - Percent) / 100);
+    // Never ask for a negative amount of padding if a label is too long.
+    const size_t Padding =
+        Text.length() < LabelWidth ? LabelWidth - Text.length() : 0;
+    OS << " " << Text << std::string(Padding, ' ') << ": ";
+    if (Fraction)
+      // The explicit cast keeps the "%lf" argument a double even when this
+      // template is instantiated with an integral T.
+      OS << format("%.2lf%%", static_cast<double>(values[Rank]) * 100);
+    else
+      OS << values[Rank];
+    OS << "\n";
+  };
+
+  printLine("MAX", 0);
+  for (const int Percentage : {1, 5, 10, 20, 50, 80})
+    printLine("TOP " + std::to_string(Percentage) + "%", Percentage);
+  printLine("MIN", 100);
+}
+
+/// Report CFG continuity of the profile for every function in \p Functions.
+///
+/// For each function with more than one basic block, a BFS is run from the
+/// entry points that have a positive execution count, following only CFG
+/// edges with a positive known count. Blocks with a positive execution count
+/// that the search does not reach are "unreachable"; the function's score is
+/// the fraction of its total execution count carried by such blocks.
+///
+/// The score at the percentile selected by
+/// opts::PercentileForProfileQualityCheck is always printed to \p OS. With
+/// opts::Verbosity >= 1 the full distributions are printed, >= 2 adds a line
+/// per function whose score is at least 5%, and >= 3 also dumps it.
+void printCFGContinuityStats(raw_ostream &OS,
+                             iterator_range<function_iterator> &Functions) {
+  // Given a perfect profile, every positive-execution-count BB should be
+  // connected to an entry of the function through a positive-execution-count
+  // directed path in the control flow graph.
+  std::vector<size_t> NumUnreachables;
+  std::vector<size_t> SumECUnreachables;
+  std::vector<double> FractionECUnreachables;
+
+  for (const BinaryFunction *Function : Functions) {
+    if (Function->size() <= 1)
+      continue;
+
+    // Compute the sum of all BB execution counts (ECs).
+    size_t NumPosECBBs = 0;
+    size_t SumAllBBEC = 0;
+    for (const BinaryBasicBlock &BB : *Function) {
+      const size_t BBEC = BB.getKnownExecutionCount();
+      NumPosECBBs += !!BBEC;
+      SumAllBBEC += BBEC;
+    }
+
+    // Perform BFS on subgraph of CFG induced by positive weight edges.
+    // Compute the number of positive-EC BBs reachable from the entry(s) of the
+    // function and the sum of their execution counts (ECs).
+    std::unordered_set<unsigned> Visited;
+    std::queue<unsigned> Queue;
+    size_t NumReachablePosECBBs = 0;
+    size_t SumReachableBBEC = 0;
+
+    Function->forEachEntryPoint([&](uint64_t Offset, const MCSymbol *Label) {
+      const BinaryBasicBlock *EntryBB = Function->getBasicBlockAtOffset(Offset);
+      // Seed each positive-EC entry block once, even if several entry points
+      // resolve to the same block.
+      if (EntryBB && EntryBB->getKnownExecutionCount() > 0 &&
+          Visited.insert(EntryBB->getLayoutIndex()).second) {
+        Queue.push(EntryBB->getLayoutIndex());
+        ++NumReachablePosECBBs;
+        SumReachableBBEC += EntryBB->getKnownExecutionCount();
+      }
+      return true;
+    });
+
+    const FunctionLayout &Layout = Function->getLayout();
+
+    while (!Queue.empty()) {
+      const unsigned BBIndex = Queue.front();
+      Queue.pop();
+      const BinaryBasicBlock *BB = Layout.getBlock(BBIndex);
+      // Branch info is walked in lockstep with the successor list.
+      auto SuccBIIter = BB->branch_info_begin();
+      for (const BinaryBasicBlock *Succ : BB->successors()) {
+        const uint64_t Count = SuccBIIter->Count;
+        ++SuccBIIter;
+        if (Count == BinaryBasicBlock::COUNT_NO_PROFILE || Count == 0)
+          continue;
+        if (!Visited.insert(Succ->getLayoutIndex()).second)
+          continue;
+        // A block can be reached over a positive-count edge while its own EC
+        // is zero; such a block must not be counted as a reachable positive-EC
+        // block, otherwise the subtraction below could wrap around.
+        const uint64_t SuccEC = Succ->getKnownExecutionCount();
+        NumReachablePosECBBs += SuccEC > 0;
+        SumReachableBBEC += SuccEC;
+        Queue.push(Succ->getLayoutIndex());
+      }
+    }
+
+    const size_t NumPosECBBsUnreachableFromEntry =
+        NumPosECBBs - NumReachablePosECBBs;
+    const size_t SumUnreachableBBEC = SumAllBBEC - SumReachableBBEC;
+    // A function without any samples has nothing unreachable; avoid 0/0, whose
+    // NaN result would also break the ordering required by std::sort below.
+    const double FractionECUnreachable =
+        SumAllBBEC == 0
+            ? 0.0
+            : static_cast<double>(SumUnreachableBBEC) / SumAllBBEC;
+
+    if (opts::Verbosity >= 2 && FractionECUnreachable >= 0.05) {
+      OS << "Non-trivial CFG discontinuity observed in function "
+         << Function->getPrintName() << "\n";
+      if (opts::Verbosity >= 3)
+        Function->dump();
+    }
+
+    NumUnreachables.push_back(NumPosECBBsUnreachableFromEntry);
+    SumECUnreachables.push_back(SumUnreachableBBEC);
+    FractionECUnreachables.push_back(FractionECUnreachable);
+  }
+
+  if (FractionECUnreachables.empty())
+    return;
+
+  std::sort(FractionECUnreachables.begin(), FractionECUnreachables.end());
+  // Clamp so that a percentile of 100 (or more) selects the last element
+  // instead of reading past the end of the vector.
+  const size_t Rank = std::min<size_t>(
+      FractionECUnreachables.size() * opts::PercentileForProfileQualityCheck /
+          100,
+      FractionECUnreachables.size() - 1);
+  OS << format("function CFG discontinuity %.2lf%%; ",
+               FractionECUnreachables[Rank] * 100);
+  if (opts::Verbosity >= 1) {
+    OS << "\nabbreviations: EC = execution count, POS BBs = positive EC BBs\n"
+       << "distribution of NUM(unreachable POS BBs) per function\n";
+    std::sort(NumUnreachables.begin(), NumUnreachables.end());
+    printDistribution(OS, NumUnreachables);
+
+    OS << "distribution of SUM_EC(unreachable POS BBs) per function\n";
+    std::sort(SumECUnreachables.begin(), SumECUnreachables.end());
+    printDistribution(OS, SumECUnreachables);
+
+    OS << "distribution of [(SUM_EC(unreachable POS BBs) / SUM_EC(all "
+          "POS BBs))] per function\n";
+    printDistribution(OS, FractionECUnreachables, /*Fraction=*/true);
+  }
+}
+
+/// Report how well call graph inflow matches the net CFG outflow of the entry
+/// blocks for every function in \p Functions.
+///
+/// Only simple functions with more than one basic block that have an entry in
+/// TotalFlowMap.CallGraphIncomingMap are considered. For each, the entry
+/// blocks that have successors contribute their CFG inflow and outflow; the
+/// gap is 1 - min/max of (net entry outflow, call graph inflow).
+///
+/// The gap at the percentile selected by
+/// opts::PercentileForProfileQualityCheck is always printed to \p OS. With
+/// opts::Verbosity >= 1 the distribution is printed, >= 2 adds per-function
+/// warnings, and >= 3 also dumps the offending functions.
+///
+/// \param TotalFlowMap flow maps to read; looking up a block or function that
+///        has no entry yet default-inserts a zero/empty entry.
+void printCallGraphFlowConservationStats(
+    raw_ostream &OS, iterator_range<function_iterator> &Functions,
+    FlowInfo &TotalFlowMap) {
+  std::vector<double> CallGraphGaps;
+
+  for (const BinaryFunction *Function : Functions) {
+    if (Function->size() <= 1 || !Function->isSimple())
+      continue;
+
+    const uint64_t FunctionNum = Function->getFunctionNumber();
+    FlowMapTy &IncomingMap = TotalFlowMap.TotalIncomingMaps[FunctionNum];
+    FlowMapTy &OutgoingMap = TotalFlowMap.TotalOutgoingMaps[FunctionNum];
+    FunctionFlowMapTy &CallGraphIncomingMap = TotalFlowMap.CallGraphIncomingMap;
+
+    // Only consider functions that are not a program entry.
+    const auto CallGraphIt = CallGraphIncomingMap.find(FunctionNum);
+    if (CallGraphIt == CallGraphIncomingMap.end())
+      continue;
+
+    uint64_t EntryInflow = 0;
+    uint64_t EntryOutflow = 0;
+    uint32_t NumConsideredEntryBlocks = 0;
+    for (const BinaryBasicBlock &BB : *Function) {
+      // If entry is an exit, then we don't consider it for flow conservation.
+      if (!BB.isEntryPoint() || BB.succ_size() == 0)
+        continue;
+      NumConsideredEntryBlocks++;
+
+      EntryInflow += IncomingMap[BB.getLayoutIndex()];
+      EntryOutflow += OutgoingMap[BB.getLayoutIndex()];
+    }
+
+    uint64_t NetEntryOutflow = 0;
+    if (EntryOutflow < EntryInflow) {
+      if (opts::Verbosity >= 2) {
+        // We expect entry blocks' CFG outflow >= inflow, i.e., it has a
+        // non-negative net outflow. If this is not the case, then raise a
+        // warning if requested.
+        OS << "BOLT WARNING: unexpected entry block CFG outflow < inflow "
+              "in function "
+           << Function->getPrintName() << "\n";
+        if (opts::Verbosity >= 3)
+          Function->dump();
+      }
+    } else {
+      NetEntryOutflow = EntryOutflow - EntryInflow;
+    }
+
+    if (NumConsideredEntryBlocks == 0)
+      continue;
+
+    const uint64_t CallGraphInflow = CallGraphIt->second;
+    const uint64_t Min = std::min(NetEntryOutflow, CallGraphInflow);
+    const uint64_t Max = std::max(NetEntryOutflow, CallGraphInflow);
+    // When both flows are zero there is nothing to conserve: report no gap
+    // rather than 0/0, whose NaN result would also break the ordering
+    // required by std::sort below.
+    const double CallGraphGap =
+        Max > 0 ? 1 - static_cast<double>(Min) / Max : 0.0;
+
+    if (opts::Verbosity >= 2 && CallGraphGap >= 0.5) {
+      OS << "Nontrivial call graph gap of size "
+         << format("%.2lf%%", 100 * CallGraphGap) << " observed in function "
+         << Function->getPrintName() << "\n";
+      if (opts::Verbosity >= 3)
+        Function->dump();
+    }
+
+    CallGraphGaps.push_back(CallGraphGap);
+  }
+
+  if (CallGraphGaps.empty())
+    return;
+
+  std::sort(CallGraphGaps.begin(), CallGraphGaps.end());
+  // Clamp so that a percentile of 100 (or more) selects the last element
+  // instead of reading past the end of the vector.
+  const size_t Rank = std::min<size_t>(
+      CallGraphGaps.size() * opts::PercentileForProfileQualityCheck / 100,
+      CallGraphGaps.size() - 1);
+  OS << format("call graph flow conservation gap %.2lf%%; ",
+               CallGraphGaps[Rank] * 100);
+  if (opts::Verbosity >= 1) {
+    OS << "\ndistribution of function entry flow conservation gaps\n";
+    printDistribution(OS, CallGraphGaps, /*Fraction=*/true);
+  }
+}
+
+/// Report per-block CFG flow conservation for every function in \p Functions.
+///
+/// Only simple functions with more than one basic block are considered, and
+/// within them only blocks that are neither entry points nor exits (no
+/// successors). A block's gap is 1 - Min/Max using the counts stored in
+/// TotalFlowMap.TotalMinCountMaps / TotalMaxCountMaps. Per function this
+/// yields:
+///   * a weighted average gap, each block weighted by
+///     log(execution count * number of non-pseudo instructions);
+///   * the worst relative gap and the worst absolute gap (Max - Min), both
+///     restricted to blocks with an execution count above 500.
+///
+/// The weighted and worst gaps at the percentile selected by
+/// opts::PercentileForProfileQualityCheck are always printed to \p OS. With
+/// opts::Verbosity >= 1 the distributions are printed, >= 2 adds per-function
+/// reports, and >= 3 also dumps the offending functions.
+///
+/// \param TotalFlowMap flow maps to read; looking up a block or function that
+///        has no entry yet default-inserts a zero/empty entry.
+void printCFGFlowConservationStats(raw_ostream &OS,
+                                   iterator_range<function_iterator> &Functions,
+                                   FlowInfo &TotalFlowMap) {
+  // Blocks must be hotter than this to be candidates for the "worst gap".
+  constexpr uint64_t MinECForWorstGap = 500;
+
+  std::vector<double> CFGGapsWeightedAvg;
+  std::vector<double> CFGGapsWorst;
+  std::vector<uint64_t> CFGGapsWorstAbs;
+
+  for (const BinaryFunction *Function : Functions) {
+    if (Function->size() <= 1 || !Function->isSimple())
+      continue;
+
+    const uint64_t FunctionNum = Function->getFunctionNumber();
+    FlowMapTy &MaxCountMaps = TotalFlowMap.TotalMaxCountMaps[FunctionNum];
+    FlowMapTy &MinCountMaps = TotalFlowMap.TotalMinCountMaps[FunctionNum];
+    double WeightedGapSum = 0.0;
+    double WeightSum = 0.0;
+    double WorstGap = 0.0;
+    uint64_t WorstGapAbs = 0;
+    BinaryBasicBlock *BBWorstGap = nullptr;
+    BinaryBasicBlock *BBWorstGapAbs = nullptr;
+    for (BinaryBasicBlock &BB : *Function) {
+      // We don't consider function entry or exit blocks for CFG flow
+      // conservation
+      if (BB.isEntryPoint() || BB.succ_size() == 0)
+        continue;
+
+      const uint64_t Max = MaxCountMaps[BB.getLayoutIndex()];
+      const uint64_t Min = MinCountMaps[BB.getLayoutIndex()];
+      // A block with no recorded flow has nothing to conserve: report no gap
+      // rather than 0/0, whose NaN result would poison the weighted sum and
+      // break the ordering required by std::sort below.
+      const double Gap = Max > 0 ? 1 - static_cast<double>(Min) / Max : 0.0;
+      double Weight = BB.getKnownExecutionCount() * BB.getNumNonPseudos();
+      if (Weight == 0)
+        continue;
+      // We use log to prevent the stats from being dominated by extremely hot
+      // blocks
+      Weight = log(Weight);
+      WeightedGapSum += Gap * Weight;
+      WeightSum += Weight;
+
+      if (BB.getKnownExecutionCount() <= MinECForWorstGap)
+        continue;
+      if (Gap > WorstGap) {
+        WorstGap = Gap;
+        BBWorstGap = &BB;
+      }
+      if (Max - Min > WorstGapAbs) {
+        WorstGapAbs = Max - Min;
+        BBWorstGapAbs = &BB;
+      }
+    }
+
+    // No block carried a positive weight: the function has no score.
+    if (!(WeightSum > 0))
+      continue;
+
+    const double WeightedGap = WeightedGapSum / WeightSum;
+    if (opts::Verbosity >= 2 && (WeightedGap >= 0.1 || WorstGap >= 0.9)) {
+      OS << "Nontrivial CFG gap observed in function "
+         << Function->getPrintName() << "\n"
+         << "Weighted gap: " << format("%.2lf%%", 100 * WeightedGap) << "\n";
+      if (BBWorstGap)
+        OS << "Worst gap: " << format("%.2lf%%", 100 * WorstGap)
+           << " at BB with input offset: 0x"
+           << Twine::utohexstr(BBWorstGap->getInputOffset()) << "\n";
+      if (BBWorstGapAbs)
+        OS << "Worst gap (absolute value): " << WorstGapAbs << " at BB with "
+           << "input offset 0x"
+           << Twine::utohexstr(BBWorstGapAbs->getInputOffset()) << "\n";
+      if (opts::Verbosity >= 3)
+        Function->dump();
+    }
+
+    CFGGapsWeightedAvg.push_back(WeightedGap);
+    CFGGapsWorst.push_back(WorstGap);
+    CFGGapsWorstAbs.push_back(WorstGapAbs);
+  }
+
+  if (CFGGapsWeightedAvg.empty())
+    return;
+
+  // Index of the requested percentile in a sorted vector of the given size,
+  // clamped so that a percentile of 100 (or more) selects the last element
+  // instead of reading past the end.
+  const auto percentileRank = [](size_t Size) {
+    return std::min<size_t>(
+        Size * opts::PercentileForProfileQualityCheck / 100, Size - 1);
+  };
+
+  std::sort(CFGGapsWeightedAvg.begin(), CFGGapsWeightedAvg.end());
+  const size_t RankWA = percentileRank(CFGGapsWeightedAvg.size());
+  std::sort(CFGGapsWorst.begin(), CFGGapsWorst.end());
+  const size_t RankW = percentileRank(CFGGapsWorst.size());
+  OS << format("CFG flow conservation gap %.2lf%% (weighted) %.2lf%% (worst)\n",
+               CFGGapsWeightedAvg[RankWA] * 100, CFGGapsWorst[RankW] * 100);
+  if (opts::Verbosity >= 1) {
+    OS << "distribution of weighted CFG flow conservation gaps\n";
+    printDistribution(OS, CFGGapsWeightedAvg, /*Fraction=*/true);
+    OS << "Consider only blocks with execution counts > 500:\n"
+       << "distribution of worst block flow conservation gap per "
+          "function \n";
+    printDistribution(OS, CFGGapsWorst, /*Fraction=*/true);
+    OS << "distribution of worst block flow conservation gap (absolute "
+          "value) per function\n";
+    std::sort(CFGGapsWorstAbs.begin(), CFGGapsWorstAbs.end());
+    printDistribution(OS, CFGGapsWorstAbs, /*Fraction=*/false);
+  }
+}
+
+void computeFlowMappings(const BinaryContext &BC, FlowInfo &TotalFlowMap) {
+ // Increment block inflow and outflow with CFG jump counts.
+ TotalFlowMapTy &TotalIncomingMaps = TotalFlowMap.TotalIncomingMaps;
+ TotalFlowMapTy &TotalOutgoingMaps = TotalFlowMap.TotalOutgoingMaps;
+ for (const auto &BFI : BC.getBinaryFunctions()) {
+ const BinaryFunction *Function = &BFI.second;
+ if (Function->empty() || !Function->hasValidProfile())
----------------
aaupov wrote:
Sounds good. I may experiment with that as part of the effort to improve stale matching/continuous mode.
https://github.com/llvm/llvm-project/pull/127954
More information about the llvm-commits mailing list