[llvm] [BOLT] Infer non-stale profile (PR #160805)
Amir Ayupov via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 25 21:32:31 PDT 2025
https://github.com/aaupov created https://github.com/llvm/llvm-project/pull/160805
Run profi on non-stale/non-inferred profiles to improve profile quality.
Test Plan: updated profile-quality-reporting.test
>From 12a0978dae3e309f233e95c7edd3a5ae2f9d5873 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Thu, 25 Sep 2025 21:32:16 -0700
Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?=
=?UTF-8?q?l=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.4
---
bolt/include/bolt/Passes/InferNonStale.h | 41 +++++
bolt/lib/Passes/CMakeLists.txt | 2 +
bolt/lib/Passes/InferNonStale.cpp | 168 +++++++++++++++++++
bolt/lib/Profile/StaleProfileMatching.cpp | 24 +--
bolt/lib/Rewrite/BinaryPassManager.cpp | 16 ++
bolt/test/X86/profile-quality-reporting.test | 5 +
6 files changed, 244 insertions(+), 12 deletions(-)
create mode 100644 bolt/include/bolt/Passes/InferNonStale.h
create mode 100644 bolt/lib/Passes/InferNonStale.cpp
diff --git a/bolt/include/bolt/Passes/InferNonStale.h b/bolt/include/bolt/Passes/InferNonStale.h
new file mode 100644
index 0000000000000..16e7aecbd6eb9
--- /dev/null
+++ b/bolt/include/bolt/Passes/InferNonStale.h
@@ -0,0 +1,41 @@
+//===- bolt/Passes/InferNonStale.h - Non-stale profile inference --------*- C++
+//-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the pass that runs profile inference (profi) on functions
+// with non-stale/non-inferred profile to improve profile quality.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BOLT_PASSES_INFERNONSTALE_H
+#define BOLT_PASSES_INFERNONSTALE_H
+
+#include "bolt/Passes/BinaryPasses.h"
+
+namespace llvm {
+namespace bolt {
+
+/// Apply profile inference (profi) to functions whose profile is valid and not
+/// already inferred, to improve edge count estimates and profile quality.
+class InferNonStale : public BinaryFunctionPass {
+ /// Run inference on a single function and mark its profile as inferred.
+ void runOnFunction(BinaryFunction &BF);
+
+public:
+ /// \p PrintPass is forwarded to the BinaryFunctionPass base class.
+ explicit InferNonStale(const cl::opt<bool> &PrintPass)
+ : BinaryFunctionPass(PrintPass) {}
+
+ /// Name of the pass.
+ const char *getName() const override { return "infer-non-stale"; }
+
+ /// Pass entry point: dispatches runOnFunction over the functions of \p BC,
+ /// skipping those without a CFG, without a valid profile, or already inferred.
+ Error runOnFunctions(BinaryContext &BC) override;
+};
+
+} // namespace bolt
+} // namespace llvm
+
+#endif
diff --git a/bolt/lib/Passes/CMakeLists.txt b/bolt/lib/Passes/CMakeLists.txt
index 77d2bb9c2bcb5..9f36d3e02afb5 100644
--- a/bolt/lib/Passes/CMakeLists.txt
+++ b/bolt/lib/Passes/CMakeLists.txt
@@ -16,6 +16,7 @@ add_llvm_library(LLVMBOLTPasses
Hugify.cpp
IdenticalCodeFolding.cpp
IndirectCallPromotion.cpp
+ InferNonStale.cpp
Inliner.cpp
Instrumentation.cpp
JTFootprintReduction.cpp
@@ -64,5 +65,6 @@ add_llvm_library(LLVMBOLTPasses
target_link_libraries(LLVMBOLTPasses
PRIVATE
LLVMBOLTCore
+ LLVMBOLTProfile
LLVMBOLTUtils
)
diff --git a/bolt/lib/Passes/InferNonStale.cpp b/bolt/lib/Passes/InferNonStale.cpp
new file mode 100644
index 0000000000000..f7d1944ecb0c4
--- /dev/null
+++ b/bolt/lib/Passes/InferNonStale.cpp
@@ -0,0 +1,168 @@
+//===- bolt/Passes/InferNonStale.cpp - Non-stale profile inference ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the InferNonStale pass that runs profile inference
+// (profi) on functions with non-stale/non-inferred profile to improve
+// profile quality.
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Passes/InferNonStale.h"
+
+#include "bolt/Core/BinaryFunction.h"
+#include "bolt/Core/ParallelUtilities.h"
+#include "bolt/Utils/CommandLineOpts.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Transforms/Utils/SampleProfileInference.h"
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "infer-non-stale"
+
+using namespace llvm;
+using namespace bolt;
+
+namespace opts {
+
+extern cl::opt<bool> TimeRewrite;
+extern cl::OptionCategory BoltOptCategory;
+
+// Command-line switch for this pass; off by default. BinaryPassManager.cpp
+// registers InferNonStale only when it is set.
+cl::opt<bool>
+ InferNonStaleProfile("infer-non-stale-profile",
+ cl::desc("Infer profile counts for functions with "
+ "non-stale profile using profi"),
+ cl::init(false), cl::cat(BoltOptCategory));
+
+// Reuse the existing stale-matching tuning options. They are defined in
+// StaleProfileMatching.cpp, where this patch drops their `static` qualifier.
+extern cl::opt<bool> StaleMatchingEvenFlowDistribution;
+extern cl::opt<bool> StaleMatchingRebalanceUnknown;
+extern cl::opt<bool> StaleMatchingJoinIslands;
+extern cl::opt<unsigned> StaleMatchingCostBlockInc;
+extern cl::opt<unsigned> StaleMatchingCostBlockDec;
+extern cl::opt<unsigned> StaleMatchingCostJumpInc;
+extern cl::opt<unsigned> StaleMatchingCostJumpDec;
+extern cl::opt<unsigned> StaleMatchingCostBlockUnknownInc;
+extern cl::opt<unsigned> StaleMatchingCostJumpUnknownInc;
+extern cl::opt<unsigned> StaleMatchingCostJumpUnknownFTInc;
+
+} // namespace opts
+
+namespace llvm {
+namespace bolt {
+
+// Forward declarations of functions from StaleProfileMatching.cpp
+// NOTE(review): these prototypes are repeated here by hand rather than taken
+// from a shared header, so the compiler cannot check them against the
+// definitions -- confirm they match, or move them to a header.
+FlowFunction
+createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder);
+void preprocessUnreachableBlocks(FlowFunction &Func);
+void assignProfile(BinaryFunction &BF,
+ const BinaryFunction::BasicBlockOrderType &BlockOrder,
+ FlowFunction &Func);
+
+} // namespace bolt
+} // namespace llvm
+
+namespace llvm {
+namespace bolt {
+
+/// Run profi on \p BF, seeding the flow network with the counts that are
+/// already attached to its blocks and edges, then write the inferred counts
+/// back and mark the function as having an inferred profile.
+///
+/// Expects a function with a CFG and a valid, not-yet-inferred profile; the
+/// skip predicate in runOnFunctions filters out everything else. This is
+/// dispatched through ParallelUtilities::runOnEachFunction, so it must not
+/// start the shared pass timer itself (see runOnFunctions).
+void InferNonStale::runOnFunction(BinaryFunction &BF) {
+  assert(BF.hasCFG() && "Function must have CFG");
+  assert(BF.hasValidProfile() && "Function must have valid profile");
+  assert(!BF.hasInferredProfile() && "Function must not have inferred profile");
+
+  LLVM_DEBUG(dbgs() << "BOLT-INFO: applying non-stale profile inference for "
+                    << "\"" << BF.getPrintName() << "\"\n");
+
+  // Make sure that block hashes are up to date.
+  BF.computeBlockHashes();
+
+  const BinaryFunction::BasicBlockOrderType BlockOrder(
+      BF.getLayout().block_begin(), BF.getLayout().block_end());
+
+  // Create a wrapper flow function to use with the profile inference algorithm.
+  FlowFunction Func = createFlowFunction(BlockOrder);
+
+  // Seed the flow function with the existing profile. Unlike stale matching,
+  // the counts are taken from the CFG directly. Flow block I + 1 corresponds
+  // to BlockOrder[I] (index 0 is the dummy entry) and the last flow block is
+  // the artificial sink.
+  const uint64_t SinkIdx = Func.Blocks.size() - 1;
+  for (uint64_t I = 0; I < BlockOrder.size(); ++I) {
+    BinaryBasicBlock *BB = BlockOrder[I];
+    FlowBlock &Block = Func.Blocks[I + 1];
+
+    // A zero execution count is handed to profi as "unknown".
+    Block.Weight = BB->getKnownExecutionCount();
+    Block.HasUnknownWeight = (Block.Weight == 0);
+
+    for (FlowJump *Jump : Block.SuccJumps) {
+      if (Jump->Target == SinkIdx)
+        continue;
+
+      // Only jumps that correspond to a real CFG successor of BB carry a
+      // branch count; any other jump keeps the weight createFlowFunction
+      // gave it.
+      BinaryBasicBlock *SuccBB = BlockOrder[Jump->Target - 1];
+      if (BB->getSuccessor(SuccBB->getLabel())) {
+        BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*SuccBB);
+        Jump->Weight = BI.Count;
+        Jump->HasUnknownWeight = (Jump->Weight == 0);
+      }
+    }
+  }
+
+  // Adjust the flow function by marking unreachable blocks Unlikely
+  preprocessUnreachableBlocks(Func);
+
+  // Set up inference parameters. The entry-block and fall-through variants
+  // reuse the generic block/jump costs, as no dedicated options exist here.
+  ProfiParams Params;
+  Params.EvenFlowDistribution = opts::StaleMatchingEvenFlowDistribution;
+  Params.RebalanceUnknown = opts::StaleMatchingRebalanceUnknown;
+  Params.JoinIslands = opts::StaleMatchingJoinIslands;
+
+  Params.CostBlockInc = opts::StaleMatchingCostBlockInc;
+  Params.CostBlockEntryInc = opts::StaleMatchingCostBlockInc;
+  Params.CostBlockDec = opts::StaleMatchingCostBlockDec;
+  Params.CostBlockEntryDec = opts::StaleMatchingCostBlockDec;
+  Params.CostBlockUnknownInc = opts::StaleMatchingCostBlockUnknownInc;
+
+  Params.CostJumpInc = opts::StaleMatchingCostJumpInc;
+  Params.CostJumpFTInc = opts::StaleMatchingCostJumpInc;
+  Params.CostJumpDec = opts::StaleMatchingCostJumpDec;
+  Params.CostJumpFTDec = opts::StaleMatchingCostJumpDec;
+  Params.CostJumpUnknownInc = opts::StaleMatchingCostJumpUnknownInc;
+  Params.CostJumpUnknownFTInc = opts::StaleMatchingCostJumpUnknownFTInc;
+
+  // Apply the profile inference algorithm
+  applyFlowInference(Params, Func);
+
+  // Collect inferred counts and update function annotations
+  assignProfile(BF, BlockOrder, Func);
+
+  // Mark the function as having inferred profile
+  BF.setHasInferredProfile(true);
+}
+
+/// Pass entry point: run inference on every function of \p BC that has a CFG
+/// and a valid profile that is not already inferred.
+Error InferNonStale::runOnFunctions(BinaryContext &BC) {
+  // Time the pass as a whole. A NamedRegionTimer created inside the
+  // per-function callback would resolve to one shared llvm::Timer, and a
+  // Timer must not be started again while it is already running, which is
+  // what happens when two functions are processed concurrently.
+  NamedRegionTimer T("inferNonStale", "non-stale profile inference", "rewrite",
+                     "Rewrite passes", opts::TimeRewrite);
+
+  ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
+    runOnFunction(BF);
+  };
+
+  ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
+    return !BF.hasValidProfile() || BF.hasInferredProfile() || !BF.hasCFG();
+  };
+
+  ParallelUtilities::runOnEachFunction(
+      BC, ParallelUtilities::SchedulingPolicy::SP_BB_QUADRATIC, WorkFun,
+      SkipFunc, "InferNonStale");
+
+  return Error::success();
+}
+
+} // namespace bolt
+} // namespace llvm
diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 1a61949d77472..b66a3f478f1a7 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -52,66 +52,66 @@ cl::opt<bool>
cl::desc("Infer counts from stale profile data."),
cl::init(false), cl::Hidden, cl::cat(BoltOptCategory));
-static cl::opt<unsigned> StaleMatchingMinMatchedBlock(
+cl::opt<unsigned> StaleMatchingMinMatchedBlock(
"stale-matching-min-matched-block",
cl::desc("Percentage threshold of matched basic blocks at which stale "
"profile inference is executed."),
cl::init(0), cl::Hidden, cl::cat(BoltOptCategory));
-static cl::opt<unsigned> StaleMatchingMaxFuncSize(
+cl::opt<unsigned> StaleMatchingMaxFuncSize(
"stale-matching-max-func-size",
cl::desc("The maximum size of a function to consider for inference."),
cl::init(10000), cl::Hidden, cl::cat(BoltOptCategory));
// Parameters of the profile inference algorithm. The default values are tuned
// on several benchmarks.
-static cl::opt<bool> StaleMatchingEvenFlowDistribution(
+cl::opt<bool> StaleMatchingEvenFlowDistribution(
"stale-matching-even-flow-distribution",
cl::desc("Try to evenly distribute flow when there are multiple equally "
"likely options."),
cl::init(true), cl::ReallyHidden, cl::cat(BoltOptCategory));
-static cl::opt<bool> StaleMatchingRebalanceUnknown(
+cl::opt<bool> StaleMatchingRebalanceUnknown(
"stale-matching-rebalance-unknown",
cl::desc("Evenly re-distribute flow among unknown subgraphs."),
cl::init(false), cl::ReallyHidden, cl::cat(BoltOptCategory));
-static cl::opt<bool> StaleMatchingJoinIslands(
+cl::opt<bool> StaleMatchingJoinIslands(
"stale-matching-join-islands",
cl::desc("Join isolated components having positive flow."), cl::init(true),
cl::ReallyHidden, cl::cat(BoltOptCategory));
-static cl::opt<unsigned> StaleMatchingCostBlockInc(
+cl::opt<unsigned> StaleMatchingCostBlockInc(
"stale-matching-cost-block-inc",
cl::desc("The cost of increasing a block count by one."), cl::init(150),
cl::ReallyHidden, cl::cat(BoltOptCategory));
-static cl::opt<unsigned> StaleMatchingCostBlockDec(
+cl::opt<unsigned> StaleMatchingCostBlockDec(
"stale-matching-cost-block-dec",
cl::desc("The cost of decreasing a block count by one."), cl::init(150),
cl::ReallyHidden, cl::cat(BoltOptCategory));
-static cl::opt<unsigned> StaleMatchingCostJumpInc(
+cl::opt<unsigned> StaleMatchingCostJumpInc(
"stale-matching-cost-jump-inc",
cl::desc("The cost of increasing a jump count by one."), cl::init(150),
cl::ReallyHidden, cl::cat(BoltOptCategory));
-static cl::opt<unsigned> StaleMatchingCostJumpDec(
+cl::opt<unsigned> StaleMatchingCostJumpDec(
"stale-matching-cost-jump-dec",
cl::desc("The cost of decreasing a jump count by one."), cl::init(150),
cl::ReallyHidden, cl::cat(BoltOptCategory));
-static cl::opt<unsigned> StaleMatchingCostBlockUnknownInc(
+cl::opt<unsigned> StaleMatchingCostBlockUnknownInc(
"stale-matching-cost-block-unknown-inc",
cl::desc("The cost of increasing an unknown block count by one."),
cl::init(1), cl::ReallyHidden, cl::cat(BoltOptCategory));
-static cl::opt<unsigned> StaleMatchingCostJumpUnknownInc(
+cl::opt<unsigned> StaleMatchingCostJumpUnknownInc(
"stale-matching-cost-jump-unknown-inc",
cl::desc("The cost of increasing an unknown jump count by one."),
cl::init(140), cl::ReallyHidden, cl::cat(BoltOptCategory));
-static cl::opt<unsigned> StaleMatchingCostJumpUnknownFTInc(
+cl::opt<unsigned> StaleMatchingCostJumpUnknownFTInc(
"stale-matching-cost-jump-unknown-ft-inc",
cl::desc(
"The cost of increasing an unknown fall-through jump count by one."),
diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp
index d9b7a2bd9a14c..2aa7a8d90cdc5 100644
--- a/bolt/lib/Rewrite/BinaryPassManager.cpp
+++ b/bolt/lib/Rewrite/BinaryPassManager.cpp
@@ -18,6 +18,7 @@
#include "bolt/Passes/Hugify.h"
#include "bolt/Passes/IdenticalCodeFolding.h"
#include "bolt/Passes/IndirectCallPromotion.h"
+#include "bolt/Passes/InferNonStale.h"
#include "bolt/Passes/Inliner.h"
#include "bolt/Passes/Instrumentation.h"
#include "bolt/Passes/JTFootprintReduction.h"
@@ -58,6 +59,7 @@ extern cl::opt<bolt::PLTCall::OptType> PLT;
extern cl::opt<bolt::IdenticalCodeFolding::ICFLevel, false,
llvm::bolt::DeprecatedICFNumericOptionParser>
ICF;
+extern cl::opt<bool> InferNonStaleProfile;
static cl::opt<bool>
DynoStatsAll("dyno-stats-all",
@@ -98,6 +100,11 @@ static cl::opt<bool> PrintEstimateEdgeCounts(
cl::desc("print function after edge counts are set for no-LBR profile"),
cl::Hidden, cl::cat(BoltOptCategory));
+static cl::opt<bool> PrintInferNonStale(
+ "print-infer-non-stale",
+ cl::desc("print function after non-stale profile inference"), cl::Hidden,
+ cl::cat(BoltOptCategory));
+
cl::opt<bool>
PrintFinalized("print-finalized",
cl::desc("print function after CFG is finalized"),
@@ -384,6 +391,15 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
Manager.registerPass(std::make_unique<PrintProfileQualityStats>(NeverPrint));
+ // Optionally run profile inference on non-stale profiles
+ if (opts::InferNonStaleProfile) {
+ Manager.registerPass(std::make_unique<InferNonStale>(PrintInferNonStale));
+
+ // Print profile quality stats after inference to show improvement
+ Manager.registerPass(
+ std::make_unique<PrintProfileQualityStats>(NeverPrint));
+ }
+
Manager.registerPass(std::make_unique<ValidateInternalCalls>(NeverPrint));
Manager.registerPass(std::make_unique<ValidateMemRefs>(NeverPrint));
diff --git a/bolt/test/X86/profile-quality-reporting.test b/bolt/test/X86/profile-quality-reporting.test
index 210d3e10a3890..9908f1e2fe5ce 100644
--- a/bolt/test/X86/profile-quality-reporting.test
+++ b/bolt/test/X86/profile-quality-reporting.test
@@ -2,3 +2,8 @@
RUN: yaml2obj %p/Inputs/blarge_new.yaml &> %t.exe
RUN: llvm-bolt %t.exe -o %t.out --pa -p %p/Inputs/blarge_new.preagg.txt | FileCheck %s
CHECK: profile quality metrics for the hottest 5 functions (reporting top 5% values): function CFG discontinuity 100.00%; call graph flow conservation gap 60.00%; CFG flow conservation gap 45.53% (weighted) 96.87% (worst); exception handling usage 0.00% (of total BBEC) 0.00% (of total InvokeEC)
+
+## Check profile quality with infer-non-stale-profile option
+RUN: llvm-bolt %t.exe -o %t.out --pa -p %p/Inputs/blarge_new.preagg.txt \
+RUN: --infer-non-stale-profile | FileCheck %s --check-prefix CHECK-INFER
+CHECK-INFER: profile quality metrics for the hottest 5 functions (reporting top 5% values): function CFG discontinuity 100.00%; call graph flow conservation gap 60.00%; CFG flow conservation gap 45.53% (weighted) 96.87% (worst); exception handling usage 0.00% (of total BBEC) 0.00% (of total InvokeEC)
More information about the llvm-commits
mailing list