[llvm] [BOLT] Infer non-stale profile (PR #160805)

Amir Ayupov via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 25 21:32:31 PDT 2025


https://github.com/aaupov created https://github.com/llvm/llvm-project/pull/160805

Run profi on non-stale/non-inferred profiles to improve profile quality.

Test Plan: updated profile-quality-reporting.test


>From 12a0978dae3e309f233e95c7edd3a5ae2f9d5873 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Thu, 25 Sep 2025 21:32:16 -0700
Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?=
 =?UTF-8?q?l=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4
---
 bolt/include/bolt/Passes/InferNonStale.h     |  41 +++++
 bolt/lib/Passes/CMakeLists.txt               |   2 +
 bolt/lib/Passes/InferNonStale.cpp            | 168 +++++++++++++++++++
 bolt/lib/Profile/StaleProfileMatching.cpp    |  24 +--
 bolt/lib/Rewrite/BinaryPassManager.cpp       |  16 ++
 bolt/test/X86/profile-quality-reporting.test |   5 +
 6 files changed, 244 insertions(+), 12 deletions(-)
 create mode 100644 bolt/include/bolt/Passes/InferNonStale.h
 create mode 100644 bolt/lib/Passes/InferNonStale.cpp

diff --git a/bolt/include/bolt/Passes/InferNonStale.h b/bolt/include/bolt/Passes/InferNonStale.h
new file mode 100644
index 0000000000000..16e7aecbd6eb9
--- /dev/null
+++ b/bolt/include/bolt/Passes/InferNonStale.h
@@ -0,0 +1,41 @@
+//===- bolt/Passes/InferNonStale.h - Infer non-stale profile ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the pass that runs profile inference (profi) on
+// functions that have a valid profile which was not already inferred, in
+// order to improve profile quality.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BOLT_PASSES_INFERNONSTALE_H
+#define BOLT_PASSES_INFERNONSTALE_H
+
+#include "bolt/Passes/BinaryPasses.h"
+
+namespace llvm {
+namespace bolt {
+
+/// Run profile inference (profi) on functions that have a valid profile which
+/// was not already inferred, to improve edge count estimates and quality.
+class InferNonStale : public BinaryFunctionPass {
+  void runOnFunction(BinaryFunction &BF);
+
+public:
+  explicit InferNonStale(const cl::opt<bool> &PrintPass)
+      : BinaryFunctionPass(PrintPass) {}
+
+  const char *getName() const override { return "infer-non-stale"; }
+
+  /// Pass entry point: applies the inference to every eligible function.
+  Error runOnFunctions(BinaryContext &BC) override;
+};
+
+} // namespace bolt
+} // namespace llvm
+
+#endif
diff --git a/bolt/lib/Passes/CMakeLists.txt b/bolt/lib/Passes/CMakeLists.txt
index 77d2bb9c2bcb5..9f36d3e02afb5 100644
--- a/bolt/lib/Passes/CMakeLists.txt
+++ b/bolt/lib/Passes/CMakeLists.txt
@@ -16,6 +16,7 @@ add_llvm_library(LLVMBOLTPasses
   Hugify.cpp
   IdenticalCodeFolding.cpp
   IndirectCallPromotion.cpp
+  InferNonStale.cpp
   Inliner.cpp
   Instrumentation.cpp
   JTFootprintReduction.cpp
@@ -64,5 +65,6 @@ add_llvm_library(LLVMBOLTPasses
 target_link_libraries(LLVMBOLTPasses
   PRIVATE
   LLVMBOLTCore
+  LLVMBOLTProfile
   LLVMBOLTUtils
   )
diff --git a/bolt/lib/Passes/InferNonStale.cpp b/bolt/lib/Passes/InferNonStale.cpp
new file mode 100644
index 0000000000000..f7d1944ecb0c4
--- /dev/null
+++ b/bolt/lib/Passes/InferNonStale.cpp
@@ -0,0 +1,168 @@
+//===- bolt/Passes/InferNonStale.cpp - Non-stale profile inference ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the InferNonStale pass that runs stale profile
+// matching on functions with non-stale/non-inferred profile to improve
+// profile quality.
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Passes/InferNonStale.h"
+
+#include "bolt/Core/BinaryFunction.h"
+#include "bolt/Core/ParallelUtilities.h"
+#include "bolt/Utils/CommandLineOpts.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Transforms/Utils/SampleProfileInference.h"
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "infer-non-stale"
+
+using namespace llvm;
+using namespace bolt;
+
+namespace opts {
+
+extern cl::opt<bool> TimeRewrite;
+extern cl::OptionCategory BoltOptCategory;
+
+cl::opt<bool>
+    InferNonStaleProfile("infer-non-stale-profile",
+                         cl::desc("Infer profile counts for functions with "
+                                  "non-stale profile using profi"),
+                         cl::init(false), cl::cat(BoltOptCategory));
+
+// Inference parameters shared with (and defined in) StaleProfileMatching.cpp.
+extern cl::opt<bool> StaleMatchingEvenFlowDistribution;
+extern cl::opt<bool> StaleMatchingRebalanceUnknown;
+extern cl::opt<bool> StaleMatchingJoinIslands;
+extern cl::opt<unsigned> StaleMatchingCostBlockInc;
+extern cl::opt<unsigned> StaleMatchingCostBlockDec;
+extern cl::opt<unsigned> StaleMatchingCostJumpInc;
+extern cl::opt<unsigned> StaleMatchingCostJumpDec;
+extern cl::opt<unsigned> StaleMatchingCostBlockUnknownInc;
+extern cl::opt<unsigned> StaleMatchingCostJumpUnknownInc;
+extern cl::opt<unsigned> StaleMatchingCostJumpUnknownFTInc;
+
+} // namespace opts
+
+namespace llvm {
+namespace bolt {
+
+// Defined in StaleProfileMatching.cpp. NOTE(review): prefer a shared header.
+FlowFunction
+createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder);
+void preprocessUnreachableBlocks(FlowFunction &Func);
+void assignProfile(BinaryFunction &BF,
+                   const BinaryFunction::BasicBlockOrderType &BlockOrder,
+                   FlowFunction &Func);
+
+} // namespace bolt
+} // namespace llvm
+
+namespace llvm {
+namespace bolt {
+
+void InferNonStale::runOnFunction(BinaryFunction &BF) {
+  // May be invoked concurrently from runOnFunctions(): keep this body free of
+  // shared named timers. The asserts mirror the SkipFunc predicate used there.
+  assert(BF.hasCFG() && "Function must have CFG");
+  assert(BF.hasValidProfile() && "Function must have valid profile");
+  assert(!BF.hasInferredProfile() && "Function must not have inferred profile");
+
+  LLVM_DEBUG(dbgs() << "BOLT-INFO: applying non-stale profile inference for "
+                    << "\"" << BF.getPrintName() << "\"\n");
+
+  // Make sure that block hashes are up to date.
+  BF.computeBlockHashes();
+
+  const BinaryFunction::BasicBlockOrderType BlockOrder(
+      BF.getLayout().block_begin(), BF.getLayout().block_end());
+
+  // Create a wrapper flow function to use with the profile inference algorithm.
+  FlowFunction Func = createFlowFunction(BlockOrder);
+
+  // Seed the flow function with existing counts (unlike stale matching).
+  for (uint64_t I = 0; I < BlockOrder.size(); I++) {
+    BinaryBasicBlock *BB = BlockOrder[I];
+    FlowBlock &Block = Func.Blocks[I + 1]; // Skip dummy entry block
+
+    // Set block weight from existing execution count
+    Block.Weight = BB->getKnownExecutionCount();
+    Block.HasUnknownWeight = (Block.Weight == 0);
+
+    // Set jump weights from existing branch info
+    for (FlowJump *Jump : Block.SuccJumps) {
+      if (Jump->Target == Func.Blocks.size() - 1) // Skip artificial sink
+        continue;
+
+      BinaryBasicBlock *SuccBB = BlockOrder[Jump->Target - 1];
+      if (BB->getSuccessor(SuccBB->getLabel())) {
+        BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*SuccBB);
+        Jump->Weight = BI.Count;
+        Jump->HasUnknownWeight = (Jump->Weight == 0);
+      }
+    }
+  }
+
+  // Adjust the flow function by marking unreachable blocks Unlikely
+  preprocessUnreachableBlocks(Func);
+
+  // Set up inference parameters
+  ProfiParams Params;
+  Params.EvenFlowDistribution = opts::StaleMatchingEvenFlowDistribution;
+  Params.RebalanceUnknown = opts::StaleMatchingRebalanceUnknown;
+  Params.JoinIslands = opts::StaleMatchingJoinIslands;
+
+  Params.CostBlockInc = opts::StaleMatchingCostBlockInc;
+  Params.CostBlockEntryInc = opts::StaleMatchingCostBlockInc;
+  Params.CostBlockDec = opts::StaleMatchingCostBlockDec;
+  Params.CostBlockEntryDec = opts::StaleMatchingCostBlockDec;
+  Params.CostBlockUnknownInc = opts::StaleMatchingCostBlockUnknownInc;
+
+  Params.CostJumpInc = opts::StaleMatchingCostJumpInc;
+  Params.CostJumpFTInc = opts::StaleMatchingCostJumpInc;
+  Params.CostJumpDec = opts::StaleMatchingCostJumpDec;
+  Params.CostJumpFTDec = opts::StaleMatchingCostJumpDec;
+  Params.CostJumpUnknownInc = opts::StaleMatchingCostJumpUnknownInc;
+  Params.CostJumpUnknownFTInc = opts::StaleMatchingCostJumpUnknownFTInc;
+
+  // Apply the profile inference algorithm
+  applyFlowInference(Params, Func);
+
+  // Collect inferred counts and update function annotations
+  assignProfile(BF, BlockOrder, Func);
+
+  // Mark the function as having inferred profile
+  BF.setHasInferredProfile(true);
+}
+
+Error InferNonStale::runOnFunctions(BinaryContext &BC) {
+  // Time the pass as a whole. runOnFunction() may run on several threads at
+  // once, and a single named timer cannot be started while already running.
+  NamedRegionTimer T("inferNonStale", "non-stale profile inference", "rewrite",
+                     "Rewrite passes", opts::TimeRewrite);
+
+  ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
+    runOnFunction(BF);
+  };
+
+  ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
+    return !BF.hasValidProfile() || BF.hasInferredProfile() || !BF.hasCFG();
+  };
+
+  ParallelUtilities::runOnEachFunction(
+      BC, ParallelUtilities::SchedulingPolicy::SP_BB_QUADRATIC, WorkFun,
+      SkipFunc, "InferNonStale");
+
+  return Error::success();
+}
+
+} // namespace bolt
+} // namespace llvm
diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 1a61949d77472..b66a3f478f1a7 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -52,66 +52,66 @@ cl::opt<bool>
                       cl::desc("Infer counts from stale profile data."),
                       cl::init(false), cl::Hidden, cl::cat(BoltOptCategory));
 
-static cl::opt<unsigned> StaleMatchingMinMatchedBlock(
+cl::opt<unsigned> StaleMatchingMinMatchedBlock(
     "stale-matching-min-matched-block",
     cl::desc("Percentage threshold of matched basic blocks at which stale "
              "profile inference is executed."),
     cl::init(0), cl::Hidden, cl::cat(BoltOptCategory));
 
-static cl::opt<unsigned> StaleMatchingMaxFuncSize(
+cl::opt<unsigned> StaleMatchingMaxFuncSize(
     "stale-matching-max-func-size",
     cl::desc("The maximum size of a function to consider for inference."),
     cl::init(10000), cl::Hidden, cl::cat(BoltOptCategory));
 
 // Parameters of the profile inference algorithm. The default values are tuned
 // on several benchmarks.
-static cl::opt<bool> StaleMatchingEvenFlowDistribution(
+cl::opt<bool> StaleMatchingEvenFlowDistribution(
     "stale-matching-even-flow-distribution",
     cl::desc("Try to evenly distribute flow when there are multiple equally "
              "likely options."),
     cl::init(true), cl::ReallyHidden, cl::cat(BoltOptCategory));
 
-static cl::opt<bool> StaleMatchingRebalanceUnknown(
+cl::opt<bool> StaleMatchingRebalanceUnknown(
     "stale-matching-rebalance-unknown",
     cl::desc("Evenly re-distribute flow among unknown subgraphs."),
     cl::init(false), cl::ReallyHidden, cl::cat(BoltOptCategory));
 
-static cl::opt<bool> StaleMatchingJoinIslands(
+cl::opt<bool> StaleMatchingJoinIslands(
     "stale-matching-join-islands",
     cl::desc("Join isolated components having positive flow."), cl::init(true),
     cl::ReallyHidden, cl::cat(BoltOptCategory));
 
-static cl::opt<unsigned> StaleMatchingCostBlockInc(
+cl::opt<unsigned> StaleMatchingCostBlockInc(
     "stale-matching-cost-block-inc",
     cl::desc("The cost of increasing a block count by one."), cl::init(150),
     cl::ReallyHidden, cl::cat(BoltOptCategory));
 
-static cl::opt<unsigned> StaleMatchingCostBlockDec(
+cl::opt<unsigned> StaleMatchingCostBlockDec(
     "stale-matching-cost-block-dec",
     cl::desc("The cost of decreasing a block count by one."), cl::init(150),
     cl::ReallyHidden, cl::cat(BoltOptCategory));
 
-static cl::opt<unsigned> StaleMatchingCostJumpInc(
+cl::opt<unsigned> StaleMatchingCostJumpInc(
     "stale-matching-cost-jump-inc",
     cl::desc("The cost of increasing a jump count by one."), cl::init(150),
     cl::ReallyHidden, cl::cat(BoltOptCategory));
 
-static cl::opt<unsigned> StaleMatchingCostJumpDec(
+cl::opt<unsigned> StaleMatchingCostJumpDec(
     "stale-matching-cost-jump-dec",
     cl::desc("The cost of decreasing a jump count by one."), cl::init(150),
     cl::ReallyHidden, cl::cat(BoltOptCategory));
 
-static cl::opt<unsigned> StaleMatchingCostBlockUnknownInc(
+cl::opt<unsigned> StaleMatchingCostBlockUnknownInc(
     "stale-matching-cost-block-unknown-inc",
     cl::desc("The cost of increasing an unknown block count by one."),
     cl::init(1), cl::ReallyHidden, cl::cat(BoltOptCategory));
 
-static cl::opt<unsigned> StaleMatchingCostJumpUnknownInc(
+cl::opt<unsigned> StaleMatchingCostJumpUnknownInc(
     "stale-matching-cost-jump-unknown-inc",
     cl::desc("The cost of increasing an unknown jump count by one."),
     cl::init(140), cl::ReallyHidden, cl::cat(BoltOptCategory));
 
-static cl::opt<unsigned> StaleMatchingCostJumpUnknownFTInc(
+cl::opt<unsigned> StaleMatchingCostJumpUnknownFTInc(
     "stale-matching-cost-jump-unknown-ft-inc",
     cl::desc(
         "The cost of increasing an unknown fall-through jump count by one."),
diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp
index d9b7a2bd9a14c..2aa7a8d90cdc5 100644
--- a/bolt/lib/Rewrite/BinaryPassManager.cpp
+++ b/bolt/lib/Rewrite/BinaryPassManager.cpp
@@ -18,6 +18,7 @@
 #include "bolt/Passes/Hugify.h"
 #include "bolt/Passes/IdenticalCodeFolding.h"
 #include "bolt/Passes/IndirectCallPromotion.h"
+#include "bolt/Passes/InferNonStale.h"
 #include "bolt/Passes/Inliner.h"
 #include "bolt/Passes/Instrumentation.h"
 #include "bolt/Passes/JTFootprintReduction.h"
@@ -58,6 +59,7 @@ extern cl::opt<bolt::PLTCall::OptType> PLT;
 extern cl::opt<bolt::IdenticalCodeFolding::ICFLevel, false,
                llvm::bolt::DeprecatedICFNumericOptionParser>
     ICF;
+extern cl::opt<bool> InferNonStaleProfile;
 
 static cl::opt<bool>
 DynoStatsAll("dyno-stats-all",
@@ -98,6 +100,11 @@ static cl::opt<bool> PrintEstimateEdgeCounts(
     cl::desc("print function after edge counts are set for no-LBR profile"),
     cl::Hidden, cl::cat(BoltOptCategory));
 
+static cl::opt<bool> PrintInferNonStale(
+    "print-infer-non-stale",
+    cl::desc("print function after non-stale profile inference"), cl::Hidden,
+    cl::cat(BoltOptCategory));
+
 cl::opt<bool>
 PrintFinalized("print-finalized",
   cl::desc("print function after CFG is finalized"),
@@ -384,6 +391,15 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
 
   Manager.registerPass(std::make_unique<PrintProfileQualityStats>(NeverPrint));
 
+  // Optionally run profile inference on non-stale profiles
+  if (opts::InferNonStaleProfile) {
+    Manager.registerPass(std::make_unique<InferNonStale>(PrintInferNonStale));
+
+    // Print profile quality stats after inference to show improvement
+    Manager.registerPass(
+        std::make_unique<PrintProfileQualityStats>(NeverPrint));
+  }
+
   Manager.registerPass(std::make_unique<ValidateInternalCalls>(NeverPrint));
 
   Manager.registerPass(std::make_unique<ValidateMemRefs>(NeverPrint));
diff --git a/bolt/test/X86/profile-quality-reporting.test b/bolt/test/X86/profile-quality-reporting.test
index 210d3e10a3890..9908f1e2fe5ce 100644
--- a/bolt/test/X86/profile-quality-reporting.test
+++ b/bolt/test/X86/profile-quality-reporting.test
@@ -2,3 +2,8 @@
 RUN: yaml2obj %p/Inputs/blarge_new.yaml &> %t.exe
 RUN: llvm-bolt %t.exe -o %t.out --pa -p %p/Inputs/blarge_new.preagg.txt | FileCheck %s
 CHECK: profile quality metrics for the hottest 5 functions (reporting top 5% values): function CFG discontinuity 100.00%; call graph flow conservation gap 60.00%; CFG flow conservation gap 45.53% (weighted) 96.87% (worst); exception handling usage 0.00% (of total BBEC) 0.00% (of total InvokeEC)
+
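+## Check profile quality with the infer-non-stale-profile option. NOTE(review): the stats line is printed both before and after inference, and this pattern equals the baseline CHECK, so it can match the pre-inference report; confirm it exercises the pass.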
+RUN: llvm-bolt %t.exe -o %t.out --pa -p %p/Inputs/blarge_new.preagg.txt \
+RUN:   --infer-non-stale-profile | FileCheck %s --check-prefix CHECK-INFER
+CHECK-INFER: profile quality metrics for the hottest 5 functions (reporting top 5% values): function CFG discontinuity 100.00%; call graph flow conservation gap 60.00%; CFG flow conservation gap 45.53% (weighted) 96.87% (worst); exception handling usage 0.00% (of total BBEC) 0.00% (of total InvokeEC)



More information about the llvm-commits mailing list