[llvm] [BOLT] Optimize basic block loops to avoid n^2 loop (PR #156243)
Mark Rousskov via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 21 09:34:18 PDT 2025
https://github.com/Mark-Simulacrum updated https://github.com/llvm/llvm-project/pull/156243
>From 480d48e42720915de2f9958b69ad05df7c0bf802 Mon Sep 17 00:00:00 2001
From: Mark Rousskov <mark.simulacrum at gmail.com>
Date: Sun, 31 Aug 2025 09:03:27 -0400
Subject: [PATCH 1/2] [BOLT] Optimize basic block loops to avoid n^2 loop
This improves BOLT runtime when optimizing rustc_driver.so from 15
minutes to 7 minutes (49 minutes to 37 minutes of userspace time).
---
bolt/lib/Core/BinaryFunction.cpp | 14 +++++++++++++-
1 file changed, 13 insertions(+), 1 deletion(-)
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 6cac2d0cca2cb..a86e204cae974 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -3591,6 +3591,18 @@ void BinaryFunction::fixBranches() {
auto &MIB = BC.MIB;
MCContext *Ctx = BC.Ctx.get();
+ // Caches `FunctionLayout::nextBasicBlock(IgnoreSplits = false)`.
+ // nextBasicBlock uses linear search to find the next block, so the loop
+ // below becomes O(n^2). This avoids that.
+ DenseMap<BinaryBasicBlock *, BinaryBasicBlock *> nextBasicBlock(
+ Layout.block_size());
+ for (size_t i = 0; i + 1 < Layout.block_size(); i++) {
+ auto current = Layout.block_begin() + i;
+ auto next = Layout.block_begin() + i + 1;
+ if (next != Layout.getFragment((*current)->getFragmentNum()).end())
+ nextBasicBlock.insert(std::pair(*current, *next));
+ }
+
for (BinaryBasicBlock *BB : BasicBlocks) {
const MCSymbol *TBB = nullptr;
const MCSymbol *FBB = nullptr;
@@ -3605,7 +3617,7 @@ void BinaryFunction::fixBranches() {
// Basic block that follows the current one in the final layout.
const BinaryBasicBlock *const NextBB =
- Layout.getBasicBlockAfter(BB, /*IgnoreSplits=*/false);
+ nextBasicBlock.lookup_or(BB, nullptr);
if (BB->succ_size() == 1) {
// __builtin_unreachable() could create a conditional branch that
>From db72cc241e075fac0f201f24f36627950434e1f8 Mon Sep 17 00:00:00 2001
From: Mark Rousskov <mark.simulacrum at gmail.com>
Date: Sun, 21 Sep 2025 12:32:56 -0400
Subject: [PATCH 2/2] [BOLT] Split getBasicBlocksAfter cache into a distinct
function
This enables future re-use in other code that calls getBasicBlockAfter
in loops, though for now those uses aren't introduced.
---
bolt/include/bolt/Core/FunctionLayout.h | 9 +++++++++
bolt/lib/Core/BinaryFunction.cpp | 14 ++------------
bolt/lib/Core/FunctionLayout.cpp | 16 ++++++++++++++++
3 files changed, 27 insertions(+), 12 deletions(-)
diff --git a/bolt/include/bolt/Core/FunctionLayout.h b/bolt/include/bolt/Core/FunctionLayout.h
index ee4dd689b8dd6..f600d8ab0dabf 100644
--- a/bolt/include/bolt/Core/FunctionLayout.h
+++ b/bolt/include/bolt/Core/FunctionLayout.h
@@ -243,9 +243,18 @@ class FunctionLayout {
/// Returns the basic block after the given basic block in the layout or
/// nullptr if the last basic block is given.
+ ///
+ /// Note that this performs a linear search for BB.
const BinaryBasicBlock *getBasicBlockAfter(const BinaryBasicBlock *BB,
bool IgnoreSplits = true) const;
+ /// Returns a mapping from BB -> getBasicBlockAfter(BB).
+ ///
+ /// This should be preferred in loops that call getBasicBlockAfter without
+ /// changes to the function layout. Caching results avoids n^2 lookup cost.
+ DenseMap<BinaryBasicBlock *, BinaryBasicBlock *>
+ getBasicBlocksAfter(bool IgnoreSplits = true) const;
+
/// True if the layout contains at least two non-empty fragments.
bool isSplit() const;
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index a86e204cae974..35c00a8012426 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -3591,17 +3591,7 @@ void BinaryFunction::fixBranches() {
auto &MIB = BC.MIB;
MCContext *Ctx = BC.Ctx.get();
- // Caches `FunctionLayout::nextBasicBlock(IgnoreSplits = false)`.
- // nextBasicBlock uses linear search to find the next block, so the loop
- // below becomes O(n^2). This avoids that.
- DenseMap<BinaryBasicBlock *, BinaryBasicBlock *> nextBasicBlock(
- Layout.block_size());
- for (size_t i = 0; i + 1 < Layout.block_size(); i++) {
- auto current = Layout.block_begin() + i;
- auto next = Layout.block_begin() + i + 1;
- if (next != Layout.getFragment((*current)->getFragmentNum()).end())
- nextBasicBlock.insert(std::pair(*current, *next));
- }
+ auto NextBasicBlock = Layout.getBasicBlocksAfter(/* IgnoreSplits */ false);
for (BinaryBasicBlock *BB : BasicBlocks) {
const MCSymbol *TBB = nullptr;
@@ -3617,7 +3607,7 @@ void BinaryFunction::fixBranches() {
// Basic block that follows the current one in the final layout.
const BinaryBasicBlock *const NextBB =
- nextBasicBlock.lookup_or(BB, nullptr);
+ NextBasicBlock.lookup_or(BB, nullptr);
if (BB->succ_size() == 1) {
// __builtin_unreachable() could create a conditional branch that
diff --git a/bolt/lib/Core/FunctionLayout.cpp b/bolt/lib/Core/FunctionLayout.cpp
index 4498fc44da954..4f8d75585b4e4 100644
--- a/bolt/lib/Core/FunctionLayout.cpp
+++ b/bolt/lib/Core/FunctionLayout.cpp
@@ -241,6 +241,22 @@ FunctionLayout::getBasicBlockAfter(const BinaryBasicBlock *BB,
return *BlockAfter;
}
+DenseMap<BinaryBasicBlock *, BinaryBasicBlock *>
+FunctionLayout::getBasicBlocksAfter(bool IgnoreSplits) const {
+  // Maps each block to the block placed right after it; absent key = none.
+  DenseMap<BinaryBasicBlock *, BinaryBasicBlock *> Successors(block_size());
+  for (size_t Idx = 1; Idx < block_size(); ++Idx) {
+    const auto Prev = block_begin() + (Idx - 1);
+    const auto Succ = block_begin() + Idx;
+    // Unless splits are ignored, drop pairs whose successor position is the
+    // end() of the fragment that the predecessor reports via getFragmentNum().
+    const bool AtFragmentEnd =
+        !IgnoreSplits && Succ == getFragment((*Prev)->getFragmentNum()).end();
+    if (!AtFragmentEnd)
+      Successors.insert(std::pair(*Prev, *Succ));
+  }
+  return Successors;
+}
+
bool FunctionLayout::isSplit() const {
const unsigned NonEmptyFragCount = llvm::count_if(
fragments(), [](const FunctionFragment &FF) { return !FF.empty(); });
More information about the llvm-commits
mailing list