[llvm] [BOLT] Optimize basic block loops to avoid n^2 loop (PR #156243)

Mark Rousskov via llvm-commits llvm-commits at lists.llvm.org
Sun Sep 21 09:34:18 PDT 2025


https://github.com/Mark-Simulacrum updated https://github.com/llvm/llvm-project/pull/156243

>From 480d48e42720915de2f9958b69ad05df7c0bf802 Mon Sep 17 00:00:00 2001
From: Mark Rousskov <mark.simulacrum at gmail.com>
Date: Sun, 31 Aug 2025 09:03:27 -0400
Subject: [PATCH 1/2] [BOLT] Optimize basic block loops to avoid n^2 loop

This improves BOLT runtime when optimizing rustc_driver.so from 15
minutes to 7 minutes (49 minutes to 37 minutes of userspace time).
---
 bolt/lib/Core/BinaryFunction.cpp | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 6cac2d0cca2cb..a86e204cae974 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -3591,6 +3591,18 @@ void BinaryFunction::fixBranches() {
   auto &MIB = BC.MIB;
   MCContext *Ctx = BC.Ctx.get();
 
+  // Caches `FunctionLayout::nextBasicBlock(IgnoreSplits = false)`.
+  // nextBasicBlock uses linear search to find the next block, so the loop
+  // below becomes O(n^2). This avoids that.
+  DenseMap<BinaryBasicBlock *, BinaryBasicBlock *> nextBasicBlock(
+      Layout.block_size());
+  for (size_t i = 0; i + 1 < Layout.block_size(); i++) {
+    auto current = Layout.block_begin() + i;
+    auto next = Layout.block_begin() + i + 1;
+    if (next != Layout.getFragment((*current)->getFragmentNum()).end())
+      nextBasicBlock.insert(std::pair(*current, *next));
+  }
+
   for (BinaryBasicBlock *BB : BasicBlocks) {
     const MCSymbol *TBB = nullptr;
     const MCSymbol *FBB = nullptr;
@@ -3605,7 +3617,7 @@ void BinaryFunction::fixBranches() {
 
     // Basic block that follows the current one in the final layout.
     const BinaryBasicBlock *const NextBB =
-        Layout.getBasicBlockAfter(BB, /*IgnoreSplits=*/false);
+        nextBasicBlock.lookup_or(BB, nullptr);
 
     if (BB->succ_size() == 1) {
       // __builtin_unreachable() could create a conditional branch that

>From db72cc241e075fac0f201f24f36627950434e1f8 Mon Sep 17 00:00:00 2001
From: Mark Rousskov <mark.simulacrum at gmail.com>
Date: Sun, 21 Sep 2025 12:32:56 -0400
Subject: [PATCH 2/2] [BOLT] Split getBasicBlocksAfter cache into a distinct
 function

This enables future re-use in other code that calls getBasicBlockAfter
in loops, though for now those uses aren't introduced.
---
 bolt/include/bolt/Core/FunctionLayout.h |  9 +++++++++
 bolt/lib/Core/BinaryFunction.cpp        | 14 ++------------
 bolt/lib/Core/FunctionLayout.cpp        | 16 ++++++++++++++++
 3 files changed, 27 insertions(+), 12 deletions(-)

diff --git a/bolt/include/bolt/Core/FunctionLayout.h b/bolt/include/bolt/Core/FunctionLayout.h
index ee4dd689b8dd6..f600d8ab0dabf 100644
--- a/bolt/include/bolt/Core/FunctionLayout.h
+++ b/bolt/include/bolt/Core/FunctionLayout.h
@@ -243,9 +243,18 @@ class FunctionLayout {
 
   /// Returns the basic block after the given basic block in the layout or
   /// nullptr if the last basic block is given.
+  ///
+  /// Note that this performs a linear search for BB.
   const BinaryBasicBlock *getBasicBlockAfter(const BinaryBasicBlock *BB,
                                              bool IgnoreSplits = true) const;
 
+  /// Returns a map from BB -> getBasicBlockAfter(BB), omitting null results.
+  ///
+  /// This should be preferred in loops that call getBasicBlockAfter without
+  /// changes to the function layout. Caching the results avoids O(n^2) lookups.
+  DenseMap<BinaryBasicBlock *, BinaryBasicBlock *>
+  getBasicBlocksAfter(bool IgnoreSplits = true) const;
+
   /// True if the layout contains at least two non-empty fragments.
   bool isSplit() const;
 
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index a86e204cae974..35c00a8012426 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -3591,17 +3591,7 @@ void BinaryFunction::fixBranches() {
   auto &MIB = BC.MIB;
   MCContext *Ctx = BC.Ctx.get();
 
-  // Caches `FunctionLayout::nextBasicBlock(IgnoreSplits = false)`.
-  // nextBasicBlock uses linear search to find the next block, so the loop
-  // below becomes O(n^2). This avoids that.
-  DenseMap<BinaryBasicBlock *, BinaryBasicBlock *> nextBasicBlock(
-      Layout.block_size());
-  for (size_t i = 0; i + 1 < Layout.block_size(); i++) {
-    auto current = Layout.block_begin() + i;
-    auto next = Layout.block_begin() + i + 1;
-    if (next != Layout.getFragment((*current)->getFragmentNum()).end())
-      nextBasicBlock.insert(std::pair(*current, *next));
-  }
+  auto NextBasicBlock = Layout.getBasicBlocksAfter(/* IgnoreSplits */ false);
 
   for (BinaryBasicBlock *BB : BasicBlocks) {
     const MCSymbol *TBB = nullptr;
@@ -3617,7 +3607,7 @@ void BinaryFunction::fixBranches() {
 
     // Basic block that follows the current one in the final layout.
     const BinaryBasicBlock *const NextBB =
-        nextBasicBlock.lookup_or(BB, nullptr);
+        NextBasicBlock.lookup_or(BB, nullptr);
 
     if (BB->succ_size() == 1) {
       // __builtin_unreachable() could create a conditional branch that
diff --git a/bolt/lib/Core/FunctionLayout.cpp b/bolt/lib/Core/FunctionLayout.cpp
index 4498fc44da954..4f8d75585b4e4 100644
--- a/bolt/lib/Core/FunctionLayout.cpp
+++ b/bolt/lib/Core/FunctionLayout.cpp
@@ -241,6 +241,22 @@ FunctionLayout::getBasicBlockAfter(const BinaryBasicBlock *BB,
   return *BlockAfter;
 }
 
+DenseMap<BinaryBasicBlock *, BinaryBasicBlock *>
+FunctionLayout::getBasicBlocksAfter(bool IgnoreSplits) const {
+  DenseMap<BinaryBasicBlock *, BinaryBasicBlock *> NextBasicBlock(block_size());
+  for (size_t i = 0; i + 1 < block_size(); i++) {
+    auto Current = block_begin() + i;
+    auto Next = block_begin() + i + 1;
+
+    if (IgnoreSplits) {
+      NextBasicBlock.insert(std::pair(*Current, *Next));
+    } else if (Next != getFragment((*Current)->getFragmentNum()).end()) {
+      NextBasicBlock.insert(std::pair(*Current, *Next));
+    }
+  }
+  return NextBasicBlock;
+}
+
 bool FunctionLayout::isSplit() const {
   const unsigned NonEmptyFragCount = llvm::count_if(
       fragments(), [](const FunctionFragment &FF) { return !FF.empty(); });



More information about the llvm-commits mailing list