[lld] [lld-macho] Fix thunk insertion for cross-section branches in multi-section segments (PR #175704)

via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 12 20:25:21 PST 2026


https://github.com/alx32 updated https://github.com/llvm/llvm-project/pull/175704

>From 4c08ca99014455010f3b1b882f93364f660c109b Mon Sep 17 00:00:00 2001
From: alexborcan <alexborcan at meta.com>
Date: Mon, 12 Jan 2026 19:05:42 -0800
Subject: [PATCH 1/2] [lld-macho] Fix thunks when having multiple .text
 sections

---
 lld/MachO/ConcatOutputSection.cpp          | 69 ++++++++++++----
 lld/test/MachO/arm64-thunks-crosssection.s | 96 ++++++++++++++++++++++
 2 files changed, 151 insertions(+), 14 deletions(-)
 create mode 100644 lld/test/MachO/arm64-thunks-crosssection.s

diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp
index e559676ef5e9b..78f8c3ceca450 100644
--- a/lld/MachO/ConcatOutputSection.cpp
+++ b/lld/MachO/ConcatOutputSection.cpp
@@ -9,6 +9,7 @@
 #include "ConcatOutputSection.h"
 #include "Config.h"
 #include "OutputSegment.h"
+#include "Sections.h"
 #include "SymbolTable.h"
 #include "Symbols.h"
 #include "SyntheticSections.h"
@@ -122,25 +123,65 @@ DenseMap<Symbol *, ThunkInfo> lld::macho::thunkMap;
 bool TextOutputSection::needsThunks() const {
   if (!target->usesThunks())
     return false;
-  uint64_t isecAddr = addr;
-  for (ConcatInputSection *isec : inputs)
-    isecAddr = alignToPowerOf2(isecAddr, isec->align) + isec->getSize();
-  // Other sections besides __text might be small enough to pass this
-  // test but nevertheless need thunks for calling into other sections.
-  // An imperfect heuristic to use in this case is that if a section
-  // we've already processed in this segment needs thunks, so do the
-  // rest.
+
+  // Other sections besides __text might be small enough to pass a per-section
+  // range test but nevertheless need thunks for calling into other code
+  // sections in the same segment. We track this at the segment level: once any
+  // code section in a segment needs thunks, all subsequent code sections do
+  // too.
   bool needsThunks = parent && parent->needsThunks;
 
-  // Calculate the total size of all branch target sections
-  uint64_t branchTargetsSize = in.stubs->getSize();
+  auto estimateTextEndVA = [](const TextOutputSection *osec, uint64_t startVA) {
+    uint64_t endVA = startVA;
+    for (ConcatInputSection *isec : osec->inputs)
+      endVA = alignToPowerOf2(endVA, isec->align) + isec->getSize();
+    return endVA;
+  };
+
+  // Compute the end address of the last section in this segment that can be a
+  // target of a BRANCH relocation. If the distance from the start of this text
+  // section to that end address fits in the branch range, then all branch
+  // relocations originating from this section are guaranteed to be in-range.
+  uint64_t curVA = estimateTextEndVA(this, addr);
+  uint64_t lastBranchTargetEndVA =
+      (sections::isCodeSection(name, segment_names::text, flags) ||
+       name == section_names::stubs || name == section_names::objcStubs)
+          ? curVA
+          : addr;
 
-  // Add the size of __objc_stubs section if it exists
-  if (in.objcStubs && in.objcStubs->isNeeded())
-    branchTargetsSize += in.objcStubs->getSize();
+  if (parent) {
+    bool foundThis = false;
+    for (OutputSection *osec : parent->getSections()) {
+      if (!osec->isNeeded())
+        continue;
+      if (!foundThis) {
+        if (osec != this)
+          continue;
+        foundThis = true;
+        continue;
+      }
+
+      curVA = alignToPowerOf2(curVA, osec->align);
+
+      uint64_t endVA;
+      if (auto *textOsec = dyn_cast<TextOutputSection>(osec)) {
+        endVA = estimateTextEndVA(textOsec, curVA);
+      } else {
+        endVA = curVA + osec->getSize();
+      }
+
+      if (sections::isCodeSection(osec->name, segment_names::text,
+                                  osec->flags) ||
+          osec->name == section_names::stubs ||
+          osec->name == section_names::objcStubs)
+        lastBranchTargetEndVA = endVA;
+
+      curVA = endVA;
+    }
+  }
 
   if (!needsThunks &&
-      isecAddr - addr + branchTargetsSize <=
+      lastBranchTargetEndVA - addr <=
           std::min(target->backwardBranchRange, target->forwardBranchRange))
     return false;
   // Yes, this program is large enough to need thunks.
diff --git a/lld/test/MachO/arm64-thunks-crosssection.s b/lld/test/MachO/arm64-thunks-crosssection.s
new file mode 100644
index 0000000000000..e1ac3ef418ac5
--- /dev/null
+++ b/lld/test/MachO/arm64-thunks-crosssection.s
@@ -0,0 +1,96 @@
+# REQUIRES: aarch64
+
+## This test verifies that thunks are created for branches between multiple
+## code sections in the same segment when the combined span exceeds the
+## branch range, even if each individual section is within range.
+##
+## The bug occurs when:
+## (1) Section __text alone is within branch range (here ~128 MB)
+## (2) Section __text_second alone is also within range (here ~1 MB)
+## (3) Their combined span exceeds the branch range (> 128 MB total)
+## (4) Calls from early in __text to __text_second need thunks
+##
+## Without the fix, needsThunks() only considered individual section sizes,
+## not the total span, causing BRANCH26 out of range errors.
+
+# RUN: rm -rf %t; mkdir %t
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t/input.o
+# RUN: %lld -arch arm64 -lSystem -o %t/out %t/input.o
+# RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t/out | FileCheck %s
+
+# CHECK: Disassembly of section __TEXT,__text:
+
+## _early_func is at the start of __text, it calls _far_func which is in
+## __text_second section. This branch crosses sections and needs a thunk
+## because the total span exceeds branch range.
+# CHECK-LABEL: <_early_func>:
+# CHECK:         bl {{.*}} <_far_func.thunk.0>
+# CHECK:         bl {{.*}} <_helper>
+# CHECK:         ret
+
+# CHECK-LABEL: <_helper>:
+# CHECK:         ret
+
+## After the padding, _mid_func also calls _far_func through the same thunk.
+
+## Verify the thunk is created; it is placed before _mid_func in the output.
+# CHECK-LABEL: <_far_func.thunk.0>:
+# CHECK:         adrp x16
+# CHECK:         add  x16, x16
+# CHECK:         br   x16
+
+# CHECK-LABEL: <_mid_func>:
+# CHECK:         bl {{.*}} <_far_func.thunk.0>
+# CHECK:         ret
+
+# CHECK: Disassembly of section __TEXT,__text_second:
+
+# CHECK-LABEL: <_far_func>:
+# CHECK:         ret
+
+
+.text
+.globl _main
+.p2align 2
+_main:
+  bl _early_func
+  ret
+
+.globl _early_func
+.p2align 2
+_early_func:
+  ## This call to _far_func crosses sections and exceeds branch range
+  bl _far_func
+  bl _helper
+  ret
+
+.globl _helper
+.p2align 2
+_helper:
+  ret
+
+## Pad __text section to ~64 MB
+## 0x4000000 = 64 MiB = half the branch range
+.space 0x4000000-0x20
+
+.globl _mid_func
+.p2align 2
+_mid_func:
+  bl _far_func
+  ret
+
+## More padding to push __text to ~128 MB
+.space 0x4000000-0x10
+
+## This is a second code section in __TEXT segment
+.section __TEXT,__text_second,regular,pure_instructions
+
+.globl _far_func
+.p2align 2
+_far_func:
+  ret
+
+## Add padding in second section to ensure total span exceeds branch range
+.space 0x100000
+
+.subsections_via_symbols

>From 53d44ec5fa7284cc8aa974b95aee6063134daca0 Mon Sep 17 00:00:00 2001
From: alexborcan <alexborcan at meta.com>
Date: Mon, 12 Jan 2026 19:33:18 -0800
Subject: [PATCH 2/2] [lld-macho] Refactor TextOutputSection::needsThunks for
 better readability

---
 lld/MachO/ConcatOutputSection.cpp | 162 ++++++++++++++++--------------
 lld/MachO/ConcatOutputSection.h   |  11 ++
 2 files changed, 98 insertions(+), 75 deletions(-)

diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp
index 78f8c3ceca450..cedd2b4a201fe 100644
--- a/lld/MachO/ConcatOutputSection.cpp
+++ b/lld/MachO/ConcatOutputSection.cpp
@@ -115,96 +115,108 @@ void ConcatOutputSection::addInput(ConcatInputSection *input) {
 
 DenseMap<Symbol *, ThunkInfo> lld::macho::thunkMap;
 
-// Determine whether we need thunks, which depends on the target arch -- RISC
-// (i.e., ARM) generally does because it has limited-range branch/call
-// instructions, whereas CISC (i.e., x86) generally doesn't. RISC only needs
-// thunks for programs so large that branch source & destination addresses
-// might differ more than the range of branch instruction(s).
-bool TextOutputSection::needsThunks() const {
-  if (!target->usesThunks())
-    return false;
+namespace {
+
+// Returns true if `osec` can be the target of a BRANCH relocation.
+// Branch targets include code sections and stub sections for dynamic calls.
+bool isBranchTargetSection(const OutputSection *osec) {
+  return sections::isCodeSection(osec->name, segment_names::text,
+                                 osec->flags) ||
+         osec->name == section_names::stubs ||
+         osec->name == section_names::objcStubs;
+}
 
-  // Other sections besides __text might be small enough to pass a per-section
-  // range test but nevertheless need thunks for calling into other code
-  // sections in the same segment. We track this at the segment level: once any
-  // code section in a segment needs thunks, all subsequent code sections do
-  // too.
-  bool needsThunks = parent && parent->needsThunks;
-
-  auto estimateTextEndVA = [](const TextOutputSection *osec, uint64_t startVA) {
-    uint64_t endVA = startVA;
-    for (ConcatInputSection *isec : osec->inputs)
-      endVA = alignToPowerOf2(endVA, isec->align) + isec->getSize();
-    return endVA;
-  };
-
-  // Compute the end address of the last section in this segment that can be a
-  // target of a BRANCH relocation. If the distance from the start of this text
-  // section to that end address fits in the branch range, then all branch
-  // relocations originating from this section are guaranteed to be in-range.
-  uint64_t curVA = estimateTextEndVA(this, addr);
-  uint64_t lastBranchTargetEndVA =
-      (sections::isCodeSection(name, segment_names::text, flags) ||
-       name == section_names::stubs || name == section_names::objcStubs)
-          ? curVA
-          : addr;
-
-  if (parent) {
-    bool foundThis = false;
-    for (OutputSection *osec : parent->getSections()) {
-      if (!osec->isNeeded())
-        continue;
-      if (!foundThis) {
-        if (osec != this)
-          continue;
-        foundThis = true;
-        continue;
-      }
+} // namespace
 
-      curVA = alignToPowerOf2(curVA, osec->align);
+uint64_t TextOutputSection::estimateEndVA(uint64_t startVA) const {
+  uint64_t endVA = startVA;
+  for (ConcatInputSection *isec : inputs)
+    endVA = alignToPowerOf2(endVA, isec->align) + isec->getSize();
+  return endVA;
+}
 
-      uint64_t endVA;
-      if (auto *textOsec = dyn_cast<TextOutputSection>(osec)) {
-        endVA = estimateTextEndVA(textOsec, curVA);
-      } else {
-        endVA = curVA + osec->getSize();
-      }
+uint64_t TextOutputSection::estimateFurthestBranchTargetEndVA() const {
+  uint64_t curVA = estimateEndVA(addr);
+  uint64_t furthestTargetEndVA = isBranchTargetSection(this) ? curVA : addr;
 
-      if (sections::isCodeSection(osec->name, segment_names::text,
-                                  osec->flags) ||
-          osec->name == section_names::stubs ||
-          osec->name == section_names::objcStubs)
-        lastBranchTargetEndVA = endVA;
+  if (!parent)
+    return furthestTargetEndVA;
 
-      curVA = endVA;
-    }
-  }
+  // Find this section in the segment's section list.
+  const std::vector<OutputSection *> &sections = parent->getSections();
+  auto it = llvm::find(sections, this);
+  assert(it != sections.end() && "section not found in parent segment");
 
-  if (!needsThunks &&
-      lastBranchTargetEndVA - addr <=
-          std::min(target->backwardBranchRange, target->forwardBranchRange))
-    return false;
-  // Yes, this program is large enough to need thunks.
-  if (parent) {
-    parent->needsThunks = true;
+  // Walk sections after this one, simulating layout (alignment + size).
+  // Track the end VA of the furthest branch target section.
+  for (++it; it != sections.end(); ++it) {
+    OutputSection *osec = *it;
+    if (!osec->isNeeded())
+      continue;
+
+    curVA = alignToPowerOf2(curVA, osec->align);
+    uint64_t endVA;
+    if (auto *textOsec = dyn_cast<TextOutputSection>(osec))
+      endVA = textOsec->estimateEndVA(curVA);
+    else
+      endVA = curVA + osec->getSize();
+
+    if (isBranchTargetSection(osec))
+      furthestTargetEndVA = endVA;
+
+    curVA = endVA;
   }
+
+  return furthestTargetEndVA;
+}
+
+void TextOutputSection::recordCallSites() const {
   for (ConcatInputSection *isec : inputs) {
     for (Reloc &r : isec->relocs) {
       if (!target->hasAttr(r.type, RelocAttrBits::BRANCH))
         continue;
       auto *sym = cast<Symbol *>(r.referent);
-      // Pre-populate the thunkMap and memoize call site counts for every
-      // InputSection and ThunkInfo. We do this for the benefit of
-      // estimateBranchTargetThresholdVA().
-      ThunkInfo &thunkInfo = thunkMap[sym];
-      // Knowing ThunkInfo call site count will help us know whether or not we
-      // might need to create more for this referent at the time we are
-      // estimating distance to __stubs in estimateBranchTargetThresholdVA().
-      ++thunkInfo.callSiteCount;
-      // We can avoid work on InputSections that have no BRANCH relocs.
+      ++thunkMap[sym].callSiteCount;
       isec->hasCallSites = true;
     }
   }
+}
+
+// Determine whether we need thunks, which depends on the target arch -- RISC
+// (e.g., ARM) generally does because it has limited-range branch/call
+// instructions, whereas CISC (e.g., x86) generally doesn't. RISC only needs
+// thunks for programs so large that branch source & destination addresses
+// might differ by more than the range of branch instruction(s).
+bool TextOutputSection::needsThunks() const {
+  if (!target->usesThunks())
+    return false;
+
+  // If an earlier section in this segment needed thunks, conservatively assume
+  // this one does too. This handles backward branches without computing the
+  // distance to the earliest branch target. In theory, a middle section might
+  // not need thunks if all its branches (forward and backward) are in range,
+  // but this heuristic is simpler and the extra thunk processing is low cost.
+  if (parent && parent->needsThunks) {
+    recordCallSites();
+    return true;
+  }
+
+  // Check if any forward branch from this section could be out of range.
+  // We compute the distance from the start of this section to the end of the
+  // furthest possible branch target in the segment.
+  uint64_t branchRange =
+      std::min(target->backwardBranchRange, target->forwardBranchRange);
+  uint64_t maxForwardDistance = estimateFurthestBranchTargetEndVA() - addr;
+
+  if (maxForwardDistance <= branchRange)
+    return false;
+
+  // This section needs thunks. Mark the segment so subsequent sections
+  // conservatively enable thunk processing too.
+  if (parent)
+    parent->needsThunks = true;
+
+  recordCallSites();
   return true;
 }
 
diff --git a/lld/MachO/ConcatOutputSection.h b/lld/MachO/ConcatOutputSection.h
index 1c68018f97552..bf2b63273f6ee 100644
--- a/lld/MachO/ConcatOutputSection.h
+++ b/lld/MachO/ConcatOutputSection.h
@@ -80,6 +80,17 @@ class TextOutputSection : public ConcatOutputSection {
   }
 
 private:
+  // Estimate the end VA of this section if it were to start at `startVA`.
+  uint64_t estimateEndVA(uint64_t startVA) const;
+
+  // Compute the end VA of the furthest section in this segment that can be
+  // the target of a branch relocation.
+  uint64_t estimateFurthestBranchTargetEndVA() const;
+
+  // Count BRANCH call sites per referent in thunkMap (for
+  // estimateBranchTargetThresholdVA()) and set hasCallSites on their inputs.
+  void recordCallSites() const;
+
   uint64_t estimateBranchTargetThresholdVA(size_t callIdx) const;
 
   std::vector<ConcatInputSection *> thunks;



More information about the llvm-commits mailing list