[lld] [lld-macho] Fix thunk insertion for cross-section branches in multi-section segments (PR #175704)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 12 20:25:21 PST 2026
https://github.com/alx32 updated https://github.com/llvm/llvm-project/pull/175704
>From 4c08ca99014455010f3b1b882f93364f660c109b Mon Sep 17 00:00:00 2001
From: alexborcan <alexborcan at meta.com>
Date: Mon, 12 Jan 2026 19:05:42 -0800
Subject: [PATCH 1/2] [lld-macho] Fix thunks when having multiple .text
sections
---
lld/MachO/ConcatOutputSection.cpp | 69 ++++++++++++----
lld/test/MachO/arm64-thunks-crosssection.s | 96 ++++++++++++++++++++++
2 files changed, 151 insertions(+), 14 deletions(-)
create mode 100644 lld/test/MachO/arm64-thunks-crosssection.s
diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp
index e559676ef5e9b..78f8c3ceca450 100644
--- a/lld/MachO/ConcatOutputSection.cpp
+++ b/lld/MachO/ConcatOutputSection.cpp
@@ -9,6 +9,7 @@
#include "ConcatOutputSection.h"
#include "Config.h"
#include "OutputSegment.h"
+#include "Sections.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "SyntheticSections.h"
@@ -122,25 +123,65 @@ DenseMap<Symbol *, ThunkInfo> lld::macho::thunkMap;
bool TextOutputSection::needsThunks() const {
if (!target->usesThunks())
return false;
- uint64_t isecAddr = addr;
- for (ConcatInputSection *isec : inputs)
- isecAddr = alignToPowerOf2(isecAddr, isec->align) + isec->getSize();
- // Other sections besides __text might be small enough to pass this
- // test but nevertheless need thunks for calling into other sections.
- // An imperfect heuristic to use in this case is that if a section
- // we've already processed in this segment needs thunks, so do the
- // rest.
+
+ // Other sections besides __text might be small enough to pass a per-section
+ // range test but nevertheless need thunks for calling into other code
+ // sections in the same segment. We track this at the segment level: once any
+ // code section in a segment needs thunks, all subsequent code sections do
+ // too.
bool needsThunks = parent && parent->needsThunks;
- // Calculate the total size of all branch target sections
- uint64_t branchTargetsSize = in.stubs->getSize();
+ auto estimateTextEndVA = [](const TextOutputSection *osec, uint64_t startVA) {
+ uint64_t endVA = startVA;
+ for (ConcatInputSection *isec : osec->inputs)
+ endVA = alignToPowerOf2(endVA, isec->align) + isec->getSize();
+ return endVA;
+ };
+
+ // Compute the end address of the last section in this segment that can be a
+ // target of a BRANCH relocation. If the distance from the start of this text
+ // section to that end address fits in the branch range, then all branch
+ // relocations originating from this section are guaranteed to be in-range.
+ uint64_t curVA = estimateTextEndVA(this, addr);
+ uint64_t lastBranchTargetEndVA =
+ (sections::isCodeSection(name, segment_names::text, flags) ||
+ name == section_names::stubs || name == section_names::objcStubs)
+ ? curVA
+ : addr;
- // Add the size of __objc_stubs section if it exists
- if (in.objcStubs && in.objcStubs->isNeeded())
- branchTargetsSize += in.objcStubs->getSize();
+ if (parent) {
+ bool foundThis = false;
+ for (OutputSection *osec : parent->getSections()) {
+ if (!osec->isNeeded())
+ continue;
+ if (!foundThis) {
+ if (osec != this)
+ continue;
+ foundThis = true;
+ continue;
+ }
+
+ curVA = alignToPowerOf2(curVA, osec->align);
+
+ uint64_t endVA;
+ if (auto *textOsec = dyn_cast<TextOutputSection>(osec)) {
+ endVA = estimateTextEndVA(textOsec, curVA);
+ } else {
+ endVA = curVA + osec->getSize();
+ }
+
+ if (sections::isCodeSection(osec->name, segment_names::text,
+ osec->flags) ||
+ osec->name == section_names::stubs ||
+ osec->name == section_names::objcStubs)
+ lastBranchTargetEndVA = endVA;
+
+ curVA = endVA;
+ }
+ }
if (!needsThunks &&
- isecAddr - addr + branchTargetsSize <=
+ lastBranchTargetEndVA - addr <=
std::min(target->backwardBranchRange, target->forwardBranchRange))
return false;
// Yes, this program is large enough to need thunks.
diff --git a/lld/test/MachO/arm64-thunks-crosssection.s b/lld/test/MachO/arm64-thunks-crosssection.s
new file mode 100644
index 0000000000000..e1ac3ef418ac5
--- /dev/null
+++ b/lld/test/MachO/arm64-thunks-crosssection.s
@@ -0,0 +1,96 @@
+# REQUIRES: aarch64
+
+## This test verifies that thunks are created for branches between multiple
+## code sections in the same segment when the combined span exceeds the
+## branch range, even if each individual section is within range.
+##
+## The bug occurs when:
+## (1) Section __text is within branch range (e.g., 64 MB < 128 MB)
+## (2) Section __text_second is also within range (e.g., 64 MB < 128 MB)
+## (3) Combined span exceeds branch range (128+ MB total)
+## (4) Calls from early in __text to __text_second need thunks
+##
+## Without the fix, needsThunks() only considered individual section sizes,
+## not the total span, causing BRANCH26 out of range errors.
+
+# RUN: rm -rf %t; mkdir %t
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t/input.o
+# RUN: %lld -arch arm64 -lSystem -o %t/out %t/input.o
+# RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t/out | FileCheck %s
+
+# CHECK: Disassembly of section __TEXT,__text:
+
+## _early_func is at the start of __text, it calls _far_func which is in
+## __text_second section. This branch crosses sections and needs a thunk
+## because the total span exceeds branch range.
+# CHECK-LABEL: <_early_func>:
+# CHECK: bl {{.*}} <_far_func.thunk.0>
+# CHECK: bl {{.*}} <_helper>
+# CHECK: ret
+
+# CHECK-LABEL: <_helper>:
+# CHECK: ret
+
+## After padding, there's another function that also needs a thunk
+
+## Verify thunk is created - it appears before _mid_func in output
+# CHECK-LABEL: <_far_func.thunk.0>:
+# CHECK: adrp x16
+# CHECK: add x16, x16
+# CHECK: br x16
+
+# CHECK-LABEL: <_mid_func>:
+# CHECK: bl {{.*}} <_far_func.thunk.0>
+# CHECK: ret
+
+# CHECK: Disassembly of section __TEXT,__text_second:
+
+# CHECK-LABEL: <_far_func>:
+# CHECK: ret
+
+
+.text
+.globl _main
+.p2align 2
+_main:
+ bl _early_func
+ ret
+
+.globl _early_func
+.p2align 2
+_early_func:
+ ## This call to _far_func crosses sections and exceeds branch range
+ bl _far_func
+ bl _helper
+ ret
+
+.globl _helper
+.p2align 2
+_helper:
+ ret
+
+## Pad __text section to ~64 MB
+## 0x4000000 = 64 Mi = half the branch range
+.space 0x4000000-0x20
+
+.globl _mid_func
+.p2align 2
+_mid_func:
+ bl _far_func
+ ret
+
+## More padding to push __text to ~128 MB
+.space 0x4000000-0x10
+
+## This is a second code section in __TEXT segment
+.section __TEXT,__text_second,regular,pure_instructions
+
+.globl _far_func
+.p2align 2
+_far_func:
+ ret
+
+## Add padding in second section to ensure total span exceeds branch range
+.space 0x100000
+
+.subsections_via_symbols
>From 53d44ec5fa7284cc8aa974b95aee6063134daca0 Mon Sep 17 00:00:00 2001
From: alexborcan <alexborcan at meta.com>
Date: Mon, 12 Jan 2026 19:33:18 -0800
Subject: [PATCH 2/2] [lld-macho] Refactor TextOutputSection::needsThunks for
better readability
---
lld/MachO/ConcatOutputSection.cpp | 162 ++++++++++++++++--------------
lld/MachO/ConcatOutputSection.h | 11 ++
2 files changed, 98 insertions(+), 75 deletions(-)
diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp
index 78f8c3ceca450..cedd2b4a201fe 100644
--- a/lld/MachO/ConcatOutputSection.cpp
+++ b/lld/MachO/ConcatOutputSection.cpp
@@ -115,96 +115,108 @@ void ConcatOutputSection::addInput(ConcatInputSection *input) {
DenseMap<Symbol *, ThunkInfo> lld::macho::thunkMap;
-// Determine whether we need thunks, which depends on the target arch -- RISC
-// (i.e., ARM) generally does because it has limited-range branch/call
-// instructions, whereas CISC (i.e., x86) generally doesn't. RISC only needs
-// thunks for programs so large that branch source & destination addresses
-// might differ more than the range of branch instruction(s).
-bool TextOutputSection::needsThunks() const {
- if (!target->usesThunks())
- return false;
+namespace {
+
+// Returns true if `osec` can be the target of a BRANCH relocation.
+// Branch targets include code sections and stub sections for dynamic calls.
+bool isBranchTargetSection(const OutputSection *osec) {
+ return sections::isCodeSection(osec->name, segment_names::text,
+ osec->flags) ||
+ osec->name == section_names::stubs ||
+ osec->name == section_names::objcStubs;
+}
- // Other sections besides __text might be small enough to pass a per-section
- // range test but nevertheless need thunks for calling into other code
- // sections in the same segment. We track this at the segment level: once any
- // code section in a segment needs thunks, all subsequent code sections do
- // too.
- bool needsThunks = parent && parent->needsThunks;
-
- auto estimateTextEndVA = [](const TextOutputSection *osec, uint64_t startVA) {
- uint64_t endVA = startVA;
- for (ConcatInputSection *isec : osec->inputs)
- endVA = alignToPowerOf2(endVA, isec->align) + isec->getSize();
- return endVA;
- };
-
- // Compute the end address of the last section in this segment that can be a
- // target of a BRANCH relocation. If the distance from the start of this text
- // section to that end address fits in the branch range, then all branch
- // relocations originating from this section are guaranteed to be in-range.
- uint64_t curVA = estimateTextEndVA(this, addr);
- uint64_t lastBranchTargetEndVA =
- (sections::isCodeSection(name, segment_names::text, flags) ||
- name == section_names::stubs || name == section_names::objcStubs)
- ? curVA
- : addr;
-
- if (parent) {
- bool foundThis = false;
- for (OutputSection *osec : parent->getSections()) {
- if (!osec->isNeeded())
- continue;
- if (!foundThis) {
- if (osec != this)
- continue;
- foundThis = true;
- continue;
- }
+} // namespace
- curVA = alignToPowerOf2(curVA, osec->align);
+uint64_t TextOutputSection::estimateEndVA(uint64_t startVA) const {
+ uint64_t endVA = startVA;
+ for (ConcatInputSection *isec : inputs)
+ endVA = alignToPowerOf2(endVA, isec->align) + isec->getSize();
+ return endVA;
+}
- uint64_t endVA;
- if (auto *textOsec = dyn_cast<TextOutputSection>(osec)) {
- endVA = estimateTextEndVA(textOsec, curVA);
- } else {
- endVA = curVA + osec->getSize();
- }
+uint64_t TextOutputSection::estimateFurthestBranchTargetEndVA() const {
+ uint64_t curVA = estimateEndVA(addr);
+ uint64_t furthestTargetEndVA = isBranchTargetSection(this) ? curVA : addr;
- if (sections::isCodeSection(osec->name, segment_names::text,
- osec->flags) ||
- osec->name == section_names::stubs ||
- osec->name == section_names::objcStubs)
- lastBranchTargetEndVA = endVA;
+ if (!parent)
+ return furthestTargetEndVA;
- curVA = endVA;
- }
- }
+ // Find this section in the segment's section list.
+ const std::vector<OutputSection *> &sections = parent->getSections();
+ auto it = llvm::find(sections, this);
+ assert(it != sections.end() && "section not found in parent segment");
- if (!needsThunks &&
- lastBranchTargetEndVA - addr <=
- std::min(target->backwardBranchRange, target->forwardBranchRange))
- return false;
- // Yes, this program is large enough to need thunks.
- if (parent) {
- parent->needsThunks = true;
+ // Walk sections after this one, simulating layout (alignment + size).
+ // Track the end VA of the furthest branch target section.
+ for (++it; it != sections.end(); ++it) {
+ OutputSection *osec = *it;
+ if (!osec->isNeeded())
+ continue;
+
+ curVA = alignToPowerOf2(curVA, osec->align);
+ uint64_t endVA;
+ if (auto *textOsec = dyn_cast<TextOutputSection>(osec))
+ endVA = textOsec->estimateEndVA(curVA);
+ else
+ endVA = curVA + osec->getSize();
+
+ if (isBranchTargetSection(osec))
+ furthestTargetEndVA = endVA;
+
+ curVA = endVA;
}
+
+ return furthestTargetEndVA;
+}
+
+void TextOutputSection::recordCallSites() const {
for (ConcatInputSection *isec : inputs) {
for (Reloc &r : isec->relocs) {
if (!target->hasAttr(r.type, RelocAttrBits::BRANCH))
continue;
auto *sym = cast<Symbol *>(r.referent);
- // Pre-populate the thunkMap and memoize call site counts for every
- // InputSection and ThunkInfo. We do this for the benefit of
- // estimateBranchTargetThresholdVA().
- ThunkInfo &thunkInfo = thunkMap[sym];
- // Knowing ThunkInfo call site count will help us know whether or not we
- // might need to create more for this referent at the time we are
- // estimating distance to __stubs in estimateBranchTargetThresholdVA().
- ++thunkInfo.callSiteCount;
- // We can avoid work on InputSections that have no BRANCH relocs.
+ ++thunkMap[sym].callSiteCount;
isec->hasCallSites = true;
}
}
+}
+
+// Determine whether we need thunks, which depends on the target arch -- RISC
+// (i.e., ARM) generally does because it has limited-range branch/call
+// instructions, whereas CISC (i.e., x86) generally doesn't. RISC only needs
+// thunks for programs so large that branch source & destination addresses
+// might differ more than the range of branch instruction(s).
+bool TextOutputSection::needsThunks() const {
+ if (!target->usesThunks())
+ return false;
+
+ // If an earlier section in this segment needed thunks, conservatively assume
+ // this one does too. This handles backward branches without computing the
+ // distance to the earliest branch target. In theory, a middle section might
+ // not need thunks if all its branches (forward and backward) are in range,
+ // but this heuristic is simpler and the extra thunk processing is low cost.
+ if (parent && parent->needsThunks) {
+ recordCallSites();
+ return true;
+ }
+
+ // Check if any forward branch from this section could be out of range.
+ // We compute the distance from the start of this section to the end of the
+ // furthest possible branch target in the segment.
+ uint64_t branchRange =
+ std::min(target->backwardBranchRange, target->forwardBranchRange);
+ uint64_t maxForwardDistance = estimateFurthestBranchTargetEndVA() - addr;
+
+ if (maxForwardDistance <= branchRange)
+ return false;
+
+ // This section needs thunks. Mark the segment so subsequent sections
+ // conservatively enable thunk processing too.
+ if (parent)
+ parent->needsThunks = true;
+
+ recordCallSites();
return true;
}
diff --git a/lld/MachO/ConcatOutputSection.h b/lld/MachO/ConcatOutputSection.h
index 1c68018f97552..bf2b63273f6ee 100644
--- a/lld/MachO/ConcatOutputSection.h
+++ b/lld/MachO/ConcatOutputSection.h
@@ -80,6 +80,17 @@ class TextOutputSection : public ConcatOutputSection {
}
private:
+ // Estimate the end VA of this section if it were to start at `startVA`.
+ uint64_t estimateEndVA(uint64_t startVA) const;
+
+ // Compute the end VA of the furthest section in this segment that can be
+ // the target of a branch relocation.
+ uint64_t estimateFurthestBranchTargetEndVA() const;
+
+ // Pre-populate thunkMap with call site counts for
+ // estimateBranchTargetThresholdVA().
+ void recordCallSites() const;
+
uint64_t estimateBranchTargetThresholdVA(size_t callIdx) const;
std::vector<ConcatInputSection *> thunks;
More information about the llvm-commits
mailing list