[llvm-branch-commits] [lld] [lld][macho] Track max thunks to create and remove --slop_scale (PR #193372)
Ellis Hoag via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Apr 29 16:02:17 PDT 2026
https://github.com/ellishg updated https://github.com/llvm/llvm-project/pull/193372
>From 3a2c2421d5494fc83f28d7079257817f14fcfb7c Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellishoag at meta.com>
Date: Mon, 20 Apr 2026 21:10:43 -0700
Subject: [PATCH 1/4] [lld][macho] Remove --slop_scale flag
---
lld/MachO/ConcatOutputSection.cpp | 44 ++++++++++++++++++++++++-------
lld/MachO/ConcatOutputSection.h | 2 ++
lld/MachO/Config.h | 1 -
lld/MachO/Driver.cpp | 8 ------
lld/MachO/Options.td | 12 +++------
lld/test/MachO/set-slop-scale.s | 11 --------
6 files changed, 40 insertions(+), 38 deletions(-)
delete mode 100644 lld/test/MachO/set-slop-scale.s
diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp
index b312bf403152d..d9ad94b57acc2 100644
--- a/lld/MachO/ConcatOutputSection.cpp
+++ b/lld/MachO/ConcatOutputSection.cpp
@@ -151,13 +151,13 @@ void TextOutputSection::finalize() {
assert(isec.isFinal);
uint64_t highVA = isec.getVA() + r.offset + forwardBranchRange;
if (addr + size > highVA) {
- // There were too many consecutive branch instructions for `slop`
- // below. If you hit this: For the current algorithm, just bumping up
- // slop above and trying again is probably simplest. (See also PR51578
- // comment 5).
- fatal(Twine(__FUNCTION__) +
- ": FIXME: thunk range overrun. Consider increasing the "
- "slop-scale with `--slop-scale=<unsigned_int>`.");
+ // We can only encounter this if we have a massive section (> ~128MB) or
+ // an enormous number of branch instructions within a single section
+ // (> ~16M), neither of which is feasible in practice. To fix we could
+ // implement branch islands when the available space for thunks become too
+ // small.
+ fatal("encountered a branch whose target is out of range, but there is "
+ "no more space for a new thunk");
}
auto *funcSym = cast<Symbol *>(r.referent);
ThunkInfo &thunkInfo = thunkMap[ThunkKey{funcSym, r.addend}];
@@ -200,17 +200,23 @@ void TextOutputSection::finalize() {
branchesToProcess;
SmallVector<std::tuple<ConcatInputSection *, Relocation *, Defined *>>
deferredBranchRedirects;
+ unsigned numPendingThunkTargets = 0;
- const uint64_t slop = config->slopScale * thunkSize;
for (auto *isec : inputs) {
while (!branchesToProcess.empty()) {
auto &[callerIsec, r, thunkKey] = branchesToProcess.front();
assert(callerIsec->isFinal);
+ auto &thunkInfo = thunkMap[thunkKey];
if (isTargetInRange(*callerIsec, *r)) {
+ if (thunkInfo.pendingBranches.erase(r))
+ if (thunkInfo.pendingBranches.empty())
+ --numPendingThunkTargets;
branchesToProcess.pop_front();
continue;
}
if (auto *sym = getThunkInRange(*callerIsec, *r)) {
+ // The pending thunk target was already decremented when we created the
+ // thunk
deferredBranchRedirects.emplace_back(callerIsec, r, sym);
branchesToProcess.pop_front();
continue;
@@ -218,9 +224,11 @@ void TextOutputSection::finalize() {
uint64_t highVA = callerIsec->getVA() + r->offset + forwardBranchRange;
uint64_t nextEnd =
alignToPowerOf2(addr + size, isec->align) + isec->getSize();
- if (nextEnd + slop <= highVA)
+ if (nextEnd + numPendingThunkTargets * thunkSize <= highVA)
break;
+ thunkInfo.pendingBranches.clear();
+ --numPendingThunkTargets;
createThunk(*callerIsec, *r);
branchesToProcess.pop_front();
}
@@ -250,6 +258,10 @@ void TextOutputSection::finalize() {
auto *funcSym = cast<Symbol *>(r.referent);
ThunkKey key{funcSym, r.addend};
branchesToProcess.emplace_back(isec, &r, key);
+ auto &thunkInfo = thunkMap[key];
+ if (thunkInfo.pendingBranches.empty())
+ ++numPendingThunkTargets;
+ thunkInfo.pendingBranches.insert(&r);
}
}
for (auto [callerIsec, r, thunk] : deferredBranchRedirects) {
@@ -263,6 +275,12 @@ void TextOutputSection::finalize() {
llvm::erase_if(branchesToProcess, [&](auto &tuple) {
auto [callerIsec, r, thunkKey] = tuple;
+ auto &thunkInfo = thunkMap[thunkKey];
+ if (isTargetInRange(*callerIsec, *r) || getThunkInRange(*callerIsec, *r)) {
+ if (thunkInfo.pendingBranches.erase(r))
+ if (thunkInfo.pendingBranches.empty())
+ --numPendingThunkTargets;
+ }
return isTargetInRange(*callerIsec, *r);
});
// Count the number of new thunks we will need to create
@@ -270,8 +288,9 @@ void TextOutputSection::finalize() {
for (auto [callerIsec, r, thunkKey] : branchesToProcess)
if (!getThunkInRange(*callerIsec, *r))
branchTargets.insert(thunkKey);
+ assert(numPendingThunkTargets == branchTargets.size());
- uint64_t estimatedTextEnd = addr + size + branchTargets.size() * thunkSize;
+ uint64_t estimatedTextEnd = addr + size + numPendingThunkTargets * thunkSize;
uint64_t estimatedStubsEnd =
alignToPowerOf2(estimatedTextEnd, in.stubs->align) + in.stubs->getSize();
if (in.objcStubs && in.objcStubs->isNeeded())
@@ -280,6 +299,10 @@ void TextOutputSection::finalize() {
in.objcStubs->getSize();
for (auto [isec, r, thunkKey] : branchesToProcess) {
+ auto &thunkInfo = thunkMap[thunkKey];
+ if (thunkInfo.pendingBranches.erase(r))
+ if (thunkInfo.pendingBranches.empty())
+ --numPendingThunkTargets;
uint64_t highVA = isec->getVA() + r->offset + forwardBranchRange;
auto *funcSym = cast<Symbol *>(r->referent);
if ((funcSym->isInStubs() ||
@@ -300,6 +323,7 @@ void TextOutputSection::finalize() {
}
createThunk(*isec, *r);
}
+ assert(numPendingThunkTargets == 0);
if (thunkCount)
log("Created " + Twine(thunkCount) + " (" +
diff --git a/lld/MachO/ConcatOutputSection.h b/lld/MachO/ConcatOutputSection.h
index 154272392928a..90012fed980b3 100644
--- a/lld/MachO/ConcatOutputSection.h
+++ b/lld/MachO/ConcatOutputSection.h
@@ -99,6 +99,8 @@ struct ThunkInfo {
Defined *sym = nullptr; // private-extern symbol for active thunk
ConcatInputSection *isec = nullptr; // input section for active thunk
+ llvm::DenseSet<const Relocation *> pendingBranches;
+
// The following value is cumulative across all thunks on this function
uint8_t sequence = 0; // how many thunks created so-far?
};
diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index 9767cc5e5b6e4..9d3be9c17a39c 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -226,7 +226,6 @@ struct Configuration {
bool disableVerify;
bool separateCstringLiteralSections;
bool tailMergeStrings;
- unsigned slopScale = 256;
bool callGraphProfileSort = false;
llvm::StringRef printSymbolOrder;
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 26b39f7a28d0d..9d8cbb43b64ea 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -2015,14 +2015,6 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
OPT_no_separate_cstring_literal_sections, false);
config->tailMergeStrings =
args.hasFlag(OPT_tail_merge_strings, OPT_no_tail_merge_strings, false);
- if (auto *arg = args.getLastArg(OPT_slop_scale_eq)) {
- StringRef v(arg->getValue());
- unsigned slop = 0;
- if (!llvm::to_integer(v, slop))
- error(arg->getSpelling() +
- ": expected a non-negative integer, but got '" + v + "'");
- config->slopScale = slop;
- }
auto IncompatWithCGSort = [&](StringRef firstArgStr) {
// Throw an error only if --call-graph-profile-sort is explicitly specified
diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td
index b7686d66a258e..fd67be034b60d 100644
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -1105,14 +1105,10 @@ defm tail_merge_strings
: BB<"tail-merge-strings", "Enable string tail merging",
"Disable string tail merging to improve link-time performance">,
Group<grp_rare>;
-def slop_scale_eq
- : Joined<["--"], "slop_scale=">,
- MetaVarName<"<unsigned_int>">,
- HelpText<"Specify the slop scale. Default value is 256. If your binary "
- "has too many consecutive branch instructions resulting in "
- "thunk-range overrun, then you need to increase this value to a "
- "higher value, such as 512 or 1024, etc">,
- Group<grp_rare>;
+def slop_scale_eq : Joined<["--"], "slop_scale=">,
+ MetaVarName<"<unsigned_int>">,
+ HelpText<"Deprecated. Has no effect">,
+ Group<grp_rare>;
def grp_deprecated : OptionGroup<"deprecated">, HelpText<"DEPRECATED">;
diff --git a/lld/test/MachO/set-slop-scale.s b/lld/test/MachO/set-slop-scale.s
deleted file mode 100644
index a18acce4b4543..0000000000000
--- a/lld/test/MachO/set-slop-scale.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# REQUIRES: x86
-# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-darwin %s -o %t.o
-# RUN: %lld -o /dev/null %t.o --slop_scale=1
-# RUN: not %lld -o /dev/null %t.o --slop_scale=-1 2>&1 | FileCheck %s
-# CHECK: error: --slop_scale=: expected a non-negative integer, but got '-1'
-
-.text
-.global _main
-_main:
- mov $0, %rax
- ret
>From 66119a3376410f43a6465054f9dc670ceb99249c Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellishoag at meta.com>
Date: Tue, 21 Apr 2026 17:59:30 -0700
Subject: [PATCH 2/4] remove dead code
---
lld/MachO/ConcatOutputSection.cpp | 6 ------
1 file changed, 6 deletions(-)
diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp
index d9ad94b57acc2..b84d4baacf290 100644
--- a/lld/MachO/ConcatOutputSection.cpp
+++ b/lld/MachO/ConcatOutputSection.cpp
@@ -283,12 +283,6 @@ void TextOutputSection::finalize() {
}
return isTargetInRange(*callerIsec, *r);
});
- // Count the number of new thunks we will need to create
- DenseSet<ThunkKey, ThunkMapKeyInfo> branchTargets;
- for (auto [callerIsec, r, thunkKey] : branchesToProcess)
- if (!getThunkInRange(*callerIsec, *r))
- branchTargets.insert(thunkKey);
- assert(numPendingThunkTargets == branchTargets.size());
uint64_t estimatedTextEnd = addr + size + numPendingThunkTargets * thunkSize;
uint64_t estimatedStubsEnd =
>From 88cb362f304d6eb0410f8dee3ccdaa8431ff5f09 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellishoag at meta.com>
Date: Wed, 29 Apr 2026 15:10:06 -0700
Subject: [PATCH 3/4] resolve some comments
---
lld/MachO/ConcatOutputSection.cpp | 19 +++++++++----------
lld/MachO/Driver.cpp | 2 ++
2 files changed, 11 insertions(+), 10 deletions(-)
diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp
index b5b664e07d1bc..8bceeb90b0ff1 100644
--- a/lld/MachO/ConcatOutputSection.cpp
+++ b/lld/MachO/ConcatOutputSection.cpp
@@ -152,11 +152,11 @@ void TextOutputSection::createThunk(const ConcatInputSection &isec,
assert(isec.isFinal);
uint64_t highVA = isec.getVA() + r.offset + target->forwardBranchRange;
if (addr + size > highVA) {
- // We can only encounter this if we have a massive section (> ~128MB) or
- // an enormous number of branch instructions within a single section
- // (> ~16M), neither of which is feasible in practice. To fix we could
- // implement branch islands when the available space for thunks become too
- // small.
+ // We can only encounter this if we have a massive function
+ // (> ~128MB on arm64) or an enormous number of branch instructions within a
+ // single function (> ~16M on arm64), neither of which is feasible in
+ // practice. To fix we could implement branch islands when the available
+ // space for thunks becomes too small.
fatal("encountered a branch whose target is out of range, but there is "
"no more space for a new thunk");
}
@@ -250,7 +250,9 @@ void TextOutputSection::finalize() {
if (nextEnd + numPendingThunkTargets * target->thunkSize <= highVA)
break;
+ assert(thunkInfo.pendingBranches.size());
thunkInfo.pendingBranches.clear();
+ assert(numPendingThunkTargets);
--numPendingThunkTargets;
createThunk(*callerIsec, *r);
branchesToProcess.pop_front();
@@ -299,6 +301,8 @@ void TextOutputSection::finalize() {
if (thunkInfo.pendingBranches.empty())
--numPendingThunkTargets;
}
+ // Do not remove branches if a thunk is in range because if the target is a
+ // stub we may discover that it is in range for a direct branch
return targetInRange;
});
@@ -328,10 +332,6 @@ void TextOutputSection::finalize() {
}
for (auto [isec, r, thunkKey] : branchesToProcess) {
- auto &thunkInfo = thunkMap[thunkKey];
- if (thunkInfo.pendingBranches.erase(r))
- if (thunkInfo.pendingBranches.empty())
- --numPendingThunkTargets;
if (isTargetStubsAndInRange(*isec, *r, estimatedStubsEnd))
continue;
if (auto *thunk = getThunkInRange(*isec, *r)) {
@@ -340,7 +340,6 @@ void TextOutputSection::finalize() {
}
createThunk(*isec, *r);
}
- assert(numPendingThunkTargets == 0);
if (!thunks.empty())
log(name + ": Created " + Twine(thunks.size()) + " (" +
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 9d8cbb43b64ea..adce6b6dc6717 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -2015,6 +2015,8 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
OPT_no_separate_cstring_literal_sections, false);
config->tailMergeStrings =
args.hasFlag(OPT_tail_merge_strings, OPT_no_tail_merge_strings, false);
+ if (auto *arg = args.getLastArg(OPT_slop_scale_eq))
+ warn(arg->getSpelling() + " has no effect and is deprecated");
auto IncompatWithCGSort = [&](StringRef firstArgStr) {
// Throw an error only if --call-graph-profile-sort is explicitly specified
>From 49d1797caa44e454695347989475459428f89578 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellishoag at meta.com>
Date: Wed, 29 Apr 2026 16:02:01 -0700
Subject: [PATCH 4/4] Add helper function
---
lld/MachO/ConcatOutputSection.cpp | 41 ++++++++++++++-----------
lld/MachO/ConcatOutputSection.h | 50 ++++++++++++++++++-------------
2 files changed, 53 insertions(+), 38 deletions(-)
diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp
index 8bceeb90b0ff1..3bf92807bfb09 100644
--- a/lld/MachO/ConcatOutputSection.cpp
+++ b/lld/MachO/ConcatOutputSection.cpp
@@ -206,6 +206,16 @@ bool TextOutputSection::isTargetStubsAndInRange(
return estimatedStubsEnd <= highVA;
}
+void TextOutputSection::markBranchAsResolved(ThunkInfo &thunkInfo,
+ const Relocation *r) {
+ if (!thunkInfo.pendingBranches.erase(r))
+ return;
+ if (!thunkInfo.pendingBranches.empty())
+ return;
+ assert(numPendingThunkTargets);
+ --numPendingThunkTargets;
+}
+
void TextOutputSection::finalize() {
if (!needsThunks()) {
for (ConcatInputSection *isec : inputs)
@@ -222,7 +232,6 @@ void TextOutputSection::finalize() {
// can still direct call to their targets after they have all been finalized.
SmallVector<std::tuple<ConcatInputSection *, Relocation *, Defined *>>
deferredBranchRedirects;
- unsigned numPendingThunkTargets = 0;
for (auto *isec : inputs) {
while (!branchesToProcess.empty()) {
@@ -231,14 +240,10 @@ void TextOutputSection::finalize() {
auto &thunkInfo = thunkMap[thunkKey];
if (isTargetKnownInRange(*callerIsec, *r)) {
branchesToProcess.pop_front();
- if (thunkInfo.pendingBranches.erase(r))
- if (thunkInfo.pendingBranches.empty())
- --numPendingThunkTargets;
+ markBranchAsResolved(thunkInfo, r);
continue;
}
if (auto *thunk = getThunkInRange(*callerIsec, *r)) {
- // The pending thunk target was already decremented when we created the
- // thunk
deferredBranchRedirects.emplace_back(callerIsec, r, thunk);
branchesToProcess.pop_front();
continue;
@@ -247,10 +252,14 @@ void TextOutputSection::finalize() {
callerIsec->getVA() + r->offset + target->forwardBranchRange;
uint64_t nextEnd =
alignToPowerOf2(addr + size, isec->align) + isec->getSize();
+ // If we were to emit this section, would we have enough space for more
+ // thunks? If we do, then we can delay processing this thunk so we may
+ // finalize more potential target sections. Otherwise we must emit thunks
+ // until we have enough space.
if (nextEnd + numPendingThunkTargets * target->thunkSize <= highVA)
break;
- assert(thunkInfo.pendingBranches.size());
+ assert(thunkInfo.pendingBranches.contains(r));
thunkInfo.pendingBranches.clear();
assert(numPendingThunkTargets);
--numPendingThunkTargets;
@@ -294,16 +303,10 @@ void TextOutputSection::finalize() {
llvm::erase_if(branchesToProcess, [&](auto &tuple) {
auto [callerIsec, r, thunkKey] = tuple;
- bool targetInRange = isTargetKnownInRange(*callerIsec, *r);
- auto &thunkInfo = thunkMap[thunkKey];
- if (targetInRange || getThunkInRange(*callerIsec, *r)) {
- if (thunkInfo.pendingBranches.erase(r))
- if (thunkInfo.pendingBranches.empty())
- --numPendingThunkTargets;
- }
- // Do not remove branches if a thunk is in range because if the target is a
- // stub we may discover that it is in range for a direct branch
- return targetInRange;
+ if (!isTargetKnownInRange(*callerIsec, *r))
+ return false;
+ markBranchAsResolved(thunkMap[thunkKey], r);
+ return true;
});
#ifndef NDEBUG
@@ -332,6 +335,9 @@ void TextOutputSection::finalize() {
}
for (auto [isec, r, thunkKey] : branchesToProcess) {
+#ifndef NDEBUG
+ markBranchAsResolved(thunkMap[thunkKey], r);
+#endif
if (isTargetStubsAndInRange(*isec, *r, estimatedStubsEnd))
continue;
if (auto *thunk = getThunkInRange(*isec, *r)) {
@@ -340,6 +346,7 @@ void TextOutputSection::finalize() {
}
createThunk(*isec, *r);
}
+ assert(numPendingThunkTargets == 0);
if (!thunks.empty())
log(name + ": Created " + Twine(thunks.size()) + " (" +
diff --git a/lld/MachO/ConcatOutputSection.h b/lld/MachO/ConcatOutputSection.h
index fa0736160df72..2b192a66c802f 100644
--- a/lld/MachO/ConcatOutputSection.h
+++ b/lld/MachO/ConcatOutputSection.h
@@ -62,6 +62,29 @@ class ConcatOutputSection : public OutputSection {
void finalizeFlags(InputSection *input);
};
+// We maintain one ThunkInfo per real function.
+//
+// The "active thunk" is represented by the sym/isec pair that
+// turns-over during finalize(): as the call-site address advances,
+// the active thunk goes out of branch-range, and we create a new
+// thunk to take its place.
+//
+// The remaining members -- bools and counters -- apply to the
+// collection of thunks associated with the real function.
+
+struct ThunkInfo {
+ // These denote the active thunk:
+ Defined *sym = nullptr; // private-extern symbol for active thunk
+ ConcatInputSection *isec = nullptr; // input section for active thunk
+
+ /// Before this thunk is created, this contains the set of branches with the
+ /// same target that could trigger this thunk's creation.
+ llvm::DenseSet<const Relocation *> pendingBranches;
+
+ // The following value is cumulative across all thunks on this function
+ uint8_t sequence = 0; // how many thunks created so-far?
+};
+
// ConcatOutputSections that contain code (text) require special handling to
// support thunk insertion.
class TextOutputSection : public ConcatOutputSection {
@@ -98,29 +121,14 @@ class TextOutputSection : public ConcatOutputSection {
bool isTargetStubsAndInRange(const ConcatInputSection &isec,
const Relocation &r,
uint64_t estimatedStubsEnd) const;
+ /// Mark the branch at \p r as resolved and possibly decrement
+ /// numPendingThunkTargets.
+ void markBranchAsResolved(ThunkInfo &thunkInfo, const Relocation *r);
/// The number of relocations updated to point to thunks.
size_t thunkCallCount = 0;
-};
-
-// We maintain one ThunkInfo per real function.
-//
-// The "active thunk" is represented by the sym/isec pair that
-// turns-over during finalize(): as the call-site address advances,
-// the active thunk goes out of branch-range, and we create a new
-// thunk to take its place.
-//
-// The remaining members -- bools and counters -- apply to the
-// collection of thunks associated with the real function.
-
-struct ThunkInfo {
- // These denote the active thunk:
- Defined *sym = nullptr; // private-extern symbol for active thunk
- ConcatInputSection *isec = nullptr; // input section for active thunk
-
- llvm::DenseSet<const Relocation *> pendingBranches;
-
- // The following value is cumulative across all thunks on this function
- uint8_t sequence = 0; // how many thunks created so-far?
+ /// The number of new thunks that could be created from our current list of
+ /// pending branches.
+ unsigned numPendingThunkTargets = 0;
};
NamePair maybeRenameSection(NamePair key);
More information about the llvm-branch-commits
mailing list