[lld] [ELF] Optimize binary search in getSectionPiece (PR #187916)
Fangrui Song via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 27 09:58:15 PDT 2026
https://github.com/MaskRay updated https://github.com/llvm/llvm-project/pull/187916
>From 3d3068d551742c3388af5030ce086a9f57157706 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i at maskray.me>
Date: Sat, 21 Mar 2026 21:10:33 -0700
Subject: [PATCH] [ELF] Optimize binary search in getSectionPiece
Two optimizations to make getSectionPiece O(1) for common cases:
1. For non-string fixed-size merge sections, use direct computation
(offset / entsize) instead of binary search.
2. Pre-resolve piece indices for non-section Defined symbols during
splitSections. The piece index and intra-piece offset are packed
into Defined::value as ((pieceIdx+1) << 32) | intraPieceOffset,
replacing repeated binary searches (MarkLive, includeInSymtab,
getRelocTargetVA) with a single upfront resolution.
On x86-64, references to mergeable strings use local labels:
leaq .LC0(%rip), %rax # R_X86_64_PC32 .LC0-4
The relocations use non-section symbols and benefit from optimization 2.
On many other targets (e.g. AArch64), the addend is 0 and the assembler
adjusts such relocations to reference section symbols, which still use
binary search.
On a clang link (clang-relassert reproduce tarball, x86-64):
- --gc-sections: 1.05x as fast
---
lld/ELF/InputSection.cpp | 12 ++++++++++++
lld/ELF/InputSection.h | 7 ++++++-
lld/ELF/SyntheticSections.cpp | 20 ++++++++++++++++++++
3 files changed, 38 insertions(+), 1 deletion(-)
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index c8c0a7cdbf109..9c5b932634cd0 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -1548,16 +1548,28 @@ void MergeInputSection::splitIntoPieces() {
}
SectionPiece &MergeInputSection::getSectionPiece(uint64_t offset) {
+ // Pre-resolved by splitSections: pieceIdx + 1 in upper bits,
+ // intra-piece offset in lower bits.
+ if (uint32_t idx = offset >> mergeValueShift; idx && idx <= pieces.size())
+ return pieces[idx - 1];
if (content().size() <= offset) {
Err(getCtx()) << this << ": offset is outside the section";
return pieces[0];
}
+ // For non-string fixed-size records, piece index = offset / entsize.
+ if (!(flags & SHF_STRINGS))
+ return pieces[offset / entsize];
return partition_point(
pieces, [=](SectionPiece p) { return p.inputOff <= offset; })[-1];
}
// Return the offset in an output section for a given input offset.
uint64_t MergeInputSection::getParentOffset(uint64_t offset) const {
+ // Pre-resolved by splitSections: pieceIdx + 1 in upper bits,
+ // intra-piece offset in lower bits.
+ if (uint32_t idx = offset >> mergeValueShift; idx && idx <= pieces.size())
+ return pieces[idx - 1].outputOff +
+ (offset & llvm::maskTrailingOnes<uint64_t>(mergeValueShift));
const SectionPiece &piece = getSectionPiece(offset);
return piece.outputOff + (offset - piece.inputOff);
}
diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h
index 30df85d7aa10d..061af258ce8a4 100644
--- a/lld/ELF/InputSection.h
+++ b/lld/ELF/InputSection.h
@@ -326,6 +326,10 @@ struct SectionPiece {
static_assert(sizeof(SectionPiece) == 16, "SectionPiece is too big");
+// Used by splitSections to pre-resolve section piece indexes. 32 bits of offset
+// support section pieces of up to 4 GiB.
+constexpr unsigned mergeValueShift = 32;
+
// This corresponds to a SHF_MERGE section of an input file.
class MergeInputSection : public InputSectionBase {
public:
@@ -339,7 +343,8 @@ class MergeInputSection : public InputSectionBase {
void splitIntoPieces();
// Translate an offset in the input section to an offset in the parent
- // MergeSyntheticSection.
+ // MergeSyntheticSection. If the offset was pre-resolved by splitSections
+ // (upper bits non-zero), this is O(1).
uint64_t getParentOffset(uint64_t offset) const;
// Splittable sections are handled as a sequence of data
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index b696ff11d2232..b23a8a3ed7aa6 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -3806,6 +3806,26 @@ template <class ELFT> void elf::splitSections(Ctx &ctx) {
else if (auto *eh = dyn_cast<EhInputSection>(sec))
eh->split<ELFT>();
}
+
+ // For non-section Defined symbols in merge sections, pre-resolve the piece
+ // index to avoid potentially repeated binary search (MarkLive, RelocScan,
+ // includeInSymtab). Encode each non-section Defined symbol's value as
+ // ((pieceIdx + 1) << mergeValueShift) | intraPieceOffset.
+ auto resolve = [](Defined *d) {
+ auto *ms = dyn_cast_or_null<MergeInputSection>(d->section);
+ if (!ms || d->isSection())
+ return;
+ SectionPiece &piece = ms->getSectionPiece(d->value);
+ uint32_t idx = &piece - ms->pieces.data();
+ uint64_t off = d->value - piece.inputOff;
+ d->value = ((uint64_t)(idx + 1) << mergeValueShift) | off;
+ };
+ for (Symbol *sym : file->getLocalSymbols())
+ if (auto *d = dyn_cast<Defined>(sym))
+ resolve(d);
+ for (Symbol *sym : file->getGlobalSymbols())
+ if (auto *d = dyn_cast<Defined>(sym); d && d->file == file)
+ resolve(d);
});
}
More information about the llvm-commits
mailing list