[lld] 42cc454 - [ELF] Optimize binary search in getSectionPiece (#187916)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 30 20:51:35 PDT 2026
Author: Fangrui Song
Date: 2026-03-30T20:51:30-07:00
New Revision: 42cc454777274a06933abcd098ec3281158717f9
URL: https://github.com/llvm/llvm-project/commit/42cc454777274a06933abcd098ec3281158717f9
DIFF: https://github.com/llvm/llvm-project/commit/42cc454777274a06933abcd098ec3281158717f9.diff
LOG: [ELF] Optimize binary search in getSectionPiece (#187916)
Two optimizations to make getSectionPiece O(1) for common cases:
1. For non-string fixed-size merge sections, use direct computation
(offset / entsize) instead of binary search.
2. Pre-resolve piece indices for non-section Defined symbols during
splitSections. The piece index and intra-piece offset are packed
into Defined::value as ((pieceIdx+1) << 32) | intraPieceOffset,
replacing repeated binary searches (MarkLive, includeInSymtab,
getRelocTargetVA) with a single upfront resolution.
On x86-64, references to mergeable strings use local labels:
leaq .LC0(%rip), %rax # R_X86_64_PC32 .LC0-4
The relocations use non-section symbols and benefit from optimization 2.
On many other targets (e.g. AArch64), the addend is 0 and the assembler
adjusts such relocations to reference section symbols instead. Section
symbols are not pre-resolved, so those references still use binary search.
On a clang link (clang-relassert reproduce tarball, x86-64):
- --gc-sections: 1.05x as fast
Added:
Modified:
lld/ELF/InputSection.cpp
lld/ELF/InputSection.h
lld/ELF/SyntheticSections.cpp
lld/test/ELF/merge-piece-oob.s
Removed:
################################################################################
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 855d520b6194e..fc82433cdcc92 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -1548,13 +1548,25 @@ void MergeInputSection::splitIntoPieces() {
}
SectionPiece &MergeInputSection::getSectionPiece(uint64_t offset) {
+ // Pre-resolved by splitSections: pieceIdx + 1 in upper bits,
+ // intra-piece offset in lower bits.
+ if (uint32_t idx = offset >> mergeValueShift)
+ return pieces[idx - 1];
assert(offset < content().size());
+ // For non-string fixed-size records, piece index = offset / entsize.
+ if (!(flags & SHF_STRINGS))
+ return pieces[offset / entsize];
return partition_point(
pieces, [=](SectionPiece p) { return p.inputOff <= offset; })[-1];
}
// Return the offset in an output section for a given input offset.
uint64_t MergeInputSection::getParentOffset(uint64_t offset) const {
+ // Pre-resolved by splitSections: pieceIdx + 1 in upper bits,
+ // intra-piece offset in lower bits.
+ if (uint32_t idx = offset >> mergeValueShift)
+ return pieces[idx - 1].outputOff +
+ (offset & llvm::maskTrailingOnes<uint64_t>(mergeValueShift));
const SectionPiece &piece = getSectionPiece(offset);
return piece.outputOff + (offset - piece.inputOff);
}
diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h
index 30df85d7aa10d..061af258ce8a4 100644
--- a/lld/ELF/InputSection.h
+++ b/lld/ELF/InputSection.h
@@ -326,6 +326,10 @@ struct SectionPiece {
static_assert(sizeof(SectionPiece) == 16, "SectionPiece is too big");
+// Used by splitSections to pre-resolve section piece indexes. 32 bits of offset
+// supports section piece up to 4GB.
+constexpr unsigned mergeValueShift = 32;
+
// This corresponds to a SHF_MERGE section of an input file.
class MergeInputSection : public InputSectionBase {
public:
@@ -339,7 +343,8 @@ class MergeInputSection : public InputSectionBase {
void splitIntoPieces();
// Translate an offset in the input section to an offset in the parent
- // MergeSyntheticSection.
+ // MergeSyntheticSection. If the offset was pre-resolved by
+ // resolveSymbolPieces (upper bits non-zero), this is O(1).
uint64_t getParentOffset(uint64_t offset) const;
// Splittable sections are handled as a sequence of data
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index 6b51fa41f0bf0..2da3f1afeb821 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -3809,6 +3809,26 @@ template <class ELFT> void elf::splitSections(Ctx &ctx) {
else if (auto *eh = dyn_cast<EhInputSection>(sec))
eh->split<ELFT>();
}
+
+ // For non-section Defined symbols in merge sections, pre-resolve the piece
+ // index to avoid potentially repeated binary search (MarkLive, RelocScan,
+ // includeInSymtab). Encode each non-section Defined symbol's value as
+ // ((pieceIdx + 1) << mergeValueShift) | intraPieceOffset.
+ auto resolve = [](Defined *d) {
+ auto *ms = dyn_cast_or_null<MergeInputSection>(d->section);
+ if (!ms || d->isSection())
+ return;
+ SectionPiece &piece = ms->getSectionPiece(d->value);
+ uint32_t idx = &piece - ms->pieces.data();
+ uint64_t off = d->value - piece.inputOff;
+ d->value = ((uint64_t)(idx + 1) << mergeValueShift) | off;
+ };
+ for (Symbol *sym : file->getLocalSymbols())
+ if (auto *d = dyn_cast<Defined>(sym))
+ resolve(d);
+ for (Symbol *sym : file->getGlobalSymbols())
+ if (auto *d = dyn_cast<Defined>(sym); d && d->file == file)
+ resolve(d);
});
}
diff --git a/lld/test/ELF/merge-piece-oob.s b/lld/test/ELF/merge-piece-oob.s
index 1ff34768a4d13..d2bf9fab443a1 100644
--- a/lld/test/ELF/merge-piece-oob.s
+++ b/lld/test/ELF/merge-piece-oob.s
@@ -12,7 +12,7 @@
# CHECK-NEXT: [[PREFIX]]: {{.*}}:(.foo): offset 0xffffffffffffffff is outside the section
## .rodata.str1.1 is "abc\0" (4 bytes). offset<=size is accepted.
# CHECK-NEXT: [[PREFIX]]: {{.*}}:(.rodata.str1.1): offset 0x5 is outside the section
-## .data.retain references .foo-1 as well.
+## .data.retain references .foo-2 as well.
# CHECK-NEXT: [[PREFIX]]: {{.*}}:(.foo): offset 0xfffffffffffffffe is outside the section
## Test that --gc-sections with an out-of-bounds offset doesn't crash.
More information about the llvm-commits mailing list