[lld] 42cc454 - [ELF] Optimize binary search in getSectionPiece (#187916)

via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 30 20:51:35 PDT 2026


Author: Fangrui Song
Date: 2026-03-30T20:51:30-07:00
New Revision: 42cc454777274a06933abcd098ec3281158717f9

URL: https://github.com/llvm/llvm-project/commit/42cc454777274a06933abcd098ec3281158717f9
DIFF: https://github.com/llvm/llvm-project/commit/42cc454777274a06933abcd098ec3281158717f9.diff

LOG: [ELF] Optimize binary search in getSectionPiece (#187916)

Two optimizations to make getSectionPiece O(1) for common cases:

1. For non-string fixed-size merge sections, use direct computation
   (offset / entsize) instead of binary search.

2. Pre-resolve piece indices for non-section Defined symbols during
   splitSections. The piece index and intra-piece offset are packed
   into Defined::value as ((pieceIdx+1) << 32) | intraPieceOffset,
   replacing repeated binary searches (MarkLive, includeInSymtab,
   getRelocTargetVA) with a single upfront resolution.

On x86-64, references to mergeable strings use local labels:

    leaq .LC0(%rip), %rax  # R_X86_64_PC32 .LC0-4

The relocations use non-section symbols and benefit from optimization 2.
On many other targets (e.g. AArch64), the addend is 0 and the assembler
adjusts such relocations to reference section symbols, which still use
binary search.

On a clang link (clang-relassert reproduce tarball, x86-64):
- --gc-sections: 1.05x as fast

Added: 
    

Modified: 
    lld/ELF/InputSection.cpp
    lld/ELF/InputSection.h
    lld/ELF/SyntheticSections.cpp
    lld/test/ELF/merge-piece-oob.s

Removed: 
    


################################################################################
diff  --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 855d520b6194e..fc82433cdcc92 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -1548,13 +1548,25 @@ void MergeInputSection::splitIntoPieces() {
 }
 
 SectionPiece &MergeInputSection::getSectionPiece(uint64_t offset) {
+  // A value pre-resolved by splitSections carries pieceIdx + 1 in its upper
+  // bits (>> mergeValueShift); when non-zero, index pieces directly.
+  if (uint32_t idx = offset >> mergeValueShift)
+    return pieces[idx - 1];
   assert(offset < content().size());
+  // Non-string sections hold fixed-size records: index = offset / entsize.
+  if (!(flags & SHF_STRINGS))
+    return pieces[offset / entsize];
   return partition_point(
       pieces, [=](SectionPiece p) { return p.inputOff <= offset; })[-1];
 }
 
 // Return the offset in an output section for a given input offset.
 uint64_t MergeInputSection::getParentOffset(uint64_t offset) const {
+  // Pre-resolved by splitSections: upper bits hold pieceIdx + 1; the low
+  // mergeValueShift bits hold the offset within that piece.
+  if (uint32_t idx = offset >> mergeValueShift)
+    return pieces[idx - 1].outputOff +
+           (offset & llvm::maskTrailingOnes<uint64_t>(mergeValueShift));
   const SectionPiece &piece = getSectionPiece(offset);
   return piece.outputOff + (offset - piece.inputOff);
 }

diff  --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h
index 30df85d7aa10d..061af258ce8a4 100644
--- a/lld/ELF/InputSection.h
+++ b/lld/ELF/InputSection.h
@@ -326,6 +326,10 @@ struct SectionPiece {
 
 static_assert(sizeof(SectionPiece) == 16, "SectionPiece is too big");
 
+// Used by splitSections to pre-resolve section piece indexes. 32 bits of offset
+// support section pieces of up to 4 GiB.
+constexpr unsigned mergeValueShift = 32;
+
 // This corresponds to a SHF_MERGE section of an input file.
 class MergeInputSection : public InputSectionBase {
 public:
@@ -339,7 +343,8 @@ class MergeInputSection : public InputSectionBase {
   void splitIntoPieces();
 
   // Translate an offset in the input section to an offset in the parent
-  // MergeSyntheticSection.
+  // MergeSyntheticSection. If the offset was pre-resolved by
+  // splitSections (upper bits non-zero), this is O(1).
   uint64_t getParentOffset(uint64_t offset) const;
 
   // Splittable sections are handled as a sequence of data

diff  --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index 6b51fa41f0bf0..2da3f1afeb821 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -3809,6 +3809,26 @@ template <class ELFT> void elf::splitSections(Ctx &ctx) {
       else if (auto *eh = dyn_cast<EhInputSection>(sec))
         eh->split<ELFT>();
     }
+
+    // For non-section Defined symbols in merge sections, pre-resolve the piece
+    // index to avoid potentially repeated binary searches (MarkLive, RelocScan,
+    // includeInSymtab). Encode each non-section Defined symbol's value as
+    // ((pieceIdx + 1) << mergeValueShift) | intraPieceOffset.
+    auto resolve = [](Defined *d) {
+      auto *ms = dyn_cast_or_null<MergeInputSection>(d->section);
+      if (!ms || d->isSection())
+        return;
+      SectionPiece &piece = ms->getSectionPiece(d->value);
+      uint32_t idx = &piece - ms->pieces.data();
+      uint64_t off = d->value - piece.inputOff;
+      d->value = ((uint64_t)(idx + 1) << mergeValueShift) | off;
+    };
+    for (Symbol *sym : file->getLocalSymbols())
+      if (auto *d = dyn_cast<Defined>(sym))
+        resolve(d);
+    for (Symbol *sym : file->getGlobalSymbols())
+      if (auto *d = dyn_cast<Defined>(sym); d && d->file == file)
+        resolve(d);
   });
 }
 

diff  --git a/lld/test/ELF/merge-piece-oob.s b/lld/test/ELF/merge-piece-oob.s
index 1ff34768a4d13..d2bf9fab443a1 100644
--- a/lld/test/ELF/merge-piece-oob.s
+++ b/lld/test/ELF/merge-piece-oob.s
@@ -12,7 +12,7 @@
 # CHECK-NEXT: [[PREFIX]]: {{.*}}:(.foo): offset 0xffffffffffffffff is outside the section
 ## .rodata.str1.1 is "abc\0" (4 bytes). offset<=size is accepted.
 # CHECK-NEXT: [[PREFIX]]: {{.*}}:(.rodata.str1.1): offset 0x5 is outside the section
-## .data.retain references .foo-1 as well.
+## .data.retain references .foo-2 as well.
 # CHECK-NEXT: [[PREFIX]]: {{.*}}:(.foo): offset 0xfffffffffffffffe is outside the section
 
 ## Test that --gc-sections with an out-of-bounds offset doesn't crash.


        


More information about the llvm-commits mailing list