[lld] [ELF] Optimize binary search in getSectionPiece (PR #187916)

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 27 09:58:15 PDT 2026


https://github.com/MaskRay updated https://github.com/llvm/llvm-project/pull/187916

>From 3d3068d551742c3388af5030ce086a9f57157706 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i at maskray.me>
Date: Sat, 21 Mar 2026 21:10:33 -0700
Subject: [PATCH] [ELF] Optimize binary search in getSectionPiece

Two optimizations to make getSectionPiece O(1) for common cases:

1. For non-string fixed-size merge sections, use direct computation
   (offset / entsize) instead of binary search.

2. Pre-resolve piece indices for non-section Defined symbols during
   splitSections. The piece index and intra-piece offset are packed
   into Defined::value as ((pieceIdx+1) << 32) | intraPieceOffset,
   replacing repeated binary searches (MarkLive, includeInSymtab,
   getRelocTargetVA) with a single upfront resolution.

On x86-64, references to mergeable strings use local labels:

    leaq .LC0(%rip), %rax  # R_X86_64_PC32 .LC0-4

The relocations use non-section symbols and benefit from optimization 2.
On many other targets (e.g. AArch64), the addend is 0 and the assembler
adjusts such relocations to reference section symbols, which are not
pre-resolved and still use binary search.

On a clang link (clang-relassert reproduce tarball, x86-64):
- --gc-sections: 1.05x as fast
---
 lld/ELF/InputSection.cpp      | 12 ++++++++++++
 lld/ELF/InputSection.h        |  7 ++++++-
 lld/ELF/SyntheticSections.cpp | 20 ++++++++++++++++++++
 3 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index c8c0a7cdbf109..9c5b932634cd0 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -1548,16 +1548,28 @@ void MergeInputSection::splitIntoPieces() {
 }
 
 SectionPiece &MergeInputSection::getSectionPiece(uint64_t offset) {
+  // Pre-resolved by splitSections: offset >> mergeValueShift is pieceIdx + 1
+  // (0 means not pre-resolved); the remaining low bits are the piece offset.
+  if (uint32_t idx = offset >> mergeValueShift; idx && idx <= pieces.size())
+    return pieces[idx - 1];
   if (content().size() <= offset) {
     Err(getCtx()) << this << ": offset is outside the section";
     return pieces[0];
   }
+  // For non-string fixed-size records, piece index = offset / entsize.
+  if (!(flags & SHF_STRINGS))
+    return pieces[offset / entsize];
   return partition_point(
       pieces, [=](SectionPiece p) { return p.inputOff <= offset; })[-1];
 }
 
 // Return the offset in an output section for a given input offset.
 uint64_t MergeInputSection::getParentOffset(uint64_t offset) const {
+  // Pre-resolved by splitSections: the upper bits hold the piece index + 1;
+  // the low mergeValueShift bits are the offset within that piece.
+  if (uint32_t idx = offset >> mergeValueShift; idx && idx <= pieces.size())
+    return pieces[idx - 1].outputOff +
+           (offset & llvm::maskTrailingOnes<uint64_t>(mergeValueShift));
   const SectionPiece &piece = getSectionPiece(offset);
   return piece.outputOff + (offset - piece.inputOff);
 }
diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h
index 30df85d7aa10d..061af258ce8a4 100644
--- a/lld/ELF/InputSection.h
+++ b/lld/ELF/InputSection.h
@@ -326,6 +326,10 @@ struct SectionPiece {
 
 static_assert(sizeof(SectionPiece) == 16, "SectionPiece is too big");
 
+// Used by splitSections to pre-resolve section piece indexes. The low 32 bits
+// keep the intra-piece offset, which supports section pieces up to 4GiB.
+constexpr unsigned mergeValueShift = 32;
+
 // This corresponds to a SHF_MERGE section of an input file.
 class MergeInputSection : public InputSectionBase {
 public:
@@ -339,7 +343,8 @@ class MergeInputSection : public InputSectionBase {
   void splitIntoPieces();
 
   // Translate an offset in the input section to an offset in the parent
-  // MergeSyntheticSection.
+  // MergeSyntheticSection. If the offset was pre-resolved by splitSections
+  // (upper bits non-zero), this is O(1).
   uint64_t getParentOffset(uint64_t offset) const;
 
   // Splittable sections are handled as a sequence of data
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index b696ff11d2232..b23a8a3ed7aa6 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -3806,6 +3806,26 @@ template <class ELFT> void elf::splitSections(Ctx &ctx) {
       else if (auto *eh = dyn_cast<EhInputSection>(sec))
         eh->split<ELFT>();
     }
+
+    // Pre-resolve the piece index of each non-section Defined symbol in a merge
+    // section so later lookups (MarkLive, RelocScan, includeInSymtab) skip the
+    // binary search: value = ((pieceIdx + 1) << mergeValueShift) | pieceOffset.
+    // NOTE(review): off is unmasked; a value >= 4GiB would clobber pieceIdx.
+    auto resolve = [](Defined *d) {
+      auto *ms = dyn_cast_or_null<MergeInputSection>(d->section);
+      if (!ms || d->isSection())
+        return;
+      SectionPiece &piece = ms->getSectionPiece(d->value);
+      uint32_t idx = &piece - ms->pieces.data();
+      uint64_t off = d->value - piece.inputOff;
+      d->value = ((uint64_t)(idx + 1) << mergeValueShift) | off;
+    };
+    for (Symbol *sym : file->getLocalSymbols())
+      if (auto *d = dyn_cast<Defined>(sym))
+        resolve(d);
+    for (Symbol *sym : file->getGlobalSymbols())
+      if (auto *d = dyn_cast<Defined>(sym); d && d->file == file)
+        resolve(d);
   });
 }
 



More information about the llvm-commits mailing list