[lld] 6683099 - [ELF] Optimize RelocationSection<ELFT>::writeTo

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 21 09:43:48 PST 2021


Author: Fangrui Song
Date: 2021-12-21T09:43:44-08:00
New Revision: 6683099a0d0a17fcde3576733e9c85e3b5f71de5

URL: https://github.com/llvm/llvm-project/commit/6683099a0d0a17fcde3576733e9c85e3b5f71de5
DIFF: https://github.com/llvm/llvm-project/commit/6683099a0d0a17fcde3576733e9c85e3b5f71de5.diff

LOG: [ELF] Optimize RelocationSection<ELFT>::writeTo

When linking a 1.2G output (nearly no debug info, 2846621 dynamic relocations) using `--threads=8`, I measured:

```
9.131462 Total ExecuteLinker
1.449913 Total Write output file
1.445784 Total Write sections
0.657152 Write sections {"detail":".rela.dyn"}
```

This change decreases the .rela.dyn time to 0.25s, leading to a 4% speedup in the total time.

* The parallelSort is slow because of expensive r_sym/r_offset computation. Cache the values.
* The iteration is slow. Move r_sym/r_addend computation ahead of time and parallelize it.

With this change, the new encodeDynamicReloc is cheap (0.05s), so there is no need to parallelize it.

Reviewed By: ikudrin

Differential Revision: https://reviews.llvm.org/D115993

Added: 
    

Modified: 
    lld/ELF/OutputSections.cpp
    lld/ELF/SyntheticSections.cpp
    lld/ELF/SyntheticSections.h

Removed: 
    


################################################################################
diff  --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp
index 00f961ec7d238..71ef547a5f38f 100644
--- a/lld/ELF/OutputSections.cpp
+++ b/lld/ELF/OutputSections.cpp
@@ -560,7 +560,7 @@ void OutputSection::checkDynRelAddends(const uint8_t *bufStart) {
     if (!sec)
       return;
     for (const DynamicReloc &rel : sec->relocs) {
-      int64_t addend = rel.computeAddend();
+      int64_t addend = rel.addend;
       const OutputSection *relOsec = rel.inputSec->getOutputSection();
       assert(relOsec != nullptr && "missing output section for relocation");
       const uint8_t *relocTarget =

diff  --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index 884798e1e6abf..a901c898b270d 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -1653,13 +1653,19 @@ RelrBaseSection::RelrBaseSection()
                        config->wordsize, ".relr.dyn") {}
 
 template <class ELFT>
-static void encodeDynamicReloc(SymbolTableBaseSection *symTab,
-                               typename ELFT::Rela *p,
+static void encodeDynamicReloc(typename ELFT::Rela *p,
                                const DynamicReloc &rel) {
+  p->r_offset = rel.r_offset;
+  p->setSymbolAndType(rel.r_sym, rel.type, config->isMips64EL);
   if (config->isRela)
-    p->r_addend = rel.computeAddend();
-  p->r_offset = rel.getOffset();
-  p->setSymbolAndType(rel.getSymIndex(symTab), rel.type, config->isMips64EL);
+    p->r_addend = rel.addend;
+}
+
+void DynamicReloc::computeRaw(SymbolTableBaseSection *symtab) {
+  r_offset = getOffset();
+  r_sym = getSymIndex(symtab);
+  addend = computeAddend();
+  kind = AddendOnly; // Catch errors
 }
 
 template <class ELFT>
@@ -1674,20 +1680,21 @@ RelocationSection<ELFT>::RelocationSection(StringRef name, bool sort)
 template <class ELFT> void RelocationSection<ELFT>::writeTo(uint8_t *buf) {
   SymbolTableBaseSection *symTab = getPartition().dynSymTab;
 
+  parallelForEach(relocs,
+                  [symTab](DynamicReloc &rel) { rel.computeRaw(symTab); });
   // Sort by (!IsRelative,SymIndex,r_offset). DT_REL[A]COUNT requires us to
   // place R_*_RELATIVE first. SymIndex is to improve locality, while r_offset
   // is to make results easier to read.
-  if (sort)
-    parallelSort(
-        relocs, [&](const DynamicReloc &a, const DynamicReloc &b) {
-          return std::make_tuple(a.type != target->relativeRel,
-                                 a.getSymIndex(symTab), a.getOffset()) <
-                 std::make_tuple(b.type != target->relativeRel,
-                                 b.getSymIndex(symTab), b.getOffset());
-        });
+  if (sort) {
+    const RelType relativeRel = target->relativeRel;
+    parallelSort(relocs, [&](const DynamicReloc &a, const DynamicReloc &b) {
+      return std::make_tuple(a.type != relativeRel, a.r_sym, a.r_offset) <
+             std::make_tuple(b.type != relativeRel, b.r_sym, b.r_offset);
+    });
+  }
 
   for (const DynamicReloc &rel : relocs) {
-    encodeDynamicReloc<ELFT>(symTab, reinterpret_cast<Elf_Rela *>(buf), rel);
+    encodeDynamicReloc<ELFT>(reinterpret_cast<Elf_Rela *>(buf), rel);
     buf += config->isRela ? sizeof(Elf_Rela) : sizeof(Elf_Rel);
   }
 }
@@ -1765,7 +1772,11 @@ bool AndroidPackedRelocationSection<ELFT>::updateAllocSize() {
 
   for (const DynamicReloc &rel : relocs) {
     Elf_Rela r;
-    encodeDynamicReloc<ELFT>(getPartition().dynSymTab, &r, rel);
+    r.r_offset = rel.getOffset();
+    r.setSymbolAndType(rel.getSymIndex(getPartition().dynSymTab), rel.type,
+                       false);
+    if (config->isRela)
+      r.r_addend = rel.computeAddend();
 
     if (r.getType(config->isMips64EL) == target->relativeRel)
       relatives.push_back(r);

diff  --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h
index 7f17d4ebe230b..04a3b9b394afa 100644
--- a/lld/ELF/SyntheticSections.h
+++ b/lld/ELF/SyntheticSections.h
@@ -449,21 +449,21 @@ class DynamicReloc {
   DynamicReloc(RelType type, const InputSectionBase *inputSec,
                uint64_t offsetInSec, Kind kind, Symbol &sym, int64_t addend,
                RelExpr expr)
-      : type(type), sym(&sym), inputSec(inputSec), offsetInSec(offsetInSec),
-        kind(kind), expr(expr), addend(addend) {}
+      : sym(&sym), inputSec(inputSec), offsetInSec(offsetInSec), type(type),
+        addend(addend), kind(kind), expr(expr) {}
   /// This constructor records a relative relocation with no symbol.
   DynamicReloc(RelType type, const InputSectionBase *inputSec,
                uint64_t offsetInSec, int64_t addend = 0)
-      : type(type), sym(nullptr), inputSec(inputSec), offsetInSec(offsetInSec),
-        kind(AddendOnly), expr(R_ADDEND), addend(addend) {}
+      : sym(nullptr), inputSec(inputSec), offsetInSec(offsetInSec), type(type),
+        addend(addend), kind(AddendOnly), expr(R_ADDEND) {}
   /// This constructor records dynamic relocation settings used by the MIPS
   /// multi-GOT implementation.
   DynamicReloc(RelType type, const InputSectionBase *inputSec,
                uint64_t offsetInSec, const OutputSection *outputSec,
                int64_t addend)
-      : type(type), sym(nullptr), inputSec(inputSec), offsetInSec(offsetInSec),
-        kind(MipsMultiGotPage), expr(R_ADDEND), addend(addend),
-        outputSec(outputSec) {}
+      : sym(nullptr), outputSec(outputSec), inputSec(inputSec),
+        offsetInSec(offsetInSec), type(type), addend(addend),
+        kind(MipsMultiGotPage), expr(R_ADDEND) {}
 
   uint64_t getOffset() const;
   uint32_t getSymIndex(SymbolTableBaseSection *symTab) const;
@@ -476,18 +476,24 @@ class DynamicReloc {
   /// address/the address of the corresponding GOT entry/etc.
   int64_t computeAddend() const;
 
-  RelType type;
+  void computeRaw(SymbolTableBaseSection *symtab);
+
   Symbol *sym;
+  const OutputSection *outputSec = nullptr;
   const InputSectionBase *inputSec;
   uint64_t offsetInSec;
+  uint64_t r_offset;
+  RelType type;
+  uint32_t r_sym;
+  // Initially input addend, then the output addend after
+  // RelocationSection<ELFT>::writeTo.
+  int64_t addend;
 
 private:
   Kind kind;
   // The kind of expression used to calculate the added (required e.g. for
   // relative GOT relocations).
   RelExpr expr;
-  int64_t addend;
-  const OutputSection *outputSec = nullptr;
 };
 
 template <class ELFT> class DynamicSection final : public SyntheticSection {


        


More information about the llvm-commits mailing list