[llvm] e6aebff - [ELF] Parallelize relocation scanning

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 12 12:56:44 PDT 2022


Author: Fangrui Song
Date: 2022-09-12T12:56:35-07:00
New Revision: e6aebff67426fa0f9779a0c19d6188a043bf15e7

URL: https://github.com/llvm/llvm-project/commit/e6aebff67426fa0f9779a0c19d6188a043bf15e7
DIFF: https://github.com/llvm/llvm-project/commit/e6aebff67426fa0f9779a0c19d6188a043bf15e7.diff

LOG: [ELF] Parallelize relocation scanning

* Change `Symbol::flags` to a `std::atomic<uint16_t>`
* Add `llvm::parallel::threadIndex` as a thread-local non-negative integer
* Add `relocsVec` to part.relaDyn and part.relrDyn so that relative relocations can be added without a mutex
* Change -z nocombreloc output to place relative relocations at the end (an arbitrary choice). Parallel scanning is disabled for -z nocombreloc so that the output stays deterministic.

MIPS and PPC64 use global state during relocation scanning, so scanning stays serial for those targets.

Speed-up with mimalloc and --threads=8 on an Intel Skylake machine:

* clang (Release): 1.27x as fast
* clang (Debug): 1.06x as fast
* chrome (default): 1.05x as fast
* scylladb (default): 1.04x as fast

Speed-up with glibc malloc and --threads=16 on a ThunderX2 (AArch64):

* clang (Release): 1.31x as fast
* scylladb (default): 1.06x as fast

Reviewed By: andrewng

Differential Revision: https://reviews.llvm.org/D133003

Added: 
    

Modified: 
    lld/ELF/Config.h
    lld/ELF/Relocations.cpp
    lld/ELF/Symbols.h
    lld/ELF/SyntheticSections.cpp
    lld/ELF/SyntheticSections.h
    lld/ELF/Writer.cpp
    lld/test/ELF/combreloc.s
    lld/test/ELF/comdat-discarded-error.s
    lld/test/ELF/undef-multi.s
    lld/test/ELF/undef.s
    llvm/include/llvm/Support/Parallel.h
    llvm/lib/Support/Parallel.cpp

Removed: 
    


################################################################################
diff  --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index 1f8ff9362e91c..26a6d63818957 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -323,9 +323,6 @@ struct Configuration {
   // if that's true.)
   bool isMips64EL;
 
-  // True if we need to reserve two .got entries for local-dynamic TLS model.
-  bool needsTlsLd = false;
-
   // True if we need to set the DF_STATIC_TLS flag to an output file, which
   // works as a hint to the dynamic loader that the shared object contains code
   // compiled with the initial-exec TLS model.
@@ -393,6 +390,8 @@ struct Ctx {
   SmallVector<std::pair<Symbol *, unsigned>, 0> nonPrevailingSyms;
   // True if SHT_LLVM_SYMPART is used.
   std::atomic<bool> hasSympart{false};
+  // True if we need to reserve two .got entries for local-dynamic TLS model.
+  std::atomic<bool> needsTlsLd{false};
   // A tuple of (reference, extractedFile, sym). Used by --why-extract=.
   SmallVector<std::tuple<std::string, const InputFile *, const Symbol &>, 0>
       whyExtractRecords;

diff  --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index ce819de7eb5fe..36166c6f91383 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -580,6 +580,7 @@ struct UndefinedDiag {
 };
 
 std::vector<UndefinedDiag> undefs;
+std::mutex relocMutex;
 }
 
 // Check whether the definition name def is a mangled function name that matches
@@ -822,6 +823,7 @@ void elf::reportUndefinedSymbols() {
 // Returns true if the undefined symbol will produce an error message.
 static bool maybeReportUndefined(Undefined &sym, InputSectionBase &sec,
                                  uint64_t offset) {
+  std::lock_guard<std::mutex> lock(relocMutex);
   // If versioned, issue an error (even if the symbol is weak) because we don't
   // know the defining filename which is required to construct a Verneed entry.
   if (sym.hasVersionSuffix) {
@@ -870,6 +872,7 @@ RelType RelocationScanner::getMipsN32RelType(RelTy *&rel) const {
   return type;
 }
 
+template <bool shard = false>
 static void addRelativeReloc(InputSectionBase &isec, uint64_t offsetInSec,
                              Symbol &sym, int64_t addend, RelExpr expr,
                              RelType type) {
@@ -883,11 +886,15 @@ static void addRelativeReloc(InputSectionBase &isec, uint64_t offsetInSec,
   // address.
   if (part.relrDyn && isec.alignment >= 2 && offsetInSec % 2 == 0) {
     isec.relocations.push_back({expr, type, offsetInSec, addend, &sym});
-    part.relrDyn->relocs.push_back({&isec, offsetInSec});
+    if (shard)
+      part.relrDyn->relocsVec[parallel::threadIndex].push_back(
+          {&isec, offsetInSec});
+    else
+      part.relrDyn->relocs.push_back({&isec, offsetInSec});
     return;
   }
-  part.relaDyn->addRelativeReloc(target->relativeRel, isec, offsetInSec, sym,
-                                 addend, type, expr);
+  part.relaDyn->addRelativeReloc<shard>(target->relativeRel, isec, offsetInSec,
+                                        sym, addend, type, expr);
 }
 
 template <class PltSection, class GotPltSection>
@@ -1055,11 +1062,12 @@ void RelocationScanner::processAux(RelExpr expr, RelType type, uint64_t offset,
   if (canWrite) {
     RelType rel = target.getDynRel(type);
     if (expr == R_GOT || (rel == target.symbolicRel && !sym.isPreemptible)) {
-      addRelativeReloc(*sec, offset, sym, addend, expr, type);
+      addRelativeReloc<true>(*sec, offset, sym, addend, expr, type);
       return;
     } else if (rel != 0) {
       if (config->emachine == EM_MIPS && rel == target.symbolicRel)
         rel = target.relativeRel;
+      std::lock_guard<std::mutex> lock(relocMutex);
       sec->getPartition().relaDyn->addSymbolReloc(rel, *sec, offset, sym,
                                                   addend, type);
 
@@ -1231,7 +1239,7 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym,
     }
     if (expr == R_TLSLD_HINT)
       return 1;
-    config->needsTlsLd = true;
+    ctx->needsTlsLd.store(true, std::memory_order_relaxed);
     c.relocations.push_back({expr, type, offset, addend, &sym});
     return 1;
   }
@@ -1286,7 +1294,7 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym,
       sym.setFlags(NEEDS_TLSIE);
       // R_GOT needs a relative relocation for PIC on i386 and Hexagon.
       if (expr == R_GOT && config->isPic && !target->usesOnlyLowPageBits(type))
-        addRelativeReloc(c, offset, sym, addend, expr, type);
+        addRelativeReloc<true>(c, offset, sym, addend, expr, type);
       else
         c.relocations.push_back({expr, type, offset, addend, &sym});
     }
@@ -1371,10 +1379,10 @@ template <class ELFT, class RelTy> void RelocationScanner::scanOne(RelTy *&i) {
   // The 5 types that relative GOTPLT are all x86 and x86-64 specific.
   if (oneof<R_GOTPLTONLY_PC, R_GOTPLTREL, R_GOTPLT, R_PLT_GOTPLT,
             R_TLSDESC_GOTPLT, R_TLSGD_GOTPLT>(expr)) {
-    in.gotPlt->hasGotPltOffRel = true;
+    in.gotPlt->hasGotPltOffRel.store(true, std::memory_order_relaxed);
   } else if (oneof<R_GOTONLY_PC, R_GOTREL, R_PPC32_PLTREL, R_PPC64_TOCBASE,
                    R_PPC64_RELAX_TOC>(expr)) {
-    in.got->hasGotOffRel = true;
+    in.got->hasGotOffRel.store(true, std::memory_order_relaxed);
   }
 
   // Process TLS relocations, including relaxing TLS relocations. Note that
@@ -1422,6 +1430,7 @@ template <class ELFT, class RelTy> void RelocationScanner::scanOne(RelTy *&i) {
   // We were asked not to generate PLT entries for ifuncs. Instead, pass the
   // direct relocation on through.
   if (LLVM_UNLIKELY(isIfunc) && config->zIfuncNoplt) {
+    std::lock_guard<std::mutex> lock(relocMutex);
     sym.exportDynamic = true;
     mainPart->relaDyn->addSymbolReloc(type, *sec, offset, sym, addend, type);
     return;
@@ -1530,17 +1539,42 @@ template <class ELFT> void elf::scanRelocations() {
   // determine if it needs special treatment, such as creating GOT, PLT,
   // copy relocations, etc. Note that relocations for non-alloc sections are
   // directly processed by InputSection::relocateNonAlloc.
-  RelocationScanner scanner;
-  for (InputSectionBase *sec : inputSections)
-    if (sec->isLive() && (sec->flags & SHF_ALLOC))
-      scanner.template scanSection<ELFT>(*sec);
-  for (Partition &part : partitions) {
-    for (EhInputSection *sec : part.ehFrame->sections)
-      scanner.template scanSection<ELFT>(*sec);
-    if (part.armExidx && part.armExidx->isLive())
-      for (InputSection *sec : part.armExidx->exidxSections)
-        scanner.template scanSection<ELFT>(*sec);
+
+  // Deterministic parallelism needs sorting relocations which is unsuitable
+  // for -z nocombreloc. MIPS and PPC64 use global states which are not suitable
+  // for parallelism.
+  bool serial = !config->zCombreloc || config->emachine == EM_MIPS ||
+                config->emachine == EM_PPC64;
+  parallel::TaskGroup tg;
+  for (ELFFileBase *f : ctx->objectFiles) {
+    auto fn = [f]() {
+      RelocationScanner scanner;
+      for (InputSectionBase *s : f->getSections()) {
+        if (s && s->kind() == SectionBase::Regular && s->isLive() &&
+            (s->flags & SHF_ALLOC) &&
+            !(s->type == SHT_ARM_EXIDX && config->emachine == EM_ARM))
+          scanner.template scanSection<ELFT>(*s);
+      }
+    };
+    if (serial)
+      fn();
+    else
+      tg.execute(fn);
   }
+
+  // Both the main thread and thread pool index 0 use threadIndex==0. Be
+  // careful that they don't concurrently run scanSections. When serial is
+  // true, fn() has finished at this point, so running execute is safe.
+  tg.execute([] {
+    RelocationScanner scanner;
+    for (Partition &part : partitions) {
+      for (EhInputSection *sec : part.ehFrame->sections)
+        scanner.template scanSection<ELFT>(*sec);
+      if (part.armExidx && part.armExidx->isLive())
+        for (InputSection *sec : part.armExidx->exidxSections)
+          scanner.template scanSection<ELFT>(*sec);
+    }
+  });
 }
 
 static bool handleNonPreemptibleIfunc(Symbol &sym, uint16_t flags) {
@@ -1624,7 +1658,7 @@ static bool handleNonPreemptibleIfunc(Symbol &sym, uint16_t flags) {
 
 void elf::postScanRelocations() {
   auto fn = [](Symbol &sym) {
-    auto flags = sym.flags;
+    auto flags = sym.flags.load(std::memory_order_relaxed);
     if (handleNonPreemptibleIfunc(sym, flags))
       return;
     if (!sym.needsDynReloc())
@@ -1705,7 +1739,8 @@ void elf::postScanRelocations() {
       addTpOffsetGotEntry(sym);
   };
 
-  if (config->needsTlsLd && in.got->addTlsIndex()) {
+  if (ctx->needsTlsLd.load(std::memory_order_relaxed) &&
+      in.got->addTlsIndex()) {
     static Undefined dummy(nullptr, "", STB_LOCAL, 0, 0);
     if (config->shared)
       mainPart->relaDyn->addReloc(

diff  --git a/lld/ELF/Symbols.h b/lld/ELF/Symbols.h
index 1c0a5f58d1cc9..8bca6c8b657f5 100644
--- a/lld/ELF/Symbols.h
+++ b/lld/ELF/Symbols.h
@@ -81,6 +81,10 @@ class Symbol {
   // The file from which this symbol was created.
   InputFile *file;
 
+  // The default copy constructor is deleted due to atomic flags. Define one for
+  // places where no atomic is needed.
+  Symbol(const Symbol &o) { memcpy(this, &o, sizeof(o)); }
+
 protected:
   const char *nameData;
   // 32-bit size saves space.
@@ -295,7 +299,7 @@ class Symbol {
 
   // Temporary flags used to communicate which symbol entries need PLT and GOT
   // entries during postScanRelocations();
-  uint16_t flags = 0;
+  std::atomic<uint16_t> flags = 0;
 
   // A symAux index used to access GOT/PLT entry indexes. This is allocated in
   // postScanRelocations().
@@ -309,15 +313,15 @@ class Symbol {
   uint16_t versionId;
 
   void setFlags(uint16_t bits) {
-    flags |= bits;
+    flags.fetch_or(bits, std::memory_order_relaxed);
   }
   bool hasFlag(uint16_t bit) const {
     assert(bit && (bit & (bit - 1)) == 0 && "bit must be a power of 2");
-    return flags & bit;
+    return flags.load(std::memory_order_relaxed) & bit;
   }
 
   bool needsDynReloc() const {
-    return flags &
+    return flags.load(std::memory_order_relaxed) &
            (NEEDS_COPY | NEEDS_GOT | NEEDS_PLT | NEEDS_TLSDESC | NEEDS_TLSGD |
             NEEDS_TLSGD_TO_IE | NEEDS_GOT_DTPREL | NEEDS_TLSIE);
   }

diff  --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index afd4038da1a67..291c925cfb044 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -1572,10 +1572,11 @@ uint32_t DynamicReloc::getSymIndex(SymbolTableBaseSection *symTab) const {
 RelocationBaseSection::RelocationBaseSection(StringRef name, uint32_t type,
                                              int32_t dynamicTag,
                                              int32_t sizeDynamicTag,
-                                             bool combreloc)
+                                             bool combreloc,
+                                             unsigned concurrency)
     : SyntheticSection(SHF_ALLOC, type, config->wordsize, name),
       dynamicTag(dynamicTag), sizeDynamicTag(sizeDynamicTag),
-      combreloc(combreloc) {}
+      relocsVec(concurrency), combreloc(combreloc) {}
 
 void RelocationBaseSection::addSymbolReloc(RelType dynType,
                                            InputSectionBase &isec,
@@ -1586,19 +1587,6 @@ void RelocationBaseSection::addSymbolReloc(RelType dynType,
            R_ADDEND, addendRelType ? *addendRelType : target->noneRel);
 }
 
-void RelocationBaseSection::addRelativeReloc(
-    RelType dynType, InputSectionBase &inputSec, uint64_t offsetInSec,
-    Symbol &sym, int64_t addend, RelType addendRelType, RelExpr expr) {
-  // This function should only be called for non-preemptible symbols or
-  // RelExpr values that refer to an address inside the output file (e.g. the
-  // address of the GOT entry for a potentially preemptible symbol).
-  assert((!sym.isPreemptible || expr == R_GOT) &&
-         "cannot add relative relocation against preemptible symbol");
-  assert(expr != R_ADDEND && "expected non-addend relocation expression");
-  addReloc(DynamicReloc::AddendOnlyWithTargetVA, dynType, inputSec, offsetInSec,
-           sym, addend, expr, addendRelType);
-}
-
 void RelocationBaseSection::addAddendOnlyRelocIfNonPreemptible(
     RelType dynType, InputSectionBase &isec, uint64_t offsetInSec, Symbol &sym,
     RelType addendRelType) {
@@ -1611,17 +1599,14 @@ void RelocationBaseSection::addAddendOnlyRelocIfNonPreemptible(
              sym, 0, R_ABS, addendRelType);
 }
 
-void RelocationBaseSection::addReloc(DynamicReloc::Kind kind, RelType dynType,
-                                     InputSectionBase &inputSec,
-                                     uint64_t offsetInSec, Symbol &sym,
-                                     int64_t addend, RelExpr expr,
-                                     RelType addendRelType) {
-  // Write the addends to the relocated address if required. We skip
-  // it if the written value would be zero.
-  if (config->writeAddends && (expr != R_ADDEND || addend != 0))
-    inputSec.relocations.push_back(
-        {expr, addendRelType, offsetInSec, addend, &sym});
-  addReloc({dynType, &inputSec, offsetInSec, kind, sym, addend, expr});
+void RelocationBaseSection::mergeRels() {
+  size_t newSize = relocs.size();
+  for (const auto &v : relocsVec)
+    newSize += v.size();
+  relocs.reserve(newSize);
+  for (const auto &v : relocsVec)
+    llvm::append_range(relocs, v);
+  relocsVec.clear();
 }
 
 void RelocationBaseSection::partitionRels() {
@@ -1680,10 +1665,12 @@ void RelocationBaseSection::computeRels() {
 }
 
 template <class ELFT>
-RelocationSection<ELFT>::RelocationSection(StringRef name, bool combreloc)
+RelocationSection<ELFT>::RelocationSection(StringRef name, bool combreloc,
+                                           unsigned concurrency)
     : RelocationBaseSection(name, config->isRela ? SHT_RELA : SHT_REL,
                             config->isRela ? DT_RELA : DT_REL,
-                            config->isRela ? DT_RELASZ : DT_RELSZ, combreloc) {
+                            config->isRela ? DT_RELASZ : DT_RELSZ, combreloc,
+                            concurrency) {
   this->entsize = config->isRela ? sizeof(Elf_Rela) : sizeof(Elf_Rel);
 }
 
@@ -1699,19 +1686,30 @@ template <class ELFT> void RelocationSection<ELFT>::writeTo(uint8_t *buf) {
   }
 }
 
-RelrBaseSection::RelrBaseSection()
+RelrBaseSection::RelrBaseSection(unsigned concurrency)
     : SyntheticSection(SHF_ALLOC,
                        config->useAndroidRelrTags ? SHT_ANDROID_RELR : SHT_RELR,
-                       config->wordsize, ".relr.dyn") {}
+                       config->wordsize, ".relr.dyn"),
+      relocsVec(concurrency) {}
+
+void RelrBaseSection::mergeRels() {
+  size_t newSize = relocs.size();
+  for (const auto &v : relocsVec)
+    newSize += v.size();
+  relocs.reserve(newSize);
+  for (const auto &v : relocsVec)
+    llvm::append_range(relocs, v);
+  relocsVec.clear();
+}
 
 template <class ELFT>
 AndroidPackedRelocationSection<ELFT>::AndroidPackedRelocationSection(
-    StringRef name)
+    StringRef name, unsigned concurrency)
     : RelocationBaseSection(
           name, config->isRela ? SHT_ANDROID_RELA : SHT_ANDROID_REL,
           config->isRela ? DT_ANDROID_RELA : DT_ANDROID_REL,
           config->isRela ? DT_ANDROID_RELASZ : DT_ANDROID_RELSZ,
-          /*combreloc=*/false) {
+          /*combreloc=*/false, concurrency) {
   this->entsize = 1;
 }
 
@@ -1959,7 +1957,9 @@ bool AndroidPackedRelocationSection<ELFT>::updateAllocSize() {
   return relocData.size() != oldSize;
 }
 
-template <class ELFT> RelrSection<ELFT>::RelrSection() {
+template <class ELFT>
+RelrSection<ELFT>::RelrSection(unsigned concurrency)
+    : RelrBaseSection(concurrency) {
   this->entsize = config->wordsize;
 }
 

diff  --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h
index 5f7321a803d70..fd2296a4cfca5 100644
--- a/lld/ELF/SyntheticSections.h
+++ b/lld/ELF/SyntheticSections.h
@@ -26,6 +26,7 @@
 #include "llvm/ADT/MapVector.h"
 #include "llvm/MC/StringTableBuilder.h"
 #include "llvm/Support/Endian.h"
+#include "llvm/Support/Parallel.h"
 #include "llvm/Support/Threading.h"
 
 namespace lld::elf {
@@ -115,7 +116,7 @@ class GotSection : public SyntheticSection {
 
   // Flag to force GOT to be in output if we have relocations
   // that relies on its address.
-  bool hasGotOffRel = false;
+  std::atomic<bool> hasGotOffRel = false;
 
 protected:
   size_t numEntries = 0;
@@ -357,7 +358,7 @@ class GotPltSection final : public SyntheticSection {
 
   // Flag to force GotPlt to be in output if we have relocations
   // that relies on its address.
-  bool hasGotPltOffRel = false;
+  std::atomic<bool> hasGotPltOffRel = false;
 
 private:
   SmallVector<const Symbol *, 0> entries;
@@ -486,32 +487,55 @@ template <class ELFT> class DynamicSection final : public SyntheticSection {
 class RelocationBaseSection : public SyntheticSection {
 public:
   RelocationBaseSection(StringRef name, uint32_t type, int32_t dynamicTag,
-                        int32_t sizeDynamicTag, bool combreloc);
+                        int32_t sizeDynamicTag, bool combreloc,
+                        unsigned concurrency);
   /// Add a dynamic relocation without writing an addend to the output section.
   /// This overload can be used if the addends are written directly instead of
   /// using relocations on the input section (e.g. MipsGotSection::writeTo()).
-  void addReloc(const DynamicReloc &reloc) { relocs.push_back(reloc); }
+  template <bool shard = false> void addReloc(const DynamicReloc &reloc) {
+    relocs.push_back(reloc);
+  }
   /// Add a dynamic relocation against \p sym with an optional addend.
   void addSymbolReloc(RelType dynType, InputSectionBase &isec,
                       uint64_t offsetInSec, Symbol &sym, int64_t addend = 0,
                       llvm::Optional<RelType> addendRelType = llvm::None);
   /// Add a relative dynamic relocation that uses the target address of \p sym
   /// (i.e. InputSection::getRelocTargetVA()) + \p addend as the addend.
+  /// This function should only be called for non-preemptible symbols or
+  /// RelExpr values that refer to an address inside the output file (e.g. the
+  /// address of the GOT entry for a potentially preemptible symbol).
+  template <bool shard = false>
   void addRelativeReloc(RelType dynType, InputSectionBase &isec,
                         uint64_t offsetInSec, Symbol &sym, int64_t addend,
-                        RelType addendRelType, RelExpr expr);
+                        RelType addendRelType, RelExpr expr) {
+    assert(expr != R_ADDEND && "expected non-addend relocation expression");
+    addReloc<shard>(DynamicReloc::AddendOnlyWithTargetVA, dynType, isec,
+                    offsetInSec, sym, addend, expr, addendRelType);
+  }
   /// Add a dynamic relocation using the target address of \p sym as the addend
   /// if \p sym is non-preemptible. Otherwise add a relocation against \p sym.
   void addAddendOnlyRelocIfNonPreemptible(RelType dynType,
                                           InputSectionBase &isec,
                                           uint64_t offsetInSec, Symbol &sym,
                                           RelType addendRelType);
-  void addReloc(DynamicReloc::Kind kind, RelType dynType,
-                InputSectionBase &inputSec, uint64_t offsetInSec, Symbol &sym,
-                int64_t addend, RelExpr expr, RelType addendRelType);
-  bool isNeeded() const override { return !relocs.empty(); }
+  template <bool shard = false>
+  void addReloc(DynamicReloc::Kind kind, RelType dynType, InputSectionBase &sec,
+                uint64_t offsetInSec, Symbol &sym, int64_t addend, RelExpr expr,
+                RelType addendRelType) {
+    // Write the addends to the relocated address if required. We skip
+    // it if the written value would be zero.
+    if (config->writeAddends && (expr != R_ADDEND || addend != 0))
+      sec.relocations.push_back(
+          {expr, addendRelType, offsetInSec, addend, &sym});
+    addReloc<shard>({dynType, &sec, offsetInSec, kind, sym, addend, expr});
+  }
+  bool isNeeded() const override {
+    return !relocs.empty() ||
+           llvm::any_of(relocsVec, [](auto &v) { return !v.empty(); });
+  }
   size_t getSize() const override { return relocs.size() * this->entsize; }
   size_t getRelativeRelocCount() const { return numRelativeRelocs; }
+  void mergeRels();
   void partitionRels();
   void finalizeContents() override;
   static bool classof(const SectionBase *d) {
@@ -524,17 +548,25 @@ class RelocationBaseSection : public SyntheticSection {
 
 protected:
   void computeRels();
+  // Used when parallel relocation scanning adds relocations. The elements
+  // will be moved into relocs by mergeRels().
+  SmallVector<SmallVector<DynamicReloc, 0>, 0> relocsVec;
   size_t numRelativeRelocs = 0; // used by -z combreloc
   bool combreloc;
 };
 
+template <>
+inline void RelocationBaseSection::addReloc<true>(const DynamicReloc &reloc) {
+  relocsVec[llvm::parallel::threadIndex].push_back(reloc);
+}
+
 template <class ELFT>
 class RelocationSection final : public RelocationBaseSection {
   using Elf_Rel = typename ELFT::Rel;
   using Elf_Rela = typename ELFT::Rela;
 
 public:
-  RelocationSection(StringRef name, bool combreloc);
+  RelocationSection(StringRef name, bool combreloc, unsigned concurrency);
   void writeTo(uint8_t *buf) override;
 };
 
@@ -544,7 +576,7 @@ class AndroidPackedRelocationSection final : public RelocationBaseSection {
   using Elf_Rela = typename ELFT::Rela;
 
 public:
-  AndroidPackedRelocationSection(StringRef name);
+  AndroidPackedRelocationSection(StringRef name, unsigned concurrency);
 
   bool updateAllocSize() override;
   size_t getSize() const override { return relocData.size(); }
@@ -565,9 +597,14 @@ struct RelativeReloc {
 
 class RelrBaseSection : public SyntheticSection {
 public:
-  RelrBaseSection();
-  bool isNeeded() const override { return !relocs.empty(); }
+  RelrBaseSection(unsigned concurrency);
+  void mergeRels();
+  bool isNeeded() const override {
+    return !relocs.empty() ||
+           llvm::any_of(relocsVec, [](auto &v) { return !v.empty(); });
+  }
   SmallVector<RelativeReloc, 0> relocs;
+  SmallVector<SmallVector<RelativeReloc, 0>, 0> relocsVec;
 };
 
 // RelrSection is used to encode offsets for relative relocations.
@@ -578,7 +615,7 @@ template <class ELFT> class RelrSection final : public RelrBaseSection {
   using Elf_Relr = typename ELFT::Relr;
 
 public:
-  RelrSection();
+  RelrSection(unsigned concurrency);
 
   bool updateAllocSize() override;
   size_t getSize() const override { return relrRelocs.size() * this->entsize; }

diff  --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index de6c19863dafc..9124961a5089f 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -317,6 +317,7 @@ template <class ELFT> void elf::createSyntheticSections() {
 
   StringRef relaDynName = config->isRela ? ".rela.dyn" : ".rel.dyn";
 
+  const unsigned threadCount = parallel::strategy.compute_thread_count();
   for (Partition &part : partitions) {
     auto add = [&](SyntheticSection &sec) {
       sec.partition = part.getNumber();
@@ -350,11 +351,11 @@ template <class ELFT> void elf::createSyntheticSections() {
     }
 
     if (config->androidPackDynRelocs)
-      part.relaDyn =
-          std::make_unique<AndroidPackedRelocationSection<ELFT>>(relaDynName);
+      part.relaDyn = std::make_unique<AndroidPackedRelocationSection<ELFT>>(
+          relaDynName, threadCount);
     else
       part.relaDyn = std::make_unique<RelocationSection<ELFT>>(
-          relaDynName, config->zCombreloc);
+          relaDynName, config->zCombreloc, threadCount);
 
     if (config->hasDynSymTab) {
       add(*part.dynSymTab);
@@ -386,7 +387,7 @@ template <class ELFT> void elf::createSyntheticSections() {
     }
 
     if (config->relrPackDynRelocs) {
-      part.relrDyn = std::make_unique<RelrSection<ELFT>>();
+      part.relrDyn = std::make_unique<RelrSection<ELFT>>(threadCount);
       add(*part.relrDyn);
     }
 
@@ -468,7 +469,8 @@ template <class ELFT> void elf::createSyntheticSections() {
   // We always need to add rel[a].plt to output if it has entries.
   // Even for static linking it can contain R_[*]_IRELATIVE relocations.
   in.relaPlt = std::make_unique<RelocationSection<ELFT>>(
-      config->isRela ? ".rela.plt" : ".rel.plt", /*sort=*/false);
+      config->isRela ? ".rela.plt" : ".rel.plt", /*sort=*/false,
+      /*threadCount=*/1);
   add(*in.relaPlt);
 
   // The relaIplt immediately follows .rel[a].dyn to ensure that the IRelative
@@ -479,7 +481,7 @@ template <class ELFT> void elf::createSyntheticSections() {
   // behaviour by placing the iplt section in .rel.plt.
   in.relaIplt = std::make_unique<RelocationSection<ELFT>>(
       config->androidPackDynRelocs ? in.relaPlt->name : relaDynName,
-      /*sort=*/false);
+      /*sort=*/false, /*threadCount=*/1);
   add(*in.relaIplt);
 
   if ((config->emachine == EM_386 || config->emachine == EM_X86_64) &&
@@ -2074,16 +2076,20 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() {
     // symbol table section (dynSymTab) must be the first one.
     for (Partition &part : partitions) {
       if (part.relaDyn) {
+        part.relaDyn->mergeRels();
         // Compute DT_RELACOUNT to be used by part.dynamic.
         part.relaDyn->partitionRels();
         finalizeSynthetic(part.relaDyn.get());
       }
+      if (part.relrDyn) {
+        part.relrDyn->mergeRels();
+        finalizeSynthetic(part.relrDyn.get());
+      }
 
       finalizeSynthetic(part.dynSymTab.get());
       finalizeSynthetic(part.gnuHashTab.get());
       finalizeSynthetic(part.hashTab.get());
       finalizeSynthetic(part.verDef.get());
-      finalizeSynthetic(part.relrDyn.get());
       finalizeSynthetic(part.ehFrameHdr.get());
       finalizeSynthetic(part.verSym.get());
       finalizeSynthetic(part.verNeed.get());

diff  --git a/lld/test/ELF/combreloc.s b/lld/test/ELF/combreloc.s
index 17edb93cb5069..59dedceb633fc 100644
--- a/lld/test/ELF/combreloc.s
+++ b/lld/test/ELF/combreloc.s
@@ -35,8 +35,8 @@
 # NOCOMB-NEXT:     0x3400 R_X86_64_64 ccc 0x0
 # NOCOMB-NEXT:     0x3408 R_X86_64_64 bbb 0x0
 # NOCOMB-NEXT:     0x3410 R_X86_64_64 aaa 0x0
-# NOCOMB-NEXT:     0x3418 R_X86_64_RELATIVE - 0x3420
 # NOCOMB-NEXT:     0x23F0 R_X86_64_GLOB_DAT aaa 0x0
+# NOCOMB-NEXT:     0x3418 R_X86_64_RELATIVE - 0x3420
 # NOCOMB-NEXT:   }
 
 .globl aaa, bbb, ccc

diff  --git a/lld/test/ELF/comdat-discarded-error.s b/lld/test/ELF/comdat-discarded-error.s
index dec927d32f39d..f7ff635a0812d 100644
--- a/lld/test/ELF/comdat-discarded-error.s
+++ b/lld/test/ELF/comdat-discarded-error.s
@@ -5,7 +5,7 @@
 # RUN: echo '.weak foo; foo: .section .text.foo,"axG",@progbits,foo,comdat; .globl bar; bar:' |\
 # RUN:   llvm-mc -filetype=obj -triple=x86_64 - -o %t3.o
 
-# RUN: not ld.lld %t2.o %t3.o %t1.o -o /dev/null 2>&1 | FileCheck %s
+# RUN: not ld.lld --threads=1 %t2.o %t3.o %t1.o -o /dev/null 2>&1 | FileCheck %s
 
 # CHECK:      error: relocation refers to a symbol in a discarded section: bar
 # CHECK-NEXT: >>> defined in {{.*}}3.o

diff  --git a/lld/test/ELF/undef-multi.s b/lld/test/ELF/undef-multi.s
index bc1b0623fa580..af21693d08913 100644
--- a/lld/test/ELF/undef-multi.s
+++ b/lld/test/ELF/undef-multi.s
@@ -1,7 +1,7 @@
 # REQUIRES: x86
 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/undef.s -o %t2.o
-# RUN: not ld.lld %t.o %t2.o -o /dev/null 2>&1 | FileCheck %s
+# RUN: not ld.lld --threads=1 %t.o %t2.o -o /dev/null 2>&1 | FileCheck %s
 
 # CHECK: error: undefined symbol: zed2
 # CHECK-NEXT: >>> referenced by undef-multi.s
@@ -24,7 +24,7 @@
 # RUN: echo "  call zed2" >> %t.moreref.s
 # RUN: echo "  call zed2" >> %t.moreref.s
 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %t.moreref.s -o %t3.o
-# RUN: not ld.lld %t.o %t2.o %t3.o -o /dev/null -error-limit=2 2>&1 | \
+# RUN: not ld.lld --threads=1 %t.o %t2.o %t3.o -o /dev/null -error-limit=2 2>&1 | \
 # RUN:     FileCheck --check-prefix=LIMIT %s
 
 # LIMIT: error: undefined symbol: zed2

diff  --git a/lld/test/ELF/undef.s b/lld/test/ELF/undef.s
index 6398b73d51e62..2b42ae12be2c1 100644
--- a/lld/test/ELF/undef.s
+++ b/lld/test/ELF/undef.s
@@ -5,9 +5,9 @@
 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/undef-bad-debug.s -o %t4.o
 # RUN: rm -f %t2.a
 # RUN: llvm-ar rc %t2.a %t2.o
-# RUN: not ld.lld %t.o %t2.a %t3.o %t4.o -o /dev/null 2>&1 \
+# RUN: not ld.lld --threads=1 %t.o %t2.a %t3.o %t4.o -o /dev/null 2>&1 \
 # RUN:   | FileCheck %s --implicit-check-not="error:" --implicit-check-not="warning:"
-# RUN: not ld.lld -pie %t.o %t2.a %t3.o %t4.o -o /dev/null 2>&1 \
+# RUN: not ld.lld --threads=1 -pie %t.o %t2.a %t3.o %t4.o -o /dev/null 2>&1 \
 # RUN:   | FileCheck %s --implicit-check-not="error:" --implicit-check-not="warning:"
 
 # CHECK:      error: undefined symbol: foo

diff  --git a/llvm/include/llvm/Support/Parallel.h b/llvm/include/llvm/Support/Parallel.h
index 6569479674071..918edc07d96ae 100644
--- a/llvm/include/llvm/Support/Parallel.h
+++ b/llvm/include/llvm/Support/Parallel.h
@@ -28,6 +28,7 @@ namespace parallel {
 // this file. It defaults to using all hardware threads and should be
 // initialized before the first use of parallel routines.
 extern ThreadPoolStrategy strategy;
+extern thread_local unsigned threadIndex;
 
 namespace detail {
 class Latch {

diff  --git a/llvm/lib/Support/Parallel.cpp b/llvm/lib/Support/Parallel.cpp
index 9f13726e36913..71c41c24817fe 100644
--- a/llvm/lib/Support/Parallel.cpp
+++ b/llvm/lib/Support/Parallel.cpp
@@ -18,6 +18,7 @@
 #include <vector>
 
 llvm::ThreadPoolStrategy llvm::parallel::strategy;
+thread_local unsigned llvm::parallel::threadIndex;
 
 namespace llvm {
 namespace parallel {
@@ -95,6 +96,7 @@ class ThreadPoolExecutor : public Executor {
 
 private:
   void work(ThreadPoolStrategy S, unsigned ThreadID) {
+    threadIndex = ThreadID;
     S.apply_thread_strategy(ThreadID);
     while (true) {
       std::unique_lock<std::mutex> Lock(Mutex);


        


More information about the llvm-commits mailing list