[llvm] e6aebff - [ELF] Parallelize relocation scanning
Fangrui Song via llvm-commits
llvm-commits@lists.llvm.org
Mon Sep 12 12:56:44 PDT 2022
Author: Fangrui Song
Date: 2022-09-12T12:56:35-07:00
New Revision: e6aebff67426fa0f9779a0c19d6188a043bf15e7
URL: https://github.com/llvm/llvm-project/commit/e6aebff67426fa0f9779a0c19d6188a043bf15e7
DIFF: https://github.com/llvm/llvm-project/commit/e6aebff67426fa0f9779a0c19d6188a043bf15e7.diff
LOG: [ELF] Parallelize relocation scanning
* Change `Symbol::flags` to a `std::atomic<uint16_t>`
* Add `llvm::parallel::threadIndex` as a thread-local non-negative integer
* Add `relocsVec` to part.relaDyn and part.relrDyn so that relative relocations can be added without a mutex
* Arbitrarily change -z nocombreloc to move relative relocations to the end. Disable parallelism for deterministic output.
MIPS and PPC64 use global states for relocation scanning. Keep serial scanning.
Speed-up with mimalloc and --threads=8 on an Intel Skylake machine:
* clang (Release): 1.27x as fast
* clang (Debug): 1.06x as fast
* chrome (default): 1.05x as fast
* scylladb (default): 1.04x as fast
Speed-up with glibc malloc and --threads=16 on a ThunderX2 (AArch64):
* clang (Release): 1.31x as fast
* scylladb (default): 1.06x as fast
Reviewed By: andrewng
Differential Revision: https://reviews.llvm.org/D133003
Added:
Modified:
lld/ELF/Config.h
lld/ELF/Relocations.cpp
lld/ELF/Symbols.h
lld/ELF/SyntheticSections.cpp
lld/ELF/SyntheticSections.h
lld/ELF/Writer.cpp
lld/test/ELF/combreloc.s
lld/test/ELF/comdat-discarded-error.s
lld/test/ELF/undef-multi.s
lld/test/ELF/undef.s
llvm/include/llvm/Support/Parallel.h
llvm/lib/Support/Parallel.cpp
Removed:
################################################################################
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index 1f8ff9362e91c..26a6d63818957 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -323,9 +323,6 @@ struct Configuration {
// if that's true.)
bool isMips64EL;
- // True if we need to reserve two .got entries for local-dynamic TLS model.
- bool needsTlsLd = false;
-
// True if we need to set the DF_STATIC_TLS flag to an output file, which
// works as a hint to the dynamic loader that the shared object contains code
// compiled with the initial-exec TLS model.
@@ -393,6 +390,8 @@ struct Ctx {
SmallVector<std::pair<Symbol *, unsigned>, 0> nonPrevailingSyms;
// True if SHT_LLVM_SYMPART is used.
std::atomic<bool> hasSympart{false};
+ // True if we need to reserve two .got entries for local-dynamic TLS model.
+ std::atomic<bool> needsTlsLd{false};
// A tuple of (reference, extractedFile, sym). Used by --why-extract=.
SmallVector<std::tuple<std::string, const InputFile *, const Symbol &>, 0>
whyExtractRecords;
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index ce819de7eb5fe..36166c6f91383 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -580,6 +580,7 @@ struct UndefinedDiag {
};
std::vector<UndefinedDiag> undefs;
+std::mutex relocMutex;
}
// Check whether the definition name def is a mangled function name that matches
@@ -822,6 +823,7 @@ void elf::reportUndefinedSymbols() {
// Returns true if the undefined symbol will produce an error message.
static bool maybeReportUndefined(Undefined &sym, InputSectionBase &sec,
uint64_t offset) {
+ std::lock_guard<std::mutex> lock(relocMutex);
// If versioned, issue an error (even if the symbol is weak) because we don't
// know the defining filename which is required to construct a Verneed entry.
if (sym.hasVersionSuffix) {
@@ -870,6 +872,7 @@ RelType RelocationScanner::getMipsN32RelType(RelTy *&rel) const {
return type;
}
+template <bool shard = false>
static void addRelativeReloc(InputSectionBase &isec, uint64_t offsetInSec,
Symbol &sym, int64_t addend, RelExpr expr,
RelType type) {
@@ -883,11 +886,15 @@ static void addRelativeReloc(InputSectionBase &isec, uint64_t offsetInSec,
// address.
if (part.relrDyn && isec.alignment >= 2 && offsetInSec % 2 == 0) {
isec.relocations.push_back({expr, type, offsetInSec, addend, &sym});
- part.relrDyn->relocs.push_back({&isec, offsetInSec});
+ if (shard)
+ part.relrDyn->relocsVec[parallel::threadIndex].push_back(
+ {&isec, offsetInSec});
+ else
+ part.relrDyn->relocs.push_back({&isec, offsetInSec});
return;
}
- part.relaDyn->addRelativeReloc(target->relativeRel, isec, offsetInSec, sym,
- addend, type, expr);
+ part.relaDyn->addRelativeReloc<shard>(target->relativeRel, isec, offsetInSec,
+ sym, addend, type, expr);
}
template <class PltSection, class GotPltSection>
@@ -1055,11 +1062,12 @@ void RelocationScanner::processAux(RelExpr expr, RelType type, uint64_t offset,
if (canWrite) {
RelType rel = target.getDynRel(type);
if (expr == R_GOT || (rel == target.symbolicRel && !sym.isPreemptible)) {
- addRelativeReloc(*sec, offset, sym, addend, expr, type);
+ addRelativeReloc<true>(*sec, offset, sym, addend, expr, type);
return;
} else if (rel != 0) {
if (config->emachine == EM_MIPS && rel == target.symbolicRel)
rel = target.relativeRel;
+ std::lock_guard<std::mutex> lock(relocMutex);
sec->getPartition().relaDyn->addSymbolReloc(rel, *sec, offset, sym,
addend, type);
@@ -1231,7 +1239,7 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym,
}
if (expr == R_TLSLD_HINT)
return 1;
- config->needsTlsLd = true;
+ ctx->needsTlsLd.store(true, std::memory_order_relaxed);
c.relocations.push_back({expr, type, offset, addend, &sym});
return 1;
}
@@ -1286,7 +1294,7 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym,
sym.setFlags(NEEDS_TLSIE);
// R_GOT needs a relative relocation for PIC on i386 and Hexagon.
if (expr == R_GOT && config->isPic && !target->usesOnlyLowPageBits(type))
- addRelativeReloc(c, offset, sym, addend, expr, type);
+ addRelativeReloc<true>(c, offset, sym, addend, expr, type);
else
c.relocations.push_back({expr, type, offset, addend, &sym});
}
@@ -1371,10 +1379,10 @@ template <class ELFT, class RelTy> void RelocationScanner::scanOne(RelTy *&i) {
// The 5 types that relative GOTPLT are all x86 and x86-64 specific.
if (oneof<R_GOTPLTONLY_PC, R_GOTPLTREL, R_GOTPLT, R_PLT_GOTPLT,
R_TLSDESC_GOTPLT, R_TLSGD_GOTPLT>(expr)) {
- in.gotPlt->hasGotPltOffRel = true;
+ in.gotPlt->hasGotPltOffRel.store(true, std::memory_order_relaxed);
} else if (oneof<R_GOTONLY_PC, R_GOTREL, R_PPC32_PLTREL, R_PPC64_TOCBASE,
R_PPC64_RELAX_TOC>(expr)) {
- in.got->hasGotOffRel = true;
+ in.got->hasGotOffRel.store(true, std::memory_order_relaxed);
}
// Process TLS relocations, including relaxing TLS relocations. Note that
@@ -1422,6 +1430,7 @@ template <class ELFT, class RelTy> void RelocationScanner::scanOne(RelTy *&i) {
// We were asked not to generate PLT entries for ifuncs. Instead, pass the
// direct relocation on through.
if (LLVM_UNLIKELY(isIfunc) && config->zIfuncNoplt) {
+ std::lock_guard<std::mutex> lock(relocMutex);
sym.exportDynamic = true;
mainPart->relaDyn->addSymbolReloc(type, *sec, offset, sym, addend, type);
return;
@@ -1530,17 +1539,42 @@ template <class ELFT> void elf::scanRelocations() {
// determine if it needs special treatment, such as creating GOT, PLT,
// copy relocations, etc. Note that relocations for non-alloc sections are
// directly processed by InputSection::relocateNonAlloc.
- RelocationScanner scanner;
- for (InputSectionBase *sec : inputSections)
- if (sec->isLive() && (sec->flags & SHF_ALLOC))
- scanner.template scanSection<ELFT>(*sec);
- for (Partition &part : partitions) {
- for (EhInputSection *sec : part.ehFrame->sections)
- scanner.template scanSection<ELFT>(*sec);
- if (part.armExidx && part.armExidx->isLive())
- for (InputSection *sec : part.armExidx->exidxSections)
- scanner.template scanSection<ELFT>(*sec);
+
+ // Deterministic parallellism needs sorting relocations which is unsuitable
+ // for -z nocombreloc. MIPS and PPC64 use global states which are not suitable
+ // for parallelism.
+ bool serial = !config->zCombreloc || config->emachine == EM_MIPS ||
+ config->emachine == EM_PPC64;
+ parallel::TaskGroup tg;
+ for (ELFFileBase *f : ctx->objectFiles) {
+ auto fn = [f]() {
+ RelocationScanner scanner;
+ for (InputSectionBase *s : f->getSections()) {
+ if (s && s->kind() == SectionBase::Regular && s->isLive() &&
+ (s->flags & SHF_ALLOC) &&
+ !(s->type == SHT_ARM_EXIDX && config->emachine == EM_ARM))
+ scanner.template scanSection<ELFT>(*s);
+ }
+ };
+ if (serial)
+ fn();
+ else
+ tg.execute(fn);
}
+
+ // Both the main thread and thread pool index 0 use threadIndex==0. Be
+ // careful that they don't concurrently run scanSections. When serial is
+ // true, fn() has finished at this point, so running execute is safe.
+ tg.execute([] {
+ RelocationScanner scanner;
+ for (Partition &part : partitions) {
+ for (EhInputSection *sec : part.ehFrame->sections)
+ scanner.template scanSection<ELFT>(*sec);
+ if (part.armExidx && part.armExidx->isLive())
+ for (InputSection *sec : part.armExidx->exidxSections)
+ scanner.template scanSection<ELFT>(*sec);
+ }
+ });
}
static bool handleNonPreemptibleIfunc(Symbol &sym, uint16_t flags) {
@@ -1624,7 +1658,7 @@ static bool handleNonPreemptibleIfunc(Symbol &sym, uint16_t flags) {
void elf::postScanRelocations() {
auto fn = [](Symbol &sym) {
- auto flags = sym.flags;
+ auto flags = sym.flags.load(std::memory_order_relaxed);
if (handleNonPreemptibleIfunc(sym, flags))
return;
if (!sym.needsDynReloc())
@@ -1705,7 +1739,8 @@ void elf::postScanRelocations() {
addTpOffsetGotEntry(sym);
};
- if (config->needsTlsLd && in.got->addTlsIndex()) {
+ if (ctx->needsTlsLd.load(std::memory_order_relaxed) &&
+ in.got->addTlsIndex()) {
static Undefined dummy(nullptr, "", STB_LOCAL, 0, 0);
if (config->shared)
mainPart->relaDyn->addReloc(
diff --git a/lld/ELF/Symbols.h b/lld/ELF/Symbols.h
index 1c0a5f58d1cc9..8bca6c8b657f5 100644
--- a/lld/ELF/Symbols.h
+++ b/lld/ELF/Symbols.h
@@ -81,6 +81,10 @@ class Symbol {
// The file from which this symbol was created.
InputFile *file;
+ // The default copy constructor is deleted due to atomic flags. Define one for
+ // places where no atomic is needed.
+ Symbol(const Symbol &o) { memcpy(this, &o, sizeof(o)); }
+
protected:
const char *nameData;
// 32-bit size saves space.
@@ -295,7 +299,7 @@ class Symbol {
// Temporary flags used to communicate which symbol entries need PLT and GOT
// entries during postScanRelocations();
- uint16_t flags = 0;
+ std::atomic<uint16_t> flags = 0;
// A symAux index used to access GOT/PLT entry indexes. This is allocated in
// postScanRelocations().
@@ -309,15 +313,15 @@ class Symbol {
uint16_t versionId;
void setFlags(uint16_t bits) {
- flags |= bits;
+ flags.fetch_or(bits, std::memory_order_relaxed);
}
bool hasFlag(uint16_t bit) const {
assert(bit && (bit & (bit - 1)) == 0 && "bit must be a power of 2");
- return flags & bit;
+ return flags.load(std::memory_order_relaxed) & bit;
}
bool needsDynReloc() const {
- return flags &
+ return flags.load(std::memory_order_relaxed) &
(NEEDS_COPY | NEEDS_GOT | NEEDS_PLT | NEEDS_TLSDESC | NEEDS_TLSGD |
NEEDS_TLSGD_TO_IE | NEEDS_GOT_DTPREL | NEEDS_TLSIE);
}
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index afd4038da1a67..291c925cfb044 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -1572,10 +1572,11 @@ uint32_t DynamicReloc::getSymIndex(SymbolTableBaseSection *symTab) const {
RelocationBaseSection::RelocationBaseSection(StringRef name, uint32_t type,
int32_t dynamicTag,
int32_t sizeDynamicTag,
- bool combreloc)
+ bool combreloc,
+ unsigned concurrency)
: SyntheticSection(SHF_ALLOC, type, config->wordsize, name),
dynamicTag(dynamicTag), sizeDynamicTag(sizeDynamicTag),
- combreloc(combreloc) {}
+ relocsVec(concurrency), combreloc(combreloc) {}
void RelocationBaseSection::addSymbolReloc(RelType dynType,
InputSectionBase &isec,
@@ -1586,19 +1587,6 @@ void RelocationBaseSection::addSymbolReloc(RelType dynType,
R_ADDEND, addendRelType ? *addendRelType : target->noneRel);
}
-void RelocationBaseSection::addRelativeReloc(
- RelType dynType, InputSectionBase &inputSec, uint64_t offsetInSec,
- Symbol &sym, int64_t addend, RelType addendRelType, RelExpr expr) {
- // This function should only be called for non-preemptible symbols or
- // RelExpr values that refer to an address inside the output file (e.g. the
- // address of the GOT entry for a potentially preemptible symbol).
- assert((!sym.isPreemptible || expr == R_GOT) &&
- "cannot add relative relocation against preemptible symbol");
- assert(expr != R_ADDEND && "expected non-addend relocation expression");
- addReloc(DynamicReloc::AddendOnlyWithTargetVA, dynType, inputSec, offsetInSec,
- sym, addend, expr, addendRelType);
-}
-
void RelocationBaseSection::addAddendOnlyRelocIfNonPreemptible(
RelType dynType, InputSectionBase &isec, uint64_t offsetInSec, Symbol &sym,
RelType addendRelType) {
@@ -1611,17 +1599,14 @@ void RelocationBaseSection::addAddendOnlyRelocIfNonPreemptible(
sym, 0, R_ABS, addendRelType);
}
-void RelocationBaseSection::addReloc(DynamicReloc::Kind kind, RelType dynType,
- InputSectionBase &inputSec,
- uint64_t offsetInSec, Symbol &sym,
- int64_t addend, RelExpr expr,
- RelType addendRelType) {
- // Write the addends to the relocated address if required. We skip
- // it if the written value would be zero.
- if (config->writeAddends && (expr != R_ADDEND || addend != 0))
- inputSec.relocations.push_back(
- {expr, addendRelType, offsetInSec, addend, &sym});
- addReloc({dynType, &inputSec, offsetInSec, kind, sym, addend, expr});
+void RelocationBaseSection::mergeRels() {
+ size_t newSize = relocs.size();
+ for (const auto &v : relocsVec)
+ newSize += v.size();
+ relocs.reserve(newSize);
+ for (const auto &v : relocsVec)
+ llvm::append_range(relocs, v);
+ relocsVec.clear();
}
void RelocationBaseSection::partitionRels() {
@@ -1680,10 +1665,12 @@ void RelocationBaseSection::computeRels() {
}
template <class ELFT>
-RelocationSection<ELFT>::RelocationSection(StringRef name, bool combreloc)
+RelocationSection<ELFT>::RelocationSection(StringRef name, bool combreloc,
+ unsigned concurrency)
: RelocationBaseSection(name, config->isRela ? SHT_RELA : SHT_REL,
config->isRela ? DT_RELA : DT_REL,
- config->isRela ? DT_RELASZ : DT_RELSZ, combreloc) {
+ config->isRela ? DT_RELASZ : DT_RELSZ, combreloc,
+ concurrency) {
this->entsize = config->isRela ? sizeof(Elf_Rela) : sizeof(Elf_Rel);
}
@@ -1699,19 +1686,30 @@ template <class ELFT> void RelocationSection<ELFT>::writeTo(uint8_t *buf) {
}
}
-RelrBaseSection::RelrBaseSection()
+RelrBaseSection::RelrBaseSection(unsigned concurrency)
: SyntheticSection(SHF_ALLOC,
config->useAndroidRelrTags ? SHT_ANDROID_RELR : SHT_RELR,
- config->wordsize, ".relr.dyn") {}
+ config->wordsize, ".relr.dyn"),
+ relocsVec(concurrency) {}
+
+void RelrBaseSection::mergeRels() {
+ size_t newSize = relocs.size();
+ for (const auto &v : relocsVec)
+ newSize += v.size();
+ relocs.reserve(newSize);
+ for (const auto &v : relocsVec)
+ llvm::append_range(relocs, v);
+ relocsVec.clear();
+}
template <class ELFT>
AndroidPackedRelocationSection<ELFT>::AndroidPackedRelocationSection(
- StringRef name)
+ StringRef name, unsigned concurrency)
: RelocationBaseSection(
name, config->isRela ? SHT_ANDROID_RELA : SHT_ANDROID_REL,
config->isRela ? DT_ANDROID_RELA : DT_ANDROID_REL,
config->isRela ? DT_ANDROID_RELASZ : DT_ANDROID_RELSZ,
- /*combreloc=*/false) {
+ /*combreloc=*/false, concurrency) {
this->entsize = 1;
}
@@ -1959,7 +1957,9 @@ bool AndroidPackedRelocationSection<ELFT>::updateAllocSize() {
return relocData.size() != oldSize;
}
-template <class ELFT> RelrSection<ELFT>::RelrSection() {
+template <class ELFT>
+RelrSection<ELFT>::RelrSection(unsigned concurrency)
+ : RelrBaseSection(concurrency) {
this->entsize = config->wordsize;
}
diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h
index 5f7321a803d70..fd2296a4cfca5 100644
--- a/lld/ELF/SyntheticSections.h
+++ b/lld/ELF/SyntheticSections.h
@@ -26,6 +26,7 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/Endian.h"
+#include "llvm/Support/Parallel.h"
#include "llvm/Support/Threading.h"
namespace lld::elf {
@@ -115,7 +116,7 @@ class GotSection : public SyntheticSection {
// Flag to force GOT to be in output if we have relocations
// that relies on its address.
- bool hasGotOffRel = false;
+ std::atomic<bool> hasGotOffRel = false;
protected:
size_t numEntries = 0;
@@ -357,7 +358,7 @@ class GotPltSection final : public SyntheticSection {
// Flag to force GotPlt to be in output if we have relocations
// that relies on its address.
- bool hasGotPltOffRel = false;
+ std::atomic<bool> hasGotPltOffRel = false;
private:
SmallVector<const Symbol *, 0> entries;
@@ -486,32 +487,55 @@ template <class ELFT> class DynamicSection final : public SyntheticSection {
class RelocationBaseSection : public SyntheticSection {
public:
RelocationBaseSection(StringRef name, uint32_t type, int32_t dynamicTag,
- int32_t sizeDynamicTag, bool combreloc);
+ int32_t sizeDynamicTag, bool combreloc,
+ unsigned concurrency);
/// Add a dynamic relocation without writing an addend to the output section.
/// This overload can be used if the addends are written directly instead of
/// using relocations on the input section (e.g. MipsGotSection::writeTo()).
- void addReloc(const DynamicReloc &reloc) { relocs.push_back(reloc); }
+ template <bool shard = false> void addReloc(const DynamicReloc &reloc) {
+ relocs.push_back(reloc);
+ }
/// Add a dynamic relocation against \p sym with an optional addend.
void addSymbolReloc(RelType dynType, InputSectionBase &isec,
uint64_t offsetInSec, Symbol &sym, int64_t addend = 0,
llvm::Optional<RelType> addendRelType = llvm::None);
/// Add a relative dynamic relocation that uses the target address of \p sym
/// (i.e. InputSection::getRelocTargetVA()) + \p addend as the addend.
+ /// This function should only be called for non-preemptible symbols or
+ /// RelExpr values that refer to an address inside the output file (e.g. the
+ /// address of the GOT entry for a potentially preemptible symbol).
+ template <bool shard = false>
void addRelativeReloc(RelType dynType, InputSectionBase &isec,
uint64_t offsetInSec, Symbol &sym, int64_t addend,
- RelType addendRelType, RelExpr expr);
+ RelType addendRelType, RelExpr expr) {
+ assert(expr != R_ADDEND && "expected non-addend relocation expression");
+ addReloc<shard>(DynamicReloc::AddendOnlyWithTargetVA, dynType, isec,
+ offsetInSec, sym, addend, expr, addendRelType);
+ }
/// Add a dynamic relocation using the target address of \p sym as the addend
/// if \p sym is non-preemptible. Otherwise add a relocation against \p sym.
void addAddendOnlyRelocIfNonPreemptible(RelType dynType,
InputSectionBase &isec,
uint64_t offsetInSec, Symbol &sym,
RelType addendRelType);
- void addReloc(DynamicReloc::Kind kind, RelType dynType,
- InputSectionBase &inputSec, uint64_t offsetInSec, Symbol &sym,
- int64_t addend, RelExpr expr, RelType addendRelType);
- bool isNeeded() const override { return !relocs.empty(); }
+ template <bool shard = false>
+ void addReloc(DynamicReloc::Kind kind, RelType dynType, InputSectionBase &sec,
+ uint64_t offsetInSec, Symbol &sym, int64_t addend, RelExpr expr,
+ RelType addendRelType) {
+ // Write the addends to the relocated address if required. We skip
+ // it if the written value would be zero.
+ if (config->writeAddends && (expr != R_ADDEND || addend != 0))
+ sec.relocations.push_back(
+ {expr, addendRelType, offsetInSec, addend, &sym});
+ addReloc<shard>({dynType, &sec, offsetInSec, kind, sym, addend, expr});
+ }
+ bool isNeeded() const override {
+ return !relocs.empty() ||
+ llvm::any_of(relocsVec, [](auto &v) { return !v.empty(); });
+ }
size_t getSize() const override { return relocs.size() * this->entsize; }
size_t getRelativeRelocCount() const { return numRelativeRelocs; }
+ void mergeRels();
void partitionRels();
void finalizeContents() override;
static bool classof(const SectionBase *d) {
@@ -524,17 +548,25 @@ class RelocationBaseSection : public SyntheticSection {
protected:
void computeRels();
+ // Used when parallel relocation scanning adds relocations. The elements
+ // will be moved into relocs by mergeRel().
+ SmallVector<SmallVector<DynamicReloc, 0>, 0> relocsVec;
size_t numRelativeRelocs = 0; // used by -z combreloc
bool combreloc;
};
+template <>
+inline void RelocationBaseSection::addReloc<true>(const DynamicReloc &reloc) {
+ relocsVec[llvm::parallel::threadIndex].push_back(reloc);
+}
+
template <class ELFT>
class RelocationSection final : public RelocationBaseSection {
using Elf_Rel = typename ELFT::Rel;
using Elf_Rela = typename ELFT::Rela;
public:
- RelocationSection(StringRef name, bool combreloc);
+ RelocationSection(StringRef name, bool combreloc, unsigned concurrency);
void writeTo(uint8_t *buf) override;
};
@@ -544,7 +576,7 @@ class AndroidPackedRelocationSection final : public RelocationBaseSection {
using Elf_Rela = typename ELFT::Rela;
public:
- AndroidPackedRelocationSection(StringRef name);
+ AndroidPackedRelocationSection(StringRef name, unsigned concurrency);
bool updateAllocSize() override;
size_t getSize() const override { return relocData.size(); }
@@ -565,9 +597,14 @@ struct RelativeReloc {
class RelrBaseSection : public SyntheticSection {
public:
- RelrBaseSection();
- bool isNeeded() const override { return !relocs.empty(); }
+ RelrBaseSection(unsigned concurrency);
+ void mergeRels();
+ bool isNeeded() const override {
+ return !relocs.empty() ||
+ llvm::any_of(relocsVec, [](auto &v) { return !v.empty(); });
+ }
SmallVector<RelativeReloc, 0> relocs;
+ SmallVector<SmallVector<RelativeReloc, 0>, 0> relocsVec;
};
// RelrSection is used to encode offsets for relative relocations.
@@ -578,7 +615,7 @@ template <class ELFT> class RelrSection final : public RelrBaseSection {
using Elf_Relr = typename ELFT::Relr;
public:
- RelrSection();
+ RelrSection(unsigned concurrency);
bool updateAllocSize() override;
size_t getSize() const override { return relrRelocs.size() * this->entsize; }
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index de6c19863dafc..9124961a5089f 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -317,6 +317,7 @@ template <class ELFT> void elf::createSyntheticSections() {
StringRef relaDynName = config->isRela ? ".rela.dyn" : ".rel.dyn";
+ const unsigned threadCount = parallel::strategy.compute_thread_count();
for (Partition &part : partitions) {
auto add = [&](SyntheticSection &sec) {
sec.partition = part.getNumber();
@@ -350,11 +351,11 @@ template <class ELFT> void elf::createSyntheticSections() {
}
if (config->androidPackDynRelocs)
- part.relaDyn =
- std::make_unique<AndroidPackedRelocationSection<ELFT>>(relaDynName);
+ part.relaDyn = std::make_unique<AndroidPackedRelocationSection<ELFT>>(
+ relaDynName, threadCount);
else
part.relaDyn = std::make_unique<RelocationSection<ELFT>>(
- relaDynName, config->zCombreloc);
+ relaDynName, config->zCombreloc, threadCount);
if (config->hasDynSymTab) {
add(*part.dynSymTab);
@@ -386,7 +387,7 @@ template <class ELFT> void elf::createSyntheticSections() {
}
if (config->relrPackDynRelocs) {
- part.relrDyn = std::make_unique<RelrSection<ELFT>>();
+ part.relrDyn = std::make_unique<RelrSection<ELFT>>(threadCount);
add(*part.relrDyn);
}
@@ -468,7 +469,8 @@ template <class ELFT> void elf::createSyntheticSections() {
// We always need to add rel[a].plt to output if it has entries.
// Even for static linking it can contain R_[*]_IRELATIVE relocations.
in.relaPlt = std::make_unique<RelocationSection<ELFT>>(
- config->isRela ? ".rela.plt" : ".rel.plt", /*sort=*/false);
+ config->isRela ? ".rela.plt" : ".rel.plt", /*sort=*/false,
+ /*threadCount=*/1);
add(*in.relaPlt);
// The relaIplt immediately follows .rel[a].dyn to ensure that the IRelative
@@ -479,7 +481,7 @@ template <class ELFT> void elf::createSyntheticSections() {
// behaviour by placing the iplt section in .rel.plt.
in.relaIplt = std::make_unique<RelocationSection<ELFT>>(
config->androidPackDynRelocs ? in.relaPlt->name : relaDynName,
- /*sort=*/false);
+ /*sort=*/false, /*threadCount=*/1);
add(*in.relaIplt);
if ((config->emachine == EM_386 || config->emachine == EM_X86_64) &&
@@ -2074,16 +2076,20 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() {
// symbol table section (dynSymTab) must be the first one.
for (Partition &part : partitions) {
if (part.relaDyn) {
+ part.relaDyn->mergeRels();
// Compute DT_RELACOUNT to be used by part.dynamic.
part.relaDyn->partitionRels();
finalizeSynthetic(part.relaDyn.get());
}
+ if (part.relrDyn) {
+ part.relrDyn->mergeRels();
+ finalizeSynthetic(part.relrDyn.get());
+ }
finalizeSynthetic(part.dynSymTab.get());
finalizeSynthetic(part.gnuHashTab.get());
finalizeSynthetic(part.hashTab.get());
finalizeSynthetic(part.verDef.get());
- finalizeSynthetic(part.relrDyn.get());
finalizeSynthetic(part.ehFrameHdr.get());
finalizeSynthetic(part.verSym.get());
finalizeSynthetic(part.verNeed.get());
diff --git a/lld/test/ELF/combreloc.s b/lld/test/ELF/combreloc.s
index 17edb93cb5069..59dedceb633fc 100644
--- a/lld/test/ELF/combreloc.s
+++ b/lld/test/ELF/combreloc.s
@@ -35,8 +35,8 @@
# NOCOMB-NEXT: 0x3400 R_X86_64_64 ccc 0x0
# NOCOMB-NEXT: 0x3408 R_X86_64_64 bbb 0x0
# NOCOMB-NEXT: 0x3410 R_X86_64_64 aaa 0x0
-# NOCOMB-NEXT: 0x3418 R_X86_64_RELATIVE - 0x3420
# NOCOMB-NEXT: 0x23F0 R_X86_64_GLOB_DAT aaa 0x0
+# NOCOMB-NEXT: 0x3418 R_X86_64_RELATIVE - 0x3420
# NOCOMB-NEXT: }
.globl aaa, bbb, ccc
diff --git a/lld/test/ELF/comdat-discarded-error.s b/lld/test/ELF/comdat-discarded-error.s
index dec927d32f39d..f7ff635a0812d 100644
--- a/lld/test/ELF/comdat-discarded-error.s
+++ b/lld/test/ELF/comdat-discarded-error.s
@@ -5,7 +5,7 @@
# RUN: echo '.weak foo; foo: .section .text.foo,"axG",@progbits,foo,comdat; .globl bar; bar:' |\
# RUN: llvm-mc -filetype=obj -triple=x86_64 - -o %t3.o
-# RUN: not ld.lld %t2.o %t3.o %t1.o -o /dev/null 2>&1 | FileCheck %s
+# RUN: not ld.lld --threads=1 %t2.o %t3.o %t1.o -o /dev/null 2>&1 | FileCheck %s
# CHECK: error: relocation refers to a symbol in a discarded section: bar
# CHECK-NEXT: >>> defined in {{.*}}3.o
diff --git a/lld/test/ELF/undef-multi.s b/lld/test/ELF/undef-multi.s
index bc1b0623fa580..af21693d08913 100644
--- a/lld/test/ELF/undef-multi.s
+++ b/lld/test/ELF/undef-multi.s
@@ -1,7 +1,7 @@
# REQUIRES: x86
# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/undef.s -o %t2.o
-# RUN: not ld.lld %t.o %t2.o -o /dev/null 2>&1 | FileCheck %s
+# RUN: not ld.lld --threads=1 %t.o %t2.o -o /dev/null 2>&1 | FileCheck %s
# CHECK: error: undefined symbol: zed2
# CHECK-NEXT: >>> referenced by undef-multi.s
@@ -24,7 +24,7 @@
# RUN: echo " call zed2" >> %t.moreref.s
# RUN: echo " call zed2" >> %t.moreref.s
# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %t.moreref.s -o %t3.o
-# RUN: not ld.lld %t.o %t2.o %t3.o -o /dev/null -error-limit=2 2>&1 | \
+# RUN: not ld.lld --threads=1 %t.o %t2.o %t3.o -o /dev/null -error-limit=2 2>&1 | \
# RUN: FileCheck --check-prefix=LIMIT %s
# LIMIT: error: undefined symbol: zed2
diff --git a/lld/test/ELF/undef.s b/lld/test/ELF/undef.s
index 6398b73d51e62..2b42ae12be2c1 100644
--- a/lld/test/ELF/undef.s
+++ b/lld/test/ELF/undef.s
@@ -5,9 +5,9 @@
# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/undef-bad-debug.s -o %t4.o
# RUN: rm -f %t2.a
# RUN: llvm-ar rc %t2.a %t2.o
-# RUN: not ld.lld %t.o %t2.a %t3.o %t4.o -o /dev/null 2>&1 \
+# RUN: not ld.lld --threads=1 %t.o %t2.a %t3.o %t4.o -o /dev/null 2>&1 \
# RUN: | FileCheck %s --implicit-check-not="error:" --implicit-check-not="warning:"
-# RUN: not ld.lld -pie %t.o %t2.a %t3.o %t4.o -o /dev/null 2>&1 \
+# RUN: not ld.lld --threads=1 -pie %t.o %t2.a %t3.o %t4.o -o /dev/null 2>&1 \
# RUN: | FileCheck %s --implicit-check-not="error:" --implicit-check-not="warning:"
# CHECK: error: undefined symbol: foo
diff --git a/llvm/include/llvm/Support/Parallel.h b/llvm/include/llvm/Support/Parallel.h
index 6569479674071..918edc07d96ae 100644
--- a/llvm/include/llvm/Support/Parallel.h
+++ b/llvm/include/llvm/Support/Parallel.h
@@ -28,6 +28,7 @@ namespace parallel {
// this file. It defaults to using all hardware threads and should be
// initialized before the first use of parallel routines.
extern ThreadPoolStrategy strategy;
+extern thread_local unsigned threadIndex;
namespace detail {
class Latch {
diff --git a/llvm/lib/Support/Parallel.cpp b/llvm/lib/Support/Parallel.cpp
index 9f13726e36913..71c41c24817fe 100644
--- a/llvm/lib/Support/Parallel.cpp
+++ b/llvm/lib/Support/Parallel.cpp
@@ -18,6 +18,7 @@
#include <vector>
llvm::ThreadPoolStrategy llvm::parallel::strategy;
+thread_local unsigned llvm::parallel::threadIndex;
namespace llvm {
namespace parallel {
@@ -95,6 +96,7 @@ class ThreadPoolExecutor : public Executor {
private:
void work(ThreadPoolStrategy S, unsigned ThreadID) {
+ threadIndex = ThreadID;
S.apply_thread_strategy(ThreadID);
while (true) {
std::unique_lock<std::mutex> Lock(Mutex);
More information about the llvm-commits
mailing list