[lld] a815424 - Reland D119909 [ELF] Parallelize initializeLocalSymbols

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 4 19:00:14 PST 2022


Author: Fangrui Song
Date: 2022-03-04T19:00:10-08:00
New Revision: a815424cc5df25d3e2dbdbc5fb3bf3f4dc927dff

URL: https://github.com/llvm/llvm-project/commit/a815424cc5df25d3e2dbdbc5fb3bf3f4dc927dff
DIFF: https://github.com/llvm/llvm-project/commit/a815424cc5df25d3e2dbdbc5fb3bf3f4dc927dff.diff

LOG: Reland D119909 [ELF] Parallelize initializeLocalSymbols

ObjFile::parse combines symbol initialization and resolution. Many tasks
unrelated to symbol resolution can be postponed and parallelized. This patch
extracts local symbol initialization and parallelizes it.

Technically the new function initializeLocalSymbols can be merged into
ObjFile::postParse, but functions like getSrcMsg may access the
uninitialized (all nullptr) local part of InputFile::symbols.

Linking chrome: 1.02x as fast with glibc malloc, 1.04x as fast with mimalloc

Depends on f456c3ae3f4182b23673929e8fe0aa18bcec4289 and D119908

Reviewed By: ikudrin

Differential Revision: https://reviews.llvm.org/D119909

Added: 
    

Modified: 
    lld/ELF/Driver.cpp
    lld/ELF/InputFiles.cpp
    lld/ELF/InputFiles.h
    lld/ELF/InputSection.cpp

Removed: 
    


################################################################################
diff  --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 3c68686e50d09..ed9566f7fedee 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -2316,6 +2316,25 @@ static uint32_t getAndFeatures() {
   return ret;
 }
 
+static void initializeLocalSymbols(ELFFileBase *file) {
+  switch (config->ekind) {
+  case ELF32LEKind:
+    cast<ObjFile<ELF32LE>>(file)->initializeLocalSymbols();
+    break;
+  case ELF32BEKind:
+    cast<ObjFile<ELF32BE>>(file)->initializeLocalSymbols();
+    break;
+  case ELF64LEKind:
+    cast<ObjFile<ELF64LE>>(file)->initializeLocalSymbols();
+    break;
+  case ELF64BEKind:
+    cast<ObjFile<ELF64BE>>(file)->initializeLocalSymbols();
+    break;
+  default:
+    llvm_unreachable("");
+  }
+}
+
 static void postParseObjectFile(ELFFileBase *file) {
   switch (config->ekind) {
   case ELF32LEKind:
@@ -2457,6 +2476,7 @@ void LinkerDriver::link(opt::InputArgList &args) {
 
   // No more lazy bitcode can be extracted at this point. Do post parse work
   // like checking duplicate symbols.
+  parallelForEach(objectFiles, initializeLocalSymbols);
   parallelForEach(objectFiles, postParseObjectFile);
   parallelForEach(bitcodeFiles, [](BitcodeFile *file) { file->postParse(); });
 
@@ -2529,6 +2549,7 @@ void LinkerDriver::link(opt::InputArgList &args) {
   // compileBitcodeFiles may have produced lto.tmp object files. After this, no
   // more file will be added.
   auto newObjectFiles = makeArrayRef(objectFiles).slice(numObjsBeforeLTO);
+  parallelForEach(newObjectFiles, initializeLocalSymbols);
   parallelForEach(newObjectFiles, postParseObjectFile);
 
   // Handle --exclude-libs again because lto.tmp may reference additional

diff  --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index 2423be8e3cbf0..0df7552dd658e 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -1021,41 +1021,6 @@ void ObjFile<ELFT>::initializeSymbols(const object::ELFFile<ELFT> &obj) {
 
   ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>();
   symbols.resize(eSyms.size());
-  SymbolUnion *locals =
-      firstGlobal == 0
-          ? nullptr
-          : getSpecificAllocSingleton<SymbolUnion>().Allocate(firstGlobal);
-
-  for (size_t i = 0, end = firstGlobal; i != end; ++i) {
-    const Elf_Sym &eSym = eSyms[i];
-    uint32_t secIdx = eSym.st_shndx;
-    if (LLVM_UNLIKELY(secIdx == SHN_XINDEX))
-      secIdx = check(getExtendedSymbolTableIndex<ELFT>(eSym, i, shndxTable));
-    else if (secIdx >= SHN_LORESERVE)
-      secIdx = 0;
-    if (LLVM_UNLIKELY(secIdx >= sections.size()))
-      fatal(toString(this) + ": invalid section index: " + Twine(secIdx));
-    if (LLVM_UNLIKELY(eSym.getBinding() != STB_LOCAL))
-      error(toString(this) + ": non-local symbol (" + Twine(i) +
-            ") found at index < .symtab's sh_info (" + Twine(end) + ")");
-
-    InputSectionBase *sec = sections[secIdx];
-    uint8_t type = eSym.getType();
-    if (type == STT_FILE)
-      sourceFile = CHECK(eSym.getName(stringTable), this);
-    if (LLVM_UNLIKELY(stringTable.size() <= eSym.st_name))
-      fatal(toString(this) + ": invalid symbol name offset");
-    StringRef name(stringTable.data() + eSym.st_name);
-
-    symbols[i] = reinterpret_cast<Symbol *>(locals + i);
-    if (eSym.st_shndx == SHN_UNDEF || sec == &InputSection::discarded)
-      new (symbols[i]) Undefined(this, name, STB_LOCAL, eSym.st_other, type,
-                                 /*discardedSecIdx=*/secIdx);
-    else
-      new (symbols[i]) Defined(this, name, STB_LOCAL, eSym.st_other, type,
-                               eSym.st_value, eSym.st_size, sec);
-    symbols[i]->isUsedInRegularObj = true;
-  }
 
   // Some entries have been filled by LazyObjFile.
   for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i)
@@ -1149,6 +1114,45 @@ void ObjFile<ELFT>::initializeSymbols(const object::ELFFile<ELFT> &obj) {
   }
 }
 
+template <class ELFT> void ObjFile<ELFT>::initializeLocalSymbols() {
+  if (!firstGlobal)
+    return;
+  localSymStorage = std::make_unique<SymbolUnion[]>(firstGlobal);
+  SymbolUnion *locals = localSymStorage.get();
+
+  ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>();
+  for (size_t i = 0, end = firstGlobal; i != end; ++i) {
+    const Elf_Sym &eSym = eSyms[i];
+    uint32_t secIdx = eSym.st_shndx;
+    if (LLVM_UNLIKELY(secIdx == SHN_XINDEX))
+      secIdx = check(getExtendedSymbolTableIndex<ELFT>(eSym, i, shndxTable));
+    else if (secIdx >= SHN_LORESERVE)
+      secIdx = 0;
+    if (LLVM_UNLIKELY(secIdx >= sections.size()))
+      fatal(toString(this) + ": invalid section index: " + Twine(secIdx));
+    if (LLVM_UNLIKELY(eSym.getBinding() != STB_LOCAL))
+      error(toString(this) + ": non-local symbol (" + Twine(i) +
+            ") found at index < .symtab's sh_info (" + Twine(end) + ")");
+
+    InputSectionBase *sec = sections[secIdx];
+    uint8_t type = eSym.getType();
+    if (type == STT_FILE)
+      sourceFile = CHECK(eSym.getName(stringTable), this);
+    if (LLVM_UNLIKELY(stringTable.size() <= eSym.st_name))
+      fatal(toString(this) + ": invalid symbol name offset");
+    StringRef name(stringTable.data() + eSym.st_name);
+
+    symbols[i] = reinterpret_cast<Symbol *>(locals + i);
+    if (eSym.st_shndx == SHN_UNDEF || sec == &InputSection::discarded)
+      new (symbols[i]) Undefined(this, name, STB_LOCAL, eSym.st_other, type,
+                                 /*discardedSecIdx=*/secIdx);
+    else
+      new (symbols[i]) Defined(this, name, STB_LOCAL, eSym.st_other, type,
+                               eSym.st_value, eSym.st_size, sec);
+    symbols[i]->isUsedInRegularObj = true;
+  }
+}
+
 // Called after all ObjFile::parse is called for all ObjFiles. This checks
 // duplicate symbols and may do symbol property merge in the future.
 template <class ELFT> void ObjFile<ELFT>::postParse() {

diff  --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h
index e2aba8e6250cf..68129d0d298d1 100644
--- a/lld/ELF/InputFiles.h
+++ b/lld/ELF/InputFiles.h
@@ -10,6 +10,7 @@
 #define LLD_ELF_INPUT_FILES_H
 
 #include "Config.h"
+#include "Symbols.h"
 #include "lld/Common/ErrorHandler.h"
 #include "lld/Common/LLVM.h"
 #include "lld/Common/Reproduce.h"
@@ -273,6 +274,7 @@ template <class ELFT> class ObjFile : public ELFFileBase {
   // Get cached DWARF information.
   DWARFCache *getDwarf();
 
+  void initializeLocalSymbols();
   void postParse();
 
 private:
@@ -302,6 +304,9 @@ template <class ELFT> class ObjFile : public ELFFileBase {
   // If the section does not exist (which is common), the array is empty.
   ArrayRef<Elf_Word> shndxTable;
 
+  // Storage for local symbols.
+  std::unique_ptr<SymbolUnion[]> localSymStorage;
+
   // Debugging information to retrieve source file and line for error
   // reporting. Linker may find reasonable number of errors in a
   // single object file, so we cache debugging information in order to

diff  --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 5360131c84f4b..497d97af2f42b 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -297,9 +297,10 @@ std::string InputSectionBase::getObjMsg(uint64_t off) {
   if (!file->archiveName.empty())
     archive = (" in archive " + file->archiveName).str();
 
-  // Find a symbol that encloses a given location.
+  // Find a symbol that encloses a given location. getObjMsg may be called
+  // before ObjFile::initializeLocalSymbols where local symbols are initialized.
   for (Symbol *b : file->getSymbols())
-    if (auto *d = dyn_cast<Defined>(b))
+    if (auto *d = dyn_cast_or_null<Defined>(b))
       if (d->section == this && d->value <= off && off < d->value + d->size)
         return filename + ":(" + toString(*d) + ")" + archive;
 


        


More information about the llvm-commits mailing list