[lld] r361473 - Speed up --start-lib and --end-lib.

Rui Ueyama via llvm-commits llvm-commits at lists.llvm.org
Thu May 23 02:53:30 PDT 2019


Author: ruiu
Date: Thu May 23 02:53:30 2019
New Revision: 361473

URL: http://llvm.org/viewvc/llvm-project?rev=361473&view=rev
Log:
Speed up --start-lib and --end-lib.

--{start,end}-lib give files grouped by the options the archive file
semantics. That is, each object file between them acts as if it were
in an archive file whose sole member is the file.

Therefore, files between --{start,end}-lib are linked to the final
output only if they are needed to resolve some undefined symbols.

Previously, the feature was implemented this way:

 1. We read a symbol table and insert defined symbols to the symbol
    table as lazy symbols.

 2. If an undefind symbol is resolved to a lazy symbol, that lazy
    symbol instantiate ObjFile class for that symbol, which re-insert
    all defined symbols to the symbol table.

So, if an ObjFile is instantiated, defined symbols are inserted to the
symbol table twice. Since inserting long symbol names is not cheap,
there's a room to optimize here.

This patch optimzies it. Now, LazyObjFile remembers symbol handles and
passed them over to a new ObjFile instance, so that the ObjFile
doesn't insert the same strings.

Here is a quick benchmark to link clang. "Original" is the original
lld with unmodified command line options. For "Case 1" and "Case 2", I
extracted all files from archive files and replace .a's in a command
line with .o's wrapped with --{start,end}-lib. I used the original lld
for Case 1" and use this patch for Case 2.

  Original: 5.892
    Case 1: 6.001 (+1.8%)
    Case 2: 5.701 (-3.2%)

So, interestingly, --{start,end}-lib are now faster than the regular
linking scheme with archive files. That's perhaps not too surprising,
though, because for regular archive files, we look up the symbol table
with the same string twice.

Differential Revision: https://reviews.llvm.org/D62188

Modified:
    lld/trunk/ELF/InputFiles.cpp
    lld/trunk/ELF/InputFiles.h
    lld/trunk/ELF/SymbolTable.cpp

Modified: lld/trunk/ELF/InputFiles.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/InputFiles.cpp?rev=361473&r1=361472&r2=361473&view=diff
==============================================================================
--- lld/trunk/ELF/InputFiles.cpp (original)
+++ lld/trunk/ELF/InputFiles.cpp Thu May 23 02:53:30 2019
@@ -913,62 +913,91 @@ StringRef ObjFile<ELFT>::getSectionName(
   return CHECK(getObj().getSectionName(&Sec, SectionStringTable), this);
 }
 
+// Initialize this->Symbols. this->Symbols is a parallel array as
+// its corresponding ELF symbol table.
 template <class ELFT> void ObjFile<ELFT>::initializeSymbols() {
-  this->Symbols.reserve(this->getELFSyms<ELFT>().size());
-  for (const Elf_Sym &Sym : this->getELFSyms<ELFT>())
-    this->Symbols.push_back(createSymbol(&Sym));
-}
-
-template <class ELFT> Symbol *ObjFile<ELFT>::createSymbol(const Elf_Sym *Sym) {
-  uint32_t SecIdx = getSectionIndex(*Sym);
-  if (SecIdx >= this->Sections.size())
-    fatal(toString(this) + ": invalid section index: " + Twine(SecIdx));
-
-  InputSectionBase *Sec = this->Sections[SecIdx];
-  uint8_t Binding = Sym->getBinding();
-  uint8_t StOther = Sym->st_other;
-  uint8_t Type = Sym->getType();
-  uint64_t Value = Sym->st_value;
-  uint64_t Size = Sym->st_size;
+  ArrayRef<Elf_Sym> ESyms = this->getELFSyms<ELFT>();
+  this->Symbols.resize(ESyms.size());
 
-  if (Binding == STB_LOCAL) {
-    if (Sym->getType() == STT_FILE)
-      SourceFile = CHECK(Sym->getName(this->StringTable), this);
-
-    if (this->StringTable.size() <= Sym->st_name)
-      fatal(toString(this) + ": invalid symbol name offset");
-
-    StringRefZ Name = this->StringTable.data() + Sym->st_name;
-    if (Sym->st_shndx == SHN_UNDEF)
-      return make<Undefined>(this, Name, Binding, StOther, Type);
-    return make<Defined>(this, Name, Binding, StOther, Type, Value, Size, Sec);
-  }
+  // Our symbol table may have already been partially initialized
+  // because of LazyObjFile.
+  for (size_t I = 0, End = ESyms.size(); I != End; ++I)
+    if (!this->Symbols[I] && ESyms[I].getBinding() != STB_LOCAL)
+      this->Symbols[I] =
+          Symtab->insert(CHECK(ESyms[I].getName(this->StringTable), this));
+
+  // Fill this->Symbols. A symbol is either local or global.
+  for (size_t I = 0, End = ESyms.size(); I != End; ++I) {
+    const Elf_Sym &ESym = ESyms[I];
+
+    // Read symbol attributes.
+    uint32_t SecIdx = getSectionIndex(ESym);
+    if (SecIdx >= this->Sections.size())
+      fatal(toString(this) + ": invalid section index: " + Twine(SecIdx));
+
+    InputSectionBase *Sec = this->Sections[SecIdx];
+    uint8_t Binding = ESym.getBinding();
+    uint8_t StOther = ESym.st_other;
+    uint8_t Type = ESym.getType();
+    uint64_t Value = ESym.st_value;
+    uint64_t Size = ESym.st_size;
+    StringRefZ Name = this->StringTable.data() + ESym.st_name;
+
+    // Handle local symbols. Local symbols are not added to the symbol
+    // table because they are not visible from other object files. We
+    // allocate symbol instances and add their pointers to Symbols.
+    if (Binding == STB_LOCAL) {
+      if (ESym.getType() == STT_FILE)
+        SourceFile = CHECK(ESym.getName(this->StringTable), this);
+
+      if (this->StringTable.size() <= ESym.st_name)
+        fatal(toString(this) + ": invalid symbol name offset");
+
+      if (ESym.st_shndx == SHN_UNDEF)
+        this->Symbols[I] = make<Undefined>(this, Name, Binding, StOther, Type);
+      else
+        this->Symbols[I] =
+            make<Defined>(this, Name, Binding, StOther, Type, Value, Size, Sec);
+      continue;
+    }
 
-  StringRef Name = CHECK(Sym->getName(this->StringTable), this);
+    // Handle global undefined symbols.
+    if (ESym.st_shndx == SHN_UNDEF) {
+      resolveSymbol(this->Symbols[I],
+                    Undefined{this, Name, Binding, StOther, Type});
+      continue;
+    }
 
-  if (Sym->st_shndx == SHN_UNDEF)
-    return Symtab->addSymbol(Undefined{this, Name, Binding, StOther, Type});
+    // Handle global common symbols.
+    if (ESym.st_shndx == SHN_COMMON) {
+      if (Value == 0 || Value >= UINT32_MAX)
+        fatal(toString(this) + ": common symbol '" + StringRef(Name.Data) +
+              "' has invalid alignment: " + Twine(Value));
+      resolveSymbol(this->Symbols[I], CommonSymbol{this, Name, Binding, StOther,
+                                                   Type, Value, Size});
+      continue;
+    }
 
-  if (Sec == &InputSection::Discarded)
-    return Symtab->addSymbol(Undefined{this, Name, Binding, StOther, Type,
-                                       /*DiscardedSecIdx=*/SecIdx});
+    // If a defined symbol is in a discarded section, handle it as if it
+    // were an undefined symbol. Such symbol doesn't comply with the
+    // standard, but in practice, a .eh_frame often directly refer
+    // COMDAT member sections, and if a comdat group is discarded, some
+    // defined symbol in a .eh_frame becomes dangling symbols.
+    if (Sec == &InputSection::Discarded) {
+      resolveSymbol(this->Symbols[I],
+                    Undefined{this, Name, Binding, StOther, Type, SecIdx});
+      continue;
+    }
 
-  if (Sym->st_shndx == SHN_COMMON) {
-    if (Value == 0 || Value >= UINT32_MAX)
-      fatal(toString(this) + ": common symbol '" + Name +
-            "' has invalid alignment: " + Twine(Value));
-    return Symtab->addSymbol(
-        CommonSymbol{this, Name, Binding, StOther, Type, Value, Size});
-  }
+    // Handle global defined symbols.
+    if (Binding == STB_GLOBAL || Binding == STB_WEAK ||
+        Binding == STB_GNU_UNIQUE) {
+      resolveSymbol(this->Symbols[I], Defined{this, Name, Binding, StOther,
+                                              Type, Value, Size, Sec});
+      continue;
+    }
 
-  switch (Binding) {
-  default:
     fatal(toString(this) + ": unexpected binding: " + Twine((int)Binding));
-  case STB_GLOBAL:
-  case STB_WEAK:
-  case STB_GNU_UNIQUE:
-    return Symtab->addSymbol(
-        Defined{this, Name, Binding, StOther, Type, Value, Size, Sec});
   }
 }
 
@@ -1455,10 +1484,16 @@ InputFile *LazyObjFile::fetch() {
 
   InputFile *File = createObjectFile(MBRef, ArchiveName, OffsetInArchive);
   File->GroupId = GroupId;
+
+  // Copy symbol vector so that the new InputFile doesn't have to
+  // insert the same defined symbols to the symbol table again.
+  File->Symbols = std::move(Symbols);
   return File;
 }
 
 template <class ELFT> void LazyObjFile::parse() {
+  using Elf_Sym = typename ELFT::Sym;
+
   // A lazy object file wraps either a bitcode file or an ELF file.
   if (isBitcode(this->MB)) {
     std::unique_ptr<lto::InputFile> Obj =
@@ -1476,6 +1511,7 @@ template <class ELFT> void LazyObjFile::
     return;
   }
 
+  // Find a symbol table.
   ELFFile<ELFT> Obj = check(ELFFile<ELFT>::create(MB.getBuffer()));
   ArrayRef<typename ELFT::Shdr> Sections = CHECK(Obj.sections(), this);
 
@@ -1483,16 +1519,28 @@ template <class ELFT> void LazyObjFile::
     if (Sec.sh_type != SHT_SYMTAB)
       continue;
 
-    typename ELFT::SymRange Syms = CHECK(Obj.symbols(&Sec), this);
+    // A symbol table is found.
+    ArrayRef<Elf_Sym> ESyms = CHECK(Obj.symbols(&Sec), this);
     uint32_t FirstGlobal = Sec.sh_info;
-    StringRef StringTable =
-        CHECK(Obj.getStringTableForSymtab(Sec, Sections), this);
+    StringRef Strtab = CHECK(Obj.getStringTableForSymtab(Sec, Sections), this);
+    this->Symbols.resize(ESyms.size());
 
-    for (const typename ELFT::Sym &Sym : Syms.slice(FirstGlobal)) {
-      if (Sym.st_shndx == SHN_UNDEF)
+    // Get existing symbols or insert placeholder symbols.
+    for (size_t I = FirstGlobal, End = ESyms.size(); I != End; ++I)
+      if (ESyms[I].st_shndx != SHN_UNDEF)
+        this->Symbols[I] = Symtab->insert(CHECK(ESyms[I].getName(Strtab), this));
+
+    // Replace existing symbols with LazyObject symbols.
+    //
+    // resolveSymbol() may trigger this->fetch() if an existing symbol
+    // is an undefined symbol. If that happens, this LazyObjFile has
+    // served its purpose, and we can exit from the loop early.
+    for (Symbol *Sym : this->Symbols) {
+      if (!Sym)
         continue;
-      Symtab->addSymbol(
-          LazyObject{*this, CHECK(Sym.getName(StringTable), this)});
+      resolveSymbol(Sym, LazyObject{*this, Sym->getName()});
+      if (AddedToLink)
+        return;
     }
     return;
   }

Modified: lld/trunk/ELF/InputFiles.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/InputFiles.h?rev=361473&r1=361472&r2=361473&view=diff
==============================================================================
--- lld/trunk/ELF/InputFiles.h (original)
+++ lld/trunk/ELF/InputFiles.h Thu May 23 02:53:30 2019
@@ -139,10 +139,11 @@ public:
   // Index of MIPS GOT built for this file.
   llvm::Optional<size_t> MipsGotIndex;
 
+  std::vector<Symbol *> Symbols;
+
 protected:
   InputFile(Kind K, MemoryBufferRef M);
   std::vector<InputSectionBase *> Sections;
-  std::vector<Symbol *> Symbols;
 
 private:
   const Kind FileKind;
@@ -255,7 +256,6 @@ private:
   StringRef getSectionName(const Elf_Shdr &Sec);
 
   bool shouldMerge(const Elf_Shdr &Sec);
-  Symbol *createSymbol(const Elf_Sym *Sym);
 
   // Each ELF symbol contains a section index which the symbol belongs to.
   // However, because the number of bits dedicated for that is limited, a

Modified: lld/trunk/ELF/SymbolTable.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/SymbolTable.cpp?rev=361473&r1=361472&r2=361473&view=diff
==============================================================================
--- lld/trunk/ELF/SymbolTable.cpp (original)
+++ lld/trunk/ELF/SymbolTable.cpp Thu May 23 02:53:30 2019
@@ -92,6 +92,7 @@ Symbol *SymbolTable::insert(StringRef Na
   Symbol *Sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
   SymVector.push_back(Sym);
 
+  Sym->setName(Name);
   Sym->SymbolKind = Symbol::PlaceholderKind;
   Sym->VersionId = Config->DefaultSymbolVersion;
   Sym->Visibility = STV_DEFAULT;




More information about the llvm-commits mailing list