[lld] 3a5fb57 - [ELF] Replace LazyObjFile with lazy ObjFile/BitcodeFile

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 22 17:41:55 PST 2021


Author: Fangrui Song
Date: 2021-12-22T17:41:50-08:00
New Revision: 3a5fb57393c3bc77be9e7afc2ec9d4ec3c9bbf70

URL: https://github.com/llvm/llvm-project/commit/3a5fb57393c3bc77be9e7afc2ec9d4ec3c9bbf70
DIFF: https://github.com/llvm/llvm-project/commit/3a5fb57393c3bc77be9e7afc2ec9d4ec3c9bbf70.diff

LOG: [ELF] Replace LazyObjFile with lazy ObjFile/BitcodeFile

The new `lazy` state is the inverse of the previous `LazyObjFile::extracted`.
There are many advantages:

* previously when a LazyObjFile was extracted, a new ObjFile/BitcodeFile was created; now the file is reused, just with `lazy` cleared
* avoid the confusing transfer of `symbols` from LazyObjFile to the new file
* the `incompatible file:` diagnostic is unified with `is incompatible with`
* simpler code, smaller executable (6200+ bytes smaller on x86-64)
* make eager parsing feasible (for parallel section/symbol table initialization)

Added: 
    

Modified: 
    lld/ELF/Driver.cpp
    lld/ELF/InputFiles.cpp
    lld/ELF/InputFiles.h
    lld/ELF/LTO.cpp
    lld/ELF/Symbols.cpp
    lld/ELF/Symbols.h
    lld/test/ELF/lazy-arch-conflict.s

Removed: 
    


################################################################################
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 05cc4db5af749..7d01b7f33deca 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -90,7 +90,7 @@ bool elf::link(ArrayRef<const char *> args, bool canExitEarly,
     archiveFiles.clear();
     binaryFiles.clear();
     bitcodeFiles.clear();
-    lazyObjFiles.clear();
+    lazyBitcodeFiles.clear();
     objectFiles.clear();
     sharedFiles.clear();
     backwardReferences.clear();
@@ -248,7 +248,7 @@ void LinkerDriver::addFile(StringRef path, bool withLOption) {
 
       for (const std::pair<MemoryBufferRef, uint64_t> &p :
            getArchiveMembers(mbref))
-        files.push_back(make<LazyObjFile>(p.first, path, p.second));
+        files.push_back(createLazyFile(p.first, path, p.second));
       return;
     }
 
@@ -273,7 +273,7 @@ void LinkerDriver::addFile(StringRef path, bool withLOption) {
   case file_magic::bitcode:
   case file_magic::elf_relocatable:
     if (inLib)
-      files.push_back(make<LazyObjFile>(mbref, "", 0));
+      files.push_back(createLazyFile(mbref, "", 0));
     else
       files.push_back(createObjectFile(mbref));
     break;

diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index fb1fca1c7f0f2..cf8fd1dfc3137 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -46,7 +46,7 @@ uint32_t InputFile::nextGroupId;
 std::vector<ArchiveFile *> elf::archiveFiles;
 std::vector<BinaryFile *> elf::binaryFiles;
 std::vector<BitcodeFile *> elf::bitcodeFiles;
-std::vector<LazyObjFile *> elf::lazyObjFiles;
+std::vector<BitcodeFile *> elf::lazyBitcodeFiles;
 std::vector<ELFFileBase *> elf::objectFiles;
 std::vector<SharedFile *> elf::sharedFiles;
 
@@ -186,9 +186,13 @@ template <class ELFT> static void doParseFile(InputFile *file) {
   }
 
   // Lazy object file
-  if (auto *f = dyn_cast<LazyObjFile>(file)) {
-    lazyObjFiles.push_back(f);
-    f->parse<ELFT>();
+  if (file->lazy) {
+    if (auto *f = dyn_cast<BitcodeFile>(file)) {
+      lazyBitcodeFiles.push_back(f);
+      f->parseLazy();
+    } else {
+      cast<ObjFile<ELFT>>(file)->parseLazy();
+    }
     return;
   }
 
@@ -1130,15 +1134,14 @@ template <class ELFT> void ObjFile<ELFT>::initializeSymbols() {
     // defined symbol in a .eh_frame becomes dangling symbols.
     if (sec == &InputSection::discarded) {
       Undefined und{this, name, binding, stOther, type, secIdx};
-      // !ArchiveFile::parsed or LazyObjFile::extracted means that the file
+      // !ArchiveFile::parsed or !LazyObjFile::lazy means that the file
       // containing this object has not finished processing, i.e. this symbol is
       // a result of a lazy symbol extract. We should demote the lazy symbol to
       // an Undefined so that any relocations outside of the group to it will
       // trigger a discarded section error.
       if ((sym->symbolKind == Symbol::LazyArchiveKind &&
            !cast<ArchiveFile>(sym->file)->parsed) ||
-          (sym->symbolKind == Symbol::LazyObjectKind &&
-           cast<LazyObjFile>(sym->file)->extracted)) {
+          (sym->symbolKind == Symbol::LazyObjectKind && !sym->file->lazy)) {
         sym->replace(und);
         // Prevent LTO from internalizing the symbol in case there is a
         // reference to this symbol from this file.
@@ -1630,9 +1633,10 @@ static uint8_t getOsAbi(const Triple &t) {
 }
 
 BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
-                         uint64_t offsetInArchive)
+                         uint64_t offsetInArchive, bool lazy)
     : InputFile(BitcodeKind, mb) {
   this->archiveName = archiveName;
+  this->lazy = lazy;
 
   std::string path = mb.getBufferIdentifier().str();
   if (config->thinLTOIndexOnly)
@@ -1718,6 +1722,12 @@ template <class ELFT> void BitcodeFile::parse() {
     addDependentLibrary(l, this);
 }
 
+void BitcodeFile::parseLazy() {
+  for (const lto::InputFile::Symbol &sym : obj->symbols())
+    if (!sym.isUndefined())
+      symtab->addSymbol(LazyObject{*this, saver.save(sym.getName())});
+}
+
 void BinaryFile::parse() {
   ArrayRef<uint8_t> data = arrayRefFromStringRef(mb.getBuffer());
   auto *section = make<InputSection>(this, SHF_ALLOC | SHF_WRITE, SHT_PROGBITS,
@@ -1744,7 +1754,7 @@ void BinaryFile::parse() {
 InputFile *elf::createObjectFile(MemoryBufferRef mb, StringRef archiveName,
                                  uint64_t offsetInArchive) {
   if (isBitcode(mb))
-    return make<BitcodeFile>(mb, archiveName, offsetInArchive);
+    return make<BitcodeFile>(mb, archiveName, offsetInArchive, /*lazy=*/false);
 
   switch (getELFKind(mb, archiveName)) {
   case ELF32LEKind:
@@ -1760,41 +1770,20 @@ InputFile *elf::createObjectFile(MemoryBufferRef mb, StringRef archiveName,
   }
 }
 
-void LazyObjFile::extract() {
-  if (extracted)
-    return;
-  extracted = true;
-
-  InputFile *file = createObjectFile(mb, archiveName, offsetInArchive);
-  file->groupId = groupId;
-
-  // Copy symbol vector so that the new InputFile doesn't have to
-  // insert the same defined symbols to the symbol table again.
-  file->symbols = std::move(symbols);
+InputFile *elf::createLazyFile(MemoryBufferRef mb, StringRef archiveName,
+                               uint64_t offsetInArchive) {
+  if (isBitcode(mb))
+    return make<BitcodeFile>(mb, archiveName, offsetInArchive, /*lazy=*/true);
 
-  parseFile(file);
+  auto *file =
+      cast<ELFFileBase>(createObjectFile(mb, archiveName, offsetInArchive));
+  file->lazy = true;
+  return file;
 }
 
-template <class ELFT> void LazyObjFile::parse() {
+template <class ELFT> void ObjFile<ELFT>::parseLazy() {
   using Elf_Sym = typename ELFT::Sym;
 
-  // A lazy object file wraps either a bitcode file or an ELF file.
-  if (isBitcode(this->mb)) {
-    std::unique_ptr<lto::InputFile> obj =
-        CHECK(lto::InputFile::create(this->mb), this);
-    for (const lto::InputFile::Symbol &sym : obj->symbols()) {
-      if (sym.isUndefined())
-        continue;
-      symtab->addSymbol(LazyObject{*this, saver.save(sym.getName())});
-    }
-    return;
-  }
-
-  if (getELFKind(this->mb, archiveName) != config->ekind) {
-    error("incompatible file: " + this->mb.getBufferIdentifier());
-    return;
-  }
-
   // Find a symbol table.
   ELFFile<ELFT> obj = check(ELFFile<ELFT>::create(mb.getBuffer()));
   ArrayRef<typename ELFT::Shdr> sections = CHECK(obj.sections(), this);
@@ -1825,16 +1814,16 @@ template <class ELFT> void LazyObjFile::parse() {
         continue;
       sym->resolve(LazyObject{*this, sym->getName()});
 
-      // If extracted, stop iterating because this->symbols has been transferred
-      // to the instantiated ObjFile.
-      if (extracted)
+      // If extracted, stop iterating because the symbol resolution has been
+      // done by ObjFile::parse.
+      if (!lazy)
         return;
     }
     return;
   }
 }
 
-bool LazyObjFile::shouldExtractForCommon(const StringRef &name) {
+bool InputFile::shouldExtractForCommon(StringRef name) {
   if (isBitcode(mb))
     return isBitcodeNonCommonDef(mb, name, archiveName);
 
@@ -1855,11 +1844,6 @@ template void BitcodeFile::parse<ELF32BE>();
 template void BitcodeFile::parse<ELF64LE>();
 template void BitcodeFile::parse<ELF64BE>();
 
-template void LazyObjFile::parse<ELF32LE>();
-template void LazyObjFile::parse<ELF32BE>();
-template void LazyObjFile::parse<ELF64LE>();
-template void LazyObjFile::parse<ELF64BE>();
-
 template class elf::ObjFile<ELF32LE>;
 template class elf::ObjFile<ELF32BE>;
 template class elf::ObjFile<ELF64LE>;

diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h
index e15f8798ae1c9..dd39bc397a0fd 100644
--- a/lld/ELF/InputFiles.h
+++ b/lld/ELF/InputFiles.h
@@ -101,6 +101,10 @@ class InputFile {
   // Get filename to use for linker script processing.
   StringRef getNameForScript() const;
 
+  // Check if a non-common symbol should be extracted to override a common
+  // definition.
+  bool shouldExtractForCommon(StringRef name);
+
   // If not empty, this stores the name of the archive containing this file.
   // We use this string for creating error messages.
   SmallString<0> archiveName;
@@ -132,6 +136,11 @@ class InputFile {
   ELFKind ekind = ELFNoneKind;
   uint8_t osabi = 0;
   uint8_t abiVersion = 0;
+
+  // True if this is a relocatable object file/bitcode file between --start-lib
+  // and --end-lib.
+  bool lazy = false;
+
   // True if this is an argument for --just-symbols. Usually false.
   bool justSymbols = false;
 
@@ -211,6 +220,7 @@ template <class ELFT> class ObjFile : public ELFFileBase {
   }
 
   void parse(bool ignoreComdats = false);
+  void parseLazy();
 
   StringRef getShtGroupSignature(ArrayRef<Elf_Shdr> sections,
                                  const Elf_Shdr &sec);
@@ -294,36 +304,6 @@ template <class ELFT> class ObjFile : public ELFFileBase {
   llvm::once_flag initDwarf;
 };
 
-// LazyObjFile is analogous to ArchiveFile in the sense that
-// the file contains lazy symbols. The difference is that
-// LazyObjFile wraps a single file instead of multiple files.
-//
-// This class is used for --start-lib and --end-lib options which
-// instruct the linker to link object files between them with the
-// archive file semantics.
-class LazyObjFile : public InputFile {
-public:
-  LazyObjFile(MemoryBufferRef m, StringRef archiveName,
-              uint64_t offsetInArchive)
-      : InputFile(LazyObjKind, m), offsetInArchive(offsetInArchive) {
-    this->archiveName = archiveName;
-  }
-
-  static bool classof(const InputFile *f) { return f->kind() == LazyObjKind; }
-
-  template <class ELFT> void parse();
-  void extract();
-
-  // Check if a non-common symbol should be extracted to override a common
-  // definition.
-  bool shouldExtractForCommon(const StringRef &name);
-
-  bool extracted = false;
-
-private:
-  uint64_t offsetInArchive;
-};
-
 // An ArchiveFile object represents a .a file.
 class ArchiveFile : public InputFile {
 public:
@@ -354,9 +334,10 @@ class ArchiveFile : public InputFile {
 class BitcodeFile : public InputFile {
 public:
   BitcodeFile(MemoryBufferRef m, StringRef archiveName,
-              uint64_t offsetInArchive);
+              uint64_t offsetInArchive, bool lazy);
   static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
   template <class ELFT> void parse();
+  void parseLazy();
   std::unique_ptr<llvm::lto::InputFile> obj;
 };
 
@@ -406,6 +387,8 @@ class BinaryFile : public InputFile {
 
 InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName = "",
                             uint64_t offsetInArchive = 0);
+InputFile *createLazyFile(MemoryBufferRef mb, StringRef archiveName,
+                          uint64_t offsetInArchive);
 
 inline bool isBitcode(MemoryBufferRef mb) {
   return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode;
@@ -416,7 +399,7 @@ std::string replaceThinLTOSuffix(StringRef path);
 extern std::vector<ArchiveFile *> archiveFiles;
 extern std::vector<BinaryFile *> binaryFiles;
 extern std::vector<BitcodeFile *> bitcodeFiles;
-extern std::vector<LazyObjFile *> lazyObjFiles;
+extern std::vector<BitcodeFile *> lazyBitcodeFiles;
 extern std::vector<ELFFileBase *> objectFiles;
 extern std::vector<SharedFile *> sharedFiles;
 

diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp
index 46dc77a6789c5..b1632b8bae8a3 100644
--- a/lld/ELF/LTO.cpp
+++ b/lld/ELF/LTO.cpp
@@ -278,8 +278,8 @@ void BitcodeCompiler::add(BitcodeFile &f) {
 // This is needed because this is what GNU gold plugin does and we have a
 // distributed build system that depends on that behavior.
 static void thinLTOCreateEmptyIndexFiles() {
-  for (LazyObjFile *f : lazyObjFiles) {
-    if (f->extracted || !isBitcode(f->mb))
+  for (BitcodeFile *f : lazyBitcodeFiles) {
+    if (!f->lazy)
       continue;
     std::string path = replaceThinLTOSuffix(getThinLTOOutputFile(f->getName()));
     std::unique_ptr<raw_fd_ostream> os = openFile(path + ".thinlto.bc");

diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp
index f00d3217a6af4..f6f0ad0087d74 100644
--- a/lld/ELF/Symbols.cpp
+++ b/lld/ELF/Symbols.cpp
@@ -256,10 +256,12 @@ void Symbol::parseSymbolVersion() {
 }
 
 void Symbol::extract() const {
-  if (auto *sym = dyn_cast<LazyArchive>(this))
+  if (auto *sym = dyn_cast<LazyArchive>(this)) {
     cast<ArchiveFile>(sym->file)->extract(sym->sym);
-  else
-    cast<LazyObjFile>(this->file)->extract();
+  } else if (file->lazy) {
+    file->lazy = false;
+    parseFile(file);
+  }
 }
 
 MemoryBufferRef LazyArchive::getMemberBuffer() {
@@ -711,8 +713,7 @@ template <class LazyT> void Symbol::resolveLazy(const LazyT &other) {
         return;
       }
     } else if (auto *loSym = dyn_cast<LazyObject>(&other)) {
-      LazyObjFile *obj = cast<LazyObjFile>(loSym->file);
-      if (obj->shouldExtractForCommon(loSym->getName())) {
+      if (loSym->file->shouldExtractForCommon(loSym->getName())) {
         replaceCommon(*this, other);
         return;
       }

diff --git a/lld/ELF/Symbols.h b/lld/ELF/Symbols.h
index b0b7832135a7f..beb45ec141470 100644
--- a/lld/ELF/Symbols.h
+++ b/lld/ELF/Symbols.h
@@ -430,7 +430,9 @@ class LazyObject : public Symbol {
 public:
   LazyObject(InputFile &file, StringRef name)
       : Symbol(LazyObjectKind, &file, name, llvm::ELF::STB_GLOBAL,
-               llvm::ELF::STV_DEFAULT, llvm::ELF::STT_NOTYPE) {}
+               llvm::ELF::STV_DEFAULT, llvm::ELF::STT_NOTYPE) {
+    isUsedInRegularObj = false;
+  }
 
   static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; }
 };

diff --git a/lld/test/ELF/lazy-arch-conflict.s b/lld/test/ELF/lazy-arch-conflict.s
index 991476c5d6ec9..b6b41ae1025ac 100644
--- a/lld/test/ELF/lazy-arch-conflict.s
+++ b/lld/test/ELF/lazy-arch-conflict.s
@@ -4,4 +4,4 @@
 # RUN: echo '.globl foo; foo:' | llvm-mc -filetype=obj -triple=i686-pc-linux - -o %t32.o
 # RUN: not ld.lld %t64.o --start-lib %t32.o --end-lib -o /dev/null 2>&1 | FileCheck %s
 
-# CHECK: error: incompatible file: {{.*}}32.o
+# CHECK: error: {{.*}}32.o is incompatible with {{.*}}64.o


        


More information about the llvm-commits mailing list