[lld] r226336 - [PATCH] Speculatively instantiate archive members

Rui Ueyama ruiu at google.com
Fri Jan 16 14:44:50 PST 2015


Author: ruiu
Date: Fri Jan 16 16:44:50 2015
New Revision: 226336

URL: http://llvm.org/viewvc/llvm-project?rev=226336&view=rev
Log:
[PATCH] Speculatively instantiate archive members

At first, LLD parses only the index table of an archive file. When it finds
that a symbol it is looking for is defined in a member file of an archive, it
then actually reads that member from the archive file. This is done in the
core linker.

This is a single-threaded process, since the core linker is single-threaded.
If your command line contains a few object files and a lot of archive files
(which is quite often the case), LLD hardly utilizes hardware parallelism.

This patch improves parallelism by speculatively instantiating archive
file members. At the beginning of core linking, we first create a map
containing all symbols defined in all archive members, and each time we find
a new undefined symbol, we instantiate the member file containing the
symbol (if such a file exists). File instantiation is side-effect free, so
this should not affect correctness.

This is a quick benchmark result. Time to self-link the LLD executable:

Linux   9.78s -> 8.50s (0.86x)
Windows 6.18s -> 4.51s (0.73x)

http://reviews.llvm.org/D7015

Modified:
    lld/trunk/include/lld/Core/ArchiveLibraryFile.h
    lld/trunk/include/lld/Core/File.h
    lld/trunk/include/lld/Core/Resolver.h
    lld/trunk/lib/Core/Resolver.cpp
    lld/trunk/lib/ReaderWriter/FileArchive.cpp
    lld/trunk/lib/ReaderWriter/PECOFF/ReaderCOFF.cpp

Modified: lld/trunk/include/lld/Core/ArchiveLibraryFile.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/include/lld/Core/ArchiveLibraryFile.h?rev=226336&r1=226335&r2=226336&view=diff
==============================================================================
--- lld/trunk/include/lld/Core/ArchiveLibraryFile.h (original)
+++ lld/trunk/include/lld/Core/ArchiveLibraryFile.h Fri Jan 16 16:44:50 2015
@@ -11,6 +11,7 @@
 #define LLD_CORE_ARCHIVE_LIBRARY_FILE_H
 
 #include "lld/Core/File.h"
+#include "lld/Core/Parallel.h"
 #include <set>
 
 namespace lld {
@@ -37,9 +38,15 @@ public:
   virtual std::error_code
   parseAllMembers(std::vector<std::unique_ptr<File>> &result) = 0;
 
+  // Speculatively parses the member file that defines symbolName so that a
+  // later find() for that symbol can return the file immediately. The only
+  // side effect is pre-instantiating the file; calling this never affects
+  // correctness. The default implementation does nothing.
+  virtual void preload(TaskGroup &group, StringRef symbolName) {}
+
   /// Returns a set of all defined symbols in the archive, i.e. all
   /// resolvable symbol using this file.
-  virtual std::set<StringRef> getDefinedSymbols() const {
+  virtual std::set<StringRef> getDefinedSymbols() {
     return std::set<StringRef>();
   }
 

Modified: lld/trunk/include/lld/Core/File.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/include/lld/Core/File.h?rev=226336&r1=226335&r2=226336&view=diff
==============================================================================
--- lld/trunk/include/lld/Core/File.h (original)
+++ lld/trunk/include/lld/Core/File.h Fri Jan 16 16:44:50 2015
@@ -164,6 +164,14 @@ public:
 
   std::error_code parse();
 
+  // Called just before the core linker starts to use a file. Currently the
+  // PECOFF reader uses this to trigger the driver to parse the .drectve
+  // section (which contains command line options). Do not perform work with
+  // side effects in doParse(), because a file could be pre-loaded
+  // speculatively; do it in this hook instead.
+  // The default implementation does nothing.
+  virtual void beforeLink() {}
+
   // Usually each file owns a std::unique_ptr<MemoryBuffer>.
   // However, there's one special case. If a file is an archive file,
   // the archive file and its children all shares the same memory buffer.

Modified: lld/trunk/include/lld/Core/Resolver.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/include/lld/Core/Resolver.h?rev=226336&r1=226335&r2=226336&view=diff
==============================================================================
--- lld/trunk/include/lld/Core/Resolver.h (original)
+++ lld/trunk/include/lld/Core/Resolver.h Fri Jan 16 16:44:50 2015
@@ -10,6 +10,7 @@
 #ifndef LLD_CORE_RESOLVER_H
 #define LLD_CORE_RESOLVER_H
 
+#include "lld/Core/ArchiveLibraryFile.h"
 #include "lld/Core/File.h"
 #include "lld/Core/SharedLibraryFile.h"
 #include "lld/Core/Simple.h"
@@ -63,6 +64,7 @@ private:
   void maybeAddSectionGroupOrGnuLinkOnce(const DefinedAtom &atom);
 
   /// \brief The main function that iterates over the files to resolve
+  void makePreloadArchiveMap();
   bool resolveUndefines();
   void updateReferences();
   void deadStripOptimize();
@@ -73,6 +75,7 @@ private:
 
   void markLive(const Atom *atom);
   void addAtoms(const std::vector<const DefinedAtom *>&);
+  void maybePreloadArchiveMember(StringRef sym);
 
   class MergedFile : public SimpleFile {
   public:
@@ -93,6 +96,9 @@ private:
   std::vector<File *> _files;
   std::map<File *, bool> _newUndefinesAdded;
   size_t _fileIndex;
+
+  // Preloading
+  std::map<StringRef, ArchiveLibraryFile *> _archiveMap;
 };
 
 } // namespace lld

Modified: lld/trunk/lib/Core/Resolver.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/lib/Core/Resolver.cpp?rev=226336&r1=226335&r2=226336&view=diff
==============================================================================
--- lld/trunk/lib/Core/Resolver.cpp (original)
+++ lld/trunk/lib/Core/Resolver.cpp Fri Jan 16 16:44:50 2015
@@ -31,9 +31,12 @@ bool Resolver::handleFile(const File &fi
   bool undefAdded = false;
   for (const DefinedAtom *atom : file.defined())
     doDefinedAtom(*atom);
-  for (const UndefinedAtom *atom : file.undefined())
-    if (doUndefinedAtom(*atom))
+  for (const UndefinedAtom *atom : file.undefined()) {
+    if (doUndefinedAtom(*atom)) {
       undefAdded = true;
+      maybePreloadArchiveMember(atom->name());
+    }
+  }
   for (const SharedLibraryAtom *atom : file.sharedLibrary())
     doSharedLibraryAtom(*atom);
   for (const AbsoluteAtom *atom : file.absolute())
@@ -229,6 +232,17 @@ void Resolver::addAtoms(const std::vecto
     doDefinedAtom(*newAtom);
 }
 
+// If _archiveMap records an archive that defines sym, ask that archive to
+// preload the defining member on the context's task group; otherwise do
+// nothing. Preloading is speculative and has no visible side effect.
+void Resolver::maybePreloadArchiveMember(StringRef sym) {
+  auto it = _archiveMap.find(sym);
+  if (it == _archiveMap.end())
+    return;
+  ArchiveLibraryFile *archive = it->second;
+  archive->preload(_context.getTaskGroup(), sym);
+}
+
 // Returns true if at least one of N previous files has created an
 // undefined symbol.
 bool Resolver::undefinesAdded(int begin, int end) {
@@ -261,6 +275,16 @@ File *Resolver::getFile(int &index, int
   return cast<FileNode>(inputs[index++].get())->getFile();
 }
 
+// Build the Symbol -> ArchiveFile map; reverse order lets earlier nodes win.
+void Resolver::makePreloadArchiveMap() {
+  std::vector<std::unique_ptr<Node>> &nodes = _context.getNodes();
+  for (auto it = nodes.rbegin(), e = nodes.rend(); it != e; ++it)
+    if (auto *fnode = dyn_cast<FileNode>(it->get()))
+      if (auto *archive = dyn_cast<ArchiveLibraryFile>(fnode->getFile()))
+        for (StringRef sym : archive->getDefinedSymbols())
+          _archiveMap[sym] = archive;
+}
+
 // Keep adding atoms until _context.getNextFile() returns an error. This
 // function is where undefined atoms are resolved.
 bool Resolver::resolveUndefines() {
@@ -277,6 +301,7 @@ bool Resolver::resolveUndefines() {
                    << ": " << ec.message() << "\n";
       return false;
     }
+    file->beforeLink();
     switch (file->kind()) {
     case File::kindObject:
       if (groupLevel > 0)
@@ -446,6 +471,7 @@ void Resolver::removeCoalescedAwayAtoms(
 }
 
 bool Resolver::resolve() {
+  makePreloadArchiveMap();
   if (!resolveUndefines())
     return false;
   updateReferences();

Modified: lld/trunk/lib/ReaderWriter/FileArchive.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/lib/ReaderWriter/FileArchive.cpp?rev=226336&r1=226335&r2=226336&view=diff
==============================================================================
--- lld/trunk/lib/ReaderWriter/FileArchive.cpp (original)
+++ lld/trunk/lib/ReaderWriter/FileArchive.cpp Fri Jan 16 16:44:50 2015
@@ -17,7 +17,9 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include <future>
 #include <memory>
+#include <mutex>
 #include <set>
 #include <unordered_map>
 
@@ -57,6 +59,17 @@ public:
       return nullptr;
 
     _membersInstantiated.insert(memberStart);
+
+    // Check if a file is preloaded.
+    {
+      std::lock_guard<std::mutex> lock(_mutex);
+      auto it = _preloaded.find(memberStart);
+      if (it != _preloaded.end()) {
+        std::future<const File *> &future = it->second;
+        return future.get();
+      }
+    }
+
     std::unique_ptr<File> result;
     if (instantiateMember(ci, result))
       return nullptr;
@@ -65,6 +78,37 @@ public:
     return result.release();
   }
 
+  // Speculatively instantiate, on a task in group, the member defining name.
+  void preload(TaskGroup &group, StringRef name) override {
+    auto member = _symbolMemberMap.find(name);
+    if (member == _symbolMemberMap.end())
+      return;
+    Archive::child_iterator ci = member->second;
+
+    // Do nothing if find() has already handed out this member.
+    const char *memberStart = ci->getBuffer().data();
+    if (_membersInstantiated.count(memberStart))
+      return;
+
+    std::lock_guard<std::mutex> lock(_mutex);
+    if (_preloaded.find(memberStart) != _preloaded.end())
+      return;
+
+    // Register a future for find() to wait on, then parse on a spawned task.
+    auto *promise = new std::promise<const File *>;
+    _preloaded[memberStart] = promise->get_future();
+    _promises.push_back(std::unique_ptr<std::promise<const File *>>(promise));
+
+    group.spawn([=] {
+      std::unique_ptr<File> result;
+      if (instantiateMember(ci, result)) {
+        promise->set_value(nullptr);
+        return;
+      }
+      promise->set_value(result.release());
+    });
+  }
+
   /// \brief parse each member
   std::error_code
   parseAllMembers(std::vector<std::unique_ptr<File>> &result) override {
@@ -117,7 +161,8 @@ public:
   }
 
   /// Returns a set of all defined symbols in the archive.
-  std::set<StringRef> getDefinedSymbols() const override {
+  std::set<StringRef> getDefinedSymbols() override {
+    parse();
     std::set<StringRef> ret;
     for (const auto &e : _symbolMemberMap)
       ret.insert(e.first);
@@ -225,6 +270,9 @@ private:
   atom_collection_vector<AbsoluteAtom> _absoluteAtoms;
   bool _logLoading;
   mutable std::vector<std::unique_ptr<MemoryBuffer>> _memberBuffers;
+  mutable std::map<const char *, std::future<const File *>> _preloaded;
+  mutable std::vector<std::unique_ptr<std::promise<const File *>>> _promises;
+  mutable std::mutex _mutex;
 };
 
 class ArchiveReader : public Reader {

Modified: lld/trunk/lib/ReaderWriter/PECOFF/ReaderCOFF.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/lib/ReaderWriter/PECOFF/ReaderCOFF.cpp?rev=226336&r1=226335&r2=226336&view=diff
==============================================================================
--- lld/trunk/lib/ReaderWriter/PECOFF/ReaderCOFF.cpp (original)
+++ lld/trunk/lib/ReaderWriter/PECOFF/ReaderCOFF.cpp Fri Jan 16 16:44:50 2015
@@ -106,6 +106,8 @@ public:
     return _absoluteAtoms;
   }
 
+  void beforeLink() override;
+
   void addDefinedAtom(AliasAtom *atom) {
     atom->setOrdinal(_ordinal++);
     _definedAtoms._atoms.push_back(atom);
@@ -382,7 +384,10 @@ std::error_code FileCOFF::doParse() {
   // The mapping for /alternatename is in the context object. This helper
   // function iterate over defined atoms and create alias atoms if needed.
   createAlternateNameAtoms();
+  return std::error_code();
+}
 
+void FileCOFF::beforeLink() {
   // Acquire the mutex to mutate _ctx.
   std::lock_guard<std::recursive_mutex> lock(_ctx.getMutex());
 
@@ -392,10 +397,8 @@ std::error_code FileCOFF::doParse() {
     _ctx.setSafeSEH(false);
 
   if (_ctx.deadStrip())
-    for (StringRef sym : undefinedSymbols)
-      _ctx.addDeadStripRoot(sym);
-
-  return std::error_code();
+    for (const UndefinedAtom *undef : undefined())
+      _ctx.addDeadStripRoot(undef->name());
 }
 
 /// Iterate over the symbol table to retrieve all symbols.





More information about the llvm-commits mailing list