[lld] [lld][MachO]Multi-threaded i/o. Twice as fast linking a large project. (PR #147134)

John Holdsworth via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 30 02:53:18 PDT 2025


================
@@ -282,11 +286,122 @@ static void saveThinArchiveToRepro(ArchiveFile const *file) {
           ": Archive::children failed: " + toString(std::move(e)));
 }
 
-static InputFile *addFile(StringRef path, LoadType loadType,
-                          bool isLazy = false, bool isExplicit = true,
-                          bool isBundleLoader = false,
-                          bool isForceHidden = false) {
-  std::optional<MemoryBufferRef> buffer = readFile(path);
+// Bundles the three values recorded for one input file: the path it was
+// given under, its laziness flag, and a reference to its buffer. The object
+// only stores what the constructor is handed; it performs no work itself.
+class DeferredFile {
+public:
+  StringRef path;         // Path of the input file.
+  bool isLazy;            // Laziness flag supplied by the creator.
+  MemoryBufferRef buffer; // Non-owning reference to the file's contents.
+
+  // Stores the arguments verbatim in the members of the same name.
+  DeferredFile(StringRef path, bool isLazy, MemoryBufferRef buffer)
+      : path(path), isLazy(isLazy), buffer(buffer) {}
+};
+using DeferredFiles = std::vector<DeferredFile>;
+
+// Runs queued work items one at a time, in FIFO order, on a single background
+// thread. The thread is created on demand and exits by itself as soon as it
+// finds the queue empty.
+//
+// NOTE(review): queueWork() releases the mutex around join(), so it looks
+// like it is meant to be called from one thread only -- confirm with callers.
+// NOTE(review): there is no destructor, so a worker that is still running is
+// never joined when the queue object goes away -- presumably instances are
+// expected to outlive their work; confirm.
+class SerialBackgroundQueue {
+  // Pending work. The item currently being executed stays at the front and is
+  // only popped once it has finished.
+  std::deque<std::function<void()>> queue;
+  // Worker thread, or nullptr when none has been started or the last one has
+  // been reaped. It must start out null: queueWork() tests it before ever
+  // assigning it, so an uninitialized pointer would be undefined behaviour
+  // for any instance that is not zero-initialized.
+  std::thread *running = nullptr;
+  // Guards `queue`.
+  std::mutex mutex;
+
+public:
+  // Appends `work` to the queue and makes sure a worker thread is draining
+  // it. When `reap` is true nothing is queued (`work` is ignored); instead any
+  // existing worker is joined, i.e. the call blocks until everything queued
+  // earlier has run.
+  void queueWork(std::function<void()> work, bool reap) {
+    std::unique_lock<std::mutex> lock(mutex);
+
+    // Join the previous worker when asked to reap, or when the queue is empty:
+    // an empty queue means the worker has popped its last item and is exiting
+    // (or has already exited), so the join is short.
+    if (running && (queue.empty() || reap)) {
+      // The worker needs the mutex to finish, so drop it while joining.
+      lock.unlock();
+      running->join();
+      lock.lock();
+      delete running;
+      running = nullptr;
+    }
+
+    if (reap)
+      return;
+
+    queue.emplace_back(std::move(work));
+    if (running)
+      return; // The live worker will pick the new item up.
+
+    // The worker only touches members, so capture `this` and nothing else.
+    running = new std::thread([this]() {
+      bool shouldPop = false;
+      while (true) {
+        std::function<void()> task;
+        {
+          std::lock_guard<std::mutex> guard(mutex);
+          // Retire the item completed in the previous iteration.
+          if (shouldPop)
+            queue.pop_front();
+          if (queue.empty())
+            break;
+          task = std::move(queue.front());
+          shouldPop = true;
+        }
+        // Run the item without holding the mutex so queueWork() never waits
+        // on the work itself just to enqueue more.
+        task();
+      }
+    });
+  }
+};
+
+#ifndef NDEBUG
+#include <iomanip>
+#include <iostream>
+#endif
+
+// Most input files have been mapped but not yet paged in.
+// This code forces the page-ins on multiple threads so
+// the process is not stalled waiting on disk buffer i/o.
+//
+// Reads one byte from every page of each deferred file's buffer, with the
+// files shared out between the `config->readThreads` iterations handed to
+// parallelFor. Buffers larger than `largeArchive` (10 MiB) are skipped.
+//
+// When NDEBUG is not defined and the LLD_MULTI_THREAD_PAGE environment
+// variable is set, a line "bytes/files touched/files seen/seconds" is written
+// to std::cerr. `totalBytes` is static, so the byte figure accumulates across
+// calls while the file counter is per call.
+void multiThreadedPageInBackground(DeferredFiles &deferred) {
+  using namespace std::chrono;
+  static const size_t pageSize = Process::getPageSizeEstimate();
+  static const size_t largeArchive = 10 * 1024 * 1024;
+  // Next unclaimed slot in `deferred`; workers claim one file at a time.
+  std::atomic_size_t index{0};
+#ifndef NDEBUG
+  std::atomic_int numDeferredFilesTouched{0};
+  static std::atomic_uint64_t totalBytes{0};
+  auto t0 = high_resolution_clock::now();
+#endif
+
+  parallelFor(0, config->readThreads, [&](size_t) {
+    while (true) {
+      const size_t localIndex = index.fetch_add(1);
+      if (localIndex >= deferred.size())
+        break;
+      const StringRef buff = deferred[localIndex].buffer.getBuffer();
+      if (buff.size() > largeArchive)
+        continue;
+#ifndef NDEBUG
+      totalBytes += buff.size();
+      numDeferredFilesTouched += 1;
+#endif
+
+      // Reference all of the file's mmap'd pages to load them into memory.
+      // Step by offset rather than by pointer: advancing a pointer a whole
+      // page beyond the end of the buffer would be undefined behaviour.
+      const char *data = buff.data();
+      for (size_t offset = 0, size = buff.size(); offset < size;
+           offset += pageSize)
+        LLVM_ATTRIBUTE_UNUSED volatile char t = data[offset];
+    }
+  });
+
+#ifndef NDEBUG
+  auto dt = high_resolution_clock::now() - t0;
+  if (Process::GetEnv("LLD_MULTI_THREAD_PAGE"))
+    std::cerr << "multiThreadedPageIn " << totalBytes << "/"
+              << numDeferredFilesTouched << "/" << deferred.size() << "/"
+              << std::setprecision(4)
+              << duration_cast<milliseconds>(dt).count() / 1000. << "\n";
+#endif
+}
+
+static void multiThreadedPageIn(const DeferredFiles &deferred,
+                                bool reap = false) {
----------------
johnno1962 wrote:

I removed it as you don't want the main thread to ever be held up by the page-in threads.

https://github.com/llvm/llvm-project/pull/147134


More information about the llvm-commits mailing list