[lld] [lld][MachO]Multi-threaded i/o. Twice as fast linking a large project. (PR #147134)

Ellis Hoag via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 25 16:51:01 PDT 2025


================
@@ -282,11 +286,119 @@ static void saveThinArchiveToRepro(ArchiveFile const *file) {
           ": Archive::children failed: " + toString(std::move(e)));
 }
 
-static InputFile *addFile(StringRef path, LoadType loadType,
-                          bool isLazy = false, bool isExplicit = true,
-                          bool isBundleLoader = false,
-                          bool isForceHidden = false) {
-  std::optional<MemoryBufferRef> buffer = readFile(path);
+class DeferredFile {
+public:
+  DeferredFile(StringRef path, bool isLazy, MemoryBufferRef buffer)
+      : path(path), isLazy(isLazy), buffer(buffer) {}
+  StringRef path;
+  bool isLazy;
+  MemoryBufferRef buffer;
+};
+using DeferredFiles = std::vector<DeferredFile>;
+
+class SerialBackgroundQueue {
+  std::deque<std::function<void()>> queue;
+  std::thread *running;
+  std::mutex mutex;
+
+public:
+  void queueWork(std::function<void()> work, bool reap) {
+    mutex.lock();
+    if (running && (queue.empty() || reap)) {
+      mutex.unlock();
+      running->join();
+      mutex.lock();
+      delete running;
+      running = nullptr;
+    }
+
+    if (!reap) {
+      queue.emplace_back(std::move(work));
+      if (!running)
+        running = new std::thread([&]() {
+          bool shouldPop = false;
+          while (true) {
+            mutex.lock();
+            if (shouldPop)
+              queue.pop_front();
+            if (queue.empty()) {
+              mutex.unlock();
+              break;
+            }
+            auto work = std::move(queue.front());
+            shouldPop = true;
+            mutex.unlock();
+            work();
+          }
+        });
+    }
+    mutex.unlock();
+  }
+};
+
+#ifndef NDEBUG
+#include <iomanip>
+#include <iostream>
+#endif
+
+// Most input files have been mapped but not yet paged in.
+// This code forces the page-ins on multiple threads so
+// the process is not stalled waiting on disk buffer i/o.
+void multiThreadedPageInBackground(DeferredFiles &deferred) {
+  using namespace std::chrono;
+  static const size_t pageSize = Process::getPageSizeEstimate();
+  static const size_t largeArchive = 10 * 1024 * 1024;
+  static std::atomic_uint64_t totalBytes = 0;
+  std::atomic_int index = 0, included = 0;
+#ifndef NDEBUG
+  auto t0 = high_resolution_clock::now();
+#endif
+
+  parallelFor(0, config->readThreads, [&](size_t I) {
+    while (true) {
+      int localIndex = index.fetch_add(1);
+      if (localIndex >= (int)deferred.size())
+        break;
+      const StringRef &buff = deferred[localIndex].buffer.getBuffer();
+      totalBytes += buff.size();
+      if (buff.size() > largeArchive)
+        continue;
+
+      included += 1;
----------------
ellishg wrote:

NIT: We probably don't want to count bytes that we aren't paging in. Also, let's guard this behind `#ifndef NDEBUG`.


```suggestion
      if (buff.size() > largeArchive)
        continue;
#ifndef NDEBUG
      totalBytes += buff.size();
      included += 1;
#endif
```

https://github.com/llvm/llvm-project/pull/147134


More information about the llvm-commits mailing list