[lld] [lld][MachO]Multi-threaded i/o. Twice as fast linking a large project. (PR #147134)

Daniel Rodríguez Troitiño via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 18 12:51:30 PDT 2025


================
@@ -282,11 +284,83 @@ static void saveThinArchiveToRepro(ArchiveFile const *file) {
           ": Archive::children failed: " + toString(std::move(e)));
 }
 
-static InputFile *addFile(StringRef path, LoadType loadType,
-                          bool isLazy = false, bool isExplicit = true,
-                          bool isBundleLoader = false,
-                          bool isForceHidden = false) {
-  std::optional<MemoryBufferRef> buffer = readFile(path);
+// Bundles what is recorded for one input file: its path, whether it is
+// lazy, and a reference to its memory buffer.
+class DeferredFile {
+public:
+  StringRef path;
+  bool isLazy;
+  MemoryBufferRef buffer;
+
+  // Stores the three values as given; nothing is read from the buffer here.
+  DeferredFile(StringRef path, bool isLazy, MemoryBufferRef buffer)
+      : path(path), isLazy(isLazy), buffer(buffer) {}
+};
+using DeferredFiles = std::vector<DeferredFile>;
+
+// Most input files have been mapped but not yet paged in.
+// This code forces the page-ins on multiple threads so
+// the process is not stalled waiting on disk buffer i/o.
+//
+// Work is handed out through a shared atomic index: each of the
+// config->readThreads parallelFor tasks repeatedly claims the next entry of
+// `deferred` and reads one byte from every page of that entry's buffer.
+void multiThreadedPageInBackground(const DeferredFiles &deferred) {
+  static const size_t pageSize = Process::getPageSizeEstimate();
+  // Running total across all calls. It must be atomic because every worker
+  // below adds to it concurrently; a plain size_t here is a data race.
+  static std::atomic<size_t> totalBytes{0};
+  // Unsigned index so it compares against deferred.size() without a cast.
+  std::atomic<size_t> index{0};
+
+  parallelFor(0, config->readThreads, [&](size_t) {
+    while (true) {
+      // Claim the next unprocessed file; stop once all have been handed out.
+      const size_t localIndex = index.fetch_add(1);
+      if (localIndex >= deferred.size())
+        break;
+      const StringRef buff = deferred[localIndex].buffer.getBuffer();
+      totalBytes += buff.size();
+
+      // Reference all file's mmap'd pages to load them into memory.
+      for (const char *page = buff.data(), *end = page + buff.size();
+           page < end; page += pageSize)
+        LLVM_ATTRIBUTE_UNUSED volatile char t = *page;
+    }
+  });
+
+  // Optional diagnostic, enabled by setting LLD_MULTI_THREAD_PAGE.
+  if (getenv("LLD_MULTI_THREAD_PAGE"))
+    llvm::dbgs() << "multiThreadedPageIn " << totalBytes.load() << "/"
+                 << deferred.size() << "\n";
+}
+
+static void multiThreadedPageIn(const DeferredFiles &deferred) {
+  static std::deque<std::unique_ptr<DeferredFiles>> queue;
+  static std::thread *running;
+  static std::mutex mutex;
+
+  mutex.lock();
+  if (running && (queue.empty() || deferred.empty())) {
+    mutex.unlock();
+    running->join();
+    mutex.lock();
+    delete running;
+    running = nullptr;
+  }
+
+  if (!deferred.empty()) {
+    queue.emplace_back(
+        std::unique_ptr<DeferredFiles>(new DeferredFiles(deferred)));
+    if (!running)
+      running = new std::thread([&]() {
+        mutex.lock();
+        while (!queue.empty()) {
+          const DeferredFiles &deferred = *queue.front();
+          mutex.unlock();
+          multiThreadedPageInBackground(deferred);
+          mutex.lock();
+          queue.pop_front();
+        }
+        mutex.unlock();
----------------
drodriguez wrote:

I think you already mentioned this in an earlier comment: even when using the `unique_ptr`, you cannot take a reference to `front()` and then unlock the mutex, because `emplace_back` might invalidate the iterators before you get to use the reference outside the mutex.

One might need to copy the `unique_ptr` contents out of the queue before unlocking the mutex.

```suggestion
        while (true) {
           mutex.lock();
           if (queue.empty()) {
             mutex.unlock();
             break;
           }
           DeferredFiles deferred(*queue.front());
           queue.pop_front();
           mutex.unlock();
           multiThreadedPageInBackground(deferred);
        }
```

https://github.com/llvm/llvm-project/pull/147134


More information about the llvm-commits mailing list