[lld] [lld][MachO] Multi-threaded i/o. 40% speedup linking a large project. (PR #147134)

John Holdsworth via llvm-commits llvm-commits at lists.llvm.org
Sun Jul 6 10:47:21 PDT 2025


https://github.com/johnno1962 updated https://github.com/llvm/llvm-project/pull/147134

>From c55b5b2c9f49d23a6063cc6e7a756e22c9cede43 Mon Sep 17 00:00:00 2001
From: John Holdsworth <github at johnholdsworth.com>
Date: Sat, 5 Jul 2025 10:24:51 +0200
Subject: [PATCH 1/3] Multi-threaded disk i/o.

---
 lld/MachO/Config.h   |   1 +
 lld/MachO/Driver.cpp | 111 +++++++++++++++++++++++++++++++++++++++----
 lld/MachO/Options.td |   3 ++
 3 files changed, 106 insertions(+), 9 deletions(-)

diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index a01e60efbe761..92c6eb85f4123 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -186,6 +186,7 @@ struct Configuration {
   bool interposable = false;
   bool errorForArchMismatch = false;
   bool ignoreAutoLink = false;
+  int readThreads = 0;
   // ld64 allows invalid auto link options as long as the link succeeds. LLD
   // does not, but there are cases in the wild where the invalid linker options
   // exist. This allows users to ignore the specific invalid options in the case
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 9eb391c4ee1b9..36626720aa252 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -44,6 +44,7 @@
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Parallel.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
 #include "llvm/Support/TarWriter.h"
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Support/TimeProfiler.h"
@@ -282,11 +283,11 @@ static void saveThinArchiveToRepro(ArchiveFile const *file) {
           ": Archive::children failed: " + toString(std::move(e)));
 }
 
-static InputFile *addFile(StringRef path, LoadType loadType,
-                          bool isLazy = false, bool isExplicit = true,
-                          bool isBundleLoader = false,
-                          bool isForceHidden = false) {
-  std::optional<MemoryBufferRef> buffer = readFile(path);
+static InputFile *deferredAddFile(std::optional<MemoryBufferRef> buffer,
+                                  StringRef path, LoadType loadType,
+                                  bool isLazy = false, bool isExplicit = true,
+                                  bool isBundleLoader = false,
+                                  bool isForceHidden = false) {
   if (!buffer)
     return nullptr;
   MemoryBufferRef mbref = *buffer;
@@ -441,6 +442,14 @@ static InputFile *addFile(StringRef path, LoadType loadType,
   return newFile;
 }
 
+static InputFile *addFile(StringRef path, LoadType loadType,
+                          bool isLazy = false, bool isExplicit = true,
+                          bool isBundleLoader = false,
+                          bool isForceHidden = false) {
+  return deferredAddFile(readFile(path), path, loadType, isLazy, isExplicit,
+                         isBundleLoader, isForceHidden);
+}
+
 static std::vector<StringRef> missingAutolinkWarnings;
 static void addLibrary(StringRef name, bool isNeeded, bool isWeak,
                        bool isReexport, bool isHidden, bool isExplicit,
@@ -564,13 +573,23 @@ void macho::resolveLCLinkerOptions() {
   }
 }
 
-static void addFileList(StringRef path, bool isLazy) {
+typedef struct {
+  StringRef path;
+  std::optional<MemoryBufferRef> buffer;
+} DeferredFile;
+
+static void addFileList(StringRef path, bool isLazy,
+                        std::vector<DeferredFile> &deferredFiles) {
   std::optional<MemoryBufferRef> buffer = readFile(path);
   if (!buffer)
     return;
   MemoryBufferRef mbref = *buffer;
   for (StringRef path : args::getLines(mbref))
-    addFile(rerootPath(path), LoadType::CommandLine, isLazy);
+    if (config->readThreads) {
+      StringRef rrpath = rerootPath(path);
+      deferredFiles.push_back({rrpath, readFile(rrpath)});
+    } else
+      addFile(rerootPath(path), LoadType::CommandLine, isLazy);
 }
 
 // We expect sub-library names of the form "libfoo", which will match a dylib
@@ -1215,13 +1234,68 @@ static void handleSymbolPatterns(InputArgList &args,
     parseSymbolPatternsFile(arg, symbolPatterns);
 }
 
-static void createFiles(const InputArgList &args) {
+// Most input files have been mapped but not yet paged in.
+// This code forces the page-ins on multiple threads so
+// the process is not stalled waiting on disk buffer i/o.
+void multiThreadedPageIn(std::vector<DeferredFile> &deferred, int nthreads) {
+#ifndef _WIN32
+  typedef struct {
+    std::vector<DeferredFile> &deferred;
+    size_t counter, total, pageSize;
+    pthread_mutex_t mutex;
+  } PageInState;
+  PageInState state = {deferred, 0, 0,
+                       llvm::sys::Process::getPageSizeEstimate(),
+                       pthread_mutex_t()};
+  pthread_mutex_init(&state.mutex, NULL);
+
+  pthread_t running[200];
+  int maxthreads = sizeof running / sizeof running[0];
+  if (nthreads > maxthreads)
+    nthreads = maxthreads;
+
+  for (int t = 0; t < nthreads; t++)
+    pthread_create(
+        &running[t], nullptr,
+        [](void *ptr) -> void * {
+          PageInState &state = *(PageInState *)ptr;
+          static int total = 0;
+          while (true) {
+            pthread_mutex_lock(&state.mutex);
+            if (state.counter >= state.deferred.size()) {
+              pthread_mutex_unlock(&state.mutex);
+              return nullptr;
+            }
+            DeferredFile &add = state.deferred[state.counter];
+            state.counter += 1;
+            pthread_mutex_unlock(&state.mutex);
+
+            int t = 0; // Reference each page to load it into memory.
+            for (const char *page = add.buffer->getBuffer().data(),
+                            *end = page + add.buffer->getBuffer().size();
+                 page < end; page += state.pageSize)
+              t += *page;
+            state.total += t; // Avoids whole section being optimised out.
+          }
+        },
+        &state);
+
+  for (int t = 0; t < nthreads; t++)
+    pthread_join(running[t], nullptr);
+
+  pthread_mutex_destroy(&state.mutex);
+#endif
+}
+
+void createFiles(const InputArgList &args) {
   TimeTraceScope timeScope("Load input files");
   // This loop should be reserved for options whose exact ordering matters.
   // Other options should be handled via filtered() and/or getLastArg().
   bool isLazy = false;
   // If we've processed an opening --start-lib, without a matching --end-lib
   bool inLib = false;
+  std::vector<DeferredFile> deferredFiles;
+
   for (const Arg *arg : args) {
     const Option &opt = arg->getOption();
     warnIfDeprecatedOption(opt);
@@ -1229,6 +1303,11 @@ static void createFiles(const InputArgList &args) {
 
     switch (opt.getID()) {
     case OPT_INPUT:
+      if (config->readThreads) {
+        StringRef rrpath = rerootPath(arg->getValue());
+        deferredFiles.push_back({rrpath, readFile(rrpath)});
+        break;
+      }
       addFile(rerootPath(arg->getValue()), LoadType::CommandLine, isLazy);
       break;
     case OPT_needed_library:
@@ -1249,7 +1328,7 @@ static void createFiles(const InputArgList &args) {
         dylibFile->forceWeakImport = true;
       break;
     case OPT_filelist:
-      addFileList(arg->getValue(), isLazy);
+      addFileList(arg->getValue(), isLazy, deferredFiles);
       break;
     case OPT_force_load:
       addFile(rerootPath(arg->getValue()), LoadType::CommandLineForce);
@@ -1295,6 +1374,12 @@ static void createFiles(const InputArgList &args) {
       break;
     }
   }
+
+  if (config->readThreads) {
+    multiThreadedPageIn(deferredFiles, config->readThreads);
+    for (auto &add : deferredFiles)
+      deferredAddFile(add.buffer, add.path, LoadType::CommandLine, isLazy);
+  }
 }
 
 static void gatherInputSections() {
@@ -1687,6 +1772,14 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
     }
   }
 
+  if (auto *arg = args.getLastArg(OPT_read_threads)) {
+    StringRef v(arg->getValue());
+    unsigned threads = 0;
+    if (!llvm::to_integer(v, threads, 0) || threads < 0)
+      error(arg->getSpelling() + ": expected a positive integer, but got '" +
+            arg->getValue() + "'");
+    config->readThreads = threads;
+  }
   if (auto *arg = args.getLastArg(OPT_threads_eq)) {
     StringRef v(arg->getValue());
     unsigned threads = 0;
diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td
index 4f0602f59812b..3dc98fccc1b7b 100644
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -396,6 +396,9 @@ def dead_strip : Flag<["-"], "dead_strip">,
 def interposable : Flag<["-"], "interposable">,
     HelpText<"Indirects access to all exported symbols in an image">,
     Group<grp_opts>;
+def read_threads : Joined<["--"], "read-threads=">,
+    HelpText<"Number of threads to use paging in files.">,
+    Group<grp_lld>;
 def order_file : Separate<["-"], "order_file">,
     MetaVarName<"<file>">,
     HelpText<"Layout functions and data according to specification in <file>">,

>From 3d11a33599246bbf5e358b554489aeae854ed7be Mon Sep 17 00:00:00 2001
From: John Holdsworth <github at johnholdsworth.com>
Date: Sun, 6 Jul 2025 10:05:38 +0200
Subject: [PATCH 2/3] Afterthoughts.

---
 lld/MachO/Driver.cpp | 82 +++++++++++++++++++++++---------------------
 1 file changed, 43 insertions(+), 39 deletions(-)

diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 36626720aa252..5b9f9cc2939bd 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -283,11 +283,11 @@ static void saveThinArchiveToRepro(ArchiveFile const *file) {
           ": Archive::children failed: " + toString(std::move(e)));
 }
 
-static InputFile *deferredAddFile(std::optional<MemoryBufferRef> buffer,
-                                  StringRef path, LoadType loadType,
-                                  bool isLazy = false, bool isExplicit = true,
-                                  bool isBundleLoader = false,
-                                  bool isForceHidden = false) {
+static InputFile *processFile(std::optional<MemoryBufferRef> buffer,
+                              StringRef path, LoadType loadType,
+                              bool isLazy = false, bool isExplicit = true,
+                              bool isBundleLoader = false,
+                              bool isForceHidden = false) {
   if (!buffer)
     return nullptr;
   MemoryBufferRef mbref = *buffer;
@@ -446,8 +446,24 @@ static InputFile *addFile(StringRef path, LoadType loadType,
                           bool isLazy = false, bool isExplicit = true,
                           bool isBundleLoader = false,
                           bool isForceHidden = false) {
-  return deferredAddFile(readFile(path), path, loadType, isLazy, isExplicit,
-                         isBundleLoader, isForceHidden);
+  return processFile(readFile(path), path, loadType, isLazy, isExplicit,
+                     isBundleLoader, isForceHidden);
+}
+
+typedef struct {
+  StringRef path;
+  LoadType loadType;
+  bool isLazy;
+  std::optional<MemoryBufferRef> buffer;
+} DeferredFile;
+
+static void deferFile(StringRef path, LoadType loadType, bool isLazy,
+                      std::vector<DeferredFile> &deferred) {
+  std::optional<MemoryBufferRef> buffer = readFile(path);
+  if (config->readThreads)
+    deferred.push_back({path, loadType, isLazy, buffer});
+  else
+    processFile(buffer, path, loadType, isLazy);
 }
 
 static std::vector<StringRef> missingAutolinkWarnings;
@@ -573,11 +589,6 @@ void macho::resolveLCLinkerOptions() {
   }
 }
 
-typedef struct {
-  StringRef path;
-  std::optional<MemoryBufferRef> buffer;
-} DeferredFile;
-
 static void addFileList(StringRef path, bool isLazy,
                         std::vector<DeferredFile> &deferredFiles) {
   std::optional<MemoryBufferRef> buffer = readFile(path);
@@ -585,11 +596,7 @@ static void addFileList(StringRef path, bool isLazy,
     return;
   MemoryBufferRef mbref = *buffer;
   for (StringRef path : args::getLines(mbref))
-    if (config->readThreads) {
-      StringRef rrpath = rerootPath(path);
-      deferredFiles.push_back({rrpath, readFile(rrpath)});
-    } else
-      addFile(rerootPath(path), LoadType::CommandLine, isLazy);
+    deferFile(rerootPath(path), LoadType::CommandLine, isLazy, deferredFiles);
 }
 
 // We expect sub-library names of the form "libfoo", which will match a dylib
@@ -1239,43 +1246,44 @@ static void handleSymbolPatterns(InputArgList &args,
 // the process is not stalled waiting on disk buffer i/o.
 void multiThreadedPageIn(std::vector<DeferredFile> &deferred, int nthreads) {
 #ifndef _WIN32
+#define MaxReadThreads 200
   typedef struct {
     std::vector<DeferredFile> &deferred;
-    size_t counter, total, pageSize;
+    size_t counter, bytes, total, pageSize;
     pthread_mutex_t mutex;
   } PageInState;
-  PageInState state = {deferred, 0, 0,
-                       llvm::sys::Process::getPageSizeEstimate(),
-                       pthread_mutex_t()};
+  PageInState state = {
+      deferred,         0, 0, 0, llvm::sys::Process::getPageSizeEstimate(),
+      pthread_mutex_t()};
   pthread_mutex_init(&state.mutex, NULL);
 
-  pthread_t running[200];
-  int maxthreads = sizeof running / sizeof running[0];
-  if (nthreads > maxthreads)
-    nthreads = maxthreads;
+  pthread_t running[MaxReadThreads];
+  if (nthreads > MaxReadThreads)
+    nthreads = MaxReadThreads;
 
   for (int t = 0; t < nthreads; t++)
     pthread_create(
         &running[t], nullptr,
         [](void *ptr) -> void * {
           PageInState &state = *(PageInState *)ptr;
-          static int total = 0;
           while (true) {
             pthread_mutex_lock(&state.mutex);
             if (state.counter >= state.deferred.size()) {
               pthread_mutex_unlock(&state.mutex);
               return nullptr;
             }
-            DeferredFile &add = state.deferred[state.counter];
+            DeferredFile &file = state.deferred[state.counter];
             state.counter += 1;
             pthread_mutex_unlock(&state.mutex);
 
+            const char *page = file.buffer->getBuffer().data(),
+                       *end = page + file.buffer->getBuffer().size();
+            state.bytes += end - page;
+
             int t = 0; // Reference each page to load it into memory.
-            for (const char *page = add.buffer->getBuffer().data(),
-                            *end = page + add.buffer->getBuffer().size();
-                 page < end; page += state.pageSize)
+            for (; page < end; page += state.pageSize)
               t += *page;
-            state.total += t; // Avoids whole section being optimised out.
+            state.total += t; // Avoids the loop being optimised out.
           }
         },
         &state);
@@ -1303,12 +1311,8 @@ void createFiles(const InputArgList &args) {
 
     switch (opt.getID()) {
     case OPT_INPUT:
-      if (config->readThreads) {
-        StringRef rrpath = rerootPath(arg->getValue());
-        deferredFiles.push_back({rrpath, readFile(rrpath)});
-        break;
-      }
-      addFile(rerootPath(arg->getValue()), LoadType::CommandLine, isLazy);
+      deferFile(rerootPath(arg->getValue()), LoadType::CommandLine, isLazy,
+                deferredFiles);
       break;
     case OPT_needed_library:
       if (auto *dylibFile = dyn_cast_or_null<DylibFile>(
@@ -1377,8 +1381,8 @@ void createFiles(const InputArgList &args) {
 
   if (config->readThreads) {
     multiThreadedPageIn(deferredFiles, config->readThreads);
-    for (auto &add : deferredFiles)
-      deferredAddFile(add.buffer, add.path, LoadType::CommandLine, isLazy);
+    for (auto &file : deferredFiles)
+      processFile(file.buffer, file.path, file.loadType, file.isLazy);
   }
 }
 

>From fdc4c3898113d8dce06d6ce72d533df59edf1e94 Mon Sep 17 00:00:00 2001
From: John Holdsworth <github at johnholdsworth.com>
Date: Sun, 6 Jul 2025 18:09:19 +0200
Subject: [PATCH 3/3] multiThreadedPageIn of library archives.

---
 lld/MachO/Driver.cpp | 138 +++++++++++++++++++++++--------------------
 1 file changed, 75 insertions(+), 63 deletions(-)

diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 5b9f9cc2939bd..5fe7d38f7ade0 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -283,6 +283,71 @@ static void saveThinArchiveToRepro(ArchiveFile const *file) {
           ": Archive::children failed: " + toString(std::move(e)));
 }
 
+typedef struct {
+  StringRef path;
+  LoadType loadType;
+  bool isLazy;
+  std::optional<MemoryBufferRef> buffer;
+  const char *start;
+  size_t size;
+} DeferredFile;
+
+// Most input files have been mapped but not yet paged in.
+// This code forces the page-ins on multiple threads so
+// the process is not stalled waiting on disk buffer i/o.
+static void multiThreadedPageIn(std::vector<DeferredFile> &deferred) {
+#ifndef _WIN32
+#define MaxReadThreads 200
+  typedef struct {
+    std::vector<DeferredFile> &deferred;
+    size_t counter, total, pageSize;
+    pthread_mutex_t mutex;
+  } PageInState;
+  PageInState state = {deferred, 0, 0,
+                       llvm::sys::Process::getPageSizeEstimate(),
+                       pthread_mutex_t()};
+  static size_t totalBytes;
+
+  pthread_t running[MaxReadThreads];
+  if (config->readThreads > MaxReadThreads)
+    config->readThreads = MaxReadThreads;
+  pthread_mutex_init(&state.mutex, NULL);
+
+  for (int t = 0; t < config->readThreads; t++)
+    pthread_create(
+        &running[t], nullptr,
+        [](void *ptr) -> void * {
+          PageInState &state = *(PageInState *)ptr;
+          while (true) {
+            pthread_mutex_lock(&state.mutex);
+            if (state.counter >= state.deferred.size()) {
+              pthread_mutex_unlock(&state.mutex);
+              return nullptr;
+            }
+            DeferredFile &file = state.deferred[state.counter];
+            state.counter += 1;
+            pthread_mutex_unlock(&state.mutex);
+
+            const char *page = file.start, *end = page + file.size;
+            totalBytes += end - page;
+
+            int t = 0; // Reference each page to load it into memory.
+            for (; page < end; page += state.pageSize)
+              t += *page;
+            state.total += t; // Avoids the loop being optimised out.
+          }
+        },
+        &state);
+
+  for (int t = 0; t < config->readThreads; t++)
+    pthread_join(running[t], nullptr);
+
+  pthread_mutex_destroy(&state.mutex);
+  if (getenv("LLD_MULTI_THREAD_PAGE"))
+    printf("multiThreadedPageIn %ld/%ld\n", totalBytes, deferred.size());
+#endif
+}
+
 static InputFile *processFile(std::optional<MemoryBufferRef> buffer,
                               StringRef path, LoadType loadType,
                               bool isLazy = false, bool isExplicit = true,
@@ -367,6 +432,7 @@ static InputFile *processFile(std::optional<MemoryBufferRef> buffer,
       // we already found that it contains an ObjC symbol.
       if (readFile(path)) {
         Error e = Error::success();
+        std::vector<DeferredFile> deferredFiles;
         for (const object::Archive::Child &c : file->getArchive().children(e)) {
           Expected<MemoryBufferRef> mb = c.getMemoryBufferRef();
           if (!mb) {
@@ -380,6 +446,9 @@ static InputFile *processFile(std::optional<MemoryBufferRef> buffer,
             continue;
           }
 
+          deferredFiles.push_back({path, LoadType::CommandLine, false,
+                                   std::nullopt, mb->getBuffer().data(),
+                                   mb->getBuffer().size()});
           if (!hasObjCSection(*mb))
             continue;
           if (Error e = file->fetch(c, "-ObjC"))
@@ -389,6 +458,8 @@ static InputFile *processFile(std::optional<MemoryBufferRef> buffer,
         if (e)
           error(toString(file) +
                 ": Archive::children failed: " + toString(std::move(e)));
+        if (deferredFiles.size() > 1)
+          multiThreadedPageIn(deferredFiles);
       }
     }
     file->addLazySymbols();
@@ -450,18 +521,13 @@ static InputFile *addFile(StringRef path, LoadType loadType,
                      isBundleLoader, isForceHidden);
 }
 
-typedef struct {
-  StringRef path;
-  LoadType loadType;
-  bool isLazy;
-  std::optional<MemoryBufferRef> buffer;
-} DeferredFile;
-
 static void deferFile(StringRef path, LoadType loadType, bool isLazy,
                       std::vector<DeferredFile> &deferred) {
   std::optional<MemoryBufferRef> buffer = readFile(path);
   if (config->readThreads)
-    deferred.push_back({path, loadType, isLazy, buffer});
+    deferred.push_back({path, loadType, isLazy, buffer,
+                        buffer->getBuffer().data(),
+                        buffer->getBuffer().size()});
   else
     processFile(buffer, path, loadType, isLazy);
 }
@@ -1241,60 +1307,6 @@ static void handleSymbolPatterns(InputArgList &args,
     parseSymbolPatternsFile(arg, symbolPatterns);
 }
 
-// Most input files have been mapped but not yet paged in.
-// This code forces the page-ins on multiple threads so
-// the process is not stalled waiting on disk buffer i/o.
-void multiThreadedPageIn(std::vector<DeferredFile> &deferred, int nthreads) {
-#ifndef _WIN32
-#define MaxReadThreads 200
-  typedef struct {
-    std::vector<DeferredFile> &deferred;
-    size_t counter, bytes, total, pageSize;
-    pthread_mutex_t mutex;
-  } PageInState;
-  PageInState state = {
-      deferred,         0, 0, 0, llvm::sys::Process::getPageSizeEstimate(),
-      pthread_mutex_t()};
-  pthread_mutex_init(&state.mutex, NULL);
-
-  pthread_t running[MaxReadThreads];
-  if (nthreads > MaxReadThreads)
-    nthreads = MaxReadThreads;
-
-  for (int t = 0; t < nthreads; t++)
-    pthread_create(
-        &running[t], nullptr,
-        [](void *ptr) -> void * {
-          PageInState &state = *(PageInState *)ptr;
-          while (true) {
-            pthread_mutex_lock(&state.mutex);
-            if (state.counter >= state.deferred.size()) {
-              pthread_mutex_unlock(&state.mutex);
-              return nullptr;
-            }
-            DeferredFile &file = state.deferred[state.counter];
-            state.counter += 1;
-            pthread_mutex_unlock(&state.mutex);
-
-            const char *page = file.buffer->getBuffer().data(),
-                       *end = page + file.buffer->getBuffer().size();
-            state.bytes += end - page;
-
-            int t = 0; // Reference each page to load it into memory.
-            for (; page < end; page += state.pageSize)
-              t += *page;
-            state.total += t; // Avoids the loop being optimised out.
-          }
-        },
-        &state);
-
-  for (int t = 0; t < nthreads; t++)
-    pthread_join(running[t], nullptr);
-
-  pthread_mutex_destroy(&state.mutex);
-#endif
-}
-
 void createFiles(const InputArgList &args) {
   TimeTraceScope timeScope("Load input files");
   // This loop should be reserved for options whose exact ordering matters.
@@ -1380,7 +1392,7 @@ void createFiles(const InputArgList &args) {
   }
 
   if (config->readThreads) {
-    multiThreadedPageIn(deferredFiles, config->readThreads);
+    multiThreadedPageIn(deferredFiles);
     for (auto &file : deferredFiles)
       processFile(file.buffer, file.path, file.loadType, file.isLazy);
   }



More information about the llvm-commits mailing list