[lld] [lld][MachO]Multi-threaded i/o. 40% speedup linking a large project. (PR #147134)
John Holdsworth via llvm-commits
llvm-commits at lists.llvm.org
Sun Jul 6 10:47:21 PDT 2025
https://github.com/johnno1962 updated https://github.com/llvm/llvm-project/pull/147134
>From c55b5b2c9f49d23a6063cc6e7a756e22c9cede43 Mon Sep 17 00:00:00 2001
From: John Holdsworth <github at johnholdsworth.com>
Date: Sat, 5 Jul 2025 10:24:51 +0200
Subject: [PATCH 1/3] Multi-threaded disk i/o.
---
lld/MachO/Config.h | 1 +
lld/MachO/Driver.cpp | 111 +++++++++++++++++++++++++++++++++++++++----
lld/MachO/Options.td | 3 ++
3 files changed, 106 insertions(+), 9 deletions(-)
diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index a01e60efbe761..92c6eb85f4123 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -186,6 +186,7 @@ struct Configuration {
bool interposable = false;
bool errorForArchMismatch = false;
bool ignoreAutoLink = false;
+ int readThreads = 0;
// ld64 allows invalid auto link options as long as the link succeeds. LLD
// does not, but there are cases in the wild where the invalid linker options
// exist. This allows users to ignore the specific invalid options in the case
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 9eb391c4ee1b9..36626720aa252 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -44,6 +44,7 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Parallel.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
#include "llvm/Support/TarWriter.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/TimeProfiler.h"
@@ -282,11 +283,11 @@ static void saveThinArchiveToRepro(ArchiveFile const *file) {
": Archive::children failed: " + toString(std::move(e)));
}
-static InputFile *addFile(StringRef path, LoadType loadType,
- bool isLazy = false, bool isExplicit = true,
- bool isBundleLoader = false,
- bool isForceHidden = false) {
- std::optional<MemoryBufferRef> buffer = readFile(path);
+static InputFile *deferredAddFile(std::optional<MemoryBufferRef> buffer,
+ StringRef path, LoadType loadType,
+ bool isLazy = false, bool isExplicit = true,
+ bool isBundleLoader = false,
+ bool isForceHidden = false) {
if (!buffer)
return nullptr;
MemoryBufferRef mbref = *buffer;
@@ -441,6 +442,14 @@ static InputFile *addFile(StringRef path, LoadType loadType,
return newFile;
}
+static InputFile *addFile(StringRef path, LoadType loadType,
+ bool isLazy = false, bool isExplicit = true,
+ bool isBundleLoader = false,
+ bool isForceHidden = false) {
+ return deferredAddFile(readFile(path), path, loadType, isLazy, isExplicit,
+ isBundleLoader, isForceHidden);
+}
+
static std::vector<StringRef> missingAutolinkWarnings;
static void addLibrary(StringRef name, bool isNeeded, bool isWeak,
bool isReexport, bool isHidden, bool isExplicit,
@@ -564,13 +573,23 @@ void macho::resolveLCLinkerOptions() {
}
}
-static void addFileList(StringRef path, bool isLazy) {
+typedef struct {
+ StringRef path;
+ std::optional<MemoryBufferRef> buffer;
+} DeferredFile;
+
+static void addFileList(StringRef path, bool isLazy,
+ std::vector<DeferredFile> &deferredFiles) {
std::optional<MemoryBufferRef> buffer = readFile(path);
if (!buffer)
return;
MemoryBufferRef mbref = *buffer;
for (StringRef path : args::getLines(mbref))
- addFile(rerootPath(path), LoadType::CommandLine, isLazy);
+ if (config->readThreads) {
+ StringRef rrpath = rerootPath(path);
+ deferredFiles.push_back({rrpath, readFile(rrpath)});
+ } else
+ addFile(rerootPath(path), LoadType::CommandLine, isLazy);
}
// We expect sub-library names of the form "libfoo", which will match a dylib
@@ -1215,13 +1234,68 @@ static void handleSymbolPatterns(InputArgList &args,
parseSymbolPatternsFile(arg, symbolPatterns);
}
-static void createFiles(const InputArgList &args) {
+// Most input files have been mapped but not yet paged in.
+// This code forces the page-ins on multiple threads so
+// the process is not stalled waiting on disk buffer i/o.
+void multiThreadedPageIn(std::vector<DeferredFile> &deferred, int nthreads) {
+#ifndef _WIN32
+ typedef struct {
+ std::vector<DeferredFile> &deferred;
+ size_t counter, total, pageSize;
+ pthread_mutex_t mutex;
+ } PageInState;
+ PageInState state = {deferred, 0, 0,
+ llvm::sys::Process::getPageSizeEstimate(),
+ pthread_mutex_t()};
+ pthread_mutex_init(&state.mutex, NULL);
+
+ pthread_t running[200];
+ int maxthreads = sizeof running / sizeof running[0];
+ if (nthreads > maxthreads)
+ nthreads = maxthreads;
+
+ for (int t = 0; t < nthreads; t++)
+ pthread_create(
+ &running[t], nullptr,
+ [](void *ptr) -> void * {
+ PageInState &state = *(PageInState *)ptr;
+ static int total = 0;
+ while (true) {
+ pthread_mutex_lock(&state.mutex);
+ if (state.counter >= state.deferred.size()) {
+ pthread_mutex_unlock(&state.mutex);
+ return nullptr;
+ }
+ DeferredFile &add = state.deferred[state.counter];
+ state.counter += 1;
+ pthread_mutex_unlock(&state.mutex);
+
+ int t = 0; // Reference each page to load it into memory.
+ for (const char *page = add.buffer->getBuffer().data(),
+ *end = page + add.buffer->getBuffer().size();
+ page < end; page += state.pageSize)
+ t += *page;
+ state.total += t; // Avoids whole section being optimised out.
+ }
+ },
+ &state);
+
+ for (int t = 0; t < nthreads; t++)
+ pthread_join(running[t], nullptr);
+
+ pthread_mutex_destroy(&state.mutex);
+#endif
+}
+
+void createFiles(const InputArgList &args) {
TimeTraceScope timeScope("Load input files");
// This loop should be reserved for options whose exact ordering matters.
// Other options should be handled via filtered() and/or getLastArg().
bool isLazy = false;
// If we've processed an opening --start-lib, without a matching --end-lib
bool inLib = false;
+ std::vector<DeferredFile> deferredFiles;
+
for (const Arg *arg : args) {
const Option &opt = arg->getOption();
warnIfDeprecatedOption(opt);
@@ -1229,6 +1303,11 @@ static void createFiles(const InputArgList &args) {
switch (opt.getID()) {
case OPT_INPUT:
+ if (config->readThreads) {
+ StringRef rrpath = rerootPath(arg->getValue());
+ deferredFiles.push_back({rrpath, readFile(rrpath)});
+ break;
+ }
addFile(rerootPath(arg->getValue()), LoadType::CommandLine, isLazy);
break;
case OPT_needed_library:
@@ -1249,7 +1328,7 @@ static void createFiles(const InputArgList &args) {
dylibFile->forceWeakImport = true;
break;
case OPT_filelist:
- addFileList(arg->getValue(), isLazy);
+ addFileList(arg->getValue(), isLazy, deferredFiles);
break;
case OPT_force_load:
addFile(rerootPath(arg->getValue()), LoadType::CommandLineForce);
@@ -1295,6 +1374,12 @@ static void createFiles(const InputArgList &args) {
break;
}
}
+
+ if (config->readThreads) {
+ multiThreadedPageIn(deferredFiles, config->readThreads);
+ for (auto &add : deferredFiles)
+ deferredAddFile(add.buffer, add.path, LoadType::CommandLine, isLazy);
+ }
}
static void gatherInputSections() {
@@ -1687,6 +1772,14 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
}
}
+ if (auto *arg = args.getLastArg(OPT_read_threads)) {
+ StringRef v(arg->getValue());
+ unsigned threads = 0;
+ if (!llvm::to_integer(v, threads, 0) || threads < 0)
+ error(arg->getSpelling() + ": expected a positive integer, but got '" +
+ arg->getValue() + "'");
+ config->readThreads = threads;
+ }
if (auto *arg = args.getLastArg(OPT_threads_eq)) {
StringRef v(arg->getValue());
unsigned threads = 0;
diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td
index 4f0602f59812b..3dc98fccc1b7b 100644
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -396,6 +396,9 @@ def dead_strip : Flag<["-"], "dead_strip">,
def interposable : Flag<["-"], "interposable">,
HelpText<"Indirects access to all exported symbols in an image">,
Group<grp_opts>;
+def read_threads : Joined<["--"], "read-threads=">,
+ HelpText<"Number of threads to use paging in files.">,
+ Group<grp_lld>;
def order_file : Separate<["-"], "order_file">,
MetaVarName<"<file>">,
HelpText<"Layout functions and data according to specification in <file>">,
>From 3d11a33599246bbf5e358b554489aeae854ed7be Mon Sep 17 00:00:00 2001
From: John Holdsworth <github at johnholdsworth.com>
Date: Sun, 6 Jul 2025 10:05:38 +0200
Subject: [PATCH 2/3] Afterthoughts.
---
lld/MachO/Driver.cpp | 82 +++++++++++++++++++++++---------------------
1 file changed, 43 insertions(+), 39 deletions(-)
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 36626720aa252..5b9f9cc2939bd 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -283,11 +283,11 @@ static void saveThinArchiveToRepro(ArchiveFile const *file) {
": Archive::children failed: " + toString(std::move(e)));
}
-static InputFile *deferredAddFile(std::optional<MemoryBufferRef> buffer,
- StringRef path, LoadType loadType,
- bool isLazy = false, bool isExplicit = true,
- bool isBundleLoader = false,
- bool isForceHidden = false) {
+static InputFile *processFile(std::optional<MemoryBufferRef> buffer,
+ StringRef path, LoadType loadType,
+ bool isLazy = false, bool isExplicit = true,
+ bool isBundleLoader = false,
+ bool isForceHidden = false) {
if (!buffer)
return nullptr;
MemoryBufferRef mbref = *buffer;
@@ -446,8 +446,24 @@ static InputFile *addFile(StringRef path, LoadType loadType,
bool isLazy = false, bool isExplicit = true,
bool isBundleLoader = false,
bool isForceHidden = false) {
- return deferredAddFile(readFile(path), path, loadType, isLazy, isExplicit,
- isBundleLoader, isForceHidden);
+ return processFile(readFile(path), path, loadType, isLazy, isExplicit,
+ isBundleLoader, isForceHidden);
+}
+
+typedef struct {
+ StringRef path;
+ LoadType loadType;
+ bool isLazy;
+ std::optional<MemoryBufferRef> buffer;
+} DeferredFile;
+
+static void deferFile(StringRef path, LoadType loadType, bool isLazy,
+ std::vector<DeferredFile> &deferred) {
+ std::optional<MemoryBufferRef> buffer = readFile(path);
+ if (config->readThreads)
+ deferred.push_back({path, loadType, isLazy, buffer});
+ else
+ processFile(buffer, path, loadType, isLazy);
}
static std::vector<StringRef> missingAutolinkWarnings;
@@ -573,11 +589,6 @@ void macho::resolveLCLinkerOptions() {
}
}
-typedef struct {
- StringRef path;
- std::optional<MemoryBufferRef> buffer;
-} DeferredFile;
-
static void addFileList(StringRef path, bool isLazy,
std::vector<DeferredFile> &deferredFiles) {
std::optional<MemoryBufferRef> buffer = readFile(path);
@@ -585,11 +596,7 @@ static void addFileList(StringRef path, bool isLazy,
return;
MemoryBufferRef mbref = *buffer;
for (StringRef path : args::getLines(mbref))
- if (config->readThreads) {
- StringRef rrpath = rerootPath(path);
- deferredFiles.push_back({rrpath, readFile(rrpath)});
- } else
- addFile(rerootPath(path), LoadType::CommandLine, isLazy);
+ deferFile(rerootPath(path), LoadType::CommandLine, isLazy, deferredFiles);
}
// We expect sub-library names of the form "libfoo", which will match a dylib
@@ -1239,43 +1246,44 @@ static void handleSymbolPatterns(InputArgList &args,
// the process is not stalled waiting on disk buffer i/o.
void multiThreadedPageIn(std::vector<DeferredFile> &deferred, int nthreads) {
#ifndef _WIN32
+#define MaxReadThreads 200
typedef struct {
std::vector<DeferredFile> &deferred;
- size_t counter, total, pageSize;
+ size_t counter, bytes, total, pageSize;
pthread_mutex_t mutex;
} PageInState;
- PageInState state = {deferred, 0, 0,
- llvm::sys::Process::getPageSizeEstimate(),
- pthread_mutex_t()};
+ PageInState state = {
+ deferred, 0, 0, 0, llvm::sys::Process::getPageSizeEstimate(),
+ pthread_mutex_t()};
pthread_mutex_init(&state.mutex, NULL);
- pthread_t running[200];
- int maxthreads = sizeof running / sizeof running[0];
- if (nthreads > maxthreads)
- nthreads = maxthreads;
+ pthread_t running[MaxReadThreads];
+ if (nthreads > MaxReadThreads)
+ nthreads = MaxReadThreads;
for (int t = 0; t < nthreads; t++)
pthread_create(
&running[t], nullptr,
[](void *ptr) -> void * {
PageInState &state = *(PageInState *)ptr;
- static int total = 0;
while (true) {
pthread_mutex_lock(&state.mutex);
if (state.counter >= state.deferred.size()) {
pthread_mutex_unlock(&state.mutex);
return nullptr;
}
- DeferredFile &add = state.deferred[state.counter];
+ DeferredFile &file = state.deferred[state.counter];
state.counter += 1;
pthread_mutex_unlock(&state.mutex);
+ const char *page = file.buffer->getBuffer().data(),
+ *end = page + file.buffer->getBuffer().size();
+ state.bytes += end - page;
+
int t = 0; // Reference each page to load it into memory.
- for (const char *page = add.buffer->getBuffer().data(),
- *end = page + add.buffer->getBuffer().size();
- page < end; page += state.pageSize)
+ for (; page < end; page += state.pageSize)
t += *page;
- state.total += t; // Avoids whole section being optimised out.
+ state.total += t; // Avoids the loop being optimised out.
}
},
&state);
@@ -1303,12 +1311,8 @@ void createFiles(const InputArgList &args) {
switch (opt.getID()) {
case OPT_INPUT:
- if (config->readThreads) {
- StringRef rrpath = rerootPath(arg->getValue());
- deferredFiles.push_back({rrpath, readFile(rrpath)});
- break;
- }
- addFile(rerootPath(arg->getValue()), LoadType::CommandLine, isLazy);
+ deferFile(rerootPath(arg->getValue()), LoadType::CommandLine, isLazy,
+ deferredFiles);
break;
case OPT_needed_library:
if (auto *dylibFile = dyn_cast_or_null<DylibFile>(
@@ -1377,8 +1381,8 @@ void createFiles(const InputArgList &args) {
if (config->readThreads) {
multiThreadedPageIn(deferredFiles, config->readThreads);
- for (auto &add : deferredFiles)
- deferredAddFile(add.buffer, add.path, LoadType::CommandLine, isLazy);
+ for (auto &file : deferredFiles)
+ processFile(file.buffer, file.path, file.loadType, file.isLazy);
}
}
>From fdc4c3898113d8dce06d6ce72d533df59edf1e94 Mon Sep 17 00:00:00 2001
From: John Holdsworth <github at johnholdsworth.com>
Date: Sun, 6 Jul 2025 18:09:19 +0200
Subject: [PATCH 3/3] multiThreadedPageIn of library archives.
---
lld/MachO/Driver.cpp | 138 +++++++++++++++++++++++--------------------
1 file changed, 75 insertions(+), 63 deletions(-)
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 5b9f9cc2939bd..5fe7d38f7ade0 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -283,6 +283,71 @@ static void saveThinArchiveToRepro(ArchiveFile const *file) {
": Archive::children failed: " + toString(std::move(e)));
}
+struct DeferredFile {
+  StringRef path;    // Handed to processFile() once paging-in is done.
+  LoadType loadType; // Forwarded unchanged to processFile().
+  bool isLazy;       // Forwarded unchanged to processFile().
+  std::optional<MemoryBufferRef> buffer; // std::nullopt for archive members.
+  const char *start; // First byte of the region the page-in threads touch.
+  size_t size;       // Length in bytes of that region.
+};
+
+// Most input files have been mapped but not yet paged in. This touches one
+// byte per page of every deferred entry from up to config->readThreads
+// threads, so the link is not stalled waiting on disk buffer i/o.
+static void multiThreadedPageIn(std::vector<DeferredFile> &deferred) {
+#ifndef _WIN32
+#define MaxReadThreads 200
+  typedef struct {
+    std::vector<DeferredFile> &deferred;
+    size_t counter, total, pageSize;
+    pthread_mutex_t mutex; // Guards counter, total and totalBytes.
+  } PageInState;
+  PageInState state = {deferred, 0, 0,
+                       llvm::sys::Process::getPageSizeEstimate(),
+                       pthread_mutex_t()};
+  static size_t totalBytes; // Running tally across calls, for the debug print.
+
+  pthread_t running[MaxReadThreads];
+  if (config->readThreads > MaxReadThreads)
+    config->readThreads = MaxReadThreads;
+  pthread_mutex_init(&state.mutex, nullptr);
+
+  auto worker = [](void *ptr) -> void * {
+    PageInState &state = *static_cast<PageInState *>(ptr);
+    for (size_t sum = 0;;) {
+      pthread_mutex_lock(&state.mutex);
+      if (state.counter >= state.deferred.size()) {
+        state.total += sum; // Avoids the loop being optimised out.
+        pthread_mutex_unlock(&state.mutex);
+        return nullptr;
+      }
+      DeferredFile &file = state.deferred[state.counter++];
+      totalBytes += file.size; // Shared by every worker: update while locked.
+      pthread_mutex_unlock(&state.mutex);
+
+      // Reference each page to load it into memory.
+      const char *page = file.start, *end = page + file.size;
+      for (; page < end; page += state.pageSize)
+        sum += *page;
+    }
+  };
+
+  // Join only the threads that were really created; joining a pthread_t that
+  // pthread_create failed to fill in is undefined.
+  int started = 0;
+  for (int t = 0; t < config->readThreads; t++)
+    if (pthread_create(&running[started], nullptr, worker, &state) == 0)
+      started++;
+  for (int t = 0; t < started; t++)
+    pthread_join(running[t], nullptr);
+
+  pthread_mutex_destroy(&state.mutex);
+  if (getenv("LLD_MULTI_THREAD_PAGE"))
+    printf("multiThreadedPageIn %zu/%zu\n", totalBytes, deferred.size());
+#endif
+}
+
static InputFile *processFile(std::optional<MemoryBufferRef> buffer,
StringRef path, LoadType loadType,
bool isLazy = false, bool isExplicit = true,
@@ -367,6 +432,7 @@ static InputFile *processFile(std::optional<MemoryBufferRef> buffer,
// we already found that it contains an ObjC symbol.
if (readFile(path)) {
Error e = Error::success();
+ std::vector<DeferredFile> deferredFiles;
for (const object::Archive::Child &c : file->getArchive().children(e)) {
Expected<MemoryBufferRef> mb = c.getMemoryBufferRef();
if (!mb) {
@@ -380,6 +446,9 @@ static InputFile *processFile(std::optional<MemoryBufferRef> buffer,
continue;
}
+ deferredFiles.push_back({path, LoadType::CommandLine, false,
+ std::nullopt, mb->getBuffer().data(),
+ mb->getBuffer().size()});
if (!hasObjCSection(*mb))
continue;
if (Error e = file->fetch(c, "-ObjC"))
@@ -389,6 +458,8 @@ static InputFile *processFile(std::optional<MemoryBufferRef> buffer,
if (e)
error(toString(file) +
": Archive::children failed: " + toString(std::move(e)));
+ if (deferredFiles.size() > 1)
+ multiThreadedPageIn(deferredFiles);
}
}
file->addLazySymbols();
@@ -450,18 +521,13 @@ static InputFile *addFile(StringRef path, LoadType loadType,
isBundleLoader, isForceHidden);
}
-typedef struct {
- StringRef path;
- LoadType loadType;
- bool isLazy;
- std::optional<MemoryBufferRef> buffer;
-} DeferredFile;
-
static void deferFile(StringRef path, LoadType loadType, bool isLazy,
std::vector<DeferredFile> &deferred) {
std::optional<MemoryBufferRef> buffer = readFile(path);
if (config->readThreads)
- deferred.push_back({path, loadType, isLazy, buffer});
+    deferred.push_back({path, loadType, isLazy, buffer, // buffer may be empty
+                        buffer ? buffer->getBuffer().data() : nullptr,
+                        buffer ? buffer->getBuffer().size() : 0});
else
processFile(buffer, path, loadType, isLazy);
}
@@ -1241,60 +1307,6 @@ static void handleSymbolPatterns(InputArgList &args,
parseSymbolPatternsFile(arg, symbolPatterns);
}
-// Most input files have been mapped but not yet paged in.
-// This code forces the page-ins on multiple threads so
-// the process is not stalled waiting on disk buffer i/o.
-void multiThreadedPageIn(std::vector<DeferredFile> &deferred, int nthreads) {
-#ifndef _WIN32
-#define MaxReadThreads 200
- typedef struct {
- std::vector<DeferredFile> &deferred;
- size_t counter, bytes, total, pageSize;
- pthread_mutex_t mutex;
- } PageInState;
- PageInState state = {
- deferred, 0, 0, 0, llvm::sys::Process::getPageSizeEstimate(),
- pthread_mutex_t()};
- pthread_mutex_init(&state.mutex, NULL);
-
- pthread_t running[MaxReadThreads];
- if (nthreads > MaxReadThreads)
- nthreads = MaxReadThreads;
-
- for (int t = 0; t < nthreads; t++)
- pthread_create(
- &running[t], nullptr,
- [](void *ptr) -> void * {
- PageInState &state = *(PageInState *)ptr;
- while (true) {
- pthread_mutex_lock(&state.mutex);
- if (state.counter >= state.deferred.size()) {
- pthread_mutex_unlock(&state.mutex);
- return nullptr;
- }
- DeferredFile &file = state.deferred[state.counter];
- state.counter += 1;
- pthread_mutex_unlock(&state.mutex);
-
- const char *page = file.buffer->getBuffer().data(),
- *end = page + file.buffer->getBuffer().size();
- state.bytes += end - page;
-
- int t = 0; // Reference each page to load it into memory.
- for (; page < end; page += state.pageSize)
- t += *page;
- state.total += t; // Avoids the loop being optimised out.
- }
- },
- &state);
-
- for (int t = 0; t < nthreads; t++)
- pthread_join(running[t], nullptr);
-
- pthread_mutex_destroy(&state.mutex);
-#endif
-}
-
void createFiles(const InputArgList &args) {
TimeTraceScope timeScope("Load input files");
// This loop should be reserved for options whose exact ordering matters.
@@ -1380,7 +1392,7 @@ void createFiles(const InputArgList &args) {
}
if (config->readThreads) {
- multiThreadedPageIn(deferredFiles, config->readThreads);
+ multiThreadedPageIn(deferredFiles);
for (auto &file : deferredFiles)
processFile(file.buffer, file.path, file.loadType, file.isLazy);
}
More information about the llvm-commits
mailing list