[lld] [lld][Macho]Multi-threaded disk i/o. 20% speedup linking a large project. (PR #147134)
John Holdsworth via llvm-commits
llvm-commits at lists.llvm.org
Sat Jul 5 01:48:42 PDT 2025
https://github.com/johnno1962 created https://github.com/llvm/llvm-project/pull/147134
This PR adds a new option to lld, `--read-threads=<n>` (for example `--read-threads=20`), that defers all disk I/O and then performs it on multiple threads, so the process is never stalled waiting for mapped files to be paged in, which saves elapsed time. For a large link (iterating on the Chromium project), these are the baseline link times, in seconds, after saving a single file and rebuilding:
26.01, 25.84, 26.15, 26.03, 27.10, 25.90, 25.86, 25.81, 25.80, 25.87
With the proposed code change and the `--read-threads=20` option, the link times (in seconds) drop to the following:
21.13, 20.35, 20.01, 20.01, 20.30, 20.39, 19.97, 20.23, 20.17, 20.23
The key change is the new function `multiThreadedPageIn()` in Driver.cpp. When the option is not set, lld behaves as before.
>From ae6e10dbc863b3ab40f5b0ca893304b0c7ccd435 Mon Sep 17 00:00:00 2001
From: John Holdsworth <github at johnholdsworth.com>
Date: Sat, 5 Jul 2025 10:24:51 +0200
Subject: [PATCH] Multi-threaded disk i/o.
---
lld/MachO/Config.h | 1 +
lld/MachO/Driver.cpp | 104 ++++++++++++++++++++++++++++++++++++++-----
lld/MachO/Options.td | 3 ++
3 files changed, 98 insertions(+), 10 deletions(-)
diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index a01e60efbe761..92c6eb85f4123 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -186,6 +186,7 @@ struct Configuration {
bool interposable = false;
bool errorForArchMismatch = false;
bool ignoreAutoLink = false;
+ int readThreads = 0;
// ld64 allows invalid auto link options as long as the link succeeds. LLD
// does not, but there are cases in the wild where the invalid linker options
// exist. This allows users to ignore the specific invalid options in the case
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 9eb391c4ee1b9..a244f2781c22c 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -47,6 +47,7 @@
#include "llvm/Support/TarWriter.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/TimeProfiler.h"
+#include "llvm/Support/Process.h"
#include "llvm/TargetParser/Host.h"
#include "llvm/TextAPI/Architecture.h"
#include "llvm/TextAPI/PackedVersion.h"
@@ -282,11 +283,11 @@ static void saveThinArchiveToRepro(ArchiveFile const *file) {
": Archive::children failed: " + toString(std::move(e)));
}
-static InputFile *addFile(StringRef path, LoadType loadType,
- bool isLazy = false, bool isExplicit = true,
- bool isBundleLoader = false,
- bool isForceHidden = false) {
- std::optional<MemoryBufferRef> buffer = readFile(path);
+static InputFile *deferredAddFile(std::optional<MemoryBufferRef> buffer,
+ StringRef path, LoadType loadType,
+ bool isLazy = false, bool isExplicit = true,
+ bool isBundleLoader = false,
+ bool isForceHidden = false) {
if (!buffer)
return nullptr;
MemoryBufferRef mbref = *buffer;
@@ -441,6 +442,14 @@ static InputFile *addFile(StringRef path, LoadType loadType,
return newFile;
}
+static InputFile *addFile(StringRef path, LoadType loadType,
+ bool isLazy = false, bool isExplicit = true,
+ bool isBundleLoader = false,
+ bool isForceHidden = false) {
+ return deferredAddFile(readFile(path), path, loadType, isLazy, // Eager path: read now, forward the buffer.
+ isExplicit, isBundleLoader, isForceHidden);
+}
+
static std::vector<StringRef> missingAutolinkWarnings;
static void addLibrary(StringRef name, bool isNeeded, bool isWeak,
bool isReexport, bool isHidden, bool isExplicit,
@@ -564,13 +573,21 @@ void macho::resolveLCLinkerOptions() {
}
}
-static void addFileList(StringRef path, bool isLazy) {
+typedef struct { StringRef path; std::optional<MemoryBufferRef> buffer; } DeferredFile; // An input path paired with the (optional) buffer read for it.
+
+static void addFileList(StringRef path, bool isLazy,
+ std::vector<DeferredFile> &deferredFiles, int readThreads) {
std::optional<MemoryBufferRef> buffer = readFile(path);
if (!buffer)
return;
MemoryBufferRef mbref = *buffer;
for (StringRef path : args::getLines(mbref))
- addFile(rerootPath(path), LoadType::CommandLine, isLazy);
+ if (readThreads) {
+ StringRef rrpath = rerootPath(path);
+ deferredFiles.push_back({rrpath, readFile(rrpath)});
+ }
+ else
+ addFile(rerootPath(path), LoadType::CommandLine, isLazy);
}
// We expect sub-library names of the form "libfoo", which will match a dylib
@@ -1215,13 +1232,61 @@ static void handleSymbolPatterns(InputArgList &args,
parseSymbolPatternsFile(arg, symbolPatterns);
}
-static void createFiles(const InputArgList &args) {
+// Reads one byte from each page of every deferred file's buffer, using up to
+// `nthreads` pthreads (capped at 200), so the page-ins of the mapped inputs
+// happen on several threads rather than stalling the process on disk i/o.
+void multiThreadedPageIn(std::vector<DeferredFile> &deferred, int nthreads) {
+ typedef struct {
+ std::vector<DeferredFile> &deferred;
+ size_t counter, total, pageSize; // counter: next index to claim; total: sink for the bytes read.
+ pthread_mutex_t mutex;
+ } PageInState;
+ PageInState state = {deferred, 0, 0,
+ llvm::sys::Process::getPageSizeEstimate(), pthread_mutex_t()};
+ pthread_mutex_init(&state.mutex, NULL);
+
+ pthread_t running[200]; // Fixed-size handle array; this is what caps the thread count.
+ int maxthreads = sizeof running / sizeof running[0];
+ if (nthreads > maxthreads)
+ nthreads = maxthreads;
+ for (int t=0; t<nthreads; t++)
+ pthread_create(&running[t], nullptr, [](void* ptr) -> void*{ // NOTE(review): result ignored; a failed create leaves running[t] unset for the join below.
+ PageInState &state = *(PageInState *)ptr;
+ static int total = 0; // NOTE(review): appears unused here; state.total is what gets updated.
+ while (true) {
+ pthread_mutex_lock(&state.mutex); // The mutex guards only the claim of the next index.
+ if (state.counter >= state.deferred.size()) {
+ pthread_mutex_unlock(&state.mutex);
+ return nullptr;
+ }
+ DeferredFile &add = state.deferred[state.counter];
+ state.counter += 1;
+ pthread_mutex_unlock(&state.mutex);
+
+ int t = 0; // Touch the first byte of each page so it is loaded into memory.
+ for (const char *start = add.buffer->getBuffer().data(), // NOTE(review): add.buffer is a std::optional dereferenced without a has-value check.
+ *page = start; page<start+add.buffer->getBuffer().size();
+ page += state.pageSize)
+ t += *page;
+ state.total += t; // Keeps the reads from being optimised out. NOTE(review): written after the unlock, so not synchronised across threads.
+ }
+ }, &state);
+
+ for (int t=0; t<nthreads; t++)
+ pthread_join(running[t], nullptr); // Wait for every worker before the mutex is destroyed.
+
+ pthread_mutex_destroy(&state.mutex);
+}
+
+void createFiles(const InputArgList &args, int readThreads) {
TimeTraceScope timeScope("Load input files");
// This loop should be reserved for options whose exact ordering matters.
// Other options should be handled via filtered() and/or getLastArg().
bool isLazy = false;
// If we've processed an opening --start-lib, without a matching --end-lib
bool inLib = false;
+ std::vector<DeferredFile> deferredFiles;
+
for (const Arg *arg : args) {
const Option &opt = arg->getOption();
warnIfDeprecatedOption(opt);
@@ -1229,6 +1294,11 @@ static void createFiles(const InputArgList &args) {
switch (opt.getID()) {
case OPT_INPUT:
+ if (readThreads) {
+ StringRef rrpath = rerootPath(arg->getValue());
+ deferredFiles.push_back({rrpath,readFile(rrpath)});
+ break;
+ }
addFile(rerootPath(arg->getValue()), LoadType::CommandLine, isLazy);
break;
case OPT_needed_library:
@@ -1249,7 +1319,7 @@ static void createFiles(const InputArgList &args) {
dylibFile->forceWeakImport = true;
break;
case OPT_filelist:
- addFileList(arg->getValue(), isLazy);
+ addFileList(arg->getValue(), isLazy, deferredFiles, readThreads);
break;
case OPT_force_load:
addFile(rerootPath(arg->getValue()), LoadType::CommandLineForce);
@@ -1295,6 +1365,12 @@ static void createFiles(const InputArgList &args) {
break;
}
}
+
+ if (readThreads) {
+ multiThreadedPageIn(deferredFiles, readThreads);
+ for (auto &add : deferredFiles)
+ deferredAddFile(add.buffer, add.path, LoadType::CommandLine, isLazy);
+ }
}
static void gatherInputSections() {
@@ -1687,6 +1763,14 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
}
}
+ if (auto *arg = args.getLastArg(OPT_read_threads)) {
+ StringRef v(arg->getValue());
+ unsigned threads = 0;
+ if (!llvm::to_integer(v, threads, 0) || threads < 0)
+ error(arg->getSpelling() + ": expected a positive integer, but got '" +
+ arg->getValue() + "'");
+ config->readThreads = threads;
+ }
if (auto *arg = args.getLastArg(OPT_threads_eq)) {
StringRef v(arg->getValue());
unsigned threads = 0;
@@ -2107,7 +2191,7 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
TimeTraceScope timeScope("ExecuteLinker");
initLLVM(); // must be run before any call to addFile()
- createFiles(args);
+ createFiles(args, config->readThreads);
// Now that all dylibs have been loaded, search for those that should be
// re-exported.
diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td
index 4f0602f59812b..3dc98fccc1b7b 100644
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -396,6 +396,9 @@ def dead_strip : Flag<["-"], "dead_strip">,
def interposable : Flag<["-"], "interposable">,
HelpText<"Indirects access to all exported symbols in an image">,
Group<grp_opts>;
+def read_threads : Joined<["--"], "read-threads=">,
+ HelpText<"Number of threads to use paging in files.">,
+ Group<grp_lld>;
def order_file : Separate<["-"], "order_file">,
MetaVarName<"<file>">,
HelpText<"Layout functions and data according to specification in <file>">,
More information about the llvm-commits
mailing list