[lld] 2b24287 - [lld][MachO] Multi-threaded preload of input files into memory (#147134)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 27 11:11:27 PDT 2025
Author: John Holdsworth
Date: 2025-08-27T11:11:22-07:00
New Revision: 2b2428794c0fc86a3eed74a264f2dd0a9548487c
URL: https://github.com/llvm/llvm-project/commit/2b2428794c0fc86a3eed74a264f2dd0a9548487c
DIFF: https://github.com/llvm/llvm-project/commit/2b2428794c0fc86a3eed74a264f2dd0a9548487c.diff
LOG: [lld][MachO] Multi-threaded preload of input files into memory (#147134)
This PR adds a new lld option, `--read-workers=<N>` (e.g. `--read-workers=20`),
that defers all disk I/O and then performs it on multiple threads, so the
process is never stalled waiting for mapped input files to be paged in. This
reduces elapsed link time. For a large link (iterating on Chromium), these are
the baseline link times, in seconds, measured inside Xcode after saving a
single file and rebuilding:
26.01, 25.84, 26.15, 26.03, 27.10, 25.90, 25.86, 25.81, 25.80, 25.87
With the proposed code change, and using the `--read-workers=20` option,
the linking times reduce to the following:
21.13, 20.35, 20.01, 20.01, 20.30, 20.39, 19.97, 20.23, 20.17, 20.23
The secret sauce is in the new function `multiThreadedPageIn()` in
Driver.cpp. Without the option lld behaves as before.
Edit: with subsequent commits I've taken this novel I/O approach to its
full potential. The latest link times are now:
13.2, 11.9, 12.12, 12.01, 11.99, 13.11, 11.93, 11.95, 12.18, 11.97
Chrome still links and runs, so nothing appears to be broken. Although the
page-in is multi-threaded, all memory access is read-only and the original code
paths are unchanged. All that happens is that the system is asked to page in
files proactively, rather than waiting for processing to hit a page fault,
which would otherwise stall the process.
---------
Co-authored-by: Daniel Rodríguez Troitiño <drodrigueztroitino at gmail.com>
Co-authored-by: Ellis Hoag <ellis.sparky.hoag at gmail.com>
Added:
Modified:
lld/MachO/Config.h
lld/MachO/Driver.cpp
lld/MachO/Options.td
Removed:
################################################################################
diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index a01e60efbe761..19dba790c1c7c 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -186,6 +186,7 @@ struct Configuration {
bool interposable = false;
bool errorForArchMismatch = false;
bool ignoreAutoLink = false;
+ int readWorkers = 0;
// ld64 allows invalid auto link options as long as the link succeeds. LLD
// does not, but there are cases in the wild where the invalid linker options
// exist. This allows users to ignore the specific invalid options in the case
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index f11c65a5583ad..bcba759b2bbee 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -44,8 +44,10 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Parallel.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
#include "llvm/Support/TarWriter.h"
#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/Threading.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/TargetParser/Host.h"
#include "llvm/TextAPI/Architecture.h"
@@ -282,11 +284,117 @@ static void saveThinArchiveToRepro(ArchiveFile const *file) {
": Archive::children failed: " + toString(std::move(e)));
}
-static InputFile *addFile(StringRef path, LoadType loadType,
- bool isLazy = false, bool isExplicit = true,
- bool isBundleLoader = false,
- bool isForceHidden = false) {
- std::optional<MemoryBufferRef> buffer = readFile(path);
+// One postponed input. deferFile() appends an entry per file when
+// config->readWorkers is non-zero, and processFile() appends one per archive
+// member when it is given an archiveContents list. createFiles() later passes
+// the three fields back to processFile().
+struct DeferredFile {
+ // Path handed to readFile() (for archive members: the path of the archive).
+ StringRef path;
+ // Forwarded unchanged as processFile()'s isLazy argument.
+ bool isLazy;
+ // Non-owning view of the file contents; the page-in workers read one byte
+ // from each page of this buffer.
+ MemoryBufferRef buffer;
+};
+// A batch of deferred inputs, submitted to multiThreadedPageIn() as a unit.
+using DeferredFiles = std::vector<DeferredFile>;
+
+// Runs queued closures one at a time, in FIFO order, on a lazily started
+// background thread.
+//
+// The worker thread exits as soon as it finds the queue empty; the next
+// queueWork() call reaps it and, if there is new work, starts a fresh one.
+// queueWork() is not safe to call concurrently from several threads (two
+// callers could both try to join the same worker). There is no destructor, so
+// the worker is never joined on destruction: an instance has to outlive any
+// work given to it (the existing caller keeps it in a function-local static).
+class SerialBackgroundQueue {
+  std::deque<std::function<void()>> queue;
+  // Current worker, or nullptr when none has been started or the last one has
+  // been reaped. Explicitly initialised so that a non-static instance does not
+  // read an indeterminate pointer in queueWork().
+  std::thread *running = nullptr;
+  std::mutex mutex;
+
+public:
+  // Appends `work` to the queue and starts a worker thread if none is active.
+  // An empty std::function queues nothing; it only reaps a worker that has
+  // already drained the queue.
+  void queueWork(std::function<void()> work) {
+    std::unique_lock<std::mutex> lock(mutex);
+    if (running && queue.empty()) {
+      // The worker is done, or about to notice the empty queue and exit.
+      // Release the lock while joining so it can take the mutex for its final
+      // emptiness check.
+      lock.unlock();
+      running->join();
+      lock.lock();
+      delete running;
+      running = nullptr;
+    }
+
+    if (!work)
+      return;
+
+    queue.emplace_back(std::move(work));
+    if (running)
+      return; // The live worker will pick the new entry up.
+
+    running = new std::thread([this]() {
+      while (true) {
+        std::function<void()> next;
+        {
+          std::lock_guard<std::mutex> guard(mutex);
+          if (queue.empty())
+            break;
+          next = std::move(queue.front());
+        }
+        // Run without holding the lock. The moved-from entry stays at the
+        // front until the job has finished, so queueWork() sees a non-empty
+        // queue and does not try to join a worker that is still busy.
+        next();
+        std::lock_guard<std::mutex> guard(mutex);
+        queue.pop_front();
+      }
+    });
+  }
+};
+
+// Most input files have been mapped but not yet paged in.
+// This code forces the page-ins on multiple threads so
+// the process is not stalled waiting on disk buffer I/O.
+//
+// Reads one byte from every page of each buffer in `deferred`, using
+// config->readWorkers tasks. Buffers larger than `largeArchive` (10 MiB) are
+// skipped. When LLVM_ENABLE_THREADS is 0 no pages are touched at all, because
+// the worker loop lives entirely inside the #if below.
+// NOTE(review): this function has external linkage (no `static`) although the
+// only caller visible in this patch is in the same file -- confirm intent.
+void multiThreadedPageInBackground(DeferredFiles &deferred) {
+ static const size_t pageSize = Process::getPageSizeEstimate();
+ // Size cut-off applied to every buffer, not only to archives.
+ static const size_t largeArchive = 10 * 1024 * 1024;
+#ifndef NDEBUG
+ using namespace std::chrono;
+ // Per-call count of buffers that were actually touched.
+ std::atomic_int numDeferedFilesTouched = 0;
+ // `static`, so this accumulates over every call made by the process.
+ static std::atomic_uint64_t totalBytes = 0;
+ auto t0 = high_resolution_clock::now();
+#endif
+
+ // Touches one buffer; invoked concurrently from the worker tasks below.
+ auto preloadDeferredFile = [&](const DeferredFile &deferredFile) {
+ const StringRef &buff = deferredFile.buffer.getBuffer();
+ if (buff.size() > largeArchive)
+ return;
+#ifndef NDEBUG
+ totalBytes += buff.size();
+ numDeferedFilesTouched += 1;
+#endif
+
+ // Reference all file's mmap'd pages to load them into memory.
+ // The read goes through a volatile variable so it is not optimised away;
+ // the value itself is never used.
+ for (const char *page = buff.data(), *end = page + buff.size(); page < end;
+ page += pageSize)
+ LLVM_ATTRIBUTE_UNUSED volatile char t = *page;
+ };
+#if LLVM_ENABLE_THREADS
+ { // Create scope for waiting for the taskGroup
+ // Shared cursor into `deferred`: each worker claims the next unclaimed
+ // index with fetch_add until the vector is exhausted.
+ std::atomic_size_t index = 0;
+ llvm::parallel::TaskGroup taskGroup;
+ for (int w = 0; w < config->readWorkers; w++)
+ taskGroup.spawn([&index, &preloadDeferredFile, &deferred]() {
+ while (true) {
+ size_t localIndex = index.fetch_add(1);
+ if (localIndex >= deferred.size())
+ break;
+ preloadDeferredFile(deferred[localIndex]);
+ }
+ });
+ }
+#endif
+#ifndef NDEBUG
+ // Assertion-enabled builds only: when the LLD_MULTI_THREAD_PAGE environment
+ // variable is set, print
+ // <total bytes>/<buffers touched>/<buffers given>/<elapsed seconds>.
+ auto dt = high_resolution_clock::now() - t0;
+ if (Process::GetEnv("LLD_MULTI_THREAD_PAGE"))
+ llvm::dbgs() << "multiThreadedPageIn " << totalBytes << "/"
+ << numDeferedFilesTouched << "/" << deferred.size() << "/"
+ << duration_cast<milliseconds>(dt).count() / 1000. << "\n";
+#endif
+}
+
+// Schedules a background page-in of `deferred`.
+//
+// The vector is copied into the queued closure, so the caller's vector may be
+// changed or destroyed once this returns. The entries only hold non-owning
+// views (StringRef / MemoryBufferRef); the underlying buffers are assumed to
+// stay alive until the background work has run -- TODO confirm.
+static void multiThreadedPageIn(const DeferredFiles &deferred) {
+  // A single queue for the whole process, so successive batches are paged in
+  // one after another in submission order.
+  static SerialBackgroundQueue pageInQueue;
+  // Copy exactly once, into the closure. The lambda is `mutable` because
+  // multiThreadedPageInBackground() takes a non-const reference.
+  pageInQueue.queueWork([files = deferred]() mutable {
+    multiThreadedPageInBackground(files);
+  });
+}
+
+static InputFile *processFile(std::optional<MemoryBufferRef> buffer,
+ DeferredFiles *archiveContents, StringRef path,
+ LoadType loadType, bool isLazy = false,
+ bool isExplicit = true,
+ bool isBundleLoader = false,
+ bool isForceHidden = false) {
if (!buffer)
return nullptr;
MemoryBufferRef mbref = *buffer;
@@ -379,6 +487,8 @@ static InputFile *addFile(StringRef path, LoadType loadType,
continue;
}
+ if (archiveContents)
+ archiveContents->push_back({path, isLazy, *mb});
if (!hasObjCSection(*mb))
continue;
if (Error e = file->fetch(c, "-ObjC"))
@@ -390,7 +500,8 @@ static InputFile *addFile(StringRef path, LoadType loadType,
": Archive::children failed: " + toString(std::move(e)));
}
}
- file->addLazySymbols();
+ if (!archiveContents || archiveContents->empty())
+ file->addLazySymbols();
loadedArchives[path] = ArchiveFileInfo{file, isCommandLineLoad};
newFile = file;
break;
@@ -441,6 +552,24 @@ static InputFile *addFile(StringRef path, LoadType loadType,
return newFile;
}
+// Reads `path` with readFile() and processes the result immediately. No
+// archive-member list is collected: processFile() receives a null
+// archiveContents pointer.
+static InputFile *addFile(StringRef path, LoadType loadType,
+                          bool isLazy = false, bool isExplicit = true,
+                          bool isBundleLoader = false,
+                          bool isForceHidden = false) {
+  std::optional<MemoryBufferRef> contents = readFile(path);
+  DeferredFiles *const noArchiveContents = nullptr;
+  return processFile(contents, noArchiveContents, path, loadType, isLazy,
+                     isExplicit, isBundleLoader, isForceHidden);
+}
+
+// Reads `path` with readFile(). If config->readWorkers is zero the file is
+// processed right away as a command-line input; otherwise it is only recorded
+// in `deferred` for later processing. A file that cannot be read is skipped.
+static void deferFile(StringRef path, bool isLazy, DeferredFiles &deferred) {
+  const std::optional<MemoryBufferRef> contents = readFile(path);
+  if (!contents.has_value())
+    return;
+
+  if (config->readWorkers == 0) {
+    processFile(contents, nullptr, path, LoadType::CommandLine, isLazy);
+    return;
+  }
+  deferred.push_back({path, isLazy, *contents});
+}
+
static std::vector<StringRef> missingAutolinkWarnings;
static void addLibrary(StringRef name, bool isNeeded, bool isWeak,
bool isReexport, bool isHidden, bool isExplicit,
@@ -564,13 +693,14 @@ void macho::resolveLCLinkerOptions() {
}
}
-static void addFileList(StringRef path, bool isLazy) {
+static void addFileList(StringRef path, bool isLazy,
+ DeferredFiles &deferredFiles) {
std::optional<MemoryBufferRef> buffer = readFile(path);
if (!buffer)
return;
MemoryBufferRef mbref = *buffer;
for (StringRef path : args::getLines(mbref))
- addFile(rerootPath(path), LoadType::CommandLine, isLazy);
+ deferFile(rerootPath(path), isLazy, deferredFiles);
}
// We expect sub-library names of the form "libfoo", which will match a dylib
@@ -1222,6 +1352,8 @@ static void createFiles(const InputArgList &args) {
bool isLazy = false;
// If we've processed an opening --start-lib, without a matching --end-lib
bool inLib = false;
+ DeferredFiles deferredFiles;
+
for (const Arg *arg : args) {
const Option &opt = arg->getOption();
warnIfDeprecatedOption(opt);
@@ -1229,7 +1361,7 @@ static void createFiles(const InputArgList &args) {
switch (opt.getID()) {
case OPT_INPUT:
- addFile(rerootPath(arg->getValue()), LoadType::CommandLine, isLazy);
+ deferFile(rerootPath(arg->getValue()), isLazy, deferredFiles);
break;
case OPT_needed_library:
if (auto *dylibFile = dyn_cast_or_null<DylibFile>(
@@ -1249,7 +1381,7 @@ static void createFiles(const InputArgList &args) {
dylibFile->forceWeakImport = true;
break;
case OPT_filelist:
- addFileList(arg->getValue(), isLazy);
+ addFileList(arg->getValue(), isLazy, deferredFiles);
break;
case OPT_force_load:
addFile(rerootPath(arg->getValue()), LoadType::CommandLineForce);
@@ -1295,6 +1427,24 @@ static void createFiles(const InputArgList &args) {
break;
}
}
+
+ if (config->readWorkers) {
+ multiThreadedPageIn(deferredFiles);
+
+ DeferredFiles archiveContents;
+ std::vector<ArchiveFile *> archives;
+ for (auto &file : deferredFiles) {
+ auto inputFile = processFile(file.buffer, &archiveContents, file.path,
+ LoadType::CommandLine, file.isLazy);
+ if (ArchiveFile *archive = dyn_cast<ArchiveFile>(inputFile))
+ archives.push_back(archive);
+ }
+
+ if (!archiveContents.empty())
+ multiThreadedPageIn(archiveContents);
+ for (auto *archive : archives)
+ archive->addLazySymbols();
+ }
}
static void gatherInputSections() {
@@ -1681,6 +1831,14 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
}
}
+ if (auto *arg = args.getLastArg(OPT_read_workers)) {
+ StringRef v(arg->getValue());
+ unsigned threads = 0;
+ if (!llvm::to_integer(v, threads, 0) || threads < 0)
+ error(arg->getSpelling() + ": expected a positive integer, but got '" +
+ arg->getValue() + "'");
+ config->readWorkers = threads;
+ }
if (auto *arg = args.getLastArg(OPT_threads_eq)) {
StringRef v(arg->getValue());
unsigned threads = 0;
diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td
index 4f0602f59812b..8ae50f380741a 100644
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -396,6 +396,9 @@ def dead_strip : Flag<["-"], "dead_strip">,
def interposable : Flag<["-"], "interposable">,
HelpText<"Indirects access to all exported symbols in an image">,
Group<grp_opts>;
+// Joined option in group grp_lld: the value follows the flag directly, e.g.
+// --read-workers=20. The driver parses it into config->readWorkers.
+def read_workers : Joined<["--"], "read-workers=">,
+ HelpText<"Approximate number of workers to use to eagerly preload input files content into memory. Use 0 to disable this feature. Default is disabled.">,
+ Group<grp_lld>;
def order_file : Separate<["-"], "order_file">,
MetaVarName<"<file>">,
HelpText<"Layout functions and data according to specification in <file>">,
More information about the llvm-commits
mailing list