[lld] [llvm] [lld][MachO] Follow-up to use madvise() for threaded file page-in. (PR #157917)
John Holdsworth via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 11 08:58:55 PDT 2025
https://github.com/johnno1962 updated https://github.com/llvm/llvm-project/pull/157917
>From 65e433cf48c96cc08d65e51f384bde4f17c067e5 Mon Sep 17 00:00:00 2001
From: John Holdsworth <github at johnholdsworth.com>
Date: Wed, 10 Sep 2025 20:03:03 +0200
Subject: [PATCH 1/4] Switch to use madvise() to page-in files.
---
lld/MachO/Driver.cpp | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 3db638e1ead96..2495d268cfb71 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -53,6 +53,10 @@
#include "llvm/TextAPI/Architecture.h"
#include "llvm/TextAPI/PackedVersion.h"
+#if !_WIN32
+#include <sys/mman.h>
+#endif
+
using namespace llvm;
using namespace llvm::MachO;
using namespace llvm::object;
@@ -334,11 +338,10 @@ class SerialBackgroundQueue {
// This code forces the page-ins on multiple threads so
// the process is not stalled waiting on disk buffer i/o.
void multiThreadedPageInBackground(DeferredFiles &deferred) {
- static const size_t pageSize = Process::getPageSizeEstimate();
static const size_t largeArchive = 10 * 1024 * 1024;
#ifndef NDEBUG
using namespace std::chrono;
- std::atomic_int numDeferedFilesTouched = 0;
+ std::atomic_int numDeferedFilesAdvised = 0;
static std::atomic_uint64_t totalBytes = 0;
auto t0 = high_resolution_clock::now();
#endif
@@ -349,13 +352,19 @@ void multiThreadedPageInBackground(DeferredFiles &deferred) {
return;
#ifndef NDEBUG
totalBytes += buff.size();
- numDeferedFilesTouched += 1;
+ numDeferedFilesAdvised += 1;
#endif
+#if _WIN32
+ static const size_t pageSize = Process::getPageSizeEstimate();
// Reference all file's mmap'd pages to load them into memory.
for (const char *page = buff.data(), *end = page + buff.size(); page < end;
page += pageSize)
LLVM_ATTRIBUTE_UNUSED volatile char t = *page;
+#else
+ // Advise that mmap'd files should be loaded into memory.
+ madvise((void *)buff.data(), buff.size(), MADV_WILLNEED);
+#endif
};
#if LLVM_ENABLE_THREADS
{ // Create scope for waiting for the taskGroup
@@ -376,7 +385,7 @@ void multiThreadedPageInBackground(DeferredFiles &deferred) {
auto dt = high_resolution_clock::now() - t0;
if (Process::GetEnv("LLD_MULTI_THREAD_PAGE"))
llvm::dbgs() << "multiThreadedPageIn " << totalBytes << "/"
- << numDeferedFilesTouched << "/" << deferred.size() << "/"
+ << numDeferedFilesAdvised << "/" << deferred.size() << "/"
<< duration_cast<milliseconds>(dt).count() / 1000. << "\n";
#endif
}
>From 59e77f5a5d7e09cf30a900d452ba0a15650a6588 Mon Sep 17 00:00:00 2001
From: John Holdsworth <github at johnholdsworth.com>
Date: Thu, 11 Sep 2025 07:37:54 +0200
Subject: [PATCH 2/4] Response to review.
---
lld/MachO/Driver.cpp | 27 ++++++++++++++++-----------
1 file changed, 16 insertions(+), 11 deletions(-)
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 2495d268cfb71..03f1b4038beeb 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -338,34 +338,37 @@ class SerialBackgroundQueue {
// This code forces the page-ins on multiple threads so
// the process is not stalled waiting on disk buffer i/o.
void multiThreadedPageInBackground(DeferredFiles &deferred) {
- static const size_t largeArchive = 10 * 1024 * 1024;
-#ifndef NDEBUG
using namespace std::chrono;
- std::atomic_int numDeferedFilesAdvised = 0;
+ static const size_t pageSize = Process::getPageSizeEstimate();
+ static const size_t largeArchive = 10 * 1024 * 1024;
static std::atomic_uint64_t totalBytes = 0;
+ std::atomic_int numDeferedFilesAdvised = 0;
auto t0 = high_resolution_clock::now();
-#endif
auto preloadDeferredFile = [&](const DeferredFile &deferredFile) {
const StringRef &buff = deferredFile.buffer.getBuffer();
if (buff.size() > largeArchive)
return;
-#ifndef NDEBUG
+ if (((uintptr_t)buff.data() & (pageSize - 1)))
+ return; // Not mmap()'d (not page aligned).
+
totalBytes += buff.size();
numDeferedFilesAdvised += 1;
-#endif
#if _WIN32
- static const size_t pageSize = Process::getPageSizeEstimate();
// Reference all file's mmap'd pages to load them into memory.
for (const char *page = buff.data(), *end = page + buff.size(); page < end;
page += pageSize)
LLVM_ATTRIBUTE_UNUSED volatile char t = *page;
#else
- // Advise that mmap'd files should be loaded into memory.
- madvise((void *)buff.data(), buff.size(), MADV_WILLNEED);
+ if (madvise((void *)buff.data(), buff.size(), MADV_WILLNEED) < 0)
+#ifndef NDEBUG
+ llvm::errs() << "madvise() error " << strerror(errno)
+#endif
+ ;
#endif
};
+
#if LLVM_ENABLE_THREADS
{ // Create scope for waiting for the taskGroup
std::atomic_size_t index = 0;
@@ -380,14 +383,16 @@ void multiThreadedPageInBackground(DeferredFiles &deferred) {
}
});
}
+#else
+ for (const auto &file : deferred)
+ preloadDeferredFile(file);
#endif
-#ifndef NDEBUG
+
auto dt = high_resolution_clock::now() - t0;
if (Process::GetEnv("LLD_MULTI_THREAD_PAGE"))
llvm::dbgs() << "multiThreadedPageIn " << totalBytes << "/"
<< numDeferedFilesAdvised << "/" << deferred.size() << "/"
<< duration_cast<milliseconds>(dt).count() / 1000. << "\n";
-#endif
}
static void multiThreadedPageIn(const DeferredFiles &deferred) {
>From 298380eb921bae4f2507aebf5b59611d76a41117 Mon Sep 17 00:00:00 2001
From: John Holdsworth <github at johnholdsworth.com>
Date: Thu, 11 Sep 2025 08:53:49 +0200
Subject: [PATCH 3/4] Use LLVM_DEBUG.
---
lld/MachO/Driver.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 03f1b4038beeb..ee0c7ec80f993 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -41,6 +41,7 @@
#include "llvm/Object/Archive.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Parallel.h"
#include "llvm/Support/Path.h"
@@ -361,11 +362,10 @@ void multiThreadedPageInBackground(DeferredFiles &deferred) {
page += pageSize)
LLVM_ATTRIBUTE_UNUSED volatile char t = *page;
#else
+#define DEBUG_TYPE "lld-madvise"
if (madvise((void *)buff.data(), buff.size(), MADV_WILLNEED) < 0)
-#ifndef NDEBUG
- llvm::errs() << "madvise() error " << strerror(errno)
-#endif
- ;
+ LLVM_DEBUG(llvm::dbgs() << "madvise() error: " << strerror(errno));
+#undef DEBUG_TYPE
#endif
};
>From 1636d63080dced683767b6941bc1b36dc89bda6d Mon Sep 17 00:00:00 2001
From: John Holdsworth <github at johnholdsworth.com>
Date: Thu, 11 Sep 2025 17:19:19 +0200
Subject: [PATCH 4/4] Page align buffer pointer and revert performance
regression.
---
lld/MachO/Driver.cpp | 6 +++---
lld/MachO/InputFiles.cpp | 3 ++-
llvm/lib/Object/Archive.cpp | 3 ++-
3 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index ee0c7ec80f993..f5296da872fe6 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -350,8 +350,6 @@ void multiThreadedPageInBackground(DeferredFiles &deferred) {
const StringRef &buff = deferredFile.buffer.getBuffer();
if (buff.size() > largeArchive)
return;
- if (((uintptr_t)buff.data() & (pageSize - 1)))
- return; // Not mmap()'d (not page aligned).
totalBytes += buff.size();
numDeferedFilesAdvised += 1;
@@ -363,7 +361,9 @@ void multiThreadedPageInBackground(DeferredFiles &deferred) {
LLVM_ATTRIBUTE_UNUSED volatile char t = *page;
#else
#define DEBUG_TYPE "lld-madvise"
- if (madvise((void *)buff.data(), buff.size(), MADV_WILLNEED) < 0)
+ auto aligned = llvm::alignAddr(buff.data(), Align(pageSize));
+ auto rounded = llvm::alignTo(buff.size(), Align(pageSize));
+ if (madvise((void *)aligned, rounded, MADV_WILLNEED) < 0)
LLVM_DEBUG(llvm::dbgs() << "madvise() error: " << strerror(errno));
#undef DEBUG_TYPE
#endif
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 442fc608865d2..29a83eaeb0da5 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -217,7 +217,8 @@ std::optional<MemoryBufferRef> macho::readFile(StringRef path) {
if (entry != cachedReads.end())
return entry->second;
- ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr = MemoryBuffer::getFile(path);
+ ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr =
+ MemoryBuffer::getFile(path, false, /*RequiresNullTerminator*/ false);
if (std::error_code ec = mbOrErr.getError()) {
error("cannot open " + path + ": " + ec.message());
return std::nullopt;
diff --git a/llvm/lib/Object/Archive.cpp b/llvm/lib/Object/Archive.cpp
index 92f31c909efd4..445bd6b4609d8 100644
--- a/llvm/lib/Object/Archive.cpp
+++ b/llvm/lib/Object/Archive.cpp
@@ -584,7 +584,8 @@ Expected<StringRef> Archive::Child::getBuffer() const {
if (!FullNameOrErr)
return FullNameOrErr.takeError();
const std::string &FullName = *FullNameOrErr;
- ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(FullName);
+ ErrorOr<std::unique_ptr<MemoryBuffer>> Buf =
+ MemoryBuffer::getFile(FullName, false, /*RequiresNullTerminator*/ false);
if (std::error_code EC = Buf.getError())
return errorCodeToError(EC);
Parent->ThinBuffers.push_back(std::move(*Buf));
More information about the llvm-commits mailing list