[lld] [llvm] [lld][MachO] Follow-up to use madvise() for threaded file page-in. (PR #157917)
John Holdsworth via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 18 02:58:28 PDT 2025
https://github.com/johnno1962 updated https://github.com/llvm/llvm-project/pull/157917
>From 65e433cf48c96cc08d65e51f384bde4f17c067e5 Mon Sep 17 00:00:00 2001
From: John Holdsworth <github at johnholdsworth.com>
Date: Wed, 10 Sep 2025 20:03:03 +0200
Subject: [PATCH 01/13] Switch to use madvise() to page-in files.
---
lld/MachO/Driver.cpp | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 3db638e1ead96..2495d268cfb71 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -53,6 +53,10 @@
#include "llvm/TextAPI/Architecture.h"
#include "llvm/TextAPI/PackedVersion.h"
+#if !_WIN32
+#include <sys/mman.h>
+#endif
+
using namespace llvm;
using namespace llvm::MachO;
using namespace llvm::object;
@@ -334,11 +338,10 @@ class SerialBackgroundQueue {
// This code forces the page-ins on multiple threads so
// the process is not stalled waiting on disk buffer i/o.
void multiThreadedPageInBackground(DeferredFiles &deferred) {
- static const size_t pageSize = Process::getPageSizeEstimate();
static const size_t largeArchive = 10 * 1024 * 1024;
#ifndef NDEBUG
using namespace std::chrono;
- std::atomic_int numDeferedFilesTouched = 0;
+ std::atomic_int numDeferedFilesAdvised = 0;
static std::atomic_uint64_t totalBytes = 0;
auto t0 = high_resolution_clock::now();
#endif
@@ -349,13 +352,19 @@ void multiThreadedPageInBackground(DeferredFiles &deferred) {
return;
#ifndef NDEBUG
totalBytes += buff.size();
- numDeferedFilesTouched += 1;
+ numDeferedFilesAdvised += 1;
#endif
+#if _WIN32
+ static const size_t pageSize = Process::getPageSizeEstimate();
// Reference all file's mmap'd pages to load them into memory.
for (const char *page = buff.data(), *end = page + buff.size(); page < end;
page += pageSize)
LLVM_ATTRIBUTE_UNUSED volatile char t = *page;
+#else
+ // Advise that mmap'd files should be loaded into memory.
+ madvise((void *)buff.data(), buff.size(), MADV_WILLNEED);
+#endif
};
#if LLVM_ENABLE_THREADS
{ // Create scope for waiting for the taskGroup
@@ -376,7 +385,7 @@ void multiThreadedPageInBackground(DeferredFiles &deferred) {
auto dt = high_resolution_clock::now() - t0;
if (Process::GetEnv("LLD_MULTI_THREAD_PAGE"))
llvm::dbgs() << "multiThreadedPageIn " << totalBytes << "/"
- << numDeferedFilesTouched << "/" << deferred.size() << "/"
+ << numDeferedFilesAdvised << "/" << deferred.size() << "/"
<< duration_cast<milliseconds>(dt).count() / 1000. << "\n";
#endif
}
>From 59e77f5a5d7e09cf30a900d452ba0a15650a6588 Mon Sep 17 00:00:00 2001
From: John Holdsworth <github at johnholdsworth.com>
Date: Thu, 11 Sep 2025 07:37:54 +0200
Subject: [PATCH 02/13] Response to review.
---
lld/MachO/Driver.cpp | 27 ++++++++++++++++-----------
1 file changed, 16 insertions(+), 11 deletions(-)
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 2495d268cfb71..03f1b4038beeb 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -338,34 +338,37 @@ class SerialBackgroundQueue {
// This code forces the page-ins on multiple threads so
// the process is not stalled waiting on disk buffer i/o.
void multiThreadedPageInBackground(DeferredFiles &deferred) {
- static const size_t largeArchive = 10 * 1024 * 1024;
-#ifndef NDEBUG
using namespace std::chrono;
- std::atomic_int numDeferedFilesAdvised = 0;
+ static const size_t pageSize = Process::getPageSizeEstimate();
+ static const size_t largeArchive = 10 * 1024 * 1024;
static std::atomic_uint64_t totalBytes = 0;
+ std::atomic_int numDeferedFilesAdvised = 0;
auto t0 = high_resolution_clock::now();
-#endif
auto preloadDeferredFile = [&](const DeferredFile &deferredFile) {
const StringRef &buff = deferredFile.buffer.getBuffer();
if (buff.size() > largeArchive)
return;
-#ifndef NDEBUG
+ if (((uintptr_t)buff.data() & (pageSize - 1)))
+ return; // Not mmap()'d (not page aligned).
+
totalBytes += buff.size();
numDeferedFilesAdvised += 1;
-#endif
#if _WIN32
- static const size_t pageSize = Process::getPageSizeEstimate();
// Reference all file's mmap'd pages to load them into memory.
for (const char *page = buff.data(), *end = page + buff.size(); page < end;
page += pageSize)
LLVM_ATTRIBUTE_UNUSED volatile char t = *page;
#else
- // Advise that mmap'd files should be loaded into memory.
- madvise((void *)buff.data(), buff.size(), MADV_WILLNEED);
+ if (madvise((void *)buff.data(), buff.size(), MADV_WILLNEED) < 0)
+#ifndef NDEBUG
+ llvm::errs() << "madvise() error " << strerror(errno)
+#endif
+ ;
#endif
};
+
#if LLVM_ENABLE_THREADS
{ // Create scope for waiting for the taskGroup
std::atomic_size_t index = 0;
@@ -380,14 +383,16 @@ void multiThreadedPageInBackground(DeferredFiles &deferred) {
}
});
}
+#else
+ for (const auto &file : deferred)
+ preloadDeferredFile(file);
#endif
-#ifndef NDEBUG
+
auto dt = high_resolution_clock::now() - t0;
if (Process::GetEnv("LLD_MULTI_THREAD_PAGE"))
llvm::dbgs() << "multiThreadedPageIn " << totalBytes << "/"
<< numDeferedFilesAdvised << "/" << deferred.size() << "/"
<< duration_cast<milliseconds>(dt).count() / 1000. << "\n";
-#endif
}
static void multiThreadedPageIn(const DeferredFiles &deferred) {
>From 298380eb921bae4f2507aebf5b59611d76a41117 Mon Sep 17 00:00:00 2001
From: John Holdsworth <github at johnholdsworth.com>
Date: Thu, 11 Sep 2025 08:53:49 +0200
Subject: [PATCH 03/13] Use LLVM_DEBUG.
---
lld/MachO/Driver.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 03f1b4038beeb..ee0c7ec80f993 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -41,6 +41,7 @@
#include "llvm/Object/Archive.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Parallel.h"
#include "llvm/Support/Path.h"
@@ -361,11 +362,10 @@ void multiThreadedPageInBackground(DeferredFiles &deferred) {
page += pageSize)
LLVM_ATTRIBUTE_UNUSED volatile char t = *page;
#else
+#define DEBUG_TYPE "lld-madvise"
if (madvise((void *)buff.data(), buff.size(), MADV_WILLNEED) < 0)
-#ifndef NDEBUG
- llvm::errs() << "madvise() error " << strerror(errno)
-#endif
- ;
+ LLVM_DEBUG(llvm::dbgs() << "madvise() error: " << strerror(errno));
+#undef DEBUG_TYPE
#endif
};
>From e31984b250c3fdd7980efdfe5683c155b409d826 Mon Sep 17 00:00:00 2001
From: John Holdsworth <github at johnholdsworth.com>
Date: Thu, 11 Sep 2025 17:19:19 +0200
Subject: [PATCH 04/13] Page align buffer pointer and address 2 second
performance regression introduced by:
https://github.com/llvm/llvm-project/commit/85cd3d98686c47d015dbcc17f1f7d0714b00e172
---
lld/MachO/Driver.cpp | 7 +++----
lld/MachO/InputFiles.cpp | 3 ++-
llvm/lib/Object/Archive.cpp | 3 ++-
3 files changed, 7 insertions(+), 6 deletions(-)
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index ee0c7ec80f993..f652e195ffc5e 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -350,8 +350,6 @@ void multiThreadedPageInBackground(DeferredFiles &deferred) {
const StringRef &buff = deferredFile.buffer.getBuffer();
if (buff.size() > largeArchive)
return;
- if (((uintptr_t)buff.data() & (pageSize - 1)))
- return; // Not mmap()'d (not page aligned).
totalBytes += buff.size();
numDeferedFilesAdvised += 1;
@@ -363,8 +361,9 @@ void multiThreadedPageInBackground(DeferredFiles &deferred) {
LLVM_ATTRIBUTE_UNUSED volatile char t = *page;
#else
#define DEBUG_TYPE "lld-madvise"
- if (madvise((void *)buff.data(), buff.size(), MADV_WILLNEED) < 0)
- LLVM_DEBUG(llvm::dbgs() << "madvise() error: " << strerror(errno));
+ auto aligned = llvm::alignAddr(buff.data(), Align(pageSize));
+ if (madvise((void *)aligned, buff.size(), MADV_WILLNEED) < 0)
+ LLVM_DEBUG(llvm::dbgs() << "madvise error: " << strerror(errno) << "\n");
#undef DEBUG_TYPE
#endif
};
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 442fc608865d2..29a83eaeb0da5 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -217,7 +217,8 @@ std::optional<MemoryBufferRef> macho::readFile(StringRef path) {
if (entry != cachedReads.end())
return entry->second;
- ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr = MemoryBuffer::getFile(path);
+ ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr =
+ MemoryBuffer::getFile(path, false, /*RequiresNullTerminator*/ false);
if (std::error_code ec = mbOrErr.getError()) {
error("cannot open " + path + ": " + ec.message());
return std::nullopt;
diff --git a/llvm/lib/Object/Archive.cpp b/llvm/lib/Object/Archive.cpp
index 92f31c909efd4..445bd6b4609d8 100644
--- a/llvm/lib/Object/Archive.cpp
+++ b/llvm/lib/Object/Archive.cpp
@@ -584,7 +584,8 @@ Expected<StringRef> Archive::Child::getBuffer() const {
if (!FullNameOrErr)
return FullNameOrErr.takeError();
const std::string &FullName = *FullNameOrErr;
- ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(FullName);
+ ErrorOr<std::unique_ptr<MemoryBuffer>> Buf =
+ MemoryBuffer::getFile(FullName, false, /*RequiresNullTerminator*/ false);
if (std::error_code EC = Buf.getError())
return errorCodeToError(EC);
Parent->ThinBuffers.push_back(std::move(*Buf));
>From 884b771e18b305b6d15b789dbb40d8025798e7cc Mon Sep 17 00:00:00 2001
From: John Holdsworth <github at johnholdsworth.com>
Date: Thu, 11 Sep 2025 20:25:52 +0200
Subject: [PATCH 05/13] Alternative solution to performance regression.
---
lld/MachO/InputFiles.cpp | 3 +--
llvm/lib/Object/Archive.cpp | 3 +--
llvm/lib/Support/MemoryBuffer.cpp | 6 ++++--
3 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 29a83eaeb0da5..442fc608865d2 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -217,8 +217,7 @@ std::optional<MemoryBufferRef> macho::readFile(StringRef path) {
if (entry != cachedReads.end())
return entry->second;
- ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr =
- MemoryBuffer::getFile(path, false, /*RequiresNullTerminator*/ false);
+ ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr = MemoryBuffer::getFile(path);
if (std::error_code ec = mbOrErr.getError()) {
error("cannot open " + path + ": " + ec.message());
return std::nullopt;
diff --git a/llvm/lib/Object/Archive.cpp b/llvm/lib/Object/Archive.cpp
index 445bd6b4609d8..92f31c909efd4 100644
--- a/llvm/lib/Object/Archive.cpp
+++ b/llvm/lib/Object/Archive.cpp
@@ -584,8 +584,7 @@ Expected<StringRef> Archive::Child::getBuffer() const {
if (!FullNameOrErr)
return FullNameOrErr.takeError();
const std::string &FullName = *FullNameOrErr;
- ErrorOr<std::unique_ptr<MemoryBuffer>> Buf =
- MemoryBuffer::getFile(FullName, false, /*RequiresNullTerminator*/ false);
+ ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(FullName);
if (std::error_code EC = Buf.getError())
return errorCodeToError(EC);
Parent->ThinBuffers.push_back(std::move(*Buf));
diff --git a/llvm/lib/Support/MemoryBuffer.cpp b/llvm/lib/Support/MemoryBuffer.cpp
index 1c4645ad83641..9c08a197a6987 100644
--- a/llvm/lib/Support/MemoryBuffer.cpp
+++ b/llvm/lib/Support/MemoryBuffer.cpp
@@ -505,8 +505,10 @@ getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
// On at least Linux, and possibly on other systems, mmap may return pages
// from the page cache that are not properly filled with trailing zeroes,
// if some prior user of the page wrote non-zero bytes. Detect this and
- // don't use mmap in that case.
- if (!RequiresNullTerminator || *Result->getBufferEnd() == '\0')
+ // don't use mmap in that case (unless it is object or archive file).
+ if (!RequiresNullTerminator || *Result->getBufferEnd() == '\0' ||
+ StringRef(Filename.str()).ends_with(".o") ||
+ StringRef(Filename.str()).ends_with(".a"))
return std::move(Result);
}
}
>From 1b9b13998ba6f8caccaf3fdd8bea41d59beb403f Mon Sep 17 00:00:00 2001
From: John Holdsworth <github at johnholdsworth.com>
Date: Thu, 11 Sep 2025 22:04:55 +0200
Subject: [PATCH 06/13] Revert "Alternative solution to performance
regression."
This reverts commit 884b771e18b305b6d15b789dbb40d8025798e7cc.
---
lld/MachO/InputFiles.cpp | 3 ++-
llvm/lib/Object/Archive.cpp | 3 ++-
llvm/lib/Support/MemoryBuffer.cpp | 6 ++----
3 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 442fc608865d2..29a83eaeb0da5 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -217,7 +217,8 @@ std::optional<MemoryBufferRef> macho::readFile(StringRef path) {
if (entry != cachedReads.end())
return entry->second;
- ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr = MemoryBuffer::getFile(path);
+ ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr =
+ MemoryBuffer::getFile(path, false, /*RequiresNullTerminator*/ false);
if (std::error_code ec = mbOrErr.getError()) {
error("cannot open " + path + ": " + ec.message());
return std::nullopt;
diff --git a/llvm/lib/Object/Archive.cpp b/llvm/lib/Object/Archive.cpp
index 92f31c909efd4..445bd6b4609d8 100644
--- a/llvm/lib/Object/Archive.cpp
+++ b/llvm/lib/Object/Archive.cpp
@@ -584,7 +584,8 @@ Expected<StringRef> Archive::Child::getBuffer() const {
if (!FullNameOrErr)
return FullNameOrErr.takeError();
const std::string &FullName = *FullNameOrErr;
- ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(FullName);
+ ErrorOr<std::unique_ptr<MemoryBuffer>> Buf =
+ MemoryBuffer::getFile(FullName, false, /*RequiresNullTerminator*/ false);
if (std::error_code EC = Buf.getError())
return errorCodeToError(EC);
Parent->ThinBuffers.push_back(std::move(*Buf));
diff --git a/llvm/lib/Support/MemoryBuffer.cpp b/llvm/lib/Support/MemoryBuffer.cpp
index 9c08a197a6987..1c4645ad83641 100644
--- a/llvm/lib/Support/MemoryBuffer.cpp
+++ b/llvm/lib/Support/MemoryBuffer.cpp
@@ -505,10 +505,8 @@ getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
// On at least Linux, and possibly on other systems, mmap may return pages
// from the page cache that are not properly filled with trailing zeroes,
// if some prior user of the page wrote non-zero bytes. Detect this and
- // don't use mmap in that case (unless it is object or archive file).
- if (!RequiresNullTerminator || *Result->getBufferEnd() == '\0' ||
- StringRef(Filename.str()).ends_with(".o") ||
- StringRef(Filename.str()).ends_with(".a"))
+ // don't use mmap in that case.
+ if (!RequiresNullTerminator || *Result->getBufferEnd() == '\0')
return std::move(Result);
}
}
>From f5fef9b15cd7f2224c004459ec06aeb52132f3d4 Mon Sep 17 00:00:00 2001
From: John Holdsworth <github at johnholdsworth.com>
Date: Fri, 12 Sep 2025 10:08:37 +0200
Subject: [PATCH 07/13] Format llvm/lib/Object/Archive.cpp
Co-authored-by: James Henderson <James.Henderson at sony.com>
---
llvm/lib/Object/Archive.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Object/Archive.cpp b/llvm/lib/Object/Archive.cpp
index 445bd6b4609d8..bedd021092081 100644
--- a/llvm/lib/Object/Archive.cpp
+++ b/llvm/lib/Object/Archive.cpp
@@ -585,7 +585,7 @@ Expected<StringRef> Archive::Child::getBuffer() const {
return FullNameOrErr.takeError();
const std::string &FullName = *FullNameOrErr;
ErrorOr<std::unique_ptr<MemoryBuffer>> Buf =
- MemoryBuffer::getFile(FullName, false, /*RequiresNullTerminator*/ false);
+ MemoryBuffer::getFile(FullName, false, /*RequiresNullTerminator=*/false);
if (std::error_code EC = Buf.getError())
return errorCodeToError(EC);
Parent->ThinBuffers.push_back(std::move(*Buf));
>From 80767a224a47eaad78950b1a8d0b0a75e096457e Mon Sep 17 00:00:00 2001
From: John Holdsworth <github at johnholdsworth.com>
Date: Fri, 12 Sep 2025 10:19:01 +0200
Subject: [PATCH 08/13] Format lld/MachO/InputFiles.cpp
---
lld/MachO/InputFiles.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 29a83eaeb0da5..bdc63f23f90b6 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -218,7 +218,7 @@ std::optional<MemoryBufferRef> macho::readFile(StringRef path) {
return entry->second;
ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr =
- MemoryBuffer::getFile(path, false, /*RequiresNullTerminator*/ false);
+ MemoryBuffer::getFile(path, false, /*RequiresNullTerminator=*/false);
if (std::error_code ec = mbOrErr.getError()) {
error("cannot open " + path + ": " + ec.message());
return std::nullopt;
>From f2654ae6e1bb7aa78a16badb1bcc9d6f1f824676 Mon Sep 17 00:00:00 2001
From: John Holdsworth <github at johnholdsworth.com>
Date: Fri, 12 Sep 2025 16:56:01 +0200
Subject: [PATCH 09/13] A third alternative.
---
lld/MachO/InputFiles.cpp | 3 +--
llvm/lib/Object/Archive.cpp | 3 +--
llvm/lib/Support/MemoryBuffer.cpp | 21 ++++++++++++---------
3 files changed, 14 insertions(+), 13 deletions(-)
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index bdc63f23f90b6..442fc608865d2 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -217,8 +217,7 @@ std::optional<MemoryBufferRef> macho::readFile(StringRef path) {
if (entry != cachedReads.end())
return entry->second;
- ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr =
- MemoryBuffer::getFile(path, false, /*RequiresNullTerminator=*/false);
+ ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr = MemoryBuffer::getFile(path);
if (std::error_code ec = mbOrErr.getError()) {
error("cannot open " + path + ": " + ec.message());
return std::nullopt;
diff --git a/llvm/lib/Object/Archive.cpp b/llvm/lib/Object/Archive.cpp
index bedd021092081..92f31c909efd4 100644
--- a/llvm/lib/Object/Archive.cpp
+++ b/llvm/lib/Object/Archive.cpp
@@ -584,8 +584,7 @@ Expected<StringRef> Archive::Child::getBuffer() const {
if (!FullNameOrErr)
return FullNameOrErr.takeError();
const std::string &FullName = *FullNameOrErr;
- ErrorOr<std::unique_ptr<MemoryBuffer>> Buf =
- MemoryBuffer::getFile(FullName, false, /*RequiresNullTerminator=*/false);
+ ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(FullName);
if (std::error_code EC = Buf.getError())
return errorCodeToError(EC);
Parent->ThinBuffers.push_back(std::move(*Buf));
diff --git a/llvm/lib/Support/MemoryBuffer.cpp b/llvm/lib/Support/MemoryBuffer.cpp
index 1c4645ad83641..5ded80cc20cd2 100644
--- a/llvm/lib/Support/MemoryBuffer.cpp
+++ b/llvm/lib/Support/MemoryBuffer.cpp
@@ -266,8 +266,9 @@ MemoryBuffer::getFile(const Twine &Filename, bool IsText,
template <typename MB>
static ErrorOr<std::unique_ptr<MB>>
getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
- uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator,
- bool IsVolatile, std::optional<Align> Alignment);
+ uint64_t MapSize, int64_t Offset, bool IsText,
+ bool RequiresNullTerminator, bool IsVolatile,
+ std::optional<Align> Alignment);
template <typename MB>
static ErrorOr<std::unique_ptr<MB>>
@@ -280,7 +281,8 @@ getFileAux(const Twine &Filename, uint64_t MapSize, uint64_t Offset,
return errorToErrorCode(FDOrErr.takeError());
sys::fs::file_t FD = *FDOrErr;
auto Ret = getOpenFileImpl<MB>(FD, Filename, /*FileSize=*/-1, MapSize, Offset,
- RequiresNullTerminator, IsVolatile, Alignment);
+ IsText, RequiresNullTerminator, IsVolatile,
+ Alignment);
sys::fs::closeFile(FD);
return Ret;
}
@@ -468,8 +470,9 @@ WriteThroughMemoryBuffer::getFileSlice(const Twine &Filename, uint64_t MapSize,
template <typename MB>
static ErrorOr<std::unique_ptr<MB>>
getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
- uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator,
- bool IsVolatile, std::optional<Align> Alignment) {
+ uint64_t MapSize, int64_t Offset, bool IsText,
+ bool RequiresNullTerminator, bool IsVolatile,
+ std::optional<Align> Alignment) {
static int PageSize = sys::Process::getPageSizeEstimate();
// Default is to map the full file.
@@ -506,7 +509,7 @@ getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
// from the page cache that are not properly filled with trailing zeroes,
// if some prior user of the page wrote non-zero bytes. Detect this and
// don't use mmap in that case.
- if (!RequiresNullTerminator || *Result->getBufferEnd() == '\0')
+ if (!IsText || *Result->getBufferEnd() == '\0')
return std::move(Result);
}
}
@@ -555,8 +558,8 @@ MemoryBuffer::getOpenFile(sys::fs::file_t FD, const Twine &Filename,
uint64_t FileSize, bool RequiresNullTerminator,
bool IsVolatile, std::optional<Align> Alignment) {
return getOpenFileImpl<MemoryBuffer>(FD, Filename, FileSize, FileSize, 0,
- RequiresNullTerminator, IsVolatile,
- Alignment);
+ false, RequiresNullTerminator,
+ IsVolatile, Alignment);
}
ErrorOr<std::unique_ptr<MemoryBuffer>> MemoryBuffer::getOpenFileSlice(
@@ -564,7 +567,7 @@ ErrorOr<std::unique_ptr<MemoryBuffer>> MemoryBuffer::getOpenFileSlice(
bool IsVolatile, std::optional<Align> Alignment) {
assert(MapSize != uint64_t(-1));
return getOpenFileImpl<MemoryBuffer>(FD, Filename, -1, MapSize, Offset, false,
- IsVolatile, Alignment);
+ false, IsVolatile, Alignment);
}
ErrorOr<std::unique_ptr<MemoryBuffer>> MemoryBuffer::getSTDIN() {
>From 198d9b08934af01f19a6fbe92e0d4a5928832d54 Mon Sep 17 00:00:00 2001
From: John Holdsworth <github at johnholdsworth.com>
Date: Mon, 15 Sep 2025 09:36:14 +0200
Subject: [PATCH 10/13] Safer IsText option.
---
llvm/lib/Support/MemoryBuffer.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Support/MemoryBuffer.cpp b/llvm/lib/Support/MemoryBuffer.cpp
index 5ded80cc20cd2..5bd1f6ed80c86 100644
--- a/llvm/lib/Support/MemoryBuffer.cpp
+++ b/llvm/lib/Support/MemoryBuffer.cpp
@@ -509,7 +509,7 @@ getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
// from the page cache that are not properly filled with trailing zeroes,
// if some prior user of the page wrote non-zero bytes. Detect this and
// don't use mmap in that case.
- if (!IsText || *Result->getBufferEnd() == '\0')
+ if (!IsText || !RequiresNullTerminator || *Result->getBufferEnd() == '\0')
return std::move(Result);
}
}
>From 3dddb2aee0e763ad03497b07b2a152455ec4ae2f Mon Sep 17 00:00:00 2001
From: John Holdsworth <github at johnholdsworth.com>
Date: Mon, 15 Sep 2025 20:45:14 +0200
Subject: [PATCH 11/13] Revert "Safer IsText option."
This reverts commit 198d9b08934af01f19a6fbe92e0d4a5928832d54.
---
llvm/lib/Support/MemoryBuffer.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Support/MemoryBuffer.cpp b/llvm/lib/Support/MemoryBuffer.cpp
index 5bd1f6ed80c86..5ded80cc20cd2 100644
--- a/llvm/lib/Support/MemoryBuffer.cpp
+++ b/llvm/lib/Support/MemoryBuffer.cpp
@@ -509,7 +509,7 @@ getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
// from the page cache that are not properly filled with trailing zeroes,
// if some prior user of the page wrote non-zero bytes. Detect this and
// don't use mmap in that case.
- if (!IsText || !RequiresNullTerminator || *Result->getBufferEnd() == '\0')
+ if (!IsText || *Result->getBufferEnd() == '\0')
return std::move(Result);
}
}
>From 41c3e615e4d1b3bb5c049a045442b653ee2c9706 Mon Sep 17 00:00:00 2001
From: John Holdsworth <github at johnholdsworth.com>
Date: Mon, 15 Sep 2025 20:46:03 +0200
Subject: [PATCH 12/13] Revert "A third alternative."
This reverts commit f2654ae6e1bb7aa78a16badb1bcc9d6f1f824676.
---
lld/MachO/InputFiles.cpp | 3 ++-
llvm/lib/Object/Archive.cpp | 3 ++-
llvm/lib/Support/MemoryBuffer.cpp | 21 +++++++++------------
3 files changed, 13 insertions(+), 14 deletions(-)
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 442fc608865d2..bdc63f23f90b6 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -217,7 +217,8 @@ std::optional<MemoryBufferRef> macho::readFile(StringRef path) {
if (entry != cachedReads.end())
return entry->second;
- ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr = MemoryBuffer::getFile(path);
+ ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr =
+ MemoryBuffer::getFile(path, false, /*RequiresNullTerminator=*/false);
if (std::error_code ec = mbOrErr.getError()) {
error("cannot open " + path + ": " + ec.message());
return std::nullopt;
diff --git a/llvm/lib/Object/Archive.cpp b/llvm/lib/Object/Archive.cpp
index 92f31c909efd4..bedd021092081 100644
--- a/llvm/lib/Object/Archive.cpp
+++ b/llvm/lib/Object/Archive.cpp
@@ -584,7 +584,8 @@ Expected<StringRef> Archive::Child::getBuffer() const {
if (!FullNameOrErr)
return FullNameOrErr.takeError();
const std::string &FullName = *FullNameOrErr;
- ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(FullName);
+ ErrorOr<std::unique_ptr<MemoryBuffer>> Buf =
+ MemoryBuffer::getFile(FullName, false, /*RequiresNullTerminator=*/false);
if (std::error_code EC = Buf.getError())
return errorCodeToError(EC);
Parent->ThinBuffers.push_back(std::move(*Buf));
diff --git a/llvm/lib/Support/MemoryBuffer.cpp b/llvm/lib/Support/MemoryBuffer.cpp
index 5ded80cc20cd2..1c4645ad83641 100644
--- a/llvm/lib/Support/MemoryBuffer.cpp
+++ b/llvm/lib/Support/MemoryBuffer.cpp
@@ -266,9 +266,8 @@ MemoryBuffer::getFile(const Twine &Filename, bool IsText,
template <typename MB>
static ErrorOr<std::unique_ptr<MB>>
getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
- uint64_t MapSize, int64_t Offset, bool IsText,
- bool RequiresNullTerminator, bool IsVolatile,
- std::optional<Align> Alignment);
+ uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator,
+ bool IsVolatile, std::optional<Align> Alignment);
template <typename MB>
static ErrorOr<std::unique_ptr<MB>>
@@ -281,8 +280,7 @@ getFileAux(const Twine &Filename, uint64_t MapSize, uint64_t Offset,
return errorToErrorCode(FDOrErr.takeError());
sys::fs::file_t FD = *FDOrErr;
auto Ret = getOpenFileImpl<MB>(FD, Filename, /*FileSize=*/-1, MapSize, Offset,
- IsText, RequiresNullTerminator, IsVolatile,
- Alignment);
+ RequiresNullTerminator, IsVolatile, Alignment);
sys::fs::closeFile(FD);
return Ret;
}
@@ -470,9 +468,8 @@ WriteThroughMemoryBuffer::getFileSlice(const Twine &Filename, uint64_t MapSize,
template <typename MB>
static ErrorOr<std::unique_ptr<MB>>
getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
- uint64_t MapSize, int64_t Offset, bool IsText,
- bool RequiresNullTerminator, bool IsVolatile,
- std::optional<Align> Alignment) {
+ uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator,
+ bool IsVolatile, std::optional<Align> Alignment) {
static int PageSize = sys::Process::getPageSizeEstimate();
// Default is to map the full file.
@@ -509,7 +506,7 @@ getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
// from the page cache that are not properly filled with trailing zeroes,
// if some prior user of the page wrote non-zero bytes. Detect this and
// don't use mmap in that case.
- if (!IsText || *Result->getBufferEnd() == '\0')
+ if (!RequiresNullTerminator || *Result->getBufferEnd() == '\0')
return std::move(Result);
}
}
@@ -558,8 +555,8 @@ MemoryBuffer::getOpenFile(sys::fs::file_t FD, const Twine &Filename,
uint64_t FileSize, bool RequiresNullTerminator,
bool IsVolatile, std::optional<Align> Alignment) {
return getOpenFileImpl<MemoryBuffer>(FD, Filename, FileSize, FileSize, 0,
- false, RequiresNullTerminator,
- IsVolatile, Alignment);
+ RequiresNullTerminator, IsVolatile,
+ Alignment);
}
ErrorOr<std::unique_ptr<MemoryBuffer>> MemoryBuffer::getOpenFileSlice(
@@ -567,7 +564,7 @@ ErrorOr<std::unique_ptr<MemoryBuffer>> MemoryBuffer::getOpenFileSlice(
bool IsVolatile, std::optional<Align> Alignment) {
assert(MapSize != uint64_t(-1));
return getOpenFileImpl<MemoryBuffer>(FD, Filename, -1, MapSize, Offset, false,
- false, IsVolatile, Alignment);
+ IsVolatile, Alignment);
}
ErrorOr<std::unique_ptr<MemoryBuffer>> MemoryBuffer::getSTDIN() {
>From be7af98b9a94bdb40a918130189544c8ab5232cd Mon Sep 17 00:00:00 2001
From: John Holdsworth <github at johnholdsworth.com>
Date: Thu, 18 Sep 2025 11:07:02 +0200
Subject: [PATCH 13/13] Multi-threaded file opening/mapping.
---
lld/MachO/Driver.cpp | 31 +++++++++++++++++--------------
lld/MachO/InputFiles.cpp | 21 +++++++++++++++++----
2 files changed, 34 insertions(+), 18 deletions(-)
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index f652e195ffc5e..e4f59280731be 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -292,7 +292,7 @@ static void saveThinArchiveToRepro(ArchiveFile const *file) {
struct DeferredFile {
StringRef path;
bool isLazy;
- MemoryBufferRef buffer;
+ std::optional<MemoryBufferRef> buffer;
};
using DeferredFiles = std::vector<DeferredFile>;
@@ -346,8 +346,10 @@ void multiThreadedPageInBackground(DeferredFiles &deferred) {
std::atomic_int numDeferedFilesAdvised = 0;
auto t0 = high_resolution_clock::now();
- auto preloadDeferredFile = [&](const DeferredFile &deferredFile) {
- const StringRef &buff = deferredFile.buffer.getBuffer();
+ auto preloadDeferredFile = [&](DeferredFile &deferredFile) {
+ if (!deferredFile.buffer.has_value())
+ deferredFile.buffer = *readFile(deferredFile.path);
+ const StringRef &buff = (*deferredFile.buffer).getBuffer();
if (buff.size() > largeArchive)
return;
@@ -394,12 +396,9 @@ void multiThreadedPageInBackground(DeferredFiles &deferred) {
<< duration_cast<milliseconds>(dt).count() / 1000. << "\n";
}
-static void multiThreadedPageIn(const DeferredFiles &deferred) {
+static void multiThreadedPageIn(DeferredFiles &deferred) {
static SerialBackgroundQueue pageInQueue;
- pageInQueue.queueWork([=]() {
- DeferredFiles files = deferred;
- multiThreadedPageInBackground(files);
- });
+ pageInQueue.queueWork([&]() { multiThreadedPageInBackground(deferred); });
}
static InputFile *processFile(std::optional<MemoryBufferRef> buffer,
@@ -574,13 +573,12 @@ static InputFile *addFile(StringRef path, LoadType loadType,
}
static void deferFile(StringRef path, bool isLazy, DeferredFiles &deferred) {
+ if (config->readWorkers)
+ return deferred.push_back({path, isLazy, std::nullopt});
std::optional<MemoryBufferRef> buffer = readFile(path);
if (!buffer)
return;
- if (config->readWorkers)
- deferred.push_back({path, isLazy, *buffer});
- else
- processFile(buffer, nullptr, path, LoadType::CommandLine, isLazy);
+ processFile(buffer, nullptr, path, LoadType::CommandLine, isLazy);
}
static std::vector<StringRef> missingAutolinkWarnings;
@@ -1365,7 +1363,8 @@ static void createFiles(const InputArgList &args) {
bool isLazy = false;
// If we've processed an opening --start-lib, without a matching --end-lib
bool inLib = false;
- DeferredFiles deferredFiles;
+ static DeferredFiles deferredFiles;
+ deferredFiles.clear();
for (const Arg *arg : args) {
const Option &opt = arg->getOption();
@@ -1444,9 +1443,13 @@ static void createFiles(const InputArgList &args) {
if (config->readWorkers) {
multiThreadedPageIn(deferredFiles);
- DeferredFiles archiveContents;
+ static DeferredFiles archiveContents;
std::vector<ArchiveFile *> archives;
+ archiveContents.clear();
+
for (auto &file : deferredFiles) {
+ if (!file.buffer.has_value())
+ file.buffer = readFile(file.path);
auto inputFile = processFile(file.buffer, &archiveContents, file.path,
LoadType::CommandLine, file.isLazy);
if (ArchiveFile *archive = dyn_cast<ArchiveFile>(inputFile))
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index bdc63f23f90b6..88a7bb608d7ca 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -212,10 +212,15 @@ static bool compatWithTargetArch(const InputFile *file, const Header *hdr) {
DenseMap<CachedHashStringRef, MemoryBufferRef> macho::cachedReads;
// Open a given file path and return it as a memory-mapped file.
std::optional<MemoryBufferRef> macho::readFile(StringRef path) {
+ static std::mutex mutex;
+ mutex.lock();
CachedHashStringRef key(path);
auto entry = cachedReads.find(key);
- if (entry != cachedReads.end())
+ if (entry != cachedReads.end()) {
+ mutex.unlock();
return entry->second;
+ }
+ mutex.unlock();
ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr =
MemoryBuffer::getFile(path, false, /*RequiresNullTerminator=*/false);
@@ -224,18 +229,23 @@ std::optional<MemoryBufferRef> macho::readFile(StringRef path) {
return std::nullopt;
}
+ mutex.lock();
std::unique_ptr<MemoryBuffer> &mb = *mbOrErr;
MemoryBufferRef mbref = mb->getMemBufferRef();
make<std::unique_ptr<MemoryBuffer>>(std::move(mb)); // take mb ownership
+ mutex.unlock();
// If this is a regular non-fat file, return it.
const char *buf = mbref.getBufferStart();
const auto *hdr = reinterpret_cast<const fat_header *>(buf);
if (mbref.getBufferSize() < sizeof(uint32_t) ||
read32be(&hdr->magic) != FAT_MAGIC) {
+ mutex.lock();
if (tar)
tar->append(relativeToRoot(path), mbref.getBuffer());
- return cachedReads[key] = mbref;
+ cachedReads[key] = mbref;
+ mutex.unlock();
+ return mbref;
}
llvm::BumpPtrAllocator &bAlloc = lld::bAlloc();
@@ -272,10 +282,13 @@ std::optional<MemoryBufferRef> macho::readFile(StringRef path) {
uint32_t size = read32be(&arch[i].size);
if (offset + size > mbref.getBufferSize())
error(path + ": slice extends beyond end of file");
+ mutex.lock();
if (tar)
tar->append(relativeToRoot(path), mbref.getBuffer());
- return cachedReads[key] = MemoryBufferRef(StringRef(buf + offset, size),
- path.copy(bAlloc));
+ mbref = MemoryBufferRef(StringRef(buf + offset, size), path.copy(bAlloc));
+ cachedReads[key] = mbref;
+ mutex.unlock();
+ return mbref;
}
auto targetArchName = getArchName(target->cpuType, target->cpuSubtype);
More information about the llvm-commits
mailing list