[llvm] Make WriteIndexesThinBackend multi threaded (PR #109847)
Nuri Amari via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 4 08:16:03 PDT 2024
https://github.com/NuriAmari updated https://github.com/llvm/llvm-project/pull/109847
>From fa9222b90084422e63c67924874cfd0c1a61da45 Mon Sep 17 00:00:00 2001
From: Nuri Amari <nuriamari at fb.com>
Date: Tue, 24 Sep 2024 11:29:54 -0700
Subject: [PATCH 1/3] Make WriteIndexesThinBackend multi threaded
We've noticed that for large builds executing thin-link can take on the
order of 10s of minutes. We are only using a single thread to write the
sharded indices and import files for each input bitcode file. While we
need to ensure the index files produced lists modules in a deterministic
order, that doesn't prevent us from executing the rest of the work in parallel.
In this change we use a thread pool to execute as much of the backend's
work as possible in parallel. In local testing on a machine with 80
cores, this change makes a thin-link for ~100,000 input files run in ~2 minutes.
Without this change is takes upwards of 10 minutes.
---
llvm/lib/LTO/LTO.cpp | 57 ++++++++++++++++++++++++++++++++++----------
1 file changed, 45 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index a88124dacfaefd..78084c7aedcd91 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -1395,11 +1395,12 @@ class lto::ThinBackendProc {
MapVector<StringRef, BitcodeModule> &ModuleMap) = 0;
virtual Error wait() = 0;
virtual unsigned getThreadCount() = 0;
+ virtual bool isSensitiveToInputOrder() { return false; }
// Write sharded indices and (optionally) imports to disk
Error emitFiles(const FunctionImporter::ImportMapTy &ImportList,
llvm::StringRef ModulePath,
- const std::string &NewModulePath) {
+ const std::string &NewModulePath) const {
ModuleToSummariesForIndexTy ModuleToSummariesForIndex;
GVSummaryPtrSet DeclarationSummaries;
@@ -1613,6 +1614,10 @@ namespace {
class WriteIndexesThinBackend : public ThinBackendProc {
std::string OldPrefix, NewPrefix, NativeObjectPrefix;
raw_fd_ostream *LinkedObjectsFile;
+ DefaultThreadPool BackendThreadPool;
+ std::optional<Error> Err;
+ std::mutex ErrMu;
+ std::mutex OnWriteMu;
public:
WriteIndexesThinBackend(
@@ -1634,8 +1639,6 @@ class WriteIndexesThinBackend : public ThinBackendProc {
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
MapVector<StringRef, BitcodeModule> &ModuleMap) override {
StringRef ModulePath = BM.getModuleIdentifier();
- std::string NewModulePath =
- getThinLTOOutputFile(ModulePath, OldPrefix, NewPrefix);
if (LinkedObjectsFile) {
std::string ObjectPrefix =
@@ -1645,19 +1648,48 @@ class WriteIndexesThinBackend : public ThinBackendProc {
*LinkedObjectsFile << LinkedObjectsFilePath << '\n';
}
- if (auto E = emitFiles(ImportList, ModulePath, NewModulePath))
- return E;
+ BackendThreadPool.async(
+ [this](const StringRef ModulePath,
+ const FunctionImporter::ImportMapTy &ImportList,
+ const std::string &OldPrefix, const std::string &NewPrefix) {
+ std::string NewModulePath =
+ getThinLTOOutputFile(ModulePath, OldPrefix, NewPrefix);
+ auto E = emitFiles(ImportList, ModulePath, NewModulePath);
+ if (E) {
+ std::unique_lock<std::mutex> L(ErrMu);
+ if (Err)
+ Err = joinErrors(std::move(*Err), std::move(E));
+ else
+ Err = std::move(E);
+ return;
+ }
+ if (OnWrite) {
+ // Serialize calls to the on write callback in case it is not thread
+ // safe
+ std::unique_lock<std::mutex> L(OnWriteMu);
+ OnWrite(std::string(ModulePath));
+ }
+ },
+ ModulePath, ImportList, OldPrefix, NewPrefix);
+ return Error::success();
+ }
- if (OnWrite)
- OnWrite(std::string(ModulePath));
+ Error wait() override {
+ BackendThreadPool.wait();
+ if (Err)
+ return std::move(*Err);
return Error::success();
}
- Error wait() override { return Error::success(); }
+ unsigned getThreadCount() override {
+ return BackendThreadPool.getMaxConcurrency();
+ }
- // WriteIndexesThinBackend should always return 1 to prevent module
- // re-ordering and avoid non-determinism in the final link.
- unsigned getThreadCount() override { return 1; }
+ bool isSensitiveToInputOrder() override {
+ // The order which modules are written to LinkedObjectsFile should be
+ // deterministic and match the order they are passed on the command line.
+ return true;
+ }
};
} // end anonymous namespace
@@ -1856,7 +1888,8 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
ThinLTO.ModuleMap);
};
- if (BackendProc->getThreadCount() == 1) {
+ if (BackendProc->getThreadCount() == 1 ||
+ BackendProc->isSensitiveToInputOrder()) {
// Process the modules in the order they were provided on the command-line.
// It is important for this codepath to be used for WriteIndexesThinBackend,
// to ensure the emitted LinkedObjectsFile lists ThinLTO objects in the same
>From f00ed250315469d3c38c26627e4df636e0fbea44 Mon Sep 17 00:00:00 2001
From: Nuri Amari <nuriamari at fb.com>
Date: Wed, 25 Sep 2024 13:03:59 -0700
Subject: [PATCH 2/3] Address PR Feedback #1
---
llvm/lib/LTO/LTO.cpp | 63 +++++++++++++++++---------------------------
1 file changed, 24 insertions(+), 39 deletions(-)
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 78084c7aedcd91..dd9e1c21c7e27c 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -1376,15 +1376,21 @@ class lto::ThinBackendProc {
const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries;
lto::IndexWriteCallback OnWrite;
bool ShouldEmitImportsFiles;
+ DefaultThreadPool BackendThreadPool;
+ std::optional<Error> Err;
+ std::mutex ErrMu;
+ std::mutex OnWriteMu;
public:
ThinBackendProc(
const Config &Conf, ModuleSummaryIndex &CombinedIndex,
const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
- lto::IndexWriteCallback OnWrite, bool ShouldEmitImportsFiles)
+ lto::IndexWriteCallback OnWrite, bool ShouldEmitImportsFiles,
+ ThreadPoolStrategy ThinLTOParallelism)
: Conf(Conf), CombinedIndex(CombinedIndex),
ModuleToDefinedGVSummaries(ModuleToDefinedGVSummaries),
- OnWrite(OnWrite), ShouldEmitImportsFiles(ShouldEmitImportsFiles) {}
+ OnWrite(OnWrite), ShouldEmitImportsFiles(ShouldEmitImportsFiles),
+ BackendThreadPool(ThinLTOParallelism) {}
virtual ~ThinBackendProc() = default;
virtual Error start(
@@ -1393,8 +1399,13 @@ class lto::ThinBackendProc {
const FunctionImporter::ExportSetTy &ExportList,
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
MapVector<StringRef, BitcodeModule> &ModuleMap) = 0;
- virtual Error wait() = 0;
- virtual unsigned getThreadCount() = 0;
+ Error wait() {
+ BackendThreadPool.wait();
+ if (Err)
+ return std::move(*Err);
+ return Error::success();
+ }
+ unsigned getThreadCount() { return BackendThreadPool.getMaxConcurrency(); }
virtual bool isSensitiveToInputOrder() { return false; }
// Write sharded indices and (optionally) imports to disk
@@ -1429,15 +1440,11 @@ class lto::ThinBackendProc {
namespace {
class InProcessThinBackend : public ThinBackendProc {
- DefaultThreadPool BackendThreadPool;
AddStreamFn AddStream;
FileCache Cache;
DenseSet<GlobalValue::GUID> CfiFunctionDefs;
DenseSet<GlobalValue::GUID> CfiFunctionDecls;
- std::optional<Error> Err;
- std::mutex ErrMu;
-
bool ShouldEmitIndexFiles;
public:
@@ -1448,9 +1455,9 @@ class InProcessThinBackend : public ThinBackendProc {
AddStreamFn AddStream, FileCache Cache, lto::IndexWriteCallback OnWrite,
bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles)
: ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries,
- OnWrite, ShouldEmitImportsFiles),
- BackendThreadPool(ThinLTOParallelism), AddStream(std::move(AddStream)),
- Cache(std::move(Cache)), ShouldEmitIndexFiles(ShouldEmitIndexFiles) {
+ OnWrite, ShouldEmitImportsFiles, ThinLTOParallelism),
+ AddStream(std::move(AddStream)), Cache(std::move(Cache)),
+ ShouldEmitIndexFiles(ShouldEmitIndexFiles) {
for (auto &Name : CombinedIndex.cfiFunctionDefs())
CfiFunctionDefs.insert(
GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(Name)));
@@ -1546,18 +1553,6 @@ class InProcessThinBackend : public ThinBackendProc {
OnWrite(std::string(ModulePath));
return Error::success();
}
-
- Error wait() override {
- BackendThreadPool.wait();
- if (Err)
- return std::move(*Err);
- else
- return Error::success();
- }
-
- unsigned getThreadCount() override {
- return BackendThreadPool.getMaxConcurrency();
- }
};
} // end anonymous namespace
@@ -1614,10 +1609,6 @@ namespace {
class WriteIndexesThinBackend : public ThinBackendProc {
std::string OldPrefix, NewPrefix, NativeObjectPrefix;
raw_fd_ostream *LinkedObjectsFile;
- DefaultThreadPool BackendThreadPool;
- std::optional<Error> Err;
- std::mutex ErrMu;
- std::mutex OnWriteMu;
public:
WriteIndexesThinBackend(
@@ -1627,7 +1618,8 @@ class WriteIndexesThinBackend : public ThinBackendProc {
std::string NativeObjectPrefix, bool ShouldEmitImportsFiles,
raw_fd_ostream *LinkedObjectsFile, lto::IndexWriteCallback OnWrite)
: ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries,
- OnWrite, ShouldEmitImportsFiles),
+ OnWrite, ShouldEmitImportsFiles,
+ /* ThinLTOParallelism */ hardware_concurrency()),
OldPrefix(OldPrefix), NewPrefix(NewPrefix),
NativeObjectPrefix(NativeObjectPrefix),
LinkedObjectsFile(LinkedObjectsFile) {}
@@ -1640,6 +1632,10 @@ class WriteIndexesThinBackend : public ThinBackendProc {
MapVector<StringRef, BitcodeModule> &ModuleMap) override {
StringRef ModulePath = BM.getModuleIdentifier();
+ // The contents of this file may be used as input to a native link, and must
+ // therefore contain the processed modules in a determinstic order than
+ // match the order they are provided on the command line. For that reason,
+ // we cannot include this in the asynchronously executed lambda below.
if (LinkedObjectsFile) {
std::string ObjectPrefix =
NativeObjectPrefix.empty() ? NewPrefix : NativeObjectPrefix;
@@ -1674,17 +1670,6 @@ class WriteIndexesThinBackend : public ThinBackendProc {
return Error::success();
}
- Error wait() override {
- BackendThreadPool.wait();
- if (Err)
- return std::move(*Err);
- return Error::success();
- }
-
- unsigned getThreadCount() override {
- return BackendThreadPool.getMaxConcurrency();
- }
-
bool isSensitiveToInputOrder() override {
// The order which modules are written to LinkedObjectsFile should be
// deterministic and match the order they are passed on the command line.
>From 846ac06d523ea3e2f3febf4bf9c38cde2fec24c2 Mon Sep 17 00:00:00 2001
From: Nuri Amari <nuriamari at fb.com>
Date: Fri, 4 Oct 2024 08:13:24 -0700
Subject: [PATCH 3/3] Address PR Feedback #2
---
llvm/lib/LTO/LTO.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index dd9e1c21c7e27c..bad872c73b4d2a 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -1633,7 +1633,7 @@ class WriteIndexesThinBackend : public ThinBackendProc {
StringRef ModulePath = BM.getModuleIdentifier();
// The contents of this file may be used as input to a native link, and must
- // therefore contain the processed modules in a determinstic order than
+ // therefore contain the processed modules in a determinstic order that
// match the order they are provided on the command line. For that reason,
// we cannot include this in the asynchronously executed lambda below.
if (LinkedObjectsFile) {
More information about the llvm-commits
mailing list