[llvm] Make WriteIndexesThinBackend multi threaded (PR #109847)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 24 14:35:25 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-lto
Author: Nuri Amari (NuriAmari)
<details>
<summary>Changes</summary>
We've noticed that for large builds, executing the thin-link step can take on the order of tens of minutes. We currently use only a single thread to write the sharded indices and import files for each input bitcode file. While we need to ensure that the index file produced lists modules in a deterministic order, that doesn't prevent us from executing the rest of the work in parallel.
In this change we use a thread pool to execute as much of the backend's work as possible in parallel. In local testing on a machine with 80 cores, this change makes a thin-link for ~100,000 input files run in ~2 minutes. Without this change it takes upwards of 10 minutes.
---
Full diff: https://github.com/llvm/llvm-project/pull/109847.diff
1 Files Affected:
- (modified) llvm/lib/LTO/LTO.cpp (+45-12)
``````````diff
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index a88124dacfaefd..78084c7aedcd91 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -1395,11 +1395,12 @@ class lto::ThinBackendProc {
MapVector<StringRef, BitcodeModule> &ModuleMap) = 0;
virtual Error wait() = 0;
virtual unsigned getThreadCount() = 0;
+ virtual bool isSensitiveToInputOrder() { return false; }
// Write sharded indices and (optionally) imports to disk
Error emitFiles(const FunctionImporter::ImportMapTy &ImportList,
llvm::StringRef ModulePath,
- const std::string &NewModulePath) {
+ const std::string &NewModulePath) const {
ModuleToSummariesForIndexTy ModuleToSummariesForIndex;
GVSummaryPtrSet DeclarationSummaries;
@@ -1613,6 +1614,10 @@ namespace {
class WriteIndexesThinBackend : public ThinBackendProc {
std::string OldPrefix, NewPrefix, NativeObjectPrefix;
raw_fd_ostream *LinkedObjectsFile;
+ DefaultThreadPool BackendThreadPool;
+ std::optional<Error> Err;
+ std::mutex ErrMu;
+ std::mutex OnWriteMu;
public:
WriteIndexesThinBackend(
@@ -1634,8 +1639,6 @@ class WriteIndexesThinBackend : public ThinBackendProc {
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
MapVector<StringRef, BitcodeModule> &ModuleMap) override {
StringRef ModulePath = BM.getModuleIdentifier();
- std::string NewModulePath =
- getThinLTOOutputFile(ModulePath, OldPrefix, NewPrefix);
if (LinkedObjectsFile) {
std::string ObjectPrefix =
@@ -1645,19 +1648,48 @@ class WriteIndexesThinBackend : public ThinBackendProc {
*LinkedObjectsFile << LinkedObjectsFilePath << '\n';
}
- if (auto E = emitFiles(ImportList, ModulePath, NewModulePath))
- return E;
+ BackendThreadPool.async(
+ [this](const StringRef ModulePath,
+ const FunctionImporter::ImportMapTy &ImportList,
+ const std::string &OldPrefix, const std::string &NewPrefix) {
+ std::string NewModulePath =
+ getThinLTOOutputFile(ModulePath, OldPrefix, NewPrefix);
+ auto E = emitFiles(ImportList, ModulePath, NewModulePath);
+ if (E) {
+ std::unique_lock<std::mutex> L(ErrMu);
+ if (Err)
+ Err = joinErrors(std::move(*Err), std::move(E));
+ else
+ Err = std::move(E);
+ return;
+ }
+ if (OnWrite) {
+ // Serialize calls to the on write callback in case it is not thread
+ // safe
+ std::unique_lock<std::mutex> L(OnWriteMu);
+ OnWrite(std::string(ModulePath));
+ }
+ },
+ ModulePath, ImportList, OldPrefix, NewPrefix);
+ return Error::success();
+ }
- if (OnWrite)
- OnWrite(std::string(ModulePath));
+ Error wait() override {
+ BackendThreadPool.wait();
+ if (Err)
+ return std::move(*Err);
return Error::success();
}
- Error wait() override { return Error::success(); }
+ unsigned getThreadCount() override {
+ return BackendThreadPool.getMaxConcurrency();
+ }
- // WriteIndexesThinBackend should always return 1 to prevent module
- // re-ordering and avoid non-determinism in the final link.
- unsigned getThreadCount() override { return 1; }
+ bool isSensitiveToInputOrder() override {
+ // The order which modules are written to LinkedObjectsFile should be
+ // deterministic and match the order they are passed on the command line.
+ return true;
+ }
};
} // end anonymous namespace
@@ -1856,7 +1888,8 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
ThinLTO.ModuleMap);
};
- if (BackendProc->getThreadCount() == 1) {
+ if (BackendProc->getThreadCount() == 1 ||
+ BackendProc->isSensitiveToInputOrder()) {
// Process the modules in the order they were provided on the command-line.
// It is important for this codepath to be used for WriteIndexesThinBackend,
// to ensure the emitted LinkedObjectsFile lists ThinLTO objects in the same
``````````
</details>
https://github.com/llvm/llvm-project/pull/109847
More information about the llvm-commits mailing list