[clang] [lld] [llvm] Integrated Distributed ThinLTO (DTLTO): Initial support (PR #126654)
Nuri Amari via cfe-commits
cfe-commits at lists.llvm.org
Tue Feb 11 14:11:21 PST 2025
================
@@ -2142,3 +2190,327 @@ std::vector<int> lto::generateModulesOrdering(ArrayRef<BitcodeModule *> R) {
});
return ModulesOrdering;
}
+
+namespace {
+// For this out-of-process backend, no codegen is done when invoked for each
+// task. Instead, we generate the required information (e.g., the summary
+// index shard, import list, etc.) to allow the codegen to be performed
+// externally. This backend's `wait` function then invokes an external
+// distributor process to perform the backend compilations.
+class OutOfProcessThinBackend : public CGThinBackend {
+ using SString = SmallString<128>;
+
+ AddBufferFn AddBuffer;
+
+ BumpPtrAllocator Alloc;
+ StringSaver Saver{Alloc};
+
+ SString LinkerOutputFile;
+ StringRef LinkerVersion;
+ SString RemoteOptTool;
+ SString DistributorPath;
+ bool SaveTemps;
+
+ SmallVector<StringRef, 0> CodegenOptions;
+ DenseSet<StringRef> AdditionalInputs;
+
+ // Information specific to an individual backend compilation job.
+ struct Job {
+ unsigned Task;
+ StringRef ModuleID;
+ StringRef Triple;
+ StringRef NativeObjectPath;
+ StringRef SummaryIndexPath;
+ ImportsFilesContainer ImportFiles;
+ };
+ // The set of backend compilation jobs.
+ SmallVector<Job> Jobs;
+
+ // A unique string to identify the current link.
+ SmallString<8> UID;
+
+public:
+ OutOfProcessThinBackend(
+ const Config &Conf, ModuleSummaryIndex &CombinedIndex,
+ ThreadPoolStrategy ThinLTOParallelism,
+ const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
+ AddStreamFn AddStream, AddBufferFn AddBuffer,
+ lto::IndexWriteCallback OnWrite, bool ShouldEmitIndexFiles,
+ bool ShouldEmitImportsFiles, StringRef LinkerOutputFile,
+ StringRef LinkerVersion, StringRef RemoteOptTool, StringRef Distributor,
+ bool SaveTemps)
+ : CGThinBackend(Conf, CombinedIndex, ModuleToDefinedGVSummaries, OnWrite,
+ ShouldEmitIndexFiles, ShouldEmitImportsFiles,
+ ThinLTOParallelism),
+ AddBuffer(std::move(AddBuffer)), LinkerOutputFile(LinkerOutputFile),
+ LinkerVersion(LinkerVersion), RemoteOptTool(RemoteOptTool),
+ DistributorPath(Distributor), SaveTemps(SaveTemps) {}
+
+ virtual void setup(unsigned MaxTasks) override {
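+ // Use the process ID to disambiguate output files from any concurrent
+ // links writing to the same directory.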
+ UID = itostr(sys::Process::getProcessId());
+ Jobs.resize((size_t)MaxTasks);
+ }
+
+ Error start(
+ unsigned Task, BitcodeModule BM,
+ const FunctionImporter::ImportMapTy &ImportList,
+ const FunctionImporter::ExportSetTy &ExportList,
+ const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
+ MapVector<StringRef, BitcodeModule> &ModuleMap,
+ DenseMap<StringRef, std::string> &ModuleTriples) override {
+
+ StringRef ModulePath = BM.getModuleIdentifier();
+
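+ // Place the native object alongside the linker output, named
+ // <stem>.<Task>.<UID>.native.o.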
+ SString ObjFilePath = sys::path::parent_path(LinkerOutputFile);
+ sys::path::append(ObjFilePath, sys::path::stem(ModulePath) + "." +
+ itostr(Task) + "." + UID + ".native.o");
+
+ Job &J = Jobs[Task - 1]; // Task numbering is 1-based; task 0 is reserved.
+ J = {Task,
+ ModulePath,
+ ModuleTriples[ModulePath],
+ Saver.save(ObjFilePath.str()),
+ Saver.save(ObjFilePath.str() + ".thinlto.bc"),
+ {}};
+
+ assert(ModuleToDefinedGVSummaries.count(ModulePath));
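+ // Emit the summary index shard and import list for this job on a backend
+ // thread; the codegen itself is deferred until wait() invokes the
+ // external distributor.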
+ BackendThreadPool.async(
+ [=](Job &J, const FunctionImporter::ImportMapTy &ImportList) {
+ if (LLVM_ENABLE_THREADS && Conf.TimeTraceEnabled)
+ timeTraceProfilerInitialize(Conf.TimeTraceGranularity,
+ "thin backend");
+ if (auto E = emitFiles(ImportList, J.ModuleID, J.SummaryIndexPath,
+ J.ModuleID.str(), J.ImportFiles)) {
+ std::unique_lock<std::mutex> L(ErrMu);
+ if (Err)
+ Err = joinErrors(std::move(*Err), std::move(E));
+ else
+ Err = std::move(E);
+ }
+ if (LLVM_ENABLE_THREADS && Conf.TimeTraceEnabled)
+ timeTraceProfilerFinishThread();
+ },
+ std::ref(J), std::ref(ImportList));
+
+ return Error::success();
+ }
+
+ // Derive a set of Clang options that will be shared/common for all DTLTO
----------------
NuriAmari wrote:
With or without this patch adding integrated DTLTO, I think something like this (automatic translation of a thin-link invocation into a matching Clang backend invocation) would be useful. The optimization level, and flags forwarded through Clang directly to the linker (e.g. `-Wl,-mllvm,-my_codegen_option`), are the ones I've run into most often.
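To make the idea concrete, here is a minimal sketch of what I mean by "automatic translation" (the function name `translateThinLinkArgs` and the exact flag set are hypothetical, not anything in this patch): scan the arguments the linker saw at thin-link time and keep only those that should also appear on the matching Clang backend invocation.

```cpp
#include <string>
#include <vector>

// Hypothetical sketch only. Given the flags seen at thin-link time, derive
// the flags for the matching Clang backend invocation (e.g. after the driver
// expands -Wl,-mllvm,-foo, the linker sees "-mllvm -foo", which should be
// forwarded to the backend compile).
std::vector<std::string>
translateThinLinkArgs(const std::vector<std::string> &LinkArgs) {
  std::vector<std::string> BackendArgs;
  for (size_t I = 0; I < LinkArgs.size(); ++I) {
    const std::string &A = LinkArgs[I];
    if (A.rfind("-O", 0) == 0 && A.size() == 3) {
      // Forward the optimization level unchanged (-O0..-O3, -Os, -Oz).
      BackendArgs.push_back(A);
    } else if (A == "-mllvm" && I + 1 < LinkArgs.size()) {
      // -mllvm takes its value as the following argument; forward both so
      // codegen options reach the backend compile.
      BackendArgs.push_back(A);
      BackendArgs.push_back(LinkArgs[++I]);
    }
    // Everything else (inputs, -l..., etc.) is link-only and dropped here.
  }
  return BackendArgs;
}
```

A real version would also need to cover linker-specific spellings such as `--plugin-opt=...` and target flags, which is exactly the gap that per-build-system scripts end up filling today.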
For Buck, we either have this [imho unfortunate script wrapping the backend Clang invocations](https://github.com/facebook/buck2/blob/main/prelude/cxx/dist_lto/tools/dist_lto_opt_gnu.py) that translates arguments from the thin-link invocation to the Clang backend invocation, or we specify both sets of flags manually. @teresajohnson @MaskRay How does Bazel handle this problem?
https://github.com/llvm/llvm-project/pull/126654