[llvm] [DTLTO][LLVM] Integrated Distributed ThinLTO (DTLTO) (PR #127749)

via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 19 06:33:10 PST 2025


https://github.com/bd1976bris updated https://github.com/llvm/llvm-project/pull/127749

>From 03e98d4a3de79c769105d177c222103171aec4e9 Mon Sep 17 00:00:00 2001
From: Ben Dunbobbin <Ben.Dunbobbin at sony.com>
Date: Wed, 19 Feb 2025 13:28:05 +0000
Subject: [PATCH 01/10] [DTLTO][LLVM] Store the Target Triple for ThinLTO
 modules.

This will be used for supplying a `-target=<triple>` option to clang for
DTLTO.

Note that if DTLTO is changed to use an optimisation tool that does not
require an explicit triple to be passed then the triple handling can be
removed entirely.
---
 llvm/include/llvm/LTO/LTO.h |  7 +++++--
 llvm/lib/LTO/LTO.cpp        | 14 +++++++++-----
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h
index 242a05f7d32c0..677ec412afc19 100644
--- a/llvm/include/llvm/LTO/LTO.h
+++ b/llvm/include/llvm/LTO/LTO.h
@@ -228,7 +228,8 @@ class ThinBackendProc {
       const FunctionImporter::ImportMapTy &ImportList,
       const FunctionImporter::ExportSetTy &ExportList,
       const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
-      MapVector<StringRef, BitcodeModule> &ModuleMap) = 0;
+      MapVector<StringRef, BitcodeModule> &ModuleMap,
+      DenseMap<StringRef, std::string> &ModuleTriples) = 0;
   Error wait() {
     BackendThreadPool.wait();
     if (Err)
@@ -426,6 +427,7 @@ class LTO {
     // The bitcode modules to compile, if specified by the LTO Config.
     std::optional<ModuleMapType> ModulesToCompile;
     DenseMap<GlobalValue::GUID, StringRef> PrevailingModuleForGUID;
+    DenseMap<StringRef, std::string> ModuleTriples;
   } ThinLTO;
 
   // The global resolution for a particular (mangled) symbol name. This is in
@@ -517,7 +519,8 @@ class LTO {
                        bool LivenessFromIndex);
 
   Error addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
-                   const SymbolResolution *&ResI, const SymbolResolution *ResE);
+                   const SymbolResolution *&ResI, const SymbolResolution *ResE,
+                   StringRef Triple);
 
   Error runRegularLTO(AddStreamFn AddStream);
   Error runThinLTO(AddStreamFn AddStream, FileCache Cache,
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 0f53c60851217..e5bc79297ecaa 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -783,7 +783,7 @@ Error LTO::addModule(InputFile &Input, unsigned ModI,
                        LTOInfo->HasSummary);
 
   if (IsThinLTO)
-    return addThinLTO(BM, ModSyms, ResI, ResE);
+    return addThinLTO(BM, ModSyms, ResI, ResE, Input.getTargetTriple());
 
   RegularLTO.EmptyCombinedModule = false;
   Expected<RegularLTOState::AddedModule> ModOrErr =
@@ -1030,7 +1030,7 @@ Error LTO::linkRegularLTO(RegularLTOState::AddedModule Mod,
 // Add a ThinLTO module to the link.
 Error LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
                       const SymbolResolution *&ResI,
-                      const SymbolResolution *ResE) {
+                      const SymbolResolution *ResE, StringRef Triple) {
   const SymbolResolution *ResITmp = ResI;
   for (const InputFile::Symbol &Sym : Syms) {
     assert(ResITmp != ResE);
@@ -1090,6 +1090,8 @@ Error LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
         "Expected at most one ThinLTO module per bitcode file",
         inconvertibleErrorCode());
 
+  ThinLTO.ModuleTriples.insert({BM.getModuleIdentifier(), Triple.str()});
+
   if (!Conf.ThinLTOModulesToCompile.empty()) {
     if (!ThinLTO.ModulesToCompile)
       ThinLTO.ModulesToCompile = ModuleMapType();
@@ -1496,7 +1498,8 @@ class InProcessThinBackend : public ThinBackendProc {
       const FunctionImporter::ImportMapTy &ImportList,
       const FunctionImporter::ExportSetTy &ExportList,
       const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
-      MapVector<StringRef, BitcodeModule> &ModuleMap) override {
+      MapVector<StringRef, BitcodeModule> &ModuleMap,
+      DenseMap<StringRef, std::string> & /*ModuleTriples*/) override {
     StringRef ModulePath = BM.getModuleIdentifier();
     assert(ModuleToDefinedGVSummaries.count(ModulePath));
     const GVSummaryMapTy &DefinedGlobals =
@@ -1776,7 +1779,8 @@ class WriteIndexesThinBackend : public ThinBackendProc {
       const FunctionImporter::ImportMapTy &ImportList,
       const FunctionImporter::ExportSetTy &ExportList,
       const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
-      MapVector<StringRef, BitcodeModule> &ModuleMap) override {
+      MapVector<StringRef, BitcodeModule> &ModuleMap,
+      DenseMap<StringRef, std::string> & /*ModuleTriples*/) override {
     StringRef ModulePath = BM.getModuleIdentifier();
 
     // The contents of this file may be used as input to a native link, and must
@@ -2013,7 +2017,7 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
       return BackendProcess->start(
           RegularLTO.ParallelCodeGenParallelismLevel + I, Mod.second,
           ImportLists[Mod.first], ExportLists[Mod.first],
-          ResolvedODR[Mod.first], ThinLTO.ModuleMap);
+          ResolvedODR[Mod.first], ThinLTO.ModuleMap, ThinLTO.ModuleTriples);
     };
 
     if (BackendProcess->getThreadCount() == 1 ||

>From 19ef1d1805b8b9247c4e1a09f253e98d6d008bdf Mon Sep 17 00:00:00 2001
From: Ben Dunbobbin <Ben.Dunbobbin at sony.com>
Date: Wed, 19 Feb 2025 13:43:22 +0000
Subject: [PATCH 02/10] [DTLTO][LLVM] Make the ThinLTO backend wait() method
 virtual

The DTLTO ThinLTO backend will override this function to perform
code generation.

The DTLTO ThinLTO backend will not do any codegen when invoked for each
task. Instead, it will generate the required information (e.g., the
summary index shard, import list, etc.) to allow for the codegen to be
performed externally. The backend's `wait` function will then invoke an
external distributor process to do backend compilations.
---
 llvm/include/llvm/LTO/LTO.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h
index 677ec412afc19..41f55e91c7ed9 100644
--- a/llvm/include/llvm/LTO/LTO.h
+++ b/llvm/include/llvm/LTO/LTO.h
@@ -230,7 +230,7 @@ class ThinBackendProc {
       const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
       MapVector<StringRef, BitcodeModule> &ModuleMap,
       DenseMap<StringRef, std::string> &ModuleTriples) = 0;
-  Error wait() {
+  virtual Error wait() {
     BackendThreadPool.wait();
     if (Err)
       return std::move(*Err);

>From bc7f32e19a10b64d690f599a59673dc15627160b Mon Sep 17 00:00:00 2001
From: Ben Dunbobbin <Ben.Dunbobbin at sony.com>
Date: Wed, 19 Feb 2025 13:56:24 +0000
Subject: [PATCH 03/10] [DTLTO][LLVM] Generalize the emit files infrastructure

The DTLTO ThinLTO backend will need to customize the file paths used for
the summary index shard files. This is so that the names of these files
can include a unique ID so that multiple DTLTO links do not overwrite
each other's summary index shard files.

It also needs to be able to get the import files list for each job in
memory rather than writing it to a file. This is to allow the import
lists to be included in the JSON used to communicate with the
external distributor process.
---
 llvm/include/llvm/LTO/LTO.h                   | 13 +++++++++--
 .../llvm/Transforms/IPO/FunctionImport.h      |  6 +++++
 llvm/lib/LTO/LTO.cpp                          | 22 ++++++++++++++++---
 llvm/lib/Transforms/IPO/FunctionImport.cpp    | 14 ++++++++++--
 4 files changed, 48 insertions(+), 7 deletions(-)

diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h
index 41f55e91c7ed9..31d939d514491 100644
--- a/llvm/include/llvm/LTO/LTO.h
+++ b/llvm/include/llvm/LTO/LTO.h
@@ -199,6 +199,8 @@ class InputFile {
 
 using IndexWriteCallback = std::function<void(const std::string &)>;
 
+using ImportsFilesContainer = llvm::SmallVector<std::string>;
+
 /// This class defines the interface to the ThinLTO backend.
 class ThinBackendProc {
 protected:
@@ -241,8 +243,15 @@ class ThinBackendProc {
 
   // Write sharded indices and (optionally) imports to disk
   Error emitFiles(const FunctionImporter::ImportMapTy &ImportList,
-                  llvm::StringRef ModulePath,
-                  const std::string &NewModulePath) const;
+                  StringRef ModulePath, const std::string &NewModulePath) const;
+
+  // Write sharded indices to SummaryPath, (optionally) write imports to
+  // disk, and (optionally) record the imports in ImportsFiles.
+  Error emitFiles(const FunctionImporter::ImportMapTy &ImportList,
+                  StringRef ModulePath, StringRef SummaryPath,
+                  const std::string &NewModulePath,
+                  std::optional<std::reference_wrapper<ImportsFilesContainer>>
+                      ImportsFiles) const;
 };
 
 /// This callable defines the behavior of a ThinLTO backend after the thin-link
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionImport.h b/llvm/include/llvm/Transforms/IPO/FunctionImport.h
index 3623f9194d4d1..5e4116834b7f2 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionImport.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionImport.h
@@ -421,6 +421,12 @@ Error EmitImportsFiles(
     StringRef ModulePath, StringRef OutputFilename,
     const ModuleToSummariesForIndexTy &ModuleToSummariesForIndex);
 
+/// Call \p F passing each of the files module \p ModulePath will import from.
+void processImportsFiles(
+    StringRef ModulePath,
+    const ModuleToSummariesForIndexTy &ModuleToSummariesForIndex,
+    function_ref<void(const std::string &)> F);
+
 /// Based on the information recorded in the summaries during global
 /// summary-based analysis:
 /// 1. Resolve prevailing symbol linkages and constrain visibility (CanAutoHide
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index e5bc79297ecaa..3c1e7f35606b2 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -1392,6 +1392,16 @@ SmallVector<const char *> LTO::getRuntimeLibcallSymbols(const Triple &TT) {
 Error ThinBackendProc::emitFiles(
     const FunctionImporter::ImportMapTy &ImportList, llvm::StringRef ModulePath,
     const std::string &NewModulePath) const {
+  return emitFiles(ImportList, ModulePath, NewModulePath + ".thinlto.bc",
+                   NewModulePath,
+                   /*ImportsFiles=*/std::nullopt);
+}
+
+Error ThinBackendProc::emitFiles(
+    const FunctionImporter::ImportMapTy &ImportList, llvm::StringRef ModulePath,
+    StringRef SummaryPath, const std::string &NewModulePath,
+    std::optional<std::reference_wrapper<ImportsFilesContainer>> ImportsFiles)
+    const {
   ModuleToSummariesForIndexTy ModuleToSummariesForIndex;
   GVSummaryPtrSet DeclarationSummaries;
 
@@ -1400,10 +1410,9 @@ Error ThinBackendProc::emitFiles(
                                    ImportList, ModuleToSummariesForIndex,
                                    DeclarationSummaries);
 
-  raw_fd_ostream OS(NewModulePath + ".thinlto.bc", EC,
-                    sys::fs::OpenFlags::OF_None);
+  raw_fd_ostream OS(SummaryPath, EC, sys::fs::OpenFlags::OF_None);
   if (EC)
-    return createFileError("cannot open " + NewModulePath + ".thinlto.bc", EC);
+    return createFileError("cannot open " + Twine(SummaryPath), EC);
 
   writeIndexToFile(CombinedIndex, OS, &ModuleToSummariesForIndex,
                    &DeclarationSummaries);
@@ -1414,6 +1423,13 @@ Error ThinBackendProc::emitFiles(
     if (ImportFilesError)
       return ImportFilesError;
   }
+
+  // Optionally, store the imports files.
+  if (ImportsFiles)
+    processImportsFiles(
+        ModulePath, ModuleToSummariesForIndex,
+        [&](StringRef M) { ImportsFiles->get().push_back(M.str()); });
+
   return Error::success();
 }
 
diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index c3d0a1a3a046e..cdcf918d3fae8 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -1568,13 +1568,23 @@ Error llvm::EmitImportsFiles(
   if (EC)
     return createFileError("cannot open " + OutputFilename,
                            errorCodeToError(EC));
+  processImportsFiles(ModulePath, ModuleToSummariesForIndex,
+                      [&](StringRef M) { ImportsOS << M << "\n"; });
+  return Error::success();
+}
+
+/// Invoke callback \p F on the file paths from which \p ModulePath
+/// will import.
+void llvm::processImportsFiles(
+    StringRef ModulePath,
+    const ModuleToSummariesForIndexTy &ModuleToSummariesForIndex,
+    function_ref<void(const std::string &)> F) {
   for (const auto &ILI : ModuleToSummariesForIndex)
     // The ModuleToSummariesForIndex map includes an entry for the current
     // Module (needed for writing out the index files). We don't want to
     // include it in the imports file, however, so filter it out.
     if (ILI.first != ModulePath)
-      ImportsOS << ILI.first << "\n";
-  return Error::success();
+      F(ILI.first);
 }
 
 bool llvm::convertToDeclaration(GlobalValue &GV) {

>From 7d0c1c84b2728fc11f984123de70fdcd552a464a Mon Sep 17 00:00:00 2001
From: Ben Dunbobbin <Ben.Dunbobbin at sony.com>
Date: Wed, 19 Feb 2025 14:04:57 +0000
Subject: [PATCH 04/10] [DTLTO][LLVM] Add a setup() member to the ThinLTO
 backends

The new setup() member is now called to allow the ThinLTO backends to
prepare for code generation.

For the DTLTO backend, this will be used to preallocate storage for the
information required to perform the backend compilation jobs.
---
 llvm/include/llvm/LTO/LTO.h | 1 +
 llvm/lib/LTO/LTO.cpp        | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h
index 31d939d514491..815abecbdcda5 100644
--- a/llvm/include/llvm/LTO/LTO.h
+++ b/llvm/include/llvm/LTO/LTO.h
@@ -225,6 +225,7 @@ class ThinBackendProc {
         BackendThreadPool(ThinLTOParallelism) {}
 
   virtual ~ThinBackendProc() = default;
+  virtual void setup(unsigned MaxTasks) {}
   virtual Error start(
       unsigned Task, BitcodeModule BM,
       const FunctionImporter::ImportMapTy &ImportList,
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 3c1e7f35606b2..ad6712baa0dd6 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -2036,6 +2036,8 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
           ResolvedODR[Mod.first], ThinLTO.ModuleMap, ThinLTO.ModuleTriples);
     };
 
+    BackendProcess->setup(ModuleMap.size());
+
     if (BackendProcess->getThreadCount() == 1 ||
         BackendProcess->isSensitiveToInputOrder()) {
       // Process the modules in the order they were provided on the

>From d55d8c02add633af1ce9de639222be2878c2c800 Mon Sep 17 00:00:00 2001
From: Ben Dunbobbin <Ben.Dunbobbin at sony.com>
Date: Wed, 19 Feb 2025 14:14:57 +0000
Subject: [PATCH 05/10] [DTLTO][LLVM] Derive the InProcess backend from a base
 class

Move some setup code and state to a base class.

This will allow the DTLTO ThinLTO backend to share this setup code and
state by also deriving from this base class.
---
 llvm/lib/LTO/LTO.cpp | 31 +++++++++++++++++++++++--------
 1 file changed, 23 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index ad6712baa0dd6..cb4f87358c19c 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -1434,25 +1434,23 @@ Error ThinBackendProc::emitFiles(
 }
 
 namespace {
-class InProcessThinBackend : public ThinBackendProc {
+class CGThinBackend : public ThinBackendProc {
 protected:
   AddStreamFn AddStream;
-  FileCache Cache;
   DenseSet<GlobalValue::GUID> CfiFunctionDefs;
   DenseSet<GlobalValue::GUID> CfiFunctionDecls;
-
   bool ShouldEmitIndexFiles;
 
 public:
-  InProcessThinBackend(
+  CGThinBackend(
       const Config &Conf, ModuleSummaryIndex &CombinedIndex,
-      ThreadPoolStrategy ThinLTOParallelism,
       const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
-      AddStreamFn AddStream, FileCache Cache, lto::IndexWriteCallback OnWrite,
-      bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles)
+      AddStreamFn AddStream, lto::IndexWriteCallback OnWrite,
+      bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles,
+      ThreadPoolStrategy ThinLTOParallelism)
       : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries,
                         OnWrite, ShouldEmitImportsFiles, ThinLTOParallelism),
-        AddStream(std::move(AddStream)), Cache(std::move(Cache)),
+        AddStream(std::move(AddStream)),
         ShouldEmitIndexFiles(ShouldEmitIndexFiles) {
     for (auto &Name : CombinedIndex.cfiFunctionDefs())
       CfiFunctionDefs.insert(
@@ -1461,6 +1459,23 @@ class InProcessThinBackend : public ThinBackendProc {
       CfiFunctionDecls.insert(
           GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(Name)));
   }
+};
+
+class InProcessThinBackend : public CGThinBackend {
+protected:
+  FileCache Cache;
+
+public:
+  InProcessThinBackend(
+      const Config &Conf, ModuleSummaryIndex &CombinedIndex,
+      ThreadPoolStrategy ThinLTOParallelism,
+      const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
+      AddStreamFn AddStream, FileCache Cache, lto::IndexWriteCallback OnWrite,
+      bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles)
+      : CGThinBackend(Conf, CombinedIndex, ModuleToDefinedGVSummaries,
+                      AddStream, OnWrite, ShouldEmitIndexFiles,
+                      ShouldEmitImportsFiles, ThinLTOParallelism),
+        Cache(std::move(Cache)) {}
 
   virtual Error runThinLTOBackendThread(
       AddStreamFn AddStream, FileCache Cache, unsigned Task, BitcodeModule BM,

>From e3c016fe0ac3b3eb43b0f12096770fa42b304d7a Mon Sep 17 00:00:00 2001
From: Ben Dunbobbin <Ben.Dunbobbin at sony.com>
Date: Mon, 17 Feb 2025 17:59:02 +0000
Subject: [PATCH 06/10] [DTLTO][LLVM] Implement integrated distribution for
 ThinLTO (DTLTO).

Structural changes:
1. A new ThinLTO backend implementing DTLTO has been added.
2. Both the new backend and the InProcess backend derive from a common
   base class to share common setup code and state.
3. The target triple is now stored for the ThinLTO bitcode files.
4. A new setup() member is called, which the ThinLTO backends can use to
   prepare for code generation. For the DTLTO backend, this is used to
   pre-allocate storage for the information required to perform the
   backend compilation jobs.
5. The functions for emitting summary index shard and imports files have
   been altered to allow the caller to specify the filenames to write
   and to allow the list of imports to be stored in a container rather
   than written to a file.
---
 llvm/include/llvm/LTO/LTO.h              |  22 ++
 llvm/lib/LTO/LTO.cpp                     | 277 +++++++++++++++++++++++
 llvm/test/ThinLTO/X86/dtlto/dtlto.test   |  85 +++++++
 llvm/test/ThinLTO/X86/dtlto/imports.test |  79 +++++++
 llvm/test/ThinLTO/X86/dtlto/json.test    |  99 ++++++++
 llvm/test/ThinLTO/X86/dtlto/summary.test |  55 +++++
 llvm/test/ThinLTO/X86/dtlto/triple.test  |  44 ++++
 llvm/test/lit.cfg.py                     |   1 +
 llvm/tools/llvm-lto2/llvm-lto2.cpp       |  22 +-
 llvm/utils/dtlto/mock.py                 |  20 ++
 llvm/utils/dtlto/validate.py             |  79 +++++++
 11 files changed, 782 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/ThinLTO/X86/dtlto/dtlto.test
 create mode 100644 llvm/test/ThinLTO/X86/dtlto/imports.test
 create mode 100644 llvm/test/ThinLTO/X86/dtlto/json.test
 create mode 100644 llvm/test/ThinLTO/X86/dtlto/summary.test
 create mode 100644 llvm/test/ThinLTO/X86/dtlto/triple.test
 create mode 100644 llvm/utils/dtlto/mock.py
 create mode 100644 llvm/utils/dtlto/validate.py

diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h
index 815abecbdcda5..c776b57c1af82 100644
--- a/llvm/include/llvm/LTO/LTO.h
+++ b/llvm/include/llvm/LTO/LTO.h
@@ -305,6 +305,28 @@ ThinBackend createInProcessThinBackend(ThreadPoolStrategy Parallelism,
                                        bool ShouldEmitIndexFiles = false,
                                        bool ShouldEmitImportsFiles = false);
 
+/// This ThinBackend generates the index shards and then runs the individual
+/// backend jobs via an external process. It takes the same parameters as the
+/// InProcessThinBackend, however, these parameters only control the behavior
+/// when generating the index files for the modules. Additionally:
+/// LinkerOutputFile is a string that should identify this LTO invocation in
+/// the context of a wider build. It's used for naming to aid the user in
+/// identifying activity related to a specific LTO invocation.
+/// RemoteOptTool specifies the path to a Clang executable to be invoked for the
+/// backend jobs.
+/// Distributor specifies the path to a process to invoke to manage the backend
+/// jobs execution.
+/// SaveTemps is a debugging tool that prevents temporary files created by this
+/// backend from being cleaned up.
+ThinBackend createOutOfProcessThinBackend(ThreadPoolStrategy Parallelism,
+                                          IndexWriteCallback OnWrite,
+                                          bool ShouldEmitIndexFiles,
+                                          bool ShouldEmitImportsFiles,
+                                          StringRef LinkerOutputFile,
+                                          StringRef RemoteOptTool,
+                                          StringRef Distributor,
+                                          bool SaveTemps);
+
 /// This ThinBackend writes individual module indexes to files, instead of
 /// running the individual backend jobs. This backend is for distributed builds
 /// where separate processes will invoke the real backends.
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index cb4f87358c19c..3a9d57f69f718 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -41,8 +41,11 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/FileSystem.h"
+#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/JSON.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
 #include "llvm/Support/SHA1.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/ThreadPool.h"
@@ -91,6 +94,15 @@ extern cl::opt<bool> SupportsHotColdNew;
 
 /// Enable MemProf context disambiguation for thin link.
 extern cl::opt<bool> EnableMemProfContextDisambiguation;
+
+cl::list<std::string> AdditionalThinLTODistributorArgs(
+    "thinlto-distributor-arg",
+    cl::desc("Additional arguments to pass to the ThinLTO distributor"));
+
+cl::list<std::string>
+    ThinLTORemoteOptToolArgs("thinlto-remote-opt-tool-arg",
+                             cl::desc("Additional arguments to pass to the "
+                                      "ThinLTO remote optimization tool"));
 } // namespace llvm
 
 // Computes a unique hash for the Module considering the current list of
@@ -2179,3 +2191,268 @@ std::vector<int> lto::generateModulesOrdering(ArrayRef<BitcodeModule *> R) {
   });
   return ModulesOrdering;
 }
+
+namespace {
+// For this out-of-process backend no codegen is done when invoked for each
+// task. Instead we generate the required information (e.g. the summary index
+// shard, import list, etc.) to allow for the codegen to be performed
+// externally. This backend's `wait` function then invokes an external
+// distributor process to do backend compilations.
+class OutOfProcessThinBackend : public CGThinBackend {
+  using SString = SmallString<128>;
+
+  BumpPtrAllocator Alloc;
+  StringSaver Saver{Alloc};
+
+  SString LinkerOutputFile;
+  SString RemoteOptTool;
+  SString DistributorPath;
+  bool SaveTemps;
+
+  SmallVector<StringRef, 0> CodegenOptions;
+  DenseSet<StringRef> AdditionalInputs;
+
+  // Information specific to an individual backend compilation job.
+  struct Job {
+    unsigned Task;
+    StringRef ModuleID;
+    StringRef Triple;
+    StringRef NativeObjectPath;
+    StringRef SummaryIndexPath;
+    ImportsFilesContainer ImportFiles;
+  };
+  // The set of backend compilation jobs.
+  SmallVector<Job> Jobs;
+
+  // A unique string to identify the current link.
+  SmallString<8> UID;
+
+public:
+  OutOfProcessThinBackend(
+      const Config &Conf, ModuleSummaryIndex &CombinedIndex,
+      ThreadPoolStrategy ThinLTOParallelism,
+      const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
+      AddStreamFn AddStream, lto::IndexWriteCallback OnWrite,
+      bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles,
+      StringRef LinkerOutputFile, StringRef RemoteOptTool,
+      StringRef Distributor, bool SaveTemps)
+      : CGThinBackend(Conf, CombinedIndex, ModuleToDefinedGVSummaries,
+                      AddStream, OnWrite, ShouldEmitIndexFiles,
+                      ShouldEmitImportsFiles, ThinLTOParallelism),
+        LinkerOutputFile(LinkerOutputFile), RemoteOptTool(RemoteOptTool),
+        DistributorPath(Distributor), SaveTemps(SaveTemps) {}
+
+  virtual void setup(unsigned MaxTasks) override {
+    UID = itostr(sys::Process::getProcessId());
+    Jobs.resize((size_t)MaxTasks);
+  }
+
+  Error start(
+      unsigned Task, BitcodeModule BM,
+      const FunctionImporter::ImportMapTy &ImportList,
+      const FunctionImporter::ExportSetTy &ExportList,
+      const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
+      MapVector<StringRef, BitcodeModule> &ModuleMap,
+      DenseMap<StringRef, std::string> &ModuleTriples) override {
+
+    StringRef ModulePath = BM.getModuleIdentifier();
+
+    SString ObjFilePath = sys::path::parent_path(LinkerOutputFile);
+    sys::path::append(ObjFilePath, sys::path::stem(ModulePath) + "." +
+                                       itostr(Task) + "." + UID + ".native.o");
+
+    Job &J = Jobs[Task - 1]; /*Task 0 is reserved*/
+    J = {Task,
+         ModulePath,
+         ModuleTriples[ModulePath],
+         Saver.save(ObjFilePath.str()),
+         Saver.save(ObjFilePath.str() + ".thinlto.bc"),
+         {}};
+
+    assert(ModuleToDefinedGVSummaries.count(ModulePath));
+    BackendThreadPool.async(
+        [=](Job &J, const FunctionImporter::ImportMapTy &ImportList) {
+          if (LLVM_ENABLE_THREADS && Conf.TimeTraceEnabled)
+            timeTraceProfilerInitialize(Conf.TimeTraceGranularity,
+                                        "thin backend");
+          if (auto E = emitFiles(ImportList, J.ModuleID, J.SummaryIndexPath,
+                                 J.ModuleID.str(), J.ImportFiles)) {
+            std::unique_lock<std::mutex> L(ErrMu);
+            if (Err)
+              Err = joinErrors(std::move(*Err), std::move(E));
+            else
+              Err = std::move(E);
+          }
+          if (LLVM_ENABLE_THREADS && Conf.TimeTraceEnabled)
+            timeTraceProfilerFinishThread();
+        },
+        std::ref(J), std::ref(ImportList));
+
+    return Error::success();
+  }
+
+  // Generates a JSON file describing the backend compilations, for the
+  // distributor.
+  bool emitDistributorJson(StringRef DistributorJson) {
+    using json::Array;
+    std::error_code EC;
+    raw_fd_ostream OS(DistributorJson, EC);
+    if (EC)
+      return false;
+
+    json::OStream JOS(OS);
+    JOS.object([&]() {
+      // Information common to all jobs. Note that we use a custom syntax for
+      // referencing by index into the job input and output file arrays.
+      JOS.attributeObject("common", [&]() {
+        JOS.attribute("linker_output", LinkerOutputFile);
+
+        // Common command line template.
+        JOS.attributeArray("args", [&]() {
+          JOS.value(RemoteOptTool);
+
+          // Reference to Job::NativeObjectPath.
+          JOS.value("-o");
+          JOS.value(Array{"primary_output", 0});
+
+          JOS.value("-c");
+
+          JOS.value("-x");
+          JOS.value("ir");
+
+          // Reference to Job::ModuleID.
+          JOS.value(Array{"primary_input", 0});
+
+          // Reference to Job::SummaryIndexPath.
+          JOS.value(Array{"summary_index", "-fthinlto-index=", 0});
+          JOS.value(Saver.save("--target=" + Twine(Jobs.front().Triple)));
+
+          for (const auto &A : ThinLTORemoteOptToolArgs)
+            JOS.value(A);
+        });
+      });
+      JOS.attributeArray("jobs", [&]() {
+        for (const auto &J : Jobs) {
+          assert(J.Task != 0);
+          JOS.object([&]() {
+            JOS.attribute("primary_input", Array{J.ModuleID});
+            JOS.attribute("summary_index", Array{J.SummaryIndexPath});
+            JOS.attribute("primary_output", Array{J.NativeObjectPath});
+
+            // Add the bitcode files from which imports will be made. These do
+            // not appear on the command line but are recorded in the summary
+            // index shard.
+            JOS.attribute("imports", Array(J.ImportFiles));
+
+            // Add any input files that are common to each invocation. These
+            // filenames are duplicated in the command line template and in
+            // each of the per-job "inputs" arrays. However, this small amount
+            // of duplication makes the schema simpler.
+            JOS.attribute("additional_inputs", Array(AdditionalInputs));
+          });
+        }
+      });
+    });
+
+    return true;
+  }
+
+  void removeFile(StringRef FileName) {
+    std::error_code EC = sys::fs::remove(FileName, true);
+    if (EC && EC != std::make_error_code(std::errc::no_such_file_or_directory))
+      errs() << "warning: could not remove the file '" << FileName
+             << "': " << EC.message() << "\n";
+  }
+
+  Error wait() override {
+    // Wait for the information on the required backend compilations to be
+    // gathered.
+    BackendThreadPool.wait();
+    if (Err)
+      return std::move(*Err);
+
+    auto CleanPerJobFiles = llvm::make_scope_exit([&] {
+      if (!SaveTemps)
+        for (auto &Job : Jobs) {
+          removeFile(Job.NativeObjectPath);
+          if (!ShouldEmitIndexFiles)
+            removeFile(Job.SummaryIndexPath);
+        }
+    });
+
+    const StringRef BCError = "DTLTO backend compilation: ";
+
+    // TODO: If we move to using an optimisation tool that does not require an
+    // explicit triple to be passed then the triple handling can be removed
+    // entirely.
+    if (!llvm::all_of(Jobs, [&](const auto &Job) {
+          return Job.Triple == Jobs.front().Triple;
+        }))
+      return make_error<StringError>(BCError + "all triples must be consistent",
+                                     inconvertibleErrorCode());
+
+    SString JsonFile = sys::path::parent_path(LinkerOutputFile);
+    sys::path::append(JsonFile, sys::path::stem(LinkerOutputFile) + "." + UID +
+                                    ".dist-file.json");
+    if (!emitDistributorJson(JsonFile))
+      return make_error<StringError>(
+          BCError + "failed to generate distributor JSON script: " + JsonFile,
+          inconvertibleErrorCode());
+    auto CleanJson = llvm::make_scope_exit([&] {
+      if (!SaveTemps)
+        removeFile(JsonFile);
+    });
+
+    SmallVector<StringRef, 3> Args = {DistributorPath};
+    llvm::append_range(Args, AdditionalThinLTODistributorArgs);
+    Args.push_back(JsonFile);
+    std::string ErrMsg;
+    if (sys::ExecuteAndWait(Args[0], Args,
+                            /*Env=*/std::nullopt, /*Redirects=*/{},
+                            /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg)) {
+      return make_error<StringError>(
+          BCError + "distributor execution failed" +
+              (!ErrMsg.empty() ? ": " + ErrMsg + Twine(".") : Twine(".")),
+          inconvertibleErrorCode());
+    }
+
+    for (auto &Job : Jobs) {
+      // Load the native object from a file into a memory buffer
+      // and store its contents in the output buffer.
+      ErrorOr<std::unique_ptr<MemoryBuffer>> objFileMbOrErr =
+          MemoryBuffer::getFile(Job.NativeObjectPath, false, false);
+      if (std::error_code ec = objFileMbOrErr.getError())
+        return make_error<StringError>(
+            BCError + "cannot open native object file: " +
+                Job.NativeObjectPath + ": " + ec.message(),
+            inconvertibleErrorCode());
+      std::unique_ptr<llvm::MemoryBuffer> umb = std::move(objFileMbOrErr.get());
+      Expected<std::unique_ptr<CachedFileStream>> StreamOrErr =
+          AddStream(Job.Task, Job.ModuleID);
+      if (Error Err = StreamOrErr.takeError())
+        report_fatal_error(std::move(Err));
+      std::unique_ptr<CachedFileStream> Stream = std::move(*StreamOrErr);
+      *Stream->OS << umb->getMemBufferRef().getBuffer();
+    }
+
+    return Error::success();
+  }
+};
+} // end anonymous namespace
+
+ThinBackend lto::createOutOfProcessThinBackend(
+    ThreadPoolStrategy Parallelism, lto::IndexWriteCallback OnWrite,
+    bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles,
+    StringRef LinkerOutputFile, StringRef RemoteOptTool, StringRef Distributor,
+    bool SaveTemps) {
+  auto Func =
+      [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
+          const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
+          AddStreamFn AddStream, FileCache /*Cache*/) {
+        return std::make_unique<OutOfProcessThinBackend>(
+            Conf, CombinedIndex, Parallelism, ModuleToDefinedGVSummaries,
+            AddStream, OnWrite, ShouldEmitIndexFiles, ShouldEmitImportsFiles,
+            LinkerOutputFile, RemoteOptTool, Distributor, SaveTemps);
+      };
+  return ThinBackend(Func, Parallelism);
+}
diff --git a/llvm/test/ThinLTO/X86/dtlto/dtlto.test b/llvm/test/ThinLTO/X86/dtlto/dtlto.test
new file mode 100644
index 0000000000000..17dde95c022d8
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/dtlto/dtlto.test
@@ -0,0 +1,85 @@
+# Test DTLTO output with llvm-lto2.
+
+RUN: rm -rf %t && split-file %s %t && cd %t
+
+# Generate bitcode files with summary.
+RUN: opt -thinlto-bc t1.ll -o t1.bc
+RUN: opt -thinlto-bc t2.ll -o t2.bc
+
+# Generate mock native object files.
+RUN: opt t1.ll -o t1.o
+RUN: opt t2.ll -o t2.o
+
+# Create an empty subdirectory to avoid having to account for the input files.
+RUN: mkdir %t/out && cd %t/out
+
+# Define a substitution to share the common DTLTO arguments.
+DEFINE: %{command} =llvm-lto2 run ../t1.bc ../t2.bc -o t.o \
+DEFINE:     -dtlto \
+DEFINE:     -dtlto-remote-opt-tool=dummy \
+DEFINE:     -dtlto-distributor=%python \
+DEFINE:     -thinlto-distributor-arg=%llvm_src_root/utils/dtlto/mock.py \
+DEFINE:     -thinlto-distributor-arg=../t1.o \
+DEFINE:     -thinlto-distributor-arg=../t2.o \
+DEFINE:     -r=../t1.bc,t1,px \
+DEFINE:     -r=../t2.bc,t2,px
+
+# Perform DTLTO. mock.py does not do any compilation, instead it simply writes
+# the contents of the object files supplied on the command line into the
+# output object files in job order.
+RUN: %{command}
+
+# Check that the expected output files have been created.
+RUN: ls | count 2
+RUN: ls | FileCheck %s --check-prefix=THINLTO
+
+# llvm-lto2 ThinLTO output files.
+THINLTO-DAG: {{^}}t.o.1{{$}}
+THINLTO-DAG: {{^}}t.o.2{{$}}
+
+RUN: cd .. && rm -rf %t/out && mkdir %t/out && cd %t/out
+
+# Perform DTLTO with --save-temps.
+RUN: %{command} --save-temps
+
+# Check that the expected output files have been created.
+RUN: ls | count 12
+RUN: ls | FileCheck %s --check-prefixes=THINLTO,SAVETEMPS
+
+# Common -save-temps files from llvm-lto2.
+SAVETEMPS-DAG: {{^}}t.o.resolution.txt{{$}}
+SAVETEMPS-DAG: {{^}}t.o.index.bc{{$}}
+SAVETEMPS-DAG: {{^}}t.o.index.dot{{$}}
+
+# -save-temps incremental files.
+SAVETEMPS-DAG: {{^}}t.o.0.0.preopt.bc{{$}}
+SAVETEMPS-DAG: {{^}}t.o.0.2.internalize.bc{{$}}
+
+# A jobs description JSON.
+SAVETEMPS-DAG: {{^}}t.[[#]].dist-file.json{{$}}
+
+# Summary shards emitted for DTLTO.
+SAVETEMPS-DAG: {{^}}t1.1.[[#]].native.o.thinlto.bc{{$}}
+SAVETEMPS-DAG: {{^}}t2.2.[[#]].native.o.thinlto.bc{{$}}
+
+# DTLTO native output files (the results of the external backend compilations).
+SAVETEMPS-DAG: {{^}}t1.1.[[#]].native.o{{$}}
+SAVETEMPS-DAG: {{^}}t2.2.[[#]].native.o{{$}}
+
+#--- t1.ll
+
+target triple = "x86_64-unknown-linux-gnu"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @t1() {
+  ret void
+}
+
+#--- t2.ll
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @t2() {
+  ret void
+}
diff --git a/llvm/test/ThinLTO/X86/dtlto/imports.test b/llvm/test/ThinLTO/X86/dtlto/imports.test
new file mode 100644
index 0000000000000..57ace00aeaf9e
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/dtlto/imports.test
@@ -0,0 +1,79 @@
+# Check that DTLTO creates imports lists correctly.
+
+RUN: rm -rf %t && split-file %s %t && cd %t
+
+# Compile bitcode.
+RUN: opt -thinlto-bc 0.ll -o 0.bc -O2
+RUN: opt -thinlto-bc 1.ll -o 1.bc -O2
+
+# Define a substitution to share the common DTLTO arguments. Note that the use
+# of validate.py will cause a failure as it does not create output files.
+DEFINE: %{command} =llvm-lto2 run \
+DEFINE:    0.bc 1.bc -o t.o \
+DEFINE:    -dtlto \
+DEFINE:    -dtlto-remote-opt-tool=dummy \
+DEFINE:    -dtlto-distributor=%python \
+DEFINE:    -thinlto-distributor-arg=%llvm_src_root/utils/dtlto/validate.py \
+DEFINE:    -thinlto-distributor-arg=0.bc \
+DEFINE:    -thinlto-distributor-arg=1.bc \
+DEFINE:    -thinlto-emit-indexes \
+DEFINE:    -r=0.bc,g,px \
+DEFINE:    -r=1.bc,f,px \
+DEFINE:    -r=1.bc,g
+
+# We expect an import from 0.bc into 1.bc but no imports into 0.bc. Check that
+# the expected input files have been added to the JSON.
+RUN: not %{command} >out.log 2>&1
+RUN: FileCheck --input-file=out.log %s --check-prefixes=INPUTS,ERR
+
+INPUTS:      "primary_input": [
+INPUTS-NEXT:   "0.bc"
+INPUTS-NEXT: ]
+INPUTS:      "imports": []
+INPUTS:      "primary_input": [
+INPUTS-NEXT:   "1.bc"
+INPUTS-NEXT: ]
+INPUTS:      "imports": [
+INPUTS-NEXT:   "0.bc"
+INPUTS-NEXT: ]
+
+# This check ensures that we have failed for the expected reason.
+ERR: failed: DTLTO backend compilation: cannot open native object file:
+
+# Check that imports files have not been created.
+RUN: ls | FileCheck %s --check-prefix=NOINDEXFILES
+NOINDEXFILES-NOT: imports
+
+# Check that imports files are not created with -save-temps.
+RUN: not %{command} -save-temps 2>&1 \ 
+RUN:   | FileCheck %s --check-prefixes=ERR
+RUN: ls | FileCheck %s --check-prefix=NOINDEXFILES
+NOINDEXFILES-NOT: imports
+
+# Check that imports files are created with -thinlto-emit-imports.
+RUN: not %{command} -thinlto-emit-imports 2>&1 \ 
+RUN:   | FileCheck %s --check-prefixes=ERR
+RUN: ls | FileCheck %s --check-prefix=INDEXFILES
+INDEXFILES: 0.bc.imports
+INDEXFILES: 1.bc.imports
+
+#--- 0.ll
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @g() {
+entry:
+  ret void
+}
+
+#--- 1.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @g(...)
+
+define void @f() {
+entry:
+  call void (...) @g()
+  ret void
+}
diff --git a/llvm/test/ThinLTO/X86/dtlto/json.test b/llvm/test/ThinLTO/X86/dtlto/json.test
new file mode 100644
index 0000000000000..f6a78957cda33
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/dtlto/json.test
@@ -0,0 +1,99 @@
+# Check that the JSON output from DTLTO is as expected. Note that validate.py
+# checks the JSON structure so we just check the field contents in this test.
+
+RUN: rm -rf %t && split-file %s %t && cd %t
+
+# Generate bitcode files with summary.
+RUN: opt -thinlto-bc t1.ll -o t1.bc
+RUN: opt -thinlto-bc t2.ll -o t2.bc
+
+# Perform DTLTO.
+RUN: not llvm-lto2 run t1.bc t2.bc -o my.output \
+RUN:     -r=t1.bc,t1,px -r=t2.bc,t2,px \
+RUN:     -dtlto \
+RUN:     -dtlto-remote-opt-tool=my_clang.exe \
+RUN:     -dtlto-distributor=%python \
+RUN:     -thinlto-distributor-arg=%llvm_src_root/utils/dtlto/validate.py \
+RUN:     -thinlto-remote-opt-tool-arg=--rota1=10 \
+RUN:     -thinlto-remote-opt-tool-arg=--rota2=20 \
+RUN:     -thinlto-distributor-arg=--da1=10 \
+RUN:     -thinlto-distributor-arg=--da2=10 \
+RUN:   2>&1 | FileCheck %s
+
+CHECK: distributor_args=['--da1=10', '--da2=10']
+
+# Check the common object.
+CHECK:      "linker_output": "my.output"
+CHECK:      "args":
+CHECK:      "my_clang.exe"
+CHECK:      "-o"
+CHECK-NEXT: [
+CHECK-NEXT: "primary_output"
+CHECK-NEXT: 0
+CHECK-NEXT: ]
+CHECK:      "-c"
+CHECK:      "-x"
+CHECK:      "ir"
+CHECK-NEXT: [
+CHECK-NEXT: "primary_input"
+CHECK-NEXT:  0
+CHECK-NEXT: ]
+CHECK:      "summary_index"
+CHECK-NEXT: "-fthinlto-index="
+CHECK-NEXT: 0
+CHECK-NEXT: ]
+CHECK:      "--target=x86_64-unknown-linux-gnu"
+CHECK:      "--rota1=10"
+CHECK:      "--rota2=20"
+
+# Check the first job entry.
+CHECK: "jobs":
+CHECK:      "primary_input": [
+CHECK-NEXT: "t1.bc"
+CHECK-NEXT: ]
+CHECK:      "summary_index": [
+CHECK-NEXT: "t1.1.[[#]].native.o.thinlto.bc"
+CHECK-NEXT: ]
+CHECK:      "primary_output": [
+CHECK-NEXT: "t1.1.[[#]].native.o"
+CHECK-NEXT: ]
+CHECK:      "imports": [],
+CHECK:      "additional_inputs": []
+CHECK-NEXT: }
+
+# Check the second job entry.
+CHECK-NEXT: {
+CHECK-NEXT: "primary_input": [
+CHECK-NEXT: "t2.bc"
+CHECK-NEXT: ]
+CHECK-NEXT: "summary_index": [
+CHECK-NEXT: "t2.2.[[#]].native.o.thinlto.bc"
+CHECK-NEXT: ]
+CHECK-NEXT: "primary_output": [
+CHECK-NEXT: "t2.2.[[#]].native.o"
+CHECK-NEXT: ]
+CHECK-NEXT: "imports": []
+CHECK-NEXT: "additional_inputs": []
+CHECK-NEXT: }
+CHECK-NEXT: ]
+
+# This check ensures that we have failed for the expected reason.
+CHECK: failed: DTLTO backend compilation: cannot open native object file:
+
+#--- t1.ll
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @t1() {
+entry:
+  ret void
+}
+
+#--- t2.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @t2() {
+entry:
+  ret void
+}
diff --git a/llvm/test/ThinLTO/X86/dtlto/summary.test b/llvm/test/ThinLTO/X86/dtlto/summary.test
new file mode 100644
index 0000000000000..dab59ece12f8e
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/dtlto/summary.test
@@ -0,0 +1,55 @@
+# Check that DTLTO creates the same summary index shard files as an equivalent
+# ThinLTO link does.
+
+ RUN: rm -rf %t && split-file %s %t && cd %t
+
+# Generate ThinLTO bitcode files.
+RUN: opt -thinlto-bc t1.ll -o t1.bc
+RUN: opt -thinlto-bc t2.ll -o t2.bc
+
+# Generate mock native object files.
+RUN: opt t1.ll -o t1.o
+RUN: opt t2.ll -o t2.o
+
+# Define a substitution to share the common arguments.
+DEFINE: %{command} =llvm-lto2 run t1.bc t2.bc -o t.o \
+DEFINE:     -r=t1.bc,t1,px \
+DEFINE:     -r=t2.bc,t2,px \
+DEFINE:     -r=t2.bc,t1 \
+DEFINE:     -thinlto-emit-indexes
+
+# Perform DTLTO.
+RUN: %{command} -dtlto \
+RUN:     -dtlto-remote-opt-tool=dummy \
+RUN:     -dtlto-distributor=%python \
+RUN:     -thinlto-distributor-arg=%llvm_src_root/utils/dtlto/mock.py \
+RUN:     -thinlto-distributor-arg=t1.o \
+RUN:     -thinlto-distributor-arg=t2.o
+
+# Perform ThinLTO.
+RUN: %{command}
+
+# Check for equivalence. We use a wildcard to account for the PID.
+RUN: cmp t1.1.*.native.o.thinlto.bc t1.bc.thinlto.bc
+RUN: cmp t2.2.*.native.o.thinlto.bc t2.bc.thinlto.bc
+
+#--- t1.ll
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @t1() {
+entry:
+  ret void
+}
+
+#--- t2.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @t1(...)
+
+define void @t2() {
+entry:
+  call void (...) @t1()
+  ret void
+}
diff --git a/llvm/test/ThinLTO/X86/dtlto/triple.test b/llvm/test/ThinLTO/X86/dtlto/triple.test
new file mode 100644
index 0000000000000..f0642281535a9
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/dtlto/triple.test
@@ -0,0 +1,44 @@
+# Test the DTLTO limitation that all triples must match.
+
+RUN: rm -rf %t && split-file %s %t && cd %t
+
+# Generate bitcode files with summary.
+RUN: opt -thinlto-bc t1.ll -o t1.bc
+RUN: opt -thinlto-bc t2.ll -o t2.bc
+
+# Generate mock native object files.
+RUN: opt t1.ll -o t1.o
+RUN: opt t2.ll -o t2.o
+
+# Perform DTLTO. mock.py does not do any compilation, instead it emits the
+# object files supplied using -thinlto-distributor-arg in job order.
+RUN: not llvm-lto2 run t1.bc t2.bc -o t.o -save-temps \
+RUN:     -dtlto \
+RUN:     -dtlto-remote-opt-tool=dummy \
+RUN:     -dtlto-distributor=%python \
+RUN:     -thinlto-distributor-arg=%llvm_src_root/utils/dtlto/mock.py \
+RUN:     -thinlto-distributor-arg=t1.o \
+RUN:     -thinlto-distributor-arg=t2.o \
+RUN:     -r=t1.bc,t1,px \
+RUN:     -r=t2.bc,t2,px 2>&1 | FileCheck %s
+
+# This check ensures that we have failed for the expected reason.
+CHECK: failed: DTLTO backend compilation: all triples must be consistent
+
+;--- t1.ll
+
+target triple = "x86_64-unknown-linux-gnu"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @t1() {
+  ret void
+}
+
+;--- t2.ll
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown-gnu"
+
+define void @t2() {
+  ret void
+}
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index aad7a088551b2..6722064d2a7b6 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -91,6 +91,7 @@ def get_asan_rtlib():
 config.substitutions.append(("%shlibext", config.llvm_shlib_ext))
 config.substitutions.append(("%pluginext", config.llvm_plugin_ext))
 config.substitutions.append(("%exeext", config.llvm_exe_ext))
+config.substitutions.append(("%llvm_src_root", config.llvm_src_root))
 
 
 lli_args = []
diff --git a/llvm/tools/llvm-lto2/llvm-lto2.cpp b/llvm/tools/llvm-lto2/llvm-lto2.cpp
index d4f022ef021a4..82f2db58acd11 100644
--- a/llvm/tools/llvm-lto2/llvm-lto2.cpp
+++ b/llvm/tools/llvm-lto2/llvm-lto2.cpp
@@ -97,6 +97,16 @@ static cl::opt<bool>
                                 "specified with -thinlto-emit-indexes or "
                                 "-thinlto-distributed-indexes"));
 
+static cl::opt<bool> DTLTO("dtlto", cl::desc("Perform DTLTO"));
+
+static cl::opt<std::string>
+    DTLTORemoteOptTool("dtlto-remote-opt-tool",
+                       cl::desc("Specify the remote opt tool for DTLTO"));
+
+static cl::opt<std::string>
+    DTLTODistributor("dtlto-distributor",
+                     cl::desc("Specify the distributor for DTLTO"));
+
 // Default to using all available threads in the system, but using only one
 // thread per core (no SMT).
 // Use -thinlto-threads=all to use hardware_concurrency() instead, which means
@@ -344,6 +354,10 @@ static int run(int argc, char **argv) {
   Conf.PTO.LoopVectorization = Conf.OptLevel > 1;
   Conf.PTO.SLPVectorization = Conf.OptLevel > 1;
 
+  if (ThinLTODistributedIndexes && DTLTO)
+    llvm::errs() << "-thinlto-distributed-indexes cannot be specfied together "
+                    "with -dtlto\n";
+
   ThinBackend Backend;
   if (ThinLTODistributedIndexes)
     Backend = createWriteIndexesThinBackend(llvm::hardware_concurrency(Threads),
@@ -353,7 +367,13 @@ static int run(int argc, char **argv) {
                                             ThinLTOEmitImports,
                                             /*LinkedObjectsFile=*/nullptr,
                                             /*OnWrite=*/{});
-  else
+  else if (DTLTO) {
+
+    Backend = createOutOfProcessThinBackend(
+        llvm::heavyweight_hardware_concurrency(Threads),
+        /*OnWrite=*/{}, ThinLTOEmitIndexes, ThinLTOEmitImports, OutputFilename,
+        DTLTORemoteOptTool, DTLTODistributor, SaveTemps);
+  } else
     Backend = createInProcessThinBackend(
         llvm::heavyweight_hardware_concurrency(Threads),
         /* OnWrite */ {}, ThinLTOEmitIndexes, ThinLTOEmitImports);
diff --git a/llvm/utils/dtlto/mock.py b/llvm/utils/dtlto/mock.py
new file mode 100644
index 0000000000000..423a7f7beb194
--- /dev/null
+++ b/llvm/utils/dtlto/mock.py
@@ -0,0 +1,20 @@
+import sys
+import json
+import shutil
+from pathlib import Path
+import validate
+
+if __name__ == "__main__":
+    json_arg = sys.argv[-1]
+    distributor_args = sys.argv[1:-1]
+
+    # Load the DTLTO information from the input JSON file.
+    data = json.loads(Path(json_arg).read_bytes())
+
+    # Iterate over the jobs and create the output
+    # files by copying over the supplied input files.
+    for job_index, job in enumerate(data["jobs"]):
+        shutil.copy(distributor_args[job_index], job["primary_output"][0])
+
+    # Check the format of the JSON.
+    validate.validate(data)
diff --git a/llvm/utils/dtlto/validate.py b/llvm/utils/dtlto/validate.py
new file mode 100644
index 0000000000000..eb14f2c746a8e
--- /dev/null
+++ b/llvm/utils/dtlto/validate.py
@@ -0,0 +1,79 @@
+import sys
+import json
+from pathlib import Path
+
+
+def take(jvalue, jpath):
+    parts = jpath.split(".")
+    for part in parts[:-1]:
+        jvalue = jvalue[part]
+    return jvalue.pop(parts[-1], KeyError)
+
+
+def validate(jdoc):
+    # Check the format of the JSON
+    assert type(take(jdoc, "common.linker_output")) is str
+
+    args = take(jdoc, "common.args")
+    assert type(args) is list
+    assert len(args) > 0
+
+    def validate_reference(a):
+        for j in jdoc["jobs"]:
+            for x in a[1:]:
+                if type(x) is int:
+                    if a[0] not in j or x >= len(j[a[0]]):
+                        return False
+        return True
+
+    for a in args:
+        assert type(a) is str or (
+            type(a) is list
+            and len(a) >= 2
+            and type(a[0]) is str
+            and all(type(x) in (str, int) for x in a[1:])
+            and any(type(x) is int for x in a[1:])
+            and validate_reference(a)
+        )
+
+    assert len(take(jdoc, "common")) == 0
+
+    jobs = take(jdoc, "jobs")
+    assert type(jobs) is list
+    for j in jobs:
+        assert type(j) is dict
+
+        # Mandatory job attributes.
+        for attr in ("primary_input", "primary_output", "summary_index"):
+            array = take(j, attr)
+            assert type(array) is list
+            assert len(array) == 1
+            assert type(array[0]) is str
+
+        # Optional job attributes.
+        for attr in ("additional_inputs", "additional_outputs", "imports"):
+            array = take(j, attr)
+            if array is KeyError:
+                continue
+            assert type(array) is list
+            assert all(type(a) is str for a in array)
+
+        assert len(j) == 0
+
+    assert len(jdoc) == 0
+
+
+if __name__ == "__main__":
+    json_arg = sys.argv[-1]
+    distributor_args = sys.argv[1:-1]
+
+    print(f"{distributor_args=}")
+
+    # Load the DTLTO information from the input JSON file.
+    jdoc = json.loads(Path(json_arg).read_bytes())
+
+    # Write the input JSON to stdout.
+    print(json.dumps(jdoc, indent=4))
+
+    # Check the format of the JSON
+    validate(jdoc)

>From 0490b3b78f854a6c64f9ab079adb7f9440e26f77 Mon Sep 17 00:00:00 2001
From: Ben Dunbobbin <Ben.Dunbobbin at sony.com>
Date: Wed, 19 Feb 2025 03:38:07 +0000
Subject: [PATCH 07/10] [DTLTO][LLVM] Translate some LTO configuration state
 into clang options.

Intentionally minimal for now. Additional state translation will be added
in future commits.
---
 cross-project-tests/CMakeLists.txt            |  13 +-
 cross-project-tests/dtlto/README.md           |   3 +
 .../dtlto/dtlto-translate-options.ll          | 144 ++++++++++++++++++
 cross-project-tests/dtlto/lit.local.cfg       |   2 +
 cross-project-tests/lit.cfg.py                |   5 +-
 llvm/lib/LTO/LTO.cpp                          |  60 +++++++-
 llvm/test/ThinLTO/X86/dtlto/json.test         |   3 +
 llvm/utils/dtlto/local.py                     |  25 +++
 8 files changed, 251 insertions(+), 4 deletions(-)
 create mode 100644 cross-project-tests/dtlto/README.md
 create mode 100644 cross-project-tests/dtlto/dtlto-translate-options.ll
 create mode 100644 cross-project-tests/dtlto/lit.local.cfg
 create mode 100644 llvm/utils/dtlto/local.py

diff --git a/cross-project-tests/CMakeLists.txt b/cross-project-tests/CMakeLists.txt
index 7f2fee48fda77..c58119447ce0e 100644
--- a/cross-project-tests/CMakeLists.txt
+++ b/cross-project-tests/CMakeLists.txt
@@ -19,11 +19,13 @@ set(CROSS_PROJECT_TEST_DEPS
   FileCheck
   check-gdb-llvm-support
   count
-  llvm-dwarfdump
   llvm-config
+  llvm-dwarfdump
+  llvm-lto2
   llvm-objdump
-  split-file
   not
+  opt
+  split-file
   )
 
 if ("clang" IN_LIST LLVM_ENABLE_PROJECTS)
@@ -94,6 +96,13 @@ add_lit_testsuite(check-cross-amdgpu "Running AMDGPU cross-project tests"
   DEPENDS clang
   )
 
+# DTLTO tests.
+add_lit_testsuite(check-cross-dtlto "Running DTLTO cross-project tests"
+  ${CMAKE_CURRENT_BINARY_DIR}/dtlto
+  EXCLUDE_FROM_CHECK_ALL
+  DEPENDS ${CROSS_PROJECT_TEST_DEPS}
+  )
+
 # Add check-cross-project-* targets.
 add_lit_testsuites(CROSS_PROJECT ${CMAKE_CURRENT_SOURCE_DIR}
   DEPENDS ${CROSS_PROJECT_TEST_DEPS}
diff --git a/cross-project-tests/dtlto/README.md b/cross-project-tests/dtlto/README.md
new file mode 100644
index 0000000000000..cfd9d3496ca42
--- /dev/null
+++ b/cross-project-tests/dtlto/README.md
@@ -0,0 +1,3 @@
+Tests for DTLTO (integrated distributed ThinLTO) functionality.
+
+These are integration tests as DTLTO invokes `clang` for code-generation.
\ No newline at end of file
diff --git a/cross-project-tests/dtlto/dtlto-translate-options.ll b/cross-project-tests/dtlto/dtlto-translate-options.ll
new file mode 100644
index 0000000000000..bbb6ccf33fe7c
--- /dev/null
+++ b/cross-project-tests/dtlto/dtlto-translate-options.ll
@@ -0,0 +1,144 @@
+;; Check that the expected Clang arguments are generated by DTLTO for the 
+;; backend compilations and are accepted by Clang.
+
+; RUN: rm -rf %t && split-file %s %t && cd %t
+
+;; Generate bitcode files with a summary index.
+; RUN: opt -thinlto-bc x86_64-unknown-linux-gnu.ll -o x86_64-unknown-linux-gnu.bc
+; RUN: opt -thinlto-bc x86_64-pc-windows-msvc.ll   -o x86_64-pc-windows-msvc.bc
+
+
+;; Check that any invalid arguments would cause a Clang error. This property is
+;; relied on by the actual testcases later in this test.
+; RUN: not %clang -x ir x86_64-unknown-linux-gnu.ll \
+; RUN:     -invalid-incorrect-not-an-option 2>&1 | FileCheck %s --check-prefix=SANITY1
+; SANITY1: unknown argument: '-invalid-incorrect-not-an-option'
+
+
+;; Define substitutions used to simplify the testcases.
+; DEFINE: %{distributor} = dummy
+; DEFINE: %{extra_flags} = dummy
+; DEFINE: %{triple} = dummy
+; DEFINE: %{command} = llvm-lto2 run \
+; DEFINE:   -thinlto-distributor-arg=%llvm_src_root/utils/dtlto/%{distributor} \
+; DEFINE:   -thinlto-remote-opt-tool-arg=-Wunused-command-line-argument \
+; DEFINE:   @%{triple}.rsp %{extra_flags}
+
+
+;; Write the common arguments to response files.
+
+; RUN: echo "x86_64-unknown-linux-gnu.bc -o x86_64-unknown-linux-gnu.o \
+; RUN:       -dtlto \
+; RUN:       -dtlto-remote-opt-tool=%clang \
+; RUN:       -thinlto-remote-opt-tool-arg=-Werror \
+; RUN:       -dtlto-distributor=%python \
+; RUN:       -r=x86_64-unknown-linux-gnu.bc,globalfunc1,plx" > x86_64-unknown-linux-gnu.rsp
+
+; RUN: echo "x86_64-pc-windows-msvc.bc -o x86_64-pc-windows-msvc.o \
+; RUN:       -dtlto \
+; RUN:       -dtlto-remote-opt-tool=%clang \
+; RUN:       -thinlto-remote-opt-tool-arg=-Werror \
+; RUN:       -thinlto-remote-opt-tool-arg=-Wno-override-module \
+; RUN:       -dtlto-distributor=%python \
+; RUN:       -r=x86_64-pc-windows-msvc.bc,globalfunc2,plx" > x86_64-pc-windows-msvc.rsp
+
+
+;; Check that boolean configuration states are translated as expected and Clang
+;; accepts them.
+
+; RUN: echo " \
+; RUN:   --addrsig=1 \
+; RUN:   -function-sections=1 \
+; RUN:   -data-sections=1" > on.rsp
+
+; RUN: echo " \
+; RUN:   --addrsig=0 \
+; RUN:   -function-sections=0 \
+; RUN:   -data-sections=0" > off.rsp
+
+;; Perform DTLTO with configuration state set.
+; REDEFINE: %{extra_flags} = @on.rsp
+; REDEFINE: %{distributor} = local.py
+; REDEFINE: %{triple} = x86_64-unknown-linux-gnu
+; RUN: %{command}
+; REDEFINE: %{distributor} = validate.py
+; RUN: not %{command} 2>&1 | FileCheck %s --check-prefix=ON \
+; RUN:     --implicit-check-not=-no-pgo-warn-mismatch
+; ON-DAG: "-faddrsig"
+; ON-DAG: "-ffunction-sections"
+; ON-DAG: "-fdata-sections"
+
+;; Perform DTLTO with configuration state unset; no flag below may be emitted.
+; REDEFINE: %{extra_flags} = @off.rsp
+; REDEFINE: %{distributor} = local.py
+; RUN: %{command}
+; REDEFINE: %{distributor} = validate.py
+; RUN: not %{command} 2>&1 | FileCheck %s --check-prefix=OFF
+; OFF-NOT: "-faddrsig"
+; OFF-NOT: "-ffunction-sections"
+; OFF-NOT: "-fdata-sections"
+; OFF-NOT: -no-pgo-warn-mismatch
+
+
+;; Check optimisation level.
+
+; RUN: llvm-lto2 run \
+; RUN:   -thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
+; RUN:   @x86_64-unknown-linux-gnu.rsp \
+; RUN:   -O3
+
+; RUN: not llvm-lto2 run \
+; RUN:   -thinlto-distributor-arg=%llvm_src_root/utils/dtlto/validate.py \
+; RUN:   @x86_64-unknown-linux-gnu.rsp \
+; RUN:   -O3 2>&1 | FileCheck %s --check-prefix=OPTLEVEL
+; OPTLEVEL-DAG: "-O3"
+
+
+;; Check relocation model.
+
+; REDEFINE: %{extra_flags} = -relocation-model=pic
+; REDEFINE: %{distributor} = local.py
+; RUN: %{command}
+; REDEFINE: %{distributor} = validate.py
+; RUN: not %{command} 2>&1 | FileCheck %s --check-prefix=PIC
+; PIC: -fpic
+
+
+; REDEFINE: %{extra_flags} = -relocation-model=pic
+; REDEFINE: %{distributor} = local.py
+; REDEFINE: %{triple} = x86_64-pc-windows-msvc
+; RUN: %{command}
+; REDEFINE: %{distributor} = validate.py
+; RUN: not %{command} 2>&1 | FileCheck %s --check-prefix=NOPIC
+; REDEFINE: %{triple} = x86_64-unknown-linux-gnu
+; NOPIC-NOT: -fpic
+
+;; Check specifying a sample profile.
+; REDEFINE: %{extra_flags} = --lto-sample-profile-file="missing.profdata"
+; REDEFINE: %{distributor} = local.py
+; RUN: not %{command} 2>&1 | FileCheck %s --check-prefix=SAMPLE_PROFILE_ERR
+; SAMPLE_PROFILE_ERR: no such file or directory: 'missing.profdata'
+; REDEFINE: %{distributor} = validate.py
+; RUN: not %{command} 2>&1 | FileCheck %s --check-prefix=SAMPLE_PROFILE
+; SAMPLE_PROFILE-DAG: "-fprofile-sample-use=missing.profdata"
+
+
+;--- x86_64-unknown-linux-gnu.ll
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @globalfunc1() {
+entry:
+  ret void
+}
+
+;--- x86_64-pc-windows-msvc.ll
+
+target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+define void @globalfunc2() {
+entry:
+  ret void
+}
diff --git a/cross-project-tests/dtlto/lit.local.cfg b/cross-project-tests/dtlto/lit.local.cfg
new file mode 100644
index 0000000000000..9a5f7aff60249
--- /dev/null
+++ b/cross-project-tests/dtlto/lit.local.cfg
@@ -0,0 +1,2 @@
+if any(feature not in config.available_features for feature in ["clang", "llvm-lto2", "opt"]):
+    config.unsupported = True
diff --git a/cross-project-tests/lit.cfg.py b/cross-project-tests/lit.cfg.py
index 66fdd63632885..3d27954c7545e 100644
--- a/cross-project-tests/lit.cfg.py
+++ b/cross-project-tests/lit.cfg.py
@@ -19,7 +19,7 @@
 config.test_format = lit.formats.ShTest(not llvm_config.use_lit_shell)
 
 # suffixes: A list of file extensions to treat as test files.
-config.suffixes = [".c", ".cl", ".cpp", ".m"]
+config.suffixes = [".c", ".cl", ".cpp", ".m", ".ll"]
 
 # excludes: A list of directories to exclude from the testsuite. The 'Inputs'
 # subdirectories contain auxiliary inputs for various tests in their parent
@@ -96,6 +96,9 @@ def get_required_attr(config, attr_name):
 if lldb_path is not None:
     config.available_features.add("lldb")
 
+for tool in ["llvm-lto2", "opt"]:
+    if llvm_config.use_llvm_tool(tool):
+        config.available_features.add(tool)
 
 def configure_dexter_substitutions():
     """Configure substitutions for host platform and return list of dependencies"""
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 3a9d57f69f718..fa0bc6f69f006 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -2291,6 +2291,62 @@ class OutOfProcessThinBackend : public CGThinBackend {
     return Error::success();
   }
 
+  // Derive a set of Clang options that will be shared/common for all DTLTO
+  // backend compilations. We are intentionally minimal here as these options
+  // must remain synchronized with the behavior of Clang. DTLTO does not support
+  // all the features available with in-process LTO. More features are expected
+  // to be added over time. Users can specify Clang options directly if a
+  // feature is not supported. Note that explicitly specified options that imply
+  // additional input or output file dependencies must be communicated to the
+  // distribution system, potentially by setting extra options on the
+  // distributor program.
+  // TODO: If this strategy of deriving options proves insufficient, alternative
+  // approaches should be considered, such as:
+  //   - A serialization/deserialization format for LTO configuration.
+  //   - Modifying LLD to be the tool that performs the backend compilations.
+  void buildCommonRemoteOptToolOptions() {
+    const lto::Config &C = Conf;
+    auto &Ops = CodegenOptions;
+    llvm::Triple TT{Jobs.front().Triple};
+
+    Ops.push_back(Saver.save("-O" + Twine(C.OptLevel)));
+
+    if (C.Options.EmitAddrsig)
+      Ops.push_back("-faddrsig");
+    if (C.Options.FunctionSections)
+      Ops.push_back("-ffunction-sections");
+    if (C.Options.DataSections)
+      Ops.push_back("-fdata-sections");
+
+    if (C.RelocModel == Reloc::PIC_)
+      // Clang doesn't have -fpic for all triples.
+      if (!TT.isOSBinFormatCOFF())
+        Ops.push_back("-fpic");
+
+    // Turn on/off warnings about profile cfg mismatch (default on)
+    // --lto-pgo-warn-mismatch.
+    if (!C.PGOWarnMismatch) {
+      Ops.push_back("-mllvm");
+      Ops.push_back("-no-pgo-warn-mismatch");
+    }
+
+    // Enable sample-based profile guided optimizations.
+    // Sample profile file path --lto-sample-profile=<value>.
+    if (!C.SampleProfile.empty()) {
+      Ops.push_back(
+          Saver.save("-fprofile-sample-use=" + Twine(C.SampleProfile)));
+      AdditionalInputs.insert(C.SampleProfile);
+    }
+
+    // We don't know which of the options will be used by Clang.
+    Ops.push_back("-Wno-unused-command-line-argument");
+
+    // Forward any supplied options.
+    if (!ThinLTORemoteOptToolArgs.empty())
+      for (auto &a : ThinLTORemoteOptToolArgs)
+        Ops.push_back(a);
+  }
+
   // Generates a JSON file describing the backend compilations, for the
   // distributor.
   bool emitDistributorJson(StringRef DistributorJson) {
@@ -2327,7 +2383,7 @@ class OutOfProcessThinBackend : public CGThinBackend {
           JOS.value(Array{"summary_index", "-fthinlto-index=", 0});
           JOS.value(Saver.save("--target=" + Twine(Jobs.front().Triple)));
 
-          for (const auto &A : ThinLTORemoteOptToolArgs)
+          for (const auto &A : CodegenOptions)
             JOS.value(A);
         });
       });
@@ -2391,6 +2447,8 @@ class OutOfProcessThinBackend : public CGThinBackend {
       return make_error<StringError>(BCError + "all triples must be consistent",
                                      inconvertibleErrorCode());
 
+    buildCommonRemoteOptToolOptions();
+
     SString JsonFile = sys::path::parent_path(LinkerOutputFile);
     sys::path::append(JsonFile, sys::path::stem(LinkerOutputFile) + "." + UID +
                                     ".dist-file.json");
diff --git a/llvm/test/ThinLTO/X86/dtlto/json.test b/llvm/test/ThinLTO/X86/dtlto/json.test
index f6a78957cda33..a90d9ec041047 100644
--- a/llvm/test/ThinLTO/X86/dtlto/json.test
+++ b/llvm/test/ThinLTO/X86/dtlto/json.test
@@ -43,6 +43,9 @@ CHECK-NEXT: "-fthinlto-index="
 CHECK-NEXT: 0
 CHECK-NEXT: ]
 CHECK:      "--target=x86_64-unknown-linux-gnu"
+CHECK:      "-O2",
+CHECK:      "-fpic"
+CHECK:      "-Wno-unused-command-line-argument"
 CHECK:      "--rota1=10"
 CHECK:      "--rota2=20"
 
diff --git a/llvm/utils/dtlto/local.py b/llvm/utils/dtlto/local.py
new file mode 100644
index 0000000000000..7be109061310c
--- /dev/null
+++ b/llvm/utils/dtlto/local.py
@@ -0,0 +1,25 @@
+import subprocess
+import sys
+import json
+from pathlib import Path
+
+if __name__ == "__main__":
+    # Load the DTLTO information from the input JSON file.
+    data = json.loads(Path(sys.argv[-1]).read_bytes())
+
+    # Iterate over the jobs and execute the codegen tool.
+    for job in data["jobs"]:
+        jobargs = []
+        for arg in data["common"]["args"]:
+            if isinstance(arg, list):
+                # arg is a "template", into which an external filename is to be
+                # inserted. The first element of arg names an array of strings
+                # in the job. The remaining elements of arg are either indices
+                # into the array or literal strings.
+                files, rest = job[arg[0]], arg[1:]
+                jobargs.append(
+                    "".join(files[x] if isinstance(x, int) else x for x in rest)
+                )
+            else:
+                jobargs.append(arg)
+        subprocess.check_call(jobargs)

>From 2c9710fa9e8eaa52f70bd8987f0129cd522f9437 Mon Sep 17 00:00:00 2001
From: Ben Dunbobbin <Ben.Dunbobbin at sony.com>
Date: Wed, 19 Feb 2025 04:11:17 +0000
Subject: [PATCH 08/10] [DTLTO][LLVM] Allow LTO to take an AddBuffer function
 and use in DTLTO

In the DTLTO ThinLTO backend, use AddBuffer instead of AddStream to add
files to the link.

Unlike the InProcess ThinLTO backend, DTLTO runs the backend compilation
jobs by invoking an external process (currently clang). This writes the
output object file to disk. Therefore, DTLTO requires a performant way
of adding an existing file to the link. Note that the AddBuffer
mechanism is also used for adding a file to the link if there is a cache
hit.
---
 llvm/include/llvm/LTO/LTO.h         | 26 ++++++++----
 llvm/include/llvm/Support/Caching.h |  3 +-
 llvm/lib/LTO/LTO.cpp                | 65 ++++++++++++++---------------
 llvm/tools/llvm-lto2/llvm-lto2.cpp  |  2 +-
 4 files changed, 51 insertions(+), 45 deletions(-)

diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h
index c776b57c1af82..764f60dfd8458 100644
--- a/llvm/include/llvm/LTO/LTO.h
+++ b/llvm/include/llvm/LTO/LTO.h
@@ -264,7 +264,7 @@ class ThinBackendProc {
 using ThinBackendFunction = std::function<std::unique_ptr<ThinBackendProc>(
     const Config &C, ModuleSummaryIndex &CombinedIndex,
     const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
-    AddStreamFn AddStream, FileCache Cache)>;
+    AddStreamFn AddStream, AddBufferFn AddBuffer, FileCache Cache)>;
 
 /// This type defines the behavior following the thin-link phase during ThinLTO.
 /// It encapsulates a backend function and a strategy for thread pool
@@ -279,10 +279,10 @@ struct ThinBackend {
   std::unique_ptr<ThinBackendProc> operator()(
       const Config &Conf, ModuleSummaryIndex &CombinedIndex,
       const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
-      AddStreamFn AddStream, FileCache Cache) {
+      AddStreamFn AddStream, AddBufferFn AddBuffer, FileCache Cache) {
     assert(isValid() && "Invalid backend function");
     return Func(Conf, CombinedIndex, ModuleToDefinedGVSummaries,
-                std::move(AddStream), std::move(Cache));
+                std::move(AddStream), std::move(AddBuffer), std::move(Cache));
   }
   ThreadPoolStrategy getParallelism() const { return Parallelism; }
   bool isValid() const { return static_cast<bool>(Func); }
@@ -308,7 +308,7 @@ ThinBackend createInProcessThinBackend(ThreadPoolStrategy Parallelism,
 /// This ThinBackend generates the index shards and then runs the individual
 /// backend jobs via an external process. It takes the same parameters as the
 /// InProcessThinBackend, however, these parameters only control the behavior
-/// when generating the index files for the modules. Addtionally:
+/// when generating the index files for the modules. Additionally:
 /// LinkerOutputFile is a string that should identify this LTO invocation in
 /// the context of a wider build. It's used for naming to aid the user in
 /// identifying activity related to a specific LTO invocation.
@@ -402,15 +402,22 @@ class LTO {
   /// full description of tasks see LTOBackend.h.
   unsigned getMaxTasks() const;
 
-  /// Runs the LTO pipeline. This function calls the supplied AddStream
-  /// function to add native object files to the link.
+  /// Runs the LTO pipeline. This function calls the supplied AddStream or
+  /// AddBuffer function to add native object files to the link depending on
+  /// whether the files are streamed into memory or written to disk by the
+  /// backend.
   ///
   /// The Cache parameter is optional. If supplied, it will be used to cache
   /// native object files and add them to the link.
   ///
-  /// The client will receive at most one callback (via either AddStream or
+  /// The AddBuffer parameter is only required for DTLTO, currently. It is
+  /// optional to minimise the impact on current LTO users (DTLTO is not used
+  /// currently).
+  ///
+  /// The client will receive at most one callback (via AddStream, AddBuffer or
   /// Cache) for each task identifier.
-  Error run(AddStreamFn AddStream, FileCache Cache = {});
+  Error run(AddStreamFn AddStream, FileCache Cache = {},
+            AddBufferFn AddBuffer = nullptr);
 
   /// Static method that returns a list of libcall symbols that can be generated
   /// by LTO but might not be visible from bitcode symbol table.
@@ -555,7 +562,8 @@ class LTO {
                    StringRef Triple);
 
   Error runRegularLTO(AddStreamFn AddStream);
-  Error runThinLTO(AddStreamFn AddStream, FileCache Cache,
+  Error runThinLTO(AddStreamFn AddStream, AddBufferFn AddBuffer,
+                   FileCache Cache,
                    const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols);
 
   Error checkPartiallySplit();
diff --git a/llvm/include/llvm/Support/Caching.h b/llvm/include/llvm/Support/Caching.h
index cf45145619d95..8c3ea4f205d4c 100644
--- a/llvm/include/llvm/Support/Caching.h
+++ b/llvm/include/llvm/Support/Caching.h
@@ -84,7 +84,8 @@ struct FileCache {
   std::string CacheDirectoryPath;
 };
 
-/// This type defines the callback to add a pre-existing file (e.g. in a cache).
+/// This type defines the callback to add a pre-existing file (e.g. in a cache
+/// or created by a backend compilation run as a separate process).
 ///
 /// Buffer callbacks must be thread safe.
 using AddBufferFn = std::function<void(unsigned Task, const Twine &ModuleName,
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index fa0bc6f69f006..6ad31d68578cb 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -1172,7 +1172,7 @@ Error LTO::checkPartiallySplit() {
   return Error::success();
 }
 
-Error LTO::run(AddStreamFn AddStream, FileCache Cache) {
+Error LTO::run(AddStreamFn AddStream, FileCache Cache, AddBufferFn AddBuffer) {
   // Compute "dead" symbols, we don't want to import/export these!
   DenseSet<GlobalValue::GUID> GUIDPreservedSymbols;
   DenseMap<GlobalValue::GUID, PrevailingType> GUIDPrevailingResolutions;
@@ -1222,7 +1222,7 @@ Error LTO::run(AddStreamFn AddStream, FileCache Cache) {
   if (!Result)
     // This will reset the GlobalResolutions optional once done with it to
     // reduce peak memory before importing.
-    Result = runThinLTO(AddStream, Cache, GUIDPreservedSymbols);
+    Result = runThinLTO(AddStream, AddBuffer, Cache, GUIDPreservedSymbols);
 
   if (StatsFile)
     PrintStatisticsJSON(StatsFile->os());
@@ -1448,7 +1448,6 @@ Error ThinBackendProc::emitFiles(
 namespace {
 class CGThinBackend : public ThinBackendProc {
 protected:
-  AddStreamFn AddStream;
   DenseSet<GlobalValue::GUID> CfiFunctionDefs;
   DenseSet<GlobalValue::GUID> CfiFunctionDecls;
   bool ShouldEmitIndexFiles;
@@ -1457,12 +1456,10 @@ class CGThinBackend : public ThinBackendProc {
   CGThinBackend(
       const Config &Conf, ModuleSummaryIndex &CombinedIndex,
       const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
-      AddStreamFn AddStream, lto::IndexWriteCallback OnWrite,
-      bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles,
-      ThreadPoolStrategy ThinLTOParallelism)
+      lto::IndexWriteCallback OnWrite, bool ShouldEmitIndexFiles,
+      bool ShouldEmitImportsFiles, ThreadPoolStrategy ThinLTOParallelism)
       : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries,
                         OnWrite, ShouldEmitImportsFiles, ThinLTOParallelism),
-        AddStream(std::move(AddStream)),
         ShouldEmitIndexFiles(ShouldEmitIndexFiles) {
     for (auto &Name : CombinedIndex.cfiFunctionDefs())
       CfiFunctionDefs.insert(
@@ -1475,6 +1472,7 @@ class CGThinBackend : public ThinBackendProc {
 
 class InProcessThinBackend : public CGThinBackend {
 protected:
+  AddStreamFn AddStream;
   FileCache Cache;
 
 public:
@@ -1484,10 +1482,10 @@ class InProcessThinBackend : public CGThinBackend {
       const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
       AddStreamFn AddStream, FileCache Cache, lto::IndexWriteCallback OnWrite,
       bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles)
-      : CGThinBackend(Conf, CombinedIndex, ModuleToDefinedGVSummaries,
-                      AddStream, OnWrite, ShouldEmitIndexFiles,
-                      ShouldEmitImportsFiles, ThinLTOParallelism),
-        Cache(std::move(Cache)) {}
+      : CGThinBackend(Conf, CombinedIndex, ModuleToDefinedGVSummaries, OnWrite,
+                      ShouldEmitIndexFiles, ShouldEmitImportsFiles,
+                      ThinLTOParallelism),
+        AddStream(std::move(AddStream)), Cache(std::move(Cache)) {}
 
   virtual Error runThinLTOBackendThread(
       AddStreamFn AddStream, FileCache Cache, unsigned Task, BitcodeModule BM,
@@ -1755,7 +1753,7 @@ ThinBackend lto::createInProcessThinBackend(ThreadPoolStrategy Parallelism,
   auto Func =
       [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
           const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
-          AddStreamFn AddStream, FileCache Cache) {
+          AddStreamFn AddStream, AddBufferFn /*AddBuffer*/, FileCache Cache) {
         return std::make_unique<InProcessThinBackend>(
             Conf, CombinedIndex, Parallelism, ModuleToDefinedGVSummaries,
             AddStream, Cache, OnWrite, ShouldEmitIndexFiles,
@@ -1877,7 +1875,7 @@ ThinBackend lto::createWriteIndexesThinBackend(
   auto Func =
       [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
           const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
-          AddStreamFn AddStream, FileCache Cache) {
+          AddStreamFn AddStream, AddBufferFn AddBuffer, FileCache Cache) {
         return std::make_unique<WriteIndexesThinBackend>(
             Conf, CombinedIndex, Parallelism, ModuleToDefinedGVSummaries,
             OldPrefix, NewPrefix, NativeObjectPrefix, ShouldEmitImportsFiles,
@@ -1886,7 +1884,8 @@ ThinBackend lto::createWriteIndexesThinBackend(
   return ThinBackend(Func, Parallelism);
 }
 
-Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
+Error LTO::runThinLTO(AddStreamFn AddStream, AddBufferFn AddBuffer,
+                      FileCache Cache,
                       const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
   LLVM_DEBUG(dbgs() << "Running ThinLTO\n");
   ThinLTO.CombinedIndex.releaseTemporaryMemory();
@@ -2094,7 +2093,7 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
   if (!CodeGenDataThinLTOTwoRounds) {
     std::unique_ptr<ThinBackendProc> BackendProc =
         ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
-                        AddStream, Cache);
+                        AddStream, AddBuffer, Cache);
     return RunBackends(BackendProc.get());
   }
 
@@ -2201,6 +2200,8 @@ namespace {
 class OutOfProcessThinBackend : public CGThinBackend {
   using SString = SmallString<128>;
 
+  AddBufferFn AddBuffer;
+
   BumpPtrAllocator Alloc;
   StringSaver Saver{Alloc};
 
@@ -2232,15 +2233,16 @@ class OutOfProcessThinBackend : public CGThinBackend {
       const Config &Conf, ModuleSummaryIndex &CombinedIndex,
       ThreadPoolStrategy ThinLTOParallelism,
       const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
-      AddStreamFn AddStream, lto::IndexWriteCallback OnWrite,
-      bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles,
-      StringRef LinkerOutputFile, StringRef RemoteOptTool,
-      StringRef Distributor, bool SaveTemps)
-      : CGThinBackend(Conf, CombinedIndex, ModuleToDefinedGVSummaries,
-                      AddStream, OnWrite, ShouldEmitIndexFiles,
-                      ShouldEmitImportsFiles, ThinLTOParallelism),
-        LinkerOutputFile(LinkerOutputFile), RemoteOptTool(RemoteOptTool),
-        DistributorPath(Distributor), SaveTemps(SaveTemps) {}
+      AddStreamFn AddStream, AddBufferFn AddBuffer,
+      lto::IndexWriteCallback OnWrite, bool ShouldEmitIndexFiles,
+      bool ShouldEmitImportsFiles, StringRef LinkerOutputFile,
+      StringRef RemoteOptTool, StringRef Distributor, bool SaveTemps)
+      : CGThinBackend(Conf, CombinedIndex, ModuleToDefinedGVSummaries, OnWrite,
+                      ShouldEmitIndexFiles, ShouldEmitImportsFiles,
+                      ThinLTOParallelism),
+        AddBuffer(std::move(AddBuffer)), LinkerOutputFile(LinkerOutputFile),
+        RemoteOptTool(RemoteOptTool), DistributorPath(Distributor),
+        SaveTemps(SaveTemps) {}
 
   virtual void setup(unsigned MaxTasks) override {
     UID = itostr(sys::Process::getProcessId());
@@ -2484,13 +2486,7 @@ class OutOfProcessThinBackend : public CGThinBackend {
             BCError + "cannot open native object file: " +
                 Job.NativeObjectPath + ": " + ec.message(),
             inconvertibleErrorCode());
-      std::unique_ptr<llvm::MemoryBuffer> umb = std::move(objFileMbOrErr.get());
-      Expected<std::unique_ptr<CachedFileStream>> StreamOrErr =
-          AddStream(Job.Task, Job.ModuleID);
-      if (Error Err = StreamOrErr.takeError())
-        report_fatal_error(std::move(Err));
-      std::unique_ptr<CachedFileStream> Stream = std::move(*StreamOrErr);
-      *Stream->OS << umb->getMemBufferRef().getBuffer();
+      AddBuffer(Job.Task, Job.ModuleID, std::move(objFileMbOrErr.get()));
     }
 
     return Error::success();
@@ -2506,11 +2502,12 @@ ThinBackend lto::createOutOfProcessThinBackend(
   auto Func =
       [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
           const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
-          AddStreamFn AddStream, FileCache /*Cache*/) {
+          AddStreamFn AddStream, AddBufferFn AddBuffer, FileCache /*Cache*/) {
         return std::make_unique<OutOfProcessThinBackend>(
             Conf, CombinedIndex, Parallelism, ModuleToDefinedGVSummaries,
-            AddStream, OnWrite, ShouldEmitIndexFiles, ShouldEmitImportsFiles,
-            LinkerOutputFile, RemoteOptTool, Distributor, SaveTemps);
+            AddStream, AddBuffer, OnWrite, ShouldEmitIndexFiles,
+            ShouldEmitImportsFiles, LinkerOutputFile, RemoteOptTool,
+            Distributor, SaveTemps);
       };
   return ThinBackend(Func, Parallelism);
 }
diff --git a/llvm/tools/llvm-lto2/llvm-lto2.cpp b/llvm/tools/llvm-lto2/llvm-lto2.cpp
index 82f2db58acd11..df6de93f6f236 100644
--- a/llvm/tools/llvm-lto2/llvm-lto2.cpp
+++ b/llvm/tools/llvm-lto2/llvm-lto2.cpp
@@ -476,7 +476,7 @@ static int run(int argc, char **argv) {
     Cache = check(localCache("ThinLTO", "Thin", CacheDir, AddBuffer),
                   "failed to create cache");
 
-  check(Lto.run(AddStream, Cache), "LTO::run failed");
+  check(Lto.run(AddStream, Cache, AddBuffer), "LTO::run failed");
   return static_cast<int>(HasErrors);
 }
 

>From 0c46c0c9fdf34df345524069c504351d4db25b47 Mon Sep 17 00:00:00 2001
From: Ben Dunbobbin <Ben.Dunbobbin at sony.com>
Date: Wed, 19 Feb 2025 04:33:25 +0000
Subject: [PATCH 09/10] [DTLTO][LLVM][Doc] Add DTLTO documentation

---
 llvm/docs/DTLTO.rst      | 228 +++++++++++++++++++++++++++++++++++++++
 llvm/docs/UserGuides.rst |   6 ++
 2 files changed, 234 insertions(+)
 create mode 100644 llvm/docs/DTLTO.rst

diff --git a/llvm/docs/DTLTO.rst b/llvm/docs/DTLTO.rst
new file mode 100644
index 0000000000000..3fed25a922f5a
--- /dev/null
+++ b/llvm/docs/DTLTO.rst
@@ -0,0 +1,228 @@
+===================
+DTLTO
+===================
+.. contents::
+   :local:
+   :depth: 2
+
+.. toctree::
+   :maxdepth: 1
+
+Distributed ThinLTO (DTLTO)
+===========================
+
+Distributed ThinLTO (DTLTO) facilitates the distribution of backend ThinLTO
+compilations via external distribution systems such as Incredibuild.
+
+The existing method of distributing ThinLTO compilations via separate thin-link,
+backend compilation, and link steps often requires significant changes to the
+user's build process to adopt, as it requires using a build system which can
+handle the dynamic dependencies specified by the index files, such as Bazel.
+
+DTLTO eliminates this need by managing distribution internally within the LLD
+linker during the traditional link step. This allows DTLTO to be used with any
+build process that supports in-process ThinLTO.
+
+Limitations
+-----------
+
+The current implementation of DTLTO has the following limitations:
+
+- The ThinLTO cache is not supported.
+- Only ELF and COFF platforms are supported.
+- Archives with bitcode members are not supported.
+- Only a very limited set of LTO configurations are currently supported, e.g.,
+  support for basic block sections is not currently available.
+
+Overview of Operation
+---------------------
+
+For each ThinLTO backend compilation job, LLD:
+
+1. Generates the required summary index shard.
+2. Records a list of input and output files.
+3. Constructs a Clang command line to perform the ThinLTO backend compilation.
+
+This information is supplied, via a JSON file, to a distributor program that
+executes the backend compilations using a distribution system. Upon completion,
+LLD integrates the compiled native object files into the link process.
+
+The design keeps the details of distribution systems out of the LLVM source
+code.
+
+Distributors
+------------
+
+Distributors are programs responsible for:
+
+1. Consuming the JSON job description file for the backend compilations.
+2. Translating job descriptions into requests for the distribution system.
+3. Blocking execution until all backend compilations are complete.
+
+Distributors must return a non-zero exit code on failure. They can be
+implemented as binaries or in scripting languages, such as Python. An example
+script demonstrating basic local execution is available with the LLVM source
+code.
+
+How Distributors Are Invoked
+----------------------------
+
+Clang and LLD provide options to specify a distributor program for managing
+backend compilations. Distributor options and backend compilation options can
+also be specified. Such options are transparently forwarded.
+
+The backend compilations are currently performed by invoking Clang. For further
+details, refer to:
+
+- Clang documentation: https://clang.llvm.org/docs/ThinLTO.html
+- LLD documentation: https://lld.llvm.org/DTLTO.html
+
+When invoked with a distributor, LLD generates a JSON file describing the
+backend compilation jobs and executes the distributor passing it this file. The
+JSON file provides the following information to the distributor:
+
+- The **command line** to execute the backend compilations.
+   - DTLTO constructs a Clang command line by translating some of the LTO
+     configuration state into Clang options and forwarding options specified
+     by the user.
+
+- **Link output path**.
+   - A string identifying the output to which this LTO invocation will 
+     contribute. Distributors can use this to label build jobs for informational
+     purposes.
+
+- **Linker's version string**.
+   - Distributors can use this to determine if the invoked remote optimisation
+     tool is compatible.
+
+- The list of **imports** required for each job.
+   - The per-job list of bitcode files from which importing will occur. This is
+     the same information that is emitted into import files for ThinLTO.
+
+- The **input files** required for each job.
+   - The per-job set of files required for backend compilation, such as bitcode
+     files, summary index files, and profile data.
+
+- The **output files** generated by each job.
+   - The per-job files generated by the backend compilations, such as compiled
+     object files and toolchain metrics.
+
+Temporary Files
+---------------
+
+During its operation, DTLTO generates temporary files. Temporary files are
+created in the same directory as the linker's output file and their filenames
+include the stem of the bitcode module, or the output file that the LTO 
+invocation is contributing to, to aid the user in identifying them:
+
+- **JSON Job Description File**:
+    - Format:  `dtlto.<UID>.dist-file.json`
+    - Example: `dtlto.77380.dist-file.json` (for output file `dtlto.elf`).
+
+- **Object Files From Backend Compilations**:
+    - Format:  `<Module ID stem>.<Task>.<UID>.native.o`
+    - Example: `my.1.77380.native.o` (for bitcode module `my.o`).
+
+- **Summary Index Shard Files**:
+    - Format:  `<Module ID stem>.<Task>.<UID>.native.o.thinlto.bc`
+    - Example: `my.1.77380.native.o.thinlto.bc` (for bitcode module `my.o`).
+
+Temporary files are removed, by default, after the backend compilations complete.
+
+JSON Schema
+-----------
+
+Below is an example of a JSON job file for backend compilation of the module
+`dtlto.o`:
+
+.. code-block:: json
+
+    {
+        "common": {
+            "linker_output": "dtlto.elf",
+            "linker_version": "LLD 20.0.0",
+            "args": [
+                "/usr/local/clang",
+                "-O3", "-fprofile-sample-use=my.profdata",
+                "-o", ["primary_output", 0],
+                "-c", "-x", "ir", ["primary_input", 0],
+                ["summary_index", "-fthinlto-index=", 0],
+                "--target=x86_64-sie-ps5"
+            ]
+        },
+        "jobs": [
+            {
+                "primary_input": ["dtlto.o"],
+                "summary_index": ["dtlto.1.51232.native.o.thinlto.bc"],
+                "primary_output": ["dtlto.1.51232.native.o"],
+                "imports": [],
+                "additional_inputs": ["my.profdata"]
+            }
+        ]
+    }
+
+Key Features of the Schema
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+- **Input/Output Paths**: Paths are stored in per-file-type array fields. This
+  allows files to be adjusted, if required, to meet the constraints of the
+  underlying distribution system. For example, a system may only be able to read
+  and write remote files to `C:\\sandbox`. The remote paths used can be adjusted
+  by the distributor for such constraints. Once outputs are back on the local
+  system, the distributor can rename them as required.
+
+
+- **Command-Line Template**: Command-line options are stored in a common
+  template to avoid duplication for each job. The template consists of an array
+  of strings and arrays. The arrays are placeholders which reference per-job
+  paths. This allows the remote optimisation tool to be changed without updating
+  the distributors.
+
+Command-Line Expansion Example
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To create the backend compilation commands, the command-line template is
+expanded for each job. Placeholders are expanded in the following way: The first
+array element specifies the name of the array field to look in. The remaining
+elements are converted to strings and concatenated. Integers are converted by
+indexing into the specified array.
+
+The example above generates the following backend compilation command for
+`dtlto.o`:
+
+.. code-block:: console
+
+    /usr/local/clang -O3 -fprofile-sample-use=my.profdata \
+        -o dtlto.1.51232.native.o -c -x ir dtlto.o \
+        -fthinlto-index=dtlto.1.51232.native.o.thinlto.bc --target=x86_64-sie-ps5
+
+This expansion scheme allows the remote optimization tool to be changed without
+updating the distributors. For example, if the "args" field in the above example
+was replaced with:
+
+.. code-block:: json
+
+    "args": [
+        "custom-codegen-tool",
+        "-opt-level=2",
+        "-profile-instrument-use-path=my.profdata",
+        "-output", ["primary_output", 0],
+        "-input", ["primary_input", 0],
+        "-thinlto-index", ["summary_index", 0],
+        "-triple", "x86_64-sie-ps5"
+    ]
+
+Then distributors can expand the command line without needing to be updated:
+
+.. code-block:: console
+
+    custom-codegen-tool -opt-level=2 -profile-instrument-use-path=my.profdata \
+        -output dtlto.1.51232.native.o -input dtlto.o \
+        -thinlto-index dtlto.1.51232.native.o.thinlto.bc -triple x86_64-sie-ps5
+
+Constraints
+-----------
+
+- Matching versions of Clang and LLD should be used.
+- The distributor used must support the JSON schema generated by the version of
+  LLD in use.
\ No newline at end of file
diff --git a/llvm/docs/UserGuides.rst b/llvm/docs/UserGuides.rst
index 6eee564713d6d..3e16fe42b7d11 100644
--- a/llvm/docs/UserGuides.rst
+++ b/llvm/docs/UserGuides.rst
@@ -32,6 +32,7 @@ intermediate LLVM representation.
    DebuggingJITedCode
    DirectXUsage
    Docker
+   DTLTO
    FatLTO
    ExtendingLLVM
    GitHub
@@ -164,6 +165,11 @@ Optimizations
    This document describes the interface between LLVM intermodular optimizer
    and the linker and its design
 
+:doc:`DTLTO`
+   This document describes the DTLTO implementation, which allows for
+   distributing ThinLTO backend compilations without requiring support from
+   the build system.
+
 :doc:`GoldPlugin`
    How to build your programs with link-time optimization on Linux.
 

>From 33dbf558840b096c75f84dca6b2741674d50164c Mon Sep 17 00:00:00 2001
From: Ben Dunbobbin <Ben.Dunbobbin at sony.com>
Date: Wed, 19 Feb 2025 14:28:30 +0000
Subject: [PATCH 10/10] [DTLTO][LLVM] clang format LTO.h to prevent automated
 checks errors

Some of the code in LTO.h did not conform to the LLVM coding standard.
However, it did match the style already used in that file.

This was causing the automated code-formatting checks on GitHub to fail,
which was confusing on the PR. I decided to apply clang-format everywhere
to prevent this, even though the new code no longer matches the style of
the existing code.
---
 llvm/include/llvm/LTO/LTO.h | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h
index 764f60dfd8458..8e6d638602317 100644
--- a/llvm/include/llvm/LTO/LTO.h
+++ b/llvm/include/llvm/LTO/LTO.h
@@ -318,14 +318,11 @@ ThinBackend createInProcessThinBackend(ThreadPoolStrategy Parallelism,
 /// jobs execution.
 /// SaveTemps is a debugging tool that prevents temporary files created by this
 /// backend from being cleaned up.
-ThinBackend createOutOfProcessThinBackend(ThreadPoolStrategy Parallelism,
-                                          IndexWriteCallback OnWrite,
-                                          bool ShouldEmitIndexFiles,
-                                          bool ShouldEmitImportsFiles,
-                                          StringRef LinkerOutputFile,
-                                          StringRef RemoteOptTool,
-                                          StringRef Distributor,
-                                          bool SaveTemps);
+ThinBackend createOutOfProcessThinBackend(
+    ThreadPoolStrategy Parallelism, IndexWriteCallback OnWrite,
+    bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles,
+    StringRef LinkerOutputFile, StringRef RemoteOptTool, StringRef Distributor,
+    bool SaveTemps);
 
 /// This ThinBackend writes individual module indexes to files, instead of
 /// running the individual backend jobs. This backend is for distributed builds



More information about the llvm-commits mailing list