[clang] [lld] [llvm] Integrated Distributed ThinLTO (DTLTO): Initial support (PR #126654)
    via llvm-commits 
    llvm-commits at lists.llvm.org
       
    Wed Feb 12 17:38:15 PST 2025
    
    
  
https://github.com/bd1976bris updated https://github.com/llvm/llvm-project/pull/126654
>From 3711d21c9e6b0b3ed26d73f6b57b22553010f6c2 Mon Sep 17 00:00:00 2001
From: Ben Dunbobbin <Ben.Dunbobbin at sony.com>
Date: Tue, 11 Feb 2025 02:14:07 +0000
Subject: [PATCH 1/3] Implement integrated distribution for ThinLTO (DTLTO).
 ELF and COFF only.
---
 clang/docs/ThinLTO.rst                        |  32 ++
 clang/include/clang/Driver/Options.td         |   8 +-
 clang/lib/Driver/ToolChains/Gnu.cpp           |  19 +
 clang/test/Driver/DTLTO/dtlto.c               |  44 ++
 .../ClangNVLinkWrapper.cpp                    |   2 +-
 cross-project-tests/CMakeLists.txt            |  15 +-
 cross-project-tests/dtlto/README.txt          |   2 +
 cross-project-tests/dtlto/archive-thin.test   |  72 +++
 .../dtlto/dtlto-translate-options.ll          | 144 ++++++
 cross-project-tests/dtlto/dtlto.c             |  49 ++
 cross-project-tests/dtlto/lit.local.cfg       |   2 +
 cross-project-tests/lit.cfg.py                |   5 +-
 lld/COFF/Config.h                             |   5 +
 lld/COFF/Driver.cpp                           |  13 +
 lld/COFF/LTO.cpp                              |  30 +-
 lld/COFF/Options.td                           |   4 +
 lld/ELF/Config.h                              |   2 +
 lld/ELF/Driver.cpp                            |   3 +
 lld/ELF/InputFiles.cpp                        |  58 ++-
 lld/ELF/LTO.cpp                               |  26 +-
 lld/ELF/Options.td                            |   6 +-
 lld/MachO/LTO.cpp                             |  13 +-
 lld/docs/DTLTO.rst                            |  60 +++
 lld/docs/index.rst                            |   1 +
 lld/test/COFF/dtlto.test                      |  50 +++
 lld/test/ELF/dtlto/dtlto.test                 |  53 +++
 lld/test/ELF/dtlto/imports.test               |  69 +++
 lld/test/ELF/dtlto/relative.test              |  65 +++
 lld/test/lit.cfg.py                           |   1 +
 lld/wasm/LTO.cpp                              |  13 +-
 llvm/docs/DTLTO.rst                           | 228 ++++++++++
 llvm/docs/UserGuides.rst                      |   6 +
 llvm/include/llvm/LTO/LTO.h                   |  68 ++-
 llvm/include/llvm/Support/Caching.h           |   3 +-
 .../llvm/Transforms/IPO/FunctionImport.h      |   6 +
 llvm/lib/LTO/LTO.cpp                          | 418 +++++++++++++++++-
 llvm/lib/Transforms/IPO/FunctionImport.cpp    |  14 +-
 llvm/test/ThinLTO/X86/dtlto-triple.ll         |  47 ++
 llvm/test/ThinLTO/X86/dtlto.ll                |  65 +++
 llvm/test/lit.cfg.py                          |   1 +
 llvm/tools/llvm-lto2/llvm-lto2.cpp            |  33 +-
 llvm/utils/dtlto/local.py                     |  25 ++
 llvm/utils/dtlto/mock.py                      |  16 +
 llvm/utils/dtlto/validate.py                  |  75 ++++
 44 files changed, 1796 insertions(+), 75 deletions(-)
 create mode 100644 clang/test/Driver/DTLTO/dtlto.c
 create mode 100644 cross-project-tests/dtlto/README.txt
 create mode 100644 cross-project-tests/dtlto/archive-thin.test
 create mode 100644 cross-project-tests/dtlto/dtlto-translate-options.ll
 create mode 100644 cross-project-tests/dtlto/dtlto.c
 create mode 100644 cross-project-tests/dtlto/lit.local.cfg
 create mode 100644 lld/docs/DTLTO.rst
 create mode 100644 lld/test/COFF/dtlto.test
 create mode 100644 lld/test/ELF/dtlto/dtlto.test
 create mode 100644 lld/test/ELF/dtlto/imports.test
 create mode 100644 lld/test/ELF/dtlto/relative.test
 create mode 100644 llvm/docs/DTLTO.rst
 create mode 100644 llvm/test/ThinLTO/X86/dtlto-triple.ll
 create mode 100644 llvm/test/ThinLTO/X86/dtlto.ll
 create mode 100644 llvm/utils/dtlto/local.py
 create mode 100644 llvm/utils/dtlto/mock.py
 create mode 100644 llvm/utils/dtlto/validate.py
diff --git a/clang/docs/ThinLTO.rst b/clang/docs/ThinLTO.rst
index c042547678919..c3924ea45c9cc 100644
--- a/clang/docs/ThinLTO.rst
+++ b/clang/docs/ThinLTO.rst
@@ -240,6 +240,38 @@ The ``BOOTSTRAP_LLVM_ENABLE_LTO=Thin`` will enable ThinLTO for stage 2 and
 stage 3 in case the compiler used for stage 1 does not support the ThinLTO
 option.
 
+Distributed ThinLTO (DTLTO)
+---------------------------
+
+DTLTO allows for the distribution of backend ThinLTO compilations via external
+distribution systems, e.g. Incredibuild. There is existing support for
+distributing ThinLTO compilations by using separate thin-link, backend
+compilation, and link steps coordinated by a build system which can handle the
+dynamic dependencies specified by the index files, such as Bazel. However, this
+often requires changes to the user's build process. With DTLTO, distribution is
+managed internally in LLD as part of the traditional link step and therefore
+should be usable in any build process that can support in-process ThinLTO.
+
+DTLTO requires the LLD linker (``-fuse-ld=lld``).
+
+``-fthinlto-distributor=<path>``
+   - Specifies the ``<path>`` to the distributor process executable for DTLTO.
+   - If specified, ThinLTO backend compilations will be distributed by LLD.
+
+``-Xdist <arg>``
+   - Pass ``<arg>`` to the distributor process (see ``-fthinlto-distributor=``).
+   - Can be specified multiple times to pass multiple options.
+
+Examples:
+   - ``clang -flto=thin -fthinlto-distributor=incredibuild.exe -Xdist --verbose -fuse-ld=lld``
+   - ``clang -flto=thin -fthinlto-distributor=$(which python) -Xdist incredibuild.py -fuse-ld=lld``
+
+If ``-fthinlto-distributor=`` is specified, Clang supplies the path to a
+distributable optimization and code generation tool to LLD. Currently this tool
+is Clang itself.
+
+See `DTLTO <https://lld.llvm.org/dtlto.html>`_ for more information.
+
 More Information
 ================
 
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 618815db28434..3385432eee2fc 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -969,6 +969,10 @@ def Xlinker : Separate<["-"], "Xlinker">, Flags<[LinkerInput, RenderAsInput]>,
   Visibility<[ClangOption, CLOption, FlangOption]>,
   HelpText<"Pass <arg> to the linker">, MetaVarName<"<arg>">,
   Group<Link_Group>;
+def Xdist : Separate<["-"], "Xdist">, Flags<[LinkOption]>,
+  Visibility<[ClangOption, CLOption]>,
+  HelpText<"Pass <arg> to the ThinLTO distributor">,
+  MetaVarName<"<arg>">, Group<Link_Group>;
 def Xoffload_linker : JoinedAndSeparate<["-"], "Xoffload-linker">,
   Visibility<[ClangOption, FlangOption]>,
   HelpText<"Pass <arg> to the offload linkers or the ones identified by -<triple>">,
@@ -4087,7 +4091,9 @@ def ffinite_loops: Flag<["-"],  "ffinite-loops">, Group<f_Group>,
 def fno_finite_loops: Flag<["-"], "fno-finite-loops">, Group<f_Group>,
   HelpText<"Do not assume that any loop is finite.">,
   Visibility<[ClangOption, CC1Option]>;
-
+def fthinlto_distributor_EQ : Joined<["-"], "fthinlto-distributor=">, Group<f_Group>,
+  HelpText<"Specifies the <path> to the distributor process executable.">, MetaVarName<"<path>">,
+  Visibility<[ClangOption, CLOption]>;
 def ftrigraphs : Flag<["-"], "ftrigraphs">, Group<f_Group>,
   HelpText<"Process trigraph sequences">, Visibility<[ClangOption, CC1Option]>;
 def fno_trigraphs : Flag<["-"], "fno-trigraphs">, Group<f_Group>,
diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp
index f56eeda3cb5f6..3c17ea38f8a47 100644
--- a/clang/lib/Driver/ToolChains/Gnu.cpp
+++ b/clang/lib/Driver/ToolChains/Gnu.cpp
@@ -535,6 +535,25 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                   D.getLTOMode() == LTOK_Thin);
   }
 
+  // Forward the DTLTO options to the linker. We add these unconditionally,
+  // rather than in addLTOOptions() as it is the linker that decides whether to
+  // do LTO or not dependent upon whether there are any bitcode input files in
+  // the link.
+  if (Arg *A = Args.getLastArg(options::OPT_fthinlto_distributor_EQ)) {
+    A->claim();
+    CmdArgs.push_back(
+        Args.MakeArgString("--thinlto-distributor=" + Twine(A->getValue())));
+    CmdArgs.push_back(
+        Args.MakeArgString("--thinlto-remote-opt-tool=" +
+                           Twine(ToolChain.getDriver().getClangProgramPath())));
+
+    for (const Arg *A : Args.filtered(options::OPT_Xdist)) {
+      A->claim();
+      CmdArgs.push_back(Args.MakeArgString("-mllvm=-thinlto-distributor-arg=" +
+                                           Twine(A->getValue())));
+    }
+  }
+
   if (Args.hasArg(options::OPT_Z_Xlinker__no_demangle))
     CmdArgs.push_back("--no-demangle");
 
diff --git a/clang/test/Driver/DTLTO/dtlto.c b/clang/test/Driver/DTLTO/dtlto.c
new file mode 100644
index 0000000000000..a1babb42793bd
--- /dev/null
+++ b/clang/test/Driver/DTLTO/dtlto.c
@@ -0,0 +1,44 @@
+/// Check DTLTO options are forwarded to the linker.
+
+// REQUIRES: lld
+
+// RUN: echo "-target x86_64-linux-gnu \
+// RUN:   -Xdist distarg1 \
+// RUN:   -Xdist distarg2 \
+// RUN:   -fuse-ld=lld" > %t.rsp
+
+
+/// Check that options are forwarded as expected with -fthinlto-distributor=.
+// RUN: %clang -### @%t.rsp -fthinlto-distributor=dist.exe %s 2>&1 | \
+// RUN:   FileCheck %s --implicit-check-not=warning
+
+// CHECK: ld.lld
+// CHECK-SAME: "--thinlto-distributor=dist.exe"
+// CHECK-SAME: "--thinlto-remote-opt-tool={{.*}}clang
+// CHECK-SAME: "-mllvm=-thinlto-distributor-arg=distarg1"
+// CHECK-SAME: "-mllvm=-thinlto-distributor-arg=distarg2"
+
+
+/// Check that options are not added without -fthinlto-distributor= and
+/// that there is an unused option warning issued for -Xdist options. We
+/// specify -flto here as these options should be unaffected by it.
+// RUN: %clang -### @%t.rsp -flto=thin %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=NONE,NOMORE --implicit-check-not=warning
+
+// NONE: warning: argument unused during compilation: '-Xdist distarg1'
+// NONE: warning: argument unused during compilation: '-Xdist distarg2'
+// NONE:     ld.lld
+// NOMORE-NOT: --thinlto-distributor=
+// NOMORE-NOT: --thinlto-remote-opt-tool=
+// NOMORE-NOT: -mllvm
+// NOMORE-NOT: -thinlto-distributor-arg=
+
+
+/// Check the expected arguments are forwarded by default with only
+/// -fthinlto-distributor=.
+// RUN: %clang -### -target x86_64-linux-gnu -fthinlto-distributor=dist.exe -fuse-ld=lld %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEFAULT,NOMORE --implicit-check-not=warning
+
+// DEFAULT: ld.lld
+// DEFAULT-SAME: "--thinlto-distributor=dist.exe"
+// DEFAULT-SAME: "--thinlto-remote-opt-tool={{.*}}clang
diff --git a/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp b/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp
index faf73a7c2f193..3bd2d1471081f 100644
--- a/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp
+++ b/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp
@@ -645,7 +645,7 @@ Expected<SmallVector<StringRef>> getInput(const ArgList &Args) {
           std::make_unique<raw_fd_ostream>(FD, true));
     };
 
-    if (Error Err = LTOBackend.run(AddStream))
+    if (Error Err = LTOBackend.run(AddStream, /*AddBuffer=*/nullptr))
       return Err;
 
     if (Args.hasArg(OPT_lto_emit_llvm) || Args.hasArg(OPT_lto_emit_asm))
diff --git a/cross-project-tests/CMakeLists.txt b/cross-project-tests/CMakeLists.txt
index 7f2fee48fda77..25f03ce88fd78 100644
--- a/cross-project-tests/CMakeLists.txt
+++ b/cross-project-tests/CMakeLists.txt
@@ -19,11 +19,15 @@ set(CROSS_PROJECT_TEST_DEPS
   FileCheck
   check-gdb-llvm-support
   count
-  llvm-dwarfdump
+  llvm-ar
   llvm-config
+  llvm-dwarfdump
+  llvm-lto2
   llvm-objdump
-  split-file
+  llvm-profdata
   not
+  opt
+  split-file
   )
 
 if ("clang" IN_LIST LLVM_ENABLE_PROJECTS)
@@ -94,6 +98,13 @@ add_lit_testsuite(check-cross-amdgpu "Running AMDGPU cross-project tests"
   DEPENDS clang
   )
 
+# DTLTO tests.
+add_lit_testsuite(check-cross-dtlto "Running DTLTO cross-project tests"
+  ${CMAKE_CURRENT_BINARY_DIR}/dtlto
+  EXCLUDE_FROM_CHECK_ALL
+  DEPENDS ${CROSS_PROJECT_TEST_DEPS}
+  )
+
 # Add check-cross-project-* targets.
 add_lit_testsuites(CROSS_PROJECT ${CMAKE_CURRENT_SOURCE_DIR}
   DEPENDS ${CROSS_PROJECT_TEST_DEPS}
diff --git a/cross-project-tests/dtlto/README.txt b/cross-project-tests/dtlto/README.txt
new file mode 100644
index 0000000000000..bc92ffa96807a
--- /dev/null
+++ b/cross-project-tests/dtlto/README.txt
@@ -0,0 +1,2 @@
+                                                                   -*- rst -*-
+This is a collection of tests to check Distributed ThinLTO (DTLTO) functionality.
diff --git a/cross-project-tests/dtlto/archive-thin.test b/cross-project-tests/dtlto/archive-thin.test
new file mode 100644
index 0000000000000..1f1fc60e28724
--- /dev/null
+++ b/cross-project-tests/dtlto/archive-thin.test
@@ -0,0 +1,72 @@
+## Simple test that a DTLTO link succeeds and outputs the expected set of files
+## correctly when thin archives are present.
+
+# RUN: rm -rf %t.dir && split-file %s %t.dir && cd %t.dir
+# RUN: %clang -target x86_64-linux-gnu -c foo.c -o foo.o
+# RUN: %clang -target x86_64-linux-gnu -c -flto=thin bar.c -o bar.o
+# RUN: %clang -target x86_64-linux-gnu -c -flto=thin dog.c -o dog.o
+# RUN: %clang -target x86_64-linux-gnu -c -flto=thin cat.c -o cat.o
+# RUN: %clang -target x86_64-linux-gnu -c -flto=thin _start.c -o _start.o
+
+# RUN: llvm-ar rcs foo.a foo.o --thin
+## Create this bitcode thin archive in a sub-directory to test the expansion of
+## the path to a bitcode file which is referenced using "..", e.g. in this case
+## "../bar.o". The ".." should be collapsed in any expansion to avoid
+## referencing an unknown directory on the remote side.
+# RUN: mkdir lib
+# RUN: llvm-ar rcs lib/bar.a bar.o --thin
+## Create this bitcode thin archive with an absolute path entry containing "..".
+# RUN: llvm-ar rcs dog.a %t.dir/lib/../dog.o --thin
+# RUN: llvm-ar rcs cat.a cat.o --thin
+# RUN: llvm-ar rcs _start.a _start.o --thin
+
+# RUN: mkdir %t.dir/out && cd %t.dir/out
+
+# RUN: %clang -target x86_64-linux-gnu \
+# RUN:   %t.dir/foo.a %t.dir/lib/bar.a ../_start.a %t.dir/cat.a -Wl,--whole-archive,../dog.a \
+# RUN:   -flto=thin \
+# RUN:   -fthinlto-distributor=%python \
+# RUN:   -Xdist %llvm_src_root/utils/dtlto/local.py \
+# RUN:   --save-temps \
+# RUN:   -fuse-ld=lld \
+# RUN:   -nostdlib \
+# RUN:   -nostartfiles \
+# RUN:   -Wl,--save-temps \
+# RUN:   -Wl,-mllvm,--thinlto-remote-opt-tool-arg=-save-temps=cwd \
+# RUN:   -Werror
+
+## Check that the required output files have been created.
+# RUN: ls | FileCheck %s --check-prefix=OUTPUTS \
+# RUN:     --implicit-check-not=cat --implicit-check-not=foo
+
+## The DTLTO backend emits the JSON jobs description and summary shards.
+# OUTPUTS-DAG: a.{{[0-9]+}}.dist-file.json
+# OUTPUTS-DAG: bar.{{[0-9]+}}.{{[0-9]+}}.native.o.thinlto.bc{{$}}
+# OUTPUTS-DAG: dog.{{[0-9]+}}.{{[0-9]+}}.native.o.thinlto.bc{{$}}
+# OUTPUTS-DAG: _start.{{[0-9]+}}.{{[0-9]+}}.native.o.thinlto.bc{{$}}
+## Native output object files.
+# OUTPUTS-DAG: bar.{{[0-9]+}}.{{[0-9]+}}.native.o{{$}}
+# OUTPUTS-DAG: dog.{{[0-9]+}}.{{[0-9]+}}.native.o{{$}}
+# OUTPUTS-DAG: _start.{{[0-9]+}}.{{[0-9]+}}.native.o{{$}}
+
+## Check that bar.o and dog.o are not referenced using "..".
+# RUN: not grep '\.\.\(/\|\\\\\)\(bar\|dog\)\.o' a.*.dist-file.json
+
+#--- foo.c
+__attribute__((retain)) void foo() {}
+
+#--- bar.c
+extern void foo();
+__attribute__((retain)) void bar() { foo(); }
+
+#--- dog.c
+__attribute__((retain)) void dog() {}
+
+#--- cat.c
+__attribute__((retain)) void cat() {}
+
+#--- _start.c
+extern void bar();
+__attribute__((retain)) void _start() {
+  bar();
+}
diff --git a/cross-project-tests/dtlto/dtlto-translate-options.ll b/cross-project-tests/dtlto/dtlto-translate-options.ll
new file mode 100644
index 0000000000000..bbb6ccf33fe7c
--- /dev/null
+++ b/cross-project-tests/dtlto/dtlto-translate-options.ll
@@ -0,0 +1,144 @@
+;; Check that the expected Clang arguments are generated by DTLTO for the 
+;; backend compilations and are accepted by Clang.
+
+; RUN: rm -rf %t && split-file %s %t && cd %t
+
+;; Generate bitcode files with a summary index.
+; RUN: opt -thinlto-bc x86_64-unknown-linux-gnu.ll -o x86_64-unknown-linux-gnu.bc
+; RUN: opt -thinlto-bc x86_64-pc-windows-msvc.ll   -o x86_64-pc-windows-msvc.bc
+
+
+;; Check that any invalid arguments would cause a Clang error. This property is
+;; relied on by the actual testcases later in this test.
+; RUN: not %clang -x ir x86_64-unknown-linux-gnu.ll \
+; RUN:     -invalid-incorrect-not-an-option 2>&1 | FileCheck %s --check-prefix=SANITY1
+; SANITY1: unknown argument: '-invalid-incorrect-not-an-option'
+
+
+;; Define a substitution used to simplify the testcases.
+; DEFINE: %{distributor} = dummy
+; DEFINE: %{extra_flags} = dummy
+; DEFINE: %{triple} = dummy
+; DEFINE: %{command} = llvm-lto2 run \
+; DEFINE:   -thinlto-distributor-arg=%llvm_src_root/utils/dtlto/%{distributor} \
+; DEFINE:   -thinlto-remote-opt-tool-arg=-Wunused-command-line-argument \
+; DEFINE:   @%{triple}.rsp %{extra_flags}
+
+
+;; Write common arguments to response files.
+
+; RUN: echo "x86_64-unknown-linux-gnu.bc -o x86_64-unknown-linux-gnu.o \
+; RUN:       -dtlto \
+; RUN:       -dtlto-remote-opt-tool=%clang \
+; RUN:       -thinlto-remote-opt-tool-arg=-Werror \
+; RUN:       -dtlto-distributor=%python \
+; RUN:       -r=x86_64-unknown-linux-gnu.bc,globalfunc1,plx" > x86_64-unknown-linux-gnu.rsp
+
+; RUN: echo "x86_64-pc-windows-msvc.bc -o x86_64-pc-windows-msvc.o \
+; RUN:       -dtlto \
+; RUN:       -dtlto-remote-opt-tool=%clang \
+; RUN:       -thinlto-remote-opt-tool-arg=-Werror \
+; RUN:       -thinlto-remote-opt-tool-arg=-Wno-override-module \
+; RUN:       -dtlto-distributor=%python \
+; RUN:       -r=x86_64-pc-windows-msvc.bc,globalfunc2,plx" > x86_64-pc-windows-msvc.rsp
+
+
+;; Check that boolean configuration states are translated as expected and Clang
+;; accepts them.
+
+; RUN: echo " \
+; RUN:   --addrsig=1 \
+; RUN:   -function-sections=1 \
+; RUN:   -data-sections=1" > on.rsp
+
+; RUN: echo " \
+; RUN:   --addrsig=0 \
+; RUN:   -function-sections=0 \
+; RUN:   -data-sections=0" > off.rsp
+
+;; Perform DTLTO with configuration state set.
+; REDEFINE: %{extra_flags} = @on.rsp
+; REDEFINE: %{distributor} = local.py
+; REDEFINE: %{triple} = x86_64-unknown-linux-gnu
+; RUN: %{command}
+; REDEFINE: %{distributor} = validate.py
+; RUN: not %{command} 2>&1 | FileCheck %s --check-prefix=ON \
+; RUN:     --implicit-check-not=-no-pgo-warn-mismatch
+; ON-DAG: "-faddrsig"
+; ON-DAG: "-ffunction-sections"
+; ON-DAG: "-fdata-sections"
+
+;; Perform DTLTO with configuration state unset; no flag may be forwarded.
+; REDEFINE: %{extra_flags} = @off.rsp
+; REDEFINE: %{distributor} = local.py
+; RUN: %{command}
+; REDEFINE: %{distributor} = validate.py
+; RUN: not %{command} 2>&1 | FileCheck %s --check-prefix=OFF
+; OFF-NOT: "-faddrsig"
+; OFF-NOT: "-ffunction-sections"
+; OFF-NOT: "-fdata-sections"
+; OFF-NOT: -no-pgo-warn-mismatch
+
+
+;; Check optimisation level.
+
+; RUN: llvm-lto2 run \
+; RUN:   -thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
+; RUN:   @x86_64-unknown-linux-gnu.rsp \
+; RUN:   -O3
+
+; RUN: not llvm-lto2 run \
+; RUN:   -thinlto-distributor-arg=%llvm_src_root/utils/dtlto/validate.py \
+; RUN:   @x86_64-unknown-linux-gnu.rsp \
+; RUN:   -O3 2>&1 | FileCheck %s --check-prefix=OPTLEVEL
+; OPTLEVEL-DAG: "-O3"
+
+
+;; Check relocation model.
+
+; REDEFINE: %{extra_flags} = -relocation-model=pic
+; REDEFINE: %{distributor} = local.py
+; RUN: %{command}
+; REDEFINE: %{distributor} = validate.py
+; RUN: not %{command} 2>&1 | FileCheck %s --check-prefix=PIC
+; PIC: -fpic
+
+
+; REDEFINE: %{extra_flags} = -relocation-model=pic
+; REDEFINE: %{distributor} = local.py
+; REDEFINE: %{triple} = x86_64-pc-windows-msvc
+; RUN: %{command}
+; REDEFINE: %{distributor} = validate.py
+; RUN: not %{command} 2>&1 | FileCheck %s --check-prefix=NOPIC
+; REDEFINE: %{triple} = x86_64-unknown-linux-gnu
+; NOPIC-NOT: -fpic
+
+;; Check specifying a sample profile.
+; REDEFINE: %{extra_flags} = --lto-sample-profile-file="missing.profdata"
+; REDEFINE: %{distributor} = local.py
+; RUN: not %{command} 2>&1 | FileCheck %s --check-prefix=SAMPLE_PROFILE_ERR
+; SAMPLE_PROFILE_ERR: no such file or directory: 'missing.profdata'
+; REDEFINE: %{distributor} = validate.py
+; RUN: not %{command} 2>&1 | FileCheck %s --check-prefix=SAMPLE_PROFILE
+; SAMPLE_PROFILE-DAG: "-fprofile-sample-use=missing.profdata"
+
+
+;--- x86_64-unknown-linux-gnu.ll
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @globalfunc1() {
+entry:
+  ret void
+}
+
+;--- x86_64-pc-windows-msvc.ll
+
+target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+define void @globalfunc2() {
+entry:
+  ret void
+}
diff --git a/cross-project-tests/dtlto/dtlto.c b/cross-project-tests/dtlto/dtlto.c
new file mode 100644
index 0000000000000..95c784df4201f
--- /dev/null
+++ b/cross-project-tests/dtlto/dtlto.c
@@ -0,0 +1,49 @@
+/// Simple test that DTLTO works with a single input file and generates the
+/// expected set of files with --save-temps applied to the linker.
+///
+/// Note that we also supply --save-temps to the compiler for predictable
+/// bitcode file names.
+
+// RUN: rm -rf %t && mkdir %t && cd %t
+
+// RUN: %clang -target x86_64-linux-gnu %s -shared -flto=thin \
+// RUN:   -fthinlto-distributor=%python \
+// RUN:   -Xdist %llvm_src_root/utils/dtlto/local.py \
+// RUN:   --save-temps \
+// RUN:   -fuse-ld=lld \
+// RUN:   -nostdlib \
+// RUN:   -nostartfiles \
+// RUN:   -Wl,--save-temps \
+// RUN:   -Werror
+
+/// Check that the required output files have been created.
+// RUN: ls | count 13
+// RUN: ls | FileCheck %s --check-prefix=BITCODE
+// RUN: ls | FileCheck %s --check-prefix=BACKEND
+// RUN: ls | FileCheck %s --check-prefix=NATIVE
+// RUN: ls | FileCheck %s --check-prefix=LLD
+
+/// Files produced by the bitcode compilation.
+// BITCODE: dtlto.bc
+// BITCODE: dtlto.i
+// BITCODE: dtlto.o
+
+/// The DTLTO backend emits the jobs description JSON and a summary shard.
+// BACKEND: a.{{[0-9]+}}.dist-file.json
+// BACKEND: dtlto.{{[0-9]+}}.{{[0-9]+}}.native.o.thinlto.bc{{$}}
+
+/// Native object output file for dtlto.o.
+// NATIVE: dtlto.{{[0-9]+}}.{{[0-9]+}}.native.o{{$}}
+/// linked ELF.
+// LLD: a.out{{$}}
+
+/// save-temps incremental files for a.out.
+/// TODO: Perhaps we should suppress some of the linker hooks for DTLTO.
+// LLD: a.out.0.0.preopt.bc{{$}}
+// LLD: a.out.0.2.internalize.bc{{$}}
+// LLD: a.out.index.bc{{$}}
+// LLD: a.out.index.dot{{$}}
+// LLD: a.out.lto.dtlto.o{{$}}
+// LLD: a.out.resolution.txt{{$}}
+
+int _start() { return 0; }
diff --git a/cross-project-tests/dtlto/lit.local.cfg b/cross-project-tests/dtlto/lit.local.cfg
new file mode 100644
index 0000000000000..222a1c98a9eba
--- /dev/null
+++ b/cross-project-tests/dtlto/lit.local.cfg
@@ -0,0 +1,2 @@
+if any(feature not in config.available_features for feature in ["clang", "llvm-ar", "llvm-lto2", "opt"]):
+    config.unsupported = True
diff --git a/cross-project-tests/lit.cfg.py b/cross-project-tests/lit.cfg.py
index 66fdd63632885..b77d26cb8ce28 100644
--- a/cross-project-tests/lit.cfg.py
+++ b/cross-project-tests/lit.cfg.py
@@ -19,7 +19,7 @@
 config.test_format = lit.formats.ShTest(not llvm_config.use_lit_shell)
 
 # suffixes: A list of file extensions to treat as test files.
-config.suffixes = [".c", ".cl", ".cpp", ".m"]
+config.suffixes = [".c", ".cl", ".cpp", ".m", ".ll", ".test"]
 
 # excludes: A list of directories to exclude from the testsuite. The 'Inputs'
 # subdirectories contain auxiliary inputs for various tests in their parent
@@ -96,6 +96,9 @@ def get_required_attr(config, attr_name):
 if lldb_path is not None:
     config.available_features.add("lldb")
 
+for tool in ["llvm-ar", "llvm-lto2", "opt"]:
+    if llvm_config.use_llvm_tool(tool):
+        config.available_features.add(tool)
 
 def configure_dexter_substitutions():
     """Configure substitutions for host platform and return list of dependencies"""
diff --git a/lld/COFF/Config.h b/lld/COFF/Config.h
index 0c7c4e91402f1..dd7e9efb77a29 100644
--- a/lld/COFF/Config.h
+++ b/lld/COFF/Config.h
@@ -192,6 +192,11 @@ struct Configuration {
   // Used for /lldltocachepolicy=policy
   llvm::CachePruningPolicy ltoCachePolicy;
 
+  // Used for --thinlto-distributor=
+  StringRef DTLTODistributor;
+  // Used for --thinlto-remote-opt-tool=
+  StringRef DTLTORemoteOptTool;
+
   // Used for /opt:[no]ltodebugpassmanager
   bool ltoDebugPassManager = false;
 
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index 6433ce6643f9c..cd08267a94bf9 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -1518,6 +1518,14 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
     v.push_back(arg->getValue());
     config->mllvmOpts.emplace_back(arg->getValue());
   }
+
+  if (!ctx.config.DTLTODistributor.empty())
+    for (auto o : {"-thinlto-remote-opt-tool-arg=-fdiagnostics-format",
+                   "-thinlto-remote-opt-tool-arg=msvc"}) {
+      v.push_back(o);
+      config->mllvmOpts.emplace_back(o);
+    }
+
   {
     llvm::TimeTraceScope timeScope2("Parse cl::opt");
     cl::ResetAllOptionOccurrences();
@@ -2081,6 +2089,11 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
     Fatal(ctx) << "/manifestinput: requires /manifest:embed";
   }
 
+  // Handle DTLTO options.
+  config->DTLTODistributor = args.getLastArgValue(OPT_thinlto_distributor_eq);
+  config->DTLTORemoteOptTool =
+      args.getLastArgValue(OPT_thinlto_remote_opt_tool_eq);
+
   // Handle /dwodir
   config->dwoDir = args.getLastArgValue(OPT_dwodir);
 
diff --git a/lld/COFF/LTO.cpp b/lld/COFF/LTO.cpp
index a8cecb39ac614..36f8f7b25c2a8 100644
--- a/lld/COFF/LTO.cpp
+++ b/lld/COFF/LTO.cpp
@@ -16,6 +16,7 @@
 #include "lld/Common/Filesystem.h"
 #include "lld/Common/Strings.h"
 #include "lld/Common/TargetOptionsCommandFlags.h"
+#include "lld/Common/Version.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringRef.h"
@@ -116,7 +117,18 @@ BitcodeCompiler::BitcodeCompiler(COFFLinkerContext &c) : ctx(c) {
 
   // Initialize ltoObj.
   lto::ThinBackend backend;
-  if (ctx.config.thinLTOIndexOnly) {
+  if (!ctx.config.DTLTODistributor.empty()) {
+    StringRef version = getenv("LLD_VERSION"); // For testing only.
+    if (version.empty())
+      version = ctx.saver.save(getLLDVersion());
+    backend = lto::createOutOfProcessThinBackend(
+        llvm::heavyweight_hardware_concurrency(ctx.config.thinLTOJobs),
+        /*OnWrite=*/nullptr,
+        /*ShouldEmitIndexFiles=*/false,
+        /*ShouldEmitImportFiles=*/false, ctx.config.outputFile, version,
+        ctx.config.DTLTORemoteOptTool, ctx.config.DTLTODistributor,
+        !ctx.config.saveTempsArgs.empty());
+  } else if (ctx.config.thinLTOIndexOnly) {
     auto OnIndexWrite = [&](StringRef S) { thinIndices.erase(S); };
     backend = lto::createWriteIndexesThinBackend(
         llvm::hardware_concurrency(ctx.config.thinLTOJobs),
@@ -182,13 +194,15 @@ std::vector<InputFile *> BitcodeCompiler::compile() {
   // native object files for ThinLTO incremental builds. If a path was
   // specified, configure LTO to use it as the cache directory.
   FileCache cache;
+  auto AddBuffer = [&](size_t task, const Twine &moduleName,
+                       std::unique_ptr<MemoryBuffer> mb) {
+    files[task] = std::move(mb);
+    file_names[task] = moduleName.str();
+  };
+
   if (!ctx.config.ltoCache.empty())
-    cache = check(localCache("ThinLTO", "Thin", ctx.config.ltoCache,
-                             [&](size_t task, const Twine &moduleName,
-                                 std::unique_ptr<MemoryBuffer> mb) {
-                               files[task] = std::move(mb);
-                               file_names[task] = moduleName.str();
-                             }));
+    cache =
+        check(localCache("ThinLTO", "Thin", ctx.config.ltoCache, AddBuffer));
 
   checkError(ltoObj->run(
       [&](size_t task, const Twine &moduleName) {
@@ -196,7 +210,7 @@ std::vector<InputFile *> BitcodeCompiler::compile() {
         return std::make_unique<CachedFileStream>(
             std::make_unique<raw_svector_ostream>(buf[task].second));
       },
-      cache));
+      AddBuffer, cache));
 
   // Emit empty index files for non-indexed files
   for (StringRef s : thinIndices) {
diff --git a/lld/COFF/Options.td b/lld/COFF/Options.td
index b6fd3d0daaef9..a4cfc6129d58f 100644
--- a/lld/COFF/Options.td
+++ b/lld/COFF/Options.td
@@ -270,6 +270,10 @@ def thinlto_object_suffix_replace : P<
 def thinlto_prefix_replace: P<
     "thinlto-prefix-replace",
     "'old;new' replace old prefix with new prefix in ThinLTO outputs">;
+def thinlto_distributor_eq: Joined<["--"], "thinlto-distributor=">,
+  HelpText<"Distributor to use for ThinLTO backend compilations">;
+def thinlto_remote_opt_tool_eq: Joined<["--"], "thinlto-remote-opt-tool=">,
+  HelpText<"Optimization tool to be invoked by the ThinLTO distributor">;
 def lto_obj_path : P<
     "lto-obj-path",
     "output native object for merged LTO unit to this path">;
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index f132b11b20c63..a27de8f9d6a63 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -243,6 +243,8 @@ struct Config {
   llvm::SmallVector<llvm::StringRef, 0> searchPaths;
   llvm::SmallVector<llvm::StringRef, 0> symbolOrderingFile;
   llvm::SmallVector<llvm::StringRef, 0> thinLTOModulesToCompile;
+  llvm::StringRef DTLTODistributor;
+  llvm::StringRef DTLTORemoteOptTool;
   llvm::SmallVector<llvm::StringRef, 0> undefined;
   llvm::SmallVector<SymbolVersion, 0> dynamicList;
   llvm::SmallVector<uint8_t, 0> buildIdVector;
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 7d14180a49926..58524d8a3cad7 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1341,6 +1341,9 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) {
       args.hasFlag(OPT_dependent_libraries, OPT_no_dependent_libraries, true);
   ctx.arg.disableVerify = args.hasArg(OPT_disable_verify);
   ctx.arg.discard = getDiscard(args);
+  ctx.arg.DTLTODistributor = args.getLastArgValue(OPT_thinlto_distributor_eq);
+  ctx.arg.DTLTORemoteOptTool =
+      args.getLastArgValue(OPT_thinlto_remote_opt_tool_eq);
   ctx.arg.dwoDir = args.getLastArgValue(OPT_plugin_opt_dwo_dir_eq);
   ctx.arg.dynamicLinker = getDynamicLinker(ctx, args);
   ctx.arg.ehFrameHdr =
diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index d43de8ce6dfef..c5019eae2c9c5 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -21,6 +21,7 @@
 #include "llvm/ADT/CachedHashString.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/LTO/LTO.h"
+#include "llvm/Object/Archive.h"
 #include "llvm/Object/IRObjectFile.h"
 #include "llvm/Support/ARMAttributeParser.h"
 #include "llvm/Support/ARMBuildAttributes.h"
@@ -1702,6 +1703,38 @@ static uint8_t getOsAbi(const Triple &t) {
   }
 }
 
+namespace dtlto {
+// Return true if the file at archiveFilePath starts with ThinArchiveMagic.
+bool isThinArchive(Ctx &ctx, StringRef archiveFilePath) {
+  const size_t thinArchiveMagicLen = sizeof(ThinArchiveMagic) - 1;
+
+  ErrorOr<std::unique_ptr<MemoryBuffer>> memBufferOrError =
+      MemoryBuffer::getFileSlice(archiveFilePath, thinArchiveMagicLen, 0);
+  if (std::error_code ec = memBufferOrError.getError()) {
+    ErrAlways(ctx) << "cannot open " << archiveFilePath << ": " << ec.message();
+    return false;
+  }
+
+  MemoryBufferRef memBufRef = *memBufferOrError.get();
+  return memBufRef.getBuffer().starts_with(ThinArchiveMagic);
+}
+
+// Resolve a relative member path against the archive's directory; drop dots.
+std::string computeFullThinArchiveMemberPath(const StringRef modulePath,
+                                             const StringRef archiveName) {
+  assert(!archiveName.empty());
+  SmallString<64> archiveMemberPath;
+  if (path::is_relative(modulePath)) {
+    archiveMemberPath = path::parent_path(archiveName);
+    path::append(archiveMemberPath, modulePath);
+  } else
+    archiveMemberPath = modulePath;
+
+  path::remove_dots(archiveMemberPath, /*remove_dot_dot=*/true);
+  return archiveMemberPath.c_str();
+}
+} // namespace dtlto
+
 BitcodeFile::BitcodeFile(Ctx &ctx, MemoryBufferRef mb, StringRef archiveName,
                          uint64_t offsetInArchive, bool lazy)
     : InputFile(ctx, BitcodeKind, mb) {
@@ -1712,6 +1745,13 @@ BitcodeFile::BitcodeFile(Ctx &ctx, MemoryBufferRef mb, StringRef archiveName,
   if (ctx.arg.thinLTOIndexOnly)
     path = replaceThinLTOSuffix(ctx, mb.getBufferIdentifier());
 
+  // For DTLTO the name needs to be a valid path to a bitcode file.
+  bool dtltoThinArchiveHandling = !ctx.arg.DTLTODistributor.empty() &&
+                                  !archiveName.empty() &&
+                                  dtlto::isThinArchive(ctx, archiveName);
+  if (dtltoThinArchiveHandling)
+    path = dtlto::computeFullThinArchiveMemberPath(path, archiveName);
+
   // ThinLTO assumes that all MemoryBufferRefs given to it have a unique
   // name. If two archives define two members with the same name, this
   // causes a collision which result in only one of the objects being taken
@@ -1719,7 +1759,7 @@ BitcodeFile::BitcodeFile(Ctx &ctx, MemoryBufferRef mb, StringRef archiveName,
   // symbols later in the link stage). So we append file offset to make
   // filename unique.
   StringSaver &ss = ctx.saver;
-  StringRef name = archiveName.empty()
+  StringRef name = (archiveName.empty() || dtltoThinArchiveHandling)
                        ? ss.save(path)
                        : ss.save(archiveName + "(" + path::filename(path) +
                                  " at " + utostr(offsetInArchive) + ")");
@@ -1727,6 +1767,22 @@ BitcodeFile::BitcodeFile(Ctx &ctx, MemoryBufferRef mb, StringRef archiveName,
 
   obj = CHECK2(lto::InputFile::create(mbref), this);
 
+  // A thin archive member file path potentially can be relative to a thin
+  // archive. This will result in an invalid file path name passed in
+  // 'mb->Identifier', (because from the linker's perspective, relative -
+  // means relative to the linker process' current directory).
+  // For non-archive bitcodes and referenced archive members, a correctly
+  // generated 'name' is used to identify the memory buffer associated with
+  // these bitcode files. However, for a non-referenced archive member,
+  // incorrect 'mb->Identifier' will be used as a path for generating an empty
+  // summary index file later, leading to a crash. We have to fix this problem
+  // by replacing the value of 'mb->Identifier' with 'name'.
+  // Since the MemoryBufferRef class does not allow an individual access to
+  // its data members, we will use the class copy constructor for updating the
+  // 'Identifier' data member value.
+  if (dtltoThinArchiveHandling)
+    this->mb = mbref;
+
   Triple t(obj->getTargetTriple());
   ekind = getBitcodeELFKind(t);
   emachine = getBitcodeMachineKind(ctx, mb.getBufferIdentifier(), t);
diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp
index 195526bf390d2..ed7a36d9a7e59 100644
--- a/lld/ELF/LTO.cpp
+++ b/lld/ELF/LTO.cpp
@@ -17,6 +17,7 @@
 #include "lld/Common/Filesystem.h"
 #include "lld/Common/Strings.h"
 #include "lld/Common/TargetOptionsCommandFlags.h"
+#include "lld/Common/Version.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
@@ -186,6 +187,16 @@ BitcodeCompiler::BitcodeCompiler(Ctx &ctx) : ctx(ctx) {
         std::string(ctx.arg.thinLTOPrefixReplaceNew),
         std::string(ctx.arg.thinLTOPrefixReplaceNativeObject),
         ctx.arg.thinLTOEmitImportsFiles, indexFile.get(), onIndexWrite);
+  } else if (!ctx.arg.DTLTODistributor.empty() && !ctx.bitcodeFiles.empty()) {
+    StringRef version = getenv("LLD_VERSION"); // For testing only.
+    if (version.empty())
+      version = ctx.saver.save(getLLDVersion());
+    backend = lto::createOutOfProcessThinBackend(
+        llvm::heavyweight_hardware_concurrency(ctx.arg.thinLTOJobs),
+        onIndexWrite, ctx.arg.thinLTOEmitIndexFiles,
+        ctx.arg.thinLTOEmitImportsFiles, ctx.arg.outputFile, version,
+        ctx.arg.DTLTORemoteOptTool, ctx.arg.DTLTODistributor,
+        !ctx.arg.saveTempsArgs.empty());
   } else {
     backend = lto::createInProcessThinBackend(
         llvm::heavyweight_hardware_concurrency(ctx.arg.thinLTOJobs),
@@ -319,13 +330,14 @@ SmallVector<std::unique_ptr<InputFile>, 0> BitcodeCompiler::compile() {
   // to cache native object files for ThinLTO incremental builds. If a path was
   // specified, configure LTO to use it as the cache directory.
   FileCache cache;
+  AddBufferFn AddBuffer = [&](size_t task, const Twine &moduleName,
+                              std::unique_ptr<MemoryBuffer> mb) {
+    files[task] = std::move(mb);
+    filenames[task] = moduleName.str();
+  };
   if (!ctx.arg.thinLTOCacheDir.empty())
-    cache = check(localCache("ThinLTO", "Thin", ctx.arg.thinLTOCacheDir,
-                             [&](size_t task, const Twine &moduleName,
-                                 std::unique_ptr<MemoryBuffer> mb) {
-                               files[task] = std::move(mb);
-                               filenames[task] = moduleName.str();
-                             }));
+    cache = check(
+        localCache("ThinLTO", "Thin", ctx.arg.thinLTOCacheDir, AddBuffer));
 
   if (!ctx.bitcodeFiles.empty())
     checkError(ctx.e, ltoObj->run(
@@ -335,7 +347,7 @@ SmallVector<std::unique_ptr<InputFile>, 0> BitcodeCompiler::compile() {
                                 std::make_unique<raw_svector_ostream>(
                                     buf[task].second));
                           },
-                          cache));
+                          AddBuffer, cache));
 
   // Emit empty index files for non-indexed files but not in single-module mode.
   if (ctx.arg.thinLTOModulesToCompile.empty()) {
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index b3b12a0646875..c4d37cdfed4ba 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -1,3 +1,4 @@
+
 include "llvm/Option/OptParser.td"
 
 // Convenience classes for long options which only accept two dashes. For lld
@@ -700,7 +701,10 @@ def thinlto_object_suffix_replace_eq: JJ<"thinlto-object-suffix-replace=">;
 def thinlto_prefix_replace_eq: JJ<"thinlto-prefix-replace=">;
 def thinlto_single_module_eq: JJ<"thinlto-single-module=">,
   HelpText<"Specify a single module to compile in ThinLTO mode, for debugging only">;
-
+def thinlto_distributor_eq: JJ<"thinlto-distributor=">,
+  HelpText<"Distributor to use for ThinLTO backend compilations">;
+def thinlto_remote_opt_tool_eq : JJ<"thinlto-remote-opt-tool=">,
+  HelpText<"Optimization tool to be invoked by the ThinLTO distributor">;
 defm fat_lto_objects: BB<"fat-lto-objects",
     "Use the .llvm.lto section, which contains LLVM bitcode, in fat LTO object files to perform LTO.",
     "Ignore the .llvm.lto section in relocatable object files (default).">;
diff --git a/lld/MachO/LTO.cpp b/lld/MachO/LTO.cpp
index 2eeca44ecbb3c..4a1a3989be76a 100644
--- a/lld/MachO/LTO.cpp
+++ b/lld/MachO/LTO.cpp
@@ -198,12 +198,13 @@ std::vector<ObjFile *> BitcodeCompiler::compile() {
   // to cache native object files for ThinLTO incremental builds. If a path was
   // specified, configure LTO to use it as the cache directory.
   FileCache cache;
+  AddBufferFn AddBuffer = [&](size_t task, const Twine &moduleName,
+                              std::unique_ptr<MemoryBuffer> mb) {
+    files[task] = std::move(mb);
+  };
   if (!config->thinLTOCacheDir.empty())
-    cache = check(localCache("ThinLTO", "Thin", config->thinLTOCacheDir,
-                             [&](size_t task, const Twine &moduleName,
-                                 std::unique_ptr<MemoryBuffer> mb) {
-                               files[task] = std::move(mb);
-                             }));
+    cache = check(
+        localCache("ThinLTO", "Thin", config->thinLTOCacheDir, AddBuffer));
 
   if (hasFiles)
     checkError(ltoObj->run(
@@ -211,7 +212,7 @@ std::vector<ObjFile *> BitcodeCompiler::compile() {
           return std::make_unique<CachedFileStream>(
               std::make_unique<raw_svector_ostream>(buf[task]));
         },
-        cache));
+        AddBuffer, cache));
 
   // Emit empty index files for non-indexed files
   for (StringRef s : thinIndices) {
diff --git a/lld/docs/DTLTO.rst b/lld/docs/DTLTO.rst
new file mode 100644
index 0000000000000..85213f5306526
--- /dev/null
+++ b/lld/docs/DTLTO.rst
@@ -0,0 +1,60 @@
+Distributed ThinLTO (DTLTO)
+===========================
+
+DTLTO allows for the distribution of backend ThinLTO compilations via external
+distribution systems, e.g. Incredibuild. There is existing support for
+distributing ThinLTO compilations by using separate thin-link, backend
+compilation, and link steps coordinated by a build system that can handle the
+dynamic dependencies specified by the index files, such as Bazel. However, this
+often requires changes to the user's build process. DTLTO distribution is
+managed internally in LLD as part of the traditional link step and, therefore,
+should be usable via any build process that can support in-process ThinLTO.
+
+ELF LLD
+-------
+
+The command line interface for DTLTO is:
+
+- `--thinlto-distributor=<path>`
+  Specifies the file to execute as a distributor process.
+  If specified, ThinLTO backend compilations will be distributed.
+
+- `--thinlto-remote-opt-tool=<path>`
+  Specifies the path to the tool that the distributor process will use for
+  backend compilations.
+
+  The remote optimisation tool invoked must match the version of LLD.
+
+  Currently `Clang` is used on remote machines to perform optimization. The
+  design permits this to be swapped out later without affecting distributors.
+  This may occur in the future, at which point a different set of constraints
+  will apply.
+
+- `-mllvm -thinlto-distributor-arg=<arg>`  
+  Specifies `<arg>` on the command line when invoking the distributor.  
+
+- `-mllvm -thinlto-remote-opt-tool-arg=<arg>`  
+  Specifies `<arg>` on the command line to the remote optimisation tool. These
+  arguments are appended to the end of the command line for the remote 
+  optimisation tool.
+
+Remote optimisation tool options that imply an additional input or output file 
+dependency are unsupported and may result in miscompilation depending on the
+properties of the distribution system (as such additional input/output files may
+not be pushed to or fetched from distribution system nodes correctly). If such 
+options are required, then the distributor can be modified to accept switches 
+that specify additional input/output dependencies, and 
+`-Xdist`/`-thinlto-distributor-arg=` can be used to pass such options through 
+to the distributor.
+
+Some LLD LTO options (e.g., `--lto-sample-profile=<file>`) are supported. 
+Currently, other options are silently accepted but do not have the desired 
+effect. Support for such options will be expanded in the future.
+
+COFF LLD
+--------
+
+The command line interface for COFF LLD is generally the same as for ELF LLD.
+
+Currently, there is no DTLTO command line interface supplied for `Clang-cl`, as
+users are expected to invoke LLD directly.
diff --git a/lld/docs/index.rst b/lld/docs/index.rst
index 8260461c36905..69792e3b575be 100644
--- a/lld/docs/index.rst
+++ b/lld/docs/index.rst
@@ -147,3 +147,4 @@ document soon.
    ELF/start-stop-gc
    ELF/warn_backrefs
    MachO/index
+   DTLTO
diff --git a/lld/test/COFF/dtlto.test b/lld/test/COFF/dtlto.test
new file mode 100644
index 0000000000000..fcaa1eab13c15
--- /dev/null
+++ b/lld/test/COFF/dtlto.test
@@ -0,0 +1,50 @@
+# REQUIRES: x86
+
+## Test that generated JSON file for DTLTO is valid and contains the expected
+## options based on the LTO configuration.
+
+# RUN: rm -rf %t.dir && split-file %s %t.dir && cd %t.dir
+
+## Compile bitcode.
+# RUN: opt -thinlto-bc foo.ll -o foo.obj
+
+## Common command line arguments. Note that the use of validate.py will cause
+## the link to fail.
+# RUN: echo "foo.obj /entry:foo /subsystem:console \
+# RUN:   --thinlto-distributor=%python \
+# RUN:   -mllvm:-thinlto-distributor-arg=%llvm_src_root/utils/dtlto/validate.py \
+# RUN:   --thinlto-remote-opt-tool=my_clang.exe" > l.rsp
+
+## Command line arguments that should affect codegen.
+# RUN: echo "/lto-pgo-warn-mismatch:no \
+# RUN:       /lto-sample-profile:foo.ll \
+# RUN:       -mllvm:-thinlto-distributor-arg=bibbity=10 \
+# RUN:       -mllvm:-thinlto-remote-opt-tool-arg=bobbity=20" > o.rsp
+
+## Show that command line arguments have the desired effect when specified and
+## that the effect is not present otherwise.
+# RUN: not lld-link @l.rsp @o.rsp 2>&1 | FileCheck %s --check-prefixes=ERR,OPT,BOTH
+# RUN: not lld-link @l.rsp        2>&1 | FileCheck %s --check-prefixes=ERR,NONE,BOTH \
+# RUN:   --implicit-check-not=bibbity --implicit-check-not=bobbity \
+# RUN:   --implicit-check-not=-fprofile-instrument --implicit-check-not=foo.ll
+
+# OPT:  distributor_args=['bibbity=10']
+# NONE: distributor_args=[]
+
+# OPT:  "linker_output": "foo.exe"
+# OPT:  "linker_version": "LLD 1.0"
+# BOTH: "my_clang.exe"
+# BOTH:  "-O2"
+# OPT:  "bobbity=20"
+# OPT:  "-fprofile-sample-use=foo.ll"
+
+# ERR: lld-link: error: DTLTO backend compilation: cannot open native object file:
+
+#--- foo.ll
+target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+define void @foo() {
+entry:
+  ret void
+}
diff --git a/lld/test/ELF/dtlto/dtlto.test b/lld/test/ELF/dtlto/dtlto.test
new file mode 100644
index 0000000000000..7be9988d8ea81
--- /dev/null
+++ b/lld/test/ELF/dtlto/dtlto.test
@@ -0,0 +1,53 @@
+# REQUIRES: x86
+
+## Test that generated JSON file for DTLTO is valid and contains the expected
+## options based on the LTO configuration.
+
+# RUN: rm -rf %t.dir && split-file %s %t.dir && cd %t.dir
+
+## Compile bitcode.
+# RUN: opt -thinlto-bc foo.ll -o foo.o
+
+## Common command line arguments. Note that the use of validate.py will cause
+## the link to fail.
+# RUN: echo "foo.o \
+# RUN:       --thinlto-distributor=%python \
+# RUN:       -mllvm -thinlto-distributor-arg=%llvm_src_root/utils/dtlto/validate.py \
+# RUN:       --thinlto-remote-opt-tool=my_clang.exe" > l.rsp
+
+## Command line arguments that should affect codegen.
+# RUN: echo "--lto-O3 \
+# RUN:       --lto-CGO2 \
+# RUN:       --no-lto-pgo-warn-mismatch \
+# RUN:       --lto-sample-profile=foo.ll \
+# RUN:       -mllvm -thinlto-distributor-arg=bibbity=10 \
+# RUN:       -mllvm -thinlto-remote-opt-tool-arg=bobbity=20" > o.rsp
+
+## Show that command line arguments have the desired effect when specified and
+## that the effect is not present otherwise.
+# RUN: not ld.lld @l.rsp @o.rsp 2>&1 | FileCheck %s --check-prefixes=ERR,OPT,BOTH
+# RUN: not ld.lld @l.rsp        2>&1 | FileCheck %s --check-prefixes=ERR,NONE,BOTH \
+# RUN:   --implicit-check-not=bibbity --implicit-check-not=bobbity \
+# RUN:   --implicit-check-not=-fprofile-instrument --implicit-check-not=foo.ll
+
+# OPT:  distributor_args=['bibbity=10']
+# NONE: distributor_args=[]
+
+# OPT:       "linker_output": "a.out"
+# OPT:       "linker_version": "LLD 1.0"
+# BOTH:      "my_clang.exe"
+# OPT:       "-O3"
+# NONE:      "-O2"
+# OPT:       "-fprofile-sample-use=foo.ll"
+# OPT:       "bobbity=20"
+
+# ERR: ld.lld: error: DTLTO backend compilation: cannot open native object file:
+
+#--- foo.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @foo() {
+entry:
+  ret void
+}
diff --git a/lld/test/ELF/dtlto/imports.test b/lld/test/ELF/dtlto/imports.test
new file mode 100644
index 0000000000000..2e096d7b2d93c
--- /dev/null
+++ b/lld/test/ELF/dtlto/imports.test
@@ -0,0 +1,69 @@
+# REQUIRES: x86
+
+## Check that DTLTO handles imports files correctly.
+
+# RUN: rm -rf %t.dir && split-file %s %t.dir && cd %t.dir
+
+## Compile bitcode.
+# RUN: opt -module-summary 0.ll -o 0.o -O2
+# RUN: opt -module-summary 1.ll -o 1.o -O2
+
+## Common command line arguments. Note that the use of validate.py will cause
+## the link to fail.
+# RUN: echo "0.o 1.o \
+# RUN:       --thinlto-distributor=%python \
+# RUN:       -mllvm -thinlto-distributor-arg=%llvm_src_root/utils/dtlto/validate.py \
+# RUN:       --thinlto-remote-opt-tool=dummy.exe" > l.rsp
+
+## We expect an import from 0.o into 1.o but no imports into 0.o. Check that the
+## expected input files have been added to the JSON.
+# RUN: not ld.lld @l.rsp >out.log 2>&1
+# RUN: FileCheck --input-file=out.log %s --check-prefixes=INPUTS,ERR
+
+# INPUTS:      "primary_input": [
+# INPUTS-NEXT:   "0.o"
+# INPUTS-NEXT: ]
+# INPUTS:      "imports": []
+# INPUTS:      "primary_input": [
+# INPUTS-NEXT:   "1.o"
+# INPUTS-NEXT: ]
+# INPUTS:      "imports": [
+# INPUTS-NEXT:   "0.o"
+# INPUTS-NEXT: ]
+
+## This check ensures that we have failed for the expected reason.
+# ERR: ld.lld: error: DTLTO backend compilation: cannot open native object file:
+
+
+## Check that imports files have not been created.
+# RUN: ls | FileCheck %s --check-prefix=NOINDEXFILES
+# NOINDEXFILES-NOT: imports
+
+
+## Check that imports files are created with --thinlto-emit-imports-files.
+# RUN: not ld.lld @l.rsp --thinlto-emit-imports-files 2>&1 \ 
+# RUN:   | FileCheck %s --check-prefixes=ERR
+# RUN: ls | FileCheck %s --check-prefix=INDEXFILES
+# INDEXFILES: 0.o.imports
+# INDEXFILES: 1.o.imports
+
+;--- 0.ll
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @g() {
+entry:
+  ret void
+}
+
+;--- 1.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @g(...)
+
+define void @f() {
+entry:
+  call void (...) @g()
+  ret void
+}
diff --git a/lld/test/ELF/dtlto/relative.test b/lld/test/ELF/dtlto/relative.test
new file mode 100644
index 0000000000000..a938ddea57b87
--- /dev/null
+++ b/lld/test/ELF/dtlto/relative.test
@@ -0,0 +1,65 @@
+# REQUIRES: x86
+
+## Test that DTLTO writes the files it generates to the expected locations.
+
+# RUN: rm -rf %t.dir && split-file %s %t.dir && cd %t.dir
+
+# RUN: mkdir other && cd other
+
+## Compile bitcode.
+# RUN: opt -module-summary ../0.ll -o ../0.o
+# RUN: opt -module-summary ../1.ll -o ../1.o
+
+## Common command line arguments. Note that the use of validate.py will cause
+## the link to fail.
+# RUN: echo "--thinlto-distributor=%python \
+# RUN:   -mllvm -thinlto-distributor-arg=%llvm_src_root/utils/dtlto/validate.py \
+# RUN:   --thinlto-remote-opt-tool=dummy.exe" > l.rsp
+
+## Check that the expected set of filenames have been generated.
+# RUN: not ld.lld @l.rsp ../0.o ../1.o -o ../up.elf --thinlto-emit-index-files \
+# RUN:   --thinlto-emit-imports-files >out.log 2>&1
+# RUN: FileCheck --input-file=out.log %s --check-prefixes=INPUTS,ERR
+
+# INPUTS:      "primary_input": [
+# INPUTS-NEXT:   "../0.o"
+# INPUTS-NEXT: ]
+# INPUTS:      "summary_index": [
+# INPUTS-NEXT:   "..{{(/|\\\\)}}0.{{[0-9]+}}.{{[0-9]+}}.native.o.thinlto.bc"
+# INPUTS-NEXT: ]
+# INPUTS:      "primary_input": [
+# INPUTS-NEXT:   "../1.o"
+# INPUTS-NEXT: ]
+# INPUTS:      "summary_index": [
+# INPUTS-NEXT:   "..{{(/|\\\\)}}1.{{[0-9]+}}.{{[0-9]+}}.native.o.thinlto.bc"
+# INPUTS-NEXT: ]
+
+# ERR: DTLTO backend compilation: cannot open native object file:
+
+## Check that imports and index files are created when requested.
+# RUN: ls .. | FileCheck %s --check-prefix=FILES
+# FILES: 0.{{[0-9]+}}.{{[0-9]+}}.native.o.thinlto.bc
+# FILES: 0.o.imports
+# FILES: 1.{{[0-9]+}}.{{[0-9]+}}.native.o.thinlto.bc
+# FILES: 1.o.imports
+
+;--- 0.ll
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @g() {
+entry:
+  ret void
+}
+
+;--- 1.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @g(...)
+
+define void @f() {
+entry:
+  call void (...) @g()
+  ret void
+}
diff --git a/lld/test/lit.cfg.py b/lld/test/lit.cfg.py
index 9e6b0e839d9a8..10f556567cdc8 100644
--- a/lld/test/lit.cfg.py
+++ b/lld/test/lit.cfg.py
@@ -36,6 +36,7 @@
 
 llvm_config.use_default_substitutions()
 llvm_config.use_lld()
+config.substitutions.append(("%llvm_src_root", config.llvm_src_root))
 
 tool_patterns = [
     "llc",
diff --git a/lld/wasm/LTO.cpp b/lld/wasm/LTO.cpp
index b9bd48acd6dc1..d628f88001d23 100644
--- a/lld/wasm/LTO.cpp
+++ b/lld/wasm/LTO.cpp
@@ -191,12 +191,13 @@ std::vector<StringRef> BitcodeCompiler::compile() {
   // to cache native object files for ThinLTO incremental builds. If a path was
   // specified, configure LTO to use it as the cache directory.
   FileCache cache;
+  AddBufferFn Addbuffer = [&](size_t task, const Twine &moduleName,
+                              std::unique_ptr<MemoryBuffer> mb) {
+    files[task] = std::move(mb);
+  };
   if (!ctx.arg.thinLTOCacheDir.empty())
-    cache = check(localCache("ThinLTO", "Thin", ctx.arg.thinLTOCacheDir,
-                             [&](size_t task, const Twine &moduleName,
-                                 std::unique_ptr<MemoryBuffer> mb) {
-                               files[task] = std::move(mb);
-                             }));
+    cache = check(
+        localCache("ThinLTO", "Thin", ctx.arg.thinLTOCacheDir, Addbuffer));
 
   checkError(ltoObj->run(
       [&](size_t task, const Twine &moduleName) {
@@ -204,7 +205,7 @@ std::vector<StringRef> BitcodeCompiler::compile() {
         return std::make_unique<CachedFileStream>(
             std::make_unique<raw_svector_ostream>(buf[task].second));
       },
-      cache));
+      Addbuffer, cache));
 
   // Emit empty index files for non-indexed files but not in single-module mode.
   for (StringRef s : thinIndices) {
diff --git a/llvm/docs/DTLTO.rst b/llvm/docs/DTLTO.rst
new file mode 100644
index 0000000000000..92bfdcef3ac94
--- /dev/null
+++ b/llvm/docs/DTLTO.rst
@@ -0,0 +1,228 @@
+===================
+DTLTO
+===================
+.. contents::
+   :local:
+   :depth: 2
+
+.. toctree::
+   :maxdepth: 1
+
+Distributed ThinLTO (DTLTO)
+===========================
+
+Distributed ThinLTO (DTLTO) facilitates the distribution of backend ThinLTO
+compilations via external distribution systems such as Incredibuild.
+
+The existing method of distributing ThinLTO compilations via separate thin-link,
+backend compilation, and link steps often requires significant changes to the
+user's build process to adopt, as it requires using a build system which can
+handle the dynamic dependencies specified by the index files, such as Bazel.
+
+DTLTO eliminates this need by managing distribution internally within the LLD
+linker during the traditional link step. This allows DTLTO to be used with any
+build process that supports in-process ThinLTO.
+
+Limitations
+-----------
+
+The current implementation of DTLTO has the following limitations:
+
+- The ThinLTO cache is not supported.
+- Only ELF and COFF platforms are supported.
+- Archives with bitcode members are not supported.
+- Only a very limited set of LTO configurations are currently supported, e.g.,
+  support for basic block sections is not currently available.
+
+Overview of Operation
+---------------------
+
+For each ThinLTO backend compilation job, LLD:
+
+1. Generates the required summary index shard.
+2. Records a list of input and output files.
+3. Constructs a Clang command line to perform the ThinLTO backend compilation.
+
+This information is supplied, via a JSON file, to a distributor program that
+executes the backend compilations using a distribution system. Upon completion,
+LLD integrates the compiled native object files into the link process.
+
+The design keeps the details of distribution systems out of the LLVM source
+code.
+
+Distributors
+------------
+
+Distributors are programs responsible for:
+
+1. Consuming the JSON file that describes the backend compilation jobs.
+2. Translating job descriptions into requests for the distribution system.
+3. Blocking execution until all backend compilations are complete.
+
+Distributors must return a non-zero exit code on failure. They can be
+implemented as binaries or in scripting languages, such as Python. An example
+script demonstrating basic local execution is available with the LLVM source
+code.
+
+How Distributors Are Invoked
+----------------------------
+
+Clang and LLD provide options to specify a distributor program for managing
+backend compilations. Distributor options and backend compilation options can
+also be specified. Such options are transparently forwarded.
+
+The backend compilations are currently performed by invoking Clang. For further
+details, refer to:
+
+- Clang documentation: https://clang.llvm.org/docs/ThinLTO.html
+- LLD documentation: https://lld.llvm.org/DTLTO.html
+
+When invoked with a distributor, LLD generates a JSON file describing the
+backend compilation jobs and executes the distributor passing it this file. The
+JSON file provides the following information to the distributor:
+
+- The **command line** to execute the backend compilations.
+   - DTLTO constructs a Clang command line by translating some of the LTO
+     configuration state into Clang options and forwarding options specified
+     by the user.
+
+- **Link output path**.
+   - A string identifying the output to which this LTO invocation will 
+     contribute. Distributors can use this to label build jobs for informational
+     purposes.
+
+- **Linker's version string**.
+   - Distributors can use this to determine if the invoked remote optimisation
+     tool is compatible.
+
+- The list of **imports** required for each job.
+   - The per-job list of bitcode files from which importing will occur. This is
+     the same information that is emitted into import files for ThinLTO.
+
+- The **input files** required for each job.
+   - The per-job set of files required for backend compilation, such as bitcode
+     files, summary index files, and profile data.
+
+- The **output files** generated by each job.
+   - The per-job files generated by the backend compilations, such as compiled
+     object files and toolchain metrics.
+
+Temporary Files
+---------------
+
+During its operation, DTLTO generates temporary files. Temporary files are
+created in the same directory as the linker's output file and their filenames
+include the stem of the bitcode module, or the output file that the LTO 
+invocation is contributing to, to aid the user in identifying them:
+
+- **JSON Job Description File**:
+    - Format:  `dtlto.<UID>.dist-file.json`
+    - Example: `dtlto.77380.dist-file.json` (for output file `dtlto.elf`).
+
+- **Object Files From Backend Compilations**:
+    - Format:  `<Module ID stem>.<Task>.<UID>.native.o`
+    - Example: `my.1.77380.native.o` (for bitcode module `my.o`).
+
+- **Summary Index Shard Files**:
+    - Format:  `<Module ID stem>.<Task>.<UID>.native.o.thinlto.bc`
+    - Example: `my.1.77380.native.o.thinlto.bc` (for bitcode module `my.o`).
+
+Temporary files are removed, by default, after the backend compilations complete.
+
+JSON Schema
+-----------
+
+Below is an example of a JSON job file for backend compilation of the module
+`dtlto.o`:
+
+.. code-block:: json
+
+    {
+        "common": {
+            "linker_output": "dtlto.elf",
+            "linker_version": "LLD 20.0.0",
+            "args": [
+                "/usr/local/clang",
+                "-O3", "-fprofile-sample-use=my.profdata",
+                "-o", ["primary_output", 0],
+                "-c", "-x", "ir", ["primary_input", 0],
+                ["summary_index", "-fthinlto-index=", 0],
+                "-target", "x86_64-sie-ps5"
+            ]
+        },
+        "jobs": [
+            {
+                "primary_input": ["dtlto.o"],
+                "summary_index": ["dtlto.1.51232.native.o.thinlto.bc"],
+                "primary_output": ["dtlto.1.51232.native.o"],
+                "imports": [],
+                "additional_inputs": ["my.profdata"]
+            }
+        ]
+    }
+
+Key Features of the Schema
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+- **Input/Output Paths**: Paths are stored in per-file-type array fields. This
+  allows files to be adjusted, if required, to meet the constraints of the
+  underlying distribution system. For example, a system may only be able to read
+  and write remote files to `C:\\sandbox`. The remote paths used can be adjusted
+  by the distributor for such constraints. Once outputs are back on the local
+  system, the distributor can rename them as required.
+
+
+- **Command-Line Template**: Command-line options are stored in a common
+  template to avoid duplication for each job. The template consists of an array
+  of strings and arrays. The arrays are placeholders which reference per-job
+  paths. This allows the remote optimisation tool to be changed without updating
+  the distributors.
+
+Command-Line Expansion Example
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To create the backend compilation commands, the command-line template is
+expanded for each job. Placeholders are expanded in the following way: The first
+array element specifies the name of the array field to look in. The remaining
+elements are converted to strings and concatenated. Integers are converted by
+indexing into the specified array.
+
+The example above generates the following backend compilation command for
+`dtlto.o`:
+
+.. code-block:: console
+
+    /usr/local/clang -O3 -fprofile-sample-use=my.profdata \
+        -o dtlto.1.51232.native.o -c -x ir dtlto.o \
+        -fthinlto-index=dtlto.1.51232.native.o.thinlto.bc -target x86_64-sie-ps5
+
+This expansion scheme allows the remote optimization tool to be changed without
+updating the distributors. For example, if the "args" field in the above example
+was replaced with:
+
+.. code-block:: json
+
+    "args": [
+        "custom-codegen-tool",
+        "-opt-level=2",
+        "-profile-instrument-use-path=my.profdata",
+        "-output", ["primary_output", 0],
+        "-input", ["primary_input", 0],
+        "-thinlto-index", ["summary_index", 0],
+        "-triple", "x86_64-sie-ps5"
+    ]
+
+Then distributors can expand the command line without needing to be updated:
+
+.. code-block:: console
+
+    custom-codegen-tool -opt-level=2 -profile-instrument-use-path=my.profdata \
+        -output dtlto.1.51232.native.o -input dtlto.o \
+        -thinlto-index dtlto.1.51232.native.o.thinlto.bc -triple x86_64-sie-ps5
+
+Constraints
+-----------
+
+- Matching versions of Clang and LLD should be used.
+- The distributor used must support the JSON schema generated by the version of
+  LLD in use.
\ No newline at end of file
diff --git a/llvm/docs/UserGuides.rst b/llvm/docs/UserGuides.rst
index 6eee564713d6d..3e16fe42b7d11 100644
--- a/llvm/docs/UserGuides.rst
+++ b/llvm/docs/UserGuides.rst
@@ -32,6 +32,7 @@ intermediate LLVM representation.
    DebuggingJITedCode
    DirectXUsage
    Docker
+   DTLTO
    FatLTO
    ExtendingLLVM
    GitHub
@@ -164,6 +165,11 @@ Optimizations
    This document describes the interface between LLVM intermodular optimizer
    and the linker and its design
 
+:doc:`DTLTO`
+   This document describes the DTLTO implementation, which allows for
+   distributing ThinLTO backend compilations without requiring support from
+   the build system.
+
 :doc:`GoldPlugin`
    How to build your programs with link-time optimization on Linux.
 
diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h
index 242a05f7d32c0..594fb50da4939 100644
--- a/llvm/include/llvm/LTO/LTO.h
+++ b/llvm/include/llvm/LTO/LTO.h
@@ -199,6 +199,8 @@ class InputFile {
 
 using IndexWriteCallback = std::function<void(const std::string &)>;
 
+using ImportsFilesContainer = llvm::SmallVector<std::string>;
+
 /// This class defines the interface to the ThinLTO backend.
 class ThinBackendProc {
 protected:
@@ -223,13 +225,15 @@ class ThinBackendProc {
         BackendThreadPool(ThinLTOParallelism) {}
 
   virtual ~ThinBackendProc() = default;
+  virtual void setup(unsigned MaxTasks) {}
   virtual Error start(
       unsigned Task, BitcodeModule BM,
       const FunctionImporter::ImportMapTy &ImportList,
       const FunctionImporter::ExportSetTy &ExportList,
       const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
-      MapVector<StringRef, BitcodeModule> &ModuleMap) = 0;
-  Error wait() {
+      MapVector<StringRef, BitcodeModule> &ModuleMap,
+      DenseMap<StringRef, std::string> &ModuleTriples) = 0;
+  virtual Error wait() {
     BackendThreadPool.wait();
     if (Err)
       return std::move(*Err);
@@ -240,8 +244,15 @@ class ThinBackendProc {
 
   // Write sharded indices and (optionally) imports to disk
   Error emitFiles(const FunctionImporter::ImportMapTy &ImportList,
-                  llvm::StringRef ModulePath,
-                  const std::string &NewModulePath) const;
+                  StringRef ModulePath, const std::string &NewModulePath) const;
+
+  // Write sharded indices to SummaryPath, (optionally) write an imports
+  // file, and (optionally) record the imported files in ImportsFiles.
+  Error emitFiles(const FunctionImporter::ImportMapTy &ImportList,
+                  StringRef ModulePath, StringRef SummaryPath,
+                  const std::string &NewModulePath,
+                  std::optional<std::reference_wrapper<ImportsFilesContainer>>
+                      ImportsFiles) const;
 };
 
 /// This callable defines the behavior of a ThinLTO backend after the thin-link
@@ -253,7 +264,7 @@ class ThinBackendProc {
 using ThinBackendFunction = std::function<std::unique_ptr<ThinBackendProc>(
     const Config &C, ModuleSummaryIndex &CombinedIndex,
     const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
-    AddStreamFn AddStream, FileCache Cache)>;
+    AddStreamFn AddStream, AddBufferFn AddBuffer, FileCache Cache)>;
 
 /// This type defines the behavior following the thin-link phase during ThinLTO.
 /// It encapsulates a backend function and a strategy for thread pool
@@ -268,10 +279,10 @@ struct ThinBackend {
   std::unique_ptr<ThinBackendProc> operator()(
       const Config &Conf, ModuleSummaryIndex &CombinedIndex,
       const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
-      AddStreamFn AddStream, FileCache Cache) {
+      AddStreamFn AddStream, AddBufferFn AddBuffer, FileCache Cache) {
     assert(isValid() && "Invalid backend function");
     return Func(Conf, CombinedIndex, ModuleToDefinedGVSummaries,
-                std::move(AddStream), std::move(Cache));
+                std::move(AddStream), std::move(AddBuffer), std::move(Cache));
   }
   ThreadPoolStrategy getParallelism() const { return Parallelism; }
   bool isValid() const { return static_cast<bool>(Func); }
@@ -294,6 +305,32 @@ ThinBackend createInProcessThinBackend(ThreadPoolStrategy Parallelism,
                                        bool ShouldEmitIndexFiles = false,
                                        bool ShouldEmitImportsFiles = false);
 
+/// This ThinBackend generates the index shards and then runs the individual
+/// backend jobs via an external process. It takes the same parameters as the
+/// InProcessThinBackend, however, these parameters only control the behavior
+/// when generating the index files for the modules. Additionally:
+/// LinkerOutputFile is a string that should identify this LTO invocation in
+/// the context of a wider build. It's used for naming to aid the user in
+/// identifying activity related to a specific LTO invocation.
+/// LinkerVersion is the LLVM version of the tool invoking this backend. This
+/// may be used to check compatibility with external components invoked via this
+/// backend.
+/// RemoteOptTool specifies the path to a Clang executable to be invoked for the
+/// backend jobs.
+/// Distributor specifies the path to a process to invoke to manage the backend
+/// jobs execution.
+/// SaveTemps is a debugging tool that prevents temporary files created by this
+/// backend from being cleaned up.
+ThinBackend createOutOfProcessThinBackend(ThreadPoolStrategy Parallelism,
+                                          IndexWriteCallback OnWrite,
+                                          bool ShouldEmitIndexFiles,
+                                          bool ShouldEmitImportsFiles,
+                                          StringRef LinkerOutputFile,
+                                          StringRef LinkerVersion,
+                                          StringRef RemoteOptTool,
+                                          StringRef Distributor,
+                                          bool SaveTemps);
+
 /// This ThinBackend writes individual module indexes to files, instead of
 /// running the individual backend jobs. This backend is for distributed builds
 /// where separate processes will invoke the real backends.
@@ -369,15 +406,17 @@ class LTO {
   /// full description of tasks see LTOBackend.h.
   unsigned getMaxTasks() const;
 
-  /// Runs the LTO pipeline. This function calls the supplied AddStream
-  /// function to add native object files to the link.
+  /// Runs the LTO pipeline. This function calls the supplied AddStream or
+  /// AddBuffer function to add native object files to the link depending on
+  /// whether the files are streamed into memory or written to disk by the
+  /// backend.
   ///
   /// The Cache parameter is optional. If supplied, it will be used to cache
   /// native object files and add them to the link.
   ///
-  /// The client will receive at most one callback (via either AddStream or
+  /// The client will receive at most one callback (via AddStream, AddBuffer or
   /// Cache) for each task identifier.
-  Error run(AddStreamFn AddStream, FileCache Cache = {});
+  Error run(AddStreamFn AddStream, AddBufferFn AddBuffer, FileCache Cache = {});
 
   /// Static method that returns a list of libcall symbols that can be generated
   /// by LTO but might not be visible from bitcode symbol table.
@@ -426,6 +465,7 @@ class LTO {
     // The bitcode modules to compile, if specified by the LTO Config.
     std::optional<ModuleMapType> ModulesToCompile;
     DenseMap<GlobalValue::GUID, StringRef> PrevailingModuleForGUID;
+    DenseMap<StringRef, std::string> ModuleTriples;
   } ThinLTO;
 
   // The global resolution for a particular (mangled) symbol name. This is in
@@ -517,10 +557,12 @@ class LTO {
                        bool LivenessFromIndex);
 
   Error addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
-                   const SymbolResolution *&ResI, const SymbolResolution *ResE);
+                   const SymbolResolution *&ResI, const SymbolResolution *ResE,
+                   StringRef Triple);
 
   Error runRegularLTO(AddStreamFn AddStream);
-  Error runThinLTO(AddStreamFn AddStream, FileCache Cache,
+  Error runThinLTO(AddStreamFn AddStream, AddBufferFn AddBuffer,
+                   FileCache Cache,
                    const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols);
 
   Error checkPartiallySplit();
diff --git a/llvm/include/llvm/Support/Caching.h b/llvm/include/llvm/Support/Caching.h
index cf45145619d95..8c3ea4f205d4c 100644
--- a/llvm/include/llvm/Support/Caching.h
+++ b/llvm/include/llvm/Support/Caching.h
@@ -84,7 +84,8 @@ struct FileCache {
   std::string CacheDirectoryPath;
 };
 
-/// This type defines the callback to add a pre-existing file (e.g. in a cache).
+/// This type defines the callback to add a pre-existing file (e.g. in a cache
+/// or created by a backend compilation run as a separate process).
 ///
 /// Buffer callbacks must be thread safe.
 using AddBufferFn = std::function<void(unsigned Task, const Twine &ModuleName,
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionImport.h b/llvm/include/llvm/Transforms/IPO/FunctionImport.h
index 3623f9194d4d1..5e4116834b7f2 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionImport.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionImport.h
@@ -421,6 +421,12 @@ Error EmitImportsFiles(
     StringRef ModulePath, StringRef OutputFilename,
     const ModuleToSummariesForIndexTy &ModuleToSummariesForIndex);
 
+/// Call \p F passing each of the files module \p ModulePath will import from.
+void processImportsFiles(
+    StringRef ModulePath,
+    const ModuleToSummariesForIndexTy &ModuleToSummariesForIndex,
+    function_ref<void(const std::string &)> F);
+
 /// Based on the information recorded in the summaries during global
 /// summary-based analysis:
 /// 1. Resolve prevailing symbol linkages and constrain visibility (CanAutoHide
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 0f53c60851217..8cfefad45c4ee 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -41,8 +41,11 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/FileSystem.h"
+#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/JSON.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
 #include "llvm/Support/SHA1.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/ThreadPool.h"
@@ -91,6 +94,15 @@ extern cl::opt<bool> SupportsHotColdNew;
 
 /// Enable MemProf context disambiguation for thin link.
 extern cl::opt<bool> EnableMemProfContextDisambiguation;
+
+cl::list<std::string> AdditionalThinLTODistributorArgs(
+    "thinlto-distributor-arg",
+    cl::desc("Additional arguments to pass to the ThinLTO distributor"));
+
+cl::list<std::string>
+    ThinLTORemoteOptToolArgs("thinlto-remote-opt-tool-arg",
+                             cl::desc("Additional arguments to pass to the "
+                                      "ThinLTO remote optimization tool"));
 } // namespace llvm
 
 // Computes a unique hash for the Module considering the current list of
@@ -783,7 +795,7 @@ Error LTO::addModule(InputFile &Input, unsigned ModI,
                        LTOInfo->HasSummary);
 
   if (IsThinLTO)
-    return addThinLTO(BM, ModSyms, ResI, ResE);
+    return addThinLTO(BM, ModSyms, ResI, ResE, Input.getTargetTriple());
 
   RegularLTO.EmptyCombinedModule = false;
   Expected<RegularLTOState::AddedModule> ModOrErr =
@@ -1030,7 +1042,7 @@ Error LTO::linkRegularLTO(RegularLTOState::AddedModule Mod,
 // Add a ThinLTO module to the link.
 Error LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
                       const SymbolResolution *&ResI,
-                      const SymbolResolution *ResE) {
+                      const SymbolResolution *ResE, StringRef Triple) {
   const SymbolResolution *ResITmp = ResI;
   for (const InputFile::Symbol &Sym : Syms) {
     assert(ResITmp != ResE);
@@ -1090,6 +1102,8 @@ Error LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
         "Expected at most one ThinLTO module per bitcode file",
         inconvertibleErrorCode());
 
+  ThinLTO.ModuleTriples.insert({BM.getModuleIdentifier(), Triple.str()});
+
   if (!Conf.ThinLTOModulesToCompile.empty()) {
     if (!ThinLTO.ModulesToCompile)
       ThinLTO.ModulesToCompile = ModuleMapType();
@@ -1158,7 +1172,7 @@ Error LTO::checkPartiallySplit() {
   return Error::success();
 }
 
-Error LTO::run(AddStreamFn AddStream, FileCache Cache) {
+Error LTO::run(AddStreamFn AddStream, AddBufferFn AddBuffer, FileCache Cache) {
   // Compute "dead" symbols, we don't want to import/export these!
   DenseSet<GlobalValue::GUID> GUIDPreservedSymbols;
   DenseMap<GlobalValue::GUID, PrevailingType> GUIDPrevailingResolutions;
@@ -1208,7 +1222,7 @@ Error LTO::run(AddStreamFn AddStream, FileCache Cache) {
   if (!Result)
     // This will reset the GlobalResolutions optional once done with it to
     // reduce peak memory before importing.
-    Result = runThinLTO(AddStream, Cache, GUIDPreservedSymbols);
+    Result = runThinLTO(AddStream, AddBuffer, Cache, GUIDPreservedSymbols);
 
   if (StatsFile)
     PrintStatisticsJSON(StatsFile->os());
@@ -1390,6 +1404,16 @@ SmallVector<const char *> LTO::getRuntimeLibcallSymbols(const Triple &TT) {
 Error ThinBackendProc::emitFiles(
     const FunctionImporter::ImportMapTy &ImportList, llvm::StringRef ModulePath,
     const std::string &NewModulePath) const {
+  return emitFiles(ImportList, ModulePath, NewModulePath + ".thinlto.bc",
+                   NewModulePath,
+                   /*ImportsFiles=*/std::nullopt);
+}
+
+Error ThinBackendProc::emitFiles(
+    const FunctionImporter::ImportMapTy &ImportList, llvm::StringRef ModulePath,
+    StringRef SummaryPath, const std::string &NewModulePath,
+    std::optional<std::reference_wrapper<ImportsFilesContainer>> ImportsFiles)
+    const {
   ModuleToSummariesForIndexTy ModuleToSummariesForIndex;
   GVSummaryPtrSet DeclarationSummaries;
 
@@ -1398,10 +1422,9 @@ Error ThinBackendProc::emitFiles(
                                    ImportList, ModuleToSummariesForIndex,
                                    DeclarationSummaries);
 
-  raw_fd_ostream OS(NewModulePath + ".thinlto.bc", EC,
-                    sys::fs::OpenFlags::OF_None);
+  raw_fd_ostream OS(SummaryPath, EC, sys::fs::OpenFlags::OF_None);
   if (EC)
-    return createFileError("cannot open " + NewModulePath + ".thinlto.bc", EC);
+    return createFileError("cannot open " + Twine(SummaryPath), EC);
 
   writeIndexToFile(CombinedIndex, OS, &ModuleToSummariesForIndex,
                    &DeclarationSummaries);
@@ -1412,29 +1435,31 @@ Error ThinBackendProc::emitFiles(
     if (ImportFilesError)
       return ImportFilesError;
   }
+
+  // Optionally, store the imports files.
+  if (ImportsFiles)
+    processImportsFiles(
+        ModulePath, ModuleToSummariesForIndex,
+        [&](StringRef M) { ImportsFiles->get().push_back(M.str()); });
+
   return Error::success();
 }
 
 namespace {
-class InProcessThinBackend : public ThinBackendProc {
+class CGThinBackend : public ThinBackendProc {
 protected:
-  AddStreamFn AddStream;
-  FileCache Cache;
   DenseSet<GlobalValue::GUID> CfiFunctionDefs;
   DenseSet<GlobalValue::GUID> CfiFunctionDecls;
-
   bool ShouldEmitIndexFiles;
 
 public:
-  InProcessThinBackend(
+  CGThinBackend(
       const Config &Conf, ModuleSummaryIndex &CombinedIndex,
-      ThreadPoolStrategy ThinLTOParallelism,
       const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
-      AddStreamFn AddStream, FileCache Cache, lto::IndexWriteCallback OnWrite,
-      bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles)
+      lto::IndexWriteCallback OnWrite, bool ShouldEmitIndexFiles,
+      bool ShouldEmitImportsFiles, ThreadPoolStrategy ThinLTOParallelism)
       : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries,
                         OnWrite, ShouldEmitImportsFiles, ThinLTOParallelism),
-        AddStream(std::move(AddStream)), Cache(std::move(Cache)),
         ShouldEmitIndexFiles(ShouldEmitIndexFiles) {
     for (auto &Name : CombinedIndex.cfiFunctionDefs())
       CfiFunctionDefs.insert(
@@ -1443,6 +1468,24 @@ class InProcessThinBackend : public ThinBackendProc {
       CfiFunctionDecls.insert(
           GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(Name)));
   }
+};
+
+class InProcessThinBackend : public CGThinBackend {
+protected:
+  AddStreamFn AddStream;
+  FileCache Cache;
+
+public:
+  InProcessThinBackend(
+      const Config &Conf, ModuleSummaryIndex &CombinedIndex,
+      ThreadPoolStrategy ThinLTOParallelism,
+      const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
+      AddStreamFn AddStream, FileCache Cache, lto::IndexWriteCallback OnWrite,
+      bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles)
+      : CGThinBackend(Conf, CombinedIndex, ModuleToDefinedGVSummaries, OnWrite,
+                      ShouldEmitIndexFiles, ShouldEmitImportsFiles,
+                      ThinLTOParallelism),
+        AddStream(std::move(AddStream)), Cache(std::move(Cache)) {}
 
   virtual Error runThinLTOBackendThread(
       AddStreamFn AddStream, FileCache Cache, unsigned Task, BitcodeModule BM,
@@ -1496,7 +1539,8 @@ class InProcessThinBackend : public ThinBackendProc {
       const FunctionImporter::ImportMapTy &ImportList,
       const FunctionImporter::ExportSetTy &ExportList,
       const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
-      MapVector<StringRef, BitcodeModule> &ModuleMap) override {
+      MapVector<StringRef, BitcodeModule> &ModuleMap,
+      DenseMap<StringRef, std::string> & /*ModuleTriples*/) override {
     StringRef ModulePath = BM.getModuleIdentifier();
     assert(ModuleToDefinedGVSummaries.count(ModulePath));
     const GVSummaryMapTy &DefinedGlobals =
@@ -1709,7 +1753,7 @@ ThinBackend lto::createInProcessThinBackend(ThreadPoolStrategy Parallelism,
   auto Func =
       [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
           const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
-          AddStreamFn AddStream, FileCache Cache) {
+          AddStreamFn AddStream, AddBufferFn /*AddBuffer*/, FileCache Cache) {
         return std::make_unique<InProcessThinBackend>(
             Conf, CombinedIndex, Parallelism, ModuleToDefinedGVSummaries,
             AddStream, Cache, OnWrite, ShouldEmitIndexFiles,
@@ -1776,7 +1820,8 @@ class WriteIndexesThinBackend : public ThinBackendProc {
       const FunctionImporter::ImportMapTy &ImportList,
       const FunctionImporter::ExportSetTy &ExportList,
       const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
-      MapVector<StringRef, BitcodeModule> &ModuleMap) override {
+      MapVector<StringRef, BitcodeModule> &ModuleMap,
+      DenseMap<StringRef, std::string> & /*ModuleTriples*/) override {
     StringRef ModulePath = BM.getModuleIdentifier();
 
     // The contents of this file may be used as input to a native link, and must
@@ -1830,7 +1875,7 @@ ThinBackend lto::createWriteIndexesThinBackend(
   auto Func =
       [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
           const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
-          AddStreamFn AddStream, FileCache Cache) {
+          AddStreamFn AddStream, AddBufferFn AddBuffer, FileCache Cache) {
         return std::make_unique<WriteIndexesThinBackend>(
             Conf, CombinedIndex, Parallelism, ModuleToDefinedGVSummaries,
             OldPrefix, NewPrefix, NativeObjectPrefix, ShouldEmitImportsFiles,
@@ -1839,7 +1884,8 @@ ThinBackend lto::createWriteIndexesThinBackend(
   return ThinBackend(Func, Parallelism);
 }
 
-Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
+Error LTO::runThinLTO(AddStreamFn AddStream, AddBufferFn AddBuffer,
+                      FileCache Cache,
                       const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
   LLVM_DEBUG(dbgs() << "Running ThinLTO\n");
   ThinLTO.CombinedIndex.releaseTemporaryMemory();
@@ -2013,9 +2059,11 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
       return BackendProcess->start(
           RegularLTO.ParallelCodeGenParallelismLevel + I, Mod.second,
           ImportLists[Mod.first], ExportLists[Mod.first],
-          ResolvedODR[Mod.first], ThinLTO.ModuleMap);
+          ResolvedODR[Mod.first], ThinLTO.ModuleMap, ThinLTO.ModuleTriples);
     };
 
+    BackendProcess->setup(ModuleMap.size());
+
     if (BackendProcess->getThreadCount() == 1 ||
         BackendProcess->isSensitiveToInputOrder()) {
       // Process the modules in the order they were provided on the
@@ -2045,7 +2093,7 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
   if (!CodeGenDataThinLTOTwoRounds) {
     std::unique_ptr<ThinBackendProc> BackendProc =
         ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
-                        AddStream, Cache);
+                        AddStream, AddBuffer, Cache);
     return RunBackends(BackendProc.get());
   }
 
@@ -2142,3 +2190,327 @@ std::vector<int> lto::generateModulesOrdering(ArrayRef<BitcodeModule *> R) {
   });
   return ModulesOrdering;
 }
+
+namespace {
+// For this out-of-process backend no codegen is done when invoked for each
+// task. Instead we generate the required information (e.g. the summary index
+// shard, import list, etc.) to allow for the codegen to be performed
+// externally. This backend's `wait` function then invokes an external
+// distributor process to do backend compilations.
+class OutOfProcessThinBackend : public CGThinBackend {
+  using SString = SmallString<128>;
+
+  AddBufferFn AddBuffer;
+
+  BumpPtrAllocator Alloc;
+  StringSaver Saver{Alloc};
+
+  SString LinkerOutputFile;
+  StringRef LinkerVersion;
+  SString RemoteOptTool;
+  SString DistributorPath;
+  bool SaveTemps;
+
+  SmallVector<StringRef, 0> CodegenOptions;
+  DenseSet<StringRef> AdditionalInputs;
+
+  // Information specific to an individual backend compilation job.
+  struct Job {
+    unsigned Task;
+    StringRef ModuleID;
+    StringRef Triple;
+    StringRef NativeObjectPath;
+    StringRef SummaryIndexPath;
+    ImportsFilesContainer ImportFiles;
+  };
+  // The set of backend compilation jobs.
+  SmallVector<Job> Jobs;
+
+  // A unique string to identify the current link.
+  SmallString<8> UID;
+
+public:
+  OutOfProcessThinBackend(
+      const Config &Conf, ModuleSummaryIndex &CombinedIndex,
+      ThreadPoolStrategy ThinLTOParallelism,
+      const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
+      AddStreamFn AddStream, AddBufferFn AddBuffer,
+      lto::IndexWriteCallback OnWrite, bool ShouldEmitIndexFiles,
+      bool ShouldEmitImportsFiles, StringRef LinkerOutputFile,
+      StringRef LinkerVersion, StringRef RemoteOptTool, StringRef Distributor,
+      bool SaveTemps)
+      : CGThinBackend(Conf, CombinedIndex, ModuleToDefinedGVSummaries, OnWrite,
+                      ShouldEmitIndexFiles, ShouldEmitImportsFiles,
+                      ThinLTOParallelism),
+        AddBuffer(std::move(AddBuffer)), LinkerOutputFile(LinkerOutputFile),
+        LinkerVersion(LinkerVersion), RemoteOptTool(RemoteOptTool),
+        DistributorPath(Distributor), SaveTemps(SaveTemps) {}
+
+  virtual void setup(unsigned MaxTasks) override {
+    UID = itostr(sys::Process::getProcessId());
+    Jobs.resize((size_t)MaxTasks);
+  }
+
+  Error start(
+      unsigned Task, BitcodeModule BM,
+      const FunctionImporter::ImportMapTy &ImportList,
+      const FunctionImporter::ExportSetTy &ExportList,
+      const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
+      MapVector<StringRef, BitcodeModule> &ModuleMap,
+      DenseMap<StringRef, std::string> &ModuleTriples) override {
+
+    StringRef ModulePath = BM.getModuleIdentifier();
+
+    SString ObjFilePath = sys::path::parent_path(LinkerOutputFile);
+    sys::path::append(ObjFilePath, sys::path::stem(ModulePath) + "." +
+                                       itostr(Task) + "." + UID + ".native.o");
+
+    Job &J = Jobs[Task - 1]; /*Task 0 is reserved*/
+    J = {Task,
+         ModulePath,
+         ModuleTriples[ModulePath],
+         Saver.save(ObjFilePath.str()),
+         Saver.save(ObjFilePath.str() + ".thinlto.bc"),
+         {}};
+
+    assert(ModuleToDefinedGVSummaries.count(ModulePath));
+    BackendThreadPool.async(
+        [=](Job &J, const FunctionImporter::ImportMapTy &ImportList) {
+          if (LLVM_ENABLE_THREADS && Conf.TimeTraceEnabled)
+            timeTraceProfilerInitialize(Conf.TimeTraceGranularity,
+                                        "thin backend");
+          if (auto E = emitFiles(ImportList, J.ModuleID, J.SummaryIndexPath,
+                                 J.ModuleID.str(), J.ImportFiles)) {
+            std::unique_lock<std::mutex> L(ErrMu);
+            if (Err)
+              Err = joinErrors(std::move(*Err), std::move(E));
+            else
+              Err = std::move(E);
+          }
+          if (LLVM_ENABLE_THREADS && Conf.TimeTraceEnabled)
+            timeTraceProfilerFinishThread();
+        },
+        std::ref(J), std::ref(ImportList));
+
+    return Error::success();
+  }
+
+  // Derive a set of Clang options that will be shared/common for all DTLTO
+  // backend compilations. We are intentionally minimal here as these options
+  // must remain synchronized with the behavior of Clang. DTLTO does not support
+  // all the features available with in-process LTO. More features are expected
+  // to be added over time. Users can specify Clang options directly if a
+  // feature is not supported. Note that explicitly specified options that imply
+  // additional input or output file dependencies must be communicated to the
+  // distribution system, potentially by setting extra options on the
+  // distributor program.
+  // TODO: If this strategy of deriving options proves insufficient, alternative
+  // approaches should be considered, such as:
+  //   - A serialization/deserialization format for LTO configuration.
+  //   - Modifying LLD to be the tool that performs the backend compilations.
+  void buildCommonRemoteOptToolOptions() {
+    const lto::Config &C = Conf;
+    auto &Ops = CodegenOptions;
+    llvm::Triple TT{Jobs.front().Triple};
+
+    Ops.push_back(Saver.save("-O" + Twine(C.OptLevel)));
+
+    if (C.Options.EmitAddrsig)
+      Ops.push_back("-faddrsig");
+    if (C.Options.FunctionSections)
+      Ops.push_back("-ffunction-sections");
+    if (C.Options.DataSections)
+      Ops.push_back("-fdata-sections");
+
+    if (C.RelocModel == Reloc::PIC_)
+      // Clang doesn't have -fpic for all triples.
+      if (!TT.isOSBinFormatCOFF())
+        Ops.push_back("-fpic");
+
+    // Turn on/off warnings about profile cfg mismatch (default on)
+    // --lto-pgo-warn-mismatch.
+    if (!C.PGOWarnMismatch) {
+      Ops.push_back("-mllvm");
+      Ops.push_back("-no-pgo-warn-mismatch");
+    }
+
+    // Enable sample-based profile guided optimizations.
+    // Sample profile file path --lto-sample-profile=<value>.
+    if (!C.SampleProfile.empty()) {
+      Ops.push_back(
+          Saver.save("-fprofile-sample-use=" + Twine(C.SampleProfile)));
+      AdditionalInputs.insert(C.SampleProfile);
+    }
+
+    // Forward any options supplied via -thinlto-remote-opt-tool-arg. These
+    // are appended after the derived options, so they appear later on the
+    // remote tool's command line.
+    llvm::append_range(Ops, ThinLTORemoteOptToolArgs);
+
+    // We don't know which of those options will be used by Clang.
+    Ops.push_back("-Wno-unused-command-line-argument");
+  }
+
+  // Generates a JSON file describing the backend compilations, for the
+  // distributor.
+  bool emitDistributorJson(StringRef DistributorJson) {
+    using json::Array;
+    std::error_code EC;
+    raw_fd_ostream OS(DistributorJson, EC);
+    if (EC)
+      return false;
+
+    json::OStream JOS(OS);
+    JOS.object([&]() {
+      // Information common to all jobs. Note that we use a custom syntax for
+      // referencing by index into the job input and output file arrays.
+      JOS.attributeObject("common", [&]() {
+        JOS.attribute("linker_output", LinkerOutputFile);
+        JOS.attribute("linker_version", LinkerVersion);
+
+        // Common command line template.
+        JOS.attributeArray("args", [&]() {
+          JOS.value(RemoteOptTool);
+          for (const auto &A : CodegenOptions)
+            JOS.value(A);
+
+          // Reference to Job::NativeObjectPath.
+          JOS.value("-o");
+          JOS.value(Array{"primary_output", 0});
+
+          JOS.value("-c");
+
+          JOS.value("-x");
+          JOS.value("ir");
+
+          // Reference to Job::ModuleID.
+          JOS.value(Array{"primary_input", 0});
+
+          // Reference to Job::SummaryIndexPath.
+          JOS.value(Array{"summary_index", "-fthinlto-index=", 0});
+          JOS.value("-target");
+          JOS.value(Jobs.front().Triple);
+        });
+      });
+      JOS.attributeArray("jobs", [&]() {
+        for (const auto &J : Jobs) {
+          assert(J.Task != 0);
+          JOS.object([&]() {
+            JOS.attribute("primary_input", Array{J.ModuleID});
+            JOS.attribute("summary_index", Array{J.SummaryIndexPath});
+            JOS.attribute("primary_output", Array{J.NativeObjectPath});
+
+            // Add the bitcode files from which imports will be made. These do
+            // not appear on the command line but are recorded in the summary
+            // index shard.
+            JOS.attribute("imports", Array(J.ImportFiles));
+
+            // Add any input files that are common to each invocation. These
+            // filenames are duplicated in the command line template and in
+            // each per-job "additional_inputs" array. However, this small
+            // amount of duplication makes the schema simpler.
+            JOS.attribute("additional_inputs", Array(AdditionalInputs));
+          });
+        }
+      });
+    });
+
+    return true;
+  }
+
+  // Best-effort removal of a temporary file; a failure only emits a warning.
+  void removeFile(StringRef FileName) {
+    if (std::error_code EC = sys::fs::remove(FileName, true))
+      errs() << "warning: could not remove the file '" << FileName
+             << "': " << EC.message() << "\n";
+  }
+
+  // Runs the distributor and adds the native objects it produces to the link.
+  Error wait() override {
+    auto CleanPerJobFiles = llvm::make_scope_exit([&] {
+      if (!SaveTemps)
+        for (auto &Job : Jobs) {
+          removeFile(Job.NativeObjectPath);
+          if (!ShouldEmitIndexFiles)
+            removeFile(Job.SummaryIndexPath);
+        }
+    });
+
+    // Drain the pool before anything can return: the emitFiles tasks are
+    // still writing the files that CleanPerJobFiles removes on exit.
+    BackendThreadPool.wait();
+    if (Err)
+      return std::move(*Err);
+
+    const StringRef BCError = "DTLTO backend compilation: ";
+
+    // TODO: If we move to using an optimisation tool that does not require an
+    // explicit triple to be passed then the triple handling can be removed
+    // entirely.
+    if (!llvm::all_of(Jobs, [&](const auto &Job) {
+          return Job.Triple == Jobs.front().Triple;
+        }))
+      return make_error<StringError>(BCError + "all triples must be consistent",
+                                     inconvertibleErrorCode());
+
+    buildCommonRemoteOptToolOptions();
+
+    SString JsonFile = sys::path::parent_path(LinkerOutputFile);
+    sys::path::append(JsonFile, sys::path::stem(LinkerOutputFile) + "." + UID +
+                                    ".dist-file.json");
+    if (!emitDistributorJson(JsonFile))
+      return make_error<StringError>(
+          BCError + "failed to generate distributor JSON script: " + JsonFile,
+          inconvertibleErrorCode());
+    auto CleanJson = llvm::make_scope_exit([&] {
+      if (!SaveTemps)
+        removeFile(JsonFile);
+    });
+
+    SmallVector<StringRef, 3> Args = {DistributorPath};
+    llvm::append_range(Args, AdditionalThinLTODistributorArgs);
+    Args.push_back(JsonFile);
+    std::string ErrMsg;
+    if (sys::ExecuteAndWait(Args[0], Args,
+                            /*Env=*/std::nullopt, /*Redirects=*/{},
+                            /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg))
+      return make_error<StringError>(
+          BCError + "distributor execution failed" +
+              (!ErrMsg.empty() ? ": " + ErrMsg + Twine(".") : Twine(".")),
+          inconvertibleErrorCode());
+
+    for (auto &Job : Jobs) {
+      // Load the native object the distributor produced for this job and
+      // hand it to the linker through AddBuffer.
+      ErrorOr<std::unique_ptr<MemoryBuffer>> objFileMbOrErr =
+          MemoryBuffer::getFile(Job.NativeObjectPath, false, false);
+      if (std::error_code ec = objFileMbOrErr.getError())
+        return make_error<StringError>(
+            BCError + "cannot open native object file: " +
+                Job.NativeObjectPath + ": " + ec.message(),
+            inconvertibleErrorCode());
+      AddBuffer(Job.Task, Job.ModuleID, std::move(objFileMbOrErr.get()));
+    }
+
+    return Error::success();
+  }
+};
+} // end anonymous namespace
+
+ThinBackend lto::createOutOfProcessThinBackend(
+    ThreadPoolStrategy Parallelism, lto::IndexWriteCallback OnWrite,
+    bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles,
+    StringRef LinkerOutputFile, StringRef LinkerVersion,
+    StringRef RemoteOptTool, StringRef Distributor, bool SaveTemps) {
+  auto Func =
+      [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
+          const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
+          AddStreamFn AddStream, AddBufferFn AddBuffer, FileCache /*Cache*/) {
+        return std::make_unique<OutOfProcessThinBackend>(
+            Conf, CombinedIndex, Parallelism, ModuleToDefinedGVSummaries,
+            AddStream, AddBuffer, OnWrite, ShouldEmitIndexFiles,
+            ShouldEmitImportsFiles, LinkerOutputFile, LinkerVersion,
+            RemoteOptTool, Distributor, SaveTemps);
+      };
+  return ThinBackend(Func, Parallelism);
+}
diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index c3d0a1a3a046e..cdcf918d3fae8 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -1568,13 +1568,23 @@ Error llvm::EmitImportsFiles(
   if (EC)
     return createFileError("cannot open " + OutputFilename,
                            errorCodeToError(EC));
+  processImportsFiles(ModulePath, ModuleToSummariesForIndex,
+                      [&](StringRef M) { ImportsOS << M << "\n"; });
+  return Error::success();
+}
+
+/// Invoke callback \p F on the file paths from which \p ModulePath
+/// will import.
+void llvm::processImportsFiles(
+    StringRef ModulePath,
+    const ModuleToSummariesForIndexTy &ModuleToSummariesForIndex,
+    function_ref<void(const std::string &)> F) {
   for (const auto &ILI : ModuleToSummariesForIndex)
     // The ModuleToSummariesForIndex map includes an entry for the current
     // Module (needed for writing out the index files). We don't want to
     // include it in the imports file, however, so filter it out.
     if (ILI.first != ModulePath)
-      ImportsOS << ILI.first << "\n";
-  return Error::success();
+      F(ILI.first);
 }
 
 bool llvm::convertToDeclaration(GlobalValue &GV) {
diff --git a/llvm/test/ThinLTO/X86/dtlto-triple.ll b/llvm/test/ThinLTO/X86/dtlto-triple.ll
new file mode 100644
index 0000000000000..18936e9087c9c
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/dtlto-triple.ll
@@ -0,0 +1,47 @@
+;; Test the DTLTO limitation that all triples must match.
+
+; RUN: rm -rf %t && split-file %s %t && cd %t
+
+;; Generate bitcode files with summary.
+; RUN: opt -thinlto-bc t1.ll -o t1.bc
+; RUN: opt -thinlto-bc t2.ll -o t2.bc
+
+;; Generate native object files.
+; RUN: opt t1.ll -o t1.o
+; RUN: opt t2.ll -o t2.o
+
+;; Perform DTLTO. mock.py does not do any compilation,
+;; instead it uses the native object files supplied
+;; using -thinlto-distributor-arg.
+; RUN: not llvm-lto2 run t1.bc t2.bc -o t.o -save-temps \
+; RUN:     -dtlto \
+; RUN:     -dtlto-remote-opt-tool=dummy \
+; RUN:     -dtlto-distributor=%python \
+; RUN:     -thinlto-distributor-arg=%llvm_src_root/utils/dtlto/mock.py \
+; RUN:     -thinlto-distributor-arg=t1.o \
+; RUN:     -thinlto-distributor-arg=t2.o \
+; RUN:     -r=t1.bc,t1,px \
+; RUN:     -r=t2.bc,t2,px 2>&1 | FileCheck %s
+
+; CHECK: failed: DTLTO backend compilation: all triples must be consistent
+
+
+
+;--- t1.ll
+
+target triple = "x86_64-unknown-linux-gnu"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @t1() {
+  ret void
+}
+
+;--- t2.ll
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown-gnu"
+
+define void @t2() {
+  ret void
+}
+
diff --git a/llvm/test/ThinLTO/X86/dtlto.ll b/llvm/test/ThinLTO/X86/dtlto.ll
new file mode 100644
index 0000000000000..0da3421d4a61a
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/dtlto.ll
@@ -0,0 +1,65 @@
+;; Test DTLTO output with llvm-lto2.
+
+; RUN: rm -rf %t && split-file %s %t && cd %t
+
+;; Generate bitcode files with summary.
+; RUN: opt -thinlto-bc t1.ll -o t1.bc
+; RUN: opt -thinlto-bc t2.ll -o t2.bc
+
+;; Generate native object files.
+; RUN: opt t1.ll -o t1.o
+; RUN: opt t2.ll -o t2.o
+
+;; Perform DTLTO. mock.py does not do any compilation,
+;; instead it uses the native object files supplied
+;; using -thinlto-distributor-arg.
+; RUN: llvm-lto2 run t1.bc t2.bc -o t.o -save-temps \
+; RUN:     -dtlto \
+; RUN:     -dtlto-remote-opt-tool=dummy \
+; RUN:     -dtlto-distributor=%python \
+; RUN:     -thinlto-distributor-arg=%llvm_src_root/utils/dtlto/mock.py \
+; RUN:     -thinlto-distributor-arg=t1.o \
+; RUN:     -thinlto-distributor-arg=t2.o \
+; RUN:     -thinlto-emit-indexes \
+; RUN:     -thinlto-emit-imports \
+; RUN:     -r=t1.bc,t1,px \
+; RUN:     -r=t2.bc,t2,px
+
+;; Check that the expected output files have been created.
+; RUN: ls * | FileCheck %s --check-prefix=OUTPUT
+
+; OUTPUT-DAG: t1.{{[0-9]+}}.{{[0-9]+}}.native.o{{$}}
+; OUTPUT-DAG: t1.bc.imports{{$}}
+; OUTPUT-DAG: t1.{{[0-9]+}}.{{[0-9]+}}.native.o.thinlto.bc{{$}}
+
+; OUTPUT-DAG: t2.{{[0-9]+}}.{{[0-9]+}}.native.o{{$}}
+; OUTPUT-DAG: t2.bc.imports{{$}}
+; OUTPUT-DAG: t2.{{[0-9]+}}.{{[0-9]+}}.native.o.thinlto.bc{{$}}
+
+
+;--- t1.ll
+
+target triple = "x86_64-unknown-linux-gnu"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @t1() {
+  ret void
+}
+
+;--- t2.ll
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @t2() {
+  ret void
+}
+
+;--- t3.ll
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown-gnu"
+
+define void @t3() {
+  ret void
+}
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index aad7a088551b2..6722064d2a7b6 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -91,6 +91,7 @@ def get_asan_rtlib():
 config.substitutions.append(("%shlibext", config.llvm_shlib_ext))
 config.substitutions.append(("%pluginext", config.llvm_plugin_ext))
 config.substitutions.append(("%exeext", config.llvm_exe_ext))
+config.substitutions.append(("%llvm_src_root", config.llvm_src_root))
 
 
 lli_args = []
diff --git a/llvm/tools/llvm-lto2/llvm-lto2.cpp b/llvm/tools/llvm-lto2/llvm-lto2.cpp
index d4f022ef021a4..c9e6e7ce13402 100644
--- a/llvm/tools/llvm-lto2/llvm-lto2.cpp
+++ b/llvm/tools/llvm-lto2/llvm-lto2.cpp
@@ -97,6 +97,16 @@ static cl::opt<bool>
                                 "specified with -thinlto-emit-indexes or "
                                 "-thinlto-distributed-indexes"));
 
+static cl::opt<bool> DTLTO("dtlto", cl::desc("Perform DTLTO"));
+
+static cl::opt<std::string>
+    DTLTORemoteOptTool("dtlto-remote-opt-tool",
+                       cl::desc("Specify the remote opt tool for DTLTO"));
+
+static cl::opt<std::string>
+    DTLTODistributor("dtlto-distributor",
+                     cl::desc("Specify the distributor for DTLTO"));
+
 // Default to using all available threads in the system, but using only one
 // thread per core (no SMT).
 // Use -thinlto-threads=all to use hardware_concurrency() instead, which means
@@ -344,6 +354,12 @@ static int run(int argc, char **argv) {
   Conf.PTO.LoopVectorization = Conf.OptLevel > 1;
   Conf.PTO.SLPVectorization = Conf.OptLevel > 1;
 
+  if (ThinLTODistributedIndexes && DTLTO)
+    llvm::errs() << "-thinlto-distributed-indexes cannot be specfied together "
+                    "with -dtlto\n";
+
+  std::string TargetTripleStr = "";
+
   ThinBackend Backend;
   if (ThinLTODistributedIndexes)
     Backend = createWriteIndexesThinBackend(llvm::hardware_concurrency(Threads),
@@ -353,7 +369,20 @@ static int run(int argc, char **argv) {
                                             ThinLTOEmitImports,
                                             /*LinkedObjectsFile=*/nullptr,
                                             /*OnWrite=*/{});
-  else
+  else if (DTLTO) {
+    if (!InputFilenames.empty()) {
+      std::string F = InputFilenames[0];
+      std::unique_ptr<MemoryBuffer> MB = check(MemoryBuffer::getFile(F), F);
+      std::unique_ptr<InputFile> Input =
+          check(InputFile::create(MB->getMemBufferRef()), F);
+      TargetTripleStr = llvm::Triple::normalize(Input->getTargetTriple());
+    }
+
+    Backend = createOutOfProcessThinBackend(
+        llvm::heavyweight_hardware_concurrency(Threads),
+        /*OnWrite=*/{}, ThinLTOEmitIndexes, ThinLTOEmitImports, OutputFilename,
+        "DummyVersion", DTLTORemoteOptTool, DTLTODistributor, SaveTemps);
+  } else
     Backend = createInProcessThinBackend(
         llvm::heavyweight_hardware_concurrency(Threads),
         /* OnWrite */ {}, ThinLTOEmitIndexes, ThinLTOEmitImports);
@@ -456,7 +485,7 @@ static int run(int argc, char **argv) {
     Cache = check(localCache("ThinLTO", "Thin", CacheDir, AddBuffer),
                   "failed to create cache");
 
-  check(Lto.run(AddStream, Cache), "LTO::run failed");
+  check(Lto.run(AddStream, AddBuffer, Cache), "LTO::run failed");
   return static_cast<int>(HasErrors);
 }
 
diff --git a/llvm/utils/dtlto/local.py b/llvm/utils/dtlto/local.py
new file mode 100644
index 0000000000000..7be109061310c
--- /dev/null
+++ b/llvm/utils/dtlto/local.py
@@ -0,0 +1,25 @@
+import subprocess
+import sys
+import json
+from pathlib import Path
+
+if __name__ == "__main__":
+    # Load the DTLTO information from the input JSON file.
+    data = json.loads(Path(sys.argv[-1]).read_bytes())
+
+    # Iterate over the jobs and execute the codegen tool.
+    for job in data["jobs"]:
+        jobargs = []
+        for arg in data["common"]["args"]:
+            if isinstance(arg, list):
+                # arg is a "template", into which an external filename is to be
+                # inserted. The first element of arg names an array of strings
+                # in the job. The remaining elements of arg are either indices
+                # into the array or literal strings.
+                files, rest = job[arg[0]], arg[1:]
+                jobargs.append(
+                    "".join(files[x] if isinstance(x, int) else x for x in rest)
+                )
+            else:
+                jobargs.append(arg)
+        subprocess.check_call(jobargs)
diff --git a/llvm/utils/dtlto/mock.py b/llvm/utils/dtlto/mock.py
new file mode 100644
index 0000000000000..76bc554702e64
--- /dev/null
+++ b/llvm/utils/dtlto/mock.py
@@ -0,0 +1,16 @@
+import sys
+import json
+import shutil
+from pathlib import Path
+
+if __name__ == "__main__":
+    json_arg = sys.argv[-1]
+    distributor_args = sys.argv[1:-1]
+
+    # Load the DTLTO information from the input JSON file.
+    data = json.loads(Path(json_arg).read_bytes())
+
+    # Iterate over the jobs and create the output
+    # files by copying over the supplied input files.
+    for job_index, job in enumerate(data["jobs"]):
+        shutil.copy(distributor_args[job_index], job["primary_output"][0])
diff --git a/llvm/utils/dtlto/validate.py b/llvm/utils/dtlto/validate.py
new file mode 100644
index 0000000000000..7cb62d4aa7ed8
--- /dev/null
+++ b/llvm/utils/dtlto/validate.py
@@ -0,0 +1,75 @@
+import sys
+import json
+from pathlib import Path
+
+
+def take(jvalue, jpath):
+    parts = jpath.split(".")
+    for part in parts[:-1]:
+        jvalue = jvalue[part]
+    return jvalue.pop(parts[-1], KeyError)
+
+
+if __name__ == "__main__":
+    json_arg = sys.argv[-1]
+    distributor_args = sys.argv[1:-1]
+
+    print(f"{distributor_args=}")
+
+    # Load the DTLTO information from the input JSON file.
+    jdoc = json.loads(Path(json_arg).read_bytes())
+
+    # Write the input JSON to stdout.
+    print(json.dumps(jdoc, indent=4))
+
+    # Check the format of the JSON
+    assert type(take(jdoc, "common.linker_output")) is str
+    assert type(take(jdoc, "common.linker_version")) is str
+
+    args = take(jdoc, "common.args")
+    assert type(args) is list
+    assert len(args) > 0
+
+    def validate_reference(a):
+        for j in jdoc["jobs"]:
+            for x in a[1:]:
+                if type(x) is int:
+                    if a[0] not in j or x >= len(j[a[0]]):
+                        return False
+        return True
+
+    for a in args:
+        assert type(a) is str or (
+            type(a) is list
+            and len(a) >= 2
+            and type(a[0]) is str
+            and all(type(x) in (str, int) for x in a[1:])
+            and any(type(x) is int for x in a[1:])
+            and validate_reference(a)
+        )
+
+    assert len(take(jdoc, "common")) == 0
+
+    jobs = take(jdoc, "jobs")
+    assert type(jobs) is list
+    for j in jobs:
+        assert type(j) is dict
+
+        # Mandatory job attributes.
+        for attr in ("primary_input", "primary_output", "summary_index"):
+            array = take(j, attr)
+            assert type(array) is list
+            assert len(array) == 1
+            assert type(array[0]) is str
+
+        # Optional job attributes.
+        for attr in ("additional_inputs", "additional_outputs", "imports"):
+            array = take(j, attr)
+            if array is KeyError:
+                continue
+            assert type(array) is list
+            assert all(type(a) is str for a in array)
+
+        assert len(j) == 0
+
+    assert len(jdoc) == 0
>From e4bd1347ac46453a3b2715e01d0a54bfe7f287b7 Mon Sep 17 00:00:00 2001
From: bd1976bris <bd1976llvm at gmail.com>
Date: Tue, 11 Feb 2025 23:52:04 +0000
Subject: [PATCH 2/3] Update clang/docs/ThinLTO.rst
Co-authored-by: Paul Kirth <paulkirth at google.com>
---
 clang/docs/ThinLTO.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/docs/ThinLTO.rst b/clang/docs/ThinLTO.rst
index c3924ea45c9cc..2686df8c7da9b 100644
--- a/clang/docs/ThinLTO.rst
+++ b/clang/docs/ThinLTO.rst
@@ -268,7 +268,7 @@ Examples:
 
 If ``-fthinlto-distributor=`` is specified Clang supplies the path to a
 distributable optimization and code generation tool to LLD. Currently this tool
-is Clang itself specified.
+is Clang itself.
 
 See `DTLTO <https://lld.llvm.org/dtlto.html>`_ for more information.
 
>From ea257975cf9f596b1407afeac16b7c6b6daea5a7 Mon Sep 17 00:00:00 2001
From: Ben Dunbobbin <Ben.Dunbobbin at sony.com>
Date: Thu, 13 Feb 2025 01:02:54 +0000
Subject: [PATCH 3/3] Test improvements and a bug fix.
- Addressed test review comments.
- Improved content in the cross-project-tests/dtlto README.
- Reorder the code so no operations on the jobs occur until all the
  index generation tasks have completed. This fixes an intermittent
  failure that could occur if an error occurred before all the index
  generation jobs were complete. As it was intermittent it wasn't
  caught by my testing and made it into the initial PR :( In a future
  revision we will want to introduce a more robust mechanism to clean
  up the DTLTO output. For now keep the implementation simple.
---
 clang/test/Driver/DTLTO/dtlto.c             |  7 +--
 cross-project-tests/dtlto/README.md         |  3 ++
 cross-project-tests/dtlto/README.txt        |  2 -
 cross-project-tests/dtlto/archive-thin.test | 12 ++---
 cross-project-tests/dtlto/dtlto.c           | 54 ++++++++++-----------
 lld/test/COFF/dtlto.test                    | 13 ++---
 lld/test/ELF/dtlto/dtlto.test               | 18 ++++---
 lld/test/ELF/dtlto/imports.test             |  2 +-
 llvm/docs/DTLTO.rst                         |  4 +-
 llvm/lib/LTO/LTO.cpp                        | 15 +++---
 10 files changed, 65 insertions(+), 65 deletions(-)
 create mode 100644 cross-project-tests/dtlto/README.md
 delete mode 100644 cross-project-tests/dtlto/README.txt
diff --git a/clang/test/Driver/DTLTO/dtlto.c b/clang/test/Driver/DTLTO/dtlto.c
index a1babb42793bd..f322026d03f09 100644
--- a/clang/test/Driver/DTLTO/dtlto.c
+++ b/clang/test/Driver/DTLTO/dtlto.c
@@ -2,7 +2,7 @@
 
 // REQUIRES: lld
 
-// RUN: echo "-target x86_64-linux-gnu \
+// RUN: echo "--target=x86_64-linux-gnu \
 // RUN:   -Xdist distarg1 \
 // RUN:   -Xdist distarg2 \
 // RUN:   -fuse-ld=lld" > %t.rsp
@@ -36,8 +36,9 @@
 
 /// Check the expected arguments are forwarded by default with only
 /// --thinlto-distributor=.
-// RUN: %clang -### -target x86_64-linux-gnu -fthinlto-distributor=dist.exe -fuse-ld=lld %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefixes=DEFAULT,NOMORE --implicit-check-not=warning
+// RUN: %clang --target=x86_64-linux-gnu -fthinlto-distributor=dist.exe \
+// RUN:   -fuse-ld=lld -Werror -### %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefixes=DEFAULT,NOMORE
 
 // DEFAULT: ld.lld
 // DEFAULT-SAME: "--thinlto-distributor=dist.exe"
diff --git a/cross-project-tests/dtlto/README.md b/cross-project-tests/dtlto/README.md
new file mode 100644
index 0000000000000..cfd9d3496ca42
--- /dev/null
+++ b/cross-project-tests/dtlto/README.md
@@ -0,0 +1,3 @@
+Tests for DTLTO (integrated distributed ThinLTO) functionality.
+
+These are integration tests as DTLTO invokes `clang` for code-generation.
\ No newline at end of file
diff --git a/cross-project-tests/dtlto/README.txt b/cross-project-tests/dtlto/README.txt
deleted file mode 100644
index bc92ffa96807a..0000000000000
--- a/cross-project-tests/dtlto/README.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-                                                                   -*- rst -*-
-This is a collection of tests to check distributed thinLTO (DTLTO) functionality
diff --git a/cross-project-tests/dtlto/archive-thin.test b/cross-project-tests/dtlto/archive-thin.test
index 1f1fc60e28724..2632e4d7593fe 100644
--- a/cross-project-tests/dtlto/archive-thin.test
+++ b/cross-project-tests/dtlto/archive-thin.test
@@ -2,11 +2,11 @@
 ## correctly when thin archives are present.
 
 # RUN: rm -rf %t.dir && split-file %s %t.dir && cd %t.dir
-# RUN: %clang -target x86_64-linux-gnu -c foo.c -o foo.o
-# RUN: %clang -target x86_64-linux-gnu -c -flto=thin bar.c -o bar.o
-# RUN: %clang -target x86_64-linux-gnu -c -flto=thin dog.c -o dog.o
-# RUN: %clang -target x86_64-linux-gnu -c -flto=thin cat.c -o cat.o
-# RUN: %clang -target x86_64-linux-gnu -c -flto=thin _start.c -o _start.o
+# RUN: %clang --target=x86_64-linux-gnu -c foo.c -o foo.o
+# RUN: %clang --target=x86_64-linux-gnu -c -flto=thin bar.c -o bar.o
+# RUN: %clang --target=x86_64-linux-gnu -c -flto=thin dog.c -o dog.o
+# RUN: %clang --target=x86_64-linux-gnu -c -flto=thin cat.c -o cat.o
+# RUN: %clang --target=x86_64-linux-gnu -c -flto=thin _start.c -o _start.o
 
 # RUN: llvm-ar rcs foo.a foo.o --thin
 ## Create this bitcode thin archive in a sub-directory to test the expansion of
@@ -22,7 +22,7 @@
 
 # RUN: mkdir %t.dir/out && cd %t.dir/out
 
-# RUN: %clang -target x86_64-linux-gnu \
+# RUN: %clang --target=x86_64-linux-gnu \
 # RUN:   %t.dir/foo.a %t.dir/lib/bar.a ../_start.a %t.dir/cat.a -Wl,--whole-archive,../dog.a \
 # RUN:   -flto=thin \
 # RUN:   -fthinlto-distributor=%python \
diff --git a/cross-project-tests/dtlto/dtlto.c b/cross-project-tests/dtlto/dtlto.c
index 95c784df4201f..7738f8a93be8c 100644
--- a/cross-project-tests/dtlto/dtlto.c
+++ b/cross-project-tests/dtlto/dtlto.c
@@ -6,44 +6,40 @@
 
 // RUN: rm -rf %t && mkdir %t && cd %t
 
-// RUN: %clang -target x86_64-linux-gnu %s -shared -flto=thin \
+// RUN: %clang --target=x86_64-linux-gnu %s -shared -flto=thin \
 // RUN:   -fthinlto-distributor=%python \
 // RUN:   -Xdist %llvm_src_root/utils/dtlto/local.py \
 // RUN:   --save-temps \
 // RUN:   -fuse-ld=lld \
 // RUN:   -nostdlib \
-// RUN:   -nostartfiles \
 // RUN:   -Wl,--save-temps \
 // RUN:   -Werror
 
 /// Check that the required output files have been created.
 // RUN: ls | count 13
-// RUN: ls | FileCheck %s --check-prefix=BITCODE
-// RUN: ls | FileCheck %s --check-prefix=BACKEND
-// RUN: ls | FileCheck %s --check-prefix=NATIVE
-// RUN: ls | FileCheck %s --check-prefix=LLD
-
-/// Files produced by the bitcode compilation.
-// BITCODE: dtlto.bc
-// BITCODE: dtlto.i
-// BITCODE: dtlto.o
-
-/// The DTLTO backend emits the jobs description JSON and a summary shard.
-// BACKEND: a.{{[0-9]+}}.dist-file.json
-// BACKEND: dtlto.{{[0-9]+}}.{{[0-9]+}}.native.o.thinlto.bc{{$}}
-
-/// Native object output file for dtlto.o.
-// NATIVE: dtlto.{{[0-9]+}}.{{[0-9]+}}.native.o{{$}}
-/// linked ELF.
-// LLD: a.out{{$}}
-
-/// save-temps incremental files for a.out.
-/// TODO: Perhaps we should suppress some of the linker hooks for DTLTO.
-// LLD: a.out.0.0.preopt.bc{{$}}
-// LLD: a.out.0.2.internalize.bc{{$}}
-// LLD: a.out.index.bc{{$}}
-// LLD: a.out.index.dot{{$}}
-// LLD: a.out.lto.dtlto.o{{$}}
-// LLD: a.out.resolution.txt{{$}}
+// RUN: ls | FileCheck %s
+
+/// Produced by the bitcode compilation.
+// CHECK-DAG: {{^}}dtlto.bc{{$}}
+// CHECK-DAG: {{^}}dtlto.i{{$}}
+// CHECK-DAG: {{^}}dtlto.o{{$}}
+
+/// A jobs description JSON and a summary shard are emitted for DTLTO.
+// CHECK-DAG: {{^}}a.[[#]].dist-file.json{{$}}
+// CHECK-DAG: {{^}}dtlto.[[#]].[[#]].native.o.thinlto.bc{{$}}
+
+/// The backend compilation produces a native object output file for dtlto.o.
+// CHECK-DAG: dtlto.[[#]].[[#]].native.o{{$}}
+
+/// Linked ELF.
+// CHECK-DAG: {{^}}a.out{{$}}
+
+/// --save-temps incremental files for a.out.
+// CHECK-DAG: {{^}}a.out.lto.dtlto.o{{$}}
+// CHECK-DAG: {{^}}a.out.0.0.preopt.bc{{$}}
+// CHECK-DAG: {{^}}a.out.0.2.internalize.bc{{$}}
+// CHECK-DAG: {{^}}a.out.index.bc{{$}}
+// CHECK-DAG: {{^}}a.out.index.dot{{$}}
+// CHECK-DAG: {{^}}a.out.resolution.txt{{$}}
 
 int _start() { return 0; }
diff --git a/lld/test/COFF/dtlto.test b/lld/test/COFF/dtlto.test
index fcaa1eab13c15..83fde026bc332 100644
--- a/lld/test/COFF/dtlto.test
+++ b/lld/test/COFF/dtlto.test
@@ -15,12 +15,6 @@
 # RUN:   -mllvm:-thinlto-distributor-arg=%llvm_src_root/utils/dtlto/validate.py \
 # RUN:   --thinlto-remote-opt-tool=my_clang.exe" > l.rsp
 
-## Command line arguments that should affect codegen.
-# RUN: echo "/lto-pgo-warn-mismatch:no \
-# RUN:       /lto-sample-profile:foo.ll \
-# RUN:       -mllvm:-thinlto-distributor-arg=bibbity=10 \
-# RUN:       -mllvm:-thinlto-remote-opt-tool-arg=bobbity=20" > o.rsp
-
 ## Show that command line arguments have the desired effect when specified and
 ## that the effect is not present otherwise.
 # RUN: not lld-link @l.rsp @o.rsp 2>&1 | FileCheck %s --check-prefixes=ERR,OPT,BOTH
@@ -40,6 +34,13 @@
 
 # ERR: lld-link: error: DTLTO backend compilation: cannot open native object file:
 
+## Command line arguments that should affect codegen.
+#--- o.rsp
+/lto-pgo-warn-mismatch:no
+/lto-sample-profile:foo.ll
+-mllvm:-thinlto-distributor-arg=bibbity=10
+-mllvm:-thinlto-remote-opt-tool-arg=bobbity=20
+
 #--- foo.ll
 target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-pc-windows-msvc"
diff --git a/lld/test/ELF/dtlto/dtlto.test b/lld/test/ELF/dtlto/dtlto.test
index 7be9988d8ea81..0724617000b1e 100644
--- a/lld/test/ELF/dtlto/dtlto.test
+++ b/lld/test/ELF/dtlto/dtlto.test
@@ -15,14 +15,6 @@
 # RUN:       -mllvm -thinlto-distributor-arg=%llvm_src_root/utils/dtlto/validate.py \
 # RUN:       --thinlto-remote-opt-tool=my_clang.exe" > l.rsp
 
-## Command line arguments that should affect codegen.
-# RUN: echo "--lto-O3 \
-# RUN:       --lto-CGO2 \
-# RUN:       --no-lto-pgo-warn-mismatch \
-# RUN:       --lto-sample-profile=foo.ll \
-# RUN:       -mllvm -thinlto-distributor-arg=bibbity=10 \
-# RUN:       -mllvm -thinlto-remote-opt-tool-arg=bobbity=20" > o.rsp
-
 ## Show that command line arguments have the desired effect when specified and
 ## that the effect is not present otherwise.
 # RUN: not ld.lld @l.rsp @o.rsp 2>&1 | FileCheck %s --check-prefixes=ERR,OPT,BOTH
@@ -43,6 +35,16 @@
 
 # ERR: ld.lld: error: DTLTO backend compilation: cannot open native object file:
 
+
+## Command line arguments that should affect codegen.
+#--- o.rsp
+--lto-O3
+--lto-CGO2
+--no-lto-pgo-warn-mismatch
+--lto-sample-profile=foo.ll
+-mllvm -thinlto-distributor-arg=bibbity=10
+-mllvm -thinlto-remote-opt-tool-arg=bobbity=20
+
 #--- foo.ll
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/lld/test/ELF/dtlto/imports.test b/lld/test/ELF/dtlto/imports.test
index 2e096d7b2d93c..9105d118015b4 100644
--- a/lld/test/ELF/dtlto/imports.test
+++ b/lld/test/ELF/dtlto/imports.test
@@ -2,7 +2,7 @@
 
 ## Check that DTLTO handles imports files correctly.
 
-# RUN: rm -rf %t.dir && split-file %s %t.dir && cd %t.dir
+# RUN: rm -rf %t && split-file %s %t && cd %t
 
 ## Compile bitcode.
 # RUN: opt -module-summary 0.ll -o 0.o -O2
diff --git a/llvm/docs/DTLTO.rst b/llvm/docs/DTLTO.rst
index 92bfdcef3ac94..3fed25a922f5a 100644
--- a/llvm/docs/DTLTO.rst
+++ b/llvm/docs/DTLTO.rst
@@ -147,7 +147,7 @@ Below is an example of a JSON job file for backend compilation of the module
                 "-o", ["primary_output", 0],
                 "-c", "-x", "ir", ["primary_input", 0],
                 ["summary_index", "-fthinlto-index=", 0],
-                "-target", "x86_64-sie-ps5"
+                "--target=x86_64-sie-ps5"
             ]
         },
         "jobs": [
@@ -194,7 +194,7 @@ The example above generates the following backend compilation command for
 
     /usr/local/clang -O3 -fprofile-sample-use=my.profdata \
         -o dtlto.1.51232.native.o -c -x ir dtlto.o \
-        -fthinlto-index=dtlto.1.51232.native.o.thinlto.bc -target x86_64-sie-ps5
+        -fthinlto-index=dtlto.1.51232.native.o.thinlto.bc --target=x86_64-sie-ps5
 
 This expansion scheme allows the remote optimization tool to be changed without
 updating the distributors. For example, if the "args" field in the above example
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 8cfefad45c4ee..fca35bcd17f10 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -2388,8 +2388,7 @@ class OutOfProcessThinBackend : public CGThinBackend {
 
           // Reference to Job::SummaryIndexPath.
           JOS.value(Array{"summary_index", "-fthinlto-index=", 0});
-          JOS.value("-target");
-          JOS.value(Jobs.front().Triple);
+          JOS.value(Saver.save("--target=" + Twine(Jobs.front().Triple)));
         });
       });
       JOS.attributeArray("jobs", [&]() {
@@ -2426,6 +2425,12 @@ class OutOfProcessThinBackend : public CGThinBackend {
   }
 
   Error wait() override {
+    // Wait for the information on the required backend compilations to be
+    // gathered.
+    BackendThreadPool.wait();
+    if (Err)
+      return std::move(*Err);
+
     auto CleanPerJobFiles = llvm::make_scope_exit([&] {
       if (!SaveTemps)
         for (auto &Job : Jobs) {
@@ -2448,12 +2453,6 @@ class OutOfProcessThinBackend : public CGThinBackend {
 
     buildCommonRemoteOptToolOptions();
 
-    // Wait for the information on the required backend compilations to be
-    // gathered.
-    BackendThreadPool.wait();
-    if (Err)
-      return std::move(*Err);
-
     SString JsonFile = sys::path::parent_path(LinkerOutputFile);
     sys::path::append(JsonFile, sys::path::stem(LinkerOutputFile) + "." + UID +
                                     ".dist-file.json");
    
    
More information about the llvm-commits
mailing list