[llvm] [DTLTO] [LLVM] Initial DTLTO cache implementation (PR #156433)

Katya Romanova via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 7 02:59:49 PST 2025


https://github.com/romanova-ekaterina updated https://github.com/llvm/llvm-project/pull/156433

From 1ecbb72954b1243b52613ca9e30cd1ebac201c96 Mon Sep 17 00:00:00 2001
From: kromanova <katya.romanova at sony.com>
Date: Sun, 24 Aug 2025 22:37:58 -0700
Subject: [PATCH 1/4] [DTLTO] [LLVM] Initial DTLTO cache implementation

 This patch implements the DTLTO cache. The DTLTO cache works the same
 way as the ThinLTO cache; in fact, the same Cache class is used for
 both of them.

 However, the codegen parameters differ between DTLTO and ThinLTO
 (DTLTO codegen is done by invoking clang, and its codegen parameters
 are not fully synchronized with the codegen parameters used by the LTO
 backend). The object files generated by DTLTO and ThinLTO might
 therefore differ and must not be mixed. Even if ThinLTO and DTLTO
 share the same cache directory, their cache files won't interfere with
 each other.

 I added a couple of test files in the cross-project-tests/dtlto
 directory, but if more tests are required for the initial
 implementation, I can add them.
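 The separation works by mixing the new flag into the existing cache key
 hash. A minimal sketch of the idea (computeLTOCacheKey hashes many more
 inputs than shown here):

   // Inside computeLTOCacheKey (llvm/lib/LTO/LTO.cpp): the key is a
   // SHA-1 over the LTO configuration and module state. Folding
   // Conf.Dtlto into the hash makes DTLTO keys differ from in-process
   // ThinLTO keys even when every other input is identical.
   SHA1 Hasher;
   auto AddUint8 = [&](const uint8_t I) {
     Hasher.update(ArrayRef<uint8_t>(&I, 1));
   };
   AddUint8(Conf.Dtlto); // 0 for in-process ThinLTO, 1 for DTLTO.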
---
 cross-project-tests/dtlto/dtlto-cache.test    |  89 ++++++++++++
 .../dtlto/dtlto-thinlto-cache.test            |  66 +++++++++
 llvm/include/llvm/LTO/Config.h                |   1 +
 llvm/lib/LTO/LTO.cpp                          | 131 +++++++++++++-----
 4 files changed, 254 insertions(+), 33 deletions(-)
 create mode 100644 cross-project-tests/dtlto/dtlto-cache.test
 create mode 100644 cross-project-tests/dtlto/dtlto-thinlto-cache.test

diff --git a/cross-project-tests/dtlto/dtlto-cache.test b/cross-project-tests/dtlto/dtlto-cache.test
new file mode 100644
index 0000000000000..b98d4dbb433bb
--- /dev/null
+++ b/cross-project-tests/dtlto/dtlto-cache.test
@@ -0,0 +1,89 @@
+REQUIRES: x86-registered-target, ld.lld
+
+# Show that the ThinLTO cache works with DTLTO.
+
+RUN: rm -rf %t && split-file %s %t && cd %t
+
+# Compile source files into bitcode files.
+RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c foo.c main.c
+
+# Execute the linker and check that the cache is populated.
+RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
+RUN:   main.o foo.o -o populate1.elf \
+RUN:   -Wl,--thinlto-distributor=%python \
+RUN:   -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
+RUN:   -Wl,--thinlto-remote-compiler=%clang \
+RUN:   -Wl,--thinlto-cache-dir=cache.dir \
+RUN:   -Wl,--save-temps
+
+# Check that two backend compilation jobs occurred.
+RUN: grep -wo args populate1.*.dist-file.json | wc -l | grep -qx 3
+RUN: ls cache.dir/llvmcache.timestamp
+RUN: ls cache.dir | count 3
+
+# Execute the linker again and check that a fully populated cache is used correctly, 
+# i.e., no additional cache entries are created for cache hits.
+RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
+RUN:   main.o foo.o -o populate2.elf \
+RUN:   -Wl,--thinlto-distributor=%python \
+RUN:   -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
+RUN:   -Wl,--thinlto-remote-compiler=%clang \
+RUN:   -Wl,--thinlto-cache-dir=cache.dir \
+RUN:   -Wl,--save-temps
+
+# Check that no backend compilation jobs occurred.
+RUN: grep -wo args populate2.*.dist-file.json | wc -l | grep -qx 1
+RUN: ls cache.dir | count 3
+
+RUN: %clang -O0 --target=x86_64-linux-gnu -flto=thin -c foo.c -o foo.O0.o
+RUN: %clang -O0 --target=x86_64-linux-gnu -flto=thin -c main.c -o main.O0.o
+
+# Execute the linker again and check that the cache is populated correctly when there 
+# are no cache hits but there are existing cache entries.
+# As a side effect, this also verifies that the optimization level is considered when 
+# evaluating the cache entry key.
+
+RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
+RUN:   main.O0.o foo.O0.o -o populate3.elf \
+RUN:   -Wl,--thinlto-distributor=%python \
+RUN:   -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
+RUN:   -Wl,--thinlto-remote-compiler=%clang \
+RUN:   -Wl,--thinlto-cache-dir=cache.dir \
+RUN:   -Wl,--save-temps
+
+# Check that two new backend compilation jobs occurred.
+RUN: grep -wo args populate3.*.dist-file.json | wc -l | grep -qx 3
+RUN: ls cache.dir | count 5
+
+RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c main-partial.c 
+
+# Execute the linker and check that everything works correctly with the partially
+# populated cache. One more cache entry should be generated after this run.
+
+RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
+RUN:   main-partial.o foo.o -o main-partial.elf \
+RUN:   -Wl,--thinlto-distributor=%python \
+RUN:   -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
+RUN:   -Wl,--thinlto-remote-compiler=%clang \
+RUN:   -Wl,--thinlto-cache-dir=cache.dir \
+RUN:   -Wl,--save-temps
+
+# Check that one new backend compilation job occurred.
+RUN: grep -wo args main-partial.*.dist-file.json | wc -l | grep -qx 2
+RUN: ls cache.dir | count 6
+
+#--- foo.c
+volatile int foo_int;
+__attribute__((retain)) int foo(int x) { return x + foo_int; }
+
+#--- main.c
+extern int foo(int x);
+__attribute__((retain)) int main(int argc, char** argv) {
+  return foo(argc);
+}
+
+#--- main-partial.c
+extern int foo(int x);
+__attribute__((retain)) int main(int argc, char** argv) {
+  return foo(argc+1);
+}
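
A note on the job counting in the test above: the grep checks rely on the
shape of the generated *.dist-file.json, which carries one "args" array
for the common compiler arguments plus one "args" array per backend
compilation job, so N jobs yield N+1 matches. Roughly (an illustrative
sketch, not the exact schema):

  {
    "common": { "args": ["..."] },
    "jobs": [ { "args": ["..."] },
              { "args": ["..."] } ]
  }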
diff --git a/cross-project-tests/dtlto/dtlto-thinlto-cache.test b/cross-project-tests/dtlto/dtlto-thinlto-cache.test
new file mode 100644
index 0000000000000..d71e4aa5f131d
--- /dev/null
+++ b/cross-project-tests/dtlto/dtlto-thinlto-cache.test
@@ -0,0 +1,66 @@
+REQUIRES: x86-registered-target, ld.lld
+
+# This test verifies that a cache populated by a ThinLTO link is not reused by a DTLTO link and vice versa.
+
+RUN: rm -rf %t && split-file %s %t && cd %t
+
+# Compile source files into bitcode files.
+RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c foo.c main.c
+
+# Execute the linker and check that ThinLTO cache is populated.
+RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
+RUN:   main.o foo.o -o main.elf \
+RUN:   -Wl,--thinlto-cache-dir=cache.dir \
+RUN:   -Wl,--save-temps
+
+RUN: ls cache.dir/llvmcache.timestamp
+RUN: ls cache.dir | count 3
+
+# Execute the linker and check that DTLTO adds additional entries to the ThinLTO cache, implying they do not share entries.
+RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
+RUN:   main.o foo.o -o populate1.elf \
+RUN:   -Wl,--thinlto-distributor=%python \
+RUN:   -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
+RUN:   -Wl,--thinlto-remote-compiler=%clang \
+RUN:   -Wl,--thinlto-cache-dir=cache.dir \
+RUN:   -Wl,--save-temps
+
+# Check that two backend compilation jobs occurred.
+RUN: grep -wo args populate1.*.dist-file.json | wc -l | grep -qx 3
+RUN: ls cache.dir | count 5
+
+# Clean up cache directory.
+RUN: rm -rf cache.dir
+
+# Execute the linker and check that DTLTO cache is populated. 
+RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
+RUN:   main.o foo.o -o populate2.elf \
+RUN:   -Wl,--thinlto-distributor=%python \
+RUN:   -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
+RUN:   -Wl,--thinlto-remote-compiler=%clang \
+RUN:   -Wl,--thinlto-cache-dir=cache.dir \
+RUN:   -Wl,--save-temps
+
+# Check that two backend compilation jobs occurred.
+RUN: grep -wo args populate2.*.dist-file.json | wc -l | grep -qx 3
+RUN: ls cache.dir/llvmcache.timestamp
+RUN: ls cache.dir | count 3
+
+# Execute the linker and check that DTLTO adds additional entries to the ThinLTO cache, 
+# implying they do not share entries.
+RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
+RUN:   main.o foo.o -o main.elf \
+RUN:   -Wl,--thinlto-cache-dir=cache.dir \
+RUN:   -Wl,--save-temps
+
+RUN: ls cache.dir | count 5
+
+#--- foo.c
+volatile int foo_int;
+__attribute__((retain)) int foo(int x) { return x + foo_int; }
+
+#--- main.c
+extern int foo(int x);
+__attribute__((retain)) int main(int argc, char** argv) {
+  return foo(argc);
+}
diff --git a/llvm/include/llvm/LTO/Config.h b/llvm/include/llvm/LTO/Config.h
index 50e143c518213..f5cd2e79e137c 100644
--- a/llvm/include/llvm/LTO/Config.h
+++ b/llvm/include/llvm/LTO/Config.h
@@ -281,6 +281,7 @@ struct Config {
   LLVM_ABI Error addSaveTemps(std::string OutputFileName,
                               bool UseInputModulePath = false,
                               const DenseSet<StringRef> &SaveTempsArgs = {});
+  mutable uint8_t Dtlto = 0;
 };
 
 struct LTOLLVMDiagnosticHandler : public DiagnosticHandler {
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 35d24c17bbd93..c086cffe9f9c9 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -168,6 +168,7 @@ std::string llvm::computeLTOCacheKey(
   AddString(Conf.OverrideTriple);
   AddString(Conf.DefaultTriple);
   AddString(Conf.DwoDir);
+  AddUint8(Conf.Dtlto);
 
   // Include the hash for the current module
   auto ModHash = Index.getModuleHash(ModuleID);
@@ -2244,7 +2245,7 @@ class OutOfProcessThinBackend : public CGThinBackend {
 
   SmallVector<StringRef, 0> CodegenOptions;
   DenseSet<StringRef> CommonInputs;
-
+  std::atomic<uint64_t> CachedJobs{0};
   // Information specific to individual backend compilation job.
   struct Job {
     unsigned Task;
@@ -2252,6 +2253,9 @@ class OutOfProcessThinBackend : public CGThinBackend {
     StringRef NativeObjectPath;
     StringRef SummaryIndexPath;
     ImportsFilesContainer ImportsFiles;
+    std::string CacheKey;
+    AddStreamFn CacheAddStream;
+    bool Cached = false;
   };
   // The set of backend compilations jobs.
   SmallVector<Job> Jobs;
@@ -2265,12 +2269,15 @@ class OutOfProcessThinBackend : public CGThinBackend {
   // The target triple to supply for backend compilations.
   llvm::Triple Triple;
 
+  // Cache used to store and fetch backend compilation results.
+  FileCache Cache;
+
 public:
   OutOfProcessThinBackend(
       const Config &Conf, ModuleSummaryIndex &CombinedIndex,
       ThreadPoolStrategy ThinLTOParallelism,
       const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
-      AddStreamFn AddStream, lto::IndexWriteCallback OnWrite,
+      AddStreamFn AddStream, FileCache CacheFn, lto::IndexWriteCallback OnWrite,
       bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles,
       StringRef LinkerOutputFile, StringRef Distributor,
       ArrayRef<StringRef> DistributorArgs, StringRef RemoteCompiler,
@@ -2280,7 +2287,8 @@ class OutOfProcessThinBackend : public CGThinBackend {
                       ShouldEmitImportsFiles, ThinLTOParallelism),
         LinkerOutputFile(LinkerOutputFile), DistributorPath(Distributor),
         DistributorArgs(DistributorArgs), RemoteCompiler(RemoteCompiler),
-        RemoteCompilerArgs(RemoteCompilerArgs), SaveTemps(SaveTemps) {}
+        RemoteCompilerArgs(RemoteCompilerArgs), SaveTemps(SaveTemps),
+        Cache(std::move(CacheFn)) {}
 
   virtual void setup(unsigned ThinLTONumTasks, unsigned ThinLTOTaskOffset,
                      llvm::Triple Triple) override {
@@ -2288,6 +2296,7 @@ class OutOfProcessThinBackend : public CGThinBackend {
     Jobs.resize((size_t)ThinLTONumTasks);
     this->ThinLTOTaskOffset = ThinLTOTaskOffset;
     this->Triple = Triple;
+    this->Conf.Dtlto = 1;
   }
 
   Error start(
@@ -2304,13 +2313,14 @@ class OutOfProcessThinBackend : public CGThinBackend {
                                        itostr(Task) + "." + UID + ".native.o");
 
     Job &J = Jobs[Task - ThinLTOTaskOffset];
-    J = {
-        Task,
-        ModulePath,
-        Saver.save(ObjFilePath.str()),
-        Saver.save(ObjFilePath.str() + ".thinlto.bc"),
-        {} // Filled in by emitFiles below.
-    };
+    J = {Task,
+         ModulePath,
+         Saver.save(ObjFilePath.str()),
+         Saver.save(ObjFilePath.str() + ".thinlto.bc"),
+         {}, // Filled in by emitFiles below.
+         "",
+         nullptr,
+         false};
 
     assert(ModuleToDefinedGVSummaries.count(ModulePath));
 
@@ -2326,6 +2336,35 @@ class OutOfProcessThinBackend : public CGThinBackend {
             else
               Err = std::move(E);
           }
+
+          if (Cache.isValid() &&
+              CombinedIndex.modulePaths().count(J.ModuleID) &&
+              all_of(CombinedIndex.getModuleHash(J.ModuleID),
+                     [](uint32_t V) { return V != 0; })) {
+
+            const GVSummaryMapTy &DefinedGlobals =
+                ModuleToDefinedGVSummaries.find(ModulePath)->second;
+
+            // Compute and store a bitcode module cache key.
+            J.CacheKey = computeLTOCacheKey(
+                Conf, CombinedIndex, ModulePath, ImportList, ExportList,
+                ResolvedODR, DefinedGlobals, CfiFunctionDefs, CfiFunctionDecls);
+
+            // Check if we have something in the cache.
+            auto CacheAddStreamExp = Cache(J.Task, J.CacheKey, J.ModuleID);
+            if (Error E = CacheAddStreamExp.takeError()) {
+              Err = joinErrors(std::move(*Err), std::move(E));
+            } else {
+              AddStreamFn &CacheAddStream = *CacheAddStreamExp;
+              if (!CacheAddStream) {
+                J.Cached = true; // Cache hit, mark the job as cached.
+                CachedJobs.fetch_add(1);
+              } else {
+                // Cache miss, save cache 'add stream' function for a later use.
+                J.CacheAddStream = std::move(CacheAddStream);
+              }
+            }
+          }
         },
         std::ref(J), std::ref(ImportList));
 
@@ -2417,6 +2456,9 @@ class OutOfProcessThinBackend : public CGThinBackend {
         for (const auto &J : Jobs) {
           assert(J.Task != 0);
 
+          if (!Cache.getCacheDirectoryPath().empty() && J.Cached)
+            continue;
+
           SmallVector<StringRef, 2> Inputs;
           SmallVector<StringRef, 1> Outputs;
 
@@ -2488,20 +2530,26 @@ class OutOfProcessThinBackend : public CGThinBackend {
         removeFile(JsonFile);
     });
 
-    SmallVector<StringRef, 3> Args = {DistributorPath};
-    llvm::append_range(Args, DistributorArgs);
-    Args.push_back(JsonFile);
-    std::string ErrMsg;
-    if (sys::ExecuteAndWait(Args[0], Args,
-                            /*Env=*/std::nullopt, /*Redirects=*/{},
-                            /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg)) {
-      return make_error<StringError>(
-          BCError + "distributor execution failed" +
-              (!ErrMsg.empty() ? ": " + ErrMsg + Twine(".") : Twine(".")),
-          inconvertibleErrorCode());
+    if (CachedJobs.load() < Jobs.size()) {
+      SmallVector<StringRef, 3> Args = {DistributorPath};
+      llvm::append_range(Args, DistributorArgs);
+      Args.push_back(JsonFile);
+      std::string ErrMsg;
+      if (sys::ExecuteAndWait(Args[0], Args,
+                              /*Env=*/std::nullopt, /*Redirects=*/{},
+                              /*SecondsToWait=*/0, /*MemoryLimit=*/0,
+                              &ErrMsg)) {
+        return make_error<StringError>(
+            BCError + "distributor execution failed" +
+                (!ErrMsg.empty() ? ": " + ErrMsg + Twine(".") : Twine(".")),
+            inconvertibleErrorCode());
+      }
     }
 
     for (auto &Job : Jobs) {
+      if (Cache.isValid() && !Job.CacheKey.empty())
+        if (Job.Cached)
+          continue;
       // Load the native object from a file into a memory buffer
       // and store its contents in the output buffer.
       auto ObjFileMbOrErr =
@@ -2512,15 +2560,32 @@ class OutOfProcessThinBackend : public CGThinBackend {
             BCError + "cannot open native object file: " +
                 Job.NativeObjectPath + ": " + EC.message(),
             inconvertibleErrorCode());
-      auto StreamOrErr = AddStream(Job.Task, Job.ModuleID);
-      if (Error Err = StreamOrErr.takeError())
-        report_fatal_error(std::move(Err));
-      auto &Stream = *StreamOrErr->get();
-      *Stream.OS << ObjFileMbOrErr->get()->getMemBufferRef().getBuffer();
-      if (Error Err = Stream.commit())
-        report_fatal_error(std::move(Err));
-    }
 
+      MemoryBufferRef ObjFileMbRef = ObjFileMbOrErr->get()->getMemBufferRef();
+      if (Cache.isValid() && Job.CacheAddStream) {
+        // Obtain a file stream for storing a cache entry.
+        auto CachedFileStreamOrErr = Job.CacheAddStream(Job.Task, Job.ModuleID);
+        if (!CachedFileStreamOrErr)
+          return joinErrors(
+              CachedFileStreamOrErr.takeError(),
+              createStringError(inconvertibleErrorCode(),
+                                "Cannot get a cache file stream: %s",
+                                Job.NativeObjectPath.data()));
+        // Store a file buffer into the cache stream.
+        auto &CacheStream = *(CachedFileStreamOrErr->get());
+        *(CacheStream.OS) << ObjFileMbRef.getBuffer();
+        if (Error Err = CacheStream.commit())
+          return Err;
+      } else {
+        auto StreamOrErr = AddStream(Job.Task, Job.ModuleID);
+        if (Error Err = StreamOrErr.takeError())
+          report_fatal_error(std::move(Err));
+        auto &Stream = *StreamOrErr->get();
+        *Stream.OS << ObjFileMbRef.getBuffer();
+        if (Error Err = Stream.commit())
+          report_fatal_error(std::move(Err));
+      }
+    }
     return Error::success();
   }
 };
@@ -2535,12 +2600,12 @@ ThinBackend lto::createOutOfProcessThinBackend(
   auto Func =
       [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
           const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
-          AddStreamFn AddStream, FileCache /*Cache*/) {
+          AddStreamFn AddStream, FileCache Cache) {
         return std::make_unique<OutOfProcessThinBackend>(
             Conf, CombinedIndex, Parallelism, ModuleToDefinedGVSummaries,
-            AddStream, OnWrite, ShouldEmitIndexFiles, ShouldEmitImportsFiles,
-            LinkerOutputFile, Distributor, DistributorArgs, RemoteCompiler,
-            RemoteCompilerArgs, SaveTemps);
+            AddStream, Cache, OnWrite, ShouldEmitIndexFiles,
+            ShouldEmitImportsFiles, LinkerOutputFile, Distributor,
+            DistributorArgs, RemoteCompiler, RemoteCompilerArgs, SaveTemps);
       };
   return ThinBackend(Func, Parallelism);
 }
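
A note for reviewers unfamiliar with the FileCache protocol used above:
invoking the cache returns an Expected<AddStreamFn>, where a null
AddStreamFn signals a cache hit (the cached buffer has already been handed
to the linker through the AddBuffer callback registered when the cache was
created), while a non-null one must be used to stream the newly produced
object into the cache. A minimal sketch, assuming a FileCache obtained via
llvm::localCache (see llvm/Support/Caching.h):

  Expected<AddStreamFn> AddStreamOrErr = Cache(Task, Key, ModuleID);
  if (!AddStreamOrErr)
    return AddStreamOrErr.takeError(); // Cache I/O error.
  if (!*AddStreamOrErr) {
    // Cache hit: the object was already delivered via AddBuffer.
  } else {
    // Cache miss: write the object through the returned stream so the
    // cache captures it for future links.
  }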

From 4c313814b85dbd63b08df78ba23bd0ef0caa3330 Mon Sep 17 00:00:00 2001
From: kromanova <katya.romanova at sony.com>
Date: Thu, 16 Oct 2025 02:23:00 -0700
Subject: [PATCH 2/4] Addressed first round of code review comments

---
 .../dtlto/dtlto-thinlto-cache.test              | 16 ++++++++++------
 llvm/include/llvm/LTO/Config.h                  |  5 ++++-
 llvm/lib/LTO/LTO.cpp                            | 17 +++++++++++------
 3 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/cross-project-tests/dtlto/dtlto-thinlto-cache.test b/cross-project-tests/dtlto/dtlto-thinlto-cache.test
index d71e4aa5f131d..c177112e2dbbd 100644
--- a/cross-project-tests/dtlto/dtlto-thinlto-cache.test
+++ b/cross-project-tests/dtlto/dtlto-thinlto-cache.test
@@ -1,13 +1,14 @@
 REQUIRES: x86-registered-target, ld.lld
 
-# This test verifies that a cache populated by a ThinLTO link is not reused by a DTLTO link and vice versa.
+# This test verifies that a cache populated by in-process ThinLTO codegen is
+# not reused by out-of-process (DTLTO) codegen, and vice versa.
 
 RUN: rm -rf %t && split-file %s %t && cd %t
 
 # Compile source files into bitcode files.
 RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c foo.c main.c
 
-# Execute the linker and check that ThinLTO cache is populated.
+# Execute the linker and check that the in-process ThinLTO cache is populated.
 RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
 RUN:   main.o foo.o -o main.elf \
 RUN:   -Wl,--thinlto-cache-dir=cache.dir \
@@ -16,7 +17,9 @@ RUN:   -Wl,--save-temps
 RUN: ls cache.dir/llvmcache.timestamp
 RUN: ls cache.dir | count 3
 
-# Execute the linker and check that DTLTO adds additional entries to the ThinLTO cache, implying they do not share entries.
+# Execute the linker and check that out-of-process (DTLTO) codegen adds
+# additional entries to the cache, implying that in-process and
+# out-of-process codegen do not share cache entries.
 RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
 RUN:   main.o foo.o -o populate1.elf \
 RUN:   -Wl,--thinlto-distributor=%python \
@@ -32,7 +35,7 @@ RUN: ls cache.dir | count 5
 # Clean up cache directory.
 RUN: rm -rf cache.dir
 
-# Execute the linker and check that DTLTO cache is populated. 
+# Execute the linker and check that the out-of-process (DTLTO) cache is populated.
 RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
 RUN:   main.o foo.o -o populate2.elf \
 RUN:   -Wl,--thinlto-distributor=%python \
@@ -46,8 +49,9 @@ RUN: grep -wo args populate2.*.dist-file.json | wc -l | grep -qx 3
 RUN: ls cache.dir/llvmcache.timestamp
 RUN: ls cache.dir | count 3
 
-# Execute the linker and check that DTLTO adds additional entries to the ThinLTO cache, 
-# implying they do not share entries.
+# Execute the linker and check that in-process codegen adds additional
+# entries to the cache, implying that in-process and out-of-process
+# codegen do not share cache entries.
 RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
 RUN:   main.o foo.o -o main.elf \
 RUN:   -Wl,--thinlto-cache-dir=cache.dir \
diff --git a/llvm/include/llvm/LTO/Config.h b/llvm/include/llvm/LTO/Config.h
index f5cd2e79e137c..a65ba6abf4e48 100644
--- a/llvm/include/llvm/LTO/Config.h
+++ b/llvm/include/llvm/LTO/Config.h
@@ -281,7 +281,10 @@ struct Config {
   LLVM_ABI Error addSaveTemps(std::string OutputFileName,
                               bool UseInputModulePath = false,
                               const DenseSet<StringRef> &SaveTempsArgs = {});
-  mutable uint8_t Dtlto = 0;
+  // DTLTO flag is used as one of parameters to calculate cache entries and to
+  // ensure that in-process cache and out-of-process (DTLTO) cache are
+  // distinguished.
+  mutable bool Dtlto = 0;
 };
 
 struct LTOLLVMDiagnosticHandler : public DiagnosticHandler {
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 90ed89389aa32..4016932f6a536 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -1571,6 +1571,10 @@ class InProcessThinBackend : public CGThinBackend {
     if (Error Err = CacheAddStreamOrErr.takeError())
       return Err;
     AddStreamFn &CacheAddStream = *CacheAddStreamOrErr;
+    // If CacheAddStream is null, we have a cache hit and at this point object
+    // file is already passed back to the linker.
+    // If CacheAddStream is not null, we have a cache miss and we need to run
+    // the backend for codegen.
     if (CacheAddStream)
       return RunThinBackend(CacheAddStream);
 
@@ -2259,7 +2263,8 @@ class OutOfProcessThinBackend : public CGThinBackend {
 
   SmallVector<StringRef, 0> CodegenOptions;
   DenseSet<StringRef> CommonInputs;
-  std::atomic<uint64_t> CachedJobs{0};
+  // Number of object files that have already been cached.
+  std::atomic<size_t> CachedJobs{0};
   // Information specific to individual backend compilation job.
   struct Job {
     unsigned Task;
@@ -2332,7 +2337,7 @@ class OutOfProcessThinBackend : public CGThinBackend {
          Saver.save(ObjFilePath.str()),
          Saver.save(ObjFilePath.str() + ".thinlto.bc"),
          {}, // Filled in by emitFiles below.
-         "",
+         "", /*CacheKey=*/
          nullptr,
          false};
 
@@ -2470,7 +2475,7 @@ class OutOfProcessThinBackend : public CGThinBackend {
         for (const auto &J : Jobs) {
           assert(J.Task != 0);
 
-          if (!Cache.getCacheDirectoryPath().empty() && J.Cached)
+          if (J.Cached)
             continue;
 
           SmallVector<StringRef, 2> Inputs;
@@ -2544,6 +2549,7 @@ class OutOfProcessThinBackend : public CGThinBackend {
         removeFile(JsonFile);
     });
 
+    // Run the distributor only if at least one job missed the cache.
     if (CachedJobs.load() < Jobs.size()) {
       SmallVector<StringRef, 3> Args = {DistributorPath};
       llvm::append_range(Args, DistributorArgs);
@@ -2561,9 +2567,8 @@ class OutOfProcessThinBackend : public CGThinBackend {
     }
 
     for (auto &Job : Jobs) {
-      if (Cache.isValid() && !Job.CacheKey.empty())
-        if (Job.Cached)
-          continue;
+      if (!Job.CacheKey.empty() && (Job.Cached))
+        continue;
       // Load the native object from a file into a memory buffer
       // and store its contents in the output buffer.
       auto ObjFileMbOrErr =

From 9a1ceadecd9a40ec3a163640594baddcfe966008 Mon Sep 17 00:00:00 2001
From: Romanova <katya.romanova at sony.com>
Date: Wed, 29 Oct 2025 02:44:48 -0700
Subject: [PATCH 3/4] Rewrote the rather large lambda and restructured it the same
 way as InProcessThinBackend::runThinLTOBackendThread

---
 llvm/lib/LTO/LTO.cpp | 102 +++++++++++++++++++++++++------------------
 1 file changed, 59 insertions(+), 43 deletions(-)

diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 4016932f6a536..a6dcd920d5eec 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -1571,10 +1571,6 @@ class InProcessThinBackend : public CGThinBackend {
     if (Error Err = CacheAddStreamOrErr.takeError())
       return Err;
     AddStreamFn &CacheAddStream = *CacheAddStreamOrErr;
-    // If CacheAddStream is null, we have a cache hit and at this point object
-    // file is already passed back to the linker.
-    // If CacheAddStream is not null, we have a cache miss and we need to run
-    // the backend for codegen.
     if (CacheAddStream)
       return RunThinBackend(CacheAddStream);
 
@@ -2318,6 +2314,57 @@ class OutOfProcessThinBackend : public CGThinBackend {
     this->Conf.Dtlto = 1;
   }
 
+  virtual Error runThinLTOBackendThread(
+      Job &J, const FunctionImporter::ImportMapTy &ImportList,
+      const FunctionImporter::ExportSetTy &ExportList,
+      const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>
+          &ResolvedODR) {
+
+    llvm::TimeTraceScope timeScope(
+        "Run ThinLTO backend thread (out-of-process)", J.ModuleID);
+
+    if (auto E = emitFiles(ImportList, J.ModuleID, J.ModuleID.str(),
+                           J.SummaryIndexPath, J.ImportsFiles)) {
+      std::unique_lock<std::mutex> L(ErrMu);
+      if (Err)
+        Err = joinErrors(std::move(*Err), std::move(E));
+      else
+        Err = std::move(E);
+    }
+
+    if (Cache.isValid() && CombinedIndex.modulePaths().count(J.ModuleID) &&
+        all_of(CombinedIndex.getModuleHash(J.ModuleID),
+               [](uint32_t V) { return V != 0; })) {
+      const GVSummaryMapTy &DefinedGlobals =
+          ModuleToDefinedGVSummaries.find(J.ModuleID)->second;
+
+      // The module may be cached, this helps handling it.
+      J.CacheKey = computeLTOCacheKey(
+          Conf, CombinedIndex, J.ModuleID, ImportList, ExportList, ResolvedODR,
+          DefinedGlobals, CfiFunctionDefs, CfiFunctionDecls);
+
+      // The module may be cached, this helps handling it.
+      auto CacheAddStreamExp = Cache(J.Task, J.CacheKey, J.ModuleID);
+      if (Error E = CacheAddStreamExp.takeError()) {
+        Err = joinErrors(std::move(*Err), std::move(E));
+      } else {
+        AddStreamFn &CacheAddStream = *CacheAddStreamExp;
+        // If CacheAddStream is null, we have a cache hit and at this point
+        // object file is already passed back to the linker.
+        if (!CacheAddStream) {
+          J.Cached = true; // Cache hit, mark the job as cached.
+          CachedJobs.fetch_add(1);
+        } else {
+          // If CacheAddStream is not null, we have a cache miss and we need to
+          // run the backend for codegen. Save cache 'add stream'
+          // function for a later use.
+          J.CacheAddStream = std::move(CacheAddStream);
+        }
+      }
+    }
+    return Error::success();
+  }
+
   Error start(
       unsigned Task, BitcodeModule BM,
       const FunctionImporter::ImportMapTy &ImportList,
@@ -2346,46 +2393,15 @@ class OutOfProcessThinBackend : public CGThinBackend {
     // The BackendThreadPool is only used here to write the sharded index files
     // (similar to WriteIndexesThinBackend).
     BackendThreadPool.async(
-        [=](Job &J, const FunctionImporter::ImportMapTy &ImportList) {
-          if (auto E = emitFiles(ImportList, J.ModuleID, J.ModuleID.str(),
-                                 J.SummaryIndexPath, J.ImportsFiles)) {
-            std::unique_lock<std::mutex> L(ErrMu);
-            if (Err)
-              Err = joinErrors(std::move(*Err), std::move(E));
-            else
-              Err = std::move(E);
-          }
-
-          if (Cache.isValid() &&
-              CombinedIndex.modulePaths().count(J.ModuleID) &&
-              all_of(CombinedIndex.getModuleHash(J.ModuleID),
-                     [](uint32_t V) { return V != 0; })) {
-
-            const GVSummaryMapTy &DefinedGlobals =
-                ModuleToDefinedGVSummaries.find(ModulePath)->second;
-
-            // Compute and store a bitcode module cache key.
-            J.CacheKey = computeLTOCacheKey(
-                Conf, CombinedIndex, ModulePath, ImportList, ExportList,
-                ResolvedODR, DefinedGlobals, CfiFunctionDefs, CfiFunctionDecls);
-
-            // Check if we have something in the cache.
-            auto CacheAddStreamExp = Cache(J.Task, J.CacheKey, J.ModuleID);
-            if (Error E = CacheAddStreamExp.takeError()) {
-              Err = joinErrors(std::move(*Err), std::move(E));
-            } else {
-              AddStreamFn &CacheAddStream = *CacheAddStreamExp;
-              if (!CacheAddStream) {
-                J.Cached = true; // Cache hit, mark the job as cached.
-                CachedJobs.fetch_add(1);
-              } else {
-                // Cache miss, save cache 'add stream' function for a later use.
-                J.CacheAddStream = std::move(CacheAddStream);
-              }
-            }
-          }
+        [=](Job &J, const FunctionImporter::ImportMapTy &ImportList,
+            const FunctionImporter::ExportSetTy &ExportList,
+            const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>
+                &ResolvedODR) {
+          Error E =
+              runThinLTOBackendThread(J, ImportList, ExportList, ResolvedODR);
         },
-        std::ref(J), std::ref(ImportList));
+        std::ref(J), std::ref(ImportList), std::ref(ExportList),
+        std::ref(ResolvedODR));
 
     return Error::success();
   }
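
A note on the std::ref arguments to BackendThreadPool.async above:
ThreadPool::async copies its arguments into the deferred call by default,
so the Job must be passed via std::ref for the worker thread's writes to
J.CacheKey, J.CacheAddStream, and J.Cached to be visible in the Jobs
vector that the distributor step reads later. A minimal illustration
(Pool and Job here are hypothetical stand-ins, not the PR's types):

  struct Job { bool Cached = false; };
  Job J;
  // Mutates the caller's J, as the code above relies on:
  Pool.async([](Job &J) { J.Cached = true; }, std::ref(J));
  // Would mutate a private copy instead:
  Pool.async([](Job J) { J.Cached = true; }, J);
  Pool.wait();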

From 1b6df1c8a5a8d7e3748e76495cc6fb808f9931d0 Mon Sep 17 00:00:00 2001
From: Romanova <katya.romanova at sony.com>
Date: Fri, 7 Nov 2025 02:58:44 -0800
Subject: [PATCH 4/4] Addressed second round of code review comments

---
 llvm/include/llvm/LTO/Config.h |  9 ++--
 llvm/lib/LTO/LTO.cpp           | 87 +++++++++++++++++++---------------
 2 files changed, 53 insertions(+), 43 deletions(-)

diff --git a/llvm/include/llvm/LTO/Config.h b/llvm/include/llvm/LTO/Config.h
index a65ba6abf4e48..566a87ed1a790 100644
--- a/llvm/include/llvm/LTO/Config.h
+++ b/llvm/include/llvm/LTO/Config.h
@@ -94,6 +94,11 @@ struct Config {
   /// need to create copies, so it can set this field to false.
   bool KeepSymbolNameCopies = true;
 
+  /// This flag is included as one of the inputs to the cache key computation
+  /// to ensure that in-process (ThinLTO) and out-of-process (DTLTO) cache
+  /// entries are kept distinct.
+  mutable bool Dtlto = false;
+
   /// Allows non-imported definitions to get the potentially more constraining
   /// visibility from the prevailing definition. FromPrevailing is the default
   /// because it works for many binary formats. ELF can use the more optimized
@@ -281,10 +286,6 @@ struct Config {
   LLVM_ABI Error addSaveTemps(std::string OutputFileName,
                               bool UseInputModulePath = false,
                               const DenseSet<StringRef> &SaveTempsArgs = {});
-  // DTLTO flag is used as one of parameters to calculate cache entries and to
-  // ensure that in-process cache and out-of-process (DTLTO) cache are
-  // distinguished.
-  mutable bool Dtlto = 0;
 };
 
 struct LTOLLVMDiagnosticHandler : public DiagnosticHandler {
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index a6dcd920d5eec..9013bc6418af8 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -2324,43 +2324,39 @@ class OutOfProcessThinBackend : public CGThinBackend {
         "Run ThinLTO backend thread (out-of-process)", J.ModuleID);
 
     if (auto E = emitFiles(ImportList, J.ModuleID, J.ModuleID.str(),
-                           J.SummaryIndexPath, J.ImportsFiles)) {
-      std::unique_lock<std::mutex> L(ErrMu);
-      if (Err)
-        Err = joinErrors(std::move(*Err), std::move(E));
-      else
-        Err = std::move(E);
-    }
+                           J.SummaryIndexPath, J.ImportsFiles))
+      return E;
 
-    if (Cache.isValid() && CombinedIndex.modulePaths().count(J.ModuleID) &&
-        all_of(CombinedIndex.getModuleHash(J.ModuleID),
+    if (!Cache.isValid() || !CombinedIndex.modulePaths().count(J.ModuleID) ||
+        any_of(CombinedIndex.getModuleHash(J.ModuleID),
-               [](uint32_t V) { return V != 0; })) {
-      const GVSummaryMapTy &DefinedGlobals =
-          ModuleToDefinedGVSummaries.find(J.ModuleID)->second;
-
-      // The module may be cached, this helps handling it.
-      J.CacheKey = computeLTOCacheKey(
-          Conf, CombinedIndex, J.ModuleID, ImportList, ExportList, ResolvedODR,
-          DefinedGlobals, CfiFunctionDefs, CfiFunctionDecls);
-
-      // The module may be cached, this helps handling it.
-      auto CacheAddStreamExp = Cache(J.Task, J.CacheKey, J.ModuleID);
-      if (Error E = CacheAddStreamExp.takeError()) {
-        Err = joinErrors(std::move(*Err), std::move(E));
-      } else {
-        AddStreamFn &CacheAddStream = *CacheAddStreamExp;
-        // If CacheAddStream is null, we have a cache hit and at this point
-        // object file is already passed back to the linker.
-        if (!CacheAddStream) {
-          J.Cached = true; // Cache hit, mark the job as cached.
-          CachedJobs.fetch_add(1);
-        } else {
-          // If CacheAddStream is not null, we have a cache miss and we need to
-          // run the backend for codegen. Save cache 'add stream'
-          // function for a later use.
-          J.CacheAddStream = std::move(CacheAddStream);
-        }
-      }
+               [](uint32_t V) { return V == 0; }))
+      // Cache disabled, no entry for this module in the combined index, or
+      // no module hash; nothing to cache for this job.
+      return Error::success();
+
+    const GVSummaryMapTy &DefinedGlobals =
+        ModuleToDefinedGVSummaries.find(J.ModuleID)->second;
+
+    // Compute the key that identifies this module's cache entry.
+    J.CacheKey = computeLTOCacheKey(Conf, CombinedIndex, J.ModuleID, ImportList,
+                                    ExportList, ResolvedODR, DefinedGlobals,
+                                    CfiFunctionDefs, CfiFunctionDecls);
+
+    // Query the cache for an existing entry with this key.
+    auto CacheAddStreamExp = Cache(J.Task, J.CacheKey, J.ModuleID);
+    if (Error Err = CacheAddStreamExp.takeError())
+      return Err;
+    AddStreamFn &CacheAddStream = *CacheAddStreamExp;
+    // If CacheAddStream is null, we have a cache hit and at this point the
+    // object file has already been passed back to the linker.
+    if (!CacheAddStream) {
+      J.Cached = true; // Cache hit, mark the job as cached.
+      CachedJobs.fetch_add(1);
+    } else {
+      // If CacheAddStream is not null, we have a cache miss and we need to
+      // run the backend for codegen. Save the cache 'add stream' function
+      // for later use.
+      J.CacheAddStream = std::move(CacheAddStream);
     }
     return Error::success();
   }
@@ -2399,6 +2395,13 @@ class OutOfProcessThinBackend : public CGThinBackend {
                 &ResolvedODR) {
           Error E =
               runThinLTOBackendThread(J, ImportList, ExportList, ResolvedODR);
+          if (E) {
+            std::unique_lock<std::mutex> L(ErrMu);
+            if (Err)
+              Err = joinErrors(std::move(*Err), std::move(E));
+            else
+              Err = std::move(E);
+          }
         },
         std::ref(J), std::ref(ImportList), std::ref(ExportList),
         std::ref(ResolvedODR));
@@ -2490,9 +2493,10 @@ class OutOfProcessThinBackend : public CGThinBackend {
       JOS.attributeArray("jobs", [&]() {
         for (const auto &J : Jobs) {
           assert(J.Task != 0);
-
-          if (J.Cached)
+          if (J.Cached) {
+            assert(!Cache.getCacheDirectoryPath().empty());
             continue;
+          }
 
           SmallVector<StringRef, 2> Inputs;
           SmallVector<StringRef, 1> Outputs;
@@ -2583,8 +2587,10 @@ class OutOfProcessThinBackend : public CGThinBackend {
     }
 
     for (auto &Job : Jobs) {
-      if (!Job.CacheKey.empty() && (Job.Cached))
+      if (!Job.CacheKey.empty() && Job.Cached) {
+        assert(Cache.isValid());
         continue;
+      }
       // Load the native object from a file into a memory buffer
       // and store its contents in the output buffer.
       auto ObjFileMbOrErr =
@@ -2597,7 +2603,10 @@ class OutOfProcessThinBackend : public CGThinBackend {
             inconvertibleErrorCode());
 
       MemoryBufferRef ObjFileMbRef = ObjFileMbOrErr->get()->getMemBufferRef();
-      if (Cache.isValid() && Job.CacheAddStream) {
+      if (Cache.isValid()) {
+        // Cache hits are taken care of earlier. At this point, we can only
+        // have cache misses.
+        assert(Job.CacheAddStream);
         // Obtain a file stream for storing a cache entry.
         auto CachedFileStreamOrErr = Job.CacheAddStream(Job.Task, Job.ModuleID);
         if (!CachedFileStreamOrErr)
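
For context on where cache hits end up: the FileCache consumed by this
backend is created on the linker side with llvm::localCache, whose
AddBuffer callback is what delivers an already-cached object directly to
the link. A rough sketch of such a construction (simplified; the actual
lld wiring differs, and saveBuffer is a hypothetical placeholder):

  Expected<FileCache> CacheOrErr = localCache(
      "ThinLTO", "Thin", CacheDirectoryPath,
      [&](unsigned Task, const Twine &ModuleName,
          std::unique_ptr<MemoryBuffer> MB) {
        // Invoked on a cache hit: hand the cached native object for
        // this task straight to the linker.
        saveBuffer(Task, std::move(MB));
      });
  if (!CacheOrErr)
    return CacheOrErr.takeError();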


