[lld] [llvm] [DTLTO][ELF][COFF][MachO] Add archive support for DTLTO. (PR #157043)

via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 8 05:01:39 PDT 2025


https://github.com/kbelochapka updated https://github.com/llvm/llvm-project/pull/157043

>From 3a92171ca70f2f689797088f20f3b45f84021df4 Mon Sep 17 00:00:00 2001
From: Konstantin Belochapka <konstantin.belochapka at sony.com>
Date: Sun, 24 Aug 2025 22:35:14 -0700
Subject: [PATCH 1/5] [DTLTO][ELF][COFF][MachO] Add archive support for DTLTO.

This patch implements support for handling archive members in DTLTO.

Unlike ThinLTO, where archive members are passed as in-memory buffers,
DTLTO requires archive members to be materialized as individual files on the filesystem.
This is necessary because DTLTO invokes clang externally, which expects file-based inputs.
To support this, this implementation identifies archive members among the input files,
saves them to the filesystem, and updates their module_id to match their file paths.
---
 cross-project-tests/CMakeLists.txt            |   1 +
 cross-project-tests/dtlto/archive.test        |  80 +++++++
 .../dtlto/archives-mixed-lto-modes-test.test  |  35 +++
 .../dtlto/archives-same-module-id.test        |  55 +++++
 lld/COFF/InputFiles.cpp                       |   1 +
 lld/ELF/InputFiles.cpp                        |   1 +
 lld/ELF/LTO.cpp                               |   3 +
 lld/MachO/InputFiles.cpp                      |   1 +
 llvm/include/llvm/Bitcode/BitcodeReader.h     |   5 +
 llvm/include/llvm/DTLTO/Dtlto.h               |  23 ++
 llvm/include/llvm/LTO/LTO.h                   |  48 ++++
 llvm/lib/CMakeLists.txt                       |   1 +
 llvm/lib/DTLTO/CMakeLists.txt                 |   7 +
 llvm/lib/DTLTO/Dtlto.cpp                      | 226 ++++++++++++++++++
 llvm/lib/LTO/CMakeLists.txt                   |   1 +
 llvm/lib/LTO/LTO.cpp                          |  20 +-
 16 files changed, 506 insertions(+), 2 deletions(-)
 create mode 100644 cross-project-tests/dtlto/archive.test
 create mode 100644 cross-project-tests/dtlto/archives-mixed-lto-modes-test.test
 create mode 100644 cross-project-tests/dtlto/archives-same-module-id.test
 create mode 100644 llvm/include/llvm/DTLTO/Dtlto.h
 create mode 100644 llvm/lib/DTLTO/CMakeLists.txt
 create mode 100644 llvm/lib/DTLTO/Dtlto.cpp

diff --git a/cross-project-tests/CMakeLists.txt b/cross-project-tests/CMakeLists.txt
index 192db87043177..6753a27698eae 100644
--- a/cross-project-tests/CMakeLists.txt
+++ b/cross-project-tests/CMakeLists.txt
@@ -20,6 +20,7 @@ set(CROSS_PROJECT_TEST_DEPS
   check-gdb-llvm-support
   count
   llvm-ar
+  llvm-ar
   llvm-config
   llvm-dwarfdump
   llvm-objdump
diff --git a/cross-project-tests/dtlto/archive.test b/cross-project-tests/dtlto/archive.test
new file mode 100644
index 0000000000000..1c01ae7a5691a
--- /dev/null
+++ b/cross-project-tests/dtlto/archive.test
@@ -0,0 +1,80 @@
+REQUIRES: x86-registered-target,ld.lld,llvm-ar
+
+# Test that a DTLTO link succeeds and outputs the expected set of files
+# correctly when archives are present.
+
+RUN: rm -rf %t && split-file %s %t && cd %t
+# Compile sources into bitcode. -O2 is required for cross-module importing.
+RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c foo.c boo.c moo.c loo.c voo.c main.c
+
+RUN: llvm-ar rcs archive.a foo.o boo.o moo.o
+RUN: llvm-ar rcsT archive.thin.a loo.o voo.o
+
+# Build with DTLTO.
+RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin \
+RUN:   -fuse-ld=lld -nostdlib -e main \
+RUN:   main.o archive.a archive.thin.a -o main.elf \
+RUN:   -Wl,--thinlto-distributor=%python \
+RUN:   -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
+RUN:   -Wl,--thinlto-remote-compiler=%clang \
+RUN:   -Wl,--save-temps
+
+# Check that the required output files have been created.
+RUN: ls | FileCheck %s --check-prefix=OUTPUTS
+
+# JSON jobs description.
+OUTPUTS-DAG:    {{^}}main.[[PID:[0-9]+]].dist-file.json
+
+# Main source.
+OUTPUTS-DAG:    {{^}}main.{{[0-9]+}}.[[PID]].native.o{{$}}
+OUTPUTS-DAG:    {{^}}main.{{[0-9]+}}.[[PID]].native.o.thinlto.bc{{$}}
+
+# Regular archive members.
+# Filename composition: <archive>(<member> at <offset>).<seq>.<hex-pid>.<task>.<pid>.native.o[.thinlto.bc].
+OUTPUTS-DAG:    {{^}}archive.a(boo.o at {{[0-9]+}}).2.[[HEXPID:[a-fA-F0-9]+]].2.[[PID]].native.o{{$}}
+OUTPUTS-DAG:    {{^}}archive.a(boo.o at {{[0-9]+}}).2.[[HEXPID]].2.[[PID]].native.o.thinlto.bc{{$}}
+
+OUTPUTS-DAG:    {{^}}archive.a(foo.o at {{[0-9]+}}).3.[[HEXPID]].3.[[PID]].native.o{{$}}
+OUTPUTS-DAG:    {{^}}archive.a(foo.o at {{[0-9]+}}).3.[[HEXPID]].3.[[PID]].native.o.thinlto.bc{{$}}
+
+OUTPUTS-DAG:    {{^}}archive.a(moo.o at {{[0-9]+}}).4.[[HEXPID]].4.[[PID]].native.o{{$}}
+OUTPUTS-DAG:    {{^}}archive.a(moo.o at {{[0-9]+}}).4.[[HEXPID]].4.[[PID]].native.o.thinlto.bc{{$}}
+
+# Thin archive members.
+OUTPUTS-DAG:    {{^}}voo.{{[0-9]+}}.[[PID]].native.o{{$}}
+OUTPUTS-DAG:    {{^}}voo.{{[0-9]+}}.[[PID]].native.o.thinlto.bc{{$}}
+
+OUTPUTS-DAG:    {{^}}loo.{{[0-9]+}}.[[PID]].native.o{{$}}
+OUTPUTS-DAG:    {{^}}loo.{{[0-9]+}}.[[PID]].native.o.thinlto.bc{{$}}
+
+# Executable file.
+OUTPUTS-DAG:    {{^}}main.elf{{$}}
+
+#--- foo.c
+volatile int foo_int;
+__attribute__((retain)) int foo(int x) { return x + foo_int; }
+
+#--- boo.c
+extern int foo(int x);
+__attribute__((retain)) int boo(int x) { return foo(x); }
+
+#--- moo.c
+__attribute__((retain)) int moo() { return 3; }
+
+#--- loo.c
+extern int moo(int x);
+__attribute__((retain)) int loo(int x) { return moo(x); }
+
+#--- voo.c
+extern int foo(int x);
+extern int loo(int x);
+__attribute__((retain)) int voo(int x) { return foo(x) + loo(x + 1) + 7; }
+
+#--- main.c
+extern int boo(int x);
+extern int moo();
+extern int voo(int x);
+__attribute__((retain)) int main(int argc, char** argv) {
+  return boo(argc) + moo() + voo(argc + 3);
+}
+
diff --git a/cross-project-tests/dtlto/archives-mixed-lto-modes-test.test b/cross-project-tests/dtlto/archives-mixed-lto-modes-test.test
new file mode 100644
index 0000000000000..74f146028b4b6
--- /dev/null
+++ b/cross-project-tests/dtlto/archives-mixed-lto-modes-test.test
@@ -0,0 +1,35 @@
+REQUIRES: x86-registered-target,ld.lld,llvm-ar
+
+# Test that DTLTO works with a mixture of FullLTO and ThinLTO bitcode archive members
+# where there is more than one LTO partition.
+
+RUN: rm -rf %t && split-file %s %t && cd %t
+
+RUN: %clang --target=x86_64-linux-gnu -flto -c one.c two.c
+RUN: %clang --target=x86_64-linux-gnu -flto=thin -c three.c
+
+RUN: llvm-ar rc archive.a one.o two.o three.o
+
+# Build with DTLTO.
+RUN: %clang --target=x86_64-linux-gnu -Werror -flto -fuse-ld=lld -nostdlib \
+RUN:   -Wl,--whole-archive archive.a \
+RUN:   -Wl,--thinlto-distributor=%python \
+RUN:   -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
+RUN:   -Wl,--thinlto-remote-compiler=%clang \
+RUN:   -Wl,--save-temps,--lto-partitions=2
+
+# Show that the FullLTO modules have also been prepared for distribution.
+# This is not optimal, but it has no functional impact.
+RUN: FileCheck %s --input-file=a.out.resolution.txt
+CHECK: archive.a(one.o at {{.*}}).1.[[PID:[a-zA-Z0-9_]+]].o
+CHECK: archive.a(two.o at {{.*}}).2.[[PID]].o
+CHECK: archive.a(three.o at {{.*}}).3.[[PID]].o
+
+#--- one.c
+__attribute__((retain)) void one() {}
+
+#--- two.c
+__attribute__((retain)) void two() {}
+
+#--- three.c
+__attribute__((retain)) void three() {}
diff --git a/cross-project-tests/dtlto/archives-same-module-id.test b/cross-project-tests/dtlto/archives-same-module-id.test
new file mode 100644
index 0000000000000..09d5f7492bfa5
--- /dev/null
+++ b/cross-project-tests/dtlto/archives-same-module-id.test
@@ -0,0 +1,55 @@
+REQUIRES: x86-registered-target,ld.lld,llvm-ar
+
+# Test that a DTLTO link succeeds when there are two archive member files with
+# the same filename path component.
+
+# Split this file into several sources.
+RUN: rm -rf %t && split-file %s %t && cd %t
+
+RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c start.c
+
+# Create first archive.
+RUN: mkdir archive1 && cd archive1
+RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c ../t1.c ../t3.c
+RUN: llvm-ar rc archive.a t3.o t1.o
+RUN: cd ..
+
+# Create second archive.
+RUN: mkdir archive2 && cd archive2
+RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c ../t1.c ../t3.c
+RUN: llvm-ar rc archive.a t3.o t1.o
+RUN: cd ..
+
+RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld \
+RUN:   -nostdlib -Wl,--undefined=t1,--undefined=t3 \
+RUN:    start.o archive1/archive.a archive2/archive.a -o main.elf \
+RUN:   -Wl,--save-temps \
+RUN:   -Wl,--thinlto-distributor=%python \
+RUN:   -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
+RUN:   -Wl,--thinlto-remote-compiler=%clang
+
+# Check that the required output files have been created.
+RUN: ls | FileCheck %s --check-prefix=OUTPUTS
+
+# JSON jobs description.
+OUTPUTS-DAG:    {{^}}main.[[PID:[0-9]+]].dist-file.json
+
+# Sources.
+OUTPUTS-DAG:    {{^}}start.{{[0-9]+}}.[[PID]].native.o{{$}}
+OUTPUTS-DAG:    {{^}}start.{{[0-9]+}}.[[PID]].native.o.thinlto.bc{{$}}
+
+# Archive members.
+# Filename composition: <archive>(<member> at <offset>).<seq>.<hex-pid>.<task>.<pid>.native.o[.thinlto.bc].
+OUTPUTS-DAG:    {{^}}archive.a(t3.o at {{[0-9]+}}).2.[[HEXPID:[a-fA-F0-9]+]].2.[[PID]].native.o{{$}}
+OUTPUTS-DAG:    {{^}}archive.a(t3.o at {{[0-9]+}}).2.[[HEXPID]].2.[[PID]].native.o.thinlto.bc{{$}}
+OUTPUTS-DAG:    {{^}}archive.a(t1.o at {{[0-9]+}}).3.[[HEXPID]].3.[[PID]].native.o{{$}}
+OUTPUTS-DAG:    {{^}}archive.a(t1.o at {{[0-9]+}}).3.[[HEXPID]].3.[[PID]].native.o.thinlto.bc{{$}}
+
+#--- t1.c
+__attribute__((retain)) void t1() { }
+
+#--- start.c
+__attribute__((retain)) void _start() { }
+
+#--- t3.c
+__attribute__((retain)) void t3() { }
diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp
index c08099b8810bb..d415955b6093b 100644
--- a/lld/COFF/InputFiles.cpp
+++ b/lld/COFF/InputFiles.cpp
@@ -1380,6 +1380,7 @@ BitcodeFile *BitcodeFile::create(COFFLinkerContext &ctx, MemoryBufferRef mb,
                                                utostr(offsetInArchive)));
 
   std::unique_ptr<lto::InputFile> obj = check(lto::InputFile::create(mbref));
+  obj->setArchivePathAndName(archiveName, mb.getBufferIdentifier());
   return make<BitcodeFile>(ctx.getSymtab(getMachineType(obj.get())), mb, obj,
                            lazy);
 }
diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index a5921feb18299..ec0af9d0c0f4e 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -1874,6 +1874,7 @@ BitcodeFile::BitcodeFile(Ctx &ctx, MemoryBufferRef mb, StringRef archiveName,
   MemoryBufferRef mbref(mb.getBuffer(), name);
 
   obj = CHECK2(lto::InputFile::create(mbref), this);
+  obj->setArchivePathAndName(archiveName, mb.getBufferIdentifier());
 
   Triple t(obj->getTargetTriple());
   ekind = getBitcodeELFKind(t);
diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp
index 8d4a6c9e3a81e..13e9c63495e4d 100644
--- a/lld/ELF/LTO.cpp
+++ b/lld/ELF/LTO.cpp
@@ -202,6 +202,9 @@ BitcodeCompiler::BitcodeCompiler(Ctx &ctx) : ctx(ctx) {
                                       ctx.arg.ltoPartitions,
                                       ltoModes[ctx.arg.ltoKind]);
 
+  if(!ctx.arg.dtltoDistributor.empty())
+      ltoObj->Dtlto = true;
+
   // Initialize usedStartStop.
   if (ctx.bitcodeFiles.empty())
     return;
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 3b3023a94166f..bb40fcfb7701f 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -2360,6 +2360,7 @@ BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
                                                sys::path::filename(path) + ")" +
                                                utostr(offsetInArchive)));
   obj = check(lto::InputFile::create(mbref));
+  obj->setArchivePathAndName(archiveName, mb.getBufferIdentifier());
   if (lazy)
     parseLazy();
   else
diff --git a/llvm/include/llvm/Bitcode/BitcodeReader.h b/llvm/include/llvm/Bitcode/BitcodeReader.h
index 4f839d4cd1575..772ca82019278 100644
--- a/llvm/include/llvm/Bitcode/BitcodeReader.h
+++ b/llvm/include/llvm/Bitcode/BitcodeReader.h
@@ -137,6 +137,11 @@ struct ParserCallbacks {
 
     StringRef getModuleIdentifier() const { return ModuleIdentifier; }
 
+    // Assign a new module identifier to this bitcode module.
+    void setModuleIdentifier(llvm::StringRef ModuleId) {
+      ModuleIdentifier = ModuleId;
+    }
+
     /// Read the bitcode module and prepare for lazy deserialization of function
     /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well.
     /// If IsImporting is true, this module is being parsed for ThinLTO
diff --git a/llvm/include/llvm/DTLTO/Dtlto.h b/llvm/include/llvm/DTLTO/Dtlto.h
new file mode 100644
index 0000000000000..aa6af7d0cd9b7
--- /dev/null
+++ b/llvm/include/llvm/DTLTO/Dtlto.h
@@ -0,0 +1,23 @@
+//===- Dtlto.h - Distributed ThinLTO functions and classes ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+
+#ifndef LLVM_DTLTO_H
+#define LLVM_DTLTO_H
+
+#include "llvm/LTO/LTO.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+namespace dtlto {
+
+llvm::Expected<llvm::lto::InputFile*> addInput(llvm::lto::LTO *LtoObj,
+                               std::unique_ptr<llvm::lto::InputFile> Input);
+
+llvm::Error process(llvm::lto::LTO &LtoObj);
+} // namespace dtlto
+
+#endif // LLVM_DTLTO_H
diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h
index 323c478691a92..7183a4429e665 100644
--- a/llvm/include/llvm/LTO/LTO.h
+++ b/llvm/include/llvm/LTO/LTO.h
@@ -32,6 +32,23 @@
 #include "llvm/Transforms/IPO/FunctionAttrs.h"
 #include "llvm/Transforms/IPO/FunctionImport.h"
 
+namespace llvm {
+namespace lto {
+class LTO;
+}
+} // namespace llvm
+
+namespace dtlto {
+/// Removes an LTO object's temporary archive-member files when destroyed.
+class TempFilesRemover {
+  llvm::lto::LTO *Lto = nullptr; ///< Not owned; may be null.
+public:
+  explicit TempFilesRemover(llvm::lto::LTO *LtoObj) : Lto{LtoObj} {}
+  ~TempFilesRemover();
+};
+
+} // namespace dtlto
+
 namespace llvm {
 
 class Error;
@@ -135,6 +152,12 @@ class InputFile {
   std::vector<StringRef> DependentLibraries;
   std::vector<std::pair<StringRef, Comdat::SelectionKind>> ComdatTable;
 
+  MemoryBufferRef MbRef;
+  bool IsMemberOfArchive = false;
+  bool IsThinLTO = false;
+  StringRef ArchivePath;
+  StringRef MemberName;
+
 public:
   LLVM_ABI ~InputFile();
 
@@ -193,6 +216,20 @@ class InputFile {
 
   // Returns the only BitcodeModule from InputFile.
   LLVM_ABI BitcodeModule &getSingleBitcodeModule();
+  // Returns the memory buffer reference for this input file.
+  MemoryBufferRef getFileBuffer() const { return MbRef; }
+  // Returns true if this input file is a member of an archive.
+  bool isMemberOfArchive() const { return IsMemberOfArchive; }
+  // Mark this input file as a member of archive.
+  void memberOfArchive(bool MA) { IsMemberOfArchive = MA; }
+
+  // Returns true if bitcode is ThinLTO.
+  bool isThinLTO() const { return IsThinLTO; }
+
+  // Store an archive path and a member name.
+  void setArchivePathAndName(StringRef Path, StringRef Name) { ArchivePath = Path; MemberName = Name; }
+  StringRef getArchivePath() const  { return ArchivePath; }
+  StringRef getMemberName() const  { return MemberName; }
 
 private:
   ArrayRef<Symbol> module_symbols(unsigned I) const {
@@ -580,6 +617,17 @@ class LTO {
 
   // Diagnostic optimization remarks file
   std::unique_ptr<ToolOutputFile> DiagnosticOutputFile;
+
+public:
+  /// DTLTO mode.
+  bool Dtlto = false;
+
+  BumpPtrAllocator PtrAlloc;
+  StringSaver Saver{PtrAlloc};
+
+  // Array of input bitcode files for LTO.
+  std::vector<std::unique_ptr<llvm::lto::InputFile>> InputFiles;
+  std::unique_ptr<dtlto::TempFilesRemover> TempsRemover;
 };
 
 /// The resolution for a symbol. The linker must provide a SymbolResolution for
diff --git a/llvm/lib/CMakeLists.txt b/llvm/lib/CMakeLists.txt
index a9432977718c6..0856af9058fef 100644
--- a/llvm/lib/CMakeLists.txt
+++ b/llvm/lib/CMakeLists.txt
@@ -22,6 +22,7 @@ add_subdirectory(Frontend)
 add_subdirectory(Transforms)
 add_subdirectory(Linker)
 add_subdirectory(Analysis)
+add_subdirectory(DTLTO)
 add_subdirectory(LTO)
 add_subdirectory(MC)
 add_subdirectory(MCA)
diff --git a/llvm/lib/DTLTO/CMakeLists.txt b/llvm/lib/DTLTO/CMakeLists.txt
new file mode 100644
index 0000000000000..51fd8aad6f48b
--- /dev/null
+++ b/llvm/lib/DTLTO/CMakeLists.txt
@@ -0,0 +1,7 @@
+add_llvm_component_library(LLVMDTLTO
+  Dtlto.cpp
+
+  LINK_COMPONENTS
+  Core
+  Support
+  )
diff --git a/llvm/lib/DTLTO/Dtlto.cpp b/llvm/lib/DTLTO/Dtlto.cpp
new file mode 100644
index 0000000000000..356f30188c5c4
--- /dev/null
+++ b/llvm/lib/DTLTO/Dtlto.cpp
@@ -0,0 +1,226 @@
+//===- Dtlto.cpp - Distributed ThinLTO implementation --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// This file implements support functions for Distributed ThinLTO, focusing on
+// archive file handling.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DTLTO/Dtlto.h"
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Magic.h"
+#include "llvm/LTO/LTO.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBufferRef.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <iostream>
+#include <string>
+
+using namespace llvm;
+
+namespace dtlto {
+
+// Deletes the on-disk copies of regular archive members belonging to the
+// tracked LTO object. Files that no longer exist are ignored, and so is the
+// error code returned by each removal.
+TempFilesRemover::~TempFilesRemover() {
+  if (Lto == nullptr)
+    return;
+  for (const auto &Member : Lto->InputFiles)
+    if (Member->isMemberOfArchive())
+      sys::fs::remove(Member->getName(), /*IgnoreNonExisting=*/true);
+}
+
+// Writes the content of a memory buffer into a file.
+static llvm::Error saveBuffer(StringRef FileBuffer, StringRef FilePath) {
+  std::error_code EC;
+  raw_fd_ostream OS(FilePath.str(), EC, sys::fs::OpenFlags::OF_None);
+  if (EC) {
+    return createStringError(inconvertibleErrorCode(),
+                             "Failed to create file %s: %s", FilePath.data(),
+                             EC.message().c_str());
+  }
+  OS.write(FileBuffer.data(), FileBuffer.size());
+  if (OS.has_error()) {
+    return createStringError(inconvertibleErrorCode(),
+                             "Failed writing to file %s", FilePath.data());
+  }
+  return Error::success();
+}
+
+// Resolve where a thin archive member lives on disk.
+// A relative member name is joined onto the directory containing the archive;
+// any other name is used as given. Dot components are collapsed at the end.
+SmallString<64> computeThinArchiveMemberPath(const StringRef ArchivePath,
+                                             const StringRef MemberName) {
+  assert(!ArchivePath.empty() && "An archive file path must be non empty.");
+  SmallString<64> Resolved;
+  if (!sys::path::is_relative(MemberName)) {
+    Resolved = MemberName;
+  } else {
+    Resolved = sys::path::parent_path(ArchivePath);
+    sys::path::append(Resolved, MemberName);
+  }
+  sys::path::remove_dots(Resolved, /*remove_dot_dot=*/true);
+  return Resolved;
+}
+
+// Magic string identifying thin archive files.
+static constexpr StringLiteral THIN_ARCHIVE_MAGIC = "!<thin>\n";
+
+// Reports whether the file at ArchivePath is a thin archive. Returns an Error
+// if the size query or the read fails, if the file is shorter than the magic
+// string, or if identify_magic() does not classify it as an archive.
+// Only a magic-sized prefix is read, and successful answers are kept in a
+// function-local static map keyed by path, so later calls skip the file.
+Expected<bool> isThinArchive(const StringRef ArchivePath) {
+  static StringMap<bool> ArchiveFiles;
+
+  // Return the cached answer for this path, if one was recorded earlier.
+  auto Cached = ArchiveFiles.find(ArchivePath);
+  if (Cached != ArchiveFiles.end())
+    return Cached->second;
+
+  uint64_t FileSize = -1;
+  bool IsThin = false;
+  std::error_code EC = sys::fs::file_size(ArchivePath, FileSize);
+  if (EC)
+    return createStringError(inconvertibleErrorCode(),
+                             "Failed to get file size from archive %s: %s",
+                             ArchivePath.data(), EC.message().c_str());
+  if (FileSize < THIN_ARCHIVE_MAGIC.size())
+    return createStringError(inconvertibleErrorCode(),
+                             "Archive file size is too small %s",
+                             ArchivePath.data());
+
+  // Read only the magic-sized slice that starts at offset 0 of the file.
+  ErrorOr<std::unique_ptr<MemoryBuffer>> MemBufferOrError =
+      MemoryBuffer::getFileSlice(ArchivePath, THIN_ARCHIVE_MAGIC.size(), 0);
+
+  if (EC = MemBufferOrError.getError())
+    return createStringError(inconvertibleErrorCode(),
+                             "Failed to read from archive %s: %s",
+                             ArchivePath.data(), EC.message().c_str());
+
+  StringRef MemBuf = (*MemBufferOrError.get()).getBuffer();
+  if (file_magic::archive != identify_magic(MemBuf))
+    return createStringError(inconvertibleErrorCode(),
+                             "Unknown format for archive %s",
+                             ArchivePath.data());
+
+  IsThin = MemBuf.starts_with(THIN_ARCHIVE_MAGIC);
+
+  // Remember the answer; the error returns above are never cached.
+  ArchiveFiles[ArchivePath] = IsThin;
+  return IsThin;
+}
+
+// This function performs the following tasks:
+// 1. Adds the input file to the LTO object's list of input files.
+// 2. For thin archive members, generates a new module ID which is a path to a
+// thin archive member file.
+// 3. For regular archive members, generates a new unique module ID.
+// 4. Updates the bitcode module's identifier.
+Expected<lto::InputFile *> addInput(lto::LTO *LtoObj,
+                                    std::unique_ptr<lto::InputFile> InputPtr) {
+
+  // Add the input file to the LTO object.
+  LtoObj->InputFiles.push_back(std::move(InputPtr));
+  lto::InputFile *Input = LtoObj->InputFiles.back().get();
+
+  // Skip processing if not in DTLTO mode.
+  if (!LtoObj->Dtlto)
+    return Input;
+
+  StringRef ModuleId = Input->getName();
+  StringRef ArchivePath = Input->getArchivePath();
+
+  // Only process archive members.
+  if (ArchivePath.empty())
+    return Input;
+
+  SmallString<64> NewModuleId;
+  BitcodeModule &BM = Input->getSingleBitcodeModule();
+
+  // Check if the archive is a thin archive.
+  Expected<bool> IsThin = isThinArchive(ArchivePath);
+  if (!IsThin)
+    return IsThin.takeError();
+
+  if (*IsThin) {
+    // For thin archives, use the path to the actual file.
+    NewModuleId =
+        computeThinArchiveMemberPath(ArchivePath, Input->getMemberName());
+  } else {
+    // For regular archives, generate a unique name.
+    Input->memberOfArchive(true);
+
+    // Create unique identifier using process ID and sequence number.
+    std::string PID = utohexstr(sys::Process::getProcessId());
+    std::string Seq = std::to_string(LtoObj->InputFiles.size());
+
+    NewModuleId = {sys::path::filename(ModuleId), ".", Seq, ".", PID, ".o"};
+  }
+
+  // Update the module identifier and save it.
+  BM.setModuleIdentifier(LtoObj->Saver.save(NewModuleId.str()));
+
+  return Input;
+}
+
+// Materialize a regular archive member as a file named after its module ID.
+// A file already present under that name is presumably left over from an
+// earlier, terminated linker process, so it is simply overwritten.
+Error saveInputArchiveMember(lto::LTO &LtoObj, lto::InputFile *Input) {
+  // Inputs not flagged as regular archive members need no copy on disk.
+  if (!Input->isMemberOfArchive())
+    return Error::success();
+
+  const StringRef OutputPath = Input->getName();
+  const MemoryBufferRef Contents = Input->getFileBuffer();
+  return saveBuffer(Contents.getBuffer(), OutputPath);
+}
+
+// Iterates through all ThinLTO-enabled input files and saves their content
+// to separate files if they are regular archive members.
+Error saveInputArchiveMembers(lto::LTO& LtoObj) {
+  for (auto &Input : LtoObj.InputFiles) {
+    if (!Input->isThinLTO())
+      continue;
+    if (Error EC = saveInputArchiveMember(LtoObj, Input.get()))
+      return EC;
+  }
+  return Error::success();
+}
+
+// Entry point for DTLTO archive support.
+//
+// Outside DTLTO mode this is a no-op. Otherwise it installs the remover that
+// cleans up temporary member files and then writes the archive members that
+// need it to disk. Must be called after all inputs are added but before
+// optimization begins.
+llvm::Error process(llvm::lto::LTO &LtoObj) {
+  if (!LtoObj.Dtlto)
+    return Error::success();
+
+  // Install the remover first so that files are cleaned up even if saving
+  // fails part-way through.
+  LtoObj.TempsRemover = std::make_unique<TempFilesRemover>(&LtoObj);
+
+  return saveInputArchiveMembers(LtoObj);
+}
+
+} // namespace dtlto
diff --git a/llvm/lib/LTO/CMakeLists.txt b/llvm/lib/LTO/CMakeLists.txt
index 057d73b6349cf..499623eacf97c 100644
--- a/llvm/lib/LTO/CMakeLists.txt
+++ b/llvm/lib/LTO/CMakeLists.txt
@@ -25,6 +25,7 @@ add_llvm_component_library(LLVMLTO
   CodeGen
   CodeGenTypes
   Core
+  DTLTO
   Extensions
   IPO
   InstCombine
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 35d24c17bbd93..d0a7eceb38614 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -26,6 +26,7 @@
 #include "llvm/CGData/CodeGenData.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/Config/llvm-config.h"
+#include "llvm/DTLTO/Dtlto.h"
 #include "llvm/IR/AutoUpgrade.h"
 #include "llvm/IR/DiagnosticPrinter.h"
 #include "llvm/IR/Intrinsics.h"
@@ -570,6 +571,8 @@ Expected<std::unique_ptr<InputFile>> InputFile::create(MemoryBufferRef Object) {
   File->COFFLinkerOpts = FOrErr->TheReader.getCOFFLinkerOpts();
   File->DependentLibraries = FOrErr->TheReader.getDependentLibraries();
   File->ComdatTable = FOrErr->TheReader.getComdatTable();
+  File->MbRef =
+      Object; // Save a memory buffer reference to an input file object.
 
   for (unsigned I = 0; I != FOrErr->Mods.size(); ++I) {
     size_t Begin = File->Symbols.size();
@@ -729,12 +732,17 @@ static void writeToResolutionFile(raw_ostream &OS, InputFile *Input,
   assert(ResI == Res.end());
 }
 
-Error LTO::add(std::unique_ptr<InputFile> Input,
+Error LTO::add(std::unique_ptr<InputFile> InputPtr,
                ArrayRef<SymbolResolution> Res) {
   assert(!CalledGetMaxTasks);
 
+  Expected<InputFile *> InputOrErr = dtlto::addInput(this, std::move(InputPtr));
+  if (!InputOrErr)
+    return InputOrErr.takeError();
+  InputFile *Input = *InputOrErr;
+
   if (Conf.ResolutionFile)
-    writeToResolutionFile(*Conf.ResolutionFile, Input.get(), Res);
+    writeToResolutionFile(*Conf.ResolutionFile, Input, Res);
 
   if (RegularLTO.CombinedModule->getTargetTriple().empty()) {
     Triple InputTriple(Input->getTargetTriple());
@@ -782,6 +790,10 @@ LTO::addModule(InputFile &Input, ArrayRef<SymbolResolution> InputRes,
     LTOMode = LTOK_UnifiedThin;
 
   bool IsThinLTO = LTOInfo->IsThinLTO && (LTOMode != LTOK_UnifiedRegular);
+  // If any of the modules inside of a input bitcode file was compiled with
+  // ThinLTO, we assume that the whole input file also was compiled with
+  // ThinLTO.
+  Input.IsThinLTO = IsThinLTO;
 
   auto ModSyms = Input.module_symbols(ModI);
   addModuleToGlobalRes(ModSyms, Res,
@@ -1193,6 +1205,10 @@ Error LTO::checkPartiallySplit() {
 }
 
 Error LTO::run(AddStreamFn AddStream, FileCache Cache) {
+  if (Dtlto) {
+    if (Error EC = dtlto::process(*this))
+      return EC;
+  }
   // Compute "dead" symbols, we don't want to import/export these!
   DenseSet<GlobalValue::GUID> GUIDPreservedSymbols;
   DenseMap<GlobalValue::GUID, PrevailingType> GUIDPrevailingResolutions;

>From 9afd973128f2c66f97616b70d4fc01d45178a0cb Mon Sep 17 00:00:00 2001
From: Konstantin Belochapka <konstantin.belochapka at sony.com>
Date: Fri, 5 Sep 2025 22:44:40 -0700
Subject: [PATCH 2/5] [DTLTO][ELF][COFF][MachO] Add archive support for DTLTO -
 Fixed compilation error on Linux.

---
 llvm/lib/DTLTO/Dtlto.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/DTLTO/Dtlto.cpp b/llvm/lib/DTLTO/Dtlto.cpp
index 356f30188c5c4..97a15eda81674 100644
--- a/llvm/lib/DTLTO/Dtlto.cpp
+++ b/llvm/lib/DTLTO/Dtlto.cpp
@@ -110,7 +110,7 @@ Expected<bool> isThinArchive(const StringRef ArchivePath) {
   ErrorOr<std::unique_ptr<MemoryBuffer>> MemBufferOrError =
       MemoryBuffer::getFileSlice(ArchivePath, THIN_ARCHIVE_MAGIC.size(), 0);
 
-  if (EC = MemBufferOrError.getError())
+  if ((EC = MemBufferOrError.getError()))
     return createStringError(inconvertibleErrorCode(),
                              "Failed to read from archive %s: %s",
                              ArchivePath.data(), EC.message().c_str());

>From ee4ec950532238d306657df35baecc5e3d5727e1 Mon Sep 17 00:00:00 2001
From: Konstantin Belochapka <konstantin.belochapka at sony.com>
Date: Fri, 5 Sep 2025 22:55:07 -0700
Subject: [PATCH 3/5] [DTLTO][ELF][COFF][MachO] Add archive support for DTLTO -
 Fixed formatting errors.

---
 lld/COFF/LTO.cpp                | 3 +++
 lld/ELF/LTO.cpp                 | 4 ++--
 llvm/include/llvm/DTLTO/Dtlto.h | 4 ++--
 llvm/include/llvm/LTO/LTO.h     | 9 ++++++---
 llvm/lib/DTLTO/Dtlto.cpp        | 2 +-
 5 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/lld/COFF/LTO.cpp b/lld/COFF/LTO.cpp
index 1050874a1b10c..d656a54782c97 100644
--- a/lld/COFF/LTO.cpp
+++ b/lld/COFF/LTO.cpp
@@ -132,6 +132,9 @@ BitcodeCompiler::BitcodeCompiler(COFFLinkerContext &c) : ctx(c) {
         llvm::heavyweight_hardware_concurrency(ctx.config.thinLTOJobs));
   }
 
+  if (!ctx.config.dtltoDistributor.empty())
+    ltoObj->Dtlto = true;
+
   ltoObj = std::make_unique<lto::LTO>(createConfig(), backend,
                                       ctx.config.ltoPartitions);
 }
diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp
index 13e9c63495e4d..5a8f9395b7174 100644
--- a/lld/ELF/LTO.cpp
+++ b/lld/ELF/LTO.cpp
@@ -202,8 +202,8 @@ BitcodeCompiler::BitcodeCompiler(Ctx &ctx) : ctx(ctx) {
                                       ctx.arg.ltoPartitions,
                                       ltoModes[ctx.arg.ltoKind]);
 
-  if(!ctx.arg.dtltoDistributor.empty())
-      ltoObj->Dtlto = true;
+  if (!ctx.arg.dtltoDistributor.empty())
+    ltoObj->Dtlto = true;
 
   // Initialize usedStartStop.
   if (ctx.bitcodeFiles.empty())
diff --git a/llvm/include/llvm/DTLTO/Dtlto.h b/llvm/include/llvm/DTLTO/Dtlto.h
index aa6af7d0cd9b7..b908abf83c009 100644
--- a/llvm/include/llvm/DTLTO/Dtlto.h
+++ b/llvm/include/llvm/DTLTO/Dtlto.h
@@ -14,8 +14,8 @@
 
 namespace dtlto {
 
-llvm::Expected<llvm::lto::InputFile*> addInput(llvm::lto::LTO *LtoObj,
-                               std::unique_ptr<llvm::lto::InputFile> Input);
+llvm::Expected<llvm::lto::InputFile *>
+addInput(llvm::lto::LTO *LtoObj, std::unique_ptr<llvm::lto::InputFile> Input);
 
 llvm::Error process(llvm::lto::LTO &LtoObj);
 } // namespace dtlto
diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h
index 7183a4429e665..195e9cc76003b 100644
--- a/llvm/include/llvm/LTO/LTO.h
+++ b/llvm/include/llvm/LTO/LTO.h
@@ -227,9 +227,12 @@ class InputFile {
   bool isThinLTO() const { return IsThinLTO; }
 
   // Store an archive path and a member name.
-  void setArchivePathAndName(StringRef Path, StringRef Name) { ArchivePath = Path; MemberName = Name; }
-  StringRef getArchivePath() const  { return ArchivePath; }
-  StringRef getMemberName() const  { return MemberName; }
+  void setArchivePathAndName(StringRef Path, StringRef Name) {
+    ArchivePath = Path;
+    MemberName = Name;
+  }
+  StringRef getArchivePath() const { return ArchivePath; }
+  StringRef getMemberName() const { return MemberName; }
 
 private:
   ArrayRef<Symbol> module_symbols(unsigned I) const {
diff --git a/llvm/lib/DTLTO/Dtlto.cpp b/llvm/lib/DTLTO/Dtlto.cpp
index 97a15eda81674..ee668d978db14 100644
--- a/llvm/lib/DTLTO/Dtlto.cpp
+++ b/llvm/lib/DTLTO/Dtlto.cpp
@@ -196,7 +196,7 @@ Error saveInputArchiveMember(lto::LTO &LtoObj, lto::InputFile *Input) {
 
 // Iterates through all ThinLTO-enabled input files and saves their content
 // to separate files if they are regular archive members.
-Error saveInputArchiveMembers(lto::LTO& LtoObj) {
+Error saveInputArchiveMembers(lto::LTO &LtoObj) {
   for (auto &Input : LtoObj.InputFiles) {
     if (!Input->isThinLTO())
       continue;

>From 4171a334f022c829b66a8706ee0c98cdf340ece5 Mon Sep 17 00:00:00 2001
From: Konstantin Belochapka <konstantin.belochapka at sony.com>
Date: Mon, 8 Sep 2025 04:36:25 -0700
Subject: [PATCH 4/5] [DTLTO][ELF][COFF][MachO] Add archive support for DTLTO -
 Addressed review comments from Tobias.

---
 cross-project-tests/CMakeLists.txt           | 1 -
 llvm/include/llvm/DTLTO/{Dtlto.h => DTLTO.h} | 2 +-
 llvm/lib/DTLTO/{Dtlto.cpp => DTLTO.cpp}      | 2 +-
 llvm/lib/LTO/LTO.cpp                         | 2 +-
 4 files changed, 3 insertions(+), 4 deletions(-)
 rename llvm/include/llvm/DTLTO/{Dtlto.h => DTLTO.h} (90%)
 rename llvm/lib/DTLTO/{Dtlto.cpp => DTLTO.cpp} (99%)

diff --git a/cross-project-tests/CMakeLists.txt b/cross-project-tests/CMakeLists.txt
index 6753a27698eae..192db87043177 100644
--- a/cross-project-tests/CMakeLists.txt
+++ b/cross-project-tests/CMakeLists.txt
@@ -20,7 +20,6 @@ set(CROSS_PROJECT_TEST_DEPS
   check-gdb-llvm-support
   count
   llvm-ar
-  llvm-ar
   llvm-config
   llvm-dwarfdump
   llvm-objdump
diff --git a/llvm/include/llvm/DTLTO/Dtlto.h b/llvm/include/llvm/DTLTO/DTLTO.h
similarity index 90%
rename from llvm/include/llvm/DTLTO/Dtlto.h
rename to llvm/include/llvm/DTLTO/DTLTO.h
index b908abf83c009..dfbfcf79d7435 100644
--- a/llvm/include/llvm/DTLTO/Dtlto.h
+++ b/llvm/include/llvm/DTLTO/DTLTO.h
@@ -1,4 +1,4 @@
-//===- Dtlto.h - Distributed ThinLTO functions and classes ----*- C++ -*-===//
+//===- DTLTO.h - Distributed ThinLTO functions and classes ----*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/DTLTO/Dtlto.cpp b/llvm/lib/DTLTO/DTLTO.cpp
similarity index 99%
rename from llvm/lib/DTLTO/Dtlto.cpp
rename to llvm/lib/DTLTO/DTLTO.cpp
index ee668d978db14..19ae6385f75cd 100644
--- a/llvm/lib/DTLTO/Dtlto.cpp
+++ b/llvm/lib/DTLTO/DTLTO.cpp
@@ -12,7 +12,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/DTLTO/Dtlto.h"
+#include "llvm/DTLTO/DTLTO.h"
 
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index ca89ed6c24361..894b990a4263b 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -26,7 +26,7 @@
 #include "llvm/CGData/CodeGenData.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/Config/llvm-config.h"
-#include "llvm/DTLTO/Dtlto.h"
+#include "llvm/DTLTO/DTLTO.h"
 #include "llvm/IR/AutoUpgrade.h"
 #include "llvm/IR/DiagnosticPrinter.h"
 #include "llvm/IR/Intrinsics.h"

>From fa7376cef8495747e408730d77d518a971505c92 Mon Sep 17 00:00:00 2001
From: Konstantin Belochapka <konstantin.belochapka at sony.com>
Date: Mon, 8 Sep 2025 05:01:15 -0700
Subject: [PATCH 5/5] [DTLTO][ELF][COFF][MachO] Add archive support for DTLTO -
 Compilation fix.

---
 llvm/lib/DTLTO/CMakeLists.txt | 2 +-
 llvm/lib/LTO/LTO.cpp          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/DTLTO/CMakeLists.txt b/llvm/lib/DTLTO/CMakeLists.txt
index 51fd8aad6f48b..4a35de24c86db 100644
--- a/llvm/lib/DTLTO/CMakeLists.txt
+++ b/llvm/lib/DTLTO/CMakeLists.txt
@@ -1,5 +1,5 @@
 add_llvm_component_library(LLVMDTLTO
-  Dtlto.cpp
+  DTLTO.cpp
 
   LINK_COMPONENTS
   Core
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 894b990a4263b..7c4bf54cd5fd9 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -735,7 +735,7 @@ static void writeToResolutionFile(raw_ostream &OS, InputFile *Input,
 
 Error LTO::add(std::unique_ptr<InputFile> InputPtr,
                ArrayRef<SymbolResolution> Res) {
-  llvm::TimeTraceScope timeScope("LTO add input", Input->getName());
+  llvm::TimeTraceScope timeScope("LTO add input", InputPtr->getName());
   assert(!CalledGetMaxTasks);
 
   Expected<InputFile *> InputOrErr = dtlto::addInput(this, std::move(InputPtr));



More information about the llvm-commits mailing list