[lld] [llvm] [DTLTO][ELF][COFF][MachO] Add archive support for DTLTO. (PR #157043)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 8 05:01:39 PDT 2025
https://github.com/kbelochapka updated https://github.com/llvm/llvm-project/pull/157043
>From 3a92171ca70f2f689797088f20f3b45f84021df4 Mon Sep 17 00:00:00 2001
From: Konstantin Belochapka <konstantin.belochapka at sony.com>
Date: Sun, 24 Aug 2025 22:35:14 -0700
Subject: [PATCH 1/5] [DTLTO][ELF][COFF][MachO] Add archive support for DTLTO.
This patch implements support for handling archive members in DTLTO.
Unlike ThinLTO, where archive members are passed as in-memory buffers,
DTLTO requires archive members to be materialized as individual files on the filesystem.
This is necessary because DTLTO invokes clang externally, which expects file-based inputs.
To support this, this implementation identifies archive members among the input files,
saves them to the filesystem, and updates their module_id to match their file paths.
---
cross-project-tests/CMakeLists.txt | 1 +
cross-project-tests/dtlto/archive.test | 80 +++++++
.../dtlto/archives-mixed-lto-modes-test.test | 35 +++
.../dtlto/archives-same-module-id.test | 55 +++++
lld/COFF/InputFiles.cpp | 1 +
lld/ELF/InputFiles.cpp | 1 +
lld/ELF/LTO.cpp | 3 +
lld/MachO/InputFiles.cpp | 1 +
llvm/include/llvm/Bitcode/BitcodeReader.h | 5 +
llvm/include/llvm/DTLTO/Dtlto.h | 23 ++
llvm/include/llvm/LTO/LTO.h | 48 ++++
llvm/lib/CMakeLists.txt | 1 +
llvm/lib/DTLTO/CMakeLists.txt | 7 +
llvm/lib/DTLTO/Dtlto.cpp | 226 ++++++++++++++++++
llvm/lib/LTO/CMakeLists.txt | 1 +
llvm/lib/LTO/LTO.cpp | 20 +-
16 files changed, 506 insertions(+), 2 deletions(-)
create mode 100644 cross-project-tests/dtlto/archive.test
create mode 100644 cross-project-tests/dtlto/archives-mixed-lto-modes-test.test
create mode 100644 cross-project-tests/dtlto/archives-same-module-id.test
create mode 100644 llvm/include/llvm/DTLTO/Dtlto.h
create mode 100644 llvm/lib/DTLTO/CMakeLists.txt
create mode 100644 llvm/lib/DTLTO/Dtlto.cpp
diff --git a/cross-project-tests/CMakeLists.txt b/cross-project-tests/CMakeLists.txt
index 192db87043177..6753a27698eae 100644
--- a/cross-project-tests/CMakeLists.txt
+++ b/cross-project-tests/CMakeLists.txt
@@ -20,6 +20,7 @@ set(CROSS_PROJECT_TEST_DEPS
check-gdb-llvm-support
count
llvm-ar
+ llvm-ar
llvm-config
llvm-dwarfdump
llvm-objdump
diff --git a/cross-project-tests/dtlto/archive.test b/cross-project-tests/dtlto/archive.test
new file mode 100644
index 0000000000000..1c01ae7a5691a
--- /dev/null
+++ b/cross-project-tests/dtlto/archive.test
@@ -0,0 +1,80 @@
+REQUIRES: x86-registered-target,ld.lld,llvm-ar
+
+# Test that a DTLTO link succeeds and outputs the expected set of files
+# correctly when archives are present.
+
+RUN: rm -rf %t && split-file %s %t && cd %t
+# Compile sources into bitcode. -O2 is required for cross-module importing.
+RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c foo.c boo.c moo.c loo.c voo.c main.c
+
+RUN: llvm-ar rcs archive.a foo.o boo.o moo.o
+RUN: llvm-ar rcsT archive.thin.a loo.o voo.o
+
+# Build with DTLTO.
+RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin \
+RUN: -fuse-ld=lld -nostdlib -e main \
+RUN: main.o archive.a archive.thin.a -o main.elf \
+RUN: -Wl,--thinlto-distributor=%python \
+RUN: -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
+RUN: -Wl,--thinlto-remote-compiler=%clang \
+RUN: -Wl,--save-temps
+
+# Check that the required output files have been created.
+RUN: ls | FileCheck %s --check-prefix=OUTPUTS
+
+# JSON jobs description.
+OUTPUTS-DAG: {{^}}main.[[PID:[0-9]+]].dist-file.json
+
+# Main source.
+OUTPUTS-DAG: {{^}}main.{{[0-9]+}}.[[PID]].native.o{{$}}
+OUTPUTS-DAG: {{^}}main.{{[0-9]+}}.[[PID]].native.o.thinlto.bc{{$}}
+
+# Regular archive members.
+# Filename composition: <archive>(<member> at <offset>).<task>.<pid>.<task>.<pid>.native.o[.thinlto.bc].
+OUTPUTS-DAG: {{^}}archive.a(boo.o at {{[0-9]+}}).2.[[HEXPID:[a-fA-F0-9]+]].2.[[PID]].native.o{{$}}
+OUTPUTS-DAG: {{^}}archive.a(boo.o at {{[0-9]+}}).2.[[HEXPID]].2.[[PID]].native.o.thinlto.bc{{$}}
+
+OUTPUTS-DAG: {{^}}archive.a(foo.o at {{[0-9]+}}).3.[[HEXPID]].3.[[PID]].native.o{{$}}
+OUTPUTS-DAG: {{^}}archive.a(foo.o at {{[0-9]+}}).3.[[HEXPID]].3.[[PID]].native.o.thinlto.bc{{$}}
+
+OUTPUTS-DAG: {{^}}archive.a(moo.o at {{[0-9]+}}).4.[[HEXPID]].4.[[PID]].native.o{{$}}
+OUTPUTS-DAG: {{^}}archive.a(moo.o at {{[0-9]+}}).4.[[HEXPID]].4.[[PID]].native.o.thinlto.bc{{$}}
+
+# Thin archive members.
+OUTPUTS-DAG: {{^}}voo.{{[0-9]+}}.[[PID]].native.o{{$}}
+OUTPUTS-DAG: {{^}}voo.{{[0-9]+}}.[[PID]].native.o.thinlto.bc{{$}}
+
+OUTPUTS-DAG: {{^}}loo.{{[0-9]+}}.[[PID]].native.o{{$}}
+OUTPUTS-DAG: {{^}}loo.{{[0-9]+}}.[[PID]].native.o.thinlto.bc{{$}}
+
+# Executable file.
+OUTPUTS-DAG: {{^}}main.elf{{$}}
+
+#--- foo.c
+volatile int foo_int;
+__attribute__((retain)) int foo(int x) { return x + foo_int; }
+
+#--- boo.c
+extern int foo(int x);
+__attribute__((retain)) int boo(int x) { return foo(x); }
+
+#--- moo.c
+__attribute__((retain)) int moo() { return 3; }
+
+#--- loo.c
+extern int moo(int x);
+__attribute__((retain)) int loo(int x) { return moo(x); }
+
+#--- voo.c
+extern int foo(int x);
+extern int loo(int x);
+__attribute__((retain)) int voo(int x) { return foo(x) + loo(x + 1) + 7; }
+
+#--- main.c
+extern int boo(int x);
+extern int moo();
+extern int voo(int x);
+__attribute__((retain)) int main(int argc, char** argv) {
+ return boo(argc) + moo() + voo(argc + 3);
+}
+
diff --git a/cross-project-tests/dtlto/archives-mixed-lto-modes-test.test b/cross-project-tests/dtlto/archives-mixed-lto-modes-test.test
new file mode 100644
index 0000000000000..74f146028b4b6
--- /dev/null
+++ b/cross-project-tests/dtlto/archives-mixed-lto-modes-test.test
@@ -0,0 +1,35 @@
+REQUIRES: x86-registered-target,ld.lld,llvm-ar
+
+# Test that DTLTO works with a mixture of FullLTO and ThinLTO bitcode archive members
+# where there is more than one LTO partition.
+
+RUN: rm -rf %t && split-file %s %t && cd %t
+
+RUN: %clang --target=x86_64-linux-gnu -flto -c one.c two.c
+RUN: %clang --target=x86_64-linux-gnu -flto=thin -c three.c
+
+RUN: llvm-ar rc archive.a one.o two.o three.o
+
+# Build with DTLTO.
+RUN: %clang --target=x86_64-linux-gnu -Werror -flto -fuse-ld=lld -nostdlib \
+RUN: -Wl,--whole-archive archive.a \
+RUN: -Wl,--thinlto-distributor=%python \
+RUN: -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
+RUN: -Wl,--thinlto-remote-compiler=%clang \
+RUN: -Wl,--save-temps,--lto-partitions=2
+
+# Show that the FullLTO modules have been prepared for distribution. This is
+# not optimal but has no functional impact.
+RUN: FileCheck %s --input-file=a.out.resolution.txt
+CHECK: archive.a(one.o at {{.*}}).1.[[PID:[a-zA-Z0-9_]+]].o
+CHECK: archive.a(two.o at {{.*}}).2.[[PID]].o
+CHECK: archive.a(three.o at {{.*}}).3.[[PID]].o
+
+#--- one.c
+__attribute__((retain)) void one() {}
+
+#--- two.c
+__attribute__((retain)) void two() {}
+
+#--- three.c
+__attribute__((retain)) void three() {}
diff --git a/cross-project-tests/dtlto/archives-same-module-id.test b/cross-project-tests/dtlto/archives-same-module-id.test
new file mode 100644
index 0000000000000..09d5f7492bfa5
--- /dev/null
+++ b/cross-project-tests/dtlto/archives-same-module-id.test
@@ -0,0 +1,55 @@
+REQUIRES: x86-registered-target,ld.lld,llvm-ar
+
+# Test that a DTLTO link succeeds when there are two archive member files with
+# the same filename path component.
+
+# Split this file into several sources.
+RUN: rm -rf %t && split-file %s %t && cd %t
+
+RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c start.c
+
+# Create first archive.
+RUN: mkdir archive1 && cd archive1
+RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c ../t1.c ../t3.c
+RUN: llvm-ar rc archive.a t3.o t1.o
+RUN: cd ..
+
+# Create second archive.
+RUN: mkdir archive2 && cd archive2
+RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c ../t1.c ../t3.c
+RUN: llvm-ar rc archive.a t3.o t1.o
+RUN: cd ..
+
+RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld \
+RUN: -nostdlib -Wl,--undefined=t1,--undefined=t3 \
+RUN: start.o archive1/archive.a archive2/archive.a -o main.elf \
+RUN: -Wl,--save-temps \
+RUN: -Wl,--thinlto-distributor=%python \
+RUN: -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
+RUN: -Wl,--thinlto-remote-compiler=%clang
+
+# Check that the required output files have been created.
+RUN: ls | FileCheck %s --check-prefix=OUTPUTS
+
+# JSON jobs description.
+OUTPUTS-DAG: {{^}}main.[[PID:[0-9]+]].dist-file.json
+
+# Sources.
+OUTPUTS-DAG: {{^}}start.{{[0-9]+}}.[[PID]].native.o{{$}}
+OUTPUTS-DAG: {{^}}start.{{[0-9]+}}.[[PID]].native.o.thinlto.bc{{$}}
+
+# Archive members.
+# Filename composition: <archive>(<member> at <offset>).<task>.<pid>.<task>.<pid>.native.o[.thinlto.bc].
+OUTPUTS-DAG: {{^}}archive.a(t3.o at {{[0-9]+}}).2.[[HEXPID:[a-fA-F0-9]+]].2.[[PID]].native.o{{$}}
+OUTPUTS-DAG: {{^}}archive.a(t3.o at {{[0-9]+}}).2.[[HEXPID]].2.[[PID]].native.o.thinlto.bc{{$}}
+OUTPUTS-DAG: {{^}}archive.a(t1.o at {{[0-9]+}}).3.[[HEXPID]].3.[[PID]].native.o{{$}}
+OUTPUTS-DAG: {{^}}archive.a(t1.o at {{[0-9]+}}).3.[[HEXPID]].3.[[PID]].native.o.thinlto.bc{{$}}
+
+#--- t1.c
+__attribute__((retain)) void t1() { }
+
+#--- start.c
+__attribute__((retain)) void _start() { }
+
+#--- t3.c
+__attribute__((retain)) void t3() { }
diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp
index c08099b8810bb..d415955b6093b 100644
--- a/lld/COFF/InputFiles.cpp
+++ b/lld/COFF/InputFiles.cpp
@@ -1380,6 +1380,7 @@ BitcodeFile *BitcodeFile::create(COFFLinkerContext &ctx, MemoryBufferRef mb,
utostr(offsetInArchive)));
std::unique_ptr<lto::InputFile> obj = check(lto::InputFile::create(mbref));
+ obj->setArchivePathAndName(archiveName, mb.getBufferIdentifier());
return make<BitcodeFile>(ctx.getSymtab(getMachineType(obj.get())), mb, obj,
lazy);
}
diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index a5921feb18299..ec0af9d0c0f4e 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -1874,6 +1874,7 @@ BitcodeFile::BitcodeFile(Ctx &ctx, MemoryBufferRef mb, StringRef archiveName,
MemoryBufferRef mbref(mb.getBuffer(), name);
obj = CHECK2(lto::InputFile::create(mbref), this);
+ obj->setArchivePathAndName(archiveName, mb.getBufferIdentifier());
Triple t(obj->getTargetTriple());
ekind = getBitcodeELFKind(t);
diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp
index 8d4a6c9e3a81e..13e9c63495e4d 100644
--- a/lld/ELF/LTO.cpp
+++ b/lld/ELF/LTO.cpp
@@ -202,6 +202,9 @@ BitcodeCompiler::BitcodeCompiler(Ctx &ctx) : ctx(ctx) {
ctx.arg.ltoPartitions,
ltoModes[ctx.arg.ltoKind]);
+ if(!ctx.arg.dtltoDistributor.empty())
+ ltoObj->Dtlto = true;
+
// Initialize usedStartStop.
if (ctx.bitcodeFiles.empty())
return;
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 3b3023a94166f..bb40fcfb7701f 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -2360,6 +2360,7 @@ BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
sys::path::filename(path) + ")" +
utostr(offsetInArchive)));
obj = check(lto::InputFile::create(mbref));
+ obj->setArchivePathAndName(archiveName, mb.getBufferIdentifier());
if (lazy)
parseLazy();
else
diff --git a/llvm/include/llvm/Bitcode/BitcodeReader.h b/llvm/include/llvm/Bitcode/BitcodeReader.h
index 4f839d4cd1575..772ca82019278 100644
--- a/llvm/include/llvm/Bitcode/BitcodeReader.h
+++ b/llvm/include/llvm/Bitcode/BitcodeReader.h
@@ -137,6 +137,11 @@ struct ParserCallbacks {
StringRef getModuleIdentifier() const { return ModuleIdentifier; }
+ // Replace the identifier that getModuleIdentifier() reports for this module.
+ void setModuleIdentifier(llvm::StringRef ModuleId) {
+ ModuleIdentifier = ModuleId; // NOTE(review): presumably non-owning; confirm the caller keeps the string alive.
+ }
+
/// Read the bitcode module and prepare for lazy deserialization of function
/// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well.
/// If IsImporting is true, this module is being parsed for ThinLTO
diff --git a/llvm/include/llvm/DTLTO/Dtlto.h b/llvm/include/llvm/DTLTO/Dtlto.h
new file mode 100644
index 0000000000000..aa6af7d0cd9b7
--- /dev/null
+++ b/llvm/include/llvm/DTLTO/Dtlto.h
@@ -0,0 +1,23 @@
+//===- Dtlto.h - Distributed ThinLTO functions and classes ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+
+#ifndef LLVM_DTLTO_H
+#define LLVM_DTLTO_H
+
+#include "llvm/LTO/LTO.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+namespace dtlto {
+
+llvm::Expected<llvm::lto::InputFile*> addInput(llvm::lto::LTO *LtoObj,
+ std::unique_ptr<llvm::lto::InputFile> Input);
+
+llvm::Error process(llvm::lto::LTO &LtoObj);
+} // namespace dtlto
+
+#endif // LLVM_DTLTO_H
diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h
index 323c478691a92..7183a4429e665 100644
--- a/llvm/include/llvm/LTO/LTO.h
+++ b/llvm/include/llvm/LTO/LTO.h
@@ -32,6 +32,23 @@
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/IPO/FunctionImport.h"
+namespace llvm {
+namespace lto {
+class LTO;
+}
+} // namespace llvm
+
+namespace dtlto {
+class TempFilesRemover { // Helper whose destructor performs temp-file cleanup.
+ llvm::lto::LTO *Lto = nullptr; // Raw pointer to the LTO object; may be null.
+
+public:
+ TempFilesRemover(llvm::lto::LTO *LtoObj) : Lto{LtoObj} {}
+ ~TempFilesRemover(); // Declared only; the body is not defined in this header.
+};
+
+} // namespace dtlto
+
namespace llvm {
class Error;
@@ -135,6 +152,12 @@ class InputFile {
std::vector<StringRef> DependentLibraries;
std::vector<std::pair<StringRef, Comdat::SelectionKind>> ComdatTable;
+ MemoryBufferRef MbRef;
+ bool IsMemberOfArchive = false;
+ bool IsThinLTO = false;
+ StringRef ArchivePath;
+ StringRef MemberName;
+
public:
LLVM_ABI ~InputFile();
@@ -193,6 +216,20 @@ class InputFile {
// Returns the only BitcodeModule from InputFile.
LLVM_ABI BitcodeModule &getSingleBitcodeModule();
+ // Returns the memory buffer reference for this input file.
+ MemoryBufferRef getFileBuffer() const { return MbRef; }
+ // Returns true if this input file is a member of an archive.
+ bool isMemberOfArchive() const { return IsMemberOfArchive; }
+ // Mark this input file as a member of an archive.
+ void memberOfArchive(bool MA) { IsMemberOfArchive = MA; }
+
+ // Returns true if bitcode is ThinLTO.
+ bool isThinLTO() const { return IsThinLTO; }
+
+ // Store an archive path and a member name.
+ void setArchivePathAndName(StringRef Path, StringRef Name) { ArchivePath = Path; MemberName = Name; }
+ StringRef getArchivePath() const { return ArchivePath; }
+ StringRef getMemberName() const { return MemberName; }
private:
ArrayRef<Symbol> module_symbols(unsigned I) const {
@@ -580,6 +617,17 @@ class LTO {
// Diagnostic optimization remarks file
std::unique_ptr<ToolOutputFile> DiagnosticOutputFile;
+
+public:
+ /// DTLTO mode.
+ bool Dtlto = false;
+
+ BumpPtrAllocator PtrAlloc;
+ StringSaver Saver{PtrAlloc};
+
+ // Array of input bitcode files for LTO.
+ std::vector<std::unique_ptr<llvm::lto::InputFile>> InputFiles;
+ std::unique_ptr<dtlto::TempFilesRemover> TempsRemover;
};
/// The resolution for a symbol. The linker must provide a SymbolResolution for
diff --git a/llvm/lib/CMakeLists.txt b/llvm/lib/CMakeLists.txt
index a9432977718c6..0856af9058fef 100644
--- a/llvm/lib/CMakeLists.txt
+++ b/llvm/lib/CMakeLists.txt
@@ -22,6 +22,7 @@ add_subdirectory(Frontend)
add_subdirectory(Transforms)
add_subdirectory(Linker)
add_subdirectory(Analysis)
+add_subdirectory(DTLTO)
add_subdirectory(LTO)
add_subdirectory(MC)
add_subdirectory(MCA)
diff --git a/llvm/lib/DTLTO/CMakeLists.txt b/llvm/lib/DTLTO/CMakeLists.txt
new file mode 100644
index 0000000000000..51fd8aad6f48b
--- /dev/null
+++ b/llvm/lib/DTLTO/CMakeLists.txt
@@ -0,0 +1,7 @@
+add_llvm_component_library(LLVMDTLTO
+ Dtlto.cpp
+
+ LINK_COMPONENTS
+ Core
+ Support
+ )
diff --git a/llvm/lib/DTLTO/Dtlto.cpp b/llvm/lib/DTLTO/Dtlto.cpp
new file mode 100644
index 0000000000000..356f30188c5c4
--- /dev/null
+++ b/llvm/lib/DTLTO/Dtlto.cpp
@@ -0,0 +1,226 @@
+//===- Dtlto.cpp - Distributed ThinLTO implementation --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// This file implements support functions for Distributed ThinLTO, focusing on
+// archive file handling.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DTLTO/Dtlto.h"
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Magic.h"
+#include "llvm/LTO/LTO.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBufferRef.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <iostream>
+#include <string>
+
+using namespace llvm;
+
+namespace dtlto {
+
+// On destruction, delete the on-disk copies of regular archive members.
+// Only inputs flagged via isMemberOfArchive() are touched, and the result of
+// each removal is ignored.
+TempFilesRemover::~TempFilesRemover() {
+  if (Lto == nullptr)
+    return;
+  for (const auto &File : Lto->InputFiles) {
+    if (!File->isMemberOfArchive())
+      continue;
+    sys::fs::remove(File->getName(), /*IgnoreNonExisting=*/true);
+  }
+}
+
+// Writes the contents of FileBuffer to the file at FilePath.
+//
+// Returns Error::success() once the data has been written and the stream
+// closed; otherwise returns a string error that names the file.
+static llvm::Error saveBuffer(StringRef FileBuffer, StringRef FilePath) {
+  // StringRef::data() is not guaranteed to be NUL-terminated, so keep an owned
+  // copy of the path for the printf-style "%s" arguments below.
+  const std::string Path = FilePath.str();
+  std::error_code EC;
+  raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None);
+  if (EC) {
+    return createStringError(inconvertibleErrorCode(),
+                             "Failed to create file %s: %s", Path.c_str(),
+                             EC.message().c_str());
+  }
+  OS.write(FileBuffer.data(), FileBuffer.size());
+  // The stream is buffered: close it so that any deferred write failure is
+  // visible before the error state is inspected.
+  OS.close();
+  if (OS.has_error()) {
+    // An error left pending on a raw_fd_ostream is reported as a fatal error
+    // when the stream is destroyed. It is returned to the caller instead, so
+    // clear it here.
+    OS.clear_error();
+    return createStringError(inconvertibleErrorCode(),
+                             "Failed writing to file %s", Path.c_str());
+  }
+  return Error::success();
+}
+
+// Resolve the on-disk location of a thin archive member.
+//
+// A relative member name is taken to be relative to the directory that holds
+// the archive; any other member name is used unchanged. Dot and dot-dot
+// components are then removed from the result.
+SmallString<64> computeThinArchiveMemberPath(const StringRef ArchivePath,
+                                             const StringRef MemberName) {
+  assert(!ArchivePath.empty() && "An archive file path must be non empty.");
+  SmallString<64> Resolved;
+  if (!sys::path::is_relative(MemberName)) {
+    Resolved = MemberName;
+  } else {
+    Resolved = sys::path::parent_path(ArchivePath);
+    sys::path::append(Resolved, MemberName);
+  }
+  sys::path::remove_dots(Resolved, /*remove_dot_dot=*/true);
+  return Resolved;
+}
+
+// Magic string identifying thin archive files.
+static constexpr StringLiteral THIN_ARCHIVE_MAGIC = "!<thin>\n";
+
+// Returns whether the file at ArchivePath is a thin archive, or an error.
+// Successful results are memoized per path in a function-local static map;
+// only the leading magic bytes of the file are read.
+// NOTE(review): the cache is unsynchronized and the "%s" arguments rely on
+// ArchivePath.data() being NUL-terminated - confirm both hold for all callers.
+Expected<bool> isThinArchive(const StringRef ArchivePath) {
+ static StringMap<bool> ArchiveFiles;
+
+ // Return cached result if available.
+ auto Cached = ArchiveFiles.find(ArchivePath);
+ if (Cached != ArchiveFiles.end())
+ return Cached->second;
+
+ uint64_t FileSize = -1;
+ bool IsThin = false;
+ std::error_code EC = sys::fs::file_size(ArchivePath, FileSize);
+ if (EC)
+ return createStringError(inconvertibleErrorCode(),
+ "Failed to get file size from archive %s: %s",
+ ArchivePath.data(), EC.message().c_str());
+ if (FileSize < THIN_ARCHIVE_MAGIC.size())
+ return createStringError(inconvertibleErrorCode(),
+ "Archive file size is too small %s",
+ ArchivePath.data());
+
+ // Read only the first few bytes containing the magic signature.
+ ErrorOr<std::unique_ptr<MemoryBuffer>> MemBufferOrError =
+ MemoryBuffer::getFileSlice(ArchivePath, THIN_ARCHIVE_MAGIC.size(), 0);
+
+ if (EC = MemBufferOrError.getError())
+ return createStringError(inconvertibleErrorCode(),
+ "Failed to read from archive %s: %s",
+ ArchivePath.data(), EC.message().c_str());
+
+ StringRef MemBuf = (*MemBufferOrError.get()).getBuffer();
+ if (file_magic::archive != identify_magic(MemBuf))
+ return createStringError(inconvertibleErrorCode(),
+ "Unknown format for archive %s",
+ ArchivePath.data());
+
+ IsThin = MemBuf.starts_with(THIN_ARCHIVE_MAGIC);
+
+ // Cache the result; the error returns above leave the cache untouched.
+ ArchiveFiles[ArchivePath] = IsThin;
+ return IsThin;
+}
+
+// Registers an input file with the LTO object and, in DTLTO mode, rewrites the
+// module identifier of archive members:
+//  - the input is always appended to LtoObj->InputFiles;
+//  - outside DTLTO mode, or for inputs with no archive path, nothing else
+//    happens;
+//  - a thin archive member gets the resolved path of its member file;
+//  - a regular archive member is flagged as such and gets an identifier built
+//    from its file name, the current input count and the process ID (hex).
+// Returns a non-owning pointer to the stored input, or the error produced
+// while probing the archive type.
+Expected<lto::InputFile *> addInput(lto::LTO *LtoObj,
+                                    std::unique_ptr<lto::InputFile> InputPtr) {
+
+  // Hand ownership to the LTO object and keep a raw pointer for the caller.
+  lto::InputFile *File =
+      LtoObj->InputFiles.emplace_back(std::move(InputPtr)).get();
+
+  // Nothing more to do outside DTLTO mode.
+  if (!LtoObj->Dtlto)
+    return File;
+
+  const StringRef OldModuleId = File->getName();
+  const StringRef Archive = File->getArchivePath();
+
+  // Inputs without an archive path are left untouched.
+  if (Archive.empty())
+    return File;
+
+  BitcodeModule &Module = File->getSingleBitcodeModule();
+
+  // Find out which kind of archive this member came from.
+  Expected<bool> ThinOrErr = isThinArchive(Archive);
+  if (!ThinOrErr)
+    return ThinOrErr.takeError();
+
+  SmallString<64> NewId;
+  if (!*ThinOrErr) {
+    // Regular archive: flag the member and derive a unique identifier from
+    // the sequence number and the process ID.
+    File->memberOfArchive(true);
+
+    const std::string Pid = utohexstr(sys::Process::getProcessId());
+    const std::string Seq = std::to_string(LtoObj->InputFiles.size());
+
+    NewId = {sys::path::filename(OldModuleId), ".", Seq, ".", Pid, ".o"};
+  } else {
+    // Thin archive: the member already exists as a file, so point at it.
+    NewId = computeThinArchiveMemberPath(Archive, File->getMemberName());
+  }
+
+  // Store the new identifier in the LTO object's saver and install it.
+  Module.setModuleIdentifier(LtoObj->Saver.save(NewId.str()));
+
+  return File;
+}
+
+// Materializes a regular archive member on disk, using its module ID as the
+// file name. Inputs that are not flagged as archive members are skipped.
+// A pre-existing file with the same name is presumed to be a leftover from an
+// earlier, terminated linker process and is written over.
+Error saveInputArchiveMember(lto::LTO &LtoObj, lto::InputFile *Input) {
+  const StringRef Path = Input->getName();
+  if (!Input->isMemberOfArchive())
+    return Error::success();
+  return saveBuffer(Input->getFileBuffer().getBuffer(), Path);
+}
+
+// Iterates through all ThinLTO-enabled input files and saves their content
+// to separate files if they are regular archive members.
+Error saveInputArchiveMembers(lto::LTO& LtoObj) {
+ for (auto &Input : LtoObj.InputFiles) {
+ if (!Input->isThinLTO())
+ continue;
+ if (Error EC = saveInputArchiveMember(LtoObj, Input.get()))
+ return EC; // Stop at the first failure and hand the error to the caller.
+ }
+ return Error::success();
+}
+
+// DTLTO archive-support entry point.
+//
+// Does nothing unless DTLTO mode is enabled. Otherwise it installs the
+// temporary-file remover on the LTO object and then writes regular archive
+// members out to individual files.
+// Must be called after all inputs are added but before optimization begins.
+llvm::Error process(llvm::lto::LTO &LtoObj) {
+  if (LtoObj.Dtlto) {
+    // Register the cleanup handler before any file is written.
+    LtoObj.TempsRemover = std::make_unique<TempFilesRemover>(&LtoObj);
+
+    // Save archive members to separate files; pass any failure to the caller.
+    return saveInputArchiveMembers(LtoObj);
+  }
+  return Error::success();
+}
+
+} // namespace dtlto
diff --git a/llvm/lib/LTO/CMakeLists.txt b/llvm/lib/LTO/CMakeLists.txt
index 057d73b6349cf..499623eacf97c 100644
--- a/llvm/lib/LTO/CMakeLists.txt
+++ b/llvm/lib/LTO/CMakeLists.txt
@@ -25,6 +25,7 @@ add_llvm_component_library(LLVMLTO
CodeGen
CodeGenTypes
Core
+ DTLTO
Extensions
IPO
InstCombine
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 35d24c17bbd93..d0a7eceb38614 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -26,6 +26,7 @@
#include "llvm/CGData/CodeGenData.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/DTLTO/Dtlto.h"
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Intrinsics.h"
@@ -570,6 +571,8 @@ Expected<std::unique_ptr<InputFile>> InputFile::create(MemoryBufferRef Object) {
File->COFFLinkerOpts = FOrErr->TheReader.getCOFFLinkerOpts();
File->DependentLibraries = FOrErr->TheReader.getDependentLibraries();
File->ComdatTable = FOrErr->TheReader.getComdatTable();
+ File->MbRef =
+ Object; // Save a memory buffer reference to an input file object.
for (unsigned I = 0; I != FOrErr->Mods.size(); ++I) {
size_t Begin = File->Symbols.size();
@@ -729,12 +732,17 @@ static void writeToResolutionFile(raw_ostream &OS, InputFile *Input,
assert(ResI == Res.end());
}
-Error LTO::add(std::unique_ptr<InputFile> Input,
+Error LTO::add(std::unique_ptr<InputFile> InputPtr,
ArrayRef<SymbolResolution> Res) {
assert(!CalledGetMaxTasks);
+ Expected<InputFile *> InputOrErr = dtlto::addInput(this, std::move(InputPtr));
+ if (!InputOrErr)
+ return InputOrErr.takeError();
+ InputFile *Input = *InputOrErr;
+
if (Conf.ResolutionFile)
- writeToResolutionFile(*Conf.ResolutionFile, Input.get(), Res);
+ writeToResolutionFile(*Conf.ResolutionFile, Input, Res);
if (RegularLTO.CombinedModule->getTargetTriple().empty()) {
Triple InputTriple(Input->getTargetTriple());
@@ -782,6 +790,10 @@ LTO::addModule(InputFile &Input, ArrayRef<SymbolResolution> InputRes,
LTOMode = LTOK_UnifiedThin;
bool IsThinLTO = LTOInfo->IsThinLTO && (LTOMode != LTOK_UnifiedRegular);
+ // If any of the modules inside of an input bitcode file was compiled with
+ // ThinLTO, we assume that the whole input file also was compiled with
+ // ThinLTO.
+ Input.IsThinLTO = IsThinLTO;
auto ModSyms = Input.module_symbols(ModI);
addModuleToGlobalRes(ModSyms, Res,
@@ -1193,6 +1205,10 @@ Error LTO::checkPartiallySplit() {
}
Error LTO::run(AddStreamFn AddStream, FileCache Cache) {
+ if (Dtlto) {
+ if (Error EC = dtlto::process(*this))
+ return EC;
+ }
// Compute "dead" symbols, we don't want to import/export these!
DenseSet<GlobalValue::GUID> GUIDPreservedSymbols;
DenseMap<GlobalValue::GUID, PrevailingType> GUIDPrevailingResolutions;
>From 9afd973128f2c66f97616b70d4fc01d45178a0cb Mon Sep 17 00:00:00 2001
From: Konstantin Belochapka <konstantin.belochapka at sony.com>
Date: Fri, 5 Sep 2025 22:44:40 -0700
Subject: [PATCH 2/5] [DTLTO][ELF][COFF][MachO] Add archive support for DTLTO -
Fixed compilation error on Linux.
---
llvm/lib/DTLTO/Dtlto.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/DTLTO/Dtlto.cpp b/llvm/lib/DTLTO/Dtlto.cpp
index 356f30188c5c4..97a15eda81674 100644
--- a/llvm/lib/DTLTO/Dtlto.cpp
+++ b/llvm/lib/DTLTO/Dtlto.cpp
@@ -110,7 +110,7 @@ Expected<bool> isThinArchive(const StringRef ArchivePath) {
ErrorOr<std::unique_ptr<MemoryBuffer>> MemBufferOrError =
MemoryBuffer::getFileSlice(ArchivePath, THIN_ARCHIVE_MAGIC.size(), 0);
- if (EC = MemBufferOrError.getError())
+ if ((EC = MemBufferOrError.getError()))
return createStringError(inconvertibleErrorCode(),
"Failed to read from archive %s: %s",
ArchivePath.data(), EC.message().c_str());
>From ee4ec950532238d306657df35baecc5e3d5727e1 Mon Sep 17 00:00:00 2001
From: Konstantin Belochapka <konstantin.belochapka at sony.com>
Date: Fri, 5 Sep 2025 22:55:07 -0700
Subject: [PATCH 3/5] [DTLTO][ELF][COFF][MachO] Add archive support for DTLTO -
Fixed formatting errors.
---
lld/COFF/LTO.cpp | 3 +++
lld/ELF/LTO.cpp | 4 ++--
llvm/include/llvm/DTLTO/Dtlto.h | 4 ++--
llvm/include/llvm/LTO/LTO.h | 9 ++++++---
llvm/lib/DTLTO/Dtlto.cpp | 2 +-
5 files changed, 14 insertions(+), 8 deletions(-)
diff --git a/lld/COFF/LTO.cpp b/lld/COFF/LTO.cpp
index 1050874a1b10c..d656a54782c97 100644
--- a/lld/COFF/LTO.cpp
+++ b/lld/COFF/LTO.cpp
@@ -132,6 +132,9 @@ BitcodeCompiler::BitcodeCompiler(COFFLinkerContext &c) : ctx(c) {
llvm::heavyweight_hardware_concurrency(ctx.config.thinLTOJobs));
}
+ if (!ctx.config.dtltoDistributor.empty())
+ ltoObj->Dtlto = true;
+
ltoObj = std::make_unique<lto::LTO>(createConfig(), backend,
ctx.config.ltoPartitions);
}
diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp
index 13e9c63495e4d..5a8f9395b7174 100644
--- a/lld/ELF/LTO.cpp
+++ b/lld/ELF/LTO.cpp
@@ -202,8 +202,8 @@ BitcodeCompiler::BitcodeCompiler(Ctx &ctx) : ctx(ctx) {
ctx.arg.ltoPartitions,
ltoModes[ctx.arg.ltoKind]);
- if(!ctx.arg.dtltoDistributor.empty())
- ltoObj->Dtlto = true;
+ if (!ctx.arg.dtltoDistributor.empty())
+ ltoObj->Dtlto = true;
// Initialize usedStartStop.
if (ctx.bitcodeFiles.empty())
diff --git a/llvm/include/llvm/DTLTO/Dtlto.h b/llvm/include/llvm/DTLTO/Dtlto.h
index aa6af7d0cd9b7..b908abf83c009 100644
--- a/llvm/include/llvm/DTLTO/Dtlto.h
+++ b/llvm/include/llvm/DTLTO/Dtlto.h
@@ -14,8 +14,8 @@
namespace dtlto {
-llvm::Expected<llvm::lto::InputFile*> addInput(llvm::lto::LTO *LtoObj,
- std::unique_ptr<llvm::lto::InputFile> Input);
+llvm::Expected<llvm::lto::InputFile *>
+addInput(llvm::lto::LTO *LtoObj, std::unique_ptr<llvm::lto::InputFile> Input);
llvm::Error process(llvm::lto::LTO &LtoObj);
} // namespace dtlto
diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h
index 7183a4429e665..195e9cc76003b 100644
--- a/llvm/include/llvm/LTO/LTO.h
+++ b/llvm/include/llvm/LTO/LTO.h
@@ -227,9 +227,12 @@ class InputFile {
bool isThinLTO() const { return IsThinLTO; }
// Store an archive path and a member name.
- void setArchivePathAndName(StringRef Path, StringRef Name) { ArchivePath = Path; MemberName = Name; }
- StringRef getArchivePath() const { return ArchivePath; }
- StringRef getMemberName() const { return MemberName; }
+ void setArchivePathAndName(StringRef Path, StringRef Name) {
+ ArchivePath = Path;
+ MemberName = Name;
+ }
+ StringRef getArchivePath() const { return ArchivePath; }
+ StringRef getMemberName() const { return MemberName; }
private:
ArrayRef<Symbol> module_symbols(unsigned I) const {
diff --git a/llvm/lib/DTLTO/Dtlto.cpp b/llvm/lib/DTLTO/Dtlto.cpp
index 97a15eda81674..ee668d978db14 100644
--- a/llvm/lib/DTLTO/Dtlto.cpp
+++ b/llvm/lib/DTLTO/Dtlto.cpp
@@ -196,7 +196,7 @@ Error saveInputArchiveMember(lto::LTO &LtoObj, lto::InputFile *Input) {
// Iterates through all ThinLTO-enabled input files and saves their content
// to separate files if they are regular archive members.
-Error saveInputArchiveMembers(lto::LTO& LtoObj) {
+Error saveInputArchiveMembers(lto::LTO &LtoObj) {
for (auto &Input : LtoObj.InputFiles) {
if (!Input->isThinLTO())
continue;
>From 4171a334f022c829b66a8706ee0c98cdf340ece5 Mon Sep 17 00:00:00 2001
From: Konstantin Belochapka <konstantin.belochapka at sony.com>
Date: Mon, 8 Sep 2025 04:36:25 -0700
Subject: [PATCH 4/5] [DTLTO][ELF][COFF][MachO] Add archive support for DTLTO -
Addressed review comments from Tobias.
---
cross-project-tests/CMakeLists.txt | 1 -
llvm/include/llvm/DTLTO/{Dtlto.h => DTLTO.h} | 2 +-
llvm/lib/DTLTO/{Dtlto.cpp => DTLTO.cpp} | 2 +-
llvm/lib/LTO/LTO.cpp | 2 +-
4 files changed, 3 insertions(+), 4 deletions(-)
rename llvm/include/llvm/DTLTO/{Dtlto.h => DTLTO.h} (90%)
rename llvm/lib/DTLTO/{Dtlto.cpp => DTLTO.cpp} (99%)
diff --git a/cross-project-tests/CMakeLists.txt b/cross-project-tests/CMakeLists.txt
index 6753a27698eae..192db87043177 100644
--- a/cross-project-tests/CMakeLists.txt
+++ b/cross-project-tests/CMakeLists.txt
@@ -20,7 +20,6 @@ set(CROSS_PROJECT_TEST_DEPS
check-gdb-llvm-support
count
llvm-ar
- llvm-ar
llvm-config
llvm-dwarfdump
llvm-objdump
diff --git a/llvm/include/llvm/DTLTO/Dtlto.h b/llvm/include/llvm/DTLTO/DTLTO.h
similarity index 90%
rename from llvm/include/llvm/DTLTO/Dtlto.h
rename to llvm/include/llvm/DTLTO/DTLTO.h
index b908abf83c009..dfbfcf79d7435 100644
--- a/llvm/include/llvm/DTLTO/Dtlto.h
+++ b/llvm/include/llvm/DTLTO/DTLTO.h
@@ -1,4 +1,4 @@
-//===- Dtlto.h - Distributed ThinLTO functions and classes ----*- C++ -*-===//
+//===- DTLTO.h - Distributed ThinLTO functions and classes ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/DTLTO/Dtlto.cpp b/llvm/lib/DTLTO/DTLTO.cpp
similarity index 99%
rename from llvm/lib/DTLTO/Dtlto.cpp
rename to llvm/lib/DTLTO/DTLTO.cpp
index ee668d978db14..19ae6385f75cd 100644
--- a/llvm/lib/DTLTO/Dtlto.cpp
+++ b/llvm/lib/DTLTO/DTLTO.cpp
@@ -12,7 +12,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DTLTO/Dtlto.h"
+#include "llvm/DTLTO/DTLTO.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index ca89ed6c24361..894b990a4263b 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -26,7 +26,7 @@
#include "llvm/CGData/CodeGenData.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/Config/llvm-config.h"
-#include "llvm/DTLTO/Dtlto.h"
+#include "llvm/DTLTO/DTLTO.h"
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Intrinsics.h"
>From fa7376cef8495747e408730d77d518a971505c92 Mon Sep 17 00:00:00 2001
From: Konstantin Belochapka <konstantin.belochapka at sony.com>
Date: Mon, 8 Sep 2025 05:01:15 -0700
Subject: [PATCH 5/5] [DTLTO][ELF][COFF][MachO] Add archive support for DTLTO -
Compilation fix.
---
llvm/lib/DTLTO/CMakeLists.txt | 2 +-
llvm/lib/LTO/LTO.cpp | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/DTLTO/CMakeLists.txt b/llvm/lib/DTLTO/CMakeLists.txt
index 51fd8aad6f48b..4a35de24c86db 100644
--- a/llvm/lib/DTLTO/CMakeLists.txt
+++ b/llvm/lib/DTLTO/CMakeLists.txt
@@ -1,5 +1,5 @@
add_llvm_component_library(LLVMDTLTO
- Dtlto.cpp
+ DTLTO.cpp
LINK_COMPONENTS
Core
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 894b990a4263b..7c4bf54cd5fd9 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -735,7 +735,7 @@ static void writeToResolutionFile(raw_ostream &OS, InputFile *Input,
Error LTO::add(std::unique_ptr<InputFile> InputPtr,
ArrayRef<SymbolResolution> Res) {
- llvm::TimeTraceScope timeScope("LTO add input", Input->getName());
+ llvm::TimeTraceScope timeScope("LTO add input", InputPtr->getName());
assert(!CalledGetMaxTasks);
Expected<InputFile *> InputOrErr = dtlto::addInput(this, std::move(InputPtr));
More information about the llvm-commits
mailing list