[lld] [llvm] [DTLTO][ELF][COFF][MachO] Add archive support for DTLTO. (PR #157043)

via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 5 01:13:40 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-lld-elf

Author: None (kbelochapka)

<details>
<summary>Changes</summary>

This patch implements support for handling archive members in DTLTO.
 
Unlike ThinLTO, where archive members are passed as in-memory buffers,
DTLTO requires archive members to be materialized as individual files on the filesystem.
This is necessary because DTLTO invokes clang externally, which expects file-based inputs.
To support this, the implementation identifies archive members among the input files,
saves them to the filesystem, and updates their module_id to match their file paths.

---

Patch is 25.21 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/157043.diff


16 Files Affected:

- (modified) cross-project-tests/CMakeLists.txt (+1) 
- (added) cross-project-tests/dtlto/archive.test (+80) 
- (added) cross-project-tests/dtlto/archives-mixed-lto-modes-test.test (+35) 
- (added) cross-project-tests/dtlto/archives-same-module-id.test (+55) 
- (modified) lld/COFF/InputFiles.cpp (+1) 
- (modified) lld/ELF/InputFiles.cpp (+1) 
- (modified) lld/ELF/LTO.cpp (+3) 
- (modified) lld/MachO/InputFiles.cpp (+1) 
- (modified) llvm/include/llvm/Bitcode/BitcodeReader.h (+5) 
- (added) llvm/include/llvm/DTLTO/Dtlto.h (+23) 
- (modified) llvm/include/llvm/LTO/LTO.h (+48) 
- (modified) llvm/lib/CMakeLists.txt (+1) 
- (added) llvm/lib/DTLTO/CMakeLists.txt (+7) 
- (added) llvm/lib/DTLTO/Dtlto.cpp (+226) 
- (modified) llvm/lib/LTO/CMakeLists.txt (+1) 
- (modified) llvm/lib/LTO/LTO.cpp (+18-2) 


``````````diff
diff --git a/cross-project-tests/CMakeLists.txt b/cross-project-tests/CMakeLists.txt
index 192db87043177..6753a27698eae 100644
--- a/cross-project-tests/CMakeLists.txt
+++ b/cross-project-tests/CMakeLists.txt
@@ -20,6 +20,7 @@ set(CROSS_PROJECT_TEST_DEPS
   check-gdb-llvm-support
   count
   llvm-ar
+  llvm-ar
   llvm-config
   llvm-dwarfdump
   llvm-objdump
diff --git a/cross-project-tests/dtlto/archive.test b/cross-project-tests/dtlto/archive.test
new file mode 100644
index 0000000000000..1c01ae7a5691a
--- /dev/null
+++ b/cross-project-tests/dtlto/archive.test
@@ -0,0 +1,80 @@
+REQUIRES: x86-registered-target,ld.lld,llvm-ar
+
+# Test that a DTLTO link succeeds and outputs the expected set of files
+# correctly when archives are present.
+
+RUN: rm -rf %t && split-file %s %t && cd %t
+# Compile sources into bitcode. -O2 is required for cross-module importing.
+RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c foo.c boo.c moo.c loo.c voo.c main.c
+
+RUN: llvm-ar rcs archive.a foo.o boo.o moo.o
+RUN: llvm-ar rcsT archive.thin.a loo.o voo.o
+
+# Build with DTLTO.
+RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin \
+RUN:   -fuse-ld=lld -nostdlib -e main \
+RUN:   main.o archive.a archive.thin.a -o main.elf \
+RUN:   -Wl,--thinlto-distributor=%python \
+RUN:   -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
+RUN:   -Wl,--thinlto-remote-compiler=%clang \
+RUN:   -Wl,--save-temps
+
+# Check that the required output files have been created.
+RUN: ls | FileCheck %s --check-prefix=OUTPUTS
+
+# JSON jobs description.
+OUTPUTS-DAG:    {{^}}main.[[PID:[0-9]+]].dist-file.json
+
+# Main source.
+OUTPUTS-DAG:    {{^}}main.{{[0-9]+}}.[[PID]].native.o{{$}}
+OUTPUTS-DAG:    {{^}}main.{{[0-9]+}}.[[PID]].native.o.thinlto.bc{{$}}
+
+# Regular archive members.
+# Filename composition: <archive>(<member> at <offset>).<task>.<pid>.<task>.<pid>.native.o[.thinlto.bc].
+OUTPUTS-DAG:    {{^}}archive.a(boo.o at {{[0-9]+}}).2.[[HEXPID:[a-fA-F0-9]+]].2.[[PID]].native.o{{$}}
+OUTPUTS-DAG:    {{^}}archive.a(boo.o at {{[0-9]+}}).2.[[HEXPID]].2.[[PID]].native.o.thinlto.bc{{$}}
+
+OUTPUTS-DAG:    {{^}}archive.a(foo.o at {{[0-9]+}}).3.[[HEXPID]].3.[[PID]].native.o{{$}}
+OUTPUTS-DAG:    {{^}}archive.a(foo.o at {{[0-9]+}}).3.[[HEXPID]].3.[[PID]].native.o.thinlto.bc{{$}}
+
+OUTPUTS-DAG:    {{^}}archive.a(moo.o at {{[0-9]+}}).4.[[HEXPID]].4.[[PID]].native.o{{$}}
+OUTPUTS-DAG:    {{^}}archive.a(moo.o at {{[0-9]+}}).4.[[HEXPID]].4.[[PID]].native.o.thinlto.bc{{$}}
+
+# Thin archive members.
+OUTPUTS-DAG:    {{^}}voo.{{[0-9]+}}.[[PID]].native.o{{$}}
+OUTPUTS-DAG:    {{^}}voo.{{[0-9]+}}.[[PID]].native.o.thinlto.bc{{$}}
+
+OUTPUTS-DAG:    {{^}}loo.{{[0-9]+}}.[[PID]].native.o{{$}}
+OUTPUTS-DAG:    {{^}}loo.{{[0-9]+}}.[[PID]].native.o.thinlto.bc{{$}}
+
+# Executable file.
+OUTPUTS-DAG:    {{^}}main.elf{{$}}
+
+#--- foo.c
+volatile int foo_int;
+__attribute__((retain)) int foo(int x) { return x + foo_int; }
+
+#--- boo.c
+extern int foo(int x);
+__attribute__((retain)) int boo(int x) { return foo(x); }
+
+#--- moo.c
+__attribute__((retain)) int moo() { return 3; }
+
+#--- loo.c
+extern int moo(int x);
+__attribute__((retain)) int loo(int x) { return moo(x); }
+
+#--- voo.c
+extern int foo(int x);
+extern int loo(int x);
+__attribute__((retain)) int voo(int x) { return foo(x) + loo(x + 1) + 7; }
+
+#--- main.c
+extern int boo(int x);
+extern int moo();
+extern int voo(int x);
+__attribute__((retain)) int main(int argc, char** argv) {
+  return boo(argc) + moo() + voo(argc + 3);
+}
+
diff --git a/cross-project-tests/dtlto/archives-mixed-lto-modes-test.test b/cross-project-tests/dtlto/archives-mixed-lto-modes-test.test
new file mode 100644
index 0000000000000..74f146028b4b6
--- /dev/null
+++ b/cross-project-tests/dtlto/archives-mixed-lto-modes-test.test
@@ -0,0 +1,35 @@
+REQUIRES: x86-registered-target,ld.lld,llvm-ar
+
+# Test that DTLTO works with a mixture of FullLTO and ThinLTO bitcode archive members
+# where there is more than one LTO partition.
+
+RUN: rm -rf %t && split-file %s %t && cd %t
+
+RUN: %clang --target=x86_64-linux-gnu -flto -c one.c two.c
+RUN: %clang --target=x86_64-linux-gnu -flto=thin -c three.c
+
+RUN: llvm-ar rc archive.a one.o two.o three.o
+
+# Build with DTLTO.
+RUN: %clang --target=x86_64-linux-gnu -Werror -flto -fuse-ld=lld -nostdlib \
+RUN:   -Wl,--whole-archive archive.a \
+RUN:   -Wl,--thinlto-distributor=%python \
+RUN:   -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
+RUN:   -Wl,--thinlto-remote-compiler=%clang \
+RUN:   -Wl,--save-temps,--lto-partitions=2
+
+# Show that the FullLTO modules have been prepared for distribution. This is
+# not optimal but has no functional impact.
+RUN: FileCheck %s --input-file=a.out.resolution.txt
+CHECK: archive.a(one.o at {{.*}}).1.[[PID:[a-zA-Z0-9_]+]].o
+CHECK: archive.a(two.o at {{.*}}).2.[[PID]].o
+CHECK: archive.a(three.o at {{.*}}).3.[[PID]].o
+
+#--- one.c
+__attribute__((retain)) void one() {}
+
+#--- two.c
+__attribute__((retain)) void two() {}
+
+#--- three.c
+__attribute__((retain)) void three() {}
diff --git a/cross-project-tests/dtlto/archives-same-module-id.test b/cross-project-tests/dtlto/archives-same-module-id.test
new file mode 100644
index 0000000000000..09d5f7492bfa5
--- /dev/null
+++ b/cross-project-tests/dtlto/archives-same-module-id.test
@@ -0,0 +1,55 @@
+REQUIRES: x86-registered-target,ld.lld,llvm-ar
+
+# Test that a DTLTO link succeeds when there are two archive member files with
+# the same filename path component.
+
+# Split this file into several sources.
+RUN: rm -rf %t && split-file %s %t && cd %t
+
+RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c start.c
+
+# Create first archive.
+RUN: mkdir archive1 && cd archive1
+RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c ../t1.c ../t3.c
+RUN: llvm-ar rc archive.a t3.o t1.o
+RUN: cd ..
+
+# Create second archive.
+RUN: mkdir archive2 && cd archive2
+RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c ../t1.c ../t3.c
+RUN: llvm-ar rc archive.a t3.o t1.o
+RUN: cd ..
+
+RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld \
+RUN:   -nostdlib -Wl,--undefined=t1,--undefined=t3 \
+RUN:    start.o archive1/archive.a archive2/archive.a -o main.elf \
+RUN:   -Wl,--save-temps \
+RUN:   -Wl,--thinlto-distributor=%python \
+RUN:   -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
+RUN:   -Wl,--thinlto-remote-compiler=%clang
+
+# Check that the required output files have been created.
+RUN: ls | FileCheck %s --check-prefix=OUTPUTS
+
+# JSON jobs description.
+OUTPUTS-DAG:    {{^}}main.[[PID:[0-9]+]].dist-file.json
+
+# Sources.
+OUTPUTS-DAG:    {{^}}start.{{[0-9]+}}.[[PID]].native.o{{$}}
+OUTPUTS-DAG:    {{^}}start.{{[0-9]+}}.[[PID]].native.o.thinlto.bc{{$}}
+
+# Archive members.
+# Filename composition: <archive>(<member> at <offset>).<task>.<pid>.<task>.<pid>.native.o[.thinlto.bc].
+OUTPUTS-DAG:    {{^}}archive.a(t3.o at {{[0-9]+}}).2.[[HEXPID:[a-fA-F0-9]+]].2.[[PID]].native.o{{$}}
+OUTPUTS-DAG:    {{^}}archive.a(t3.o at {{[0-9]+}}).2.[[HEXPID]].2.[[PID]].native.o.thinlto.bc{{$}}
+OUTPUTS-DAG:    {{^}}archive.a(t1.o at {{[0-9]+}}).3.[[HEXPID]].3.[[PID]].native.o{{$}}
+OUTPUTS-DAG:    {{^}}archive.a(t1.o at {{[0-9]+}}).3.[[HEXPID]].3.[[PID]].native.o.thinlto.bc{{$}}
+
+#--- t1.c
+__attribute__((retain)) void t1() { }
+
+#--- start.c
+__attribute__((retain)) void _start() { }
+
+#--- t3.c
+__attribute__((retain)) void t3() { }
diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp
index c08099b8810bb..d415955b6093b 100644
--- a/lld/COFF/InputFiles.cpp
+++ b/lld/COFF/InputFiles.cpp
@@ -1380,6 +1380,7 @@ BitcodeFile *BitcodeFile::create(COFFLinkerContext &ctx, MemoryBufferRef mb,
                                                utostr(offsetInArchive)));
 
   std::unique_ptr<lto::InputFile> obj = check(lto::InputFile::create(mbref));
+  obj->setArchivePathAndName(archiveName, mb.getBufferIdentifier());
   return make<BitcodeFile>(ctx.getSymtab(getMachineType(obj.get())), mb, obj,
                            lazy);
 }
diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index a5921feb18299..ec0af9d0c0f4e 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -1874,6 +1874,7 @@ BitcodeFile::BitcodeFile(Ctx &ctx, MemoryBufferRef mb, StringRef archiveName,
   MemoryBufferRef mbref(mb.getBuffer(), name);
 
   obj = CHECK2(lto::InputFile::create(mbref), this);
+  obj->setArchivePathAndName(archiveName, mb.getBufferIdentifier());
 
   Triple t(obj->getTargetTriple());
   ekind = getBitcodeELFKind(t);
diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp
index 8d4a6c9e3a81e..13e9c63495e4d 100644
--- a/lld/ELF/LTO.cpp
+++ b/lld/ELF/LTO.cpp
@@ -202,6 +202,9 @@ BitcodeCompiler::BitcodeCompiler(Ctx &ctx) : ctx(ctx) {
                                       ctx.arg.ltoPartitions,
                                       ltoModes[ctx.arg.ltoKind]);
 
+  if(!ctx.arg.dtltoDistributor.empty())
+      ltoObj->Dtlto = true;
+
   // Initialize usedStartStop.
   if (ctx.bitcodeFiles.empty())
     return;
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 442fc608865d2..8cd4303441a22 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -2361,6 +2361,7 @@ BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
                                                sys::path::filename(path) + ")" +
                                                utostr(offsetInArchive)));
   obj = check(lto::InputFile::create(mbref));
+  obj->setArchivePathAndName(archiveName, mb.getBufferIdentifier());
   if (lazy)
     parseLazy();
   else
diff --git a/llvm/include/llvm/Bitcode/BitcodeReader.h b/llvm/include/llvm/Bitcode/BitcodeReader.h
index 4f839d4cd1575..772ca82019278 100644
--- a/llvm/include/llvm/Bitcode/BitcodeReader.h
+++ b/llvm/include/llvm/Bitcode/BitcodeReader.h
@@ -137,6 +137,11 @@ struct ParserCallbacks {
 
     StringRef getModuleIdentifier() const { return ModuleIdentifier; }
 
+    // Assign a new module identifier to this bitcode module.
+    void setModuleIdentifier(llvm::StringRef ModuleId) {
+      ModuleIdentifier = ModuleId;
+    }
+
     /// Read the bitcode module and prepare for lazy deserialization of function
     /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well.
     /// If IsImporting is true, this module is being parsed for ThinLTO
diff --git a/llvm/include/llvm/DTLTO/Dtlto.h b/llvm/include/llvm/DTLTO/Dtlto.h
new file mode 100644
index 0000000000000..aa6af7d0cd9b7
--- /dev/null
+++ b/llvm/include/llvm/DTLTO/Dtlto.h
@@ -0,0 +1,23 @@
+//===- Dtlto.h - Distributed ThinLTO functions and classes ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+
+#ifndef LLVM_DTLTO_H
+#define LLVM_DTLTO_H
+
+#include "llvm/LTO/LTO.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+namespace dtlto {
+
+llvm::Expected<llvm::lto::InputFile*> addInput(llvm::lto::LTO *LtoObj,
+                               std::unique_ptr<llvm::lto::InputFile> Input);
+
+llvm::Error process(llvm::lto::LTO &LtoObj);
+} // namespace dtlto
+
+#endif // LLVM_DTLTO_H
diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h
index 323c478691a92..7183a4429e665 100644
--- a/llvm/include/llvm/LTO/LTO.h
+++ b/llvm/include/llvm/LTO/LTO.h
@@ -32,6 +32,23 @@
 #include "llvm/Transforms/IPO/FunctionAttrs.h"
 #include "llvm/Transforms/IPO/FunctionImport.h"
 
+namespace llvm {
+namespace lto {
+class LTO;
+}
+} // namespace llvm
+
+namespace dtlto {
+class TempFilesRemover {
+  llvm::lto::LTO *Lto = nullptr;
+
+public:
+  TempFilesRemover(llvm::lto::LTO *LtoObj) : Lto{LtoObj} {}
+  ~TempFilesRemover();
+};
+
+} // namespace dtlto
+
 namespace llvm {
 
 class Error;
@@ -135,6 +152,12 @@ class InputFile {
   std::vector<StringRef> DependentLibraries;
   std::vector<std::pair<StringRef, Comdat::SelectionKind>> ComdatTable;
 
+  MemoryBufferRef MbRef;
+  bool IsMemberOfArchive = false;
+  bool IsThinLTO = false;
+  StringRef ArchivePath;
+  StringRef MemberName;
+
 public:
   LLVM_ABI ~InputFile();
 
@@ -193,6 +216,20 @@ class InputFile {
 
   // Returns the only BitcodeModule from InputFile.
   LLVM_ABI BitcodeModule &getSingleBitcodeModule();
+  // Returns the memory buffer reference for this input file.
+  MemoryBufferRef getFileBuffer() const { return MbRef; }
+  // Returns true if this input file is a member of an archive.
+  bool isMemberOfArchive() const { return IsMemberOfArchive; }
+  // Mark this input file as a member of an archive.
+  void memberOfArchive(bool MA) { IsMemberOfArchive = MA; }
+
+  // Returns true if bitcode is ThinLTO.
+  bool isThinLTO() const { return IsThinLTO; }
+
+  // Store an archive path and a member name.
+  void setArchivePathAndName(StringRef Path, StringRef Name) { ArchivePath = Path; MemberName = Name; }
+  StringRef getArchivePath() const  { return ArchivePath; }
+  StringRef getMemberName() const  { return MemberName; }
 
 private:
   ArrayRef<Symbol> module_symbols(unsigned I) const {
@@ -580,6 +617,17 @@ class LTO {
 
   // Diagnostic optimization remarks file
   std::unique_ptr<ToolOutputFile> DiagnosticOutputFile;
+
+public:
+  /// DTLTO mode.
+  bool Dtlto = false;
+
+  BumpPtrAllocator PtrAlloc;
+  StringSaver Saver{PtrAlloc};
+
+  // Array of input bitcode files for LTO.
+  std::vector<std::unique_ptr<llvm::lto::InputFile>> InputFiles;
+  std::unique_ptr<dtlto::TempFilesRemover> TempsRemover;
 };
 
 /// The resolution for a symbol. The linker must provide a SymbolResolution for
diff --git a/llvm/lib/CMakeLists.txt b/llvm/lib/CMakeLists.txt
index a9432977718c6..0856af9058fef 100644
--- a/llvm/lib/CMakeLists.txt
+++ b/llvm/lib/CMakeLists.txt
@@ -22,6 +22,7 @@ add_subdirectory(Frontend)
 add_subdirectory(Transforms)
 add_subdirectory(Linker)
 add_subdirectory(Analysis)
+add_subdirectory(DTLTO)
 add_subdirectory(LTO)
 add_subdirectory(MC)
 add_subdirectory(MCA)
diff --git a/llvm/lib/DTLTO/CMakeLists.txt b/llvm/lib/DTLTO/CMakeLists.txt
new file mode 100644
index 0000000000000..51fd8aad6f48b
--- /dev/null
+++ b/llvm/lib/DTLTO/CMakeLists.txt
@@ -0,0 +1,7 @@
+add_llvm_component_library(LLVMDTLTO
+  Dtlto.cpp
+
+  LINK_COMPONENTS
+  Core
+  Support
+  )
diff --git a/llvm/lib/DTLTO/Dtlto.cpp b/llvm/lib/DTLTO/Dtlto.cpp
new file mode 100644
index 0000000000000..356f30188c5c4
--- /dev/null
+++ b/llvm/lib/DTLTO/Dtlto.cpp
@@ -0,0 +1,226 @@
+//===- Dtlto.cpp - Distributed ThinLTO implementation --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// This file implements support functions for Distributed ThinLTO, focusing on
+// archive file handling.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DTLTO/Dtlto.h"
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Magic.h"
+#include "llvm/LTO/LTO.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBufferRef.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <iostream>
+#include <string>
+
+using namespace llvm;
+
+namespace dtlto {
+
+// Removes any temporary regular archive member files that were created during
+// processing.
+TempFilesRemover::~TempFilesRemover() {
+  if (!Lto)
+    return;
+  for (auto &Input : Lto->InputFiles) {
+    if (Input->isMemberOfArchive())
+      sys::fs::remove(Input->getName(), /*IgnoreNonExisting=*/true);
+  }
+}
+
+// Writes the content of a memory buffer into a file.
+static llvm::Error saveBuffer(StringRef FileBuffer, StringRef FilePath) {
+  std::error_code EC;
+  raw_fd_ostream OS(FilePath.str(), EC, sys::fs::OpenFlags::OF_None);
+  if (EC) {
+    return createStringError(inconvertibleErrorCode(),
+                             "Failed to create file %s: %s", FilePath.data(),
+                             EC.message().c_str());
+  }
+  OS.write(FileBuffer.data(), FileBuffer.size());
+  if (OS.has_error()) {
+    return createStringError(inconvertibleErrorCode(),
+                             "Failed writing to file %s", FilePath.data());
+  }
+  return Error::success();
+}
+
+// Compute the file path for a thin archive member.
+//
+// For thin archives, an archive member name is typically a file path relative
+// to the archive file's directory. This function resolves that path.
+SmallString<64> computeThinArchiveMemberPath(const StringRef ArchivePath,
+                                             const StringRef MemberName) {
+  assert(!ArchivePath.empty() && "An archive file path must be non empty.");
+  SmallString<64> MemberPath;
+  if (sys::path::is_relative(MemberName)) {
+    MemberPath = sys::path::parent_path(ArchivePath);
+    sys::path::append(MemberPath, MemberName);
+  } else
+    MemberPath = MemberName;
+  sys::path::remove_dots(MemberPath, /*remove_dot_dot=*/true);
+  return MemberPath;
+}
+
+// Magic string identifying thin archive files.
+static constexpr StringLiteral THIN_ARCHIVE_MAGIC = "!<thin>\n";
+
+// Determines if a file at the given path is a thin archive file.
+//
+// This function uses a cache to avoid repeatedly reading the same file.
+// It reads only the header portion (magic bytes) of the file to identify
+// the archive type.
+Expected<bool> isThinArchive(const StringRef ArchivePath) {
+  static StringMap<bool> ArchiveFiles;
+
+  // Return cached result if available.
+  auto Cached = ArchiveFiles.find(ArchivePath);
+  if (Cached != ArchiveFiles.end())
+    return Cached->second;
+
+  uint64_t FileSize = -1;
+  bool IsThin = false;
+  std::error_code EC = sys::fs::file_size(ArchivePath, FileSize);
+  if (EC)
+    return createStringError(inconvertibleErrorCode(),
+                             "Failed to get file size from archive %s: %s",
+                             ArchivePath.data(), EC.message().c_str());
+  if (FileSize < THIN_ARCHIVE_MAGIC.size())
+    return createStringError(inconvertibleErrorCode(),
+                             "Archive file size is too small %s",
+                             ArchivePath.data());
+
+  // Read only the first few bytes containing the magic signature.
+  ErrorOr<std::unique_ptr<MemoryBuffer>> MemBufferOrError =
+      MemoryBuffer::getFileSlice(ArchivePath, THIN_ARCHIVE_MAGIC.size(), 0);
+
+  if (EC = MemBufferOrError.getError())
+    return createStringError(inconvertibleErrorCode(),
+                             "Failed to read from archive %s: %s",
+                             ArchivePath.data(), EC.message().c_str());
+
+  StringRef MemBuf = (*MemBufferOrError.get()).getBuffer();
+  if (file_magic::archive != identify_magic(MemBuf))
+    return createStringError(inconvertibleErrorCode(),
+                             "Unknown format for archive %s",
+                             ArchivePath.data());
+
+  IsThin = MemBuf.starts_with(THIN_ARCHIVE_MAGIC);
+
+  // Cache the result
+  ArchiveFiles[ArchivePath] = IsThin;
+  return IsThin;
+}
+
+// This function performs the following tasks:
+// 1. Adds the input file to the LTO object's list of input files.
+// 2. For thin archive members, generates a new module ID which is a path to a
+// thin archive member file.
+// 3. For regular archive members, generates a new unique module ID.
+// 4. Updates the bitcode module's identifier.
+Expected<lto::InputFile *> addInput(lto::LTO *LtoObj,
+                                    std::unique_ptr<lto::InputFile> InputPtr) {
+
+  // Add the input file to the LTO object.
+  LtoObj->InputFiles.push_back(std::move(InputPtr));
+  lto::InputFile *Input = LtoObj->InputFiles.back().get();
+
+  // Skip processing if not in DTLTO mode.
+  if (!LtoObj->Dtlto)
+    return Input;
+
+  StringRef ModuleId = Input->getName();
+  StringRef ArchivePath = Input->getArchivePath();
+
+  // Only process archive members.
+  if (ArchivePath.empty())
+    return Input;
+
+  SmallString<64> NewModuleId;
+  BitcodeMod...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/157043


More information about the llvm-commits mailing list