[llvm] 8b57b97 - [DTLTO][NFC] Minor improvements to the input file preparation class (#180824)

via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 11 01:02:25 PST 2026


Author: Ben Dunbobbin
Date: 2026-02-11T09:02:19Z
New Revision: 8b57b9730222cdc098d6a8708099e5b4986b1e2c

URL: https://github.com/llvm/llvm-project/commit/8b57b9730222cdc098d6a8708099e5b4986b1e2c
DIFF: https://github.com/llvm/llvm-project/commit/8b57b9730222cdc098d6a8708099e5b4986b1e2c.diff

LOG: [DTLTO][NFC] Minor improvements to the input file preparation class (#180824)

This change performs a small set of NFC refactors to improve clarity. In
particular, we make it clear that the responsibilities of this class now
extend beyond its original archive-handling role.

Added: 
    

Modified: 
    llvm/include/llvm/DTLTO/DTLTO.h
    llvm/include/llvm/LTO/LTO.h
    llvm/lib/DTLTO/DTLTO.cpp
    llvm/lib/LTO/LTO.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/DTLTO/DTLTO.h b/llvm/include/llvm/DTLTO/DTLTO.h
index 02b098a68aec5..e80577aa12834 100644
--- a/llvm/include/llvm/DTLTO/DTLTO.h
+++ b/llvm/include/llvm/DTLTO/DTLTO.h
@@ -6,8 +6,8 @@
 //
 //===---------------------------------------------------------------------===//
 
-#ifndef LLVM_DTLTO_H
-#define LLVM_DTLTO_H
+#ifndef LLVM_DTLTO_DTLTO_H
+#define LLVM_DTLTO_DTLTO_H
 
 #include "llvm/LTO/LTO.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -15,6 +15,20 @@
 namespace llvm {
 namespace lto {
 
+// The purpose of this class is to prepare inputs so that distributed ThinLTO
+// backend compilations can succeed.
+//
+// For distributed compilation, each input must exist as an individual bitcode
+// file on disk and be loadable via its ModuleID. This requirement is not met
+// for archive members, as an archive is a collection of files rather than a
+// standalone file. Similarly, for FatLTO objects, the bitcode is stored in a
+// section of the containing ELF object file. To address this, the class ensures
+// that an individual bitcode file exists for each input (by writing it out if
+// necessary) and that the ModuleID is updated to point to it.
+//
+// The class ensures that lto::InputFile objects are preserved until enough of
+// the LTO pipeline has executed to determine the required per-module
+// information, such as whether a module will participate in ThinLTO.
 class DTLTO : public LTO {
   using Base = LTO;
 
@@ -33,7 +47,10 @@ class DTLTO : public LTO {
   addInput(std::unique_ptr<InputFile> InputPtr) override;
 
 protected:
-  LLVM_ABI llvm::Error handleArchiveInputs() override;
+  // Save the contents of ThinLTO-enabled input files that must be serialized
+  // for distribution, such as archive members and FatLTO objects, to individual
+  // bitcode files named after the module ID.
+  LLVM_ABI llvm::Error serializeInputsForDistribution() override;
 
   LLVM_ABI void cleanup() override;
 
@@ -48,24 +65,17 @@ class DTLTO : public LTO {
   /// Controls preservation of any created temporary files.
   bool SaveTemps;
 
-  // Determines if a file at the given path is a thin archive file.
-  Expected<bool> isThinArchive(const StringRef ArchivePath);
-
-  // Write the archive member content to a file named after the module ID.
-  Error saveInputArchiveMember(lto::InputFile *Input);
-
-  // Iterates through all input files and saves their content
-  // to files if they are regular archive members.
-  Error saveInputArchiveMembers();
-
   // Array of input bitcode files for LTO.
   std::vector<std::shared_ptr<lto::InputFile>> InputFiles;
 
-  // A cache to avoid repeatedly reading the same archive file.
-  StringMap<bool> ArchiveFiles;
+  // Cache of whether a path refers to a thin archive.
+  StringMap<bool> ArchiveIsThinCache;
+
+  // Determines if the file at the given path is a thin archive.
+  Expected<bool> isThinArchive(const StringRef ArchivePath);
 };
 
 } // namespace lto
 } // namespace llvm
 
-#endif // LLVM_DTLTO_H
+#endif // LLVM_DTLTO_DTLTO_H

diff  --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h
index f992be9899e3d..fe8ef19759404 100644
--- a/llvm/include/llvm/LTO/LTO.h
+++ b/llvm/include/llvm/LTO/LTO.h
@@ -465,7 +465,7 @@ class LTO {
 
 protected:
   // Called at the start of run().
-  virtual Error handleArchiveInputs() { return Error::success(); }
+  virtual Error serializeInputsForDistribution() { return Error::success(); }
 
   // Called before returning from run().
   virtual void cleanup() {}

diff  --git a/llvm/lib/DTLTO/DTLTO.cpp b/llvm/lib/DTLTO/DTLTO.cpp
index 4a1107e76e47b..92b5bb362ba4c 100644
--- a/llvm/lib/DTLTO/DTLTO.cpp
+++ b/llvm/lib/DTLTO/DTLTO.cpp
@@ -8,7 +8,7 @@
 //
 // \file
 // This file implements support functions for Distributed ThinLTO, focusing on
-// archive file handling.
+// preparing input files for distribution.
 //
 //===----------------------------------------------------------------------===//
 
@@ -34,31 +34,35 @@ using namespace llvm;
 
 namespace {
 
-// Writes the content of a memory buffer into a file.
-llvm::Error saveBuffer(StringRef FileBuffer, StringRef FilePath) {
+// Saves the content of Buffer to Path, overwriting any existing file.
+Error save(StringRef Buffer, StringRef Path) {
   std::error_code EC;
-  raw_fd_ostream OS(FilePath.str(), EC, sys::fs::OpenFlags::OF_None);
-  if (EC) {
+  raw_fd_ostream OS(Path.str(), EC, sys::fs::OpenFlags::OF_None);
+  if (EC)
     return createStringError(inconvertibleErrorCode(),
-                             "Failed to create file %s: %s", FilePath.data(),
+                             "Failed to create file %s: %s", Path.data(),
                              EC.message().c_str());
-  }
-  OS.write(FileBuffer.data(), FileBuffer.size());
-  if (OS.has_error()) {
+  OS.write(Buffer.data(), Buffer.size());
+  if (OS.has_error())
     return createStringError(inconvertibleErrorCode(),
-                             "Failed writing to file %s", FilePath.data());
-  }
+                             "Failed writing to file %s", Path.data());
   return Error::success();
 }
 
+// Saves the content of Input to Path, overwriting any existing file.
+Error save(lto::InputFile *Input, StringRef Path) {
+  MemoryBufferRef MB = Input->getFileBuffer();
+  return save(MB.getBuffer(), Path);
+}
+
 // Compute the file path for a thin archive member.
 //
 // For thin archives, an archive member name is typically a file path relative
 // to the archive file's directory. This function resolves that path.
-SmallString<64> computeThinArchiveMemberPath(const StringRef ArchivePath,
-                                             const StringRef MemberName) {
+SmallString<256> computeThinArchiveMemberPath(StringRef ArchivePath,
+                                              StringRef MemberName) {
   assert(!ArchivePath.empty() && "An archive file path must be non empty.");
-  SmallString<64> MemberPath;
+  SmallString<256> MemberPath;
   if (sys::path::is_relative(MemberName)) {
     MemberPath = sys::path::parent_path(ArchivePath);
     sys::path::append(MemberPath, MemberName);
@@ -77,12 +81,11 @@ SmallString<64> computeThinArchiveMemberPath(const StringRef ArchivePath,
 // the archive type.
 Expected<bool> lto::DTLTO::isThinArchive(const StringRef ArchivePath) {
   // Return cached result if available.
-  auto Cached = ArchiveFiles.find(ArchivePath);
-  if (Cached != ArchiveFiles.end())
+  auto Cached = ArchiveIsThinCache.find(ArchivePath);
+  if (Cached != ArchiveIsThinCache.end())
     return Cached->second;
 
   uint64_t FileSize = -1;
-  bool IsThin = false;
   std::error_code EC = sys::fs::file_size(ArchivePath, FileSize);
   if (EC)
     return createStringError(inconvertibleErrorCode(),
@@ -94,43 +97,45 @@ Expected<bool> lto::DTLTO::isThinArchive(const StringRef ArchivePath) {
                              ArchivePath.data());
 
   // Read only the first few bytes containing the magic signature.
-  ErrorOr<std::unique_ptr<MemoryBuffer>> MemBufferOrError =
-      MemoryBuffer::getFileSlice(ArchivePath, sizeof(object::ThinArchiveMagic),
-                                 0);
-
-  if ((EC = MemBufferOrError.getError()))
+  ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = MemoryBuffer::getFileSlice(
+      ArchivePath, sizeof(object::ThinArchiveMagic), 0);
+  if ((EC = MBOrErr.getError()))
     return createStringError(inconvertibleErrorCode(),
                              "Failed to read from archive %s: %s",
                              ArchivePath.data(), EC.message().c_str());
 
-  StringRef MemBuf = (*MemBufferOrError.get()).getBuffer();
-  if (file_magic::archive != identify_magic(MemBuf))
+  StringRef Buf = (*MBOrErr)->getBuffer();
+  if (file_magic::archive != identify_magic(Buf))
     return createStringError(inconvertibleErrorCode(),
                              "Unknown format for archive %s",
                              ArchivePath.data());
 
-  IsThin = MemBuf.starts_with(object::ThinArchiveMagic);
+  bool IsThin = Buf.starts_with(object::ThinArchiveMagic);
+
+  // Cache the result.
+  ArchiveIsThinCache[ArchivePath] = IsThin;
 
-  // Cache the result
-  ArchiveFiles[ArchivePath] = IsThin;
   return IsThin;
 }
 
+// Add an input file and prepare it for distribution.
+//
 // This function performs the following tasks:
-// 1. Adds the input file to the LTO object's list of input files.
-// 2. For thin archive members, generates a new module ID which is a path to a
-// thin archive member file.
-// 3. For regular archive members, generates a new unique module ID.
-// 4. Updates the bitcode module's identifier.
+// 1. Add the input file to the LTO object's list of input files.
+// 2. For thin archive members, overwrite the module ID with the path to the
+//    member file on disk.
+// 3. For archive members and FatLTO objects, overwrite the module ID with a
+//    unique path naming a file that will contain the member content. The file
+//    is created and populated later (see serializeInputsForDistribution()).
 Expected<std::shared_ptr<lto::InputFile>>
-lto::DTLTO::addInput(std::unique_ptr<lto::InputFile> InputPtr) {
+lto::DTLTO::addInput(std::unique_ptr<InputFile> InputPtr) {
   TimeTraceScope TimeScope("Add input for DTLTO");
 
   // Add the input file to the LTO object.
   InputFiles.emplace_back(InputPtr.release());
-  std::shared_ptr<lto::InputFile> &Input = InputFiles.back();
+  auto &Input = InputFiles.back();
+  BitcodeModule &BM = Input->getPrimaryBitcodeModule();
 
-  StringRef ModuleId = Input->getName();
   StringRef ArchivePath = Input->getArchivePath();
 
   // In most cases, the module ID already points to an individual bitcode file
@@ -138,82 +143,60 @@ lto::DTLTO::addInput(std::unique_ptr<lto::InputFile> InputPtr) {
   if (ArchivePath.empty() && !Input->isFatLTOObject())
     return Input;
 
-  SmallString<64> NewModuleId;
-  BitcodeModule &BM = Input->getPrimaryBitcodeModule();
-
   // For a member of a thin archive that is not a FatLTO object, there is an
   // existing file on disk that can be used, so we can avoid having to
-  // materialize.
+  // serialize.
   Expected<bool> UseThinMember =
       Input->isFatLTOObject() ? false : isThinArchive(ArchivePath);
   if (!UseThinMember)
     return UseThinMember.takeError();
-
   if (*UseThinMember) {
-    // For thin archives, use the path to the actual file.
-    NewModuleId =
+    // For thin archives, use the path to the actual member file on disk.
+    auto MemberPath =
         computeThinArchiveMemberPath(ArchivePath, Input->getMemberName());
-  } else {
-    // For regular archives and FatLTO objects, generate a unique name.
-    Input->setSerializeForDistribution(true);
-
-    // Create unique identifier using process ID and sequence number.
-    std::string PID = utohexstr(sys::Process::getProcessId());
-    std::string Seq = std::to_string(InputFiles.size());
-
-    NewModuleId = sys::path::parent_path(LinkerOutputFile);
-    sys::path::append(NewModuleId, sys::path::filename(ModuleId) + "." + Seq +
-                                       "." + PID + ".o");
+    BM.setModuleIdentifier(Saver.save(MemberPath.str()));
+    return Input;
   }
 
-  // Update the module identifier and save it.
-  BM.setModuleIdentifier(Saver.save(NewModuleId.str()));
-
+  // A new file on disk will be needed for archive members and FatLTO objects.
+  Input->setSerializeForDistribution(true);
+
+  // Create a unique path by including the process ID and sequence number in the
+  // filename.
+  SmallString<256> Id(sys::path::parent_path(LinkerOutputFile));
+  sys::path::append(Id,
+                    Twine(sys::path::filename(Input->getName())) + "." +
+                        std::to_string(InputFiles.size()) /*Sequence number*/ +
+                        "." + utohexstr(sys::Process::getProcessId()) + ".o");
+  BM.setModuleIdentifier(Saver.save(Id.str()));
   return Input;
 }
 
-// Write the archive member content to a file named after the module ID.
-// If a file with that name already exists, it's likely a leftover from a
-// previously terminated linker process and can be safely overwritten.
-Error lto::DTLTO::saveInputArchiveMember(lto::InputFile *Input) {
-  StringRef ModuleId = Input->getName();
-  if (Input->getSerializeForDistribution()) {
+// Save the contents of ThinLTO-enabled input files that must be serialized for
+// distribution, such as archive members and FatLTO objects, to individual
+// bitcode files named after the module ID.
+//
+// Must be called after all input files are added but before optimization
+// begins. If a file with that name already exists, it is likely a leftover from
+// a previously terminated linker process and can be safely overwritten.
+llvm::Error lto::DTLTO::serializeInputsForDistribution() {
+  for (auto &Input : InputFiles) {
+    if (!Input->isThinLTO() || !Input->getSerializeForDistribution())
+      continue;
+    // Save the content of the input file to a file named after the module ID.
+    StringRef ModuleId = Input->getName();
     TimeTraceScope TimeScope("Serialize bitcode input for DTLTO", ModuleId);
     // Cleanup this file on abnormal process exit.
     if (!SaveTemps)
       llvm::sys::RemoveFileOnSignal(ModuleId);
-    MemoryBufferRef MemoryBufferRef = Input->getFileBuffer();
-    if (Error EC = saveBuffer(MemoryBufferRef.getBuffer(), ModuleId))
+    if (Error EC = save(Input.get(), ModuleId))
       return EC;
   }
-  return Error::success();
-}
-
-// Iterates through all ThinLTO-enabled input files and saves their content
-// to separate files if they are regular archive members.
-Error lto::DTLTO::saveInputArchiveMembers() {
-  for (auto &Input : InputFiles) {
-    if (!Input->isThinLTO())
-      continue;
-    if (Error EC = saveInputArchiveMember(Input.get()))
-      return EC;
-  }
-  return Error::success();
-}
-
-// Entry point for DTLTO archives support.
-//
-// Sets up the temporary file remover and processes archive members.
-// Must be called after all inputs are added but before optimization begins.
-llvm::Error lto::DTLTO::handleArchiveInputs() {
 
-  // Process and save archive members to separate files if needed.
-  if (Error EC = saveInputArchiveMembers())
-    return EC;
   return Error::success();
 }
 
-// Remove temporary archive member files created to enable distribution.
+// Remove serialized inputs created to enable distribution.
 void lto::DTLTO::cleanup() {
   if (!SaveTemps) {
     TimeTraceScope TimeScope("Remove temporary inputs for DTLTO");

diff  --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index bbbdfcbd71f2b..749af6eda3fdb 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -1229,7 +1229,7 @@ Error LTO::checkPartiallySplit() {
 Error LTO::run(AddStreamFn AddStream, FileCache Cache) {
   llvm::scope_exit CleanUp([this]() { cleanup(); });
 
-  if (Error EC = handleArchiveInputs())
+  if (Error EC = serializeInputsForDistribution())
     return EC;
 
   // Compute "dead" symbols, we don't want to import/export these!


        


More information about the llvm-commits mailing list