[llvm] 8b57b97 - [DTLTO][NFC] Minor improvements to the input file preparation class (#180824)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 11 01:02:25 PST 2026
Author: Ben Dunbobbin
Date: 2026-02-11T09:02:19Z
New Revision: 8b57b9730222cdc098d6a8708099e5b4986b1e2c
URL: https://github.com/llvm/llvm-project/commit/8b57b9730222cdc098d6a8708099e5b4986b1e2c
DIFF: https://github.com/llvm/llvm-project/commit/8b57b9730222cdc098d6a8708099e5b4986b1e2c.diff
LOG: [DTLTO][NFC] Minor improvements to the input file preparation class (#180824)
This change performs a small set of NFC refactors to improve clarity. In
particular, we make it clear that the responsibilities of this class now
extend beyond its original archive-handling role.
Added:
Modified:
llvm/include/llvm/DTLTO/DTLTO.h
llvm/include/llvm/LTO/LTO.h
llvm/lib/DTLTO/DTLTO.cpp
llvm/lib/LTO/LTO.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/DTLTO/DTLTO.h b/llvm/include/llvm/DTLTO/DTLTO.h
index 02b098a68aec5..e80577aa12834 100644
--- a/llvm/include/llvm/DTLTO/DTLTO.h
+++ b/llvm/include/llvm/DTLTO/DTLTO.h
@@ -6,8 +6,8 @@
//
//===---------------------------------------------------------------------===//
-#ifndef LLVM_DTLTO_H
-#define LLVM_DTLTO_H
+#ifndef LLVM_DTLTO_DTLTO_H
+#define LLVM_DTLTO_DTLTO_H
#include "llvm/LTO/LTO.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -15,6 +15,20 @@
namespace llvm {
namespace lto {
+// The purpose of this class is to prepare inputs so that distributed ThinLTO
+// backend compilations can succeed.
+//
+// For distributed compilation, each input must exist as an individual bitcode
+// file on disk and be loadable via its ModuleID. This requirement is not met
+// for archive members, as an archive is a collection of files rather than a
+// standalone file. Similarly, for FatLTO objects, the bitcode is stored in a
+// section of the containing ELF object file. To address this, the class ensures
+// that an individual bitcode file exists for each input (by writing it out if
+// necessary) and that the ModuleID is updated to point to it.
+//
+// The class ensures that lto::InputFile objects are preserved until enough of
+// the LTO pipeline has executed to determine the required per-module
+// information, such as whether a module will participate in ThinLTO.
class DTLTO : public LTO {
using Base = LTO;
@@ -33,7 +47,10 @@ class DTLTO : public LTO {
addInput(std::unique_ptr<InputFile> InputPtr) override;
protected:
- LLVM_ABI llvm::Error handleArchiveInputs() override;
+ // Save the contents of ThinLTO-enabled input files that must be serialized
+ // for distribution, such as archive members and FatLTO objects, to individual
+ // bitcode files named after the module ID.
+ LLVM_ABI llvm::Error serializeInputsForDistribution() override;
LLVM_ABI void cleanup() override;
@@ -48,24 +65,17 @@ class DTLTO : public LTO {
/// Controls preservation of any created temporary files.
bool SaveTemps;
- // Determines if a file at the given path is a thin archive file.
- Expected<bool> isThinArchive(const StringRef ArchivePath);
-
- // Write the archive member content to a file named after the module ID.
- Error saveInputArchiveMember(lto::InputFile *Input);
-
- // Iterates through all input files and saves their content
- // to files if they are regular archive members.
- Error saveInputArchiveMembers();
-
// Array of input bitcode files for LTO.
std::vector<std::shared_ptr<lto::InputFile>> InputFiles;
- // A cache to avoid repeatedly reading the same archive file.
- StringMap<bool> ArchiveFiles;
+ // Cache of whether a path refers to a thin archive.
+ StringMap<bool> ArchiveIsThinCache;
+
+ // Determines if the file at the given path is a thin archive.
+ Expected<bool> isThinArchive(const StringRef ArchivePath);
};
} // namespace lto
} // namespace llvm
-#endif // LLVM_DTLTO_H
+#endif // LLVM_DTLTO_DTLTO_H
diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h
index f992be9899e3d..fe8ef19759404 100644
--- a/llvm/include/llvm/LTO/LTO.h
+++ b/llvm/include/llvm/LTO/LTO.h
@@ -465,7 +465,7 @@ class LTO {
protected:
// Called at the start of run().
- virtual Error handleArchiveInputs() { return Error::success(); }
+ virtual Error serializeInputsForDistribution() { return Error::success(); }
// Called before returning from run().
virtual void cleanup() {}
diff --git a/llvm/lib/DTLTO/DTLTO.cpp b/llvm/lib/DTLTO/DTLTO.cpp
index 4a1107e76e47b..92b5bb362ba4c 100644
--- a/llvm/lib/DTLTO/DTLTO.cpp
+++ b/llvm/lib/DTLTO/DTLTO.cpp
@@ -8,7 +8,7 @@
//
// \file
// This file implements support functions for Distributed ThinLTO, focusing on
-// archive file handling.
+// preparing input files for distribution.
//
//===----------------------------------------------------------------------===//
@@ -34,31 +34,35 @@ using namespace llvm;
namespace {
-// Writes the content of a memory buffer into a file.
-llvm::Error saveBuffer(StringRef FileBuffer, StringRef FilePath) {
+// Saves the content of Buffer to Path overwriting any existing file.
+Error save(StringRef Buffer, StringRef Path) {
std::error_code EC;
- raw_fd_ostream OS(FilePath.str(), EC, sys::fs::OpenFlags::OF_None);
- if (EC) {
+ raw_fd_ostream OS(Path.str(), EC, sys::fs::OpenFlags::OF_None);
+ if (EC)
return createStringError(inconvertibleErrorCode(),
- "Failed to create file %s: %s", FilePath.data(),
+ "Failed to create file %s: %s", Path.data(),
EC.message().c_str());
- }
- OS.write(FileBuffer.data(), FileBuffer.size());
- if (OS.has_error()) {
+ OS.write(Buffer.data(), Buffer.size());
+ if (OS.has_error())
return createStringError(inconvertibleErrorCode(),
- "Failed writing to file %s", FilePath.data());
- }
+ "Failed writing to file %s", Path.data());
return Error::success();
}
+// Saves the content of Input to Path overwriting any existing file.
+Error save(lto::InputFile *Input, StringRef Path) {
+ MemoryBufferRef MB = Input->getFileBuffer();
+ return save(MB.getBuffer(), Path);
+}
+
// Compute the file path for a thin archive member.
//
// For thin archives, an archive member name is typically a file path relative
// to the archive file's directory. This function resolves that path.
-SmallString<64> computeThinArchiveMemberPath(const StringRef ArchivePath,
- const StringRef MemberName) {
+SmallString<256> computeThinArchiveMemberPath(StringRef ArchivePath,
+ StringRef MemberName) {
assert(!ArchivePath.empty() && "An archive file path must be non empty.");
- SmallString<64> MemberPath;
+ SmallString<256> MemberPath;
if (sys::path::is_relative(MemberName)) {
MemberPath = sys::path::parent_path(ArchivePath);
sys::path::append(MemberPath, MemberName);
@@ -77,12 +81,11 @@ SmallString<64> computeThinArchiveMemberPath(const StringRef ArchivePath,
// the archive type.
Expected<bool> lto::DTLTO::isThinArchive(const StringRef ArchivePath) {
// Return cached result if available.
- auto Cached = ArchiveFiles.find(ArchivePath);
- if (Cached != ArchiveFiles.end())
+ auto Cached = ArchiveIsThinCache.find(ArchivePath);
+ if (Cached != ArchiveIsThinCache.end())
return Cached->second;
uint64_t FileSize = -1;
- bool IsThin = false;
std::error_code EC = sys::fs::file_size(ArchivePath, FileSize);
if (EC)
return createStringError(inconvertibleErrorCode(),
@@ -94,43 +97,45 @@ Expected<bool> lto::DTLTO::isThinArchive(const StringRef ArchivePath) {
ArchivePath.data());
// Read only the first few bytes containing the magic signature.
- ErrorOr<std::unique_ptr<MemoryBuffer>> MemBufferOrError =
- MemoryBuffer::getFileSlice(ArchivePath, sizeof(object::ThinArchiveMagic),
- 0);
-
- if ((EC = MemBufferOrError.getError()))
+ ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = MemoryBuffer::getFileSlice(
+ ArchivePath, sizeof(object::ThinArchiveMagic), 0);
+ if ((EC = MBOrErr.getError()))
return createStringError(inconvertibleErrorCode(),
"Failed to read from archive %s: %s",
ArchivePath.data(), EC.message().c_str());
- StringRef MemBuf = (*MemBufferOrError.get()).getBuffer();
- if (file_magic::archive != identify_magic(MemBuf))
+ StringRef Buf = (*MBOrErr)->getBuffer();
+ if (file_magic::archive != identify_magic(Buf))
return createStringError(inconvertibleErrorCode(),
"Unknown format for archive %s",
ArchivePath.data());
- IsThin = MemBuf.starts_with(object::ThinArchiveMagic);
+ bool IsThin = Buf.starts_with(object::ThinArchiveMagic);
+
+ // Cache the result.
+ ArchiveIsThinCache[ArchivePath] = IsThin;
- // Cache the result
- ArchiveFiles[ArchivePath] = IsThin;
return IsThin;
}
+// Add an input file and prepare it for distribution.
+//
// This function performs the following tasks:
-// 1. Adds the input file to the LTO object's list of input files.
-// 2. For thin archive members, generates a new module ID which is a path to a
-// thin archive member file.
-// 3. For regular archive members, generates a new unique module ID.
-// 4. Updates the bitcode module's identifier.
+// 1. Add the input file to the LTO object's list of input files.
+// 2. For thin archive members, overwrite the module ID with the path to the
+// member file on disk.
+// 3. For archive members and FatLTO objects, overwrite the module ID with a
+// unique path naming a file that will contain the member content. The file
+// is created and populated later (see serializeInputsForDistribution()).
Expected<std::shared_ptr<lto::InputFile>>
-lto::DTLTO::addInput(std::unique_ptr<lto::InputFile> InputPtr) {
+lto::DTLTO::addInput(std::unique_ptr<InputFile> InputPtr) {
TimeTraceScope TimeScope("Add input for DTLTO");
// Add the input file to the LTO object.
InputFiles.emplace_back(InputPtr.release());
- std::shared_ptr<lto::InputFile> &Input = InputFiles.back();
+ auto &Input = InputFiles.back();
+ BitcodeModule &BM = Input->getPrimaryBitcodeModule();
- StringRef ModuleId = Input->getName();
StringRef ArchivePath = Input->getArchivePath();
// In most cases, the module ID already points to an individual bitcode file
@@ -138,82 +143,60 @@ lto::DTLTO::addInput(std::unique_ptr<lto::InputFile> InputPtr) {
if (ArchivePath.empty() && !Input->isFatLTOObject())
return Input;
- SmallString<64> NewModuleId;
- BitcodeModule &BM = Input->getPrimaryBitcodeModule();
-
// For a member of a thin archive that is not a FatLTO object, there is an
// existing file on disk that can be used, so we can avoid having to
- // materialize.
+ // serialize.
Expected<bool> UseThinMember =
Input->isFatLTOObject() ? false : isThinArchive(ArchivePath);
if (!UseThinMember)
return UseThinMember.takeError();
-
if (*UseThinMember) {
- // For thin archives, use the path to the actual file.
- NewModuleId =
+ // For thin archives, use the path to the actual member file on disk.
+ auto MemberPath =
computeThinArchiveMemberPath(ArchivePath, Input->getMemberName());
- } else {
- // For regular archives and FatLTO objects, generate a unique name.
- Input->setSerializeForDistribution(true);
-
- // Create unique identifier using process ID and sequence number.
- std::string PID = utohexstr(sys::Process::getProcessId());
- std::string Seq = std::to_string(InputFiles.size());
-
- NewModuleId = sys::path::parent_path(LinkerOutputFile);
- sys::path::append(NewModuleId, sys::path::filename(ModuleId) + "." + Seq +
- "." + PID + ".o");
+ BM.setModuleIdentifier(Saver.save(MemberPath.str()));
+ return Input;
}
- // Update the module identifier and save it.
- BM.setModuleIdentifier(Saver.save(NewModuleId.str()));
-
+ // A new file on disk will be needed for archive members and FatLTO objects.
+ Input->setSerializeForDistribution(true);
+
+ // Create a unique path by including the process ID and sequence number in the
+ // filename.
+ SmallString<256> Id(sys::path::parent_path(LinkerOutputFile));
+ sys::path::append(Id,
+ Twine(sys::path::filename(Input->getName())) + "." +
+ std::to_string(InputFiles.size()) /*Sequence number*/ +
+ "." + utohexstr(sys::Process::getProcessId()) + ".o");
+ BM.setModuleIdentifier(Saver.save(Id.str()));
return Input;
}
-// Write the archive member content to a file named after the module ID.
-// If a file with that name already exists, it's likely a leftover from a
-// previously terminated linker process and can be safely overwritten.
-Error lto::DTLTO::saveInputArchiveMember(lto::InputFile *Input) {
- StringRef ModuleId = Input->getName();
- if (Input->getSerializeForDistribution()) {
+// Save the contents of ThinLTO-enabled input files that must be serialized for
+// distribution, such as archive members and FatLTO objects, to individual
+// bitcode files named after the module ID.
+//
+// Must be called after all input files are added but before optimization
+// begins. If a file with that name already exists, it is likely a leftover from
+// a previously terminated linker process and can be safely overwritten.
+llvm::Error lto::DTLTO::serializeInputsForDistribution() {
+ for (auto &Input : InputFiles) {
+ if (!Input->isThinLTO() || !Input->getSerializeForDistribution())
+ continue;
+ // Save the content of the input file to a file named after the module ID.
+ StringRef ModuleId = Input->getName();
TimeTraceScope TimeScope("Serialize bitcode input for DTLTO", ModuleId);
// Cleanup this file on abnormal process exit.
if (!SaveTemps)
llvm::sys::RemoveFileOnSignal(ModuleId);
- MemoryBufferRef MemoryBufferRef = Input->getFileBuffer();
- if (Error EC = saveBuffer(MemoryBufferRef.getBuffer(), ModuleId))
+ if (Error EC = save(Input.get(), ModuleId))
return EC;
}
- return Error::success();
-}
-
-// Iterates through all ThinLTO-enabled input files and saves their content
-// to separate files if they are regular archive members.
-Error lto::DTLTO::saveInputArchiveMembers() {
- for (auto &Input : InputFiles) {
- if (!Input->isThinLTO())
- continue;
- if (Error EC = saveInputArchiveMember(Input.get()))
- return EC;
- }
- return Error::success();
-}
-
-// Entry point for DTLTO archives support.
-//
-// Sets up the temporary file remover and processes archive members.
-// Must be called after all inputs are added but before optimization begins.
-llvm::Error lto::DTLTO::handleArchiveInputs() {
- // Process and save archive members to separate files if needed.
- if (Error EC = saveInputArchiveMembers())
- return EC;
return Error::success();
}
-// Remove temporary archive member files created to enable distribution.
+// Remove serialized inputs created to enable distribution.
void lto::DTLTO::cleanup() {
if (!SaveTemps) {
TimeTraceScope TimeScope("Remove temporary inputs for DTLTO");
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index bbbdfcbd71f2b..749af6eda3fdb 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -1229,7 +1229,7 @@ Error LTO::checkPartiallySplit() {
Error LTO::run(AddStreamFn AddStream, FileCache Cache) {
llvm::scope_exit CleanUp([this]() { cleanup(); });
- if (Error EC = handleArchiveInputs())
+ if (Error EC = serializeInputsForDistribution())
return EC;
// Compute "dead" symbols, we don't want to import/export these!
More information about the llvm-commits
mailing list