[lld] [llvm] [DTLTO][ELF][COFF] Add archive support for DTLTO. (PR #157043)
Teresa Johnson via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 5 11:48:17 PST 2025
================
@@ -0,0 +1,223 @@
+//===- Dtlto.cpp - Distributed ThinLTO implementation --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// This file implements support functions for Distributed ThinLTO, focusing on
+// archive file handling.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DTLTO/DTLTO.h"
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Magic.h"
+#include "llvm/LTO/LTO.h"
+#include "llvm/Object/Archive.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBufferRef.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <iostream>
+#include <string>
+
+using namespace llvm;
+
+namespace dtlto {
+
+// On destruction, deletes the on-disk file named by every input that is a
+// member of an archive. Inputs that are not archive members are left alone,
+// and a file that no longer exists is not treated as a failure.
+TempFilesRemover::~TempFilesRemover() {
+  for (auto &File : Lto->InputFiles) {
+    if (!File->isMemberOfArchive())
+      continue;
+    sys::fs::remove(File->getName(), /*IgnoreNonExisting=*/true);
+  }
+}
+
+// Writes the content of a memory buffer into a file.
+//
+// \param FileBuffer the bytes to write.
+// \param FilePath the destination file path.
+// \returns Error::success() when the file was created and fully written,
+// otherwise a string error naming the file that could not be created or
+// written.
+static llvm::Error saveBuffer(StringRef FileBuffer, StringRef FilePath) {
+  // A StringRef is not guaranteed to be null-terminated, so take an owned copy
+  // before handing the path to a printf-style "%s" conversion.
+  const std::string Path = FilePath.str();
+
+  std::error_code EC;
+  raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None);
+  if (EC) {
+    return createStringError(inconvertibleErrorCode(),
+                             "Failed to create file %s: %s", Path.c_str(),
+                             EC.message().c_str());
+  }
+
+  OS.write(FileBuffer.data(), FileBuffer.size());
+  // The stream is buffered: close it so that any flush/close failure is
+  // recorded before the error state is inspected.
+  OS.close();
+  if (OS.has_error()) {
+    // Clear the flag so the stream destructor does not escalate the failure
+    // to report_fatal_error; the caller gets a recoverable Error instead.
+    OS.clear_error();
+    return createStringError(inconvertibleErrorCode(),
+                             "Failed writing to file %s", Path.c_str());
+  }
+  return Error::success();
+}
+
+// Resolve the on-disk path of a thin archive member.
+//
+// A relative member name is interpreted relative to the directory that
+// contains the archive file; any other member name is used unchanged. In both
+// cases "." and ".." components are removed from the result.
+//
+// \param ArchivePath path of the archive file; must not be empty.
+// \param MemberName member name as recorded in the archive.
+// \returns the resolved member path.
+SmallString<64> computeThinArchiveMemberPath(const StringRef ArchivePath,
+                                             const StringRef MemberName) {
+  assert(!ArchivePath.empty() && "An archive file path must be non empty.");
+
+  SmallString<64> Resolved;
+  if (!sys::path::is_relative(MemberName)) {
+    // Not relative: take the member name verbatim.
+    Resolved = MemberName;
+  } else {
+    // Relative: anchor the member name at the archive's parent directory.
+    Resolved = sys::path::parent_path(ArchivePath);
+    sys::path::append(Resolved, MemberName);
+  }
+
+  sys::path::remove_dots(Resolved, /*remove_dot_dot=*/true);
+  return Resolved;
+}
+
+// Determines if a file at the given path is a thin archive file.
+//
+// Successful results are cached per path in a function-local map so that the
+// same file is not read repeatedly; failures are not cached. Only the leading
+// magic bytes of the file are read to identify the archive type.
+//
+// \param ArchivePath path of the archive file to inspect.
+// \returns true for a thin archive, false for any other recognized archive,
+// or an error if the file size cannot be queried, the file is shorter than
+// the magic signature, the file cannot be read, or it is not an archive.
+Expected<bool> isThinArchive(const StringRef ArchivePath) {
+  static StringMap<bool> ArchiveFiles;
+
+  // Return cached result if available.
+  auto Cached = ArchiveFiles.find(ArchivePath);
+  if (Cached != ArchiveFiles.end())
+    return Cached->second;
+
+  // A StringRef is not guaranteed to be null-terminated, so take an owned copy
+  // for use with the printf-style "%s" conversions below.
+  const std::string PathStr = ArchivePath.str();
+
+  // Length of the magic signature itself. Measuring through StringRef excludes
+  // the trailing NUL that sizeof() on the character array would count, so an
+  // archive consisting of nothing but the magic is still accepted.
+  const uint64_t MagicSize = StringRef(object::ThinArchiveMagic).size();
+
+  uint64_t FileSize = 0;
+  std::error_code EC = sys::fs::file_size(ArchivePath, FileSize);
+  if (EC)
+    return createStringError(inconvertibleErrorCode(),
+                             "Failed to get file size from archive %s: %s",
+                             PathStr.c_str(), EC.message().c_str());
+  if (FileSize < MagicSize)
+    return createStringError(inconvertibleErrorCode(),
+                             "Archive file size is too small %s",
+                             PathStr.c_str());
+
+  // Read only the first few bytes containing the magic signature.
+  ErrorOr<std::unique_ptr<MemoryBuffer>> MemBufferOrError =
+      MemoryBuffer::getFileSlice(ArchivePath, MagicSize, /*Offset=*/0);
+
+  if ((EC = MemBufferOrError.getError()))
+    return createStringError(inconvertibleErrorCode(),
+                             "Failed to read from archive %s: %s",
+                             PathStr.c_str(), EC.message().c_str());
+
+  StringRef MemBuf = MemBufferOrError.get()->getBuffer();
+  if (file_magic::archive != identify_magic(MemBuf))
+    return createStringError(inconvertibleErrorCode(),
+                             "Unknown format for archive %s", PathStr.c_str());
+
+  const bool IsThin = MemBuf.starts_with(object::ThinArchiveMagic);
+
+  // Cache the result
+  ArchiveFiles[ArchivePath] = IsThin;
+  return IsThin;
+}
+
+// This function performs the following tasks:
+// 1. Adds the input file to the LTO object's list of input files.
+// 2. For thin archive members, generates a new module ID which is a path to a
+// thin archive member file.
+// 3. For regular archive members, generates a new unique module ID.
+// 4. Updates the bitcode module's identifier.
+Expected<lto::InputFile *> addInput(lto::LTO *LtoObj,
+ std::unique_ptr<lto::InputFile> InputPtr) {
+
+ // Add the input file to the LTO object.
----------------
teresajohnson wrote:
> This certainly can be done (derive DTLTO class from LTO), but this would necessitate making more code changes in the lld sources where the LTO class is being instantiated.
Wouldn't that just be changing which class is instantiated with make_unique and removing the code being added there in the current PR version to set the Dtlto flag on the resulting LTO object? Then addInput, for example, could be a virtual method that does different things for DTLTO. I think overall that would be cleaner, with better abstractions and modularity.
https://github.com/llvm/llvm-project/pull/157043
More information about the llvm-commits
mailing list