[lld] [llvm] [DTLTO][ELF][COFF] Add archive support for DTLTO. (PR #157043)

Teresa Johnson via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 2 13:45:49 PST 2025


================
@@ -0,0 +1,226 @@
+//===- Dtlto.cpp - Distributed ThinLTO implementation --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// This file implements support functions for Distributed ThinLTO, focusing on
+// archive file handling.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DTLTO/DTLTO.h"
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Magic.h"
+#include "llvm/LTO/LTO.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBufferRef.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <iostream>
+#include <string>
+
+using namespace llvm;
+
+namespace dtlto {
+
+// Best-effort cleanup performed when the remover goes out of scope: for every
+// input file of the attached LTO object that reports being an archive member,
+// the file named by that input is deleted from disk. Nothing happens when no
+// LTO object is attached.
+TempFilesRemover::~TempFilesRemover() {
+  if (!Lto)
+    return;
+  for (auto &File : Lto->InputFiles) {
+    if (!File->isMemberOfArchive())
+      continue;
+    // A file that is already gone is not treated as a failure.
+    sys::fs::remove(File->getName(), /*IgnoreNonExisting=*/true);
+  }
+}
+
+// Writes the content of a memory buffer into a file.
+//
+// \param FileBuffer Bytes to be written.
+// \param FilePath   Destination path; the file is opened with OF_None.
+// \returns Error::success() when the data was written and the file closed
+//          without error, otherwise a string error naming the file.
+static llvm::Error saveBuffer(StringRef FileBuffer, StringRef FilePath) {
+  // A StringRef is not guaranteed to be null-terminated, so take an owned copy
+  // before passing the path to printf-style formatting.
+  const std::string Path = FilePath.str();
+  std::error_code EC;
+  raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None);
+  if (EC) {
+    return createStringError(inconvertibleErrorCode(),
+                             "Failed to create file %s: %s", Path.c_str(),
+                             EC.message().c_str());
+  }
+  OS.write(FileBuffer.data(), FileBuffer.size());
+  // The stream is buffered: flush and close it now so that any write failure
+  // is visible to the has_error() check below.
+  OS.close();
+  if (OS.has_error()) {
+    // Clear the flag before returning; a raw_fd_ostream destroyed with a
+    // pending error reports a fatal error instead of letting us return one.
+    OS.clear_error();
+    return createStringError(inconvertibleErrorCode(),
+                             "Failed writing to file %s", Path.c_str());
+  }
+  return Error::success();
+}
+
+// Resolve the on-disk path of a thin archive member.
+//
+// A relative member name is interpreted relative to the directory that
+// contains the archive; a non-relative member name is used as is. In both
+// cases "." and ".." components are removed from the result.
+SmallString<64> computeThinArchiveMemberPath(const StringRef ArchivePath,
+                                             const StringRef MemberName) {
+  assert(!ArchivePath.empty() && "An archive file path must be non empty.");
+  SmallString<64> Resolved;
+  if (!sys::path::is_relative(MemberName)) {
+    // Not relative: the member name already is the full path.
+    Resolved = MemberName;
+  } else {
+    // Relative: anchor the member name at the archive's parent directory.
+    Resolved = sys::path::parent_path(ArchivePath);
+    sys::path::append(Resolved, MemberName);
+  }
+  sys::path::remove_dots(Resolved, /*remove_dot_dot=*/true);
+  return Resolved;
+}
+
+// Magic string identifying thin archive files.
+static constexpr StringLiteral THIN_ARCHIVE_MAGIC = "!<thin>\n";
+
+// Determines if a file at the given path is a thin archive file.
+//
+// Successful results are cached per path so that the same file is not read
+// repeatedly; failures are not cached. Only the leading magic bytes of the
+// file are read to identify the archive type.
+//
+// \param ArchivePath Path of the archive file to inspect.
+// \returns true for a thin archive, false for any other archive, or an error
+//          if the file cannot be sized or read, is shorter than the magic
+//          string, or is not recognized as an archive.
+//
+// NOTE(review): the cache is a function-local static with no synchronization;
+// confirm that callers never invoke this concurrently.
+Expected<bool> isThinArchive(const StringRef ArchivePath) {
+  static StringMap<bool> ArchiveFiles;
+
+  // Return cached result if available.
+  auto Cached = ArchiveFiles.find(ArchivePath);
+  if (Cached != ArchiveFiles.end())
+    return Cached->second;
+
+  // A StringRef is not guaranteed to be null-terminated, so use an owned copy
+  // of the path for the printf-style error messages below.
+  const std::string Path = ArchivePath.str();
+
+  uint64_t FileSize = 0;
+  std::error_code EC = sys::fs::file_size(ArchivePath, FileSize);
+  if (EC)
+    return createStringError(inconvertibleErrorCode(),
+                             "Failed to get file size from archive %s: %s",
+                             Path.c_str(), EC.message().c_str());
+  if (FileSize < THIN_ARCHIVE_MAGIC.size())
+    return createStringError(inconvertibleErrorCode(),
+                             "Archive file size is too small %s",
+                             Path.c_str());
+
+  // Read only the first few bytes containing the magic signature.
+  ErrorOr<std::unique_ptr<MemoryBuffer>> MemBufferOrError =
+      MemoryBuffer::getFileSlice(ArchivePath, THIN_ARCHIVE_MAGIC.size(), 0);
+
+  if ((EC = MemBufferOrError.getError()))
+    return createStringError(inconvertibleErrorCode(),
+                             "Failed to read from archive %s: %s",
+                             Path.c_str(), EC.message().c_str());
+
+  StringRef MemBuf = (*MemBufferOrError.get()).getBuffer();
+  if (file_magic::archive != identify_magic(MemBuf))
+    return createStringError(inconvertibleErrorCode(),
+                             "Unknown format for archive %s", Path.c_str());
+
+  const bool IsThin = MemBuf.starts_with(THIN_ARCHIVE_MAGIC);
+
+  // Cache the result
+  ArchiveFiles[ArchivePath] = IsThin;
+  return IsThin;
+}
+
+// This function performs the following tasks:
+// 1. Adds the input file to the LTO object's list of input files.
+// 2. For thin archive members, generates a new module ID which is a path to a
+// thin archive member file.
+// 3. For regular archive members, generates a new unique module ID.
+// 4. Updates the bitcode module's identifier.
+Expected<lto::InputFile *> addInput(lto::LTO *LtoObj,
----------------
teresajohnson wrote:

At the time, I did mean in the LTO class, but I do appreciate the concerns around modularity. It would then be better to have a DTLTO class that is derived from LTO and includes all of these new methods (and static variables such as ArchiveFiles, mentioned above). You could then create an instance of that class instead of creating a normal LTO object and setting Dtlto = true, as you currently do.

https://github.com/llvm/llvm-project/pull/157043


More information about the llvm-commits mailing list