[llvm] Revert "[CAS] Add MappedFileRegionArena" (PR #158694)

via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 15 10:32:58 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-platform-windows

Author: Paul Kirth (ilovepi)

<details>
<summary>Changes</summary>

Reverts llvm/llvm-project#<!-- -->114099

This broke bots: https://lab.llvm.org/buildbot/#/builders/140/builds/30748


---

Patch is 38.68 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/158694.diff


12 Files Affected:

- (modified) llvm/CMakeLists.txt (-1) 
- (removed) llvm/include/llvm/CAS/MappedFileRegionArena.h (-130) 
- (modified) llvm/include/llvm/Config/llvm-config.h.cmake (-3) 
- (modified) llvm/include/llvm/Support/FileSystem.h (-5) 
- (modified) llvm/lib/CAS/CMakeLists.txt (-2) 
- (removed) llvm/lib/CAS/MappedFileRegionArena.cpp (-388) 
- (removed) llvm/lib/CAS/OnDiskCommon.cpp (-121) 
- (removed) llvm/lib/CAS/OnDiskCommon.h (-46) 
- (modified) llvm/lib/Support/Unix/Path.inc (-5) 
- (modified) llvm/lib/Support/Windows/Path.inc (-17) 
- (modified) llvm/unittests/CAS/CMakeLists.txt (-5) 
- (removed) llvm/unittests/CAS/ProgramTest.cpp (-239) 


``````````diff
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index e8af7fb432f40..b98192968a3ab 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -878,7 +878,6 @@ option (LLVM_ENABLE_SPHINX "Use Sphinx to generate llvm documentation." OFF)
 option (LLVM_ENABLE_OCAMLDOC "Build OCaml bindings documentation." ON)
 option (LLVM_ENABLE_BINDINGS "Build bindings." ON)
 option (LLVM_ENABLE_TELEMETRY "Enable the telemetry library. If set to OFF, library cannot be enabled after build (eg., at runtime)" ON)
-option (LLVM_ENABLE_ONDISK_CAS "Build OnDiskCAS." ON)
 
 set(LLVM_INSTALL_DOXYGEN_HTML_DIR "${CMAKE_INSTALL_DOCDIR}/llvm/doxygen-html"
     CACHE STRING "Doxygen-generated HTML documentation install directory")
diff --git a/llvm/include/llvm/CAS/MappedFileRegionArena.h b/llvm/include/llvm/CAS/MappedFileRegionArena.h
deleted file mode 100644
index ff51f0eb59929..0000000000000
--- a/llvm/include/llvm/CAS/MappedFileRegionArena.h
+++ /dev/null
@@ -1,130 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// This file declares interface for MappedFileRegionArena, a bump pointer
-/// allocator, backed by a memory-mapped file.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CAS_MAPPEDFILEREGIONARENA_H
-#define LLVM_CAS_MAPPEDFILEREGIONARENA_H
-
-#include "llvm/Support/Alignment.h"
-#include "llvm/Support/FileSystem.h"
-#include <atomic>
-
-namespace llvm::cas {
-
-/// Allocator for an owned mapped file region that supports thread-safe and
-/// process-safe bump pointer allocation.
-///
-/// This allocator is designed to create a sparse file when supported by the
-/// filesystem's \c ftruncate so that it can be used with a large maximum size.
-/// It will also attempt to shrink the underlying file down to its current
-/// allocation size when the last concurrent mapping is closed.
-///
-/// Process-safe. Uses file locks when resizing the file during initialization
-/// and destruction.
-///
-/// Thread-safe. Requires OS support for thread-safe file locks.
-///
-/// Provides 8-byte alignment for all allocations.
-class MappedFileRegionArena {
-public:
-  using RegionT = sys::fs::mapped_file_region;
-
-  /// Header for MappedFileRegionArena. It can be configured to be located
-  /// at any location within the file and the allocation will be appended after
-  /// the header.
-  struct Header {
-    // BumpPtr for new allocation.
-    std::atomic<uint64_t> BumpPtr;
-    // Allocated size on disk.
-    std::atomic<uint64_t> AllocatedSize;
-    // Capacity of the file.
-    std::atomic<uint64_t> Capacity;
-    // Offset from the beginning of the file to this header (for verification).
-    std::atomic<uint64_t> HeaderOffset;
-  };
-
-  /// Create a \c MappedFileRegionArena.
-  ///
-  /// \param Path the path to open the mapped region.
-  /// \param Capacity the maximum size for the mapped file region.
-  /// \param HeaderOffset the offset at which to store the header. This is so
-  /// that information can be stored before the header, like a file magic.
-  /// \param NewFileConstructor is for constructing new files. It has exclusive
-  /// access to the file. Must call \c initializeBumpPtr.
-  static Expected<MappedFileRegionArena>
-  create(const Twine &Path, uint64_t Capacity, uint64_t HeaderOffset,
-         function_ref<Error(MappedFileRegionArena &)> NewFileConstructor);
-
-  /// Minimum alignment for allocations, currently hardcoded to 8B.
-  static constexpr Align getAlign() {
-    // Trick Align into giving us '8' as a constexpr.
-    struct alignas(8) T {};
-    static_assert(alignof(T) == 8, "Tautology failed?");
-    return Align::Of<T>();
-  }
-
-  /// Allocate at least \p AllocSize. Rounds up to \a getAlign().
-  Expected<char *> allocate(uint64_t AllocSize) {
-    auto Offset = allocateOffset(AllocSize);
-    if (LLVM_UNLIKELY(!Offset))
-      return Offset.takeError();
-    return data() + *Offset;
-  }
-  /// Allocate, returning the offset from \a data() instead of a pointer.
-  Expected<int64_t> allocateOffset(uint64_t AllocSize);
-
-  char *data() const { return Region.data(); }
-  uint64_t size() const { return H->BumpPtr; }
-  uint64_t capacity() const { return Region.size(); }
-
-  RegionT &getRegion() { return Region; }
-
-  ~MappedFileRegionArena() { destroyImpl(); }
-
-  MappedFileRegionArena() = default;
-  MappedFileRegionArena(MappedFileRegionArena &&RHS) { moveImpl(RHS); }
-  MappedFileRegionArena &operator=(MappedFileRegionArena &&RHS) {
-    destroyImpl();
-    moveImpl(RHS);
-    return *this;
-  }
-
-  MappedFileRegionArena(const MappedFileRegionArena &) = delete;
-  MappedFileRegionArena &operator=(const MappedFileRegionArena &) = delete;
-
-private:
-  // initialize header from offset.
-  void initializeHeader(uint64_t HeaderOffset);
-
-  void destroyImpl();
-  void moveImpl(MappedFileRegionArena &RHS) {
-    std::swap(Region, RHS.Region);
-    std::swap(H, RHS.H);
-    std::swap(Path, RHS.Path);
-    std::swap(FD, RHS.FD);
-    std::swap(SharedLockFD, RHS.SharedLockFD);
-  }
-
-private:
-  RegionT Region;
-  Header *H = nullptr;
-  std::string Path;
-  // File descriptor for the main storage file.
-  std::optional<int> FD;
-  // File descriptor for the file used as reader/writer lock.
-  std::optional<int> SharedLockFD;
-};
-
-} // namespace llvm::cas
-
-#endif // LLVM_CAS_MAPPEDFILEREGIONARENA_H
diff --git a/llvm/include/llvm/Config/llvm-config.h.cmake b/llvm/include/llvm/Config/llvm-config.h.cmake
index 6488d6c01b5c6..39136bc45c292 100644
--- a/llvm/include/llvm/Config/llvm-config.h.cmake
+++ b/llvm/include/llvm/Config/llvm-config.h.cmake
@@ -146,7 +146,4 @@
    coverage bugs, and to 0 otherwise. */
 #cmakedefine01 LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
 
-/* Define to 1 to enable LLVM OnDisk Content Addressable Storage */
-#cmakedefine01 LLVM_ENABLE_ONDISK_CAS
-
 #endif
diff --git a/llvm/include/llvm/Support/FileSystem.h b/llvm/include/llvm/Support/FileSystem.h
index c203779307840..a21b0a272d2b0 100644
--- a/llvm/include/llvm/Support/FileSystem.h
+++ b/llvm/include/llvm/Support/FileSystem.h
@@ -410,11 +410,6 @@ LLVM_ABI std::error_code copy_file(const Twine &From, int ToFD);
 ///          platform-specific error_code.
 LLVM_ABI std::error_code resize_file(int FD, uint64_t Size);
 
-/// Resize path to size with sparse files explicitly enabled. It uses
-/// FSCTL_SET_SPARSE On Windows. This is the same as resize_file on
-/// non-Windows
-LLVM_ABI std::error_code resize_file_sparse(int FD, uint64_t Size);
-
 /// Resize \p FD to \p Size before mapping \a mapped_file_region::readwrite. On
 /// non-Windows, this calls \a resize_file(). On Windows, this is a no-op,
 /// since the subsequent mapping (via \c CreateFileMapping) automatically
diff --git a/llvm/lib/CAS/CMakeLists.txt b/llvm/lib/CAS/CMakeLists.txt
index 6ed724bc2fd76..f3d2b41c704bc 100644
--- a/llvm/lib/CAS/CMakeLists.txt
+++ b/llvm/lib/CAS/CMakeLists.txt
@@ -3,9 +3,7 @@ add_llvm_component_library(LLVMCAS
   ActionCaches.cpp
   BuiltinCAS.cpp
   InMemoryCAS.cpp
-  MappedFileRegionArena.cpp
   ObjectStore.cpp
-  OnDiskCommon.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${LLVM_MAIN_INCLUDE_DIR}/llvm/CAS
diff --git a/llvm/lib/CAS/MappedFileRegionArena.cpp b/llvm/lib/CAS/MappedFileRegionArena.cpp
deleted file mode 100644
index 3c920edcaae6a..0000000000000
--- a/llvm/lib/CAS/MappedFileRegionArena.cpp
+++ /dev/null
@@ -1,388 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-/// \file Implements MappedFileRegionArena.
-///
-/// A bump pointer allocator, backed by a memory-mapped file.
-///
-/// The effect we want is:
-///
-/// Step 1. If it doesn't exist, create the file with an initial size.
-/// Step 2. Reserve virtual memory large enough for the max file size.
-/// Step 3. Map the file into memory in the reserved region.
-/// Step 4. Increase the file size and update the mapping when necessary.
-///
-/// However, updating the mapping is challenging when it needs to work portably,
-/// and across multiple processes without locking for every read. Our current
-/// implementation handles the steps above in following ways:
-///
-/// Step 1. Use \ref sys::fs::resize_file_sparse to grow the file to its max
-///         size (typically several GB). If the file system doesn't support
-///         sparse file, this may return a fully allocated file.
-/// Step 2. Call \ref sys::fs::mapped_file_region to map the entire file.
-/// Step 3. [Automatic as part of step 2.]
-/// Step 4. If supported, use \c fallocate or similar APIs to ensure the file
-///         system storage for the sparse file so we won't end up with partial
-///         file if the disk is out of space.
-///
-/// Additionally, we attempt to resize the file to its actual data size when
-/// closing the mapping, if this is the only concurrent instance. This is done
-/// using file locks. Shrinking the file mitigates problems with having large
-/// files: on filesystems without sparse files it avoids unnecessary space use;
-/// it also avoids allocating the full size if another process copies the file,
-/// which typically loses sparseness. These mitigations only work while the file
-/// is not in use.
-///
-/// The capacity and the header offset are determined by the first user of the
-/// MappedFileRegionArena instance, and any future mismatched value from the
-/// original will result in an error on creation.
-///
-/// To support resizing, we use two separate file locks:
-/// 1. We use a shared reader lock on a ".shared" file until destruction.
-/// 2. We use a lock on the main file during initialization - shared to check
-///    the status, upgraded to exclusive to resize/initialize the file.
-///
-/// Then during destruction we attempt to get exclusive access on (1), which
-/// requires no concurrent readers. If so, we shrink the file. Using two
-/// separate locks simplifies the implementation and enables it to work on
-/// platforms (e.g. Windows) where a shared/reader lock prevents writing.
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CAS/MappedFileRegionArena.h"
-#include "OnDiskCommon.h"
-#include "llvm/ADT/StringExtras.h"
-
-#if LLVM_ON_UNIX
-#include <sys/stat.h>
-#if __has_include(<sys/param.h>)
-#include <sys/param.h>
-#endif
-#ifdef DEV_BSIZE
-#define MAPPED_FILE_BSIZE DEV_BSIZE
-#elif __linux__
-#define MAPPED_FILE_BSIZE 512
-#endif
-#endif
-
-using namespace llvm;
-using namespace llvm::cas;
-using namespace llvm::cas::ondisk;
-
-namespace {
-struct FileWithLock {
-  std::string Path;
-  int FD = -1;
-  std::optional<sys::fs::LockKind> Locked;
-
-private:
-  FileWithLock(std::string PathStr, Error &E) : Path(std::move(PathStr)) {
-    ErrorAsOutParameter EOP(&E);
-    if (std::error_code EC = sys::fs::openFileForReadWrite(
-            Path, FD, sys::fs::CD_OpenAlways, sys::fs::OF_None))
-      E = createFileError(Path, EC);
-  }
-
-public:
-  FileWithLock(FileWithLock &) = delete;
-  FileWithLock(FileWithLock &&Other) {
-    Path = std::move(Other.Path);
-    FD = Other.FD;
-    Other.FD = -1;
-    Locked = Other.Locked;
-    Other.Locked = std::nullopt;
-  }
-
-  ~FileWithLock() { consumeError(unlock()); }
-
-  static Expected<FileWithLock> open(StringRef Path) {
-    Error E = Error::success();
-    FileWithLock Result(Path.str(), E);
-    if (E)
-      return std::move(E);
-    return std::move(Result);
-  }
-
-  Error lock(sys::fs::LockKind LK) {
-    assert(!Locked && "already locked");
-    if (std::error_code EC = lockFileThreadSafe(FD, LK))
-      return createFileError(Path, EC);
-    Locked = LK;
-    return Error::success();
-  }
-
-  Error switchLock(sys::fs::LockKind LK) {
-    assert(Locked && "not locked");
-    if (auto E = unlock())
-      return E;
-
-    return lock(LK);
-  }
-
-  Error unlock() {
-    if (Locked) {
-      Locked = std::nullopt;
-      if (std::error_code EC = unlockFileThreadSafe(FD))
-        return createFileError(Path, EC);
-    }
-    return Error::success();
-  }
-
-  // Return true if the file was successfully locked exclusively.
-  bool tryLockExclusive() {
-    assert(!Locked && "can only try to lock if not locked");
-    if (tryLockFileThreadSafe(FD) == std::error_code()) {
-      Locked = sys::fs::LockKind::Exclusive;
-      return true;
-    }
-
-    return false;
-  }
-
-  // Release the lock so it will not be unlocked on destruction.
-  void release() {
-    Locked = std::nullopt;
-    FD = -1;
-  }
-};
-
-struct FileSizeInfo {
-  uint64_t Size;
-  uint64_t AllocatedSize;
-
-  static ErrorOr<FileSizeInfo> get(sys::fs::file_t File);
-};
-} // end anonymous namespace
-
-Expected<MappedFileRegionArena> MappedFileRegionArena::create(
-    const Twine &Path, uint64_t Capacity, uint64_t HeaderOffset,
-    function_ref<Error(MappedFileRegionArena &)> NewFileConstructor) {
-  uint64_t MinCapacity = HeaderOffset + sizeof(Header);
-  if (Capacity < MinCapacity)
-    return createStringError(
-        std::make_error_code(std::errc::invalid_argument),
-        "capacity is too small to hold MappedFileRegionArena");
-
-  MappedFileRegionArena Result;
-  Result.Path = Path.str();
-
-  // Open the shared lock file. See file comment for details of locking scheme.
-  SmallString<128> SharedFilePath(Result.Path);
-  SharedFilePath.append(".shared");
-
-  auto SharedFileLock = FileWithLock::open(SharedFilePath);
-  if (!SharedFileLock)
-    return SharedFileLock.takeError();
-  Result.SharedLockFD = SharedFileLock->FD;
-
-  // Take shared/reader lock that will be held until destroyImpl if construction
-  // is successful.
-  if (auto E = SharedFileLock->lock(sys::fs::LockKind::Shared))
-    return std::move(E);
-
-  // Take shared/reader lock for initialization.
-  auto MainFile = FileWithLock::open(Result.Path);
-  if (!MainFile)
-    return MainFile.takeError();
-  if (Error E = MainFile->lock(sys::fs::LockKind::Shared))
-    return std::move(E);
-  Result.FD = MainFile->FD;
-
-  sys::fs::file_t File = sys::fs::convertFDToNativeFile(MainFile->FD);
-  auto FileSize = FileSizeInfo::get(File);
-  if (!FileSize)
-    return createFileError(Result.Path, FileSize.getError());
-
-  // If the size is smaller than the capacity, we need to initialize the file.
-  // It may be empty, or may have been shrunk during a previous close.
-  if (FileSize->Size < Capacity) {
-    // Lock the file exclusively so only one process will do the initialization.
-    if (Error E = MainFile->switchLock(sys::fs::LockKind::Exclusive))
-      return std::move(E);
-    // Retrieve the current size now that we have exclusive access.
-    FileSize = FileSizeInfo::get(File);
-    if (!FileSize)
-      return createFileError(Result.Path, FileSize.getError());
-  }
-
-  if (FileSize->Size >= MinCapacity) {
-    // File is initialized. Read out the header to check for capacity and
-    // offset.
-    SmallVector<char, sizeof(Header)> HeaderContent(sizeof(Header));
-    auto Size = sys::fs::readNativeFileSlice(File, HeaderContent, HeaderOffset);
-    if (!Size)
-      return Size.takeError();
-
-    Header *H = reinterpret_cast<Header *>(HeaderContent.data());
-    if (H->HeaderOffset != HeaderOffset)
-      return createStringError(
-          std::make_error_code(std::errc::invalid_argument),
-          "specified header offset (" + utostr(HeaderOffset) +
-              ") does not match existing config (" + utostr(H->HeaderOffset) +
-              ")");
-
-    // If the capacity doesn't match, use the existing capacity instead.
-    if (H->Capacity != Capacity)
-      Capacity = H->Capacity;
-  }
-
-  // If the size is smaller than capacity, we need to resize the file.
-  if (FileSize->Size < Capacity) {
-    assert(MainFile->Locked == sys::fs::LockKind::Exclusive);
-    if (std::error_code EC =
-            sys::fs::resize_file_sparse(MainFile->FD, Capacity))
-      return createFileError(Result.Path, EC);
-  }
-
-  // Create the mapped region.
-  {
-    std::error_code EC;
-    sys::fs::mapped_file_region Map(
-        File, sys::fs::mapped_file_region::readwrite, Capacity, 0, EC);
-    if (EC)
-      return createFileError(Result.Path, EC);
-    Result.Region = std::move(Map);
-  }
-
-  // Initialize the header.
-  Result.initializeHeader(HeaderOffset);
-  if (FileSize->Size < MinCapacity) {
-    assert(MainFile->Locked == sys::fs::LockKind::Exclusive);
-    // If we need to fully initialize the file, call NewFileConstructor.
-    if (Error E = NewFileConstructor(Result))
-      return std::move(E);
-
-    Result.H->HeaderOffset.exchange(HeaderOffset);
-    Result.H->Capacity.exchange(Capacity);
-  }
-
-  if (MainFile->Locked == sys::fs::LockKind::Exclusive) {
-    // If holding an exclusive lock, we might have resized the file and
-    // performed some read/write to the file. Query the file size again to make
-    // sure everything is up-to-date. Otherwise, FileSize info is already
-    // up-to-date.
-    FileSize = FileSizeInfo::get(File);
-    if (!FileSize)
-      return createFileError(Result.Path, FileSize.getError());
-    Result.H->AllocatedSize.exchange(FileSize->AllocatedSize);
-  }
-
-  // Release the shared lock so it can be closed in destroyImpl().
-  SharedFileLock->release();
-  return Result;
-}
-
-void MappedFileRegionArena::destroyImpl() {
-  if (!FD)
-    return;
-
-  // Drop the shared lock indicating we are no longer accessing the file.
-  if (SharedLockFD)
-    (void)unlockFileThreadSafe(*SharedLockFD);
-
-  // Attempt to truncate the file if we can get exclusive access. Ignore any
-  // errors.
-  if (H) {
-    assert(SharedLockFD && "Must have shared lock file open");
-    if (tryLockFileThreadSafe(*SharedLockFD) == std::error_code()) {
-      size_t Size = size();
-      // sync to file system to make sure all contents are up-to-date.
-      (void)Region.sync();
-      // unmap the file before resizing since that is the requirement for
-      // some platforms.
-      Region.unmap();
-      (void)sys::fs::resize_file(*FD, Size);
-      (void)unlockFileThreadSafe(*SharedLockFD);
-    }
-  }
-
-  auto Close = [](std::optional<int> &FD) {
-    if (FD) {
-      sys::fs::file_t File = sys::fs::convertFDToNativeFile(*FD);
-      sys::fs::closeFile(File);
-      FD = std::nullopt;
-    }
-  };
-
-  // Close the file and shared lock.
-  Close(FD);
-  Close(SharedLockFD);
-}
-
-void MappedFileRegionArena::initializeHeader(uint64_t HeaderOffset) {
-  assert(capacity() < (uint64_t)INT64_MAX && "capacity must fit in int64_t");
-  uint64_t HeaderEndOffset = HeaderOffset + sizeof(decltype(*H));
-  assert(HeaderEndOffset <= capacity() &&
-         "Expected end offset to be pre-allocated");
-  assert(isAligned(Align::Of<decltype(*H)>(), HeaderOffset) &&
-         "Expected end offset to be aligned");
-  H = reinterpret_cast<decltype(H)>(data() + HeaderOffset);
-
-  uint64_t ExistingValue = 0;
-  if (!H->BumpPtr.compare_exchange_strong(ExistingValue, HeaderEndOffset))
-    assert(ExistingValue >= HeaderEndOffset &&
-           "Expected 0, or past the end of the header itself");
-}
-
-static Error createAllocatorOutOfSpaceError() {
-  return createStringError(std::make_error_code(std::errc::not_enough_memory),
-                           "memory mapped file allocator is out of space");
-}
-
-Expected<int64_t> MappedFileRegionArena::allocateOffset(uint64_t AllocSize) {
-  AllocSize = alignTo(AllocSize, getAlign());
-  uint64_t OldEnd = H->BumpPtr.fetch_add(AllocSize);
-  uint64_t NewEnd = OldEnd + AllocSize;
-  if (LLVM_UNLIKELY(NewEnd > capacity())) {
-    // Return the allocation. If the start already passed the end, that means
-    // some other concurrent allocations already consumed all the capacity.
-    // There is no need to return the original value. If the start was not
-    // passed the end, current allocation certainly bumped it passed the end.
-    // All other al...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/158694


More information about the llvm-commits mailing list