[llvm] Revert "[CAS] Add MappedFileRegionArena" (PR #158694)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 15 10:32:58 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-platform-windows
Author: Paul Kirth (ilovepi)
<details>
<summary>Changes</summary>
Reverts llvm/llvm-project#<!-- -->114099
This broke bots: https://lab.llvm.org/buildbot/#/builders/140/builds/30748
---
Patch is 38.68 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/158694.diff
12 Files Affected:
- (modified) llvm/CMakeLists.txt (-1)
- (removed) llvm/include/llvm/CAS/MappedFileRegionArena.h (-130)
- (modified) llvm/include/llvm/Config/llvm-config.h.cmake (-3)
- (modified) llvm/include/llvm/Support/FileSystem.h (-5)
- (modified) llvm/lib/CAS/CMakeLists.txt (-2)
- (removed) llvm/lib/CAS/MappedFileRegionArena.cpp (-388)
- (removed) llvm/lib/CAS/OnDiskCommon.cpp (-121)
- (removed) llvm/lib/CAS/OnDiskCommon.h (-46)
- (modified) llvm/lib/Support/Unix/Path.inc (-5)
- (modified) llvm/lib/Support/Windows/Path.inc (-17)
- (modified) llvm/unittests/CAS/CMakeLists.txt (-5)
- (removed) llvm/unittests/CAS/ProgramTest.cpp (-239)
``````````diff
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index e8af7fb432f40..b98192968a3ab 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -878,7 +878,6 @@ option (LLVM_ENABLE_SPHINX "Use Sphinx to generate llvm documentation." OFF)
option (LLVM_ENABLE_OCAMLDOC "Build OCaml bindings documentation." ON)
option (LLVM_ENABLE_BINDINGS "Build bindings." ON)
option (LLVM_ENABLE_TELEMETRY "Enable the telemetry library. If set to OFF, library cannot be enabled after build (eg., at runtime)" ON)
-option (LLVM_ENABLE_ONDISK_CAS "Build OnDiskCAS." ON)
set(LLVM_INSTALL_DOXYGEN_HTML_DIR "${CMAKE_INSTALL_DOCDIR}/llvm/doxygen-html"
CACHE STRING "Doxygen-generated HTML documentation install directory")
diff --git a/llvm/include/llvm/CAS/MappedFileRegionArena.h b/llvm/include/llvm/CAS/MappedFileRegionArena.h
deleted file mode 100644
index ff51f0eb59929..0000000000000
--- a/llvm/include/llvm/CAS/MappedFileRegionArena.h
+++ /dev/null
@@ -1,130 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// This file declares the interface for MappedFileRegionArena, a bump pointer
-/// allocator, backed by a memory-mapped file.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CAS_MAPPEDFILEREGIONARENA_H
-#define LLVM_CAS_MAPPEDFILEREGIONARENA_H
-
-#include "llvm/Support/Alignment.h"
-#include "llvm/Support/FileSystem.h"
-#include <atomic>
-
-namespace llvm::cas {
-
-/// Allocator for an owned mapped file region that supports thread-safe and
-/// process-safe bump pointer allocation.
-///
-/// This allocator is designed to create a sparse file when supported by the
-/// filesystem's \c ftruncate so that it can be used with a large maximum size.
-/// It will also attempt to shrink the underlying file down to its current
-/// allocation size when the last concurrent mapping is closed.
-///
-/// Process-safe. Uses file locks when resizing the file during initialization
-/// and destruction.
-///
-/// Thread-safe. Requires OS support for thread-safe file locks.
-///
-/// Provides 8-byte alignment for all allocations.
-class MappedFileRegionArena {
-public:
- using RegionT = sys::fs::mapped_file_region;
-
- /// Header for MappedFileRegionArena. It can be configured to be located
- /// at any location within the file and the allocation will be appended after
- /// the header.
- struct Header {
- // BumpPtr for new allocation.
- std::atomic<uint64_t> BumpPtr;
- // Allocated size on disk.
- std::atomic<uint64_t> AllocatedSize;
- // Capacity of the file.
- std::atomic<uint64_t> Capacity;
- // Offset from the beginning of the file to this header (for verification).
- std::atomic<uint64_t> HeaderOffset;
- };
-
- /// Create a \c MappedFileRegionArena.
- ///
- /// \param Path the path to open the mapped region.
- /// \param Capacity the maximum size for the mapped file region.
- /// \param HeaderOffset the offset at which to store the header. This is so
- /// that information can be stored before the header, like a file magic.
- /// \param NewFileConstructor is for constructing new files. It has exclusive
- /// access to the file. Must call \c initializeBumpPtr.
- static Expected<MappedFileRegionArena>
- create(const Twine &Path, uint64_t Capacity, uint64_t HeaderOffset,
- function_ref<Error(MappedFileRegionArena &)> NewFileConstructor);
-
- /// Minimum alignment for allocations, currently hardcoded to 8B.
- static constexpr Align getAlign() {
- // Trick Align into giving us '8' as a constexpr.
- struct alignas(8) T {};
- static_assert(alignof(T) == 8, "Tautology failed?");
- return Align::Of<T>();
- }
-
- /// Allocate at least \p AllocSize. Rounds up to \a getAlign().
- Expected<char *> allocate(uint64_t AllocSize) {
- auto Offset = allocateOffset(AllocSize);
- if (LLVM_UNLIKELY(!Offset))
- return Offset.takeError();
- return data() + *Offset;
- }
- /// Allocate, returning the offset from \a data() instead of a pointer.
- Expected<int64_t> allocateOffset(uint64_t AllocSize);
-
- char *data() const { return Region.data(); }
- uint64_t size() const { return H->BumpPtr; }
- uint64_t capacity() const { return Region.size(); }
-
- RegionT &getRegion() { return Region; }
-
- ~MappedFileRegionArena() { destroyImpl(); }
-
- MappedFileRegionArena() = default;
- MappedFileRegionArena(MappedFileRegionArena &&RHS) { moveImpl(RHS); }
- MappedFileRegionArena &operator=(MappedFileRegionArena &&RHS) {
- destroyImpl();
- moveImpl(RHS);
- return *this;
- }
-
- MappedFileRegionArena(const MappedFileRegionArena &) = delete;
- MappedFileRegionArena &operator=(const MappedFileRegionArena &) = delete;
-
-private:
- // initialize header from offset.
- void initializeHeader(uint64_t HeaderOffset);
-
- void destroyImpl();
- void moveImpl(MappedFileRegionArena &RHS) {
- std::swap(Region, RHS.Region);
- std::swap(H, RHS.H);
- std::swap(Path, RHS.Path);
- std::swap(FD, RHS.FD);
- std::swap(SharedLockFD, RHS.SharedLockFD);
- }
-
-private:
- RegionT Region;
- Header *H = nullptr;
- std::string Path;
- // File descriptor for the main storage file.
- std::optional<int> FD;
- // File descriptor for the file used as reader/writer lock.
- std::optional<int> SharedLockFD;
-};
-
-} // namespace llvm::cas
-
-#endif // LLVM_CAS_MAPPEDFILEREGIONARENA_H
diff --git a/llvm/include/llvm/Config/llvm-config.h.cmake b/llvm/include/llvm/Config/llvm-config.h.cmake
index 6488d6c01b5c6..39136bc45c292 100644
--- a/llvm/include/llvm/Config/llvm-config.h.cmake
+++ b/llvm/include/llvm/Config/llvm-config.h.cmake
@@ -146,7 +146,4 @@
coverage bugs, and to 0 otherwise. */
#cmakedefine01 LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
-/* Define to 1 to enable LLVM OnDisk Content Addressable Storage */
-#cmakedefine01 LLVM_ENABLE_ONDISK_CAS
-
#endif
diff --git a/llvm/include/llvm/Support/FileSystem.h b/llvm/include/llvm/Support/FileSystem.h
index c203779307840..a21b0a272d2b0 100644
--- a/llvm/include/llvm/Support/FileSystem.h
+++ b/llvm/include/llvm/Support/FileSystem.h
@@ -410,11 +410,6 @@ LLVM_ABI std::error_code copy_file(const Twine &From, int ToFD);
/// platform-specific error_code.
LLVM_ABI std::error_code resize_file(int FD, uint64_t Size);
-/// Resize path to size with sparse files explicitly enabled. It uses
-/// FSCTL_SET_SPARSE On Windows. This is the same as resize_file on
-/// non-Windows
-LLVM_ABI std::error_code resize_file_sparse(int FD, uint64_t Size);
-
/// Resize \p FD to \p Size before mapping \a mapped_file_region::readwrite. On
/// non-Windows, this calls \a resize_file(). On Windows, this is a no-op,
/// since the subsequent mapping (via \c CreateFileMapping) automatically
diff --git a/llvm/lib/CAS/CMakeLists.txt b/llvm/lib/CAS/CMakeLists.txt
index 6ed724bc2fd76..f3d2b41c704bc 100644
--- a/llvm/lib/CAS/CMakeLists.txt
+++ b/llvm/lib/CAS/CMakeLists.txt
@@ -3,9 +3,7 @@ add_llvm_component_library(LLVMCAS
ActionCaches.cpp
BuiltinCAS.cpp
InMemoryCAS.cpp
- MappedFileRegionArena.cpp
ObjectStore.cpp
- OnDiskCommon.cpp
ADDITIONAL_HEADER_DIRS
${LLVM_MAIN_INCLUDE_DIR}/llvm/CAS
diff --git a/llvm/lib/CAS/MappedFileRegionArena.cpp b/llvm/lib/CAS/MappedFileRegionArena.cpp
deleted file mode 100644
index 3c920edcaae6a..0000000000000
--- a/llvm/lib/CAS/MappedFileRegionArena.cpp
+++ /dev/null
@@ -1,388 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-/// \file Implements MappedFileRegionArena.
-///
-/// A bump pointer allocator, backed by a memory-mapped file.
-///
-/// The effect we want is:
-///
-/// Step 1. If it doesn't exist, create the file with an initial size.
-/// Step 2. Reserve virtual memory large enough for the max file size.
-/// Step 3. Map the file into memory in the reserved region.
-/// Step 4. Increase the file size and update the mapping when necessary.
-///
-/// However, updating the mapping is challenging when it needs to work portably,
-/// and across multiple processes without locking for every read. Our current
-/// implementation handles the steps above in following ways:
-///
-/// Step 1. Use \ref sys::fs::resize_file_sparse to grow the file to its max
-/// size (typically several GB). If the file system doesn't support
-/// sparse file, this may return a fully allocated file.
-/// Step 2. Call \ref sys::fs::mapped_file_region to map the entire file.
-/// Step 3. [Automatic as part of step 2.]
-/// Step 4. If supported, use \c fallocate or similar APIs to ensure the file
-/// system storage for the sparse file so we won't end up with partial
-/// file if the disk is out of space.
-///
-/// Additionally, we attempt to resize the file to its actual data size when
-/// closing the mapping, if this is the only concurrent instance. This is done
-/// using file locks. Shrinking the file mitigates problems with having large
-/// files: on filesystems without sparse files it avoids unnecessary space use;
-/// it also avoids allocating the full size if another process copies the file,
-/// which typically loses sparseness. These mitigations only work while the file
-/// is not in use.
-///
-/// The capacity and the header offset are determined by the first user of the
-/// MappedFileRegionArena instance and any future mismatched value from the
-/// original will result in error on creation.
-///
-/// To support resizing, we use two separate file locks:
-/// 1. We use a shared reader lock on a ".shared" file until destruction.
-/// 2. We use a lock on the main file during initialization - shared to check
-/// the status, upgraded to exclusive to resize/initialize the file.
-///
-/// Then during destruction we attempt to get exclusive access on (1), which
-/// requires no concurrent readers. If so, we shrink the file. Using two
-/// separate locks simplifies the implementation and enables it to work on
-/// platforms (e.g. Windows) where a shared/reader lock prevents writing.
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CAS/MappedFileRegionArena.h"
-#include "OnDiskCommon.h"
-#include "llvm/ADT/StringExtras.h"
-
-#if LLVM_ON_UNIX
-#include <sys/stat.h>
-#if __has_include(<sys/param.h>)
-#include <sys/param.h>
-#endif
-#ifdef DEV_BSIZE
-#define MAPPED_FILE_BSIZE DEV_BSIZE
-#elif __linux__
-#define MAPPED_FILE_BSIZE 512
-#endif
-#endif
-
-using namespace llvm;
-using namespace llvm::cas;
-using namespace llvm::cas::ondisk;
-
-namespace {
-struct FileWithLock {
- std::string Path;
- int FD = -1;
- std::optional<sys::fs::LockKind> Locked;
-
-private:
- FileWithLock(std::string PathStr, Error &E) : Path(std::move(PathStr)) {
- ErrorAsOutParameter EOP(&E);
- if (std::error_code EC = sys::fs::openFileForReadWrite(
- Path, FD, sys::fs::CD_OpenAlways, sys::fs::OF_None))
- E = createFileError(Path, EC);
- }
-
-public:
- FileWithLock(FileWithLock &) = delete;
- FileWithLock(FileWithLock &&Other) {
- Path = std::move(Other.Path);
- FD = Other.FD;
- Other.FD = -1;
- Locked = Other.Locked;
- Other.Locked = std::nullopt;
- }
-
- ~FileWithLock() { consumeError(unlock()); }
-
- static Expected<FileWithLock> open(StringRef Path) {
- Error E = Error::success();
- FileWithLock Result(Path.str(), E);
- if (E)
- return std::move(E);
- return std::move(Result);
- }
-
- Error lock(sys::fs::LockKind LK) {
- assert(!Locked && "already locked");
- if (std::error_code EC = lockFileThreadSafe(FD, LK))
- return createFileError(Path, EC);
- Locked = LK;
- return Error::success();
- }
-
- Error switchLock(sys::fs::LockKind LK) {
- assert(Locked && "not locked");
- if (auto E = unlock())
- return E;
-
- return lock(LK);
- }
-
- Error unlock() {
- if (Locked) {
- Locked = std::nullopt;
- if (std::error_code EC = unlockFileThreadSafe(FD))
- return createFileError(Path, EC);
- }
- return Error::success();
- }
-
- // Return true if the file was successfully locked exclusively.
- bool tryLockExclusive() {
- assert(!Locked && "can only try to lock if not locked");
- if (tryLockFileThreadSafe(FD) == std::error_code()) {
- Locked = sys::fs::LockKind::Exclusive;
- return true;
- }
-
- return false;
- }
-
- // Release the lock so it will not be unlocked on destruction.
- void release() {
- Locked = std::nullopt;
- FD = -1;
- }
-};
-
-struct FileSizeInfo {
- uint64_t Size;
- uint64_t AllocatedSize;
-
- static ErrorOr<FileSizeInfo> get(sys::fs::file_t File);
-};
-} // end anonymous namespace
-
-Expected<MappedFileRegionArena> MappedFileRegionArena::create(
- const Twine &Path, uint64_t Capacity, uint64_t HeaderOffset,
- function_ref<Error(MappedFileRegionArena &)> NewFileConstructor) {
- uint64_t MinCapacity = HeaderOffset + sizeof(Header);
- if (Capacity < MinCapacity)
- return createStringError(
- std::make_error_code(std::errc::invalid_argument),
- "capacity is too small to hold MappedFileRegionArena");
-
- MappedFileRegionArena Result;
- Result.Path = Path.str();
-
- // Open the shared lock file. See file comment for details of locking scheme.
- SmallString<128> SharedFilePath(Result.Path);
- SharedFilePath.append(".shared");
-
- auto SharedFileLock = FileWithLock::open(SharedFilePath);
- if (!SharedFileLock)
- return SharedFileLock.takeError();
- Result.SharedLockFD = SharedFileLock->FD;
-
- // Take shared/reader lock that will be held until destroyImpl if construction
- // is successful.
- if (auto E = SharedFileLock->lock(sys::fs::LockKind::Shared))
- return std::move(E);
-
- // Take shared/reader lock for initialization.
- auto MainFile = FileWithLock::open(Result.Path);
- if (!MainFile)
- return MainFile.takeError();
- if (Error E = MainFile->lock(sys::fs::LockKind::Shared))
- return std::move(E);
- Result.FD = MainFile->FD;
-
- sys::fs::file_t File = sys::fs::convertFDToNativeFile(MainFile->FD);
- auto FileSize = FileSizeInfo::get(File);
- if (!FileSize)
- return createFileError(Result.Path, FileSize.getError());
-
- // If the size is smaller than the capacity, we need to initialize the file.
- // It may be empty, or may have been shrunk during a previous close.
- if (FileSize->Size < Capacity) {
- // Lock the file exclusively so only one process will do the initialization.
- if (Error E = MainFile->switchLock(sys::fs::LockKind::Exclusive))
- return std::move(E);
- // Retrieve the current size now that we have exclusive access.
- FileSize = FileSizeInfo::get(File);
- if (!FileSize)
- return createFileError(Result.Path, FileSize.getError());
- }
-
- if (FileSize->Size >= MinCapacity) {
- // File is initialized. Read out the header to check for capacity and
- // offset.
- SmallVector<char, sizeof(Header)> HeaderContent(sizeof(Header));
- auto Size = sys::fs::readNativeFileSlice(File, HeaderContent, HeaderOffset);
- if (!Size)
- return Size.takeError();
-
- Header *H = reinterpret_cast<Header *>(HeaderContent.data());
- if (H->HeaderOffset != HeaderOffset)
- return createStringError(
- std::make_error_code(std::errc::invalid_argument),
- "specified header offset (" + utostr(HeaderOffset) +
- ") does not match existing config (" + utostr(H->HeaderOffset) +
- ")");
-
- // If the capacity doesn't match, use the existing capacity instead.
- if (H->Capacity != Capacity)
- Capacity = H->Capacity;
- }
-
- // If the size is smaller than capacity, we need to resize the file.
- if (FileSize->Size < Capacity) {
- assert(MainFile->Locked == sys::fs::LockKind::Exclusive);
- if (std::error_code EC =
- sys::fs::resize_file_sparse(MainFile->FD, Capacity))
- return createFileError(Result.Path, EC);
- }
-
- // Create the mapped region.
- {
- std::error_code EC;
- sys::fs::mapped_file_region Map(
- File, sys::fs::mapped_file_region::readwrite, Capacity, 0, EC);
- if (EC)
- return createFileError(Result.Path, EC);
- Result.Region = std::move(Map);
- }
-
- // Initialize the header.
- Result.initializeHeader(HeaderOffset);
- if (FileSize->Size < MinCapacity) {
- assert(MainFile->Locked == sys::fs::LockKind::Exclusive);
- // If we need to fully initialize the file, call NewFileConstructor.
- if (Error E = NewFileConstructor(Result))
- return std::move(E);
-
- Result.H->HeaderOffset.exchange(HeaderOffset);
- Result.H->Capacity.exchange(Capacity);
- }
-
- if (MainFile->Locked == sys::fs::LockKind::Exclusive) {
- // If holding an exclusive lock, we might have resized the file and
- // performed some read/write to the file. Query the file size again to make
- // sure everything is up-to-date. Otherwise, FileSize info is already
- // up-to-date.
- FileSize = FileSizeInfo::get(File);
- if (!FileSize)
- return createFileError(Result.Path, FileSize.getError());
- Result.H->AllocatedSize.exchange(FileSize->AllocatedSize);
- }
-
- // Release the shared lock so it can be closed in destroyImpl().
- SharedFileLock->release();
- return Result;
-}
-
-void MappedFileRegionArena::destroyImpl() {
- if (!FD)
- return;
-
- // Drop the shared lock indicating we are no longer accessing the file.
- if (SharedLockFD)
- (void)unlockFileThreadSafe(*SharedLockFD);
-
- // Attempt to truncate the file if we can get exclusive access. Ignore any
- // errors.
- if (H) {
- assert(SharedLockFD && "Must have shared lock file open");
- if (tryLockFileThreadSafe(*SharedLockFD) == std::error_code()) {
- size_t Size = size();
- // sync to file system to make sure all contents are up-to-date.
- (void)Region.sync();
- // unmap the file before resizing since that is the requirement for
- // some platforms.
- Region.unmap();
- (void)sys::fs::resize_file(*FD, Size);
- (void)unlockFileThreadSafe(*SharedLockFD);
- }
- }
-
- auto Close = [](std::optional<int> &FD) {
- if (FD) {
- sys::fs::file_t File = sys::fs::convertFDToNativeFile(*FD);
- sys::fs::closeFile(File);
- FD = std::nullopt;
- }
- };
-
- // Close the file and shared lock.
- Close(FD);
- Close(SharedLockFD);
-}
-
-void MappedFileRegionArena::initializeHeader(uint64_t HeaderOffset) {
- assert(capacity() < (uint64_t)INT64_MAX && "capacity must fit in int64_t");
- uint64_t HeaderEndOffset = HeaderOffset + sizeof(decltype(*H));
- assert(HeaderEndOffset <= capacity() &&
- "Expected end offset to be pre-allocated");
- assert(isAligned(Align::Of<decltype(*H)>(), HeaderOffset) &&
- "Expected end offset to be aligned");
- H = reinterpret_cast<decltype(H)>(data() + HeaderOffset);
-
- uint64_t ExistingValue = 0;
- if (!H->BumpPtr.compare_exchange_strong(ExistingValue, HeaderEndOffset))
- assert(ExistingValue >= HeaderEndOffset &&
- "Expected 0, or past the end of the header itself");
-}
-
-static Error createAllocatorOutOfSpaceError() {
- return createStringError(std::make_error_code(std::errc::not_enough_memory),
- "memory mapped file allocator is out of space");
-}
-
-Expected<int64_t> MappedFileRegionArena::allocateOffset(uint64_t AllocSize) {
- AllocSize = alignTo(AllocSize, getAlign());
- uint64_t OldEnd = H->BumpPtr.fetch_add(AllocSize);
- uint64_t NewEnd = OldEnd + AllocSize;
- if (LLVM_UNLIKELY(NewEnd > capacity())) {
- // Return the allocation. If the start already passed the end, that means
- // some other concurrent allocations already consumed all the capacity.
- // There is no need to return the original value. If the start was not
- // past the end, current allocation certainly bumped it past the end.
- // All other al...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/158694
More information about the llvm-commits
mailing list