[llvm] Re-land #161264: [CAS] Add OnDiskDataAllocator (PR #162112)
Steven Wu via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 6 09:30:57 PDT 2025
https://github.com/cachemeifyoucan created https://github.com/llvm/llvm-project/pull/162112
Fix the build configuration that has OnDiskCAS disabled.
>From fe479f35c2445d432fe843e2347fe3048bcf80a8 Mon Sep 17 00:00:00 2001
From: Steven Wu <stevenwu at apple.com>
Date: Mon, 6 Oct 2025 09:30:47 -0700
Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?=
=?UTF-8?q?l=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.7
---
llvm/include/llvm/CAS/OnDiskDataAllocator.h | 95 +++++++
llvm/lib/CAS/CMakeLists.txt | 1 +
llvm/lib/CAS/OnDiskDataAllocator.cpp | 234 ++++++++++++++++++
llvm/unittests/CAS/CMakeLists.txt | 1 +
.../unittests/CAS/OnDiskDataAllocatorTest.cpp | 66 +++++
5 files changed, 397 insertions(+)
create mode 100644 llvm/include/llvm/CAS/OnDiskDataAllocator.h
create mode 100644 llvm/lib/CAS/OnDiskDataAllocator.cpp
create mode 100644 llvm/unittests/CAS/OnDiskDataAllocatorTest.cpp
diff --git a/llvm/include/llvm/CAS/OnDiskDataAllocator.h b/llvm/include/llvm/CAS/OnDiskDataAllocator.h
new file mode 100644
index 0000000000000..2809df800621b
--- /dev/null
+++ b/llvm/include/llvm/CAS/OnDiskDataAllocator.h
@@ -0,0 +1,95 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file declares interface for OnDiskDataAllocator, a file backed data
+/// pool can be used to allocate space to store data packed in a single file. It
+/// is based on MappedFileRegionArena and includes a header in the beginning to
+/// provide metadata.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CAS_ONDISKDATAALLOCATOR_H
+#define LLVM_CAS_ONDISKDATAALLOCATOR_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/CAS/FileOffset.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm::cas {
+
+/// Sink for data. Stores variable length data with 8-byte alignment. Does not
+/// track size of data, which is assumed to known from context, or embedded.
+/// Uses 0-padding but does not guarantee 0-termination.
+class OnDiskDataAllocator {
+public:
+ using ValueProxy = MutableArrayRef<char>;
+
+ /// A pointer to data stored on disk.
+ class OnDiskPtr {
+ public:
+ FileOffset getOffset() const { return Offset; }
+ explicit operator bool() const { return bool(getOffset()); }
+ const ValueProxy &operator*() const {
+ assert(Offset && "Null dereference");
+ return Value;
+ }
+ const ValueProxy *operator->() const {
+ assert(Offset && "Null dereference");
+ return &Value;
+ }
+
+ OnDiskPtr() = default;
+
+ private:
+ friend class OnDiskDataAllocator;
+ OnDiskPtr(FileOffset Offset, ValueProxy Value)
+ : Offset(Offset), Value(Value) {}
+ FileOffset Offset;
+ ValueProxy Value;
+ };
+
+ /// Get the data of \p Size stored at the given \p Offset. Note the allocator
+ /// doesn't keep track of the allocation size, thus \p Size doesn't need to
+ /// match the size of allocation but needs to be smaller.
+ Expected<ArrayRef<char>> get(FileOffset Offset, size_t Size) const;
+
+ /// Allocate at least \p Size with 8-byte alignment.
+ Expected<OnDiskPtr> allocate(size_t Size);
+
+ /// \returns the buffer that was allocated at \p create time, with size
+ /// \p UserHeaderSize.
+ MutableArrayRef<uint8_t> getUserHeader();
+
+ size_t size() const;
+ size_t capacity() const;
+
+ static Expected<OnDiskDataAllocator>
+ create(const Twine &Path, const Twine &TableName, uint64_t MaxFileSize,
+ std::optional<uint64_t> NewFileInitialSize,
+ uint32_t UserHeaderSize = 0,
+ function_ref<void(void *)> UserHeaderInit = nullptr);
+
+ OnDiskDataAllocator(OnDiskDataAllocator &&RHS);
+ OnDiskDataAllocator &operator=(OnDiskDataAllocator &&RHS);
+
+ // No copy. Just call \a create() again.
+ OnDiskDataAllocator(const OnDiskDataAllocator &) = delete;
+ OnDiskDataAllocator &operator=(const OnDiskDataAllocator &) = delete;
+
+ ~OnDiskDataAllocator();
+
+private:
+ struct ImplType;
+ explicit OnDiskDataAllocator(std::unique_ptr<ImplType> Impl);
+ std::unique_ptr<ImplType> Impl;
+};
+
+} // namespace llvm::cas
+
+#endif // LLVM_CAS_ONDISKDATAALLOCATOR_H
diff --git a/llvm/lib/CAS/CMakeLists.txt b/llvm/lib/CAS/CMakeLists.txt
index 7ae5f7e46418e..bca39b645af45 100644
--- a/llvm/lib/CAS/CMakeLists.txt
+++ b/llvm/lib/CAS/CMakeLists.txt
@@ -7,6 +7,7 @@ add_llvm_component_library(LLVMCAS
MappedFileRegionArena.cpp
ObjectStore.cpp
OnDiskCommon.cpp
+ OnDiskDataAllocator.cpp
OnDiskTrieRawHashMap.cpp
ADDITIONAL_HEADER_DIRS
diff --git a/llvm/lib/CAS/OnDiskDataAllocator.cpp b/llvm/lib/CAS/OnDiskDataAllocator.cpp
new file mode 100644
index 0000000000000..13bbd66139178
--- /dev/null
+++ b/llvm/lib/CAS/OnDiskDataAllocator.cpp
@@ -0,0 +1,234 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file Implements OnDiskDataAllocator.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CAS/OnDiskDataAllocator.h"
+#include "DatabaseFile.h"
+#include "llvm/Config/llvm-config.h"
+
+using namespace llvm;
+using namespace llvm::cas;
+using namespace llvm::cas::ondisk;
+
+#if LLVM_ENABLE_ONDISK_CAS
+
+//===----------------------------------------------------------------------===//
+// DataAllocator data structures.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// DataAllocator table layout:
+/// - [8-bytes: Generic table header]
+/// - 8-bytes: AllocatorOffset (reserved for implementing free lists)
+/// - 8-bytes: Size for user data header
+/// - <user data buffer>
+///
+/// Record layout:
+/// - <data>
+class DataAllocatorHandle {
+public:
+ static constexpr TableHandle::TableKind Kind =
+ TableHandle::TableKind::DataAllocator;
+
+ struct Header {
+ TableHandle::Header GenericHeader;
+ std::atomic<int64_t> AllocatorOffset;
+ const uint64_t UserHeaderSize;
+ };
+
+ operator TableHandle() const {
+ if (!H)
+ return TableHandle();
+ return TableHandle(*Region, H->GenericHeader);
+ }
+
+ Expected<MutableArrayRef<char>> allocate(MappedFileRegionArena &Alloc,
+ size_t DataSize) {
+ assert(&Alloc.getRegion() == Region);
+ auto Ptr = Alloc.allocate(DataSize);
+ if (LLVM_UNLIKELY(!Ptr))
+ return Ptr.takeError();
+ return MutableArrayRef(*Ptr, DataSize);
+ }
+
+ explicit operator bool() const { return H; }
+ const Header &getHeader() const { return *H; }
+ MappedFileRegion &getRegion() const { return *Region; }
+
+ MutableArrayRef<uint8_t> getUserHeader() {
+ return MutableArrayRef(reinterpret_cast<uint8_t *>(H + 1),
+ H->UserHeaderSize);
+ }
+
+ static Expected<DataAllocatorHandle>
+ create(MappedFileRegionArena &Alloc, StringRef Name, uint32_t UserHeaderSize);
+
+ DataAllocatorHandle() = default;
+ DataAllocatorHandle(MappedFileRegion &Region, Header &H)
+ : Region(&Region), H(&H) {}
+ DataAllocatorHandle(MappedFileRegion &Region, intptr_t HeaderOffset)
+ : DataAllocatorHandle(
+ Region, *reinterpret_cast<Header *>(Region.data() + HeaderOffset)) {
+ }
+
+private:
+ MappedFileRegion *Region = nullptr;
+ Header *H = nullptr;
+};
+
+} // end anonymous namespace
+
+struct OnDiskDataAllocator::ImplType {
+ DatabaseFile File;
+ DataAllocatorHandle Store;
+};
+
+Expected<DataAllocatorHandle>
+DataAllocatorHandle::create(MappedFileRegionArena &Alloc, StringRef Name,
+ uint32_t UserHeaderSize) {
+ // Allocate.
+ auto Offset =
+ Alloc.allocateOffset(sizeof(Header) + UserHeaderSize + Name.size() + 1);
+ if (LLVM_UNLIKELY(!Offset))
+ return Offset.takeError();
+
+ // Construct the header and the name.
+ assert(Name.size() <= UINT16_MAX && "Expected smaller table name");
+ auto *H = new (Alloc.getRegion().data() + *Offset)
+ Header{{TableHandle::TableKind::DataAllocator,
+ static_cast<uint16_t>(Name.size()),
+ static_cast<int32_t>(sizeof(Header) + UserHeaderSize)},
+ /*AllocatorOffset=*/{0},
+ /*UserHeaderSize=*/UserHeaderSize};
+ // Memset UserHeader.
+ char *UserHeader = reinterpret_cast<char *>(H + 1);
+ memset(UserHeader, 0, UserHeaderSize);
+ // Write database file name (null-terminated).
+ char *NameStorage = UserHeader + UserHeaderSize;
+ llvm::copy(Name, NameStorage);
+ NameStorage[Name.size()] = 0;
+ return DataAllocatorHandle(Alloc.getRegion(), *H);
+}
+
+Expected<OnDiskDataAllocator> OnDiskDataAllocator::create(
+ const Twine &PathTwine, const Twine &TableNameTwine, uint64_t MaxFileSize,
+ std::optional<uint64_t> NewFileInitialSize, uint32_t UserHeaderSize,
+ function_ref<void(void *)> UserHeaderInit) {
+ assert(!UserHeaderSize || UserHeaderInit);
+ SmallString<128> PathStorage;
+ StringRef Path = PathTwine.toStringRef(PathStorage);
+ SmallString<128> TableNameStorage;
+ StringRef TableName = TableNameTwine.toStringRef(TableNameStorage);
+
+ // Constructor for if the file doesn't exist.
+ auto NewDBConstructor = [&](DatabaseFile &DB) -> Error {
+ auto Store =
+ DataAllocatorHandle::create(DB.getAlloc(), TableName, UserHeaderSize);
+ if (LLVM_UNLIKELY(!Store))
+ return Store.takeError();
+
+ if (auto E = DB.addTable(*Store))
+ return E;
+
+ if (UserHeaderSize)
+ UserHeaderInit(Store->getUserHeader().data());
+ return Error::success();
+ };
+
+ // Get or create the file.
+ Expected<DatabaseFile> File =
+ DatabaseFile::create(Path, MaxFileSize, NewDBConstructor);
+ if (!File)
+ return File.takeError();
+
+ // Find the table and validate it.
+ std::optional<TableHandle> Table = File->findTable(TableName);
+ if (!Table)
+ return createTableConfigError(std::errc::argument_out_of_domain, Path,
+ TableName, "table not found");
+ if (Error E = checkTable("table kind", (size_t)DataAllocatorHandle::Kind,
+ (size_t)Table->getHeader().Kind, Path, TableName))
+ return std::move(E);
+ auto Store = Table->cast<DataAllocatorHandle>();
+ assert(Store && "Already checked the kind");
+
+ // Success.
+ OnDiskDataAllocator::ImplType Impl{DatabaseFile(std::move(*File)), Store};
+ return OnDiskDataAllocator(std::make_unique<ImplType>(std::move(Impl)));
+}
+
+Expected<OnDiskDataAllocator::OnDiskPtr>
+OnDiskDataAllocator::allocate(size_t Size) {
+ auto Data = Impl->Store.allocate(Impl->File.getAlloc(), Size);
+ if (LLVM_UNLIKELY(!Data))
+ return Data.takeError();
+
+ return OnDiskPtr(FileOffset(Data->data() - Impl->Store.getRegion().data()),
+ *Data);
+}
+
+Expected<ArrayRef<char>> OnDiskDataAllocator::get(FileOffset Offset,
+ size_t Size) const {
+ assert(Offset);
+ assert(Impl);
+ if (Offset.get() + Size >= Impl->File.getAlloc().size())
+ return createStringError(make_error_code(std::errc::protocol_error),
+ "requested size too large in allocator");
+ return ArrayRef<char>{Impl->File.getRegion().data() + Offset.get(), Size};
+}
+
+MutableArrayRef<uint8_t> OnDiskDataAllocator::getUserHeader() {
+ return Impl->Store.getUserHeader();
+}
+
+size_t OnDiskDataAllocator::size() const { return Impl->File.size(); }
+size_t OnDiskDataAllocator::capacity() const {
+ return Impl->File.getRegion().size();
+}
+
+OnDiskDataAllocator::OnDiskDataAllocator(std::unique_ptr<ImplType> Impl)
+ : Impl(std::move(Impl)) {}
+
+#else // !LLVM_ENABLE_ONDISK_CAS
+
+struct OnDiskDataAllocator::ImplType {};
+
+Expected<OnDiskDataAllocator> OnDiskDataAllocator::create(
+ const Twine &Path, const Twine &TableName, uint64_t MaxFileSize,
+ std::optional<uint64_t> NewFileInitialSize, uint32_t UserHeaderSize,
+ function_ref<void(void *)> UserHeaderInit) {
+ return createStringError(make_error_code(std::errc::not_supported),
+ "OnDiskDataAllocator is not supported");
+}
+
+Expected<OnDiskDataAllocator::OnDiskPtr>
+OnDiskDataAllocator::allocate(size_t Size) {
+ return createStringError(make_error_code(std::errc::not_supported),
+ "OnDiskDataAllocator is not supported");
+}
+
+Expected<ArrayRef<char>> OnDiskDataAllocator::get(FileOffset Offset,
+ size_t Size) const {
+ return createStringError(make_error_code(std::errc::not_supported),
+ "OnDiskDataAllocator is not supported");
+}
+
+MutableArrayRef<uint8_t> OnDiskDataAllocator::getUserHeader() { return {}; }
+
+size_t OnDiskDataAllocator::size() const { return 0; }
+size_t OnDiskDataAllocator::capacity() const { return 0; }
+
+#endif // LLVM_ENABLE_ONDISK_CAS
+
+OnDiskDataAllocator::OnDiskDataAllocator(OnDiskDataAllocator &&RHS) = default;
+OnDiskDataAllocator &
+OnDiskDataAllocator::operator=(OnDiskDataAllocator &&RHS) = default;
+OnDiskDataAllocator::~OnDiskDataAllocator() = default;
diff --git a/llvm/unittests/CAS/CMakeLists.txt b/llvm/unittests/CAS/CMakeLists.txt
index 0f8fcb9e98954..ee40e6c9879a1 100644
--- a/llvm/unittests/CAS/CMakeLists.txt
+++ b/llvm/unittests/CAS/CMakeLists.txt
@@ -8,6 +8,7 @@ add_llvm_unittest(CASTests
ActionCacheTest.cpp
CASTestConfig.cpp
ObjectStoreTest.cpp
+ OnDiskDataAllocatorTest.cpp
OnDiskTrieRawHashMapTest.cpp
ProgramTest.cpp
)
diff --git a/llvm/unittests/CAS/OnDiskDataAllocatorTest.cpp b/llvm/unittests/CAS/OnDiskDataAllocatorTest.cpp
new file mode 100644
index 0000000000000..966fa03076841
--- /dev/null
+++ b/llvm/unittests/CAS/OnDiskDataAllocatorTest.cpp
@@ -0,0 +1,66 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CAS/OnDiskDataAllocator.h"
+#include "llvm/CAS/MappedFileRegionArena.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Alignment.h"
+#include "llvm/Testing/Support/Error.h"
+#include "llvm/Testing/Support/SupportHelpers.h"
+
+#if LLVM_ENABLE_ONDISK_CAS
+
+using namespace llvm;
+using namespace llvm::cas;
+
+TEST(OnDiskDataAllocatorTest, Allocate) {
+ unittest::TempDir Temp("data-allocator", /*Unique=*/true);
+ constexpr size_t MB = 1024u * 1024u;
+
+ std::optional<OnDiskDataAllocator> Allocator;
+ ASSERT_THAT_ERROR(OnDiskDataAllocator::create(
+ Temp.path("allocator"), "data", /*MaxFileSize=*/MB,
+ /*NewFileInitialSize=*/std::nullopt)
+ .moveInto(Allocator),
+ Succeeded());
+
+ // Allocate.
+ {
+ for (size_t Size = 1; Size < 16; ++Size) {
+ OnDiskDataAllocator::OnDiskPtr P;
+ ASSERT_THAT_ERROR(Allocator->allocate(Size).moveInto(P), Succeeded());
+ EXPECT_TRUE(
+ isAligned(MappedFileRegionArena::getAlign(), P.getOffset().get()));
+ }
+ }
+
+ // Out of space.
+ {
+ OnDiskDataAllocator::OnDiskPtr P;
+ ASSERT_THAT_ERROR(Allocator->allocate(MB).moveInto(P), Failed());
+ }
+
+ // Check size and capacity.
+ {
+ ASSERT_EQ(Allocator->capacity(), MB);
+ ASSERT_LE(Allocator->size(), MB);
+ }
+
+ // Get.
+ {
+ OnDiskDataAllocator::OnDiskPtr P;
+ ASSERT_THAT_ERROR(Allocator->allocate(32).moveInto(P), Succeeded());
+ ArrayRef<char> Data;
+ ASSERT_THAT_ERROR(Allocator->get(P.getOffset(), 16).moveInto(Data),
+ Succeeded());
+ ASSERT_THAT_ERROR(Allocator->get(P.getOffset(), 1025).moveInto(Data),
+ Failed());
+ }
+}
+
+#endif // LLVM_ENABLE_ONDISK_CAS
More information about the llvm-commits
mailing list