[llvm] [CAS] Add OnDiskCAS (PR #114103)

Steven Wu via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 22 12:51:46 PDT 2025


https://github.com/cachemeifyoucan updated https://github.com/llvm/llvm-project/pull/114103

>From 4b951ef459415adbab70b1b685590d60a8dfd29e Mon Sep 17 00:00:00 2001
From: Steven Wu <stevenwu at apple.com>
Date: Mon, 20 Oct 2025 14:41:11 -0700
Subject: [PATCH] [CAS] Add OnDiskCAS

Add the OnDiskCAS abstraction, which implements the ObjectStore and
ActionCache interfaces using OnDiskGraphDB and OnDiskKeyValueDB.

Reviewers:

Pull Request: https://github.com/llvm/llvm-project/pull/114103
---
 llvm/include/llvm/CAS/ActionCache.h           |  10 +
 .../llvm/CAS/BuiltinUnifiedCASDatabases.h     |  59 ++
 llvm/include/llvm/CAS/ObjectStore.h           |  49 +-
 llvm/include/llvm/CAS/UnifiedOnDiskCache.h    | 191 +++++
 llvm/lib/CAS/ActionCaches.cpp                 | 156 +++++
 llvm/lib/CAS/BuiltinCAS.cpp                   |  14 +-
 llvm/lib/CAS/BuiltinCAS.h                     |  25 +-
 llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp   |  38 +
 llvm/lib/CAS/CMakeLists.txt                   |   3 +
 llvm/lib/CAS/InMemoryCAS.cpp                  |   8 +
 llvm/lib/CAS/ObjectStore.cpp                  |  93 ++-
 llvm/lib/CAS/OnDiskCAS.cpp                    | 228 ++++++
 llvm/lib/CAS/UnifiedOnDiskCache.cpp           | 655 ++++++++++++++++++
 llvm/unittests/CAS/ActionCacheTest.cpp        |   6 +-
 .../CAS/BuiltinUnifiedCASDatabasesTest.cpp    |  67 ++
 llvm/unittests/CAS/CASTestConfig.cpp          |  23 +-
 llvm/unittests/CAS/CASTestConfig.h            |  44 +-
 llvm/unittests/CAS/CMakeLists.txt             |   2 +
 llvm/unittests/CAS/ObjectStoreTest.cpp        | 134 +++-
 llvm/unittests/CAS/UnifiedOnDiskCacheTest.cpp | 191 +++++
 20 files changed, 1967 insertions(+), 29 deletions(-)
 create mode 100644 llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h
 create mode 100644 llvm/include/llvm/CAS/UnifiedOnDiskCache.h
 create mode 100644 llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp
 create mode 100644 llvm/lib/CAS/OnDiskCAS.cpp
 create mode 100644 llvm/lib/CAS/UnifiedOnDiskCache.cpp
 create mode 100644 llvm/unittests/CAS/BuiltinUnifiedCASDatabasesTest.cpp
 create mode 100644 llvm/unittests/CAS/UnifiedOnDiskCacheTest.cpp

diff --git a/llvm/include/llvm/CAS/ActionCache.h b/llvm/include/llvm/CAS/ActionCache.h
index 69ee4dde1974a..781ad81001368 100644
--- a/llvm/include/llvm/CAS/ActionCache.h
+++ b/llvm/include/llvm/CAS/ActionCache.h
@@ -75,6 +75,9 @@ class ActionCache {
                    CanBeDistributed);
   }
 
+  /// Validate the ActionCache contents.
+  virtual Error validate() const = 0;
+
   virtual ~ActionCache() = default;
 
 protected:
@@ -97,6 +100,13 @@ class ActionCache {
 /// Create an action cache in memory.
 std::unique_ptr<ActionCache> createInMemoryActionCache();
 
+/// Get a reasonable default on-disk path for a persistent ActionCache for the
+/// current user.
+std::string getDefaultOnDiskActionCachePath();
+
+/// Create an action cache on disk.
+Expected<std::unique_ptr<ActionCache>> createOnDiskActionCache(StringRef Path);
+
 } // end namespace llvm::cas
 
 #endif // LLVM_CAS_ACTIONCACHE_H
diff --git a/llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h b/llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h
new file mode 100644
index 0000000000000..6c31a82ff9db0
--- /dev/null
+++ b/llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h
@@ -0,0 +1,59 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CAS_BUILTINUNIFIEDCASDATABASES_H
+#define LLVM_CAS_BUILTINUNIFIEDCASDATABASES_H
+
+#include "llvm/Support/Error.h"
+
+namespace llvm::cas {
+
+class ActionCache;
+class ObjectStore;
+
+/// Create on-disk \c ObjectStore and \c ActionCache instances based on
+/// \c ondisk::UnifiedOnDiskCache, with built-in hashing.
+Expected<std::pair<std::unique_ptr<ObjectStore>, std::unique_ptr<ActionCache>>>
+createOnDiskUnifiedCASDatabases(StringRef Path);
+
+/// Represents the result of validating the contents using
+/// \c validateOnDiskUnifiedCASDatabasesIfNeeded.
+///
+/// Note: invalid results are handled as an \c Error.
+enum class ValidationResult {
+  /// The data is already valid.
+  Valid,
+  /// The data was invalid, but was recovered.
+  Recovered,
+  /// Validation was skipped, as it was not needed.
+  Skipped,
+};
+
+/// Validate the data in \p Path, if needed to ensure correctness.
+///
+/// \param Path directory for the on-disk database.
+/// \param CheckHash Whether to validate hashes match the data.
+/// \param AllowRecovery Whether to automatically recover from invalid data by
+/// marking the files for garbage collection.
+/// \param ForceValidation Whether to force validation to occur even if it
+/// should not be necessary.
+/// \param LLVMCasBinary If provided, validation is performed out-of-process
+/// using the given \c llvm-cas executable which protects against crashes
+/// during validation. Otherwise validation is performed in-process.
+///
+/// \returns \c Valid if the data is already valid, \c Recovered if data
+/// was invalid but has been cleared, \c Skipped if validation is not needed,
+/// or an \c Error if validation cannot be performed or if the data is left
+/// in an invalid state because \p AllowRecovery is false.
+Expected<ValidationResult> validateOnDiskUnifiedCASDatabasesIfNeeded(
+    StringRef Path, bool CheckHash, bool AllowRecovery, bool ForceValidation,
+    std::optional<StringRef> LLVMCasBinary);
+
+} // namespace llvm::cas
+
+#endif // LLVM_CAS_BUILTINUNIFIEDCASDATABASES_H
diff --git a/llvm/include/llvm/CAS/ObjectStore.h b/llvm/include/llvm/CAS/ObjectStore.h
index 6db5dd3904095..febfff815e86e 100644
--- a/llvm/include/llvm/CAS/ObjectStore.h
+++ b/llvm/include/llvm/CAS/ObjectStore.h
@@ -111,7 +111,10 @@ class ObjectStore {
   virtual Expected<bool> isMaterialized(ObjectRef Ref) const = 0;
 
   /// Validate the underlying object referred by CASID.
-  virtual Error validate(const CASID &ID) = 0;
+  virtual Error validateObject(const CASID &ID) = 0;
+
+  /// Validate the entire ObjectStore.
+  virtual Error validate(bool CheckHash) const = 0;
 
 protected:
   /// Load the object referenced by \p Ref.
@@ -215,9 +218,39 @@ class ObjectStore {
     return Data.size();
   }
 
+  /// Set the size for limiting growth of on-disk storage. This has an effect
+  /// for when the instance is closed.
+  ///
+  /// Implementations may not have this implemented.
+  virtual Error setSizeLimit(std::optional<uint64_t> SizeLimit) {
+    return Error::success();
+  }
+
+  /// \returns the storage size of the on-disk CAS data.
+  ///
+  /// Implementations that don't have an implementation for this should return
+  /// \p std::nullopt.
+  virtual Expected<std::optional<uint64_t>> getStorageSize() const {
+    return std::nullopt;
+  }
+
+  /// Prune local storage to reduce its size according to the desired size
+  /// limit. Pruning can happen concurrently with other operations.
+  ///
+  /// Implementations may not have this implemented.
+  virtual Error pruneStorageData() { return Error::success(); }
+
   /// Validate the whole node tree.
   Error validateTree(ObjectRef Ref);
 
+  /// Import object from another CAS. This will import the full tree from the
+  /// other CAS.
+  Expected<ObjectRef> importObject(ObjectStore &Upstream, ObjectRef Other);
+
+  /// Print the ObjectStore internals for debugging purpose.
+  virtual void print(raw_ostream &) const {}
+  void dump() const;
+
   /// Get CASContext
   const CASContext &getContext() const { return Context; }
 
@@ -292,6 +325,20 @@ class ObjectProxy {
 
 std::unique_ptr<ObjectStore> createInMemoryCAS();
 
+/// \returns true if \c LLVM_ENABLE_ONDISK_CAS configuration was enabled.
+bool isOnDiskCASEnabled();
+
+/// Gets or creates a persistent on-disk CAS at \p Path.
+Expected<std::unique_ptr<ObjectStore>> createOnDiskCAS(const Twine &Path);
+
+/// Set \p Path to a reasonable default on-disk path for a persistent CAS for
+/// the current user.
+Error getDefaultOnDiskCASPath(SmallVectorImpl<char> &Path);
+
+/// Get a reasonable default on-disk path for a persistent CAS for the current
+/// user.
+llvm::Expected<std::string> getDefaultOnDiskCASPath();
+
 } // namespace cas
 } // namespace llvm
 
diff --git a/llvm/include/llvm/CAS/UnifiedOnDiskCache.h b/llvm/include/llvm/CAS/UnifiedOnDiskCache.h
new file mode 100644
index 0000000000000..fa9d2fcfdb4e4
--- /dev/null
+++ b/llvm/include/llvm/CAS/UnifiedOnDiskCache.h
@@ -0,0 +1,191 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CAS_UNIFIEDONDISKCACHE_H
+#define LLVM_CAS_UNIFIEDONDISKCACHE_H
+
+#include "llvm/CAS/BuiltinUnifiedCASDatabases.h"
+#include "llvm/CAS/OnDiskGraphDB.h"
+#include <atomic>
+
+namespace llvm::cas::ondisk {
+
+class OnDiskKeyValueDB;
+
+/// A unified CAS-node and key-value database, using on-disk storage for both.
+/// It manages storage growth and provides APIs for garbage collection.
+///
+/// High-level properties:
+/// * While \p UnifiedOnDiskCache is open on a directory, by any process, the
+///   storage size in that directory will keep growing unrestricted. For data to
+///   become eligible for garbage-collection there should be no open instances
+///   of \p UnifiedOnDiskCache for that directory, by any process.
+/// * Garbage-collection needs to be triggered explicitly by the client. It can
+///   be triggered on a directory concurrently, at any time and by any process,
+///   without affecting any active readers/writers, in the same process or other
+///   processes.
+///
+/// Usage patterns should be that an instance of \p UnifiedOnDiskCache is open
+/// for a limited period of time, e.g. for the duration of a build operation.
+/// For long-living processes that need periodic access to a
+/// \p UnifiedOnDiskCache, the client should devise a scheme where access is
+/// performed within some defined period. For example, if a service is designed
+/// to continuously wait for requests that access a \p UnifiedOnDiskCache, it
+/// could keep the instance alive while new requests are coming in but close it
+/// after a time period in which there are no new requests.
+class UnifiedOnDiskCache {
+public:
+  /// The \p OnDiskGraphDB instance for the open directory.
+  OnDiskGraphDB &getGraphDB() { return *PrimaryGraphDB; }
+
+  /// Associate an \p ObjectID, of the \p OnDiskGraphDB instance, with a key.
+  ///
+  /// \param Key the hash bytes for the key.
+  /// \param Value the \p ObjectID value.
+  ///
+  /// \returns the \p ObjectID associated with the \p Key. It may be different
+  /// than \p Value if another value was already associated with this key.
+  Expected<ObjectID> KVPut(ArrayRef<uint8_t> Key, ObjectID Value);
+
+  /// Associate an \p ObjectID, of the \p OnDiskGraphDB instance, with a key.
+  /// An \p ObjectID as a key is equivalent to its digest bytes.
+  ///
+  /// \param Key the \p ObjectID for the key.
+  /// \param Value the \p ObjectID value.
+  ///
+  /// \returns the \p ObjectID associated with the \p Key. It may be different
+  /// than \p Value if another value was already associated with this key.
+  Expected<ObjectID> KVPut(ObjectID Key, ObjectID Value);
+
+  /// \returns the \p ObjectID, of the \p OnDiskGraphDB instance, associated
+  /// with the \p Key, or \p std::nullopt if the key does not exist.
+  Expected<std::optional<ObjectID>> KVGet(ArrayRef<uint8_t> Key);
+
+  /// Open a \p UnifiedOnDiskCache instance for a directory.
+  ///
+  /// \param Path directory for the on-disk database. The directory will be
+  /// created if it doesn't exist.
+  /// \param SizeLimit Optional size for limiting growth. This has an effect for
+  /// when the instance is closed.
+  /// \param HashName Identifier name for the hashing algorithm that is going to
+  /// be used.
+  /// \param HashByteSize Size for the object digest hash bytes.
+  /// \param FaultInPolicy Controls how nodes are copied to primary store. This
+  /// is recorded at creation time and subsequent opens need to pass the same
+  /// policy otherwise the \p open will fail.
+  static Expected<std::unique_ptr<UnifiedOnDiskCache>>
+  open(StringRef Path, std::optional<uint64_t> SizeLimit, StringRef HashName,
+       unsigned HashByteSize,
+       OnDiskGraphDB::FaultInPolicy FaultInPolicy =
+           OnDiskGraphDB::FaultInPolicy::FullTree);
+
+  /// Validate the data in \p Path, if needed to ensure correctness.
+  ///
+  /// Note: if invalid data is detected and \p AllowRecovery is true, then
+  /// recovery requires exclusive access to the CAS and it is an error to
+  /// attempt recovery if there is concurrent use of the CAS.
+  ///
+  /// \param Path directory for the on-disk database.
+  /// \param HashName Identifier name for the hashing algorithm that is going to
+  /// be used.
+  /// \param HashByteSize Size for the object digest hash bytes.
+  /// \param CheckHash Whether to validate hashes match the data.
+  /// \param AllowRecovery Whether to automatically recover from invalid data by
+  /// marking the files for garbage collection.
+  /// \param ForceValidation Whether to force validation to occur even if it
+  /// should not be necessary.
+  /// \param LLVMCasBinary If provided, validation is performed out-of-process
+  /// using the given \c llvm-cas executable which protects against crashes
+  /// during validation. Otherwise validation is performed in-process.
+  ///
+  /// \returns \c Valid if the data is already valid, \c Recovered if data
+  /// was invalid but has been cleared, \c Skipped if validation is not needed,
+  /// or an \c Error if validation cannot be performed or if the data is left
+  /// in an invalid state because \p AllowRecovery is false.
+  static Expected<ValidationResult>
+  validateIfNeeded(StringRef Path, StringRef HashName, unsigned HashByteSize,
+                   bool CheckHash, bool AllowRecovery, bool ForceValidation,
+                   std::optional<StringRef> LLVMCasBinary);
+
+  /// This is called implicitly at destruction time, so it is not required for a
+  /// client to call this. After calling \p close the only method that is valid
+  /// to call is \p needsGarbageCollection.
+  ///
+  /// \param CheckSizeLimit if true it will check whether the primary store has
+  /// exceeded its intended size limit. If false the check is skipped even if a
+  /// \p SizeLimit was passed to the \p open call.
+  Error close(bool CheckSizeLimit = true);
+
+  /// Set the size for limiting growth. This has an effect for when the instance
+  /// is closed.
+  void setSizeLimit(std::optional<uint64_t> SizeLimit);
+
+  /// \returns the storage size of the cache data.
+  uint64_t getStorageSize() const;
+
+  /// \returns whether the primary store has exceeded the intended size limit.
+  /// This can return false even if the overall size of the opened directory is
+  /// over the \p SizeLimit passed to \p open. To know whether garbage
+  /// collection needs to be triggered or not, call \p needsGarbageCollection.
+  bool hasExceededSizeLimit() const;
+
+  /// \returns whether there are unused data that can be deleted using a
+  /// \p collectGarbage call.
+  bool needsGarbageCollection() const { return NeedsGarbageCollection; }
+
+  /// Remove any unused data from the directory at \p Path. If there are no such
+  /// data the operation is a no-op.
+  ///
+  /// This can be called concurrently, regardless of whether there is an open
+  /// \p UnifiedOnDiskCache instance or not; it has no effect on readers/writers
+  /// in the same process or other processes.
+  ///
+  /// It is recommended that garbage-collection is triggered concurrently in the
+  /// background, so that it has minimal effect on the workload of the process.
+  static Error collectGarbage(StringRef Path);
+
+  /// Remove unused data from the current UnifiedOnDiskCache.
+  Error collectGarbage();
+
+  /// Validate the key value databases.
+  Error validateActionCache();
+
+  /// Get the upstream OnDiskGraphDB if it exists.
+  ///
+  /// \returns upstream database or nullptr if upstream database doesn't exist.
+  OnDiskGraphDB *getUpstreamGraphDB() const { return UpstreamGraphDB; }
+
+  ~UnifiedOnDiskCache();
+
+private:
+  UnifiedOnDiskCache();
+
+  Expected<std::optional<ObjectID>>
+  faultInFromUpstreamKV(ArrayRef<uint8_t> Key);
+
+  /// \returns the storage size of the primary directory.
+  uint64_t getPrimaryStorageSize() const;
+
+  std::string RootPath;
+  std::atomic<uint64_t> SizeLimit;
+
+  int LockFD = -1;
+
+  std::atomic<bool> NeedsGarbageCollection;
+  std::string PrimaryDBDir;
+
+  OnDiskGraphDB *UpstreamGraphDB = nullptr;
+  std::unique_ptr<OnDiskGraphDB> PrimaryGraphDB;
+
+  std::unique_ptr<OnDiskKeyValueDB> UpstreamKVDB;
+  std::unique_ptr<OnDiskKeyValueDB> PrimaryKVDB;
+};
+
+} // namespace llvm::cas::ondisk
+
+#endif // LLVM_CAS_UNIFIEDONDISKCACHE_H
diff --git a/llvm/lib/CAS/ActionCaches.cpp b/llvm/lib/CAS/ActionCaches.cpp
index 571c5b3ca5b4b..dac50e0740c5c 100644
--- a/llvm/lib/CAS/ActionCaches.cpp
+++ b/llvm/lib/CAS/ActionCaches.cpp
@@ -13,7 +13,11 @@
 #include "BuiltinCAS.h"
 #include "llvm/ADT/TrieRawHashMap.h"
 #include "llvm/CAS/ActionCache.h"
+#include "llvm/CAS/OnDiskKeyValueDB.h"
+#include "llvm/CAS/UnifiedOnDiskCache.h"
+#include "llvm/Config/llvm-config.h"
 #include "llvm/Support/BLAKE3.h"
+#include "llvm/Support/Path.h"
 
 #define DEBUG_TYPE "cas-action-caches"
 
@@ -47,12 +51,51 @@ class InMemoryActionCache final : public ActionCache {
   Expected<std::optional<CASID>> getImpl(ArrayRef<uint8_t> ActionKey,
                                          bool CanBeDistributed) const final;
 
+  Error validate() const final {
+    return createStringError("InMemoryActionCache doesn't support validate()");
+  }
+
 private:
   using DataT = CacheEntry<sizeof(HashType)>;
   using InMemoryCacheT = ThreadSafeTrieRawHashMap<DataT, sizeof(HashType)>;
 
   InMemoryCacheT Cache;
 };
+
+class OnDiskActionCache final : public ActionCache {
+public:
+  Error putImpl(ArrayRef<uint8_t> ActionKey, const CASID &Result,
+                bool CanBeDistributed) final;
+  Expected<std::optional<CASID>> getImpl(ArrayRef<uint8_t> ActionKey,
+                                         bool CanBeDistributed) const final;
+
+  static Expected<std::unique_ptr<OnDiskActionCache>> create(StringRef Path);
+
+  Error validate() const final;
+
+private:
+  static StringRef getHashName() { return "BLAKE3"; }
+
+  OnDiskActionCache(std::unique_ptr<ondisk::OnDiskKeyValueDB> DB);
+
+  std::unique_ptr<ondisk::OnDiskKeyValueDB> DB;
+  using DataT = CacheEntry<sizeof(HashType)>;
+};
+
+class UnifiedOnDiskActionCache final : public ActionCache {
+public:
+  Error putImpl(ArrayRef<uint8_t> ActionKey, const CASID &Result,
+                bool CanBeDistributed) final;
+  Expected<std::optional<CASID>> getImpl(ArrayRef<uint8_t> ActionKey,
+                                         bool CanBeDistributed) const final;
+
+  UnifiedOnDiskActionCache(std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB);
+
+  Error validate() const final;
+
+private:
+  std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB;
+};
 } // end namespace
 
 static Error createResultCachePoisonedError(ArrayRef<uint8_t> KeyHash,
@@ -92,10 +135,123 @@ Error InMemoryActionCache::putImpl(ArrayRef<uint8_t> Key, const CASID &Result,
                                         Observed.getValue());
 }
 
+static constexpr StringLiteral DefaultName = "actioncache";
+
 namespace llvm::cas {
 
+std::string getDefaultOnDiskActionCachePath() {
+  SmallString<128> Path;
+  if (!llvm::sys::path::cache_directory(Path))
+    report_fatal_error("cannot get default cache directory");
+  llvm::sys::path::append(Path, builtin::DefaultDir, DefaultName);
+  return Path.str().str();
+}
+
 std::unique_ptr<ActionCache> createInMemoryActionCache() {
   return std::make_unique<InMemoryActionCache>();
 }
 
 } // namespace llvm::cas
+
+OnDiskActionCache::OnDiskActionCache(
+    std::unique_ptr<ondisk::OnDiskKeyValueDB> DB)
+    : ActionCache(builtin::BuiltinCASContext::getDefaultContext()),
+      DB(std::move(DB)) {}
+
+Expected<std::unique_ptr<OnDiskActionCache>>
+OnDiskActionCache::create(StringRef AbsPath) {
+  std::unique_ptr<ondisk::OnDiskKeyValueDB> DB;
+  if (Error E = ondisk::OnDiskKeyValueDB::open(AbsPath, getHashName(),
+                                               sizeof(HashType), getHashName(),
+                                               sizeof(DataT))
+                    .moveInto(DB))
+    return std::move(E);
+  return std::unique_ptr<OnDiskActionCache>(
+      new OnDiskActionCache(std::move(DB)));
+}
+
+Expected<std::optional<CASID>>
+OnDiskActionCache::getImpl(ArrayRef<uint8_t> Key,
+                           bool /*CanBeDistributed*/) const {
+  std::optional<ArrayRef<char>> Val;
+  if (Error E = DB->get(Key).moveInto(Val))
+    return std::move(E);
+  if (!Val)
+    return std::nullopt;
+  return CASID::create(&getContext(), toStringRef(*Val));
+}
+
+Error OnDiskActionCache::putImpl(ArrayRef<uint8_t> Key, const CASID &Result,
+                                 bool /*CanBeDistributed*/) {
+  auto ResultHash = Result.getHash();
+  ArrayRef Expected((const char *)ResultHash.data(), ResultHash.size());
+  ArrayRef<char> Observed;
+  if (Error E = DB->put(Key, Expected).moveInto(Observed))
+    return E;
+
+  if (Expected == Observed)
+    return Error::success();
+
+  return createResultCachePoisonedError(
+      Key, getContext(), Result,
+      ArrayRef((const uint8_t *)Observed.data(), Observed.size()));
+}
+
+Error OnDiskActionCache::validate() const {
+  // FIXME: without the matching CAS there is nothing we can check about the
+  // cached values. The hash size is already validated by the DB validator.
+  return DB->validate(nullptr);
+}
+
+UnifiedOnDiskActionCache::UnifiedOnDiskActionCache(
+    std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB)
+    : ActionCache(builtin::BuiltinCASContext::getDefaultContext()),
+      UniDB(std::move(UniDB)) {}
+
+Expected<std::optional<CASID>>
+UnifiedOnDiskActionCache::getImpl(ArrayRef<uint8_t> Key,
+                                  bool /*CanBeDistributed*/) const {
+  std::optional<ondisk::ObjectID> Val;
+  if (Error E = UniDB->KVGet(Key).moveInto(Val))
+    return std::move(E);
+  if (!Val)
+    return std::nullopt;
+  return CASID::create(&getContext(),
+                       toStringRef(UniDB->getGraphDB().getDigest(*Val)));
+}
+
+Error UnifiedOnDiskActionCache::putImpl(ArrayRef<uint8_t> Key,
+                                        const CASID &Result,
+                                        bool /*CanBeDistributed*/) {
+  auto Expected = UniDB->getGraphDB().getReference(Result.getHash());
+  if (LLVM_UNLIKELY(!Expected))
+    return Expected.takeError();
+  std::optional<ondisk::ObjectID> Observed;
+  if (Error E = UniDB->KVPut(Key, *Expected).moveInto(Observed))
+    return E;
+
+  if (*Expected == Observed)
+    return Error::success();
+
+  return createResultCachePoisonedError(
+      Key, getContext(), Result, UniDB->getGraphDB().getDigest(*Observed));
+}
+
+Error UnifiedOnDiskActionCache::validate() const {
+  return UniDB->validateActionCache();
+}
+
+Expected<std::unique_ptr<ActionCache>>
+cas::createOnDiskActionCache(StringRef Path) {
+#if LLVM_ENABLE_ONDISK_CAS
+  return OnDiskActionCache::create(Path);
+#else
+  return createStringError(inconvertibleErrorCode(), "OnDiskCache is disabled");
+#endif
+}
+
+std::unique_ptr<ActionCache>
+cas::builtin::createActionCacheFromUnifiedOnDiskCache(
+    std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB) {
+  return std::make_unique<UnifiedOnDiskActionCache>(std::move(UniDB));
+}
diff --git a/llvm/lib/CAS/BuiltinCAS.cpp b/llvm/lib/CAS/BuiltinCAS.cpp
index 73646ad2c3528..e9bc6d8beed4e 100644
--- a/llvm/lib/CAS/BuiltinCAS.cpp
+++ b/llvm/lib/CAS/BuiltinCAS.cpp
@@ -9,6 +9,7 @@
 #include "BuiltinCAS.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/CAS/BuiltinObjectHasher.h"
+#include "llvm/CAS/UnifiedOnDiskCache.h"
 #include "llvm/Support/Process.h"
 
 using namespace llvm;
@@ -68,7 +69,7 @@ Expected<ObjectRef> BuiltinCAS::store(ArrayRef<ObjectRef> Refs,
                    Refs, Data);
 }
 
-Error BuiltinCAS::validate(const CASID &ID) {
+Error BuiltinCAS::validateObject(const CASID &ID) {
   auto Ref = getReference(ID);
   if (!Ref)
     return createUnknownObjectError(ID);
@@ -92,3 +93,14 @@ Error BuiltinCAS::validate(const CASID &ID) {
 
   return Error::success();
 }
+
+Expected<std::unique_ptr<ondisk::UnifiedOnDiskCache>>
+cas::builtin::createBuiltinUnifiedOnDiskCache(StringRef Path) {
+#if LLVM_ENABLE_ONDISK_CAS
+  return ondisk::UnifiedOnDiskCache::open(Path, /*SizeLimit=*/std::nullopt,
+                                          BuiltinCASContext::getHashName(),
+                                          sizeof(HashType));
+#else
+  return createStringError(inconvertibleErrorCode(), "OnDiskCache is disabled");
+#endif
+}
diff --git a/llvm/lib/CAS/BuiltinCAS.h b/llvm/lib/CAS/BuiltinCAS.h
index 3b5374d5e1850..4d2de66cf636f 100644
--- a/llvm/lib/CAS/BuiltinCAS.h
+++ b/llvm/lib/CAS/BuiltinCAS.h
@@ -1,4 +1,4 @@
-//===- BuiltinCAS.h ---------------------------------------------*- C++ -*-===//
+//===----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -15,6 +15,9 @@
 
 namespace llvm::cas {
 class ActionCache;
+namespace ondisk {
+class UnifiedOnDiskCache;
+} // namespace ondisk
 namespace builtin {
 
 /// Common base class for builtin CAS implementations using the same CASContext.
@@ -65,9 +68,27 @@ class BuiltinCAS : public ObjectStore {
                              "corrupt storage");
   }
 
-  Error validate(const CASID &ID) final;
+  Error validateObject(const CASID &ID) final;
 };
 
+/// Create a \p UnifiedOnDiskCache instance that uses \p BLAKE3 hashing.
+Expected<std::unique_ptr<ondisk::UnifiedOnDiskCache>>
+createBuiltinUnifiedOnDiskCache(StringRef Path);
+
+/// \param UniDB A \p UnifiedOnDiskCache instance from \p
+/// createBuiltinUnifiedOnDiskCache.
+std::unique_ptr<ObjectStore> createObjectStoreFromUnifiedOnDiskCache(
+    std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB);
+
+/// \param UniDB A \p UnifiedOnDiskCache instance from \p
+/// createBuiltinUnifiedOnDiskCache.
+std::unique_ptr<ActionCache> createActionCacheFromUnifiedOnDiskCache(
+    std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB);
+
+// FIXME: Proxy not portable. Maybe also error-prone?
+constexpr StringLiteral DefaultDirProxy = "/^llvm::cas::builtin::default";
+constexpr StringLiteral DefaultDir = "llvm.cas.builtin.default";
+
 } // end namespace builtin
 } // end namespace llvm::cas
 
diff --git a/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp b/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp
new file mode 100644
index 0000000000000..f3f6fa043bc52
--- /dev/null
+++ b/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp
@@ -0,0 +1,38 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CAS/BuiltinUnifiedCASDatabases.h"
+#include "BuiltinCAS.h"
+#include "llvm/CAS/ActionCache.h"
+#include "llvm/CAS/UnifiedOnDiskCache.h"
+
+using namespace llvm;
+using namespace llvm::cas;
+
+Expected<std::pair<std::unique_ptr<ObjectStore>, std::unique_ptr<ActionCache>>>
+cas::createOnDiskUnifiedCASDatabases(StringRef Path) {
+  std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB;
+  if (Error E = builtin::createBuiltinUnifiedOnDiskCache(Path).moveInto(UniDB))
+    return std::move(E);
+  auto CAS = builtin::createObjectStoreFromUnifiedOnDiskCache(UniDB);
+  auto AC = builtin::createActionCacheFromUnifiedOnDiskCache(std::move(UniDB));
+  return std::make_pair(std::move(CAS), std::move(AC));
+}
+
+Expected<ValidationResult> cas::validateOnDiskUnifiedCASDatabasesIfNeeded(
+    StringRef Path, bool CheckHash, bool AllowRecovery, bool ForceValidation,
+    std::optional<StringRef> LLVMCasBinary) {
+#if LLVM_ENABLE_ONDISK_CAS
+  return ondisk::UnifiedOnDiskCache::validateIfNeeded(
+      Path, builtin::BuiltinCASContext::getHashName(),
+      sizeof(builtin::HashType), CheckHash, AllowRecovery, ForceValidation,
+      LLVMCasBinary);
+#else
+  return createStringError(inconvertibleErrorCode(), "OnDiskCache is disabled");
+#endif
+}
diff --git a/llvm/lib/CAS/CMakeLists.txt b/llvm/lib/CAS/CMakeLists.txt
index a2f8c49e50145..aad77dce370d8 100644
--- a/llvm/lib/CAS/CMakeLists.txt
+++ b/llvm/lib/CAS/CMakeLists.txt
@@ -2,15 +2,18 @@ add_llvm_component_library(LLVMCAS
   ActionCache.cpp
   ActionCaches.cpp
   BuiltinCAS.cpp
+  BuiltinUnifiedCASDatabases.cpp
   DatabaseFile.cpp
   InMemoryCAS.cpp
   MappedFileRegionArena.cpp
   ObjectStore.cpp
+  OnDiskCAS.cpp
   OnDiskCommon.cpp
   OnDiskDataAllocator.cpp
   OnDiskGraphDB.cpp
   OnDiskKeyValueDB.cpp
   OnDiskTrieRawHashMap.cpp
+  UnifiedOnDiskCache.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${LLVM_MAIN_INCLUDE_DIR}/llvm/CAS
diff --git a/llvm/lib/CAS/InMemoryCAS.cpp b/llvm/lib/CAS/InMemoryCAS.cpp
index c63ee70de0849..2d4eedd5bdc8f 100644
--- a/llvm/lib/CAS/InMemoryCAS.cpp
+++ b/llvm/lib/CAS/InMemoryCAS.cpp
@@ -233,6 +233,12 @@ class InMemoryCAS : public BuiltinCAS {
     return cast<InMemoryObject>(asInMemoryObject(Node)).getData();
   }
 
+  void print(raw_ostream &OS) const final;
+
+  Error validate(bool CheckHash) const final {
+    return createStringError("InMemoryCAS doesn't support validate()");
+  }
+
   InMemoryCAS() = default;
 
 private:
@@ -271,6 +277,8 @@ ArrayRef<const InMemoryObject *> InMemoryObject::getRefs() const {
   return cast<InMemoryInlineObject>(this)->getRefsImpl();
 }
 
+void InMemoryCAS::print(raw_ostream &OS) const {}
+
 Expected<ObjectRef>
 InMemoryCAS::storeFromNullTerminatedRegion(ArrayRef<uint8_t> ComputedHash,
                                            sys::fs::mapped_file_region Map) {
diff --git a/llvm/lib/CAS/ObjectStore.cpp b/llvm/lib/CAS/ObjectStore.cpp
index e0be50bbe013a..3110577e03774 100644
--- a/llvm/lib/CAS/ObjectStore.cpp
+++ b/llvm/lib/CAS/ObjectStore.cpp
@@ -1,4 +1,4 @@
-//===- ObjectStore.cpp ------------------------------------------*- C++ -*-===//
+//===----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -12,7 +12,7 @@
 #include "llvm/Support/Errc.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MemoryBuffer.h"
-#include <optional>
+#include <deque>
 
 using namespace llvm;
 using namespace llvm::cas;
@@ -21,6 +21,7 @@ void CASContext::anchor() {}
 void ObjectStore::anchor() {}
 
 LLVM_DUMP_METHOD void CASID::dump() const { print(dbgs()); }
+LLVM_DUMP_METHOD void ObjectStore::dump() const { print(dbgs()); }
 LLVM_DUMP_METHOD void ObjectRef::dump() const { print(dbgs()); }
 LLVM_DUMP_METHOD void ObjectHandle::dump() const { print(dbgs()); }
 
@@ -141,7 +142,7 @@ Error ObjectStore::validateTree(ObjectRef Root) {
     auto [I, Inserted] = ValidatedRefs.insert(Ref);
     if (!Inserted)
       continue; // already validated.
-    if (Error E = validate(getID(Ref)))
+    if (Error E = validateObject(getID(Ref)))
       return E;
     Expected<ObjectHandle> Obj = load(Ref);
     if (!Obj)
@@ -155,6 +156,92 @@ Error ObjectStore::validateTree(ObjectRef Root) {
   return Error::success();
 }
 
+Expected<ObjectRef> ObjectStore::importObject(ObjectStore &Upstream,
+                                              ObjectRef Other) {
+  // Copy the full CAS tree from upstream with depth-first ordering to ensure
+  // all the child nodes are available in downstream CAS before inserting
+  // current object. This uses a similar algorithm as
+  // `OnDiskGraphDB::importFullTree` but doesn't assume the upstream CAS schema
+  // so it can be used to import from any other ObjectStore regardless of the
+  // CAS schema.
+
+  // There is no work to do if importing from self.
+  if (this == &Upstream)
+    return Other;
+
+  /// Keeps track of the state of visitation for current node and all of its
+  /// parents. Upstream Cursor holds information only from upstream CAS.
+  struct UpstreamCursor {
+    ObjectRef Ref;
+    ObjectHandle Node;
+    size_t RefsCount;
+    std::deque<ObjectRef> Refs;
+  };
+  SmallVector<UpstreamCursor, 16> CursorStack;
+  /// PrimaryRefStack holds the ObjectRef of the current CAS, with nodes either
+  /// just stored in the CAS or nodes that already exist in the current CAS.
+  SmallVector<ObjectRef, 128> PrimaryRefStack;
+  /// A map from upstream ObjectRef to current ObjectRef.
+  llvm::DenseMap<ObjectRef, ObjectRef> CreatedObjects;
+
+  auto enqueueNode = [&](ObjectRef Ref, ObjectHandle Node) {
+    unsigned NumRefs = Upstream.getNumRefs(Node);
+    std::deque<ObjectRef> Refs;
+    for (unsigned I = 0; I < NumRefs; ++I)
+      Refs.push_back(Upstream.readRef(Node, I));
+
+    CursorStack.push_back({Ref, Node, NumRefs, std::move(Refs)});
+  };
+
+  auto UpstreamHandle = Upstream.load(Other);
+  if (!UpstreamHandle)
+    return UpstreamHandle.takeError();
+  enqueueNode(Other, *UpstreamHandle);
+
+  while (!CursorStack.empty()) {
+    UpstreamCursor &Cur = CursorStack.back();
+    if (Cur.Refs.empty()) {
+      // Copy the node data into the primary store.
+      // The top \c Cur.RefsCount entries of \p PrimaryRefStack are the
+      // already-imported refs of the current node's children.
+      assert(PrimaryRefStack.size() >= Cur.RefsCount);
+      auto Refs = ArrayRef(PrimaryRefStack)
+                      .slice(PrimaryRefStack.size() - Cur.RefsCount);
+      auto NewNode = store(Refs, Upstream.getData(Cur.Node));
+      if (!NewNode)
+        return NewNode.takeError();
+
+      // Record the mapping and pop the children's refs while \p Cur is still
+      // valid; it refers into CursorStack and dangles after pop_back().
+      CreatedObjects.try_emplace(Cur.Ref, *NewNode);
+      PrimaryRefStack.truncate(PrimaryRefStack.size() - Cur.RefsCount);
+      CursorStack.pop_back();
+      PrimaryRefStack.push_back(*NewNode);
+      continue;
+    }
+
+    // Check if the node exists already.
+    auto CurrentID = Cur.Refs.front();
+    Cur.Refs.pop_front();
+    auto Ref = CreatedObjects.find(CurrentID);
+    if (Ref != CreatedObjects.end()) {
+      // If exists already, just need to enqueue the primary node.
+      PrimaryRefStack.push_back(Ref->second);
+      continue;
+    }
+
+    // Load child.
+    auto PrimaryID = Upstream.load(CurrentID);
+    if (LLVM_UNLIKELY(!PrimaryID))
+      return PrimaryID.takeError();
+
+    enqueueNode(CurrentID, *PrimaryID);
+  }
+
+  assert(PrimaryRefStack.size() == 1);
+  return PrimaryRefStack.front();
+}
+
 std::unique_ptr<MemoryBuffer>
 ObjectProxy::getMemoryBuffer(StringRef Name,
                              bool RequiresNullTerminator) const {
diff --git a/llvm/lib/CAS/OnDiskCAS.cpp b/llvm/lib/CAS/OnDiskCAS.cpp
new file mode 100644
index 0000000000000..035722459236a
--- /dev/null
+++ b/llvm/lib/CAS/OnDiskCAS.cpp
@@ -0,0 +1,228 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "BuiltinCAS.h"
+#include "llvm/CAS/BuiltinCASContext.h"
+#include "llvm/CAS/BuiltinObjectHasher.h"
+#include "llvm/CAS/OnDiskGraphDB.h"
+#include "llvm/CAS/UnifiedOnDiskCache.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/Path.h"
+
+using namespace llvm;
+using namespace llvm::cas;
+using namespace llvm::cas::builtin;
+
+namespace {
+
+class OnDiskCAS : public BuiltinCAS {
+public:
+  Expected<ObjectRef> storeImpl(ArrayRef<uint8_t> ComputedHash,
+                                ArrayRef<ObjectRef> Refs,
+                                ArrayRef<char> Data) final;
+
+  Expected<std::optional<ObjectHandle>> loadIfExists(ObjectRef Ref) final;
+
+  CASID getID(ObjectRef Ref) const final;
+
+  std::optional<ObjectRef> getReference(const CASID &ID) const final;
+
+  Expected<bool> isMaterialized(ObjectRef Ref) const final;
+
+  ArrayRef<char> getDataConst(ObjectHandle Node) const final;
+
+  void print(raw_ostream &OS) const final;
+  Error validate(bool CheckHash) const final;
+
+  static Expected<std::unique_ptr<OnDiskCAS>> open(StringRef Path);
+
+  OnDiskCAS(std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB)
+      : UnifiedDB(std::move(UniDB)), DB(&UnifiedDB->getGraphDB()) {}
+
+private:
+  ObjectHandle convertHandle(ondisk::ObjectHandle Node) const {
+    return makeObjectHandle(Node.getOpaqueData());
+  }
+
+  ondisk::ObjectHandle convertHandle(ObjectHandle Node) const {
+    return ondisk::ObjectHandle(Node.getInternalRef(*this));
+  }
+
+  ObjectRef convertRef(ondisk::ObjectID Ref) const {
+    return makeObjectRef(Ref.getOpaqueData());
+  }
+
+  ondisk::ObjectID convertRef(ObjectRef Ref) const {
+    return ondisk::ObjectID::fromOpaqueData(Ref.getInternalRef(*this));
+  }
+
+  size_t getNumRefs(ObjectHandle Node) const final {
+    auto RefsRange = DB->getObjectRefs(convertHandle(Node));
+    return std::distance(RefsRange.begin(), RefsRange.end());
+  }
+  ObjectRef readRef(ObjectHandle Node, size_t I) const final {
+    auto RefsRange = DB->getObjectRefs(convertHandle(Node));
+    return convertRef(RefsRange.begin()[I]);
+  }
+  Error forEachRef(ObjectHandle Node,
+                   function_ref<Error(ObjectRef)> Callback) const final;
+
+  Error setSizeLimit(std::optional<uint64_t> SizeLimit) final;
+  Expected<std::optional<uint64_t>> getStorageSize() const final;
+  Error pruneStorageData() final;
+
+  OnDiskCAS(std::unique_ptr<ondisk::OnDiskGraphDB> GraphDB)
+      : OwnedDB(std::move(GraphDB)), DB(OwnedDB.get()) {}
+
+  std::unique_ptr<ondisk::OnDiskGraphDB> OwnedDB;
+  std::shared_ptr<ondisk::UnifiedOnDiskCache> UnifiedDB;
+  ondisk::OnDiskGraphDB *DB;
+};
+
+} // end anonymous namespace
+
+void OnDiskCAS::print(raw_ostream &OS) const { DB->print(OS); }
+Error OnDiskCAS::validate(bool CheckHash) const {
+  auto Hasher = [](ArrayRef<ArrayRef<uint8_t>> Refs, ArrayRef<char> Data,
+                   SmallVectorImpl<uint8_t> &Result) {
+    auto Hash = BuiltinObjectHasher<llvm::cas::builtin::HasherT>::hashObject(
+        Refs, Data);
+    Result.assign(Hash.begin(), Hash.end());
+  };
+
+  if (auto E = DB->validate(CheckHash, Hasher))
+    return E;
+  if (UnifiedDB && UnifiedDB->getUpstreamGraphDB())
+    return UnifiedDB->getUpstreamGraphDB()->validate(CheckHash, Hasher);
+
+  return Error::success();
+}
+
+CASID OnDiskCAS::getID(ObjectRef Ref) const {
+  ArrayRef<uint8_t> Hash = DB->getDigest(convertRef(Ref));
+  return CASID::create(&getContext(), toStringRef(Hash));
+}
+
+std::optional<ObjectRef> OnDiskCAS::getReference(const CASID &ID) const {
+  std::optional<ondisk::ObjectID> ObjID =
+      DB->getExistingReference(ID.getHash());
+  if (!ObjID)
+    return std::nullopt;
+  return convertRef(*ObjID);
+}
+
+Expected<bool> OnDiskCAS::isMaterialized(ObjectRef ExternalRef) const {
+  return DB->isMaterialized(convertRef(ExternalRef));
+}
+
+ArrayRef<char> OnDiskCAS::getDataConst(ObjectHandle Node) const {
+  return DB->getObjectData(convertHandle(Node));
+}
+
+Expected<std::optional<ObjectHandle>>
+OnDiskCAS::loadIfExists(ObjectRef ExternalRef) {
+  Expected<std::optional<ondisk::ObjectHandle>> ObjHnd =
+      DB->load(convertRef(ExternalRef));
+  if (!ObjHnd)
+    return ObjHnd.takeError();
+  if (!*ObjHnd)
+    return std::nullopt;
+  return convertHandle(**ObjHnd);
+}
+
+Expected<ObjectRef> OnDiskCAS::storeImpl(ArrayRef<uint8_t> ComputedHash,
+                                         ArrayRef<ObjectRef> Refs,
+                                         ArrayRef<char> Data) {
+  SmallVector<ondisk::ObjectID, 64> IDs;
+  IDs.reserve(Refs.size());
+  for (ObjectRef Ref : Refs) {
+    IDs.push_back(convertRef(Ref));
+  }
+
+  auto StoredID = DB->getReference(ComputedHash);
+  if (LLVM_UNLIKELY(!StoredID))
+    return StoredID.takeError();
+  if (Error E = DB->store(*StoredID, IDs, Data))
+    return std::move(E);
+  return convertRef(*StoredID);
+}
+
+Error OnDiskCAS::forEachRef(ObjectHandle Node,
+                            function_ref<Error(ObjectRef)> Callback) const {
+  auto RefsRange = DB->getObjectRefs(convertHandle(Node));
+  for (ondisk::ObjectID Ref : RefsRange) {
+    if (Error E = Callback(convertRef(Ref)))
+      return E;
+  }
+  return Error::success();
+}
+
+Error OnDiskCAS::setSizeLimit(std::optional<uint64_t> SizeLimit) {
+  UnifiedDB->setSizeLimit(SizeLimit);
+  return Error::success();
+}
+
+Expected<std::optional<uint64_t>> OnDiskCAS::getStorageSize() const {
+  return UnifiedDB->getStorageSize();
+}
+
+Error OnDiskCAS::pruneStorageData() { return UnifiedDB->collectGarbage(); }
+
+Expected<std::unique_ptr<OnDiskCAS>> OnDiskCAS::open(StringRef AbsPath) {
+  Expected<std::unique_ptr<ondisk::OnDiskGraphDB>> DB =
+      ondisk::OnDiskGraphDB::open(AbsPath, BuiltinCASContext::getHashName(),
+                                  sizeof(HashType));
+  if (!DB)
+    return DB.takeError();
+  return std::unique_ptr<OnDiskCAS>(new OnDiskCAS(std::move(*DB)));
+}
+
+bool cas::isOnDiskCASEnabled() {
+#if LLVM_ENABLE_ONDISK_CAS
+  return true;
+#else
+  return false;
+#endif
+}
+
+Expected<std::unique_ptr<ObjectStore>> cas::createOnDiskCAS(const Twine &Path) {
+#if LLVM_ENABLE_ONDISK_CAS
+  // FIXME: An absolute path isn't really good enough. Should open a directory
+  // and use openat() for files underneath.
+  SmallString<256> AbsPath;
+  Path.toVector(AbsPath);
+  sys::fs::make_absolute(AbsPath);
+
+  return OnDiskCAS::open(AbsPath);
+#else
+  return createStringError(inconvertibleErrorCode(), "OnDiskCAS is disabled");
+#endif /* LLVM_ENABLE_ONDISK_CAS */
+}
+
+std::unique_ptr<ObjectStore>
+cas::builtin::createObjectStoreFromUnifiedOnDiskCache(
+    std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB) {
+  return std::make_unique<OnDiskCAS>(std::move(UniDB));
+}
+
+static constexpr StringLiteral DefaultName = "cas";
+
+Error cas::getDefaultOnDiskCASPath(SmallVectorImpl<char> &Path) {
+  if (!llvm::sys::path::cache_directory(Path))
+    return createStringError("cache directory is not available");
+  llvm::sys::path::append(Path, DefaultDir, DefaultName);
+  return Error::success();
+}
+
+Expected<std::string> cas::getDefaultOnDiskCASPath() {
+  SmallString<128> Path;
+  if (auto E = getDefaultOnDiskCASPath(Path))
+    return std::move(E);
+  return Path.str().str();
+}
diff --git a/llvm/lib/CAS/UnifiedOnDiskCache.cpp b/llvm/lib/CAS/UnifiedOnDiskCache.cpp
new file mode 100644
index 0000000000000..acf327d57cfe0
--- /dev/null
+++ b/llvm/lib/CAS/UnifiedOnDiskCache.cpp
@@ -0,0 +1,655 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Encapsulates \p OnDiskGraphDB and \p OnDiskKeyValueDB instances within one
+/// directory while also restricting storage growth with a scheme of chaining
+/// the two most recent directories (primary & upstream), where the primary
+/// "faults-in" data from the upstream one. When the primary (most recent)
+/// directory exceeds its intended limit a new empty directory becomes the
+/// primary one.
+///
+/// Within the top-level directory (the path that \p UnifiedOnDiskCache::open
+/// receives) there are directories named like this:
+///
+/// 'v<version>.<x>'
+/// 'v<version>.<x+1>'
+/// 'v<version>.<x+2>'
+/// ...
+///
+/// 'version' is the version integer for this \p UnifiedOnDiskCache's scheme and
+/// the part after the dot is an increasing integer. The primary directory is
+/// the one with the highest integer and the upstream one is the directory
+/// before it. For example, if the sub-directories contained are:
+///
+/// 'v1.5', 'v1.6', 'v1.7', 'v1.8'
+///
+/// Then the primary one is 'v1.8', the upstream one is 'v1.7', and the rest are
+/// unused directories that can be safely deleted at any time and by any
+/// process.
+///
+/// Contained within the top-level directory is a file named "lock" which is
+/// used for processes to take shared or exclusive locks for the contents of the
+/// top directory. While a \p UnifiedOnDiskCache is open it keeps a shared lock
+/// for the top-level directory; when it closes, if the primary sub-directory
+/// exceeded its limit, it attempts to get an exclusive lock in order to create
+/// a new empty primary directory; if it can't get the exclusive lock it gives
+/// up and lets the next \p UnifiedOnDiskCache instance that closes attempt
+/// again.
+///
+/// The downside of this scheme is that while \p UnifiedOnDiskCache is open on a
+/// directory, by any process, the storage size in that directory will keep
+/// growing unrestricted. But the major benefit is that garbage-collection can
+/// be triggered on a directory concurrently, at any time and by any process,
+/// without affecting any active readers/writers in the same process or other
+/// processes.
+///
+/// The \c UnifiedOnDiskCache also provides validation and recovery on top of
+/// the underlying on-disk storage. The low-level storage is designed to remain
+/// coherent across regular process crashes, but may be invalid after power loss
+/// or similar system failures. \c UnifiedOnDiskCache::validateIfNeeded allows
+/// validating the contents once per boot and can recover by marking invalid
+/// data for garbage collection.
+///
+/// The data recovery described above requires exclusive access to the CAS, and
+/// it is an error to attempt recovery if the CAS is open in any process/thread.
+/// In order to maximize backwards compatibility with tools that do not perform
+/// validation before opening the CAS, we do not attempt to get exclusive access
+/// until recovery is actually performed, meaning as long as the data is valid
+/// it will not conflict with concurrent use.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CAS/UnifiedOnDiskCache.h"
+#include "BuiltinCAS.h"
+#include "OnDiskCommon.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CAS/OnDiskGraphDB.h"
+#include "llvm/CAS/OnDiskKeyValueDB.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/raw_ostream.h"
+#include <optional>
+
+#if __has_include(<sys/sysctl.h>)
+#include <sys/sysctl.h>
+#endif
+
+using namespace llvm;
+using namespace llvm::cas;
+using namespace llvm::cas::ondisk;
+
+/// FIXME: When the version of \p DBDirPrefix is bumped up we need to figure out
+/// how to handle the leftover sub-directories of the previous version, within
+/// the \p UnifiedOnDiskCache::collectGarbage function.
+static constexpr StringLiteral DBDirPrefix = "v1.";
+
+static constexpr StringLiteral ValidationFilename = "v1.validation";
+static constexpr StringLiteral CorruptPrefix = "corrupt.";
+
+Expected<ObjectID> UnifiedOnDiskCache::KVPut(ObjectID Key, ObjectID Value) {
+  return KVPut(PrimaryGraphDB->getDigest(Key), Value);
+}
+
+Expected<ObjectID> UnifiedOnDiskCache::KVPut(ArrayRef<uint8_t> Key,
+                                             ObjectID Value) {
+  static_assert(sizeof(Value.getOpaqueData()) == sizeof(uint64_t),
+                "unexpected return opaque type");
+  std::array<char, sizeof(uint64_t)> ValBytes;
+  support::endian::write64le(ValBytes.data(), Value.getOpaqueData());
+  Expected<ArrayRef<char>> Existing = PrimaryKVDB->put(Key, ValBytes);
+  if (!Existing)
+    return Existing.takeError();
+  assert(Existing->size() == sizeof(uint64_t));
+  return ObjectID::fromOpaqueData(support::endian::read64le(Existing->data()));
+}
+
+Expected<std::optional<ObjectID>>
+UnifiedOnDiskCache::KVGet(ArrayRef<uint8_t> Key) {
+  std::optional<ArrayRef<char>> Value;
+  if (Error E = PrimaryKVDB->get(Key).moveInto(Value))
+    return std::move(E);
+  if (!Value) {
+    if (UpstreamKVDB)
+      return faultInFromUpstreamKV(Key);
+    return std::nullopt;
+  }
+  assert(Value->size() == sizeof(uint64_t));
+  return ObjectID::fromOpaqueData(support::endian::read64le(Value->data()));
+}
+
+Expected<std::optional<ObjectID>>
+UnifiedOnDiskCache::faultInFromUpstreamKV(ArrayRef<uint8_t> Key) {
+  assert(UpstreamGraphDB);
+  assert(UpstreamKVDB);
+
+  std::optional<ArrayRef<char>> UpstreamValue;
+  if (Error E = UpstreamKVDB->get(Key).moveInto(UpstreamValue))
+    return std::move(E);
+  if (!UpstreamValue)
+    return std::nullopt;
+
+  // The value is the \p ObjectID in the context of the upstream
+  // \p OnDiskGraphDB instance. Translate it to the context of the primary
+  // \p OnDiskGraphDB instance.
+  assert(UpstreamValue->size() == sizeof(uint64_t));
+  ObjectID UpstreamID = ObjectID::fromOpaqueData(
+      support::endian::read64le(UpstreamValue->data()));
+  auto PrimaryID =
+      PrimaryGraphDB->getReference(UpstreamGraphDB->getDigest(UpstreamID));
+  if (LLVM_UNLIKELY(!PrimaryID))
+    return PrimaryID.takeError();
+  return KVPut(Key, *PrimaryID);
+}
+
+Error UnifiedOnDiskCache::validateActionCache() {
+  auto ValidateRef = [&](FileOffset Offset, ArrayRef<char> Value) -> Error {
+    assert(Value.size() == sizeof(uint64_t) && "should be validated already");
+    auto ID = ObjectID::fromOpaqueData(support::endian::read64le(Value.data()));
+    auto formatError = [&](Twine Msg) {
+      return createStringError(
+          llvm::errc::illegal_byte_sequence,
+          "bad record at 0x" +
+              utohexstr((uint64_t)Offset.get(), /*LowerCase=*/true) + ": " +
+              Msg.str());
+    };
+    if (ID.getOpaqueData() == 0)
+      return formatError("zero is not a valid ref");
+    return Error::success();
+  };
+  if (Error E = PrimaryKVDB->validate(ValidateRef))
+    return E;
+  if (UpstreamKVDB)
+    return UpstreamKVDB->validate(ValidateRef);
+  return Error::success();
+}
+
+/// \returns all the 'v<version>.<x>' names of sub-directories, sorted with
+/// ascending order of the integer after the dot. Corrupt directories, if
+/// included, will come first.
+static Error getAllDBDirs(StringRef Path, SmallVectorImpl<std::string> &DBDirs,
+                          bool IncludeCorrupt = false) {
+  struct DBDir {
+    uint64_t Order;
+    std::string Name;
+  };
+  SmallVector<DBDir, 6> FoundDBDirs;
+
+  std::error_code EC;
+  for (sys::fs::directory_iterator DirI(Path, EC), DirE; !EC && DirI != DirE;
+       DirI.increment(EC)) {
+    if (DirI->type() != sys::fs::file_type::directory_file)
+      continue;
+    StringRef SubDir = sys::path::filename(DirI->path());
+    if (IncludeCorrupt && SubDir.starts_with(CorruptPrefix)) {
+      FoundDBDirs.push_back({0, std::string(SubDir)});
+      continue;
+    }
+    if (!SubDir.starts_with(DBDirPrefix))
+      continue;
+    uint64_t Order;
+    if (SubDir.substr(DBDirPrefix.size()).getAsInteger(10, Order))
+      return createStringError(inconvertibleErrorCode(),
+                               "unexpected directory " + DirI->path());
+    FoundDBDirs.push_back({Order, std::string(SubDir)});
+  }
+  if (EC)
+    return createFileError(Path, EC);
+
+  llvm::sort(FoundDBDirs, [](const DBDir &LHS, const DBDir &RHS) -> bool {
+    return LHS.Order < RHS.Order; // Must be a strict weak ordering.
+  });
+  for (DBDir &Dir : FoundDBDirs)
+    DBDirs.push_back(std::move(Dir.Name));
+  return Error::success();
+}
+
+static Error getAllGarbageDirs(StringRef Path,
+                               SmallVectorImpl<std::string> &DBDirs) {
+  if (Error E = getAllDBDirs(Path, DBDirs, /*IncludeCorrupt=*/true))
+    return E;
+
+  // FIXME: When the version of \p DBDirPrefix is bumped up we need to figure
+  // out how to handle the leftover sub-directories of the previous version.
+
+  for (unsigned Keep = 2; Keep > 0 && !DBDirs.empty(); --Keep) {
+    StringRef Back(DBDirs.back());
+    if (Back.starts_with(CorruptPrefix))
+      break;
+    DBDirs.pop_back();
+  }
+  return Error::success();
+}
+
+/// \returns Given a sub-directory named 'v<version>.<x>', it outputs the
+/// 'v<version>.<x+1>' name.
+static void getNextDBDirName(StringRef DBDir, llvm::raw_ostream &OS) {
+  assert(DBDir.starts_with(DBDirPrefix));
+  uint64_t Count;
+  bool Failed = DBDir.substr(DBDirPrefix.size()).getAsInteger(10, Count);
+  assert(!Failed);
+  (void)Failed;
+  OS << DBDirPrefix << Count + 1;
+}
+
+static Error validateOutOfProcess(StringRef LLVMCasBinary, StringRef RootPath,
+                                  bool CheckHash) {
+  SmallVector<StringRef> Args{LLVMCasBinary, "-cas", RootPath, "-validate"};
+  if (CheckHash)
+    Args.push_back("-check-hash");
+
+  llvm::SmallString<128> StdErrPath;
+  int StdErrFD = -1;
+  if (std::error_code EC = sys::fs::createTemporaryFile(
+          "llvm-cas-validate-stderr", "txt", StdErrFD, StdErrPath,
+          llvm::sys::fs::OF_Text))
+    return createStringError(EC, "failed to create temporary file");
+  FileRemover OutputRemover(StdErrPath.c_str());
+
+  std::optional<llvm::StringRef> Redirects[] = {
+      {""}, // stdin = /dev/null
+      {""}, // stdout = /dev/null
+      StdErrPath.str(),
+  };
+
+  std::string ErrMsg;
+  int Result =
+      sys::ExecuteAndWait(LLVMCasBinary, Args, /*Env=*/std::nullopt, Redirects,
+                          /*SecondsToWait=*/120, /*MemoryLimit=*/0, &ErrMsg);
+
+  if (Result == -1)
+    return createStringError("failed to exec " + join(Args, " ") + ": " +
+                             ErrMsg);
+  if (Result != 0) {
+    llvm::SmallString<64> Err("cas contents invalid");
+    if (!ErrMsg.empty()) {
+      Err += ": ";
+      Err += ErrMsg;
+    }
+    auto StdErrBuf = MemoryBuffer::getFile(StdErrPath.c_str());
+    if (StdErrBuf && !(*StdErrBuf)->getBuffer().empty()) {
+      Err += ": ";
+      Err += (*StdErrBuf)->getBuffer();
+    }
+    return createStringError(Err);
+  }
+  return Error::success();
+}
+
+static Error validateInProcess(StringRef RootPath, StringRef HashName,
+                               unsigned HashByteSize, bool CheckHash) {
+  std::shared_ptr<UnifiedOnDiskCache> UniDB;
+  if (Error E = UnifiedOnDiskCache::open(RootPath, std::nullopt, HashName,
+                                         HashByteSize)
+                    .moveInto(UniDB))
+    return E;
+  auto CAS = builtin::createObjectStoreFromUnifiedOnDiskCache(UniDB);
+  if (Error E = CAS->validate(CheckHash))
+    return E;
+  if (Error E = UniDB->validateActionCache())
+    return E;
+  return Error::success();
+}
+
+static Expected<uint64_t> getBootTime() {
+#if __has_include(<sys/sysctl.h>) && defined(KERN_BOOTTIME)
+  struct timeval TV;
+  size_t TVLen = sizeof(TV);
+  int KernBoot[2] = {CTL_KERN, KERN_BOOTTIME};
+  if (sysctl(KernBoot, 2, &TV, &TVLen, nullptr, 0) < 0)
+    return createStringError(llvm::errnoAsErrorCode(),
+                             "failed to get boottime");
+  if (TVLen != sizeof(TV))
+    return createStringError("sysctl kern.boottime unexpected format");
+  return TV.tv_sec;
+#elif defined(__linux__)
+  // Use the mtime for /proc, which is recreated during system boot.
+  // We could also read /proc/stat and search for 'btime'.
+  sys::fs::file_status Status;
+  if (std::error_code EC = sys::fs::status("/proc", Status))
+    return createFileError("/proc", EC);
+  return Status.getLastModificationTime().time_since_epoch().count();
+#else
+  llvm::report_fatal_error("unimplemented");
+#endif
+}
+
+Expected<ValidationResult>
+UnifiedOnDiskCache::validateIfNeeded(StringRef RootPath, StringRef HashName,
+                                     unsigned HashByteSize, bool CheckHash,
+                                     bool AllowRecovery, bool ForceValidation,
+                                     std::optional<StringRef> LLVMCasBinary) {
+  if (std::error_code EC = sys::fs::create_directories(RootPath))
+    return createFileError(RootPath, EC);
+
+  SmallString<256> PathBuf(RootPath);
+  sys::path::append(PathBuf, ValidationFilename);
+  int FD = -1;
+  if (std::error_code EC = sys::fs::openFileForReadWrite(
+          PathBuf, FD, sys::fs::CD_OpenAlways, sys::fs::OF_None))
+    return createFileError(PathBuf, EC);
+  assert(FD != -1);
+
+  sys::fs::file_t File = sys::fs::convertFDToNativeFile(FD);
+  auto CloseFile = make_scope_exit([&]() { sys::fs::closeFile(File); });
+
+  if (std::error_code EC = lockFileThreadSafe(FD, sys::fs::LockKind::Exclusive))
+    return createFileError(PathBuf, EC);
+  auto UnlockFD = make_scope_exit([&]() { unlockFileThreadSafe(FD); });
+
+  SmallString<8> Bytes;
+  if (Error E = sys::fs::readNativeFileToEOF(File, Bytes))
+    return createFileError(PathBuf, std::move(E));
+
+  uint64_t ValidationBootTime = 0;
+  if (!Bytes.empty() &&
+      StringRef(Bytes).trim().getAsInteger(10, ValidationBootTime))
+    return createFileError(PathBuf, errc::illegal_byte_sequence,
+                           "expected integer");
+
+  static uint64_t BootTime = 0;
+  if (BootTime == 0)
+    if (Error E = getBootTime().moveInto(BootTime))
+      return std::move(E);
+
+  bool Recovered = false;
+  bool Skipped = false;
+  std::string LogValidationError;
+
+  if (ValidationBootTime == BootTime && !ForceValidation) {
+    Skipped = true;
+    return ValidationResult::Skipped;
+  }
+
+  // Validate!
+  bool NeedsRecovery = false;
+  Error E =
+      LLVMCasBinary
+          ? validateOutOfProcess(*LLVMCasBinary, RootPath, CheckHash)
+          : validateInProcess(RootPath, HashName, HashByteSize, CheckHash);
+  if (E) {
+    if (AllowRecovery) {
+      consumeError(std::move(E));
+      NeedsRecovery = true;
+    } else {
+      return std::move(E);
+    }
+  }
+
+  if (NeedsRecovery) {
+    sys::path::remove_filename(PathBuf);
+    sys::path::append(PathBuf, "lock");
+
+    int LockFD = -1;
+    if (std::error_code EC = sys::fs::openFileForReadWrite(
+            PathBuf, LockFD, sys::fs::CD_OpenAlways, sys::fs::OF_None))
+      return createFileError(PathBuf, EC);
+    sys::fs::file_t LockFile = sys::fs::convertFDToNativeFile(LockFD);
+    auto CloseLock = make_scope_exit([&]() { sys::fs::closeFile(LockFile); });
+    if (std::error_code EC = tryLockFileThreadSafe(LockFD)) {
+      if (EC == std::errc::no_lock_available)
+        return createFileError(
+            PathBuf, EC,
+            "CAS validation requires exclusive access but CAS was in use");
+      return createFileError(PathBuf, EC);
+    }
+    auto UnlockFD = make_scope_exit([&]() { unlockFileThreadSafe(LockFD); });
+
+    SmallVector<std::string, 4> DBDirs;
+    if (Error E = getAllDBDirs(RootPath, DBDirs))
+      return std::move(E);
+
+    for (StringRef DBDir : DBDirs) {
+      sys::path::remove_filename(PathBuf);
+      sys::path::append(PathBuf, DBDir);
+      std::error_code EC;
+      int Attempt = 0, MaxAttempts = 100;
+      SmallString<128> GCPath;
+      for (; Attempt < MaxAttempts; ++Attempt) {
+        GCPath.assign(RootPath);
+        sys::path::append(GCPath, CorruptPrefix + std::to_string(Attempt) +
+                                      "." + DBDir);
+        EC = sys::fs::rename(PathBuf, GCPath);
+        // Darwin uses ENOTEMPTY. Linux may return either ENOTEMPTY or EEXIST.
+        if (EC != errc::directory_not_empty && EC != errc::file_exists)
+          break;
+      }
+      if (Attempt == MaxAttempts)
+        return createStringError(
+            EC, "rename " + PathBuf +
+                    " failed: too many CAS directories awaiting pruning");
+      if (EC)
+        return createStringError(EC, "rename " + PathBuf + " to " + GCPath +
+                                         " failed: " + EC.message());
+    }
+    Recovered = true;
+  }
+
+  if (ValidationBootTime != BootTime) {
+    // Fix filename in case we have error to report.
+    sys::path::remove_filename(PathBuf);
+    sys::path::append(PathBuf, ValidationFilename);
+    if (std::error_code EC = sys::fs::resize_file(FD, 0))
+      return createFileError(PathBuf, EC);
+    raw_fd_ostream OS(FD, /*shouldClose=*/false);
+    OS.seek(0); // resize does not reset position
+    OS << BootTime << '\n';
+    if (OS.has_error())
+      return createFileError(PathBuf, OS.error());
+  }
+
+  return NeedsRecovery ? ValidationResult::Recovered : ValidationResult::Valid;
+}
+
+Expected<std::unique_ptr<UnifiedOnDiskCache>>
+UnifiedOnDiskCache::open(StringRef RootPath, std::optional<uint64_t> SizeLimit,
+                         StringRef HashName, unsigned HashByteSize,
+                         OnDiskGraphDB::FaultInPolicy FaultInPolicy) {
+  if (std::error_code EC = sys::fs::create_directories(RootPath))
+    return createFileError(RootPath, EC);
+
+  SmallString<256> PathBuf(RootPath);
+  sys::path::append(PathBuf, "lock");
+  int LockFD = -1;
+  if (std::error_code EC = sys::fs::openFileForReadWrite(
+          PathBuf, LockFD, sys::fs::CD_OpenAlways, sys::fs::OF_None))
+    return createFileError(PathBuf, EC);
+  assert(LockFD != -1);
+  // Locking the directory using shared lock, which will prevent other processes
+  // from creating a new chain (essentially while a \p UnifiedOnDiskCache
+  // instance holds a shared lock the storage for the primary directory will
+  // grow unrestricted).
+  if (std::error_code EC =
+          lockFileThreadSafe(LockFD, sys::fs::LockKind::Shared))
+    return createFileError(PathBuf, EC);
+
+  SmallVector<std::string, 4> DBDirs;
+  if (Error E = getAllDBDirs(RootPath, DBDirs))
+    return std::move(E);
+  if (DBDirs.empty())
+    DBDirs.push_back((Twine(DBDirPrefix) + "1").str());
+
+  assert(!DBDirs.empty());
+
+  /// If there is only one directory open databases on it. If there are 2 or
+  /// more directories, get the most recent directories and chain them, with the
+  /// most recent being the primary one. The remaining directories are unused
+  /// data that can be garbage-collected.
+  std::unique_ptr<OnDiskGraphDB> UpstreamGraphDB;
+  std::unique_ptr<OnDiskKeyValueDB> UpstreamKVDB;
+  if (DBDirs.size() > 1) {
+    StringRef UpstreamDir = *(DBDirs.end() - 2);
+    PathBuf = RootPath;
+    sys::path::append(PathBuf, UpstreamDir);
+    if (Error E = OnDiskGraphDB::open(PathBuf, HashName, HashByteSize,
+                                      /*UpstreamDB=*/nullptr, FaultInPolicy)
+                      .moveInto(UpstreamGraphDB))
+      return std::move(E);
+    if (Error E = OnDiskKeyValueDB::open(PathBuf, HashName, HashByteSize,
+                                         /*ValueName=*/"objectid",
+                                         /*ValueSize=*/sizeof(uint64_t))
+                      .moveInto(UpstreamKVDB))
+      return std::move(E);
+  }
+  OnDiskGraphDB *UpstreamGraphDBPtr = UpstreamGraphDB.get();
+
+  StringRef PrimaryDir = *(DBDirs.end() - 1);
+  PathBuf = RootPath;
+  sys::path::append(PathBuf, PrimaryDir);
+  std::unique_ptr<OnDiskGraphDB> PrimaryGraphDB;
+  if (Error E = OnDiskGraphDB::open(PathBuf, HashName, HashByteSize,
+                                    std::move(UpstreamGraphDB), FaultInPolicy)
+                    .moveInto(PrimaryGraphDB))
+    return std::move(E);
+  std::unique_ptr<OnDiskKeyValueDB> PrimaryKVDB;
+  // \p UnifiedOnDiskCache does manual chaining for key-value requests,
+  // including an extra translation step of the value during fault-in.
+  if (Error E = OnDiskKeyValueDB::open(PathBuf, HashName, HashByteSize,
+                                       /*ValueName=*/"objectid",
+                                       /*ValueSize=*/sizeof(uint64_t))
+                    .moveInto(PrimaryKVDB))
+    return std::move(E);
+
+  auto UniDB = std::unique_ptr<UnifiedOnDiskCache>(new UnifiedOnDiskCache());
+  UniDB->RootPath = RootPath;
+  UniDB->SizeLimit = SizeLimit.value_or(0);
+  UniDB->LockFD = LockFD;
+  UniDB->NeedsGarbageCollection = DBDirs.size() > 2;
+  UniDB->PrimaryDBDir = PrimaryDir;
+  UniDB->UpstreamGraphDB = UpstreamGraphDBPtr;
+  UniDB->PrimaryGraphDB = std::move(PrimaryGraphDB);
+  UniDB->UpstreamKVDB = std::move(UpstreamKVDB);
+  UniDB->PrimaryKVDB = std::move(PrimaryKVDB);
+
+  return std::move(UniDB);
+}
+
+/// Replaces the intended size limit; \c std::nullopt is stored as 0.
+void UnifiedOnDiskCache::setSizeLimit(std::optional<uint64_t> SizeLimit) {
+  const uint64_t NewLimit = SizeLimit ? *SizeLimit : 0;
+  this->SizeLimit = NewLimit;
+}
+
+/// Returns the storage size of the primary databases plus that of whichever
+/// upstream databases are currently open.
+uint64_t UnifiedOnDiskCache::getStorageSize() const {
+  uint64_t Sum = getPrimaryStorageSize();
+  Sum += UpstreamGraphDB ? UpstreamGraphDB->getStorageSize() : 0;
+  Sum += UpstreamKVDB ? UpstreamKVDB->getStorageSize() : 0;
+  return Sum;
+}
+
+/// Returns the combined storage size of the primary graph and key-value
+/// databases.
+uint64_t UnifiedOnDiskCache::getPrimaryStorageSize() const {
+  const uint64_t GraphSize = PrimaryGraphDB->getStorageSize();
+  const uint64_t KVSize = PrimaryKVDB->getStorageSize();
+  return GraphSize + KVSize;
+}
+
+/// Reports whether the primary directory has outgrown its share of the
+/// configured size limit. A limit of 0 disables the check entirely.
+bool UnifiedOnDiskCache::hasExceededSizeLimit() const {
+  const uint64_t Limit = SizeLimit;
+  if (Limit == 0)
+    return false;
+
+  // Independently of the configured limit, ask for a clean-up once either
+  // primary database uses more than 85% of its hard storage limit.
+  const unsigned HardLimitPercent =
+      std::max(PrimaryGraphDB->getHardStorageLimitUtilization(),
+               PrimaryKVDB->getHardStorageLimitUtilization());
+  if (HardLimitPercent > 85)
+    return true;
+
+  // Each directory in the chain may grow up to half of the intended limit, so
+  // only the primary directory is measured when deciding whether a new chain
+  // has to be started.
+  //
+  // Measuring primary plus upstream instead would start a new chain before the
+  // primary reached its own share whenever the upstream is much larger than
+  // intended. Reclaiming that storage later is preferred because it gives more
+  // consistent cache-hit behavior.
+  const uint64_t PerDirectoryLimit = Limit / 2;
+  return getPrimaryStorageSize() > PerDirectoryLimit;
+}
+
+/// Closes the databases and releases the lock file.
+///
+/// When \p CheckSizeLimit is set and the primary directory has exceeded its
+/// size limit, this additionally tries to create the next primary directory so
+/// that a new chain is used the next time this path is opened. Calling it
+/// again after a completed close is a no-op.
+///
+/// \returns an error if unlocking, re-locking or creating the new directory
+/// fails; failing to obtain the exclusive lock is not an error.
+Error UnifiedOnDiskCache::close(bool CheckSizeLimit) {
+  if (LockFD == -1)
+    return Error::success(); // already closed.
+  // Whichever path is taken below, the lock file is closed on exit and LockFD
+  // is reset so that later calls return early.
+  auto _1 = make_scope_exit([&]() {
+    assert(LockFD >= 0);
+    sys::fs::file_t LockFile = sys::fs::convertFDToNativeFile(LockFD);
+    sys::fs::closeFile(LockFile);
+    LockFD = -1;
+  });
+
+  // The size must be sampled first: the check dereferences the primary
+  // databases, which are destroyed right below.
+  bool ExceededSizeLimit = CheckSizeLimit ? hasExceededSizeLimit() : false;
+  PrimaryKVDB.reset();
+  UpstreamKVDB.reset();
+  PrimaryGraphDB.reset();
+  UpstreamGraphDB = nullptr;
+  if (std::error_code EC = unlockFileThreadSafe(LockFD))
+    return createFileError(RootPath, EC);
+
+  if (!ExceededSizeLimit)
+    return Error::success();
+
+  // The primary directory exceeded its intended size limit. Try to get an
+  // exclusive lock in order to create a new primary directory for next time
+  // this \p UnifiedOnDiskCache path is opened.
+
+  if (std::error_code EC = tryLockFileThreadSafe(
+          LockFD, std::chrono::milliseconds(0), sys::fs::LockKind::Exclusive)) {
+    if (EC == errc::no_lock_available)
+      return Error::success(); // couldn't get exclusive lock, give up.
+    return createFileError(RootPath, EC);
+  }
+  // Declared after _1, so it runs first: the exclusive lock is dropped before
+  // the lock file is closed.
+  auto _2 = make_scope_exit([&]() { unlockFileThreadSafe(LockFD); });
+
+  // Managed to get an exclusive lock which means there are no other open
+  // \p UnifiedOnDiskCache instances for the same path, so we can safely start a
+  // new primary directory. To start a new primary directory we just have to
+  // create a new empty directory with the next consecutive index; since this is
+  // an atomic operation we will leave the top-level directory in a consistent
+  // state even if the process dies during this code-path.
+
+  SmallString<256> PathBuf(RootPath);
+  raw_svector_ostream OS(PathBuf);
+  OS << sys::path::get_separator();
+  getNextDBDirName(PrimaryDBDir, OS);
+  if (std::error_code EC = sys::fs::create_directory(PathBuf))
+    return createFileError(PathBuf, EC);
+
+  // The directory that was primary until now is no longer the newest one.
+  NeedsGarbageCollection = true;
+  return Error::success();
+}
+
+UnifiedOnDiskCache::UnifiedOnDiskCache() = default;
+
+// A destructor cannot report failure, so any error returned by close() is
+// deliberately discarded here.
+UnifiedOnDiskCache::~UnifiedOnDiskCache() { consumeError(close()); }
+
+/// Removes every directory under \p Path that \c getAllGarbageDirs() reports.
+/// Stops at, and returns, the first error.
+Error UnifiedOnDiskCache::collectGarbage(StringRef Path) {
+  SmallVector<std::string, 4> GarbageDirs;
+  if (Error Err = getAllGarbageDirs(Path, GarbageDirs))
+    return Err;
+
+  // One buffer is reused: each name is appended for the removal and stripped
+  // off again afterwards.
+  SmallString<256> DirPath(Path);
+  for (const std::string &DirName : GarbageDirs) {
+    sys::path::append(DirPath, DirName);
+    std::error_code EC = sys::fs::remove_directories(DirPath);
+    if (EC)
+      return createFileError(DirPath, EC);
+    sys::path::remove_filename(DirPath);
+  }
+  return Error::success();
+}
+
+/// Collects garbage under this instance's own root path.
+Error UnifiedOnDiskCache::collectGarbage() {
+  return collectGarbage(RootPath);
+}
diff --git a/llvm/unittests/CAS/ActionCacheTest.cpp b/llvm/unittests/CAS/ActionCacheTest.cpp
index db67e30ca203b..692da230b6e09 100644
--- a/llvm/unittests/CAS/ActionCacheTest.cpp
+++ b/llvm/unittests/CAS/ActionCacheTest.cpp
@@ -21,7 +21,7 @@ using namespace llvm;
 using namespace llvm::cas;
 
 TEST_P(CASTest, ActionCacheHit) {
-  std::shared_ptr<ObjectStore> CAS = createObjectStore();
+  std::unique_ptr<ObjectStore> CAS = createObjectStore();
   std::unique_ptr<ActionCache> Cache = createActionCache();
 
   std::optional<ObjectProxy> ID;
@@ -36,7 +36,7 @@ TEST_P(CASTest, ActionCacheHit) {
 }
 
 TEST_P(CASTest, ActionCacheMiss) {
-  std::shared_ptr<ObjectStore> CAS = createObjectStore();
+  std::unique_ptr<ObjectStore> CAS = createObjectStore();
   std::unique_ptr<ActionCache> Cache = createActionCache();
 
   std::optional<ObjectProxy> ID1, ID2;
@@ -59,7 +59,7 @@ TEST_P(CASTest, ActionCacheMiss) {
 }
 
 TEST_P(CASTest, ActionCacheRewrite) {
-  std::shared_ptr<ObjectStore> CAS = createObjectStore();
+  std::unique_ptr<ObjectStore> CAS = createObjectStore();
   std::unique_ptr<ActionCache> Cache = createActionCache();
 
   std::optional<ObjectProxy> ID1, ID2;
diff --git a/llvm/unittests/CAS/BuiltinUnifiedCASDatabasesTest.cpp b/llvm/unittests/CAS/BuiltinUnifiedCASDatabasesTest.cpp
new file mode 100644
index 0000000000000..19522e9372d85
--- /dev/null
+++ b/llvm/unittests/CAS/BuiltinUnifiedCASDatabasesTest.cpp
@@ -0,0 +1,67 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CAS/BuiltinUnifiedCASDatabases.h"
+#include "CASTestConfig.h"
+#include "llvm/CAS/ActionCache.h"
+#include "llvm/CAS/ObjectStore.h"
+#include "llvm/Testing/Support/Error.h"
+#include "llvm/Testing/Support/SupportHelpers.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace llvm::cas;
+
+// Checks that querying isMaterialized() for an object keeps that object
+// available after pruneStorageData(), across three consecutive open/close
+// cycles of the same on-disk unified CAS directory.
+TEST_F(OnDiskCASTest, UnifiedCASMaterializationCheckPreventsGarbageCollection) {
+  unittest::TempDir Temp("on-disk-unified-cas", /*Unique=*/true);
+
+  // Opens the databases stored in Temp, sets the size limit to 1 and runs
+  // Action on the object store. The databases are destroyed again when the
+  // helper returns.
+  // NOTE(review): presumably the tiny limit is what makes every close start a
+  // new chain - confirm against the ObjectStore implementation.
+  auto WithCAS = [&](llvm::function_ref<void(ObjectStore &)> Action) {
+    std::pair<std::unique_ptr<ObjectStore>, std::unique_ptr<ActionCache>> DBs;
+    ASSERT_THAT_ERROR(
+        createOnDiskUnifiedCASDatabases(Temp.path()).moveInto(DBs),
+        Succeeded());
+    ObjectStore &CAS = *DBs.first;
+    ASSERT_THAT_ERROR(CAS.setSizeLimit(1), Succeeded());
+    Action(CAS);
+  };
+
+  std::optional<CASID> ID;
+
+  // Create an object in the CAS.
+  WithCAS([&ID](ObjectStore &CAS) {
+    std::optional<ObjectRef> Ref;
+    ASSERT_THAT_ERROR(CAS.store({}, "blah").moveInto(Ref), Succeeded());
+    ASSERT_TRUE(Ref.has_value());
+
+    ID = CAS.getID(*Ref);
+  });
+  // A failed ASSERT_* inside the helper only leaves the lambda, so make sure
+  // the ID really was produced before it is dereferenced below.
+  ASSERT_TRUE(ID.has_value());
+
+  // Check materialization and prune the storage.
+  WithCAS([&ID](ObjectStore &CAS) {
+    std::optional<ObjectRef> Ref = CAS.getReference(*ID);
+    ASSERT_TRUE(Ref.has_value());
+
+    std::optional<bool> IsMaterialized;
+    ASSERT_THAT_ERROR(CAS.isMaterialized(*Ref).moveInto(IsMaterialized),
+                      Succeeded());
+    // Test the reported value; ASSERT_TRUE on the optional itself would only
+    // test has_value().
+    ASSERT_TRUE(IsMaterialized.has_value());
+    ASSERT_TRUE(*IsMaterialized);
+
+    ASSERT_THAT_ERROR(CAS.pruneStorageData(), Succeeded());
+  });
+
+  // Verify that the previous materialization check kept the object in the CAS.
+  WithCAS([&ID](ObjectStore &CAS) {
+    std::optional<ObjectRef> Ref = CAS.getReference(*ID);
+    ASSERT_TRUE(Ref.has_value());
+
+    std::optional<bool> IsMaterialized;
+    ASSERT_THAT_ERROR(CAS.isMaterialized(*Ref).moveInto(IsMaterialized),
+                      Succeeded());
+    ASSERT_TRUE(IsMaterialized.has_value());
+    ASSERT_TRUE(*IsMaterialized);
+  });
+}
diff --git a/llvm/unittests/CAS/CASTestConfig.cpp b/llvm/unittests/CAS/CASTestConfig.cpp
index 91d0970367ac3..dc1578a50541c 100644
--- a/llvm/unittests/CAS/CASTestConfig.cpp
+++ b/llvm/unittests/CAS/CASTestConfig.cpp
@@ -8,13 +8,20 @@
 
 #include "CASTestConfig.h"
 #include "llvm/CAS/ObjectStore.h"
+#include "llvm/Testing/Support/Error.h"
 #include "gtest/gtest.h"
 
 using namespace llvm;
 using namespace llvm::cas;
 
+namespace llvm::unittest::cas {
+void MockEnv::anchor() {}
+MockEnv::~MockEnv() {}
+} // namespace llvm::unittest::cas
+
 static CASTestingEnv createInMemory(int I) {
-  return CASTestingEnv{createInMemoryCAS(), createInMemoryActionCache()};
+  return CASTestingEnv{createInMemoryCAS(), createInMemoryActionCache(),
+                       nullptr, std::nullopt};
 }
 
 INSTANTIATE_TEST_SUITE_P(InMemoryCAS, CASTest,
@@ -22,7 +29,7 @@ INSTANTIATE_TEST_SUITE_P(InMemoryCAS, CASTest,
 
 #if LLVM_ENABLE_ONDISK_CAS
 namespace llvm::cas::ondisk {
-extern void setMaxMappingSize(uint64_t Size);
+void setMaxMappingSize(uint64_t Size);
 } // namespace llvm::cas::ondisk
 
 void setMaxOnDiskCASMappingSize() {
@@ -30,6 +37,18 @@ void setMaxOnDiskCASMappingSize() {
   std::call_once(
       Flag, [] { llvm::cas::ondisk::setMaxMappingSize(100 * 1024 * 1024); });
 }
+
+// Factory for the OnDiskCAS instantiation of CASTest: creates an on-disk
+// object store and an on-disk action cache inside one fresh temporary
+// directory and hands the directory back together with them. The index
+// argument is not used. A creation failure is reported through EXPECT_*, so
+// the returned pointers can be null in that case.
+CASTestingEnv createOnDisk(int I) {
+  unittest::TempDir Temp("on-disk-cas", /*Unique=*/true);
+  std::unique_ptr<ObjectStore> CAS;
+  EXPECT_THAT_ERROR(createOnDiskCAS(Temp.path()).moveInto(CAS), Succeeded());
+  std::unique_ptr<ActionCache> Cache;
+  EXPECT_THAT_ERROR(createOnDiskActionCache(Temp.path()).moveInto(Cache),
+                    Succeeded());
+  return CASTestingEnv{std::move(CAS), std::move(Cache), nullptr,
+                       std::move(Temp)};
+}
+INSTANTIATE_TEST_SUITE_P(OnDiskCAS, CASTest, ::testing::Values(createOnDisk));
 #else
 void setMaxOnDiskCASMappingSize() {}
 #endif /* LLVM_ENABLE_ONDISK_CAS */
diff --git a/llvm/unittests/CAS/CASTestConfig.h b/llvm/unittests/CAS/CASTestConfig.h
index c08968b95b9cc..27033c93d57bb 100644
--- a/llvm/unittests/CAS/CASTestConfig.h
+++ b/llvm/unittests/CAS/CASTestConfig.h
@@ -6,16 +6,29 @@
 //
 //===----------------------------------------------------------------------===//
 
+#ifndef LLVM_UNITTESTS_CASTESTCONFIG_H
+#define LLVM_UNITTESTS_CASTESTCONFIG_H
+
 #include "llvm/CAS/ActionCache.h"
 #include "llvm/CAS/ObjectStore.h"
+#include "llvm/Testing/Support/SupportHelpers.h"
 #include "gtest/gtest.h"
+#include <memory>
 
-#ifndef LLVM_UNITTESTS_CASTESTCONFIG_H
-#define LLVM_UNITTESTS_CASTESTCONFIG_H
+namespace llvm::unittest::cas {
+// Polymorphic base for extra per-test state that a CAS factory may hand back
+// through CASTestingEnv.
+class MockEnv {
+  // Defined out of line in CASTestConfig.cpp.
+  // NOTE(review): presumably there to pin the class to a single translation
+  // unit - confirm.
+  void anchor();
+
+public:
+  virtual ~MockEnv();
+};
+} // namespace llvm::unittest::cas
 
 struct CASTestingEnv {
   std::unique_ptr<llvm::cas::ObjectStore> CAS;
   std::unique_ptr<llvm::cas::ActionCache> Cache;
+  // Optional extra state; the in-memory and on-disk factories leave it null.
   std::unique_ptr<llvm::unittest::cas::MockEnv> Env;
+  // Temporary directory of an on-disk environment; empty for in-memory ones.
+  // NOTE(review): presumably held so the directory outlives the databases -
+  // confirm.
   std::optional<llvm::unittest::TempDir> Temp;
 };
 
 void setMaxOnDiskCASMappingSize();
@@ -24,26 +37,49 @@ void setMaxOnDiskCASMappingSize();
 class OnDiskCASTest : public ::testing::Test {
 protected:
   void SetUp() override {
+    // Every test using this fixture is skipped when the on-disk CAS is
+    // compiled out.
+#if !LLVM_ENABLE_ONDISK_CAS
+    GTEST_SKIP() << "OnDiskCAS is not enabled";
+#endif
     // Use a smaller database size for testing to conserve disk space.
     setMaxOnDiskCASMappingSize();
   }
 };
 
+// Parameterized test fixture for ObjectStore and ActionCache tests.
 class CASTest
     : public testing::TestWithParam<std::function<CASTestingEnv(int)>> {
 protected:
   std::optional<int> NextCASIndex;
 
+  llvm::SmallVector<llvm::unittest::TempDir> Dirs;
+
+  llvm::SmallVector<std::unique_ptr<llvm::unittest::cas::MockEnv>> Envs;
+
   std::unique_ptr<llvm::cas::ObjectStore> createObjectStore() {
     auto TD = GetParam()(++(*NextCASIndex));
+    if (TD.Temp)
+      Dirs.push_back(std::move(*TD.Temp));
+    if (TD.Env)
+      Envs.emplace_back(std::move(TD.Env));
     return std::move(TD.CAS);
   }
   std::unique_ptr<llvm::cas::ActionCache> createActionCache() {
     auto TD = GetParam()(++(*NextCASIndex));
+    if (TD.Temp)
+      Dirs.push_back(std::move(*TD.Temp));
+    if (TD.Env)
+      Envs.emplace_back(std::move(TD.Env));
     return std::move(TD.Cache);
   }
-  void SetUp() { NextCASIndex = 0; }
-  void TearDown() { NextCASIndex = std::nullopt; }
+  void SetUp() {
+    NextCASIndex = 0;
+    setMaxOnDiskCASMappingSize();
+  }
+  void TearDown() {
+    NextCASIndex = std::nullopt;
+    Dirs.clear();
+    Envs.clear();
+  }
 };
 
 #endif
diff --git a/llvm/unittests/CAS/CMakeLists.txt b/llvm/unittests/CAS/CMakeLists.txt
index da469f7fccb5a..91e49be770745 100644
--- a/llvm/unittests/CAS/CMakeLists.txt
+++ b/llvm/unittests/CAS/CMakeLists.txt
@@ -1,9 +1,11 @@
 set(ONDISK_CAS_TEST_SOURCES
+  BuiltinUnifiedCASDatabasesTest.cpp
   OnDiskGraphDBTest.cpp
   OnDiskDataAllocatorTest.cpp
   OnDiskKeyValueDBTest.cpp
   OnDiskTrieRawHashMapTest.cpp
   ProgramTest.cpp
+  UnifiedOnDiskCacheTest.cpp
   )
 
 set(LLVM_OPTIONAL_SOURCES
diff --git a/llvm/unittests/CAS/ObjectStoreTest.cpp b/llvm/unittests/CAS/ObjectStoreTest.cpp
index 54083fdb408f6..b43ae33d74127 100644
--- a/llvm/unittests/CAS/ObjectStoreTest.cpp
+++ b/llvm/unittests/CAS/ObjectStoreTest.cpp
@@ -1,4 +1,4 @@
-//===- ObjectStoreTest.cpp ------------------------------------------------===//
+//===----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -76,7 +76,7 @@ multiline text multiline text multiline text multiline text multiline text)",
 
   // Run validation on all CASIDs.
   for (int I = 0, E = IDs.size(); I != E; ++I)
-    ASSERT_THAT_ERROR(CAS1->validate(IDs[I]), Succeeded());
+    ASSERT_THAT_ERROR(CAS1->validateObject(IDs[I]), Succeeded());
 
   // Check that the blobs can be retrieved multiple times.
   for (int I = 0, E = IDs.size(); I != E; ++I) {
@@ -120,15 +120,15 @@ TEST_P(CASTest, BlobsBig) {
     std::optional<CASID> ID2;
     ASSERT_THAT_ERROR(CAS->createProxy({}, String1).moveInto(ID1), Succeeded());
     ASSERT_THAT_ERROR(CAS->createProxy({}, String1).moveInto(ID2), Succeeded());
-    ASSERT_THAT_ERROR(CAS->validate(*ID1), Succeeded());
-    ASSERT_THAT_ERROR(CAS->validate(*ID2), Succeeded());
+    ASSERT_THAT_ERROR(CAS->validateObject(*ID1), Succeeded());
+    ASSERT_THAT_ERROR(CAS->validateObject(*ID2), Succeeded());
     ASSERT_EQ(ID1, ID2);
 
     String1.append(String2);
     ASSERT_THAT_ERROR(CAS->createProxy({}, String2).moveInto(ID1), Succeeded());
     ASSERT_THAT_ERROR(CAS->createProxy({}, String2).moveInto(ID2), Succeeded());
-    ASSERT_THAT_ERROR(CAS->validate(*ID1), Succeeded());
-    ASSERT_THAT_ERROR(CAS->validate(*ID2), Succeeded());
+    ASSERT_THAT_ERROR(CAS->validateObject(*ID1), Succeeded());
+    ASSERT_THAT_ERROR(CAS->validateObject(*ID2), Succeeded());
     ASSERT_EQ(ID1, ID2);
     String2.append(String1);
   }
@@ -176,10 +176,11 @@ multiline text multiline text multiline text multiline text multiline text)",
 
     // Check basic printing of IDs.
     IDs.push_back(CAS1->getID(*Node));
-    auto ID = CAS1->getID(Nodes.back());
-    EXPECT_EQ(ID.toString(), IDs.back().toString());
-    EXPECT_EQ(*Node, Nodes.back());
-    EXPECT_EQ(ID, IDs.back());
+    EXPECT_EQ(IDs.back().toString(), IDs.back().toString());
+    EXPECT_EQ(Nodes.front(), Nodes.front());
+    EXPECT_EQ(Nodes.back(), Nodes.back());
+    EXPECT_EQ(IDs.front(), IDs.front());
+    EXPECT_EQ(IDs.back(), IDs.back());
     if (Nodes.size() <= 1)
       continue;
     EXPECT_NE(Nodes.front(), Nodes.back());
@@ -266,7 +267,7 @@ TEST_P(CASTest, NodesBig) {
   }
 
   for (auto ID : CreatedNodes)
-    ASSERT_THAT_ERROR(CAS->validate(CAS->getID(ID)), Succeeded());
+    ASSERT_THAT_ERROR(CAS->validateObject(CAS->getID(ID)), Succeeded());
 }
 
 #if LLVM_ENABLE_THREADS
@@ -332,17 +333,124 @@ static void testBlobsParallel1(ObjectStore &CAS, uint64_t BlobSize) {
 }
 
 TEST_P(CASTest, BlobsParallel) {
-  std::shared_ptr<ObjectStore> CAS = createObjectStore();
+  std::unique_ptr<ObjectStore> CAS = createObjectStore();
   uint64_t Size = 1ULL * 1024;
   ASSERT_NO_FATAL_FAILURE(testBlobsParallel1(*CAS, Size));
 }
 
 #ifdef EXPENSIVE_CHECKS
 TEST_P(CASTest, BlobsBigParallel) {
-  std::shared_ptr<ObjectStore> CAS = createObjectStore();
+  std::unique_ptr<ObjectStore> CAS = createObjectStore();
   // 100k is large enough to be standalone files in our on-disk cas.
   uint64_t Size = 100ULL * 1024;
   ASSERT_NO_FATAL_FAILURE(testBlobsParallel1(*CAS, Size));
 }
 #endif // EXPENSIVE_CHECKS
+
+#ifndef _WIN32 // create_link won't work for directories on Windows
+// Runs the parallel blob test against four ObjectStore instances that all
+// refer to the same on-disk directory: once directly, three times through
+// links.
+TEST_F(OnDiskCASTest, OnDiskCASBlobsParallelMultiCAS) {
+  // This test intentionally uses symlinked paths to the same CAS to subvert the
+  // shared memory mappings that would normally be created within a single
+  // process. This breaks the lock file guarantees, so we must be careful not
+  // to create or destroy the CAS objects concurrently, which is when the locks
+  // are normally important.
+  unittest::TempDir Temp("on-disk-cas", /*Unique=*/true);
+  ASSERT_EQ(sys::fs::create_directory(Temp.path("real_cas")),
+            std::error_code());
+  // The link target is relative, i.e. resolved next to the link itself.
+  ASSERT_EQ(sys::fs::create_link("real_cas", Temp.path("sym_cas1")),
+            std::error_code());
+  ASSERT_EQ(sys::fs::create_link("real_cas", Temp.path("sym_cas2")),
+            std::error_code());
+  ASSERT_EQ(sys::fs::create_link("real_cas", Temp.path("sym_cas3")),
+            std::error_code());
+
+  // Open the instances one after another, never concurrently (see above).
+  std::unique_ptr<ObjectStore> CAS1, CAS2, CAS3, CAS4;
+  ASSERT_THAT_ERROR(createOnDiskCAS(Temp.path("real_cas")).moveInto(CAS1),
+                    Succeeded());
+  ASSERT_THAT_ERROR(createOnDiskCAS(Temp.path("sym_cas1")).moveInto(CAS2),
+                    Succeeded());
+  ASSERT_THAT_ERROR(createOnDiskCAS(Temp.path("sym_cas2")).moveInto(CAS3),
+                    Succeeded());
+  ASSERT_THAT_ERROR(createOnDiskCAS(Temp.path("sym_cas3")).moveInto(CAS4),
+                    Succeeded());
+
+  uint64_t Size = 1ULL * 1024;
+  ASSERT_NO_FATAL_FAILURE(testBlobsParallel(*CAS1, *CAS2, *CAS3, *CAS4, Size));
+}
+
+// Same set-up as OnDiskCASBlobsParallelMultiCAS, but with 100k blobs.
+TEST_F(OnDiskCASTest, OnDiskCASBlobsBigParallelMultiCAS) {
+  // See comment in OnDiskCASBlobsParallelMultiCAS.
+  unittest::TempDir Temp("on-disk-cas", /*Unique=*/true);
+  ASSERT_EQ(sys::fs::create_directory(Temp.path("real_cas")),
+            std::error_code());
+  ASSERT_EQ(sys::fs::create_link("real_cas", Temp.path("sym_cas1")),
+            std::error_code());
+  ASSERT_EQ(sys::fs::create_link("real_cas", Temp.path("sym_cas2")),
+            std::error_code());
+  ASSERT_EQ(sys::fs::create_link("real_cas", Temp.path("sym_cas3")),
+            std::error_code());
+
+  std::unique_ptr<ObjectStore> CAS1, CAS2, CAS3, CAS4;
+  ASSERT_THAT_ERROR(createOnDiskCAS(Temp.path("real_cas")).moveInto(CAS1),
+                    Succeeded());
+  ASSERT_THAT_ERROR(createOnDiskCAS(Temp.path("sym_cas1")).moveInto(CAS2),
+                    Succeeded());
+  ASSERT_THAT_ERROR(createOnDiskCAS(Temp.path("sym_cas2")).moveInto(CAS3),
+                    Succeeded());
+  ASSERT_THAT_ERROR(createOnDiskCAS(Temp.path("sym_cas3")).moveInto(CAS4),
+                    Succeeded());
+
+  // 100k is large enough to be standalone files in our on-disk cas.
+  uint64_t Size = 100ULL * 1024;
+  ASSERT_NO_FATAL_FAILURE(testBlobsParallel(*CAS1, *CAS2, *CAS3, *CAS4, Size));
+}
+#endif // _WIN32
 #endif // LLVM_ENABLE_THREADS
+
+// Checks the on-disk size of the "index." and "data." files of an on-disk
+// CAS: exactly MaxSize while the CAS is open, smaller once it is closed.
+TEST_F(OnDiskCASTest, OnDiskCASDiskSize) {
+  unittest::TempDir Temp("on-disk-cas", /*Unique=*/true);
+  std::unique_ptr<ObjectStore> CAS;
+  ASSERT_THAT_ERROR(createOnDiskCAS(Temp.path()).moveInto(CAS), Succeeded());
+
+  // Same value that setMaxOnDiskCASMappingSize() configures as the maximum
+  // mapping size.
+  uint64_t MaxSize = 100 * 1024 * 1024;
+
+  // Check that we map the files to the correct size.
+  // Only direct children of the temporary directory are inspected, and files
+  // whose name ends in ".shared" are ignored. At least one index file and one
+  // data file must be present.
+  auto CheckFileSizes = [&](bool Mapped) {
+    bool FoundIndex = false, FoundData = false;
+    std::error_code EC;
+    for (sys::fs::directory_iterator I(Temp.path(), EC), E; I != E && !EC;
+         I.increment(EC)) {
+      StringRef Filename = sys::path::filename(I->path());
+      if (Filename.starts_with("index.") && !Filename.ends_with(".shared")) {
+        FoundIndex = true;
+        ASSERT_TRUE(I->status());
+        if (Mapped)
+          EXPECT_EQ(I->status()->getSize(), MaxSize);
+        else
+          EXPECT_LT(I->status()->getSize(), MaxSize);
+      }
+      if (Filename.starts_with("data.") && !Filename.ends_with(".shared")) {
+        FoundData = true;
+        ASSERT_TRUE(I->status());
+        if (Mapped)
+          EXPECT_EQ(I->status()->getSize(), MaxSize);
+        else
+          EXPECT_LT(I->status()->getSize(), MaxSize);
+      }
+    }
+    ASSERT_TRUE(FoundIndex);
+    ASSERT_TRUE(FoundData);
+  };
+
+  // Check that we have the full mapping size when the CAS is open.
+  CheckFileSizes(/*Mapped=*/true);
+  CAS.reset();
+  // Check that the CAS is shrunk to a smaller size.
+  CheckFileSizes(/*Mapped=*/false);
+
+  // Repeat the checks when starting from an existing CAS.
+  ASSERT_THAT_ERROR(createOnDiskCAS(Temp.path()).moveInto(CAS), Succeeded());
+  CheckFileSizes(/*Mapped=*/true);
+  CAS.reset();
+  CheckFileSizes(/*Mapped=*/false);
+}
diff --git a/llvm/unittests/CAS/UnifiedOnDiskCacheTest.cpp b/llvm/unittests/CAS/UnifiedOnDiskCacheTest.cpp
new file mode 100644
index 0000000000000..e25288a26eb92
--- /dev/null
+++ b/llvm/unittests/CAS/UnifiedOnDiskCacheTest.cpp
@@ -0,0 +1,191 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CAS/UnifiedOnDiskCache.h"
+#include "CASTestConfig.h"
+#include "OnDiskCommonUtils.h"
+#include "llvm/Testing/Support/Error.h"
+#include "llvm/Testing/Support/SupportHelpers.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace llvm::cas;
+using namespace llvm::cas::ondisk;
+using namespace llvm::unittest::cas;
+
+/// Walks the directory tree rooted at \p Path and adds up the sizes of all
+/// entries that are not directories. The first iteration or status error
+/// aborts the walk and is returned as a file error.
+static Expected<size_t> countFileSizes(StringRef Path) {
+  std::error_code IterEC;
+  size_t Sum = 0;
+  sys::fs::directory_iterator It(Path, IterEC), End;
+  while (!IterEC && It != End) {
+    if (It->type() != sys::fs::file_type::directory_file) {
+      ErrorOr<sys::fs::basic_file_status> Status = It->status();
+      if (!Status)
+        return createFileError(It->path(), Status.getError());
+      Sum += Status->getSize();
+    } else {
+      // Descend into the sub-directory and add its total.
+      Expected<size_t> Nested = countFileSizes(It->path());
+      if (!Nested)
+        return Nested.takeError();
+      Sum += *Nested;
+    }
+    It.increment(IterEC);
+  }
+  if (IterEC)
+    return createFileError(Path, IterEC);
+  return Sum;
+}
+
+// End-to-end scenario for UnifiedOnDiskCache: store a small object graph and
+// two key-value entries, grow the cache past its size limit twice with
+// reopens in between, collect garbage, and check which data is still there.
+TEST_F(OnDiskCASTest, UnifiedOnDiskCacheTest) {
+  unittest::TempDir Temp("ondisk-unified", /*Unique=*/true);
+  std::unique_ptr<UnifiedOnDiskCache> UniDB;
+
+  const uint64_t SizeLimit = 1024ull * 64;
+  // Destroys the current instance, if any, and opens the same path again.
+  auto reopenDB = [&]() {
+    UniDB.reset();
+    ASSERT_THAT_ERROR(UnifiedOnDiskCache::open(Temp.path(), SizeLimit, "blake3",
+                                               sizeof(HashType))
+                          .moveInto(UniDB),
+                      Succeeded());
+  };
+
+  reopenDB();
+
+  HashType RootHash;
+  HashType OtherHash;
+  HashType Key1Hash;
+  HashType Key2Hash;
+  // Populate the cache: a "root" node referencing "1" and "2", an unrelated
+  // "other" node, "key1" mapped to the root and "key2" mapped to node "1".
+  {
+    OnDiskGraphDB &DB = UniDB->getGraphDB();
+    std::optional<ObjectID> ID1;
+    ASSERT_THAT_ERROR(store(DB, "1", {}).moveInto(ID1), Succeeded());
+    std::optional<ObjectID> ID2;
+    ASSERT_THAT_ERROR(store(DB, "2", {}).moveInto(ID2), Succeeded());
+    std::optional<ObjectID> IDRoot;
+    ASSERT_THAT_ERROR(store(DB, "root", {*ID1, *ID2}).moveInto(IDRoot),
+                      Succeeded());
+    ArrayRef<uint8_t> Digest = DB.getDigest(*IDRoot);
+    ASSERT_EQ(Digest.size(), RootHash.size());
+    llvm::copy(Digest, RootHash.data());
+
+    std::optional<ObjectID> IDOther;
+    ASSERT_THAT_ERROR(store(DB, "other", {}).moveInto(IDOther), Succeeded());
+    Digest = DB.getDigest(*IDOther);
+    ASSERT_EQ(Digest.size(), OtherHash.size());
+    llvm::copy(Digest, OtherHash.data());
+
+    Key1Hash = digest("key1");
+    std::optional<ObjectID> Val;
+    ASSERT_THAT_ERROR(UniDB->KVPut(Key1Hash, *IDRoot).moveInto(Val),
+                      Succeeded());
+    EXPECT_EQ(IDRoot, Val);
+
+    // The second entry is stored through the ObjectID form of the key.
+    Key2Hash = digest("key2");
+    std::optional<ObjectID> KeyID;
+    ASSERT_THAT_ERROR(DB.getReference(Key2Hash).moveInto(KeyID), Succeeded());
+    ASSERT_THAT_ERROR(UniDB->KVPut(*KeyID, *ID1).moveInto(Val), Succeeded());
+  }
+
+  // Looks up Digest in the graph database and compares the printed tree.
+  auto checkTree = [&](const HashType &Digest, StringRef ExpectedTree) {
+    OnDiskGraphDB &DB = UniDB->getGraphDB();
+    std::optional<ObjectID> ID;
+    ASSERT_THAT_ERROR(DB.getReference(Digest).moveInto(ID), Succeeded());
+    std::string PrintedTree;
+    raw_string_ostream OS(PrintedTree);
+    ASSERT_THAT_ERROR(printTree(DB, *ID, OS), Succeeded());
+    EXPECT_EQ(PrintedTree, ExpectedTree);
+  };
+  auto checkRootTree = [&]() {
+    return checkTree(RootHash, "root\n  1\n  2\n");
+  };
+
+  // Resolves Key through the key-value store and compares the data of the
+  // object it maps to.
+  auto checkKey = [&](const HashType &Key, StringRef ExpectedData) {
+    OnDiskGraphDB &DB = UniDB->getGraphDB();
+    std::optional<ObjectID> Val;
+    ASSERT_THAT_ERROR(UniDB->KVGet(Key).moveInto(Val), Succeeded());
+    ASSERT_TRUE(Val.has_value());
+    std::optional<ondisk::ObjectHandle> Obj;
+    ASSERT_THAT_ERROR(DB.load(*Val).moveInto(Obj), Succeeded());
+    EXPECT_EQ(toStringRef(DB.getObjectData(*Obj)), ExpectedData);
+  };
+
+  checkRootTree();
+  checkTree(OtherHash, "other\n");
+  checkKey(Key1Hash, "root");
+  checkKey(Key2Hash, "1");
+
+  // Stores a blob of 970 'a' characters followed by Index, so that every call
+  // adds a distinct object.
+  auto storeBigObject = [&](unsigned Index) {
+    SmallString<1000> Buf;
+    Buf.append(970, 'a');
+    raw_svector_ostream(Buf) << Index;
+    std::optional<ObjectID> ID;
+    ASSERT_THAT_ERROR(store(UniDB->getGraphDB(), Buf, {}).moveInto(ID),
+                      Succeeded());
+  };
+
+  // First fill: grow the primary directory until the limit is exceeded.
+  uint64_t PrevStoreSize = UniDB->getStorageSize();
+  unsigned Index = 0;
+  while (!UniDB->hasExceededSizeLimit()) {
+    storeBigObject(Index++);
+  }
+  EXPECT_GT(UniDB->getStorageSize(), PrevStoreSize);
+  UniDB->setSizeLimit(SizeLimit * 2);
+  EXPECT_FALSE(UniDB->hasExceededSizeLimit());
+  UniDB->setSizeLimit(SizeLimit);
+  EXPECT_TRUE(UniDB->hasExceededSizeLimit());
+
+  // Reopening destroys the instance while it is over the limit; afterwards
+  // the new instance reports neither an exceeded limit nor pending garbage.
+  reopenDB();
+
+  EXPECT_FALSE(UniDB->hasExceededSizeLimit());
+  EXPECT_FALSE(UniDB->needsGarbageCollection());
+
+  // Only the root tree and "key1" are accessed in this session.
+  checkRootTree();
+  checkKey(Key1Hash, "root");
+
+  // Second fill, followed by an explicit close.
+  while (!UniDB->hasExceededSizeLimit()) {
+    storeBigObject(Index++);
+  }
+  PrevStoreSize = UniDB->getStorageSize();
+  ASSERT_THAT_ERROR(UniDB->close(), Succeeded());
+  EXPECT_TRUE(UniDB->needsGarbageCollection());
+
+  reopenDB();
+  EXPECT_TRUE(UniDB->needsGarbageCollection());
+
+  // Garbage collection has to shrink the total size of the files on disk.
+  std::optional<size_t> DirSizeBefore;
+  ASSERT_THAT_ERROR(countFileSizes(Temp.path()).moveInto(DirSizeBefore),
+                    Succeeded());
+
+  ASSERT_THAT_ERROR(UnifiedOnDiskCache::collectGarbage(Temp.path()),
+                    Succeeded());
+
+  std::optional<size_t> DirSizeAfter;
+  ASSERT_THAT_ERROR(countFileSizes(Temp.path()).moveInto(DirSizeAfter),
+                    Succeeded());
+  EXPECT_LT(*DirSizeAfter, *DirSizeBefore);
+
+  reopenDB();
+  EXPECT_FALSE(UniDB->needsGarbageCollection());
+
+  checkRootTree();
+  checkKey(Key1Hash, "root");
+
+  EXPECT_LT(UniDB->getStorageSize(), PrevStoreSize);
+
+  // 'Other' tree and 'Key2' got garbage-collected.
+  {
+    OnDiskGraphDB &DB = UniDB->getGraphDB();
+    std::optional<ObjectID> ID;
+    ASSERT_THAT_ERROR(DB.getReference(OtherHash).moveInto(ID), Succeeded());
+    EXPECT_FALSE(DB.containsObject(*ID));
+    std::optional<ObjectID> Val;
+    ASSERT_THAT_ERROR(UniDB->KVGet(Key2Hash).moveInto(Val), Succeeded());
+    EXPECT_FALSE(Val.has_value());
+  }
+}



More information about the llvm-commits mailing list