[llvm] 6747ea0 - [CAS] Add UnifiedOnDiskCache and OnDiskCAS (#114103)

via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 3 09:50:33 PST 2025


Author: Steven Wu
Date: 2025-11-03T09:50:28-08:00
New Revision: 6747ea050dfc917b78c205102d9545902140ea2d

URL: https://github.com/llvm/llvm-project/commit/6747ea050dfc917b78c205102d9545902140ea2d
DIFF: https://github.com/llvm/llvm-project/commit/6747ea050dfc917b78c205102d9545902140ea2d.diff

LOG: [CAS] Add UnifiedOnDiskCache and OnDiskCAS (#114103)

Add a new abstraction layer, UnifiedOnDiskCache, that builds on top of
OnDiskGraphDB and OnDiskKeyValueDB and adds disk space management and
data validation.

Built on top of UnifiedOnDiskCache, OnDiskCAS implements the ObjectStore
and ActionCache interfaces so that LLVM tools can interact with CAS
storage.

Added: 
    llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h
    llvm/include/llvm/CAS/UnifiedOnDiskCache.h
    llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp
    llvm/lib/CAS/OnDiskCAS.cpp
    llvm/lib/CAS/UnifiedOnDiskCache.cpp
    llvm/unittests/CAS/BuiltinUnifiedCASDatabasesTest.cpp
    llvm/unittests/CAS/UnifiedOnDiskCacheTest.cpp

Modified: 
    llvm/include/llvm/CAS/ActionCache.h
    llvm/include/llvm/CAS/ObjectStore.h
    llvm/include/llvm/CAS/OnDiskGraphDB.h
    llvm/include/llvm/CAS/OnDiskKeyValueDB.h
    llvm/lib/CAS/ActionCaches.cpp
    llvm/lib/CAS/BuiltinCAS.cpp
    llvm/lib/CAS/BuiltinCAS.h
    llvm/lib/CAS/CMakeLists.txt
    llvm/lib/CAS/InMemoryCAS.cpp
    llvm/lib/CAS/ObjectStore.cpp
    llvm/lib/CAS/OnDiskGraphDB.cpp
    llvm/lib/CAS/OnDiskKeyValueDB.cpp
    llvm/unittests/CAS/ActionCacheTest.cpp
    llvm/unittests/CAS/CASTestConfig.cpp
    llvm/unittests/CAS/CASTestConfig.h
    llvm/unittests/CAS/CMakeLists.txt
    llvm/unittests/CAS/ObjectStoreTest.cpp
    llvm/unittests/CAS/OnDiskCommonUtils.h
    llvm/unittests/CAS/OnDiskGraphDBTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CAS/ActionCache.h b/llvm/include/llvm/CAS/ActionCache.h
index 69ee4dde1974a..7f5b11223c54d 100644
--- a/llvm/include/llvm/CAS/ActionCache.h
+++ b/llvm/include/llvm/CAS/ActionCache.h
@@ -75,6 +75,9 @@ class ActionCache {
                    CanBeDistributed);
   }
 
+  /// Validate the ActionCache contents.
+  virtual Error validate() const = 0;
+
   virtual ~ActionCache() = default;
 
 protected:
@@ -97,6 +100,9 @@ class ActionCache {
 /// Create an action cache in memory.
 std::unique_ptr<ActionCache> createInMemoryActionCache();
 
+/// Create an action cache on disk.
+Expected<std::unique_ptr<ActionCache>> createOnDiskActionCache(StringRef Path);
+
 } // end namespace llvm::cas
 
 #endif // LLVM_CAS_ACTIONCACHE_H

diff  --git a/llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h b/llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h
new file mode 100644
index 0000000000000..6c165c421b168
--- /dev/null
+++ b/llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h
@@ -0,0 +1,59 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CAS_BUILTINUNIFIEDCASDATABASES_H
+#define LLVM_CAS_BUILTINUNIFIEDCASDATABASES_H
+
+#include "llvm/Support/Error.h"
+
+namespace llvm::cas {
+
+class ActionCache;
+class ObjectStore;
+
+/// Create on-disk \c ObjectStore and \c ActionCache instances based on
+/// \c ondisk::UnifiedOnDiskCache, with built-in hashing.
+Expected<std::pair<std::unique_ptr<ObjectStore>, std::unique_ptr<ActionCache>>>
+createOnDiskUnifiedCASDatabases(StringRef Path);
+
+/// Represents the result of validating the contents using
+/// \c validateOnDiskUnifiedCASDatabasesIfNeeded.
+///
+/// Note: invalid results are handled as an \c Error.
+enum class ValidationResult {
+  /// The data is already valid.
+  Valid,
+  /// The data was invalid, but was recovered.
+  Recovered,
+  /// Validation was skipped, as it was not needed.
+  Skipped,
+};
+
+/// Validate the data in \p Path, if needed to ensure correctness.
+///
+/// \param Path directory for the on-disk database.
+/// \param CheckHash Whether to validate hashes match the data.
+/// \param AllowRecovery Whether to automatically recover from invalid data by
+/// marking the files for garbage collection.
+/// \param ForceValidation Whether to force validation to occur even if it
+/// should not be necessary.
+/// \param LLVMCasBinaryPath If provided, validation is performed out-of-process
+/// using the given \c llvm-cas executable which protects against crashes
+/// during validation. Otherwise validation is performed in-process.
+///
+/// \returns \c Valid if the data is already valid, \c Recovered if data
+/// was invalid but has been cleared, \c Skipped if validation is not needed,
+/// or an \c Error if validation cannot be performed or if the data is left
+/// in an invalid state because \p AllowRecovery is false.
+Expected<ValidationResult> validateOnDiskUnifiedCASDatabasesIfNeeded(
+    StringRef Path, bool CheckHash, bool AllowRecovery, bool ForceValidation,
+    std::optional<StringRef> LLVMCasBinaryPath);
+
+} // namespace llvm::cas
+
+#endif // LLVM_CAS_BUILTINUNIFIEDCASDATABASES_H

diff  --git a/llvm/include/llvm/CAS/ObjectStore.h b/llvm/include/llvm/CAS/ObjectStore.h
index 6db5dd3904095..29950fe9d9029 100644
--- a/llvm/include/llvm/CAS/ObjectStore.h
+++ b/llvm/include/llvm/CAS/ObjectStore.h
@@ -5,6 +5,11 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the declaration of the ObjectStore class.
+///
+//===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CAS_OBJECTSTORE_H
 #define LLVM_CAS_OBJECTSTORE_H
@@ -111,7 +116,10 @@ class ObjectStore {
   virtual Expected<bool> isMaterialized(ObjectRef Ref) const = 0;
 
   /// Validate the underlying object referred by CASID.
-  virtual Error validate(const CASID &ID) = 0;
+  virtual Error validateObject(const CASID &ID) = 0;
+
+  /// Validate the entire ObjectStore.
+  virtual Error validate(bool CheckHash) const = 0;
 
 protected:
   /// Load the object referenced by \p Ref.
@@ -215,9 +223,39 @@ class ObjectStore {
     return Data.size();
   }
 
+  /// Set the size for limiting growth of on-disk storage. This has an effect
+  /// for when the instance is closed.
+  ///
+  /// Implementations may leave this unimplemented.
+  virtual Error setSizeLimit(std::optional<uint64_t> SizeLimit) {
+    return Error::success();
+  }
+
+  /// \returns the storage size of the on-disk CAS data.
+  ///
+  /// Implementations that do not support this query should return
+  /// \c std::nullopt.
+  virtual Expected<std::optional<uint64_t>> getStorageSize() const {
+    return std::nullopt;
+  }
+
+  /// Prune local storage to reduce its size according to the desired size
+  /// limit. Pruning can happen concurrently with other operations.
+  ///
+  /// Implementations may leave this unimplemented.
+  virtual Error pruneStorageData() { return Error::success(); }
+
   /// Validate the whole node tree.
   Error validateTree(ObjectRef Ref);
 
+  /// Import object from another CAS. This will import the full tree from the
+  /// other CAS.
+  Expected<ObjectRef> importObject(ObjectStore &Upstream, ObjectRef Other);
+
+  /// Print the ObjectStore internals for debugging purpose.
+  virtual void print(raw_ostream &) const {}
+  void dump() const;
+
   /// Get CASContext
   const CASContext &getContext() const { return Context; }
 
@@ -290,8 +328,15 @@ class ObjectProxy {
   ObjectHandle H;
 };
 
+/// Create an in memory CAS.
 std::unique_ptr<ObjectStore> createInMemoryCAS();
 
+/// \returns true if \c LLVM_ENABLE_ONDISK_CAS configuration was enabled.
+bool isOnDiskCASEnabled();
+
+/// Create a persistent on-disk CAS at \p Path.
+Expected<std::unique_ptr<ObjectStore>> createOnDiskCAS(const Twine &Path);
+
 } // namespace cas
 } // namespace llvm
 

diff  --git a/llvm/include/llvm/CAS/OnDiskGraphDB.h b/llvm/include/llvm/CAS/OnDiskGraphDB.h
index 5f0ee0e131c0f..76cc528711b69 100644
--- a/llvm/include/llvm/CAS/OnDiskGraphDB.h
+++ b/llvm/include/llvm/CAS/OnDiskGraphDB.h
@@ -340,13 +340,16 @@ class OnDiskGraphDB {
   /// \param HashByteSize Size for the object digest hash bytes.
   /// \param UpstreamDB Optional on-disk store to be used for faulting-in nodes
   /// if they don't exist in the primary store. The upstream store is only used
-  /// for reading nodes, new nodes are only written to the primary store.
+  /// for reading nodes, new nodes are only written to the primary store. The
+  /// caller must ensure that \p UpstreamDB outlives this OnDiskGraphDB
+  /// instance; the common usage is to have a \c UnifiedOnDiskCache manage
+  /// both.
   /// \param Policy If \p UpstreamDB is provided, controls how nodes are copied
   /// to primary store. This is recorded at creation time and subsequent opens
   /// need to pass the same policy otherwise the \p open will fail.
   static Expected<std::unique_ptr<OnDiskGraphDB>>
   open(StringRef Path, StringRef HashName, unsigned HashByteSize,
-       std::unique_ptr<OnDiskGraphDB> UpstreamDB = nullptr,
+       OnDiskGraphDB *UpstreamDB = nullptr,
        FaultInPolicy Policy = FaultInPolicy::FullTree);
 
   ~OnDiskGraphDB();
@@ -438,8 +441,7 @@ class OnDiskGraphDB {
 
   // Private constructor.
   OnDiskGraphDB(StringRef RootPath, OnDiskTrieRawHashMap Index,
-                OnDiskDataAllocator DataPool,
-                std::unique_ptr<OnDiskGraphDB> UpstreamDB,
+                OnDiskDataAllocator DataPool, OnDiskGraphDB *UpstreamDB,
                 FaultInPolicy Policy);
 
   /// Mapping from hash to object reference.
@@ -459,7 +461,7 @@ class OnDiskGraphDB {
   std::string RootPath;
 
   /// Optional on-disk store to be used for faulting-in nodes.
-  std::unique_ptr<OnDiskGraphDB> UpstreamDB;
+  OnDiskGraphDB *UpstreamDB = nullptr;
 
   /// The policy used to fault in data from upstream.
   FaultInPolicy FIPolicy;

diff  --git a/llvm/include/llvm/CAS/OnDiskKeyValueDB.h b/llvm/include/llvm/CAS/OnDiskKeyValueDB.h
index b762518366c21..17ae52f0307fc 100644
--- a/llvm/include/llvm/CAS/OnDiskKeyValueDB.h
+++ b/llvm/include/llvm/CAS/OnDiskKeyValueDB.h
@@ -19,6 +19,8 @@
 
 namespace llvm::cas::ondisk {
 
+class UnifiedOnDiskCache;
+
 /// An on-disk key-value data store with the following properties:
 /// * Keys are fixed length binary hashes with expected normal distribution.
 /// * Values are buffers of the same size, specified at creation time.
@@ -59,9 +61,13 @@ class OnDiskKeyValueDB {
   /// \param KeySize Size for the key hash bytes.
   /// \param ValueName Identifier name for the values.
   /// \param ValueSize Size for the value bytes.
+  /// \param UnifiedCache An optional UnifiedOnDiskCache that manages the size
+  /// and lifetime of the CAS instance. If provided, it must own the
+  /// KeyValueDB being opened once initialization has completed.
   static Expected<std::unique_ptr<OnDiskKeyValueDB>>
   open(StringRef Path, StringRef HashName, unsigned KeySize,
-       StringRef ValueName, size_t ValueSize);
+       StringRef ValueName, size_t ValueSize,
+       UnifiedOnDiskCache *UnifiedCache = nullptr);
 
   using CheckValueT =
       function_ref<Error(FileOffset Offset, ArrayRef<char> Data)>;
@@ -70,11 +76,14 @@ class OnDiskKeyValueDB {
   Error validate(CheckValueT CheckValue) const;
 
 private:
-  OnDiskKeyValueDB(size_t ValueSize, OnDiskTrieRawHashMap Cache)
-      : ValueSize(ValueSize), Cache(std::move(Cache)) {}
+  OnDiskKeyValueDB(size_t ValueSize, OnDiskTrieRawHashMap Cache,
+                   UnifiedOnDiskCache *UnifiedCache)
+      : ValueSize(ValueSize), Cache(std::move(Cache)),
+        UnifiedCache(UnifiedCache) {}
 
   const size_t ValueSize;
   OnDiskTrieRawHashMap Cache;
+  UnifiedOnDiskCache *UnifiedCache = nullptr;
 };
 
 } // namespace llvm::cas::ondisk

diff  --git a/llvm/include/llvm/CAS/UnifiedOnDiskCache.h b/llvm/include/llvm/CAS/UnifiedOnDiskCache.h
new file mode 100644
index 0000000000000..6e0878a65fe72
--- /dev/null
+++ b/llvm/include/llvm/CAS/UnifiedOnDiskCache.h
@@ -0,0 +1,172 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CAS_UNIFIEDONDISKCACHE_H
+#define LLVM_CAS_UNIFIEDONDISKCACHE_H
+
+#include "llvm/CAS/BuiltinUnifiedCASDatabases.h"
+#include "llvm/CAS/OnDiskGraphDB.h"
+#include <atomic>
+
+namespace llvm::cas::ondisk {
+
+class OnDiskKeyValueDB;
+
+/// A unified CAS nodes and key-value database, using on-disk storage for both.
+/// It manages storage growth and provides APIs for garbage collection.
+///
+/// High-level properties:
+/// * While \p UnifiedOnDiskCache is open on a directory, by any process, the
+///   storage size in that directory will keep growing unrestricted. For data to
+///   become eligible for garbage-collection there should be no open instances
+///   of \p UnifiedOnDiskCache for that directory, by any process.
+/// * Garbage-collection needs to be triggered explicitly by the client. It can
+///   be triggered on a directory concurrently, at any time and by any process,
+///   without affecting any active readers/writers, in the same process or other
+///   processes.
+///
+/// Usage patterns should be that an instance of \p UnifiedOnDiskCache is open
+/// for a limited period of time, e.g. for the duration of a build operation.
+/// For long-living processes that need periodic access to a
+/// \p UnifiedOnDiskCache, the client should devise a scheme where access is
+/// performed within some defined period. For example, if a service is designed
+/// to continuously wait for requests that access a \p UnifiedOnDiskCache, it
+/// could keep the instance alive while new requests are coming in but close it
+/// after a time period in which there are no new requests.
+class UnifiedOnDiskCache {
+public:
+  /// The \p OnDiskGraphDB instance for the open directory.
+  OnDiskGraphDB &getGraphDB() { return *PrimaryGraphDB; }
+
+  /// The \p OnDiskKeyValueDB instance for the open directory.
+  OnDiskKeyValueDB &getKeyValueDB() { return *PrimaryKVDB; }
+
+  /// Open a \p UnifiedOnDiskCache instance for a directory.
+  ///
+  /// \param Path directory for the on-disk database. The directory will be
+  /// created if it doesn't exist.
+  /// \param SizeLimit Optional size for limiting growth. This has an effect for
+  /// when the instance is closed.
+  /// \param HashName Identifier name for the hashing algorithm that is going to
+  /// be used.
+  /// \param HashByteSize Size for the object digest hash bytes.
+  /// \param FaultInPolicy Controls how nodes are copied to primary store. This
+  /// is recorded at creation time and subsequent opens need to pass the same
+  /// policy otherwise the \p open will fail.
+  static Expected<std::unique_ptr<UnifiedOnDiskCache>>
+  open(StringRef Path, std::optional<uint64_t> SizeLimit, StringRef HashName,
+       unsigned HashByteSize,
+       OnDiskGraphDB::FaultInPolicy FaultInPolicy =
+           OnDiskGraphDB::FaultInPolicy::FullTree);
+
+  /// Validate the data in \p Path, if needed to ensure correctness.
+  ///
+  /// Note: if invalid data is detected and \p AllowRecovery is true, then
+  /// recovery requires exclusive access to the CAS and it is an error to
+  /// attempt recovery if there is concurrent use of the CAS.
+  ///
+  /// \param Path directory for the on-disk database.
+  /// \param HashName Identifier name for the hashing algorithm that is going to
+  /// be used.
+  /// \param HashByteSize Size for the object digest hash bytes.
+  /// \param CheckHash Whether to validate hashes match the data.
+  /// \param AllowRecovery Whether to automatically recover from invalid data by
+  /// marking the files for garbage collection.
+  /// \param ForceValidation Whether to force validation to occur even if it
+  /// should not be necessary.
+  /// \param LLVMCasBinary If provided, validation is performed out-of-process
+  /// using the given \c llvm-cas executable which protects against crashes
+  /// during validation. Otherwise validation is performed in-process.
+  ///
+  /// \returns \c Valid if the data is already valid, \c Recovered if data
+  /// was invalid but has been cleared, \c Skipped if validation is not needed,
+  /// or an \c Error if validation cannot be performed or if the data is left
+  /// in an invalid state because \p AllowRecovery is false.
+  static Expected<ValidationResult>
+  validateIfNeeded(StringRef Path, StringRef HashName, unsigned HashByteSize,
+                   bool CheckHash, bool AllowRecovery, bool ForceValidation,
+                   std::optional<StringRef> LLVMCasBinary);
+
+  /// This is called implicitly at destruction time, so it is not required for a
+  /// client to call this. After calling \p close the only method that is valid
+  /// to call is \p needsGarbageCollection.
+  ///
+  /// \param CheckSizeLimit if true it will check whether the primary store has
+  /// exceeded its intended size limit. If false the check is skipped even if a
+  /// \p SizeLimit was passed to the \p open call.
+  Error close(bool CheckSizeLimit = true);
+
+  /// Set the size for limiting growth. This has an effect for when the instance
+  /// is closed.
+  void setSizeLimit(std::optional<uint64_t> SizeLimit);
+
+  /// \returns the storage size of the cache data.
+  uint64_t getStorageSize() const;
+
+  /// \returns whether the primary store has exceeded the intended size limit.
+  /// This can return false even if the overall size of the opened directory is
+  /// over the \p SizeLimit passed to \p open. To know whether garbage
+  /// collection needs to be triggered or not, call \p needsGarbageCollection.
+  bool hasExceededSizeLimit() const;
+
+  /// \returns whether there are unused data that can be deleted using a
+  /// \p collectGarbage call.
+  bool needsGarbageCollection() const { return NeedsGarbageCollection; }
+
+  /// Remove any unused data from the directory at \p Path. If there are no such
+  /// data the operation is a no-op.
+  ///
+  /// This can be called concurrently, regardless of whether there is an open
+  /// \p UnifiedOnDiskCache instance or not; it has no effect on readers/writers
+  /// in the same process or other processes.
+  ///
+  /// It is recommended that garbage-collection is triggered concurrently in the
+  /// background, so that it has minimal effect on the workload of the process.
+  static Error collectGarbage(StringRef Path);
+
+  /// Remove unused data from the current UnifiedOnDiskCache.
+  Error collectGarbage();
+
+  /// Helpers to convert between a KeyValueDB value and an ObjectID.
+  static ObjectID getObjectIDFromValue(ArrayRef<char> Value);
+
+  using ValueBytes = std::array<char, sizeof(uint64_t)>;
+  static ValueBytes getValueFromObjectID(ObjectID ID);
+
+  ~UnifiedOnDiskCache();
+
+private:
+  friend class OnDiskGraphDB;
+  friend class OnDiskKeyValueDB;
+
+  UnifiedOnDiskCache();
+
+  Expected<std::optional<ArrayRef<char>>>
+  faultInFromUpstreamKV(ArrayRef<uint8_t> Key);
+
+  /// \returns the storage size of the primary directory.
+  uint64_t getPrimaryStorageSize() const;
+
+  std::string RootPath;
+  std::atomic<uint64_t> SizeLimit;
+
+  int LockFD = -1;
+
+  std::atomic<bool> NeedsGarbageCollection;
+  std::string PrimaryDBDir;
+
+  std::unique_ptr<OnDiskGraphDB> UpstreamGraphDB;
+  std::unique_ptr<OnDiskGraphDB> PrimaryGraphDB;
+
+  std::unique_ptr<OnDiskKeyValueDB> UpstreamKVDB;
+  std::unique_ptr<OnDiskKeyValueDB> PrimaryKVDB;
+};
+
+} // namespace llvm::cas::ondisk
+
+#endif // LLVM_CAS_UNIFIEDONDISKCACHE_H

diff  --git a/llvm/lib/CAS/ActionCaches.cpp b/llvm/lib/CAS/ActionCaches.cpp
index 571c5b3ca5b4b..003c850275ff4 100644
--- a/llvm/lib/CAS/ActionCaches.cpp
+++ b/llvm/lib/CAS/ActionCaches.cpp
@@ -13,7 +13,11 @@
 #include "BuiltinCAS.h"
 #include "llvm/ADT/TrieRawHashMap.h"
 #include "llvm/CAS/ActionCache.h"
+#include "llvm/CAS/OnDiskKeyValueDB.h"
+#include "llvm/CAS/UnifiedOnDiskCache.h"
+#include "llvm/Config/llvm-config.h"
 #include "llvm/Support/BLAKE3.h"
+#include "llvm/Support/Errc.h"
 
 #define DEBUG_TYPE "cas-action-caches"
 
@@ -47,12 +51,54 @@ class InMemoryActionCache final : public ActionCache {
   Expected<std::optional<CASID>> getImpl(ArrayRef<uint8_t> ActionKey,
                                          bool CanBeDistributed) const final;
 
+  Error validate() const final {
+    return createStringError("InMemoryActionCache doesn't support validate()");
+  }
+
 private:
   using DataT = CacheEntry<sizeof(HashType)>;
   using InMemoryCacheT = ThreadSafeTrieRawHashMap<DataT, sizeof(HashType)>;
 
   InMemoryCacheT Cache;
 };
+
+/// Builtin basic OnDiskActionCache that uses one underlying OnDiskKeyValueDB.
+class OnDiskActionCache final : public ActionCache {
+public:
+  Error putImpl(ArrayRef<uint8_t> ActionKey, const CASID &Result,
+                bool CanBeDistributed) final;
+  Expected<std::optional<CASID>> getImpl(ArrayRef<uint8_t> ActionKey,
+                                         bool CanBeDistributed) const final;
+
+  static Expected<std::unique_ptr<OnDiskActionCache>> create(StringRef Path);
+
+  Error validate() const final;
+
+private:
+  static StringRef getHashName() { return "BLAKE3"; }
+
+  OnDiskActionCache(std::unique_ptr<ondisk::OnDiskKeyValueDB> DB);
+
+  std::unique_ptr<ondisk::OnDiskKeyValueDB> DB;
+  using DataT = CacheEntry<sizeof(HashType)>;
+};
+
+/// Builtin unified ActionCache that wraps around UnifiedOnDiskCache to provide
+/// access to its ActionCache.
+class UnifiedOnDiskActionCache final : public ActionCache {
+public:
+  Error putImpl(ArrayRef<uint8_t> ActionKey, const CASID &Result,
+                bool CanBeDistributed) final;
+  Expected<std::optional<CASID>> getImpl(ArrayRef<uint8_t> ActionKey,
+                                         bool CanBeDistributed) const final;
+
+  UnifiedOnDiskActionCache(std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB);
+
+  Error validate() const final;
+
+private:
+  std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB;
+};
 } // end namespace
 
 static Error createResultCachePoisonedError(ArrayRef<uint8_t> KeyHash,
@@ -99,3 +145,123 @@ std::unique_ptr<ActionCache> createInMemoryActionCache() {
 }
 
 } // namespace llvm::cas
+
+OnDiskActionCache::OnDiskActionCache(
+    std::unique_ptr<ondisk::OnDiskKeyValueDB> DB)
+    : ActionCache(builtin::BuiltinCASContext::getDefaultContext()),
+      DB(std::move(DB)) {}
+
+Expected<std::unique_ptr<OnDiskActionCache>>
+OnDiskActionCache::create(StringRef AbsPath) {
+  std::unique_ptr<ondisk::OnDiskKeyValueDB> DB;
+  if (Error E = ondisk::OnDiskKeyValueDB::open(AbsPath, getHashName(),
+                                               sizeof(HashType), getHashName(),
+                                               sizeof(DataT))
+                    .moveInto(DB))
+    return std::move(E);
+  return std::unique_ptr<OnDiskActionCache>(
+      new OnDiskActionCache(std::move(DB)));
+}
+
+Expected<std::optional<CASID>>
+OnDiskActionCache::getImpl(ArrayRef<uint8_t> Key,
+                           bool /*CanBeDistributed*/) const {
+  std::optional<ArrayRef<char>> Val;
+  if (Error E = DB->get(Key).moveInto(Val))
+    return std::move(E);
+  if (!Val)
+    return std::nullopt;
+  return CASID::create(&getContext(), toStringRef(*Val));
+}
+
+Error OnDiskActionCache::putImpl(ArrayRef<uint8_t> Key, const CASID &Result,
+                                 bool /*CanBeDistributed*/) {
+  auto ResultHash = Result.getHash();
+  ArrayRef Expected((const char *)ResultHash.data(), ResultHash.size());
+  ArrayRef<char> Observed;
+  if (Error E = DB->put(Key, Expected).moveInto(Observed))
+    return E;
+
+  if (Expected == Observed)
+    return Error::success();
+
+  return createResultCachePoisonedError(
+      Key, getContext(), Result,
+      ArrayRef((const uint8_t *)Observed.data(), Observed.size()));
+}
+
+Error OnDiskActionCache::validate() const {
+  // FIXME: without the matching CAS there is nothing we can check about the
+  // cached values. The hash size is already validated by the DB validator.
+  return DB->validate(nullptr);
+}
+
+UnifiedOnDiskActionCache::UnifiedOnDiskActionCache(
+    std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB)
+    : ActionCache(builtin::BuiltinCASContext::getDefaultContext()),
+      UniDB(std::move(UniDB)) {}
+
+Expected<std::optional<CASID>>
+UnifiedOnDiskActionCache::getImpl(ArrayRef<uint8_t> Key,
+                                  bool /*CanBeDistributed*/) const {
+  std::optional<ArrayRef<char>> Val;
+  if (Error E = UniDB->getKeyValueDB().get(Key).moveInto(Val))
+    return std::move(E);
+  if (!Val)
+    return std::nullopt;
+  auto ID = ondisk::UnifiedOnDiskCache::getObjectIDFromValue(*Val);
+  return CASID::create(&getContext(),
+                       toStringRef(UniDB->getGraphDB().getDigest(ID)));
+}
+
+Error UnifiedOnDiskActionCache::putImpl(ArrayRef<uint8_t> Key,
+                                        const CASID &Result,
+                                        bool /*CanBeDistributed*/) {
+  auto Expected = UniDB->getGraphDB().getReference(Result.getHash());
+  if (LLVM_UNLIKELY(!Expected))
+    return Expected.takeError();
+
+  auto Value = ondisk::UnifiedOnDiskCache::getValueFromObjectID(*Expected);
+  std::optional<ArrayRef<char>> Observed;
+  if (Error E = UniDB->getKeyValueDB().put(Key, Value).moveInto(Observed))
+    return E;
+
+  auto ObservedID = ondisk::UnifiedOnDiskCache::getObjectIDFromValue(*Observed);
+  if (*Expected == ObservedID)
+    return Error::success();
+
+  return createResultCachePoisonedError(
+      Key, getContext(), Result, UniDB->getGraphDB().getDigest(ObservedID));
+}
+
+// Validate the key-value DB, rejecting values whose ObjectID opaque data is 0.
+Error UnifiedOnDiskActionCache::validate() const {
+  auto ValidateRef = [](FileOffset Offset, ArrayRef<char> Value) -> Error {
+    auto ID = ondisk::UnifiedOnDiskCache::getObjectIDFromValue(Value);
+    if (ID.getOpaqueData() != 0)
+      return Error::success();
+    // Format the offset at 64-bit width; narrowing it to `unsigned` would
+    // misreport records past 4 GiB where `unsigned` is 32 bits.
+    auto Hex = utohexstr(static_cast<uint64_t>(Offset.get()),
+                         /*LowerCase=*/true);
+    return createStringError(llvm::errc::illegal_byte_sequence,
+                             "bad record at 0x" + Hex +
+                                 ": zero is not a valid ref");
+  };
+  return UniDB->getKeyValueDB().validate(ValidateRef);
+}
+
+Expected<std::unique_ptr<ActionCache>>
+cas::createOnDiskActionCache(StringRef Path) {
+#if LLVM_ENABLE_ONDISK_CAS
+  return OnDiskActionCache::create(Path);
+#else
+  return createStringError(inconvertibleErrorCode(), "OnDiskCache is disabled");
+#endif
+}
+
+std::unique_ptr<ActionCache>
+cas::builtin::createActionCacheFromUnifiedOnDiskCache(
+    std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB) {
+  return std::make_unique<UnifiedOnDiskActionCache>(std::move(UniDB));
+}

diff  --git a/llvm/lib/CAS/BuiltinCAS.cpp b/llvm/lib/CAS/BuiltinCAS.cpp
index 73646ad2c3528..e9bc6d8beed4e 100644
--- a/llvm/lib/CAS/BuiltinCAS.cpp
+++ b/llvm/lib/CAS/BuiltinCAS.cpp
@@ -9,6 +9,7 @@
 #include "BuiltinCAS.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/CAS/BuiltinObjectHasher.h"
+#include "llvm/CAS/UnifiedOnDiskCache.h"
 #include "llvm/Support/Process.h"
 
 using namespace llvm;
@@ -68,7 +69,7 @@ Expected<ObjectRef> BuiltinCAS::store(ArrayRef<ObjectRef> Refs,
                    Refs, Data);
 }
 
-Error BuiltinCAS::validate(const CASID &ID) {
+Error BuiltinCAS::validateObject(const CASID &ID) {
   auto Ref = getReference(ID);
   if (!Ref)
     return createUnknownObjectError(ID);
@@ -92,3 +93,14 @@ Error BuiltinCAS::validate(const CASID &ID) {
 
   return Error::success();
 }
+
+Expected<std::unique_ptr<ondisk::UnifiedOnDiskCache>>
+cas::builtin::createBuiltinUnifiedOnDiskCache(StringRef Path) {
+#if LLVM_ENABLE_ONDISK_CAS
+  return ondisk::UnifiedOnDiskCache::open(Path, /*SizeLimit=*/std::nullopt,
+                                          BuiltinCASContext::getHashName(),
+                                          sizeof(HashType));
+#else
+  return createStringError(inconvertibleErrorCode(), "OnDiskCache is disabled");
+#endif
+}

diff  --git a/llvm/lib/CAS/BuiltinCAS.h b/llvm/lib/CAS/BuiltinCAS.h
index 3b5374d5e1850..4d2de66cf636f 100644
--- a/llvm/lib/CAS/BuiltinCAS.h
+++ b/llvm/lib/CAS/BuiltinCAS.h
@@ -1,4 +1,4 @@
-//===- BuiltinCAS.h ---------------------------------------------*- C++ -*-===//
+//===----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -15,6 +15,9 @@
 
 namespace llvm::cas {
 class ActionCache;
+namespace ondisk {
+class UnifiedOnDiskCache;
+} // namespace ondisk
 namespace builtin {
 
 /// Common base class for builtin CAS implementations using the same CASContext.
@@ -65,9 +68,27 @@ class BuiltinCAS : public ObjectStore {
                              "corrupt storage");
   }
 
-  Error validate(const CASID &ID) final;
+  Error validateObject(const CASID &ID) final;
 };
 
+/// Create a \p UnifiedOnDiskCache instance that uses \p BLAKE3 hashing.
+Expected<std::unique_ptr<ondisk::UnifiedOnDiskCache>>
+createBuiltinUnifiedOnDiskCache(StringRef Path);
+
+/// \param UniDB A \p UnifiedOnDiskCache instance from \p
+/// createBuiltinUnifiedOnDiskCache.
+std::unique_ptr<ObjectStore> createObjectStoreFromUnifiedOnDiskCache(
+    std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB);
+
+/// \param UniDB A \p UnifiedOnDiskCache instance from \p
+/// createBuiltinUnifiedOnDiskCache.
+std::unique_ptr<ActionCache> createActionCacheFromUnifiedOnDiskCache(
+    std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB);
+
+// FIXME: Proxy not portable. Maybe also error-prone?
+constexpr StringLiteral DefaultDirProxy = "/^llvm::cas::builtin::default";
+constexpr StringLiteral DefaultDir = "llvm.cas.builtin.default";
+
 } // end namespace builtin
 } // end namespace llvm::cas
 

diff  --git a/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp b/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp
new file mode 100644
index 0000000000000..f3f6fa043bc52
--- /dev/null
+++ b/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp
@@ -0,0 +1,38 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CAS/BuiltinUnifiedCASDatabases.h"
+#include "BuiltinCAS.h"
+#include "llvm/CAS/ActionCache.h"
+#include "llvm/CAS/UnifiedOnDiskCache.h"
+
+using namespace llvm;
+using namespace llvm::cas;
+
+Expected<std::pair<std::unique_ptr<ObjectStore>, std::unique_ptr<ActionCache>>>
+cas::createOnDiskUnifiedCASDatabases(StringRef Path) {
+  std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB;
+  if (Error E = builtin::createBuiltinUnifiedOnDiskCache(Path).moveInto(UniDB))
+    return std::move(E);
+  auto CAS = builtin::createObjectStoreFromUnifiedOnDiskCache(UniDB);
+  auto AC = builtin::createActionCacheFromUnifiedOnDiskCache(std::move(UniDB));
+  return std::make_pair(std::move(CAS), std::move(AC));
+}
+
+Expected<ValidationResult> cas::validateOnDiskUnifiedCASDatabasesIfNeeded(
+    StringRef Path, bool CheckHash, bool AllowRecovery, bool ForceValidation,
+    std::optional<StringRef> LLVMCasBinary) {
+#if LLVM_ENABLE_ONDISK_CAS
+  return ondisk::UnifiedOnDiskCache::validateIfNeeded(
+      Path, builtin::BuiltinCASContext::getHashName(),
+      sizeof(builtin::HashType), CheckHash, AllowRecovery, ForceValidation,
+      LLVMCasBinary);
+#else
+  return createStringError(inconvertibleErrorCode(), "OnDiskCache is disabled");
+#endif
+}

diff  --git a/llvm/lib/CAS/CMakeLists.txt b/llvm/lib/CAS/CMakeLists.txt
index a2f8c49e50145..aad77dce370d8 100644
--- a/llvm/lib/CAS/CMakeLists.txt
+++ b/llvm/lib/CAS/CMakeLists.txt
@@ -2,15 +2,18 @@ add_llvm_component_library(LLVMCAS
   ActionCache.cpp
   ActionCaches.cpp
   BuiltinCAS.cpp
+  BuiltinUnifiedCASDatabases.cpp
   DatabaseFile.cpp
   InMemoryCAS.cpp
   MappedFileRegionArena.cpp
   ObjectStore.cpp
+  OnDiskCAS.cpp
   OnDiskCommon.cpp
   OnDiskDataAllocator.cpp
   OnDiskGraphDB.cpp
   OnDiskKeyValueDB.cpp
   OnDiskTrieRawHashMap.cpp
+  UnifiedOnDiskCache.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${LLVM_MAIN_INCLUDE_DIR}/llvm/CAS

diff  --git a/llvm/lib/CAS/InMemoryCAS.cpp b/llvm/lib/CAS/InMemoryCAS.cpp
index c63ee70de0849..2d4eedd5bdc8f 100644
--- a/llvm/lib/CAS/InMemoryCAS.cpp
+++ b/llvm/lib/CAS/InMemoryCAS.cpp
@@ -233,6 +233,12 @@ class InMemoryCAS : public BuiltinCAS {
     return cast<InMemoryObject>(asInMemoryObject(Node)).getData();
   }
 
+  void print(raw_ostream &OS) const final;
+
+  Error validate(bool CheckHash) const final {
+    return createStringError("InMemoryCAS doesn't support validate()");
+  }
+
   InMemoryCAS() = default;
 
 private:
@@ -271,6 +277,8 @@ ArrayRef<const InMemoryObject *> InMemoryObject::getRefs() const {
   return cast<InMemoryInlineObject>(this)->getRefsImpl();
 }
 
+void InMemoryCAS::print(raw_ostream &OS) const {}
+
 Expected<ObjectRef>
 InMemoryCAS::storeFromNullTerminatedRegion(ArrayRef<uint8_t> ComputedHash,
                                            sys::fs::mapped_file_region Map) {

diff  --git a/llvm/lib/CAS/ObjectStore.cpp b/llvm/lib/CAS/ObjectStore.cpp
index e0be50bbe013a..3110577e03774 100644
--- a/llvm/lib/CAS/ObjectStore.cpp
+++ b/llvm/lib/CAS/ObjectStore.cpp
@@ -1,4 +1,4 @@
-//===- ObjectStore.cpp ------------------------------------------*- C++ -*-===//
+//===----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -12,7 +12,7 @@
 #include "llvm/Support/Errc.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MemoryBuffer.h"
-#include <optional>
+#include <deque>
 
 using namespace llvm;
 using namespace llvm::cas;
@@ -21,6 +21,7 @@ void CASContext::anchor() {}
 void ObjectStore::anchor() {}
 
 LLVM_DUMP_METHOD void CASID::dump() const { print(dbgs()); }
+LLVM_DUMP_METHOD void ObjectStore::dump() const { print(dbgs()); }
 LLVM_DUMP_METHOD void ObjectRef::dump() const { print(dbgs()); }
 LLVM_DUMP_METHOD void ObjectHandle::dump() const { print(dbgs()); }
 
@@ -141,7 +142,7 @@ Error ObjectStore::validateTree(ObjectRef Root) {
     auto [I, Inserted] = ValidatedRefs.insert(Ref);
     if (!Inserted)
       continue; // already validated.
-    if (Error E = validate(getID(Ref)))
+    if (Error E = validateObject(getID(Ref)))
       return E;
     Expected<ObjectHandle> Obj = load(Ref);
     if (!Obj)
@@ -155,6 +156,92 @@ Error ObjectStore::validateTree(ObjectRef Root) {
   return Error::success();
 }
 
+Expected<ObjectRef> ObjectStore::importObject(ObjectStore &Upstream,
+                                              ObjectRef Other) {
+  // Copy the full CAS tree from upstream with depth-first ordering to ensure
+  // all the child nodes are available in downstream CAS before inserting
+  // current object. This uses a similar algorithm as
+  // `OnDiskGraphDB::importFullTree` but doesn't assume the upstream CAS schema
+  // so it can be used to import from any other ObjectStore regardless of the
+  // CAS schema.
+
+  // There is no work to do if importing from self.
+  if (this == &Upstream)
+    return Other;
+
+  /// Keeps track of the state of visitation for current node and all of its
+  /// parents. Upstream Cursor holds information only from upstream CAS.
+  struct UpstreamCursor {
+    ObjectRef Ref;
+    ObjectHandle Node;
+    size_t RefsCount;
+    std::deque<ObjectRef> Refs;
+  };
+  SmallVector<UpstreamCursor, 16> CursorStack;
+  /// PrimaryRefStack holds the ObjectRefs of the current CAS, for nodes that
+  /// were either just stored in it or already existed in it.
+  SmallVector<ObjectRef, 128> PrimaryRefStack;
+  /// A map from upstream ObjectRef to current ObjectRef.
+  llvm::DenseMap<ObjectRef, ObjectRef> CreatedObjects;
+
+  auto enqueueNode = [&](ObjectRef Ref, ObjectHandle Node) {
+    size_t NumRefs = Upstream.getNumRefs(Node);
+    std::deque<ObjectRef> Refs;
+    for (size_t I = 0; I < NumRefs; ++I)
+      Refs.push_back(Upstream.readRef(Node, I));
+
+    CursorStack.push_back({Ref, Node, NumRefs, std::move(Refs)});
+  };
+
+  auto UpstreamHandle = Upstream.load(Other);
+  if (!UpstreamHandle)
+    return UpstreamHandle.takeError();
+  enqueueNode(Other, *UpstreamHandle);
+
+  while (!CursorStack.empty()) {
+    UpstreamCursor &Cur = CursorStack.back();
+    if (Cur.Refs.empty()) {
+      // Copy the node data into the primary store.
+      // The last \p Cur.RefsCount entries of \p PrimaryRefStack are the
+      // already-imported refs of the current node.
+      assert(PrimaryRefStack.size() >= Cur.RefsCount);
+      auto Refs = ArrayRef(PrimaryRefStack)
+                      .slice(PrimaryRefStack.size() - Cur.RefsCount);
+      auto NewNode = store(Refs, Upstream.getData(Cur.Node));
+      if (!NewNode)
+        return NewNode.takeError();
+
+      // Record the mapping first: popping the cursor stack destroys \p Cur.
+      CreatedObjects.try_emplace(Cur.Ref, *NewNode);
+      // Remove the current node and its IDs from the stack.
+      PrimaryRefStack.truncate(PrimaryRefStack.size() - Cur.RefsCount);
+      CursorStack.pop_back();
+      PrimaryRefStack.push_back(*NewNode);
+      continue;
+    }
+
+    // Check if the node exists already.
+    auto CurrentID = Cur.Refs.front();
+    Cur.Refs.pop_front();
+    auto Ref = CreatedObjects.find(CurrentID);
+    if (Ref != CreatedObjects.end()) {
+      // If exists already, just need to enqueue the primary node.
+      PrimaryRefStack.push_back(Ref->second);
+      continue;
+    }
+
+    // Load child.
+    auto PrimaryID = Upstream.load(CurrentID);
+    if (LLVM_UNLIKELY(!PrimaryID))
+      return PrimaryID.takeError();
+
+    enqueueNode(CurrentID, *PrimaryID);
+  }
+
+  assert(PrimaryRefStack.size() == 1);
+  return PrimaryRefStack.front();
+}
+
 std::unique_ptr<MemoryBuffer>
 ObjectProxy::getMemoryBuffer(StringRef Name,
                              bool RequiresNullTerminator) const {

diff  --git a/llvm/lib/CAS/OnDiskCAS.cpp b/llvm/lib/CAS/OnDiskCAS.cpp
new file mode 100644
index 0000000000000..7d29f4499211e
--- /dev/null
+++ b/llvm/lib/CAS/OnDiskCAS.cpp
@@ -0,0 +1,211 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "BuiltinCAS.h"
+#include "llvm/CAS/BuiltinCASContext.h"
+#include "llvm/CAS/BuiltinObjectHasher.h"
+#include "llvm/CAS/OnDiskGraphDB.h"
+#include "llvm/CAS/UnifiedOnDiskCache.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Error.h"
+
+using namespace llvm;
+using namespace llvm::cas;
+using namespace llvm::cas::builtin;
+
+namespace {
+
+class OnDiskCAS : public BuiltinCAS {
+public:
+  Expected<ObjectRef> storeImpl(ArrayRef<uint8_t> ComputedHash,
+                                ArrayRef<ObjectRef> Refs,
+                                ArrayRef<char> Data) final;
+
+  Expected<std::optional<ObjectHandle>> loadIfExists(ObjectRef Ref) final;
+
+  CASID getID(ObjectRef Ref) const final;
+
+  std::optional<ObjectRef> getReference(const CASID &ID) const final;
+
+  Expected<bool> isMaterialized(ObjectRef Ref) const final;
+
+  ArrayRef<char> getDataConst(ObjectHandle Node) const final;
+
+  void print(raw_ostream &OS) const final;
+  Error validate(bool CheckHash) const final;
+  /// Opens a standalone store on its own graph DB; UnifiedDB stays null.
+  static Expected<std::unique_ptr<OnDiskCAS>> open(StringRef Path);
+  /// Builds a store on the graph DB of \p UniDB and keeps \p UniDB alive.
+  explicit OnDiskCAS(std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB)
+      : UnifiedDB(std::move(UniDB)), DB(&UnifiedDB->getGraphDB()) {}
+
+private:
+  ObjectHandle convertHandle(ondisk::ObjectHandle Node) const {
+    return makeObjectHandle(Node.getOpaqueData());
+  }
+
+  ondisk::ObjectHandle convertHandle(ObjectHandle Node) const {
+    return ondisk::ObjectHandle(Node.getInternalRef(*this));
+  }
+
+  ObjectRef convertRef(ondisk::ObjectID Ref) const {
+    return makeObjectRef(Ref.getOpaqueData());
+  }
+
+  ondisk::ObjectID convertRef(ObjectRef Ref) const {
+    return ondisk::ObjectID::fromOpaqueData(Ref.getInternalRef(*this));
+  }
+
+  size_t getNumRefs(ObjectHandle Node) const final {
+    auto RefsRange = DB->getObjectRefs(convertHandle(Node));
+    return std::distance(RefsRange.begin(), RefsRange.end());
+  }
+
+  ObjectRef readRef(ObjectHandle Node, size_t I) const final {
+    auto RefsRange = DB->getObjectRefs(convertHandle(Node));
+    return convertRef(RefsRange.begin()[I]);
+  }
+
+  Error forEachRef(ObjectHandle Node,
+                   function_ref<Error(ObjectRef)> Callback) const final;
+  // NOTE(review): the definitions use UnifiedDB, which open() leaves null.
+  Error setSizeLimit(std::optional<uint64_t> SizeLimit) final;
+  Expected<std::optional<uint64_t>> getStorageSize() const final;
+  Error pruneStorageData() final;
+  /// Takes ownership of \p GraphDB; this is the constructor open() uses.
+  explicit OnDiskCAS(std::unique_ptr<ondisk::OnDiskGraphDB> GraphDB)
+      : OwnedDB(std::move(GraphDB)), DB(OwnedDB.get()) {}
+  // Each constructor sets exactly one of OwnedDB / UnifiedDB; DB points into it.
+  std::unique_ptr<ondisk::OnDiskGraphDB> OwnedDB;
+  std::shared_ptr<ondisk::UnifiedOnDiskCache> UnifiedDB;
+  ondisk::OnDiskGraphDB *DB;
+};
+
+} // end anonymous namespace
+
+void OnDiskCAS::print(raw_ostream &OS) const { DB->print(OS); } // Delegates.
+Error OnDiskCAS::validate(bool CheckHash) const {
+  // Hash callback handed to the graph DB's validation: hashes an object's
+  // refs and data with the builtin hasher and writes the result to \p Out.
+  auto ComputeDigest = [](ArrayRef<ArrayRef<uint8_t>> RefHashes,
+                          ArrayRef<char> Bytes,
+                          SmallVectorImpl<uint8_t> &Out) {
+    auto Digest = BuiltinObjectHasher<HasherT>::hashObject(RefHashes, Bytes);
+    Out.assign(Digest.begin(), Digest.end());
+  };
+
+  // A validation failure is returned to the caller unchanged.
+  return DB->validate(CheckHash, ComputeDigest);
+}
+
+CASID OnDiskCAS::getID(ObjectRef Ref) const {
+  ArrayRef<uint8_t> Digest = DB->getDigest(convertRef(Ref)); // Hash bytes.
+  return CASID::create(&getContext(), toStringRef(Digest));
+}
+
+std::optional<ObjectRef> OnDiskCAS::getReference(const CASID &ID) const {
+  // No graph DB ID for the hash means there is no ref to hand out.
+  if (std::optional<ondisk::ObjectID> Found =
+          DB->getExistingReference(ID.getHash()))
+    return convertRef(*Found);
+  return std::nullopt;
+}
+
+Expected<bool> OnDiskCAS::isMaterialized(ObjectRef ExternalRef) const {
+  return DB->isMaterialized(convertRef(ExternalRef)); // Graph DB decides.
+}
+
+ArrayRef<char> OnDiskCAS::getDataConst(ObjectHandle Node) const {
+  return DB->getObjectData(convertHandle(Node)); // Data held by the DB.
+}
+
+Expected<std::optional<ObjectHandle>>
+OnDiskCAS::loadIfExists(ObjectRef ExternalRef) {
+  // The graph DB reports an error, a missing object, or a loaded handle.
+  std::optional<ondisk::ObjectHandle> Loaded;
+  if (Error E = DB->load(convertRef(ExternalRef)).moveInto(Loaded))
+    return std::move(E);
+  if (Loaded.has_value())
+    return convertHandle(*Loaded);
+  return std::nullopt;
+}
+
+Expected<ObjectRef> OnDiskCAS::storeImpl(ArrayRef<uint8_t> ComputedHash,
+                                         ArrayRef<ObjectRef> Refs,
+                                         ArrayRef<char> Data) {
+  // Map the refs to graph DB IDs, then store the node under the hash's ID.
+  SmallVector<ondisk::ObjectID, 64> RefIDs;
+  RefIDs.reserve(Refs.size());
+  for (ObjectRef R : Refs)
+    RefIDs.push_back(convertRef(R));
+
+  auto NodeID = DB->getReference(ComputedHash);
+  if (LLVM_UNLIKELY(!NodeID))
+    return NodeID.takeError();
+  if (Error Err = DB->store(*NodeID, RefIDs, Data))
+    return std::move(Err);
+  return convertRef(*NodeID);
+}
+
+Error OnDiskCAS::forEachRef(ObjectHandle Node,
+                            function_ref<Error(ObjectRef)> Callback) const {
+  // Hand every ref of \p Node to \p Callback; the first error it returns
+  // stops the walk and is propagated.
+  for (ondisk::ObjectID ID : DB->getObjectRefs(convertHandle(Node)))
+    if (Error Err = Callback(convertRef(ID)))
+      return Err;
+  return Error::success();
+}
+
+Error OnDiskCAS::setSizeLimit(std::optional<uint64_t> SizeLimit) {
+  UnifiedDB->setSizeLimit(SizeLimit); // NOTE(review): null after open().
+  return Error::success();
+}
+
+Expected<std::optional<uint64_t>> OnDiskCAS::getStorageSize() const {
+  return UnifiedDB->getStorageSize(); // NOTE(review): null after open().
+}
+// NOTE(review): UnifiedDB is null when the store came from OnDiskCAS::open().
+Error OnDiskCAS::pruneStorageData() { return UnifiedDB->collectGarbage(); }
+
+Expected<std::unique_ptr<OnDiskCAS>> OnDiskCAS::open(StringRef AbsPath) {
+  auto GraphDB = ondisk::OnDiskGraphDB::open(
+      AbsPath, BuiltinCASContext::getHashName(), sizeof(HashType));
+  if (!GraphDB)
+    return GraphDB.takeError();
+  // Private constructor: the new store takes sole ownership of the graph DB.
+  return std::unique_ptr<OnDiskCAS>(new OnDiskCAS(std::move(*GraphDB)));
+}
+
+bool cas::isOnDiskCASEnabled() {
+#if !LLVM_ENABLE_ONDISK_CAS
+  return false;
+#else
+  return true;
+#endif // !LLVM_ENABLE_ONDISK_CAS
+}
+
+Expected<std::unique_ptr<ObjectStore>> cas::createOnDiskCAS(const Twine &Path) {
+#if LLVM_ENABLE_ONDISK_CAS
+  // FIXME: An absolute path isn't really good enough. Should open a directory
+  // and use openat() for files underneath.
+  SmallString<256> AbsPath;
+  Path.toVector(AbsPath);
+  if (std::error_code EC = sys::fs::make_absolute(AbsPath))
+    return createFileError(AbsPath, EC); // Could not resolve the path.
+  return OnDiskCAS::open(AbsPath);
+#else
+  return createStringError(inconvertibleErrorCode(), "OnDiskCAS is disabled");
+#endif /* LLVM_ENABLE_ONDISK_CAS */
+}
+
+std::unique_ptr<ObjectStore>
+cas::builtin::createObjectStoreFromUnifiedOnDiskCache(
+    std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB) {
+  return std::make_unique<OnDiskCAS>(std::move(UniDB)); // Holds UniDB.
+}

diff  --git a/llvm/lib/CAS/OnDiskGraphDB.cpp b/llvm/lib/CAS/OnDiskGraphDB.cpp
index 64cbe9dc8e159..245b6fb832549 100644
--- a/llvm/lib/CAS/OnDiskGraphDB.cpp
+++ b/llvm/lib/CAS/OnDiskGraphDB.cpp
@@ -893,6 +893,10 @@ int64_t DataRecordHandle::getDataRelOffset() const {
 }
 
 Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const {
+  if (UpstreamDB) {
+    if (auto E = UpstreamDB->validate(Deep, Hasher))
+      return E;
+  }
   return Index.validate([&](FileOffset Offset,
                             OnDiskTrieRawHashMap::ConstValueProxy Record)
                             -> Error {
@@ -1202,11 +1206,8 @@ OnDiskGraphDB::load(ObjectID ExternalRef) {
     return I.takeError();
   TrieRecord::Data Object = I->Ref.load();
 
-  if (Object.SK == TrieRecord::StorageKind::Unknown) {
-    if (!UpstreamDB)
-      return std::nullopt;
+  if (Object.SK == TrieRecord::StorageKind::Unknown)
     return faultInFromUpstream(ExternalRef);
-  }
 
   if (Object.SK == TrieRecord::StorageKind::DataPool)
     return ObjectHandle::fromFileOffset(Object.Offset);
@@ -1286,8 +1287,10 @@ OnDiskGraphDB::getObjectPresence(ObjectID ExternalRef,
   TrieRecord::Data Object = I->Ref.load();
   if (Object.SK != TrieRecord::StorageKind::Unknown)
     return ObjectPresence::InPrimaryDB;
+
   if (!CheckUpstream || !UpstreamDB)
     return ObjectPresence::Missing;
+
   std::optional<ObjectID> UpstreamID =
       UpstreamDB->getExistingReference(getDigest(*I));
   return UpstreamID.has_value() ? ObjectPresence::OnlyInUpstreamDB
@@ -1549,9 +1552,10 @@ unsigned OnDiskGraphDB::getHardStorageLimitUtilization() const {
   return std::max(IndexPercent, DataPercent);
 }
 
-Expected<std::unique_ptr<OnDiskGraphDB>> OnDiskGraphDB::open(
-    StringRef AbsPath, StringRef HashName, unsigned HashByteSize,
-    std::unique_ptr<OnDiskGraphDB> UpstreamDB, FaultInPolicy Policy) {
+Expected<std::unique_ptr<OnDiskGraphDB>>
+OnDiskGraphDB::open(StringRef AbsPath, StringRef HashName,
+                    unsigned HashByteSize, OnDiskGraphDB *UpstreamDB,
+                    FaultInPolicy Policy) {
   if (std::error_code EC = sys::fs::create_directories(AbsPath))
     return createFileError(AbsPath, EC);
 
@@ -1604,18 +1608,15 @@ Expected<std::unique_ptr<OnDiskGraphDB>> OnDiskGraphDB::open(
                              "unexpected user header in '" + DataPoolPath +
                                  "'");
 
-  return std::unique_ptr<OnDiskGraphDB>(
-      new OnDiskGraphDB(AbsPath, std::move(*Index), std::move(*DataPool),
-                        std::move(UpstreamDB), Policy));
+  return std::unique_ptr<OnDiskGraphDB>(new OnDiskGraphDB(
+      AbsPath, std::move(*Index), std::move(*DataPool), UpstreamDB, Policy));
 }
 
 OnDiskGraphDB::OnDiskGraphDB(StringRef RootPath, OnDiskTrieRawHashMap Index,
                              OnDiskDataAllocator DataPool,
-                             std::unique_ptr<OnDiskGraphDB> UpstreamDB,
-                             FaultInPolicy Policy)
+                             OnDiskGraphDB *UpstreamDB, FaultInPolicy Policy)
     : Index(std::move(Index)), DataPool(std::move(DataPool)),
-      RootPath(RootPath.str()), UpstreamDB(std::move(UpstreamDB)),
-      FIPolicy(Policy) {
+      RootPath(RootPath.str()), UpstreamDB(UpstreamDB), FIPolicy(Policy) {
   /// Lifetime for "big" objects not in DataPool.
   ///
   /// NOTE: Could use ThreadSafeTrieRawHashMap here. For now, doing something
@@ -1638,7 +1639,6 @@ Error OnDiskGraphDB::importFullTree(ObjectID PrimaryID,
   // against the process dying during importing and leaving the database with an
   // incomplete tree. Note that if the upstream has missing nodes then the tree
   // will be copied with missing nodes as well, it won't be considered an error.
-
   struct UpstreamCursor {
     ObjectHandle Node;
     size_t RefsCount;
@@ -1720,7 +1720,6 @@ Error OnDiskGraphDB::importSingleNode(ObjectID PrimaryID,
   // Copy the node data into the primary store.
   // FIXME: Use hard-link or cloning if the file-system supports it and data is
   // stored into a separate file.
-
   auto Data = UpstreamDB->getObjectData(UpstreamNode);
   auto UpstreamRefs = UpstreamDB->getObjectRefs(UpstreamNode);
   SmallVector<ObjectID, 64> Refs;
@@ -1737,7 +1736,8 @@ Error OnDiskGraphDB::importSingleNode(ObjectID PrimaryID,
 
 Expected<std::optional<ObjectHandle>>
 OnDiskGraphDB::faultInFromUpstream(ObjectID PrimaryID) {
-  assert(UpstreamDB);
+  if (!UpstreamDB)
+    return std::nullopt;
 
   auto UpstreamID = UpstreamDB->getReference(getDigest(PrimaryID));
   if (LLVM_UNLIKELY(!UpstreamID))

diff  --git a/llvm/lib/CAS/OnDiskKeyValueDB.cpp b/llvm/lib/CAS/OnDiskKeyValueDB.cpp
index 21860717da3bf..15656cb38a5e5 100644
--- a/llvm/lib/CAS/OnDiskKeyValueDB.cpp
+++ b/llvm/lib/CAS/OnDiskKeyValueDB.cpp
@@ -20,6 +20,7 @@
 #include "llvm/CAS/OnDiskKeyValueDB.h"
 #include "OnDiskCommon.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/CAS/UnifiedOnDiskCache.h"
 #include "llvm/Support/Alignment.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Errc.h"
@@ -53,15 +54,21 @@ Expected<std::optional<ArrayRef<char>>>
 OnDiskKeyValueDB::get(ArrayRef<uint8_t> Key) {
   // Check the result cache.
   OnDiskTrieRawHashMap::ConstOnDiskPtr ActionP = Cache.find(Key);
-  if (!ActionP)
+  if (ActionP) {
+    assert(isAddrAligned(Align(8), ActionP->Data.data()));
+    return ActionP->Data;
+  }
+  if (!UnifiedCache || !UnifiedCache->UpstreamKVDB)
     return std::nullopt;
-  assert(isAddrAligned(Align(8), ActionP->Data.data()));
-  return ActionP->Data;
+
+  // Try to fault in from upstream.
+  return UnifiedCache->faultInFromUpstreamKV(Key);
 }
 
 Expected<std::unique_ptr<OnDiskKeyValueDB>>
 OnDiskKeyValueDB::open(StringRef Path, StringRef HashName, unsigned KeySize,
-                       StringRef ValueName, size_t ValueSize) {
+                       StringRef ValueName, size_t ValueSize,
+                       UnifiedOnDiskCache *Cache) {
   if (std::error_code EC = sys::fs::create_directories(Path))
     return createFileError(Path, EC);
 
@@ -87,10 +94,14 @@ OnDiskKeyValueDB::open(StringRef Path, StringRef HashName, unsigned KeySize,
     return std::move(E);
 
   return std::unique_ptr<OnDiskKeyValueDB>(
-      new OnDiskKeyValueDB(ValueSize, std::move(*ActionCache)));
+      new OnDiskKeyValueDB(ValueSize, std::move(*ActionCache), Cache));
 }
 
 Error OnDiskKeyValueDB::validate(CheckValueT CheckValue) const {
+  if (UnifiedCache && UnifiedCache->UpstreamKVDB) {
+    if (auto E = UnifiedCache->UpstreamKVDB->validate(CheckValue))
+      return E;
+  }
   return Cache.validate(
       [&](FileOffset Offset,
           OnDiskTrieRawHashMap::ConstValueProxy Record) -> Error {

diff  --git a/llvm/lib/CAS/UnifiedOnDiskCache.cpp b/llvm/lib/CAS/UnifiedOnDiskCache.cpp
new file mode 100644
index 0000000000000..ae9d818241f4b
--- /dev/null
+++ b/llvm/lib/CAS/UnifiedOnDiskCache.cpp
@@ -0,0 +1,613 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Encapsulates \p OnDiskGraphDB and \p OnDiskKeyValueDB instances within one
+/// directory while also restricting storage growth with a scheme of chaining
+/// the two most recent directories (primary & upstream), where the primary
+/// "faults-in" data from the upstream one. When the primary (most recent)
+/// directory exceeds its intended limit a new empty directory becomes the
+/// primary one.
+///
+/// Within the top-level directory (the path that \p UnifiedOnDiskCache::open
+/// receives) there are directories named like this:
+///
+/// 'v<version>.<x>'
+/// 'v<version>.<x+1>'
+/// 'v<version>.<x+2>'
+/// ...
+///
+/// 'version' is the version integer for this \p UnifiedOnDiskCache's scheme and
+/// the part after the dot is an increasing integer. The primary directory is
+/// the one with the highest integer and the upstream one is the directory
+/// before it. For example, if the sub-directories contained are:
+///
+/// 'v1.5', 'v1.6', 'v1.7', 'v1.8'
+///
+/// Then the primary one is 'v1.8', the upstream one is 'v1.7', and the rest are
+/// unused directories that can be safely deleted at any time and by any
+/// process.
+///
+/// Contained within the top-level directory is a file named "lock" which is
+/// used for processes to take shared or exclusive locks for the contents of the
+/// top directory. While a \p UnifiedOnDiskCache is open it keeps a shared lock
+/// for the top-level directory; when it closes, if the primary sub-directory
+/// exceeded its limit, it attempts to get an exclusive lock in order to create
+/// a new empty primary directory; if it can't get the exclusive lock it gives
+/// up and lets the next \p UnifiedOnDiskCache instance that closes attempt it
+/// again.
+///
+/// The downside of this scheme is that while \p UnifiedOnDiskCache is open on a
+/// directory, by any process, the storage size in that directory will keep
+/// growing unrestricted. But the major benefit is that garbage-collection can
+/// be triggered on a directory concurrently, at any time and by any process,
+/// without affecting any active readers/writers in the same process or other
+/// processes.
+///
+/// The \c UnifiedOnDiskCache also provides validation and recovery on top of
+/// the underlying on-disk storage. The low-level storage is designed to remain
+/// coherent across regular process crashes, but may be invalid after power loss
+/// or similar system failures. \c UnifiedOnDiskCache::validateIfNeeded allows
+/// validating the contents once per boot and can recover by marking invalid
+/// data for garbage collection.
+///
+/// The data recovery described above requires exclusive access to the CAS, and
+/// it is an error to attempt recovery if the CAS is open in any process/thread.
+/// In order to maximize backwards compatibility with tools that do not perform
+/// validation before opening the CAS, we do not attempt to get exclusive access
+/// until recovery is actually performed, meaning as long as the data is valid
+/// it will not conflict with concurrent use.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CAS/UnifiedOnDiskCache.h"
+#include "BuiltinCAS.h"
+#include "OnDiskCommon.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CAS/ActionCache.h"
+#include "llvm/CAS/OnDiskGraphDB.h"
+#include "llvm/CAS/OnDiskKeyValueDB.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/raw_ostream.h"
+#include <optional>
+
+#if __has_include(<sys/sysctl.h>)
+#include <sys/sysctl.h>
+#endif
+
+using namespace llvm;
+using namespace llvm::cas;
+using namespace llvm::cas::ondisk;
+
+/// FIXME: When the version of \p DBDirPrefix is bumped up we need to figure out
+/// how to handle the leftover sub-directories of the previous version, within
+/// the \p UnifiedOnDiskCache::collectGarbage function.
+static constexpr StringLiteral DBDirPrefix = "v1.";
+
+static constexpr StringLiteral ValidationFilename = "v1.validation";
+static constexpr StringLiteral CorruptPrefix = "corrupt.";
+
+ObjectID UnifiedOnDiskCache::getObjectIDFromValue(ArrayRef<char> Value) {
+  // \p Value is the 64-bit opaque ID, little-endian encoded.
+  assert(Value.size() == sizeof(uint64_t));
+  return ObjectID::fromOpaqueData(support::endian::read64le(Value.data()));
+}
+
+UnifiedOnDiskCache::ValueBytes
+UnifiedOnDiskCache::getValueFromObjectID(ObjectID ID) {
+  // Inverse of getObjectIDFromValue(): little-endian bytes of the opaque ID.
+  ValueBytes Encoded;
+  static_assert(Encoded.size() == sizeof(ID.getOpaqueData()));
+  support::endian::write64le(Encoded.data(), ID.getOpaqueData());
+  return Encoded;
+}
+
+Expected<std::optional<ArrayRef<char>>>
+UnifiedOnDiskCache::faultInFromUpstreamKV(ArrayRef<uint8_t> Key) {
+  assert(UpstreamGraphDB);
+  assert(UpstreamKVDB);
+
+  // A key the upstream does not have cannot be faulted in.
+  auto FromUpstream = UpstreamKVDB->get(Key);
+  if (!FromUpstream)
+    return FromUpstream.takeError();
+  if (!FromUpstream->has_value())
+    return std::nullopt;
+
+  // Upstream values are object IDs of the upstream graph DB. Look the object
+  // up by digest in the primary graph DB and record that ID instead.
+  ArrayRef<uint8_t> Digest =
+      UpstreamGraphDB->getDigest(getObjectIDFromValue(**FromUpstream));
+  auto PrimaryID = PrimaryGraphDB->getReference(Digest);
+  if (LLVM_UNLIKELY(!PrimaryID))
+    return PrimaryID.takeError();
+  return PrimaryKVDB->put(Key, getValueFromObjectID(*PrimaryID));
+}
+
+/// \returns the names of all 'v<version>.<x>' sub-directories of \p Path in
+/// ascending order of <x>. With \p IncludeCorrupt, 'corrupt.*' directories
+/// are listed too and come first. Fails on a non-numeric 'v<version>.' suffix.
+static Expected<SmallVector<std::string, 4>>
+getAllDBDirs(StringRef Path, bool IncludeCorrupt = false) {
+  struct DBDir {
+    uint64_t Order;
+    std::string Name;
+  };
+  SmallVector<DBDir> FoundDBDirs;
+
+  std::error_code EC;
+  for (sys::fs::directory_iterator DirI(Path, EC), DirE; !EC && DirI != DirE;
+       DirI.increment(EC)) {
+    if (DirI->type() != sys::fs::file_type::directory_file)
+      continue;
+    StringRef SubDir = sys::path::filename(DirI->path());
+    if (IncludeCorrupt && SubDir.starts_with(CorruptPrefix)) {
+      FoundDBDirs.push_back({0, std::string(SubDir)}); // Order 0 sorts first.
+      continue;
+    }
+    if (!SubDir.starts_with(DBDirPrefix))
+      continue;
+    uint64_t Order;
+    if (SubDir.substr(DBDirPrefix.size()).getAsInteger(10, Order))
+      return createStringError(inconvertibleErrorCode(),
+                               "unexpected directory " + DirI->path());
+    FoundDBDirs.push_back({Order, std::string(SubDir)});
+  }
+  if (EC)
+    return createFileError(Path, EC);
+  // Strict weak ordering ('<', never '<='); equal orders rank by name.
+  llvm::sort(FoundDBDirs, [](const DBDir &LHS, const DBDir &RHS) -> bool {
+    return LHS.Order < RHS.Order ||
+           (LHS.Order == RHS.Order && LHS.Name < RHS.Name);
+  });
+  SmallVector<std::string, 4> DBDirs;
+  for (DBDir &Dir : FoundDBDirs)
+    DBDirs.push_back(std::move(Dir.Name));
+  return DBDirs;
+}
+
+static Expected<SmallVector<std::string, 4>> getAllGarbageDirs(StringRef Path) {
+  auto DBDirs = getAllDBDirs(Path, /*IncludeCorrupt=*/true);
+  if (!DBDirs)
+    return DBDirs.takeError();
+
+  // FIXME: When the version of \p DBDirPrefix is bumped up we need to figure
+  // out how to handle the leftover sub-directories of the previous version.
+  // Keep up to the two newest directories; whatever remains is garbage.
+  for (unsigned Keep = 2; Keep > 0 && !DBDirs->empty(); --Keep) {
+    StringRef Back(DBDirs->back());
+    if (Back.starts_with(CorruptPrefix))
+      break;
+    DBDirs->pop_back();
+  }
+  return std::move(*DBDirs); // Move the names out instead of copying them.
+}
+
+/// Given a sub-directory name 'v<version>.<x>', writes the name of the next
+/// one, 'v<version>.<x+1>', to \p OS.
+static void getNextDBDirName(StringRef DBDir, llvm::raw_ostream &OS) {
+  assert(DBDir.starts_with(DBDirPrefix));
+  uint64_t Count = 0; // Defined even if parsing fails in a no-assert build.
+  bool Failed = DBDir.substr(DBDirPrefix.size()).getAsInteger(10, Count);
+  assert(!Failed);
+  (void)Failed;
+  OS << DBDirPrefix << Count + 1;
+}
+
+static Error validateOutOfProcess(StringRef LLVMCasBinary, StringRef RootPath,
+                                  bool CheckHash) {
+  // Validation runs in a child process: <binary> -cas <root> -validate.
+  SmallVector<StringRef> Args{LLVMCasBinary, "-cas", RootPath, "-validate"};
+  if (CheckHash)
+    Args.push_back("-check-hash");
+
+  // Path-only overload: there is no returned descriptor to close or leak.
+  llvm::SmallString<128> StdErrPath;
+  if (std::error_code EC = sys::fs::createTemporaryFile(
+          "llvm-cas-validate-stderr", "txt", StdErrPath, sys::fs::OF_Text))
+    return createStringError(EC, "failed to create temporary file");
+  FileRemover OutputRemover(StdErrPath.c_str());
+
+  std::optional<llvm::StringRef> Redirects[] = {
+      {""}, // stdin = /dev/null
+      {""}, // stdout = /dev/null
+      StdErrPath.str(),
+  };
+
+  std::string ErrMsg;
+  int Result =
+      sys::ExecuteAndWait(LLVMCasBinary, Args, /*Env=*/std::nullopt, Redirects,
+                          /*SecondsToWait=*/120, /*MemoryLimit=*/0, &ErrMsg);
+
+  if (Result == -1)
+    return createStringError("failed to exec " + join(Args, " ") + ": " +
+                             ErrMsg);
+  if (Result != 0) {
+    llvm::SmallString<64> Err("cas contents invalid");
+    if (!ErrMsg.empty()) {
+      Err += ": ";
+      Err += ErrMsg;
+    }
+    auto StdErrBuf = MemoryBuffer::getFile(StdErrPath.c_str());
+    if (StdErrBuf && !(*StdErrBuf)->getBuffer().empty()) {
+      Err += ": ";
+      Err += (*StdErrBuf)->getBuffer();
+    }
+    return createStringError(Err);
+  }
+  return Error::success();
+}
+
+static Error validateInProcess(StringRef RootPath, StringRef HashName,
+                               unsigned HashByteSize, bool CheckHash) {
+  // Open the cache once and validate it through both of its views.
+  std::shared_ptr<UnifiedOnDiskCache> DB;
+  if (Error Err = UnifiedOnDiskCache::open(RootPath, std::nullopt, HashName,
+                                           HashByteSize)
+                      .moveInto(DB))
+    return Err;
+  // Object store first, then the action cache; stop at the first failure.
+  auto Store = builtin::createObjectStoreFromUnifiedOnDiskCache(DB);
+  if (Error Err = Store->validate(CheckHash))
+    return Err;
+  auto Actions = builtin::createActionCacheFromUnifiedOnDiskCache(DB);
+  return Actions->validate();
+}
+
+static Expected<uint64_t> getBootTime() {
+#if __has_include(<sys/sysctl.h>) && defined(KERN_BOOTTIME)
+  struct timeval TV;
+  size_t TVLen = sizeof(TV);
+  int KernBoot[2] = {CTL_KERN, KERN_BOOTTIME};
+  if (sysctl(KernBoot, 2, &TV, &TVLen, nullptr, 0) < 0)
+    return createStringError(llvm::errnoAsErrorCode(),
+                             "failed to get boottime");
+  if (TVLen != sizeof(TV))
+    return createStringError("sysctl kern.boottime unexpected format");
+  return TV.tv_sec; // Seconds part of the timeval filled in by sysctl.
+#elif defined(__linux__)
+  // Use the mtime of /proc, which is recreated during system boot, as the
+  // boot marker. Reading 'btime' from /proc/stat would be an alternative.
+  sys::fs::file_status Status;
+  if (std::error_code EC = sys::fs::status("/proc", Status))
+    return createFileError("/proc", EC);
+  return Status.getLastModificationTime().time_since_epoch().count();
+#else // No boot-time source is implemented for this platform.
+  llvm::report_fatal_error("getBootTime unimplemented");
+#endif // boot-time source selection
+}
+
+/// Validate the CAS at \p RootPath unless it has already been validated since
+/// the machine booted.
+///
+/// A validation file inside \p RootPath records the boot marker of the last
+/// validation. The file is held under an exclusive lock for the duration of
+/// this call.
+///
+/// \param RootPath directory of the CAS; created if missing.
+/// \param HashName, HashByteSize forwarded to the in-process validation.
+/// \param CheckHash forwarded to whichever validation is run.
+/// \param AllowRecovery if validation fails, move the database directories
+///        aside (renamed with the corrupt prefix) instead of returning the
+///        validation error.
+/// \param ForceValidation validate even if the recorded boot marker matches.
+/// \param LLVMCasBinaryPath if set, validation runs out of process through
+///        this binary; otherwise it runs in process.
+///
+/// \returns \c Skipped if no validation was needed, \c Valid if validation
+/// passed, \c Recovered if it failed and the data was moved aside, or an
+/// error.
+Expected<ValidationResult> UnifiedOnDiskCache::validateIfNeeded(
+    StringRef RootPath, StringRef HashName, unsigned HashByteSize,
+    bool CheckHash, bool AllowRecovery, bool ForceValidation,
+    std::optional<StringRef> LLVMCasBinaryPath) {
+  if (std::error_code EC = sys::fs::create_directories(RootPath))
+    return createFileError(RootPath, EC);
+
+  SmallString<256> PathBuf(RootPath);
+  sys::path::append(PathBuf, ValidationFilename);
+  int FD = -1;
+  if (std::error_code EC = sys::fs::openFileForReadWrite(
+          PathBuf, FD, sys::fs::CD_OpenAlways, sys::fs::OF_None))
+    return createFileError(PathBuf, EC);
+  assert(FD != -1);
+
+  sys::fs::file_t File = sys::fs::convertFDToNativeFile(FD);
+  auto CloseFile = make_scope_exit([&]() { sys::fs::closeFile(File); });
+
+  if (std::error_code EC = lockFileThreadSafe(FD, sys::fs::LockKind::Exclusive))
+    return createFileError(PathBuf, EC);
+  auto UnlockFD = make_scope_exit([&]() { unlockFileThreadSafe(FD); });
+
+  SmallString<8> Bytes;
+  if (Error E = sys::fs::readNativeFileToEOF(File, Bytes))
+    return createFileError(PathBuf, std::move(E));
+
+  // An empty file means no validation has been recorded yet.
+  uint64_t ValidationBootTime = 0;
+  if (!Bytes.empty() &&
+      StringRef(Bytes).trim().getAsInteger(10, ValidationBootTime))
+    return createFileError(PathBuf, errc::illegal_byte_sequence,
+                           "expected integer");
+
+  // NOTE(review): cached in a function-local static without synchronization;
+  // confirm callers cannot race on the first call.
+  static uint64_t BootTime = 0;
+  if (BootTime == 0)
+    if (Error E = getBootTime().moveInto(BootTime))
+      return std::move(E);
+
+  if (ValidationBootTime == BootTime && !ForceValidation)
+    return ValidationResult::Skipped;
+
+  // Validate!
+  bool NeedsRecovery = false;
+  if (Error E =
+          LLVMCasBinaryPath
+              ? validateOutOfProcess(*LLVMCasBinaryPath, RootPath, CheckHash)
+              : validateInProcess(RootPath, HashName, HashByteSize,
+                                  CheckHash)) {
+    if (AllowRecovery) {
+      consumeError(std::move(E));
+      NeedsRecovery = true;
+    } else {
+      return std::move(E);
+    }
+  }
+
+  if (NeedsRecovery) {
+    sys::path::remove_filename(PathBuf);
+    sys::path::append(PathBuf, "lock");
+
+    int LockFD = -1;
+    if (std::error_code EC = sys::fs::openFileForReadWrite(
+            PathBuf, LockFD, sys::fs::CD_OpenAlways, sys::fs::OF_None))
+      return createFileError(PathBuf, EC);
+    sys::fs::file_t LockFile = sys::fs::convertFDToNativeFile(LockFD);
+    auto CloseLock = make_scope_exit([&]() { sys::fs::closeFile(LockFile); });
+    if (std::error_code EC = tryLockFileThreadSafe(LockFD)) {
+      if (EC == std::errc::no_lock_available)
+        return createFileError(
+            PathBuf, EC,
+            "CAS validation requires exclusive access but CAS was in use");
+      return createFileError(PathBuf, EC);
+    }
+    auto UnlockLockFD =
+        make_scope_exit([&]() { unlockFileThreadSafe(LockFD); });
+
+    auto DBDirs = getAllDBDirs(RootPath);
+    if (!DBDirs)
+      return DBDirs.takeError();
+
+    // Move every database directory aside under the first free
+    // "<CorruptPrefix><N>.<dir>" name.
+    for (StringRef DBDir : *DBDirs) {
+      sys::path::remove_filename(PathBuf);
+      sys::path::append(PathBuf, DBDir);
+      std::error_code EC;
+      int Attempt = 0, MaxAttempts = 100;
+      SmallString<128> GCPath;
+      for (; Attempt < MaxAttempts; ++Attempt) {
+        GCPath.assign(RootPath);
+        sys::path::append(GCPath, CorruptPrefix + std::to_string(Attempt) +
+                                      "." + DBDir);
+        EC = sys::fs::rename(PathBuf, GCPath);
+        // Darwin uses ENOTEMPTY. Linux may return either ENOTEMPTY or EEXIST.
+        if (EC != errc::directory_not_empty && EC != errc::file_exists)
+          break;
+      }
+      if (Attempt == MaxAttempts)
+        return createStringError(
+            EC, "rename " + PathBuf +
+                    " failed: too many CAS directories awaiting pruning");
+      if (EC)
+        return createStringError(EC, "rename " + PathBuf + " to " + GCPath +
+                                         " failed: " + EC.message());
+    }
+  }
+
+  if (ValidationBootTime != BootTime) {
+    // Fix filename in case we have error to report.
+    sys::path::remove_filename(PathBuf);
+    sys::path::append(PathBuf, ValidationFilename);
+    if (std::error_code EC = sys::fs::resize_file(FD, 0))
+      return createFileError(PathBuf, EC);
+    raw_fd_ostream OS(FD, /*shouldClose=*/false);
+    OS.seek(0); // resize does not reset position
+    OS << BootTime << '\n';
+    // The stream is buffered: flush now so that a failed write is visible to
+    // has_error() here rather than only when the stream is destroyed.
+    OS.flush();
+    if (OS.has_error()) {
+      std::error_code EC = OS.error();
+      // A stream destroyed with a pending error reports a fatal error, so
+      // clear it once it has been captured for the caller.
+      OS.clear_error();
+      return createFileError(PathBuf, EC);
+    }
+  }
+
+  return NeedsRecovery ? ValidationResult::Recovered : ValidationResult::Valid;
+}
+
+/// Open (creating if needed) the unified on-disk cache rooted at \p RootPath.
+///
+/// A shared lock is taken on the "lock" file in \p RootPath and kept by the
+/// returned instance until it is closed. The most recent database directory
+/// becomes the primary one; if an older directory exists, the one just before
+/// it is opened as the upstream.
+///
+/// \param SizeLimit size limit to record; no value is stored as 0.
+/// \param HashName, HashByteSize forwarded to the underlying databases.
+/// \param FaultInPolicy forwarded to the graph databases.
+///
+/// \returns the opened cache, or an error. On error the lock file descriptor
+/// and its lock are released before returning.
+Expected<std::unique_ptr<UnifiedOnDiskCache>>
+UnifiedOnDiskCache::open(StringRef RootPath, std::optional<uint64_t> SizeLimit,
+                         StringRef HashName, unsigned HashByteSize,
+                         OnDiskGraphDB::FaultInPolicy FaultInPolicy) {
+  if (std::error_code EC = sys::fs::create_directories(RootPath))
+    return createFileError(RootPath, EC);
+
+  SmallString<256> PathBuf(RootPath);
+  sys::path::append(PathBuf, "lock");
+  int LockFD = -1;
+  if (std::error_code EC = sys::fs::openFileForReadWrite(
+          PathBuf, LockFD, sys::fs::CD_OpenAlways, sys::fs::OF_None))
+    return createFileError(PathBuf, EC);
+  assert(LockFD != -1);
+  // Until the descriptor is handed to the returned instance, every early
+  // return must close it. The guards are declared before the databases so
+  // they run after the databases have been destroyed.
+  auto CloseLockOnError = make_scope_exit([&]() {
+    sys::fs::file_t LockFile = sys::fs::convertFDToNativeFile(LockFD);
+    sys::fs::closeFile(LockFile);
+  });
+  // Locking the directory using shared lock, which will prevent other processes
+  // from creating a new chain (essentially while a \p UnifiedOnDiskCache
+  // instance holds a shared lock the storage for the primary directory will
+  // grow unrestricted).
+  if (std::error_code EC =
+          lockFileThreadSafe(LockFD, sys::fs::LockKind::Shared))
+    return createFileError(PathBuf, EC);
+  auto UnlockOnError =
+      make_scope_exit([&]() { unlockFileThreadSafe(LockFD); });
+
+  auto DBDirs = getAllDBDirs(RootPath);
+  if (!DBDirs)
+    return DBDirs.takeError();
+  if (DBDirs->empty())
+    DBDirs->push_back((Twine(DBDirPrefix) + "1").str());
+
+  assert(!DBDirs->empty());
+
+  /// If there is only one directory open databases on it. If there are 2 or
+  /// more directories, get the most recent directories and chain them, with the
+  /// most recent being the primary one. The remaining directories are unused
+  /// data that can be garbage-collected.
+  auto UniDB = std::unique_ptr<UnifiedOnDiskCache>(new UnifiedOnDiskCache());
+  std::unique_ptr<OnDiskGraphDB> UpstreamGraphDB;
+  std::unique_ptr<OnDiskKeyValueDB> UpstreamKVDB;
+  if (DBDirs->size() > 1) {
+    StringRef UpstreamDir = *(DBDirs->end() - 2);
+    PathBuf = RootPath;
+    sys::path::append(PathBuf, UpstreamDir);
+    if (Error E = OnDiskGraphDB::open(PathBuf, HashName, HashByteSize,
+                                      /*UpstreamDB=*/nullptr, FaultInPolicy)
+                      .moveInto(UpstreamGraphDB))
+      return std::move(E);
+    if (Error E = OnDiskKeyValueDB::open(PathBuf, HashName, HashByteSize,
+                                         /*ValueName=*/"objectid",
+                                         /*ValueSize=*/sizeof(uint64_t))
+                      .moveInto(UpstreamKVDB))
+      return std::move(E);
+  }
+
+  StringRef PrimaryDir = *(DBDirs->end() - 1);
+  PathBuf = RootPath;
+  sys::path::append(PathBuf, PrimaryDir);
+  std::unique_ptr<OnDiskGraphDB> PrimaryGraphDB;
+  if (Error E = OnDiskGraphDB::open(PathBuf, HashName, HashByteSize,
+                                    UpstreamGraphDB.get(), FaultInPolicy)
+                    .moveInto(PrimaryGraphDB))
+    return std::move(E);
+  std::unique_ptr<OnDiskKeyValueDB> PrimaryKVDB;
+  // \p UnifiedOnDiskCache does manual chaining for key-value requests,
+  // including an extra translation step of the value during fault-in.
+  if (Error E =
+          OnDiskKeyValueDB::open(PathBuf, HashName, HashByteSize,
+                                 /*ValueName=*/"objectid",
+                                 /*ValueSize=*/sizeof(uint64_t), UniDB.get())
+              .moveInto(PrimaryKVDB))
+    return std::move(E);
+
+  UniDB->RootPath = RootPath;
+  UniDB->SizeLimit = SizeLimit.value_or(0);
+  // From here on the instance owns the descriptor and its lock, and close()
+  // releases both, so disarm the error-path guards.
+  UniDB->LockFD = LockFD;
+  UnlockOnError.release();
+  CloseLockOnError.release();
+  // More than two directories means at least one is no longer part of the
+  // primary/upstream chain.
+  UniDB->NeedsGarbageCollection = DBDirs->size() > 2;
+  UniDB->PrimaryDBDir = PrimaryDir;
+  UniDB->UpstreamGraphDB = std::move(UpstreamGraphDB);
+  UniDB->PrimaryGraphDB = std::move(PrimaryGraphDB);
+  UniDB->UpstreamKVDB = std::move(UpstreamKVDB);
+  UniDB->PrimaryKVDB = std::move(PrimaryKVDB);
+
+  return std::move(UniDB);
+}
+
+/// Replace the stored size limit; an empty \p SizeLimit is stored as 0.
+void UnifiedOnDiskCache::setSizeLimit(std::optional<uint64_t> SizeLimit) {
+  const uint64_t NewLimit = SizeLimit.has_value() ? *SizeLimit : 0;
+  this->SizeLimit = NewLimit;
+}
+
+/// Sum of the storage sizes reported by the primary databases and by
+/// whichever upstream databases are present.
+uint64_t UnifiedOnDiskCache::getStorageSize() const {
+  const uint64_t PrimarySize = getPrimaryStorageSize();
+  // The upstream databases are absent when there is no older directory.
+  const uint64_t UpstreamGraphSize =
+      UpstreamGraphDB ? UpstreamGraphDB->getStorageSize() : 0;
+  const uint64_t UpstreamKVSize =
+      UpstreamKVDB ? UpstreamKVDB->getStorageSize() : 0;
+  return PrimarySize + UpstreamGraphSize + UpstreamKVSize;
+}
+
+/// Sum of the storage sizes reported by the primary graph and key-value
+/// databases.
+uint64_t UnifiedOnDiskCache::getPrimaryStorageSize() const {
+  const uint64_t GraphSize = PrimaryGraphDB->getStorageSize();
+  const uint64_t KVSize = PrimaryKVDB->getStorageSize();
+  return GraphSize + KVSize;
+}
+
+/// Report whether the primary directory has grown enough that a new chain
+/// should be started.
+bool UnifiedOnDiskCache::hasExceededSizeLimit() const {
+  // Read the limit once; a limit of zero never counts as exceeded.
+  const uint64_t Limit = SizeLimit;
+  if (Limit == 0)
+    return false;
+
+  // Request clean up as soon as either primary database uses more than 85% of
+  // its hard storage limit.
+  const unsigned GraphPercent =
+      PrimaryGraphDB->getHardStorageLimitUtilization();
+  const unsigned KVPercent = PrimaryKVDB->getHardStorageLimitUtilization();
+  if (std::max(GraphPercent, KVPercent) > 85)
+    return true;
+
+  // Each directory in the chain is allowed to grow to half of the intended
+  // limit, so only the primary directory is measured here.
+  //
+  // Measuring primary plus upstream instead would start a new chain too early
+  // whenever the upstream is much larger than intended, before the primary
+  // has reached its own share. Reclaiming the storage later is preferred
+  // because it keeps cache-hit behavior more consistent.
+  return getPrimaryStorageSize() > Limit / 2;
+}
+
+/// Close the databases and release the lock held on the root directory.
+///
+/// If \p CheckSizeLimit is set and the size limit was exceeded, this also
+/// tries to create the next database directory so that the next open starts a
+/// new primary. Calling close() on an already closed instance is a no-op.
+///
+/// \returns an error if unlocking, re-locking or creating the new directory
+/// fails. Failing to obtain the exclusive lock because it is unavailable is
+/// not an error.
+Error UnifiedOnDiskCache::close(bool CheckSizeLimit) {
+  if (LockFD == -1)
+    return Error::success(); // already closed.
+  // Whatever path is taken below, close the lock file and mark this instance
+  // as closed on the way out.
+  auto CloseLock = make_scope_exit([&]() {
+    assert(LockFD >= 0);
+    sys::fs::file_t LockFile = sys::fs::convertFDToNativeFile(LockFD);
+    sys::fs::closeFile(LockFile);
+    LockFD = -1;
+  });
+
+  // The size check reads the primary databases, so it must happen before they
+  // are reset.
+  bool ExceededSizeLimit = CheckSizeLimit ? hasExceededSizeLimit() : false;
+  UpstreamKVDB.reset();
+  PrimaryKVDB.reset();
+  UpstreamGraphDB.reset();
+  PrimaryGraphDB.reset();
+  if (std::error_code EC = unlockFileThreadSafe(LockFD))
+    return createFileError(RootPath, EC);
+
+  if (!ExceededSizeLimit)
+    return Error::success();
+
+  // The primary directory exceeded its intended size limit. Try to get an
+  // exclusive lock in order to create a new primary directory for next time
+  // this \p UnifiedOnDiskCache path is opened.
+
+  if (std::error_code EC = tryLockFileThreadSafe(
+          LockFD, std::chrono::milliseconds(0), sys::fs::LockKind::Exclusive)) {
+    if (EC == errc::no_lock_available)
+      return Error::success(); // couldn't get exclusive lock, give up.
+    return createFileError(RootPath, EC);
+  }
+  // Declared after CloseLock, so the unlock runs before the file is closed.
+  auto UnlockFile = make_scope_exit([&]() { unlockFileThreadSafe(LockFD); });
+
+  // Managed to get an exclusive lock which means there are no other open
+  // \p UnifiedOnDiskCache instances for the same path, so we can safely start a
+  // new primary directory. To start a new primary directory we just have to
+  // create a new empty directory with the next consecutive index; since this is
+  // an atomic operation we will leave the top-level directory in a consistent
+  // state even if the process dies during this code-path.
+
+  // Build "<RootPath><separator><next directory name>" in place.
+  SmallString<256> PathBuf(RootPath);
+  raw_svector_ostream OS(PathBuf);
+  OS << sys::path::get_separator();
+  getNextDBDirName(PrimaryDBDir, OS);
+  if (std::error_code EC = sys::fs::create_directory(PathBuf))
+    return createFileError(PathBuf, EC);
+
+  NeedsGarbageCollection = true;
+  return Error::success();
+}
+
+// Default construction only; open() fills in the members afterwards.
+UnifiedOnDiskCache::UnifiedOnDiskCache() = default;
+
+// Close on destruction. A destructor has no way to report a failure, so any
+// error from close() is discarded.
+UnifiedOnDiskCache::~UnifiedOnDiskCache() { consumeError(close()); }
+
+/// Delete every garbage directory found under \p Path.
+///
+/// \returns the error from listing the directories, or a file error naming the
+/// first directory that could not be removed; success otherwise.
+Error UnifiedOnDiskCache::collectGarbage(StringRef Path) {
+  auto GarbageDirs = getAllGarbageDirs(Path);
+  if (Error ListErr = GarbageDirs.takeError())
+    return ListErr;
+
+  // One buffer is reused: each directory name is appended for the removal and
+  // stripped again before moving on to the next one.
+  SmallString<256> DirPath(Path);
+  for (StringRef GarbageDir : *GarbageDirs) {
+    sys::path::append(DirPath, GarbageDir);
+    std::error_code RemoveEC = sys::fs::remove_directories(DirPath);
+    if (RemoveEC)
+      return createFileError(DirPath, RemoveEC);
+    sys::path::remove_filename(DirPath);
+  }
+  return Error::success();
+}
+
+/// Collect garbage under this instance's own root path.
+Error UnifiedOnDiskCache::collectGarbage() { return collectGarbage(RootPath); }

diff  --git a/llvm/unittests/CAS/ActionCacheTest.cpp b/llvm/unittests/CAS/ActionCacheTest.cpp
index db67e30ca203b..692da230b6e09 100644
--- a/llvm/unittests/CAS/ActionCacheTest.cpp
+++ b/llvm/unittests/CAS/ActionCacheTest.cpp
@@ -21,7 +21,7 @@ using namespace llvm;
 using namespace llvm::cas;
 
 TEST_P(CASTest, ActionCacheHit) {
-  std::shared_ptr<ObjectStore> CAS = createObjectStore();
+  std::unique_ptr<ObjectStore> CAS = createObjectStore();
   std::unique_ptr<ActionCache> Cache = createActionCache();
 
   std::optional<ObjectProxy> ID;
@@ -36,7 +36,7 @@ TEST_P(CASTest, ActionCacheHit) {
 }
 
 TEST_P(CASTest, ActionCacheMiss) {
-  std::shared_ptr<ObjectStore> CAS = createObjectStore();
+  std::unique_ptr<ObjectStore> CAS = createObjectStore();
   std::unique_ptr<ActionCache> Cache = createActionCache();
 
   std::optional<ObjectProxy> ID1, ID2;
@@ -59,7 +59,7 @@ TEST_P(CASTest, ActionCacheMiss) {
 }
 
 TEST_P(CASTest, ActionCacheRewrite) {
-  std::shared_ptr<ObjectStore> CAS = createObjectStore();
+  std::unique_ptr<ObjectStore> CAS = createObjectStore();
   std::unique_ptr<ActionCache> Cache = createActionCache();
 
   std::optional<ObjectProxy> ID1, ID2;

diff  --git a/llvm/unittests/CAS/BuiltinUnifiedCASDatabasesTest.cpp b/llvm/unittests/CAS/BuiltinUnifiedCASDatabasesTest.cpp
new file mode 100644
index 0000000000000..19522e9372d85
--- /dev/null
+++ b/llvm/unittests/CAS/BuiltinUnifiedCASDatabasesTest.cpp
@@ -0,0 +1,67 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CAS/BuiltinUnifiedCASDatabases.h"
+#include "CASTestConfig.h"
+#include "llvm/CAS/ActionCache.h"
+#include "llvm/CAS/ObjectStore.h"
+#include "llvm/Testing/Support/Error.h"
+#include "llvm/Testing/Support/SupportHelpers.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace llvm::cas;
+
+// Checks that asking whether an object is materialized keeps that object
+// available after the storage has been pruned and the CAS reopened.
+TEST_F(OnDiskCASTest, UnifiedCASMaterializationCheckPreventsGarbageCollection) {
+  unittest::TempDir Temp("on-disk-unified-cas", /*Unique=*/true);
+
+  // Open the databases on the shared temp directory, run \p Action on the
+  // object store, and close them again when the lambda returns.
+  auto WithCAS = [&](llvm::function_ref<void(ObjectStore &)> Action) {
+    std::pair<std::unique_ptr<ObjectStore>, std::unique_ptr<ActionCache>> DBs;
+    ASSERT_THAT_ERROR(
+        createOnDiskUnifiedCASDatabases(Temp.path()).moveInto(DBs),
+        Succeeded());
+    ObjectStore &CAS = *DBs.first;
+    // Presumably a limit this small makes every close exceed it, so that each
+    // reopen works on a fresh primary directory -- confirm against the
+    // on-disk CAS implementation.
+    ASSERT_THAT_ERROR(CAS.setSizeLimit(1), Succeeded());
+    Action(CAS);
+  };
+
+  std::optional<CASID> ID;
+
+  // Create an object in the CAS.
+  WithCAS([&ID](ObjectStore &CAS) {
+    std::optional<ObjectRef> Ref;
+    ASSERT_THAT_ERROR(CAS.store({}, "blah").moveInto(Ref), Succeeded());
+    ASSERT_TRUE(Ref.has_value());
+
+    ID = CAS.getID(*Ref);
+  });
+  // A failed ASSERT_* inside the lambdas only leaves the lambda, so make sure
+  // the ID was really produced before the later stages dereference it.
+  ASSERT_TRUE(ID.has_value());
+
+  // Check materialization and prune the storage.
+  WithCAS([&ID](ObjectStore &CAS) {
+    std::optional<ObjectRef> Ref = CAS.getReference(*ID);
+    ASSERT_TRUE(Ref.has_value());
+
+    std::optional<bool> IsMaterialized;
+    ASSERT_THAT_ERROR(CAS.isMaterialized(*Ref).moveInto(IsMaterialized),
+                      Succeeded());
+    // Test the contained value: the optional itself converts to true as soon
+    // as it holds any value, including false.
+    ASSERT_TRUE(IsMaterialized.has_value());
+    ASSERT_TRUE(*IsMaterialized);
+
+    ASSERT_THAT_ERROR(CAS.pruneStorageData(), Succeeded());
+  });
+
+  // Verify that the previous materialization check kept the object in the CAS.
+  WithCAS([&ID](ObjectStore &CAS) {
+    std::optional<ObjectRef> Ref = CAS.getReference(*ID);
+    ASSERT_TRUE(Ref.has_value());
+
+    std::optional<bool> IsMaterialized;
+    ASSERT_THAT_ERROR(CAS.isMaterialized(*Ref).moveInto(IsMaterialized),
+                      Succeeded());
+    ASSERT_TRUE(IsMaterialized.has_value());
+    ASSERT_TRUE(*IsMaterialized);
+  });
+}

diff  --git a/llvm/unittests/CAS/CASTestConfig.cpp b/llvm/unittests/CAS/CASTestConfig.cpp
index 10e4b689e151e..08cbf1daf727d 100644
--- a/llvm/unittests/CAS/CASTestConfig.cpp
+++ b/llvm/unittests/CAS/CASTestConfig.cpp
@@ -8,6 +8,7 @@
 
 #include "CASTestConfig.h"
 #include "llvm/CAS/ObjectStore.h"
+#include "llvm/Testing/Support/Error.h"
 #include "gtest/gtest.h"
 #include <mutex>
 
@@ -15,7 +16,8 @@ using namespace llvm;
 using namespace llvm::cas;
 
 static CASTestingEnv createInMemory(int I) {
-  return CASTestingEnv{createInMemoryCAS(), createInMemoryActionCache()};
+  return CASTestingEnv{createInMemoryCAS(), createInMemoryActionCache(),
+                       std::nullopt};
 }
 
 INSTANTIATE_TEST_SUITE_P(InMemoryCAS, CASTest,
@@ -23,7 +25,7 @@ INSTANTIATE_TEST_SUITE_P(InMemoryCAS, CASTest,
 
 #if LLVM_ENABLE_ONDISK_CAS
 namespace llvm::cas::ondisk {
-extern void setMaxMappingSize(uint64_t Size);
+void setMaxMappingSize(uint64_t Size);
 } // namespace llvm::cas::ondisk
 
 void setMaxOnDiskCASMappingSize() {
@@ -31,6 +33,17 @@ void setMaxOnDiskCASMappingSize() {
   std::call_once(
       Flag, [] { llvm::cas::ondisk::setMaxMappingSize(100 * 1024 * 1024); });
 }
+
+// Build a testing environment whose object store and action cache are both
+// created on the same unique temporary directory. The directory object is
+// returned inside the environment.
+static CASTestingEnv createOnDisk(int I) {
+  unittest::TempDir Dir("on-disk-cas", /*Unique=*/true);
+
+  std::unique_ptr<ObjectStore> Store;
+  EXPECT_THAT_ERROR(createOnDiskCAS(Dir.path()).moveInto(Store), Succeeded());
+
+  std::unique_ptr<ActionCache> Actions;
+  EXPECT_THAT_ERROR(createOnDiskActionCache(Dir.path()).moveInto(Actions),
+                    Succeeded());
+
+  return CASTestingEnv{std::move(Store), std::move(Actions), std::move(Dir)};
+}
+INSTANTIATE_TEST_SUITE_P(OnDiskCAS, CASTest, ::testing::Values(createOnDisk));
 #else
 void setMaxOnDiskCASMappingSize() {}
 #endif /* LLVM_ENABLE_ONDISK_CAS */

diff  --git a/llvm/unittests/CAS/CASTestConfig.h b/llvm/unittests/CAS/CASTestConfig.h
index 8d3c55305f1b3..b1c0e59ff2b92 100644
--- a/llvm/unittests/CAS/CASTestConfig.h
+++ b/llvm/unittests/CAS/CASTestConfig.h
@@ -6,16 +6,28 @@
 //
 //===----------------------------------------------------------------------===//
 
+#ifndef LLVM_UNITTESTS_CASTESTCONFIG_H
+#define LLVM_UNITTESTS_CASTESTCONFIG_H
+
 #include "llvm/CAS/ActionCache.h"
 #include "llvm/CAS/ObjectStore.h"
+#include "llvm/Testing/Support/SupportHelpers.h"
 #include "gtest/gtest.h"
+#include <memory>
 
-#ifndef LLVM_UNITTESTS_CASTESTCONFIG_H
-#define LLVM_UNITTESTS_CASTESTCONFIG_H
+namespace llvm::unittest::cas {
+/// Polymorphic base for test environments; CASTest stores instances of it
+/// through owning pointers.
+class MockEnv {
+  // Declared here and defined out of line. NOTE(review): presumably the usual
+  // anchor that pins the vtable to one translation unit -- confirm where it
+  // is defined.
+  void anchor();
+
+public:
+  // Virtual so that derived environments can be destroyed through MockEnv.
+  virtual ~MockEnv();
+};
+} // namespace llvm::unittest::cas
 
 struct CASTestingEnv {
   std::unique_ptr<llvm::cas::ObjectStore> CAS;
   std::unique_ptr<llvm::cas::ActionCache> Cache;
+  std::optional<llvm::unittest::TempDir> Temp;
 };
 
 void setMaxOnDiskCASMappingSize();
@@ -24,26 +36,47 @@ void setMaxOnDiskCASMappingSize();
 class OnDiskCASTest : public ::testing::Test {
 protected:
   void SetUp() override {
+#if !LLVM_ENABLE_ONDISK_CAS
+    GTEST_SKIP() << "OnDiskCAS is not enabled";
+#endif
     // Use a smaller database size for testing to conserve disk space.
     setMaxOnDiskCASMappingSize();
   }
 };
 
+// Parameterized test fixture for ObjectStore and ActionCache tests.
 class CASTest
     : public testing::TestWithParam<std::function<CASTestingEnv(int)>> {
 protected:
   std::optional<int> NextCASIndex;
 
+  llvm::SmallVector<llvm::unittest::TempDir> Dirs;
+
+  llvm::SmallVector<std::unique_ptr<llvm::unittest::cas::MockEnv>> Envs;
+
   std::unique_ptr<llvm::cas::ObjectStore> createObjectStore() {
     auto TD = GetParam()(++(*NextCASIndex));
+    if (TD.Temp)
+      Dirs.push_back(std::move(*TD.Temp));
     return std::move(TD.CAS);
   }
   std::unique_ptr<llvm::cas::ActionCache> createActionCache() {
     auto TD = GetParam()(++(*NextCASIndex));
+    if (TD.Temp)
+      Dirs.push_back(std::move(*TD.Temp));
     return std::move(TD.Cache);
   }
-  void SetUp() override { NextCASIndex = 0; }
-  void TearDown() override { NextCASIndex = std::nullopt; }
+
+  void SetUp() override {
+    NextCASIndex = 0;
+    setMaxOnDiskCASMappingSize();
+  }
+
+  void TearDown() override {
+    NextCASIndex = std::nullopt;
+    Dirs.clear();
+    Envs.clear();
+  }
 };
 
 #endif

diff  --git a/llvm/unittests/CAS/CMakeLists.txt b/llvm/unittests/CAS/CMakeLists.txt
index da469f7fccb5a..91e49be770745 100644
--- a/llvm/unittests/CAS/CMakeLists.txt
+++ b/llvm/unittests/CAS/CMakeLists.txt
@@ -1,9 +1,11 @@
 set(ONDISK_CAS_TEST_SOURCES
+  BuiltinUnifiedCASDatabasesTest.cpp
   OnDiskGraphDBTest.cpp
   OnDiskDataAllocatorTest.cpp
   OnDiskKeyValueDBTest.cpp
   OnDiskTrieRawHashMapTest.cpp
   ProgramTest.cpp
+  UnifiedOnDiskCacheTest.cpp
   )
 
 set(LLVM_OPTIONAL_SOURCES

diff  --git a/llvm/unittests/CAS/ObjectStoreTest.cpp b/llvm/unittests/CAS/ObjectStoreTest.cpp
index 54083fdb408f6..b43ae33d74127 100644
--- a/llvm/unittests/CAS/ObjectStoreTest.cpp
+++ b/llvm/unittests/CAS/ObjectStoreTest.cpp
@@ -1,4 +1,4 @@
-//===- ObjectStoreTest.cpp ------------------------------------------------===//
+//===----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -76,7 +76,7 @@ multiline text multiline text multiline text multiline text multiline text)",
 
   // Run validation on all CASIDs.
   for (int I = 0, E = IDs.size(); I != E; ++I)
-    ASSERT_THAT_ERROR(CAS1->validate(IDs[I]), Succeeded());
+    ASSERT_THAT_ERROR(CAS1->validateObject(IDs[I]), Succeeded());
 
   // Check that the blobs can be retrieved multiple times.
   for (int I = 0, E = IDs.size(); I != E; ++I) {
@@ -120,15 +120,15 @@ TEST_P(CASTest, BlobsBig) {
     std::optional<CASID> ID2;
     ASSERT_THAT_ERROR(CAS->createProxy({}, String1).moveInto(ID1), Succeeded());
     ASSERT_THAT_ERROR(CAS->createProxy({}, String1).moveInto(ID2), Succeeded());
-    ASSERT_THAT_ERROR(CAS->validate(*ID1), Succeeded());
-    ASSERT_THAT_ERROR(CAS->validate(*ID2), Succeeded());
+    ASSERT_THAT_ERROR(CAS->validateObject(*ID1), Succeeded());
+    ASSERT_THAT_ERROR(CAS->validateObject(*ID2), Succeeded());
     ASSERT_EQ(ID1, ID2);
 
     String1.append(String2);
     ASSERT_THAT_ERROR(CAS->createProxy({}, String2).moveInto(ID1), Succeeded());
     ASSERT_THAT_ERROR(CAS->createProxy({}, String2).moveInto(ID2), Succeeded());
-    ASSERT_THAT_ERROR(CAS->validate(*ID1), Succeeded());
-    ASSERT_THAT_ERROR(CAS->validate(*ID2), Succeeded());
+    ASSERT_THAT_ERROR(CAS->validateObject(*ID1), Succeeded());
+    ASSERT_THAT_ERROR(CAS->validateObject(*ID2), Succeeded());
     ASSERT_EQ(ID1, ID2);
     String2.append(String1);
   }
@@ -176,10 +176,11 @@ multiline text multiline text multiline text multiline text multiline text)",
 
     // Check basic printing of IDs.
     IDs.push_back(CAS1->getID(*Node));
-    auto ID = CAS1->getID(Nodes.back());
-    EXPECT_EQ(ID.toString(), IDs.back().toString());
-    EXPECT_EQ(*Node, Nodes.back());
-    EXPECT_EQ(ID, IDs.back());
+    EXPECT_EQ(IDs.back().toString(), IDs.back().toString());
+    EXPECT_EQ(Nodes.front(), Nodes.front());
+    EXPECT_EQ(Nodes.back(), Nodes.back());
+    EXPECT_EQ(IDs.front(), IDs.front());
+    EXPECT_EQ(IDs.back(), IDs.back());
     if (Nodes.size() <= 1)
       continue;
     EXPECT_NE(Nodes.front(), Nodes.back());
@@ -266,7 +267,7 @@ TEST_P(CASTest, NodesBig) {
   }
 
   for (auto ID : CreatedNodes)
-    ASSERT_THAT_ERROR(CAS->validate(CAS->getID(ID)), Succeeded());
+    ASSERT_THAT_ERROR(CAS->validateObject(CAS->getID(ID)), Succeeded());
 }
 
 #if LLVM_ENABLE_THREADS
@@ -332,17 +333,124 @@ static void testBlobsParallel1(ObjectStore &CAS, uint64_t BlobSize) {
 }
 
 TEST_P(CASTest, BlobsParallel) {
-  std::shared_ptr<ObjectStore> CAS = createObjectStore();
+  std::unique_ptr<ObjectStore> CAS = createObjectStore();
   uint64_t Size = 1ULL * 1024;
   ASSERT_NO_FATAL_FAILURE(testBlobsParallel1(*CAS, Size));
 }
 
 #ifdef EXPENSIVE_CHECKS
 TEST_P(CASTest, BlobsBigParallel) {
-  std::shared_ptr<ObjectStore> CAS = createObjectStore();
+  std::unique_ptr<ObjectStore> CAS = createObjectStore();
   // 100k is large enough to be standalone files in our on-disk cas.
   uint64_t Size = 100ULL * 1024;
   ASSERT_NO_FATAL_FAILURE(testBlobsParallel1(*CAS, Size));
 }
 #endif // EXPENSIVE_CHECKS
+
+#ifndef _WIN32 // create_link won't work for directories on Windows
+TEST_F(OnDiskCASTest, OnDiskCASBlobsParallelMultiCAS) {
+  // The same CAS is deliberately opened through several symlinked paths. That
+  // subverts the shared memory mappings a single process would normally get
+  // for one CAS, and it also breaks the lock file guarantees. The CAS objects
+  // must therefore not be created or destroyed concurrently, which is when
+  // the locks normally matter.
+  unittest::TempDir Temp("on-disk-cas", /*Unique=*/true);
+  ASSERT_EQ(sys::fs::create_directory(Temp.path("real_cas")),
+            std::error_code());
+  for (const char *LinkName : {"sym_cas1", "sym_cas2", "sym_cas3"}) {
+    ASSERT_EQ(sys::fs::create_link("real_cas", Temp.path(LinkName)),
+              std::error_code());
+  }
+
+  // One store on the real directory and one per symlink, opened in sequence.
+  std::unique_ptr<ObjectStore> RealCAS, Link1CAS, Link2CAS, Link3CAS;
+  ASSERT_THAT_ERROR(createOnDiskCAS(Temp.path("real_cas")).moveInto(RealCAS),
+                    Succeeded());
+  ASSERT_THAT_ERROR(createOnDiskCAS(Temp.path("sym_cas1")).moveInto(Link1CAS),
+                    Succeeded());
+  ASSERT_THAT_ERROR(createOnDiskCAS(Temp.path("sym_cas2")).moveInto(Link2CAS),
+                    Succeeded());
+  ASSERT_THAT_ERROR(createOnDiskCAS(Temp.path("sym_cas3")).moveInto(Link3CAS),
+                    Succeeded());
+
+  const uint64_t BlobSize = 1ULL * 1024;
+  ASSERT_NO_FATAL_FAILURE(
+      testBlobsParallel(*RealCAS, *Link1CAS, *Link2CAS, *Link3CAS, BlobSize));
+}
+
+TEST_F(OnDiskCASTest, OnDiskCASBlobsBigParallelMultiCAS) {
+  // Same symlink setup as OnDiskCASBlobsParallelMultiCAS, with bigger blobs.
+  unittest::TempDir Temp("on-disk-cas", /*Unique=*/true);
+  ASSERT_EQ(sys::fs::create_directory(Temp.path("real_cas")),
+            std::error_code());
+  for (const char *LinkName : {"sym_cas1", "sym_cas2", "sym_cas3"}) {
+    ASSERT_EQ(sys::fs::create_link("real_cas", Temp.path(LinkName)),
+              std::error_code());
+  }
+
+  // One store on the real directory and one per symlink, opened in sequence.
+  std::unique_ptr<ObjectStore> RealCAS, Link1CAS, Link2CAS, Link3CAS;
+  ASSERT_THAT_ERROR(createOnDiskCAS(Temp.path("real_cas")).moveInto(RealCAS),
+                    Succeeded());
+  ASSERT_THAT_ERROR(createOnDiskCAS(Temp.path("sym_cas1")).moveInto(Link1CAS),
+                    Succeeded());
+  ASSERT_THAT_ERROR(createOnDiskCAS(Temp.path("sym_cas2")).moveInto(Link2CAS),
+                    Succeeded());
+  ASSERT_THAT_ERROR(createOnDiskCAS(Temp.path("sym_cas3")).moveInto(Link3CAS),
+                    Succeeded());
+
+  // 100k is large enough to be standalone files in our on-disk cas.
+  const uint64_t BlobSize = 100ULL * 1024;
+  ASSERT_NO_FATAL_FAILURE(
+      testBlobsParallel(*RealCAS, *Link1CAS, *Link2CAS, *Link3CAS, BlobSize));
+}
+#endif // _WIN32
 #endif // LLVM_ENABLE_THREADS
+
+// Checks the on-disk size of the index and data files while the CAS is open
+// and after it has been closed.
+TEST_F(OnDiskCASTest, OnDiskCASDiskSize) {
+  unittest::TempDir Temp("on-disk-cas", /*Unique=*/true);
+  std::unique_ptr<ObjectStore> CAS;
+  ASSERT_THAT_ERROR(createOnDiskCAS(Temp.path()).moveInto(CAS), Succeeded());
+
+  uint64_t MaxSize = 100 * 1024 * 1024;
+
+  // Walk the directory and compare the size of every "index." and "data."
+  // file (ignoring the ".shared" ones) against the maximum: equal while
+  // mapped, strictly smaller otherwise. Both kinds of file must be present.
+  auto CheckFileSizes = [&](bool Mapped) {
+    bool SawIndex = false;
+    bool SawData = false;
+    std::error_code EC;
+    for (sys::fs::directory_iterator It(Temp.path(), EC), End;
+         It != End && !EC; It.increment(EC)) {
+      StringRef Name = sys::path::filename(It->path());
+      if (Name.ends_with(".shared"))
+        continue;
+      const bool IsIndex = Name.starts_with("index.");
+      const bool IsData = Name.starts_with("data.");
+      if (!IsIndex && !IsData)
+        continue;
+
+      SawIndex |= IsIndex;
+      SawData |= IsData;
+      ASSERT_TRUE(It->status());
+      if (Mapped) {
+        EXPECT_EQ(It->status()->getSize(), MaxSize);
+      } else {
+        EXPECT_LT(It->status()->getSize(), MaxSize);
+      }
+    }
+    ASSERT_TRUE(SawIndex);
+    ASSERT_TRUE(SawData);
+  };
+
+  // While the CAS is open the files have the full mapping size.
+  CheckFileSizes(/*Mapped=*/true);
+  CAS.reset();
+  // Once closed, the files are shrunk to a smaller size.
+  CheckFileSizes(/*Mapped=*/false);
+
+  // Same expectations when starting from an already existing CAS.
+  ASSERT_THAT_ERROR(createOnDiskCAS(Temp.path()).moveInto(CAS), Succeeded());
+  CheckFileSizes(/*Mapped=*/true);
+  CAS.reset();
+  CheckFileSizes(/*Mapped=*/false);
+}

diff  --git a/llvm/unittests/CAS/OnDiskCommonUtils.h b/llvm/unittests/CAS/OnDiskCommonUtils.h
index 89f93e08366c9..48a1830f9b219 100644
--- a/llvm/unittests/CAS/OnDiskCommonUtils.h
+++ b/llvm/unittests/CAS/OnDiskCommonUtils.h
@@ -12,6 +12,8 @@
 
 #include "llvm/CAS/BuiltinObjectHasher.h"
 #include "llvm/CAS/OnDiskGraphDB.h"
+#include "llvm/CAS/OnDiskKeyValueDB.h"
+#include "llvm/CAS/UnifiedOnDiskCache.h"
 #include "llvm/Support/BLAKE3.h"
 #include "llvm/Testing/Support/Error.h"
 
@@ -58,6 +60,25 @@ inline Expected<ObjectID> store(OnDiskGraphDB &DB, StringRef Data,
   return ID;
 }
 
+/// Store \p ID under \p Key in \p DB, encoded as a key-value database value.
+///
+/// \returns the object ID decoded from the value that put() hands back, or the
+/// error from put().
+inline Expected<ObjectID> cachePut(OnDiskKeyValueDB &DB, ArrayRef<uint8_t> Key,
+                                   ObjectID ID) {
+  auto EncodedID = UnifiedOnDiskCache::getValueFromObjectID(ID);
+  auto Stored = DB.put(Key, EncodedID);
+  if (Error PutErr = Stored.takeError())
+    return std::move(PutErr);
+  return UnifiedOnDiskCache::getObjectIDFromValue(*Stored);
+}
+
+/// Look up \p Key in \p DB.
+///
+/// \returns the object ID decoded from the stored value, \c std::nullopt if
+/// the lookup produced no value, or the error from get().
+inline Expected<std::optional<ObjectID>> cacheGet(OnDiskKeyValueDB &DB,
+                                                  ArrayRef<uint8_t> Key) {
+  auto Lookup = DB.get(Key);
+  if (Error GetErr = Lookup.takeError())
+    return std::move(GetErr);
+  if (*Lookup)
+    return UnifiedOnDiskCache::getObjectIDFromValue(**Lookup);
+  return std::nullopt;
+}
+
 inline Error printTree(OnDiskGraphDB &DB, ObjectID ID, raw_ostream &OS,
                        unsigned Indent = 0) {
   std::optional<ondisk::ObjectHandle> Obj;

diff  --git a/llvm/unittests/CAS/OnDiskGraphDBTest.cpp b/llvm/unittests/CAS/OnDiskGraphDBTest.cpp
index 3c2e96318a5ed..e9c73bfb6c8d3 100644
--- a/llvm/unittests/CAS/OnDiskGraphDBTest.cpp
+++ b/llvm/unittests/CAS/OnDiskGraphDBTest.cpp
@@ -102,7 +102,7 @@ TEST_F(OnDiskCASTest, OnDiskGraphDBFaultInSingleNode) {
   std::unique_ptr<OnDiskGraphDB> DB;
   ASSERT_THAT_ERROR(
       OnDiskGraphDB::open(Temp.path(), "blake3", sizeof(HashType),
-                          std::move(UpstreamDB),
+                          UpstreamDB.get(),
                           OnDiskGraphDB::FaultInPolicy::SingleNode)
           .moveInto(DB),
       Succeeded());
@@ -208,7 +208,7 @@ TEST_F(OnDiskCASTest, OnDiskGraphDBFaultInFullTree) {
   unittest::TempDir Temp("ondiskcas", /*Unique=*/true);
   std::unique_ptr<OnDiskGraphDB> DB;
   ASSERT_THAT_ERROR(OnDiskGraphDB::open(Temp.path(), "blake3", sizeof(HashType),
-                                        std::move(UpstreamDB),
+                                        UpstreamDB.get(),
                                         OnDiskGraphDB::FaultInPolicy::FullTree)
                         .moveInto(DB),
                     Succeeded());
@@ -264,14 +264,14 @@ TEST_F(OnDiskCASTest, OnDiskGraphDBFaultInPolicyConflict) {
     unittest::TempDir Temp("ondiskcas", /*Unique=*/true);
     std::unique_ptr<OnDiskGraphDB> DB;
     ASSERT_THAT_ERROR(OnDiskGraphDB::open(Temp.path(), "blake3",
-                                          sizeof(HashType),
-                                          std::move(UpstreamDB), Policy1)
+                                          sizeof(HashType), UpstreamDB.get(),
+                                          Policy1)
                           .moveInto(DB),
                       Succeeded());
     DB.reset();
     ASSERT_THAT_ERROR(OnDiskGraphDB::open(Temp.path(), "blake3",
-                                          sizeof(HashType),
-                                          std::move(UpstreamDB), Policy2)
+                                          sizeof(HashType), UpstreamDB.get(),
+                                          Policy2)
                           .moveInto(DB),
                       Failed());
   };

diff  --git a/llvm/unittests/CAS/UnifiedOnDiskCacheTest.cpp b/llvm/unittests/CAS/UnifiedOnDiskCacheTest.cpp
new file mode 100644
index 0000000000000..09aebc2d4bc19
--- /dev/null
+++ b/llvm/unittests/CAS/UnifiedOnDiskCacheTest.cpp
@@ -0,0 +1,198 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CAS/UnifiedOnDiskCache.h"
+#include "CASTestConfig.h"
+#include "OnDiskCommonUtils.h"
+#include "llvm/Testing/Support/Error.h"
+#include "llvm/Testing/Support/SupportHelpers.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace llvm::cas;
+using namespace llvm::cas::ondisk;
+using namespace llvm::unittest::cas;
+
+/// Recursively walks the directory at \p Path and returns the combined size of
+/// every non-directory entry found beneath it.
+///
+/// If iterating a directory or querying an entry's status fails, the error is
+/// returned via createFileError() for the corresponding path.
+static Expected<size_t> countFileSizes(StringRef Path) {
+  std::error_code IterEC;
+  size_t Sum = 0;
+  sys::fs::directory_iterator Entry(Path, IterEC), End;
+  for (; !IterEC && Entry != End; Entry.increment(IterEC)) {
+    if (Entry->type() != sys::fs::file_type::directory_file) {
+      // Non-directory entry: add the size reported by its status.
+      ErrorOr<sys::fs::basic_file_status> Status = Entry->status();
+      if (!Status)
+        return createFileError(Entry->path(), Status.getError());
+      Sum += Status->getSize();
+    } else {
+      // Directory: recurse and fold the subtree total into the running sum.
+      Expected<size_t> Nested = countFileSizes(Entry->path());
+      if (!Nested)
+        return Nested.takeError();
+      Sum += *Nested;
+    }
+  }
+  if (IterEC)
+    return createFileError(Path, IterEC);
+  return Sum;
+}
+
+/// Exercises UnifiedOnDiskCache end to end: populates the graph and key-value
+/// databases, grows the store past its size limit, and checks what remains
+/// reachable after reopening and after garbage collection.
+TEST_F(OnDiskCASTest, UnifiedOnDiskCacheTest) {
+  unittest::TempDir Temp("ondisk-unified", /*Unique=*/true);
+  std::unique_ptr<UnifiedOnDiskCache> UniDB;
+
+  const uint64_t SizeLimit = 1024ull * 64;
+  // Drops the current instance and opens the cache at Temp.path() again.
+  // A fatal assertion inside a void lambda only returns from the lambda, so
+  // every call is wrapped in ASSERT_NO_FATAL_FAILURE to keep the test from
+  // continuing with a null UniDB.
+  auto reopenDB = [&]() {
+    UniDB.reset();
+    ASSERT_THAT_ERROR(UnifiedOnDiskCache::open(Temp.path(), SizeLimit, "blake3",
+                                               sizeof(HashType))
+                          .moveInto(UniDB),
+                      Succeeded());
+  };
+
+  ASSERT_NO_FATAL_FAILURE(reopenDB());
+
+  HashType RootHash;
+  HashType OtherHash;
+  HashType Key1Hash;
+  HashType Key2Hash;
+  {
+    // Build a small tree ("root" referencing "1" and "2") plus a standalone
+    // "other" node, remembering the digests so they can be looked up later.
+    OnDiskGraphDB &DB = UniDB->getGraphDB();
+    std::optional<ObjectID> ID1;
+    ASSERT_THAT_ERROR(store(DB, "1", {}).moveInto(ID1), Succeeded());
+    std::optional<ObjectID> ID2;
+    ASSERT_THAT_ERROR(store(DB, "2", {}).moveInto(ID2), Succeeded());
+    std::optional<ObjectID> IDRoot;
+    ASSERT_THAT_ERROR(store(DB, "root", {*ID1, *ID2}).moveInto(IDRoot),
+                      Succeeded());
+    ArrayRef<uint8_t> Digest = DB.getDigest(*IDRoot);
+    ASSERT_EQ(Digest.size(), RootHash.size());
+    llvm::copy(Digest, RootHash.data());
+
+    std::optional<ObjectID> IDOther;
+    ASSERT_THAT_ERROR(store(DB, "other", {}).moveInto(IDOther), Succeeded());
+    Digest = DB.getDigest(*IDOther);
+    ASSERT_EQ(Digest.size(), OtherHash.size());
+    llvm::copy(Digest, OtherHash.data());
+
+    // key1 -> root.
+    Key1Hash = digest("key1");
+    std::optional<ObjectID> Val;
+    ASSERT_THAT_ERROR(
+        cachePut(UniDB->getKeyValueDB(), Key1Hash, *IDRoot).moveInto(Val),
+        Succeeded());
+    EXPECT_EQ(IDRoot, Val);
+
+    // key2 -> "1"; the key digest is obtained through a graph DB reference.
+    Key2Hash = digest("key2");
+    std::optional<ObjectID> KeyID;
+    ASSERT_THAT_ERROR(DB.getReference(Key2Hash).moveInto(KeyID), Succeeded());
+    ASSERT_THAT_ERROR(cachePut(UniDB->getKeyValueDB(),
+                               UniDB->getGraphDB().getDigest(*KeyID), *ID1)
+                          .moveInto(Val),
+                      Succeeded());
+  }
+
+  // Prints the tree rooted at Digest and compares it with ExpectedTree.
+  auto checkTree = [&](const HashType &Digest, StringRef ExpectedTree) {
+    OnDiskGraphDB &DB = UniDB->getGraphDB();
+    std::optional<ObjectID> ID;
+    ASSERT_THAT_ERROR(DB.getReference(Digest).moveInto(ID), Succeeded());
+    std::string PrintedTree;
+    raw_string_ostream OS(PrintedTree);
+    ASSERT_THAT_ERROR(printTree(DB, *ID, OS), Succeeded());
+    EXPECT_EQ(PrintedTree, ExpectedTree);
+  };
+  auto checkRootTree = [&]() {
+    return checkTree(RootHash, "root\n  1\n  2\n");
+  };
+
+  // Looks up Key in the key-value DB, loads the object it maps to, and
+  // compares that object's data with ExpectedData.
+  auto checkKey = [&](const HashType &Key, StringRef ExpectedData) {
+    OnDiskGraphDB &DB = UniDB->getGraphDB();
+    std::optional<ObjectID> Val;
+    ASSERT_THAT_ERROR(cacheGet(UniDB->getKeyValueDB(), Key).moveInto(Val),
+                      Succeeded());
+
+    ASSERT_TRUE(Val.has_value());
+    std::optional<ondisk::ObjectHandle> Obj;
+    ASSERT_THAT_ERROR(DB.load(*Val).moveInto(Obj), Succeeded());
+    EXPECT_EQ(toStringRef(DB.getObjectData(*Obj)), ExpectedData);
+  };
+
+  checkRootTree();
+  checkTree(OtherHash, "other\n");
+  checkKey(Key1Hash, "root");
+  checkKey(Key2Hash, "1");
+
+  // Stores a leaf object made of 970 'a' bytes followed by the decimal Index,
+  // so each distinct Index produces distinct content.
+  auto storeBigObject = [&](unsigned Index) {
+    SmallString<1000> Buf;
+    Buf.append(970, 'a');
+    raw_svector_ostream(Buf) << Index;
+    std::optional<ObjectID> ID;
+    ASSERT_THAT_ERROR(store(UniDB->getGraphDB(), Buf, {}).moveInto(ID),
+                      Succeeded());
+  };
+
+  uint64_t PrevStoreSize = UniDB->getStorageSize();
+  unsigned Index = 0;
+  while (!UniDB->hasExceededSizeLimit()) {
+    // Stop on a failed store; otherwise this loop could spin forever without
+    // the size limit ever being exceeded.
+    ASSERT_NO_FATAL_FAILURE(storeBigObject(Index++));
+  }
+  EXPECT_GT(UniDB->getStorageSize(), PrevStoreSize);
+  UniDB->setSizeLimit(SizeLimit * 2);
+  EXPECT_FALSE(UniDB->hasExceededSizeLimit());
+  UniDB->setSizeLimit(SizeLimit);
+  EXPECT_TRUE(UniDB->hasExceededSizeLimit());
+
+  ASSERT_NO_FATAL_FAILURE(reopenDB());
+
+  // After reopening, the test expects the limit to no longer be exceeded while
+  // the root tree and key1 remain reachable.
+  EXPECT_FALSE(UniDB->hasExceededSizeLimit());
+  EXPECT_FALSE(UniDB->needsGarbageCollection());
+
+  checkRootTree();
+  checkKey(Key1Hash, "root");
+
+  while (!UniDB->hasExceededSizeLimit()) {
+    // Same guard as above: never keep looping after a failed store.
+    ASSERT_NO_FATAL_FAILURE(storeBigObject(Index++));
+  }
+  PrevStoreSize = UniDB->getStorageSize();
+  ASSERT_THAT_ERROR(UniDB->close(), Succeeded());
+  EXPECT_TRUE(UniDB->needsGarbageCollection());
+
+  ASSERT_NO_FATAL_FAILURE(reopenDB());
+  EXPECT_TRUE(UniDB->needsGarbageCollection());
+
+  // Garbage collection is expected to shrink the on-disk footprint.
+  std::optional<size_t> DirSizeBefore;
+  ASSERT_THAT_ERROR(countFileSizes(Temp.path()).moveInto(DirSizeBefore),
+                    Succeeded());
+
+  ASSERT_THAT_ERROR(UnifiedOnDiskCache::collectGarbage(Temp.path()),
+                    Succeeded());
+
+  std::optional<size_t> DirSizeAfter;
+  ASSERT_THAT_ERROR(countFileSizes(Temp.path()).moveInto(DirSizeAfter),
+                    Succeeded());
+  EXPECT_LT(*DirSizeAfter, *DirSizeBefore);
+
+  ASSERT_NO_FATAL_FAILURE(reopenDB());
+  EXPECT_FALSE(UniDB->needsGarbageCollection());
+
+  checkRootTree();
+  checkKey(Key1Hash, "root");
+
+  EXPECT_LT(UniDB->getStorageSize(), PrevStoreSize);
+
+  // 'Other' tree and 'Key2' got garbage-collected.
+  {
+    OnDiskGraphDB &DB = UniDB->getGraphDB();
+    std::optional<ObjectID> ID;
+    ASSERT_THAT_ERROR(DB.getReference(OtherHash).moveInto(ID), Succeeded());
+    EXPECT_FALSE(DB.containsObject(*ID));
+    std::optional<ObjectID> Val;
+    ASSERT_THAT_ERROR(cacheGet(UniDB->getKeyValueDB(), Key2Hash).moveInto(Val),
+                      Succeeded());
+    EXPECT_FALSE(Val.has_value());
+  }
+}


        


More information about the llvm-commits mailing list