[llvm] [llvm][cas] Validate OnDiskKeyValueDB against the corresponding OnDiskGraphDB (PR #180852)

Ben Langmuir via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 10 14:47:49 PST 2026


https://github.com/benlangmuir created https://github.com/llvm/llvm-project/pull/180852

We were previously using the primary OnDiskGraphDB when validating the upstream OnDiskKeyValueDB, which is incorrect since the values being stored are direct offsets and therefore cannot be used across DBs without translating to a hash value first.

rdar://170067863

>From dd0fa55cc50ec7da316d277590c5bb236c99f4b2 Mon Sep 17 00:00:00 2001
From: Ben Langmuir <blangmuir at apple.com>
Date: Tue, 10 Feb 2026 13:56:53 -0800
Subject: [PATCH] [llvm][cas] Validate OnDiskKeyValueDB against the
 corresponding OnDiskGraphDB

We were previously using the primary OnDiskGraphDB when validating the upstream
OnDiskKeyValueDB, which is incorrect since the values being stored are direct
offsets and therefore cannot be used across DBs without translating to a hash
value first.

rdar://170067863
---
 llvm/include/llvm/CAS/OnDiskKeyValueDB.h    |  7 ++---
 llvm/lib/CAS/ActionCaches.cpp               |  6 +----
 llvm/lib/CAS/OnDiskKeyValueDB.cpp           | 30 ++++++++++++++++-----
 llvm/lib/CAS/UnifiedOnDiskCache.cpp         | 15 +----------
 llvm/test/tools/llvm-cas/validation.test    |  7 +++++
 llvm/unittests/CAS/OnDiskKeyValueDBTest.cpp |  8 +-----
 6 files changed, 35 insertions(+), 38 deletions(-)

diff --git a/llvm/include/llvm/CAS/OnDiskKeyValueDB.h b/llvm/include/llvm/CAS/OnDiskKeyValueDB.h
index 123e20ed9117f..68cced665f28e 100644
--- a/llvm/include/llvm/CAS/OnDiskKeyValueDB.h
+++ b/llvm/include/llvm/CAS/OnDiskKeyValueDB.h
@@ -72,11 +72,8 @@ class OnDiskKeyValueDB {
        UnifiedOnDiskCache *UnifiedCache = nullptr,
        std::shared_ptr<OnDiskCASLogger> Logger = nullptr);
 
-  using CheckValueT =
-      function_ref<Error(FileOffset Offset, ArrayRef<char> Data)>;
-  /// Validate the storage with a callback \p CheckValue to check the stored
-  /// value.
-  LLVM_ABI_FOR_TEST Error validate(CheckValueT CheckValue) const;
+  /// Validate the storage.
+  LLVM_ABI_FOR_TEST Error validate() const;
 
 private:
   OnDiskKeyValueDB(size_t ValueSize, OnDiskTrieRawHashMap Cache,
diff --git a/llvm/lib/CAS/ActionCaches.cpp b/llvm/lib/CAS/ActionCaches.cpp
index 89cdd481e185a..ffeca0ad3251c 100644
--- a/llvm/lib/CAS/ActionCaches.cpp
+++ b/llvm/lib/CAS/ActionCaches.cpp
@@ -197,11 +197,7 @@ Error OnDiskActionCache::putImpl(ArrayRef<uint8_t> Key, const CASID &Result,
       ArrayRef((const uint8_t *)Observed.data(), Observed.size()));
 }
 
-Error OnDiskActionCache::validate() const {
-  // FIXME: without the matching CAS there is nothing we can check about the
-  // cached values. The hash size is already validated by the DB validator.
-  return DB->validate(nullptr);
-}
+Error OnDiskActionCache::validate() const { return DB->validate(); }
 
 UnifiedOnDiskActionCache::UnifiedOnDiskActionCache(
     std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB)
diff --git a/llvm/lib/CAS/OnDiskKeyValueDB.cpp b/llvm/lib/CAS/OnDiskKeyValueDB.cpp
index d67d4455bcdd8..b8883f3d75082 100644
--- a/llvm/lib/CAS/OnDiskKeyValueDB.cpp
+++ b/llvm/lib/CAS/OnDiskKeyValueDB.cpp
@@ -20,6 +20,7 @@
 #include "llvm/CAS/OnDiskKeyValueDB.h"
 #include "OnDiskCommon.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/CAS/OnDiskTrieRawHashMap.h"
 #include "llvm/CAS/UnifiedOnDiskCache.h"
 #include "llvm/Support/Alignment.h"
 #include "llvm/Support/Compiler.h"
@@ -99,11 +100,8 @@ OnDiskKeyValueDB::open(StringRef Path, StringRef HashName, unsigned KeySize,
       new OnDiskKeyValueDB(ValueSize, std::move(*ActionCache), Cache));
 }
 
-Error OnDiskKeyValueDB::validate(CheckValueT CheckValue) const {
-  if (UnifiedCache && UnifiedCache->UpstreamKVDB) {
-    if (auto E = UnifiedCache->UpstreamKVDB->validate(CheckValue))
-      return E;
-  }
+static Error validateOnDiskKeyValueDB(const OnDiskTrieRawHashMap &Cache,
+                                      size_t ValueSize, OnDiskGraphDB *CAS) {
   return Cache.validate(
       [&](FileOffset Offset,
           OnDiskTrieRawHashMap::ConstValueProxy Record) -> Error {
@@ -119,8 +117,26 @@ Error OnDiskKeyValueDB::validate(CheckValueT CheckValue) const {
           return formatError("wrong cache value size");
         if (!isAddrAligned(Align(8), Record.Data.data()))
           return formatError("wrong cache value alignment");
-        if (CheckValue)
-          return CheckValue(Offset, Record.Data);
+        if (CAS) {
+          auto ID =
+              ondisk::UnifiedOnDiskCache::getObjectIDFromValue(Record.Data);
+          if (Error E = CAS->validateObjectID(ID))
+            return formatError(llvm::toString(std::move(E)));
+        }
         return Error::success();
       });
 }
+
+Error OnDiskKeyValueDB::validate() const {
+  if (UnifiedCache && UnifiedCache->UpstreamKVDB) {
+    assert(UnifiedCache->UpstreamGraphDB &&
+           "upstream cache and cas must be paired");
+    if (auto E = validateOnDiskKeyValueDB(UnifiedCache->UpstreamKVDB->Cache,
+                                          UnifiedCache->UpstreamKVDB->ValueSize,
+                                          UnifiedCache->UpstreamGraphDB.get()))
+      return E;
+  }
+  return validateOnDiskKeyValueDB(
+      Cache, ValueSize,
+      UnifiedCache ? UnifiedCache->PrimaryGraphDB.get() : nullptr);
+}
diff --git a/llvm/lib/CAS/UnifiedOnDiskCache.cpp b/llvm/lib/CAS/UnifiedOnDiskCache.cpp
index ac3552c4f6330..3d0a8b3eba135 100644
--- a/llvm/lib/CAS/UnifiedOnDiskCache.cpp
+++ b/llvm/lib/CAS/UnifiedOnDiskCache.cpp
@@ -252,20 +252,7 @@ static Error validateOutOfProcess(StringRef LLVMCasBinary, StringRef RootPath,
 }
 
 Error UnifiedOnDiskCache::validateActionCache() const {
-  auto ValidateRef = [this](FileOffset Offset, ArrayRef<char> Value) -> Error {
-    auto ID = ondisk::UnifiedOnDiskCache::getObjectIDFromValue(Value);
-    auto formatError = [&](Twine Msg) {
-      return createStringError(
-          llvm::errc::illegal_byte_sequence,
-          "bad record at 0x" +
-              utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) + ": " +
-              Msg.str());
-    };
-    if (Error E = this->getGraphDB().validateObjectID(ID))
-      return formatError(llvm::toString(std::move(E)));
-    return Error::success();
-  };
-  return getKeyValueDB().validate(ValidateRef);
+  return getKeyValueDB().validate();
 }
 
 static Error validateInProcess(StringRef RootPath, StringRef HashName,
diff --git a/llvm/test/tools/llvm-cas/validation.test b/llvm/test/tools/llvm-cas/validation.test
index db7047d7e3904..a41cb8a3faad8 100644
--- a/llvm/test/tools/llvm-cas/validation.test
+++ b/llvm/test/tools/llvm-cas/validation.test
@@ -26,6 +26,13 @@ RUN:   --data - >%t/abc.casid
 RUN: llvm-cas --cas %t/ac --put-cache-key @%t/abc.casid @%t/empty.casid
 RUN: llvm-cas --cas %t/ac --validate
 
+# Check that validation passes in an upstream db.
+RUN: mkdir %t/ac/v1.2
+RUN: llvm-cas --cas %t/ac --validate
+# Fault in from upstream DB.
+RUN: llvm-cas --cas %t/ac --get-cache-result @%t/abc.casid
+RUN: llvm-cas --cas %t/ac --validate
+
 # Check that validation fails if the objects referenced are missing.
 RUN: mv %t/ac/v1.1/index.v1 %t/tmp.index.v1
 RUN: not llvm-cas --cas %t/ac --validate
diff --git a/llvm/unittests/CAS/OnDiskKeyValueDBTest.cpp b/llvm/unittests/CAS/OnDiskKeyValueDBTest.cpp
index 41512d04b60bd..79a23829337dc 100644
--- a/llvm/unittests/CAS/OnDiskKeyValueDBTest.cpp
+++ b/llvm/unittests/CAS/OnDiskKeyValueDBTest.cpp
@@ -49,13 +49,7 @@ TEST_F(OnDiskCASTest, OnDiskKeyValueDBTest) {
   }
 
   // Validate
-  {
-    auto ValidateFunc = [](FileOffset Offset, ArrayRef<char> Data) -> Error {
-      EXPECT_EQ(Data.size(), sizeof(ValueType));
-      return Error::success();
-    };
-    ASSERT_THAT_ERROR(DB->validate(ValidateFunc), Succeeded());
-  }
+  ASSERT_THAT_ERROR(DB->validate(), Succeeded());
 
   // Size
   {



More information about the llvm-commits mailing list