[llvm] ebb61a5 - [CAS] Add llvm-cas tools to inspect on-disk LLVMCAS (#166481)

via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 7 10:32:59 PST 2025


Author: Steven Wu
Date: 2025-11-07T10:32:55-08:00
New Revision: ebb61a5beaa25b834deee60a31c84c59ba53f288

URL: https://github.com/llvm/llvm-project/commit/ebb61a5beaa25b834deee60a31c84c59ba53f288
DIFF: https://github.com/llvm/llvm-project/commit/ebb61a5beaa25b834deee60a31c84c59ba53f288.diff

LOG: [CAS] Add llvm-cas tools to inspect on-disk LLVMCAS (#166481)

Add a command-line tool `llvm-cas` to inspect the on-disk CAS for
debugging purposes. It can be used to look up or update the ObjectStore,
or to put/get cache entries in the ActionCache, together with other
debugging capabilities.

Added: 
    llvm/test/tools/llvm-cas/Inputs/oneline
    llvm/test/tools/llvm-cas/Inputs/oneline-nonewline
    llvm/test/tools/llvm-cas/action-cache.test
    llvm/test/tools/llvm-cas/cache.test
    llvm/test/tools/llvm-cas/dump.test
    llvm/test/tools/llvm-cas/lit.local.cfg
    llvm/test/tools/llvm-cas/make-blob.test
    llvm/test/tools/llvm-cas/make-node.test
    llvm/test/tools/llvm-cas/print-id.test
    llvm/test/tools/llvm-cas/validation.test
    llvm/tools/llvm-cas/CMakeLists.txt
    llvm/tools/llvm-cas/Options.td
    llvm/tools/llvm-cas/llvm-cas.cpp

Modified: 
    llvm/test/CMakeLists.txt
    llvm/test/lit.cfg.py
    llvm/test/lit.site.cfg.py.in

Removed: 
    


################################################################################
diff  --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt
index f01422e3b0990..e547c3429058b 100644
--- a/llvm/test/CMakeLists.txt
+++ b/llvm/test/CMakeLists.txt
@@ -30,6 +30,7 @@ llvm_canonicalize_cmake_booleans(
   LLVM_INCLUDE_SPIRV_TOOLS_TESTS
   LLVM_APPEND_VC_REV
   LLVM_HAS_LOGF128
+  LLVM_ENABLE_ONDISK_CAS
   )
 
 configure_lit_site_cfg(
@@ -81,6 +82,7 @@ set(LLVM_TEST_DEPENDS
   llvm-bcanalyzer
   llvm-bitcode-strip
   llvm-c-test
+  llvm-cas
   llvm-cat
   llvm-cfi-verify
   llvm-cgdata

diff  --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index 35ea8b84d7ec1..d30cd2002d36c 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -233,6 +233,7 @@ def get_asan_rtlib():
         "llvm-addr2line",
         "llvm-bcanalyzer",
         "llvm-bitcode-strip",
+        "llvm-cas",
         "llvm-cgdata",
         "llvm-config",
         "llvm-cov",
@@ -796,6 +797,9 @@ def host_unwind_supports_jit():
 if config.expensive_checks:
     config.available_features.add("expensive_checks")
 
+if config.have_ondisk_cas:
+    config.available_features.add("ondisk_cas")
+
 if "MemoryWithOrigins" in config.llvm_use_sanitizer:
     config.available_features.add("use_msan_with_origins")
 

diff  --git a/llvm/test/lit.site.cfg.py.in b/llvm/test/lit.site.cfg.py.in
index 973e0ec934a52..c5cb7160a3d40 100644
--- a/llvm/test/lit.site.cfg.py.in
+++ b/llvm/test/lit.site.cfg.py.in
@@ -66,6 +66,7 @@ config.spirv_tools_tests = @LLVM_INCLUDE_SPIRV_TOOLS_TESTS@
 config.have_vc_rev = @LLVM_APPEND_VC_REV@
 config.force_vc_rev = "@LLVM_FORCE_VC_REVISION@"
 config.has_logf128 = @LLVM_HAS_LOGF128@
+config.have_ondisk_cas = @LLVM_ENABLE_ONDISK_CAS@
 
 import lit.llvm
 lit.llvm.initialize(lit_config, config)

diff  --git a/llvm/test/tools/llvm-cas/Inputs/oneline b/llvm/test/tools/llvm-cas/Inputs/oneline
new file mode 100644
index 0000000000000..d95f3ad14dee6
--- /dev/null
+++ b/llvm/test/tools/llvm-cas/Inputs/oneline
@@ -0,0 +1 @@
+content

diff  --git a/llvm/test/tools/llvm-cas/Inputs/oneline-nonewline b/llvm/test/tools/llvm-cas/Inputs/oneline-nonewline
new file mode 100644
index 0000000000000..6b584e8ece562
--- /dev/null
+++ b/llvm/test/tools/llvm-cas/Inputs/oneline-nonewline
@@ -0,0 +1 @@
+content
\ No newline at end of file

diff  --git a/llvm/test/tools/llvm-cas/action-cache.test b/llvm/test/tools/llvm-cas/action-cache.test
new file mode 100644
index 0000000000000..fcb212c24e215
--- /dev/null
+++ b/llvm/test/tools/llvm-cas/action-cache.test
@@ -0,0 +1,14 @@
+RUN: rm -rf %t %t.cas
+RUN: mkdir %t
+
+RUN: llvm-cas --cas %t.cas --make-blob \
+RUN:   --data %S/Inputs/oneline >%t/oneline.casid
+RUN: llvm-cas --cas %t.cas --make-blob \
+RUN:   --data %S/Inputs/oneline-nonewline >%t/oneline-nonewline.casid
+
+RUN: llvm-cas --cas %t.cas --put-cache-key @%t/oneline.casid @%t/oneline-nonewline.casid
+RUN: llvm-cas --cas %t.cas --get-cache-result @%t/oneline.casid > %t/result.casid
+RUN: diff %t/oneline-nonewline.casid %t/result.casid
+
+RUN: not llvm-cas --cas %t.cas --get-cache-result @%t/oneline-nonewline.casid 2>&1 | FileCheck %s
+CHECK: result not found

diff  --git a/llvm/test/tools/llvm-cas/cache.test b/llvm/test/tools/llvm-cas/cache.test
new file mode 100644
index 0000000000000..f0ce69190d418
--- /dev/null
+++ b/llvm/test/tools/llvm-cas/cache.test
@@ -0,0 +1,14 @@
+RUN: rm -rf %t %t.cas
+RUN: mkdir %t
+
+RUN: llvm-cas --cas %t.cas --make-blob \
+RUN:   --data /dev/null > %t/empty.casid
+RUN: echo "abc" | \
+RUN:   llvm-cas --cas %t.cas --make-blob \
+RUN:   --data - >%t/abc.casid
+
+RUN: llvm-cas --cas %t/cas --put-cache-key @%t/abc.casid @%t/empty.casid
+RUN: llvm-cas --cas %t/cas --get-cache-result @%t/abc.casid > %t/empty2.casid
+RUN: diff %t/empty.casid %t/empty2.casid
+
+RUN: not llvm-cas --cas %t/cas --get-cache-result @%t/empty.casid

diff  --git a/llvm/test/tools/llvm-cas/dump.test b/llvm/test/tools/llvm-cas/dump.test
new file mode 100644
index 0000000000000..f23bac6cdf849
--- /dev/null
+++ b/llvm/test/tools/llvm-cas/dump.test
@@ -0,0 +1,27 @@
+RUN: rm -rf %t
+RUN: mkdir %t
+
+RUN: llvm-cas --cas %t/cas --make-blob \
+RUN:   --data - </dev/null
+
+RUN: llvm-cas --cas %t/cas --make-blob \
+RUN:   --data %s
+
+RUN: llvm-cas --cas %t/cas --dump | FileCheck %s
+
+// check the dump format.
+CHECK:      index:
+CHECK-NEXT: hash-num-bits=
+CHECK-NEXT: root addr=
+// it should have at least one index
+CHECK-NEXT: - index=
+
+// two records
+CHECK:      record
+CHECK-NEXT: - addr=
+CHECK-NEXT: - addr=
+
+// both should be small enough to be in data pool
+CHECK:      pool:
+CHECK-NEXT: - addr=
+CHECK-NEXT: - addr=

diff  --git a/llvm/test/tools/llvm-cas/lit.local.cfg b/llvm/test/tools/llvm-cas/lit.local.cfg
new file mode 100644
index 0000000000000..379945b68925d
--- /dev/null
+++ b/llvm/test/tools/llvm-cas/lit.local.cfg
@@ -0,0 +1,2 @@
+# Mark this directory's tests unsupported when the site config reports that
+# the on-disk CAS is not enabled (config.have_ondisk_cas is false).
+if not config.have_ondisk_cas:
+    config.unsupported = True

diff  --git a/llvm/test/tools/llvm-cas/make-blob.test b/llvm/test/tools/llvm-cas/make-blob.test
new file mode 100644
index 0000000000000..532a3a3351f80
--- /dev/null
+++ b/llvm/test/tools/llvm-cas/make-blob.test
@@ -0,0 +1,41 @@
+RUN: rm -rf %t %t.cas
+RUN: mkdir %t
+
+RUN: llvm-cas --cas %t.cas --make-blob \
+RUN:   --data - </dev/null >%t/empty.casid
+RUN: sed -e 's,^.,CHECK: ,' <%t/empty.casid >%t/empty.check
+RUN: llvm-cas --cas %t.cas --make-blob \
+RUN:   --data /dev/null | FileCheck %t/empty.check
+RUN: echo "abc" | \
+RUN:   llvm-cas --cas %t.cas --make-blob \
+RUN:   --data - >%t/abc.casid
+RUN: llvm-cas --cas %t.cas --make-blob \
+RUN:   --data %S/Inputs/oneline >%t/oneline.casid
+RUN: llvm-cas --cas %t.cas --make-blob \
+RUN:   --data %S/Inputs/oneline-nonewline >%t/oneline-nonewline.casid
+
+RUN: llvm-cas --cas %t.cas --cat-node-data @%t/empty.casid |\
+RUN:   FileCheck %s -check-prefix CHECK-EMPTY -allow-empty
+CHECK-EMPTY-NOT: {{.}}
+
+RUN: llvm-cas --cas %t.cas --cat-node-data @%t/abc.casid |\
+RUN:   FileCheck %s -check-prefix CHECK-ABC
+CHECK-ABC: abc
+
+RUN: llvm-cas --cas %t.cas --cat-node-data @%t/oneline-nonewline.casid |\
+RUN:   FileCheck %s -check-prefix CHECK-ONELINE
+RUN: llvm-cas --cas %t.cas --cat-node-data @%t/oneline.casid |\
+RUN:   FileCheck %s -check-prefix CHECK-ONELINE
+CHECK-ONELINE: content
+
+# Double-check newlines.
+RUN: llvm-cas --cas %t.cas --cat-node-data @%t/oneline-nonewline.casid \
+RUN:   >%t/oneline-nonewline
+RUN: diff %S/Inputs/oneline-nonewline %t/oneline-nonewline
+RUN: llvm-cas --cas %t.cas --cat-node-data @%t/oneline.casid \
+RUN:   >%t/oneline
+RUN: diff %S/Inputs/oneline %t/oneline
+
+# Validate
+RUN: llvm-cas --cas %t.cas --validate-object @%t/oneline-nonewline.casid
+RUN: llvm-cas --cas %t.cas --validate-object @%t/oneline.casid

diff  --git a/llvm/test/tools/llvm-cas/make-node.test b/llvm/test/tools/llvm-cas/make-node.test
new file mode 100644
index 0000000000000..de548af8fa2bf
--- /dev/null
+++ b/llvm/test/tools/llvm-cas/make-node.test
@@ -0,0 +1,37 @@
+RUN: rm -rf %t
+RUN: mkdir %t
+
+# Make some empty objects.
+RUN: llvm-cas --cas %t/cas --make-node \
+RUN:   --data - </dev/null >%t/empty.casid
+
+RUN: llvm-cas --cas %t/cas --cat-node-data @%t/empty.casid |\
+RUN:   FileCheck %s -check-prefix CHECK-EMPTY -allow-empty
+RUN: llvm-cas --cas %t/cas --ls-node-refs @%t/empty.casid |\
+RUN:   FileCheck %s -check-prefix CHECK-EMPTY -allow-empty
+CHECK-EMPTY-NOT: {{.}}
+
+# Make a complex object, which references existing ones. Reference a blob and
+# other objects, and reference one of them twice to be sure they don't get
+# deduped.
+RUN: llvm-cas --cas %t/cas --make-blob --data /dev/null \
+RUN:   >%t/empty-blob.casid
+RUN: cat %t/empty.casid %t/empty.casid %t/empty-blob.casid \
+RUN:   >%t/complex.refs
+RUN: cat %t/complex.refs | sed -e 's,^.,CHECK: ,' > %t/complex.check
+RUN: llvm-cas --cas %t/cas --make-node \
+RUN:   --data %S/Inputs/oneline @%t/complex.refs \
+RUN:   >%t/complex.casid
+RUN: llvm-cas --cas %t/cas --cat-node-data \
+RUN:   @%t/complex.casid | FileCheck %s -check-prefix COMPLEX-DATA
+RUN: llvm-cas --cas %t/cas --ls-node-refs @%t/complex.casid |\
+RUN:   FileCheck %t/complex.check
+COMPLEX-DATA: content
+
+RUN: llvm-cas --cas %t/cas --validate-object @%t/complex.casid
+
+# Import from a new CAS.
+RUN: llvm-cas --cas %t/new-cas --upstream-cas %t/cas --import @%t/complex.casid
+RUN: llvm-cas --cas %t/new-cas --cat-node-data \
+RUN:   @%t/complex.casid | FileCheck %s -check-prefix COMPLEX-DATA
+RUN: llvm-cas --cas %t/new-cas --validate

diff  --git a/llvm/test/tools/llvm-cas/print-id.test b/llvm/test/tools/llvm-cas/print-id.test
new file mode 100644
index 0000000000000..5a2efd58dde11
--- /dev/null
+++ b/llvm/test/tools/llvm-cas/print-id.test
@@ -0,0 +1,13 @@
+RUN: rm -rf %t
+RUN: mkdir %t
+
+RUN: llvm-cas --cas %t/cas --make-blob --data %S/Inputs/oneline > %t/id
+
+# Confirm that the ID has the right prefix, is well-formed, and that there's
+# nothing else on the line.
+RUN: FileCheck %s --match-full-lines --strict-whitespace <%t/id
+CHECK:llvmcas://{{[a-z0-9]+}}
+
+# Confirm that there's a newline after.
+RUN: wc -l <%t/id | FileCheck %s -check-prefix=NEWLINE
+NEWLINE: 1

diff  --git a/llvm/test/tools/llvm-cas/validation.test b/llvm/test/tools/llvm-cas/validation.test
new file mode 100644
index 0000000000000..13f24f0873463
--- /dev/null
+++ b/llvm/test/tools/llvm-cas/validation.test
@@ -0,0 +1,31 @@
+RUN: rm -rf %t
+RUN: mkdir %t
+
+# Ingest a blob which just fits inside the CAS data pool to make sure the validate passes.
+RUN: truncate -s 7 %t/file
+RUN: cat %t/file | \
+RUN:   llvm-cas --cas %t/cas  --make-blob \
+RUN:   --data -
+RUN: llvm-cas --cas %t/cas --validate --check-hash
+
+RUN: llvm-cas --cas %t/cas --validate
+RUN: llvm-cas --cas %t/cas --validate --check-hash
+
+RUN: rm %t/cas/v1.1/data.v1
+RUN: not llvm-cas --cas %t/cas --validate
+RUN: not llvm-cas --cas %t/cas --validate --check-hash
+
+RUN: mkdir %t/ac
+
+RUN: llvm-cas --cas %t/ac --make-blob \
+RUN:   --data /dev/null > %t/empty.casid
+RUN: echo "abc" | \
+RUN:   llvm-cas --cas %t/ac  --make-blob \
+RUN:   --data - >%t/abc.casid
+
+RUN: llvm-cas --cas %t/ac --put-cache-key @%t/abc.casid @%t/empty.casid
+RUN: llvm-cas --cas %t/ac --validate
+# Note: records are 40 bytes (32 hash bytes + 8 byte value), so trim the last
+# allocated record, leaving it invalid.
+RUN: truncate -s -40 %t/ac/v1.1/actions.v1
+RUN: not llvm-cas --cas %t/ac --validate

diff  --git a/llvm/tools/llvm-cas/CMakeLists.txt b/llvm/tools/llvm-cas/CMakeLists.txt
new file mode 100644
index 0000000000000..e9d40cb49e015
--- /dev/null
+++ b/llvm/tools/llvm-cas/CMakeLists.txt
@@ -0,0 +1,17 @@
+# Generate Options.inc (the option-parser definitions included by
+# llvm-cas.cpp) from Options.td.
+set(LLVM_TARGET_DEFINITIONS Options.td)
+tablegen(LLVM Options.inc -gen-opt-parser-defs)
+add_public_tablegen_target(LLVMCASToolTableGen)
+
+# LLVM components the tool links against.
+set(LLVM_LINK_COMPONENTS
+  Support
+  CAS
+  Option
+  )
+
+# The tool itself; it depends on the tablegen target above so Options.inc
+# exists before llvm-cas.cpp is compiled.
+add_llvm_tool(llvm-cas
+  llvm-cas.cpp
+
+  DEPENDS
+  ${tablegen_deps}
+  LLVMCASToolTableGen
+  )

diff  --git a/llvm/tools/llvm-cas/Options.td b/llvm/tools/llvm-cas/Options.td
new file mode 100644
index 0000000000000..5ae64c104fdb6
--- /dev/null
+++ b/llvm/tools/llvm-cas/Options.td
@@ -0,0 +1,63 @@
+// Command-line option definitions for the llvm-cas tool.
+
+include "llvm/Option/OptParser.td"
+
+// A value-less flag that may be spelled with either a "--" or a "-" prefix.
+class F<string name> : Flag<["--", "-"], name>;
+
+// Group holding every flag that selects which action the tool performs.
+def grp_action : OptionGroup<"Actions">, HelpText<"llvm-cas actions">;
+
+def help : F<"help">, HelpText<"Prints this help output">;
+def : Flag<["-"], "h">, Alias<help>, HelpText<"Alias for --help">;
+
+// Tool actions: each flag below belongs to grp_action.
+
+def cas_dump : F<"dump">, HelpText<"Dump internal contents">, Group<grp_action>;
+def cat_node_data : F<"cat-node-data">,
+                    HelpText<"Cat node data">,
+                    Group<grp_action>;
+def make_blob : F<"make-blob">, HelpText<"Make blob">, Group<grp_action>;
+def make_node : F<"make-node">, HelpText<"Make node">, Group<grp_action>;
+def ls_node_refs : F<"ls-node-refs">,
+                   HelpText<"List node refs">,
+                   Group<grp_action>;
+def import : F<"import">,
+             HelpText<"Import objects from another CAS">,
+             Group<grp_action>;
+def put_cache_key : F<"put-cache-key">,
+                    HelpText<"Set a value for a cache key">,
+                    Group<grp_action>;
+def get_cache_result : F<"get-cache-result">,
+                       HelpText<"Get the result value from a cache key">,
+                       Group<grp_action>;
+def validate : F<"validate">,
+               HelpText<"Validate ObjectStore">,
+               Group<grp_action>;
+def validate_object : F<"validate-object">,
+                      HelpText<"Validate the object for CASID">,
+                      Group<grp_action>;
+def validate_if_needed : F<"validate-if-needed">,
+                         HelpText<"Validate cas contents if needed">,
+                         Group<grp_action>;
+def prune : F<"prune">, HelpText<"Prune local cas storage">, Group<grp_action>;
+
+// Tool options: modifiers that are not actions themselves. The first three
+// take their value as a separate argument (e.g. `--cas <path>`).
+
+def cas_path : Separate<["-", "--"], "cas">,
+               MetaVarName<"<path>">,
+               HelpText<"Path to CAS on disk">;
+
+def upstream_cas : Separate<["-", "--"], "upstream-cas">,
+                   MetaVarName<"<path>">,
+                   HelpText<"Path to another upstream CAS">;
+
+def data : Separate<["-", "--"], "data">,
+           MetaVarName<"<path>">,
+           HelpText<"Path to data or '-' for stdin">;
+
+def check_hash : F<"check-hash">,
+                 HelpText<"Check all hashes during validation">;
+
+def allow_recovery : F<"allow-recovery">,
+                     HelpText<"Allow recovery of CAS data">;
+
+def force : F<"force">, HelpText<"Force validation even if unnecessary">;
+
+def in_process : F<"in-process">, HelpText<"Validation in-process">;

diff  --git a/llvm/tools/llvm-cas/llvm-cas.cpp b/llvm/tools/llvm-cas/llvm-cas.cpp
new file mode 100644
index 0000000000000..e72ee470d2319
--- /dev/null
+++ b/llvm/tools/llvm-cas/llvm-cas.cpp
@@ -0,0 +1,405 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file A utility for operating on LLVM CAS.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CAS/ActionCache.h"
+#include "llvm/CAS/BuiltinUnifiedCASDatabases.h"
+#include "llvm/CAS/ObjectStore.h"
+#include "llvm/Option/Arg.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Option/Option.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::cas;
+
+namespace {
+/// Option identifiers. The enumerators after OPT_INVALID come from the
+/// OPTION(...) entries in the tablegen-generated Options.inc.
+enum ID {
+  OPT_INVALID = 0, // This is not an option ID.
+#define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__),
+#include "Options.inc"
+#undef OPTION
+};
+
+// Pull in the generated string-table section of Options.inc.
+#define OPTTABLE_STR_TABLE_CODE
+#include "Options.inc"
+#undef OPTTABLE_STR_TABLE_CODE
+
+// Pull in the generated prefixes-table section of Options.inc.
+#define OPTTABLE_PREFIXES_TABLE_CODE
+#include "Options.inc"
+#undef OPTTABLE_PREFIXES_TABLE_CODE
+
+using namespace llvm::opt;
+/// One Info record per OPTION(...) entry in Options.inc; handed to the option
+/// table constructed by LLVMCASOptTable.
+static constexpr opt::OptTable::Info InfoTable[] = {
+#define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__),
+#include "Options.inc"
+#undef OPTION
+};
+
+/// Option table for llvm-cas, built from the string, prefix and info tables
+/// defined above.
+struct LLVMCASOptTable : opt::GenericOptTable {
+  LLVMCASOptTable()
+      : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {}
+};
+
+/// The action selected on the command line. Each enumerator other than
+/// Invalid corresponds to one action flag.
+enum class CommandKind {
+  Invalid,              // No action flag was seen.
+  Dump,                 // --dump
+  CatNodeData,          // --cat-node-data
+  MakeBlob,             // --make-blob
+  MakeNode,             // --make-node
+  ListObjectReferences, // --ls-node-refs
+  Import,               // --import
+  PutCacheKey,          // --put-cache-key
+  GetCacheResult,       // --get-cache-result
+  Validate,             // --validate
+  ValidateObject,       // --validate-object
+  ValidateIfNeeded,     // --validate-if-needed
+  Prune,                // --prune
+};
+
+/// Parsed command-line state for one llvm-cas invocation.
+struct CommandOptions {
+  /// Selected action; stays Invalid until an action flag is seen.
+  CommandKind Command = CommandKind::Invalid;
+  /// Positional (non-option) arguments.
+  std::vector<std::string> Inputs;
+  /// Value of --cas.
+  std::string CASPath;
+  /// Value of --upstream-cas.
+  std::string UpstreamCASPath;
+  /// Value of --data.
+  std::string DataPath;
+  // The flags default to false so that a default-constructed CommandOptions
+  // never carries indeterminate values.
+  /// --check-hash was given.
+  bool CheckHash = false;
+  /// --allow-recovery was given.
+  bool AllowRecovery = false;
+  /// --force was given.
+  bool Force = false;
+  /// --in-process was given.
+  bool InProcess = false;
+
+  /// Map an action argument to its CommandKind.
+  ///
+  /// \returns CommandKind::Invalid when the argument's option ID is not one
+  /// of the action flags handled below.
+  static CommandKind getCommandKind(opt::Arg &A) {
+    switch (A.getOption().getID()) {
+    case OPT_cas_dump:
+      return CommandKind::Dump;
+    case OPT_cat_node_data:
+      return CommandKind::CatNodeData;
+    case OPT_make_blob:
+      return CommandKind::MakeBlob;
+    case OPT_make_node:
+      return CommandKind::MakeNode;
+    case OPT_ls_node_refs:
+      return CommandKind::ListObjectReferences;
+    case OPT_import:
+      return CommandKind::Import;
+    case OPT_put_cache_key:
+      return CommandKind::PutCacheKey;
+    case OPT_get_cache_result:
+      return CommandKind::GetCacheResult;
+    case OPT_validate:
+      return CommandKind::Validate;
+    case OPT_validate_object:
+      return CommandKind::ValidateObject;
+    case OPT_validate_if_needed:
+      return CommandKind::ValidateIfNeeded;
+    case OPT_prune:
+      return CommandKind::Prune;
+    }
+    return CommandKind::Invalid;
+  }
+
+  /// Whether \p Kind needs at least one positional input. Every kind requires
+  /// input except validate-if-needed, validate, make-blob, make-node, dump
+  /// and prune.
+  static bool requiresInput(CommandKind Kind) {
+    return Kind != CommandKind::ValidateIfNeeded &&
+           Kind != CommandKind::Validate && Kind != CommandKind::MakeBlob &&
+           Kind != CommandKind::MakeNode && Kind != CommandKind::Dump &&
+           Kind != CommandKind::Prune;
+  }
+};
+} // namespace
+
+static int dump(ObjectStore &CAS);
+static int listObjectReferences(ObjectStore &CAS, const CASID &ID);
+static int catNodeData(ObjectStore &CAS, const CASID &ID);
+static int makeBlob(ObjectStore &CAS, StringRef DataPath);
+static int makeNode(ObjectStore &CAS, ArrayRef<std::string> References,
+                    StringRef DataPath);
+static int import(ObjectStore &FromCAS, ObjectStore &ToCAS,
+                  ArrayRef<std::string> Objects);
+static int putCacheKey(ObjectStore &CAS, ActionCache &AC,
+                       ArrayRef<std::string> Objects);
+static int getCacheResult(ObjectStore &CAS, ActionCache &AC, const CASID &ID);
+static int validateObject(ObjectStore &CAS, const CASID &ID);
+static int validate(ObjectStore &CAS, ActionCache &AC, bool CheckHash);
+static int validateIfNeeded(StringRef Path, bool CheckHash, bool Force,
+                            bool AllowRecovery, bool InProcess,
+                            const char *Argv0);
+static int prune(cas::ObjectStore &CAS);
+
+/// Parse the llvm-cas command line into a CommandOptions.
+///
+/// Response files are expanded first. Unknown options are reported on stderr
+/// and otherwise ignored, and `--help` prints the usage text and exits with
+/// status 0. When several action flags are given, the last one wins.
+///
+/// \returns the parsed options, or an error when response-file expansion
+/// fails, an option is missing its value, no action or no `--cas` path was
+/// given, or the selected action needs an input and none was supplied.
+static Expected<CommandOptions> parseOptions(int Argc, char **Argv) {
+  BumpPtrAllocator Alloc;
+  StringSaver Saver(Alloc);
+  SmallVector<const char *> ExpandedArgs;
+  if (!cl::expandResponseFiles(Argc, Argv, nullptr, Saver, ExpandedArgs))
+    return createStringError("cannot expand response file");
+
+  LLVMCASOptTable T;
+  unsigned MissingArgIndex = 0;
+  unsigned MissingArgCount = 0;
+  opt::InputArgList Args =
+      T.ParseArgs(ExpandedArgs, MissingArgIndex, MissingArgCount);
+
+  // An option that takes a value (e.g. a trailing `--cas`) but did not get
+  // one is reported here instead of being silently dropped.
+  if (MissingArgCount)
+    return createStringError("missing argument to '" +
+                             Twine(Args.getArgString(MissingArgIndex)) + "'");
+
+  for (auto *Arg : Args.filtered(OPT_UNKNOWN)) {
+    llvm::errs() << "ignoring unknown option: " << Arg->getSpelling() << '\n';
+  }
+
+  if (Args.hasArg(OPT_help)) {
+    T.printHelp(
+        outs(),
+        (std::string(Argv[0]) + " [action] [options] <input files>").c_str(),
+        "llvm-cas tool that performs CAS actions.", false);
+    exit(0);
+  }
+
+  CommandOptions Opts;
+  // Every action flag overwrites the previous one, so the last flag on the
+  // command line determines the command.
+  for (auto *A : Args.filtered(OPT_grp_action))
+    Opts.Command = CommandOptions::getCommandKind(*A);
+
+  if (Opts.Command == CommandKind::Invalid)
+    return createStringError("no command action is specified");
+
+  // Copy everything into owning strings: the argument list and the saver
+  // backing the expanded arguments do not outlive this function.
+  for (auto *File : Args.filtered(OPT_INPUT))
+    Opts.Inputs.push_back(File->getValue());
+  Opts.CASPath = Args.getLastArgValue(OPT_cas_path);
+  Opts.UpstreamCASPath = Args.getLastArgValue(OPT_upstream_cas);
+  Opts.DataPath = Args.getLastArgValue(OPT_data);
+  Opts.CheckHash = Args.hasArg(OPT_check_hash);
+  Opts.AllowRecovery = Args.hasArg(OPT_allow_recovery);
+  Opts.Force = Args.hasArg(OPT_force);
+  Opts.InProcess = Args.hasArg(OPT_in_process);
+
+  // Validate options.
+  if (Opts.CASPath.empty())
+    return createStringError("missing --cas <path>");
+
+  if (Opts.Inputs.empty() && CommandOptions::requiresInput(Opts.Command))
+    return createStringError("missing <input> to operate on");
+
+  return Opts;
+}
+
+/// Tool entry point: parse the command line, open the on-disk CAS and
+/// dispatch to the handler for the selected action.
+int main(int Argc, char **Argv) {
+  InitLLVM X(Argc, Argv);
+
+  ExitOnError ExitOnErr;
+  auto Opts = ExitOnErr(parseOptions(Argc, Argv));
+  const CommandKind Command = Opts.Command;
+
+  // validate-if-needed works from the path alone and is dispatched before
+  // the databases below are opened.
+  if (Command == CommandKind::ValidateIfNeeded)
+    return validateIfNeeded(Opts.CASPath, Opts.CheckHash, Opts.Force,
+                            Opts.AllowRecovery, Opts.InProcess, Argv[0]);
+
+  auto [CAS, AC] = ExitOnErr(createOnDiskUnifiedCASDatabases(Opts.CASPath));
+  assert(CAS);
+
+  // Actions that do not operate on exactly one object ID.
+  switch (Command) {
+  case CommandKind::Dump:
+    return dump(*CAS);
+  case CommandKind::Validate:
+    return validate(*CAS, *AC, Opts.CheckHash);
+  case CommandKind::MakeBlob:
+    return makeBlob(*CAS, Opts.DataPath);
+  case CommandKind::MakeNode:
+    return makeNode(*CAS, Opts.Inputs, Opts.DataPath);
+  case CommandKind::Prune:
+    return prune(*CAS);
+  case CommandKind::Import: {
+    if (Opts.UpstreamCASPath.empty())
+      ExitOnErr(createStringError("missing '-upstream-cas'"));
+
+    auto [UpstreamCAS, _] =
+        ExitOnErr(createOnDiskUnifiedCASDatabases(Opts.UpstreamCASPath));
+    return import(*UpstreamCAS, *CAS, Opts.Inputs);
+  }
+  case CommandKind::PutCacheKey:
+  case CommandKind::GetCacheResult:
+    // Both cache actions dereference the action cache below.
+    if (!AC)
+      ExitOnErr(createStringError("no action-cache available"));
+    break;
+  default:
+    break;
+  }
+
+  if (Command == CommandKind::PutCacheKey)
+    return putCacheKey(*CAS, *AC, Opts.Inputs);
+
+  // Remaining commands need exactly one CAS object.
+  if (Opts.Inputs.size() > 1)
+    ExitOnErr(createStringError("too many <object>s, expected 1"));
+  CASID ID = ExitOnErr(CAS->parseID(Opts.Inputs.front()));
+
+  switch (Command) {
+  case CommandKind::GetCacheResult:
+    return getCacheResult(*CAS, *AC, ID);
+  case CommandKind::ListObjectReferences:
+    return listObjectReferences(*CAS, ID);
+  case CommandKind::CatNodeData:
+    return catNodeData(*CAS, ID);
+  default:
+    break;
+  }
+
+  assert(Opts.Command == CommandKind::ValidateObject);
+  return validateObject(*CAS, ID);
+}
+
+/// Open the payload named by \p DataPath: "-" reads standard input, anything
+/// else is opened as a file. An empty path is reported as "--data missing".
+static Expected<std::unique_ptr<MemoryBuffer>> openBuffer(StringRef DataPath) {
+  if (DataPath.empty())
+    return createStringError("--data missing");
+
+  if (DataPath == "-")
+    return errorOrToExpected(llvm::MemoryBuffer::getSTDIN());
+
+  return errorOrToExpected(llvm::MemoryBuffer::getFile(DataPath));
+}
+
+/// Print the store's contents to stdout via ObjectStore::print.
+///
+/// \returns 0 always.
+int dump(ObjectStore &CAS) {
+  // Nothing here produces an llvm::Error, so no ExitOnError helper is needed.
+  CAS.print(llvm::outs());
+  return 0;
+}
+
+/// Store the contents of \p DataPath as an object with no references and
+/// print the new object's ID on stdout. Any failure exits the tool.
+int makeBlob(ObjectStore &CAS, StringRef DataPath) {
+  ExitOnError ExitOnErr("llvm-cas: make-blob: ");
+
+  std::unique_ptr<MemoryBuffer> Contents = ExitOnErr(openBuffer(DataPath));
+  StringRef Bytes = Contents->getBuffer();
+
+  ObjectProxy Stored = ExitOnErr(CAS.createProxy({}, Bytes));
+  llvm::outs() << Stored.getID() << "\n";
+  return 0;
+}
+
+/// Write the data of the object identified by \p ID to stdout. Failure to
+/// load the object exits the tool.
+int catNodeData(ObjectStore &CAS, const CASID &ID) {
+  ExitOnError ExitOnErr("llvm-cas: cat-node-data: ");
+
+  ObjectProxy Node = ExitOnErr(CAS.getProxy(ID));
+  llvm::outs() << Node.getData();
+  return 0;
+}
+
+/// Print, one per line, the ID of every reference held by the object
+/// identified by \p ID. Failure to load the object exits the tool.
+int listObjectReferences(ObjectStore &CAS, const CASID &ID) {
+  ExitOnError ExitOnErr("llvm-cas: ls-node-refs: ");
+
+  // Callback invoked for each reference; it never fails.
+  auto PrintRef = [&](ObjectRef Ref) -> Error {
+    llvm::outs() << CAS.getID(Ref) << "\n";
+    return Error::success();
+  };
+
+  ObjectProxy Node = ExitOnErr(CAS.getProxy(ID));
+  ExitOnErr(Node.forEachReference(PrintRef));
+  return 0;
+}
+
+/// Create an object whose data comes from \p DataPath and whose references
+/// are the IDs listed in \p Objects (in order, duplicates kept), then print
+/// the new object's ID on stdout. Any failure exits the tool.
+static int makeNode(ObjectStore &CAS, ArrayRef<std::string> Objects,
+                    StringRef DataPath) {
+  ExitOnError DataErr("llvm-cas: make-node: data: ");
+  std::unique_ptr<MemoryBuffer> Data = DataErr(openBuffer(DataPath));
+
+  // Resolve every textual ID to a reference the store already knows about.
+  SmallVector<ObjectRef> Refs;
+  for (StringRef Text : Objects) {
+    ExitOnError RefErr("llvm-cas: make-node: ref: ");
+    CASID Parsed = RefErr(CAS.parseID(Text));
+    std::optional<ObjectRef> Ref = CAS.getReference(Parsed);
+    if (!Ref)
+      RefErr(createStringError("unknown object '" + Text + "'"));
+    Refs.push_back(*Ref);
+  }
+
+  ExitOnError ExitOnErr("llvm-cas: make-node: ");
+  ObjectProxy Node = ExitOnErr(CAS.createProxy(Refs, Data->getBuffer()));
+  llvm::outs() << Node.getID() << "\n";
+  return 0;
+}
+
+/// Import each object named in \p Objects from \p FromCAS into \p ToCAS and
+/// print the ID of every imported object on stdout. An unparsable or unknown
+/// ID, or a failed import, exits the tool.
+static int import(ObjectStore &FromCAS, ObjectStore &ToCAS,
+                  ArrayRef<std::string> Objects) {
+  ExitOnError ExitOnErr("llvm-cas: import: ");
+
+  for (StringRef Text : Objects) {
+    CASID SourceID = ExitOnErr(FromCAS.parseID(Text));
+
+    auto SourceRef = FromCAS.getReference(SourceID);
+    if (!SourceRef)
+      ExitOnErr(createStringError("input not found: " + SourceID.toString()));
+
+    auto NewRef = ExitOnErr(ToCAS.importObject(FromCAS, *SourceRef));
+    llvm::outs() << ToCAS.getID(NewRef).toString() << "\n";
+  }
+
+  return 0;
+}
+
+/// Treat \p Objects as consecutive (key, result) ID pairs and record each
+/// pair in \p AC. An odd number of inputs, an unparsable ID or a failed put
+/// exits the tool.
+static int putCacheKey(ObjectStore &CAS, ActionCache &AC,
+                       ArrayRef<std::string> Objects) {
+  ExitOnError ExitOnErr("llvm-cas: put-cache-key: ");
+
+  if (Objects.size() % 2 != 0)
+    ExitOnErr(createStringError("expected pairs of inputs"));
+
+  // The size is even here, so stepping by two always lands exactly on the end.
+  for (size_t I = 0, E = Objects.size(); I != E; I += 2) {
+    CASID Key = ExitOnErr(CAS.parseID(Objects[I]));
+    CASID Result = ExitOnErr(CAS.parseID(Objects[I + 1]));
+    ExitOnErr(AC.put(Key, Result));
+  }
+  return 0;
+}
+
+/// Look up the cached result for key \p ID in \p AC and print it on stdout.
+///
+/// \returns 0 when a result exists; otherwise prints "result not found" and
+/// returns 1. A failed lookup exits the tool.
+static int getCacheResult(ObjectStore &CAS, ActionCache &AC, const CASID &ID) {
+  ExitOnError ExitOnErr("llvm-cas: get-cache-result: ");
+
+  auto Cached = ExitOnErr(AC.get(ID));
+  if (Cached) {
+    outs() << *Cached << "\n";
+    return 0;
+  }
+
+  outs() << "result not found\n";
+  return 1;
+}
+
+/// Validate the single object identified by \p ID and report success on
+/// stdout. A validation error exits the tool.
+int validateObject(ObjectStore &CAS, const CASID &ID) {
+  ExitOnError ExitOnErr("llvm-cas: validate-object: ");
+
+  if (Error Err = CAS.validateObject(ID))
+    ExitOnErr(std::move(Err));
+
+  outs() << ID << ": validated successfully\n";
+  return 0;
+}
+
+/// Validate the object store (passing \p CheckHash through) and then the
+/// action cache, reporting success on stdout. The first validation error
+/// exits the tool.
+int validate(ObjectStore &CAS, ActionCache &AC, bool CheckHash) {
+  ExitOnError ExitOnErr("llvm-cas: validate: ");
+
+  // Object store first, then the action cache.
+  if (Error StoreErr = CAS.validate(CheckHash))
+    ExitOnErr(std::move(StoreErr));
+  if (Error CacheErr = AC.validate())
+    ExitOnErr(std::move(CacheErr));
+
+  outs() << "validated successfully\n";
+  return 0;
+}
+
+/// Run validateOnDiskUnifiedCASDatabasesIfNeeded on the CAS at \p Path and
+/// print the outcome on stdout.
+///
+/// Unless \p InProcess is set, the path of the running executable (derived
+/// from \p Argv0) is passed along to the validation call. A validation error
+/// exits the tool.
+int validateIfNeeded(StringRef Path, bool CheckHash, bool Force,
+                     bool AllowRecovery, bool InProcess, const char *Argv0) {
+  ExitOnError ExitOnErr("llvm-cas: validate-if-needed: ");
+
+  // ExecStorage owns the bytes that the optional StringRef points at.
+  std::string ExecStorage;
+  std::optional<StringRef> Exec;
+  if (!InProcess) {
+    ExecStorage = sys::fs::getMainExecutable(Argv0, (void *)validateIfNeeded);
+    Exec = ExecStorage;
+  }
+
+  ValidationResult Outcome = ExitOnErr(validateOnDiskUnifiedCASDatabasesIfNeeded(
+      Path, CheckHash, AllowRecovery, Force, Exec));
+
+  // Pick the message for the outcome; an unhandled value prints nothing.
+  StringRef Message;
+  switch (Outcome) {
+  case ValidationResult::Valid:
+    Message = "validated successfully\n";
+    break;
+  case ValidationResult::Recovered:
+    Message = "recovered from invalid data\n";
+    break;
+  case ValidationResult::Skipped:
+    Message = "validation skipped\n";
+    break;
+  }
+  outs() << Message;
+  return 0;
+}
+
+/// Ask the store to prune its storage data. A failure exits the tool.
+static int prune(cas::ObjectStore &CAS) {
+  ExitOnError ExitOnErr("llvm-cas: prune: ");
+
+  if (Error Err = CAS.pruneStorageData())
+    ExitOnErr(std::move(Err));
+
+  return 0;
+}


        


More information about the llvm-commits mailing list