[llvm] [CAS] Add LLVMCAS library with InMemoryCAS implementation (PR #114096)

Steven Wu via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 1 13:26:20 PDT 2024


================
@@ -0,0 +1,360 @@
+//===- ObjectStoreTest.cpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CAS/ObjectStore.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/ThreadPool.h"
+#include "llvm/Testing/Support/Error.h"
+#include "gtest/gtest.h"
+
+#include "CASTestConfig.h"
+
+using namespace llvm;
+using namespace llvm::cas;
+
+TEST_P(CASTest, PrintIDs) {
+  // Distinct payloads must yield distinct IDs, both as CASID values and in
+  // printed form, and parsing the printed form must give back the same ID.
+  std::unique_ptr<ObjectStore> Store = createObjectStore();
+
+  std::optional<CASID> FirstID;
+  std::optional<CASID> SecondID;
+  ASSERT_THAT_ERROR(Store->createProxy(std::nullopt, "1").moveInto(FirstID),
+                    Succeeded());
+  ASSERT_THAT_ERROR(Store->createProxy(std::nullopt, "2").moveInto(SecondID),
+                    Succeeded());
+  EXPECT_NE(FirstID, SecondID);
+
+  const std::string FirstText = FirstID->toString();
+  const std::string SecondText = SecondID->toString();
+  EXPECT_NE(FirstText, SecondText);
+
+  // Round-trip each printed ID through the parser.
+  std::optional<CASID> FirstParsed;
+  std::optional<CASID> SecondParsed;
+  ASSERT_THAT_ERROR(Store->parseID(FirstText).moveInto(FirstParsed),
+                    Succeeded());
+  ASSERT_THAT_ERROR(Store->parseID(SecondText).moveInto(SecondParsed),
+                    Succeeded());
+  EXPECT_EQ(FirstID, FirstParsed);
+  EXPECT_EQ(SecondID, SecondParsed);
+}
+
+TEST_P(CASTest, Blobs) {
+  // Store a few blobs and check that their IDs are stable across repeated
+  // insertions and across object store instances, and that the stored data
+  // reads back unchanged.
+  std::unique_ptr<ObjectStore> CAS1 = createObjectStore();
+  StringRef ContentStrings[] = {
+      "word",
+      "some longer text std::string's local memory",
+      R"(multiline text multiline text multiline text multiline text
+multiline text multiline text multiline text multiline text multiline text
+multiline text multiline text multiline text multiline text multiline text
+multiline text multiline text multiline text multiline text multiline text
+multiline text multiline text multiline text multiline text multiline text
+multiline text multiline text multiline text multiline text multiline text)",
+  };
+
+  SmallVector<CASID> IDs;
+  for (StringRef Content : ContentStrings) {
+    // Use StringRef::str() to create a temporary std::string. This could cause
+    // problems if the CAS is storing references to the input string instead of
+    // copying it.
+    std::optional<ObjectProxy> Blob;
+    ASSERT_THAT_ERROR(
+        CAS1->createProxy(std::nullopt, Content.str()).moveInto(Blob),
+        Succeeded());
+    IDs.push_back(Blob->getID());
+
+    // Check basic printing of IDs. Every blob after the first one must print
+    // differently from the first.
+    EXPECT_EQ(IDs.back().toString(), IDs.back().toString());
+    if (IDs.size() > 1)
+      EXPECT_NE(IDs.front().toString(), IDs.back().toString());
+  }
+
+  // Check that the blobs give the same IDs later.
+  for (size_t I = 0, E = IDs.size(); I != E; ++I) {
+    std::optional<ObjectProxy> Blob;
+    ASSERT_THAT_ERROR(
+        CAS1->createProxy(std::nullopt, ContentStrings[I]).moveInto(Blob),
+        Succeeded());
+    EXPECT_EQ(IDs[I], Blob->getID());
+  }
+
+  // Run validation on all CASIDs.
+  for (size_t I = 0, E = IDs.size(); I != E; ++I)
+    ASSERT_THAT_ERROR(CAS1->validate(IDs[I]), Succeeded());
+
+  // Check that the blobs can be retrieved multiple times.
+  for (size_t I = 0, E = IDs.size(); I != E; ++I) {
+    for (int J = 0, JE = 3; J != JE; ++J) {
+      std::optional<ObjectProxy> Buffer;
+      ASSERT_THAT_ERROR(CAS1->getProxy(IDs[I]).moveInto(Buffer), Succeeded());
+      EXPECT_EQ(ContentStrings[I], Buffer->getData());
+    }
+  }
+
+  // Confirm these blobs don't exist in a fresh CAS instance.
+  std::unique_ptr<ObjectStore> CAS2 = createObjectStore();
+  for (size_t I = 0, E = IDs.size(); I != E; ++I) {
+    std::optional<ObjectProxy> Proxy;
+    EXPECT_THAT_ERROR(CAS2->getProxy(IDs[I]).moveInto(Proxy), Failed());
+  }
+
+  // Insert into the second CAS and confirm the IDs are stable. Getting them
+  // should work now. The blobs are inserted in reverse order; I is one past
+  // the index being processed so the unsigned counter never wraps.
+  for (size_t I = IDs.size(); I != 0; --I) {
+    const CASID &ID = IDs[I - 1];
+    StringRef Content = ContentStrings[I - 1];
+    std::optional<ObjectProxy> Blob;
+    ASSERT_THAT_ERROR(CAS2->createProxy(std::nullopt, Content).moveInto(Blob),
+                      Succeeded());
+    EXPECT_EQ(ID, Blob->getID());
+
+    std::optional<ObjectProxy> Buffer;
+    ASSERT_THAT_ERROR(CAS2->getProxy(ID).moveInto(Buffer), Succeeded());
+    EXPECT_EQ(Content, Buffer->getData());
+  }
+}
+
+TEST_P(CASTest, BlobsBig) {
+  // A little bit of validation that bigger blobs are okay. Climb up to 1MB.
+  std::unique_ptr<ObjectStore> CAS = createObjectStore();
+  SmallString<256> String1 = StringRef("a few words");
+  SmallString<256> String2 = StringRef("others");
+  while (String1.size() < 1024U * 1024U) {
+    // Storing the same bytes twice must succeed, validate, and compare equal.
+    std::optional<CASID> ID1;
+    std::optional<CASID> ID2;
+    ASSERT_THAT_ERROR(CAS->createProxy(std::nullopt, String1).moveInto(ID1),
+                      Succeeded());
+    ASSERT_THAT_ERROR(CAS->createProxy(std::nullopt, String1).moveInto(ID2),
+                      Succeeded());
+    ASSERT_THAT_ERROR(CAS->validate(*ID1), Succeeded());
+    ASSERT_THAT_ERROR(CAS->validate(*ID2), Succeeded());
+    ASSERT_EQ(ID1, ID2);
+
+    // Each string is extended by the other, so both keep growing.
+    String1.append(String2);
+    ASSERT_THAT_ERROR(CAS->createProxy(std::nullopt, String2).moveInto(ID1),
+                      Succeeded());
+    ASSERT_THAT_ERROR(CAS->createProxy(std::nullopt, String2).moveInto(ID2),
+                      Succeeded());
+    ASSERT_THAT_ERROR(CAS->validate(*ID1), Succeeded());
+    ASSERT_THAT_ERROR(CAS->validate(*ID2), Succeeded());
+    ASSERT_EQ(ID1, ID2);
+    String2.append(String1);
+  }
+
+  // Specifically check near 1MB for objects large enough they're likely to be
+  // stored externally in an on-disk CAS and will be near a page boundary.
+  const size_t InterestingSize = 1024U * 1024U;
+  const size_t SizeE = InterestingSize + 2;
+  // Fill the backing buffer with a non-zero byte so the trailing-null check
+  // below cannot be satisfied by the payload itself.
+  SmallString<0> Storage;
+  Storage.resize(SizeE, '\01');
+  for (size_t Size = InterestingSize - 2; Size != SizeE; ++Size) {
+    StringRef Data(Storage.data(), Size);
+    std::optional<ObjectProxy> Blob;
+    ASSERT_THAT_ERROR(CAS->createProxy(std::nullopt, Data).moveInto(Blob),
+                      Succeeded());
+    ASSERT_EQ(Data, Blob->getData());
+    // The byte just past the returned data is expected to be a null.
+    ASSERT_EQ(0, Blob->getData().end()[0]);
+  }
+}
+
+TEST_P(CASTest, LeafNodes) {
+  std::unique_ptr<ObjectStore> CAS1 = createObjectStore();
+  StringRef ContentStrings[] = {
+      "word",
+      "some longer text std::string's local memory",
+      R"(multiline text multiline text multiline text multiline text
+multiline text multiline text multiline text multiline text multiline text
+multiline text multiline text multiline text multiline text multiline text
+multiline text multiline text multiline text multiline text multiline text
+multiline text multiline text multiline text multiline text multiline text
+multiline text multiline text multiline text multiline text multiline text)",
+  };
+
+  SmallVector<ObjectRef> Nodes;
+  SmallVector<CASID> IDs;
+  for (StringRef Content : ContentStrings) {
+    // Use StringRef::str() to create a temporary std::string. This could cause
+    // problems if the CAS is storing references to the input string instead of
+    // copying it.
+    std::optional<ObjectRef> Node;
+    ASSERT_THAT_ERROR(
+        CAS1->store(std::nullopt, arrayRefFromStringRef<char>(Content))
+            .moveInto(Node),
+        Succeeded());
+    Nodes.push_back(*Node);
+
+    // Check basic printing of IDs.
+    IDs.push_back(CAS1->getID(*Node));
+    EXPECT_EQ(IDs.back().toString(), IDs.back().toString());
+    EXPECT_EQ(Nodes.front(), Nodes.front());
+    EXPECT_EQ(Nodes.back(), Nodes.back());
+    EXPECT_EQ(IDs.front(), IDs.front());
+    EXPECT_EQ(IDs.back(), IDs.back());
----------------
cachemeifyoucan wrote:

Good point. I switched it up a bit so it looks more meaningful.

https://github.com/llvm/llvm-project/pull/114096


More information about the llvm-commits mailing list