[llvm] [CAS] Add LLVMCAS library with InMemoryCAS implementation (PR #114096)
Steven Wu via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 1 13:26:20 PDT 2024
================
@@ -0,0 +1,360 @@
+//===- ObjectStoreTest.cpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CAS/ObjectStore.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/ThreadPool.h"
+#include "llvm/Testing/Support/Error.h"
+#include "gtest/gtest.h"
+
+#include "CASTestConfig.h"
+
+using namespace llvm;
+using namespace llvm::cas;
+
+// Checks that two objects created from different strings get different IDs,
+// and that each ID survives a toString()/parseID() round trip through the
+// same store.
+TEST_P(CASTest, PrintIDs) {
+  std::unique_ptr<ObjectStore> Store = createObjectStore();
+
+  // Create one proxy from "1" and one from "2"; only their IDs are kept.
+  std::optional<CASID> FirstID;
+  std::optional<CASID> SecondID;
+  ASSERT_THAT_ERROR(Store->createProxy(std::nullopt, "1").moveInto(FirstID),
+                    Succeeded());
+  ASSERT_THAT_ERROR(Store->createProxy(std::nullopt, "2").moveInto(SecondID),
+                    Succeeded());
+  EXPECT_NE(FirstID, SecondID);
+
+  // The printed forms must differ as well.
+  const std::string FirstText = FirstID->toString();
+  const std::string SecondText = SecondID->toString();
+  EXPECT_NE(FirstText, SecondText);
+
+  // Parsing the printed form must give back the original ID.
+  std::optional<CASID> FirstParsed;
+  std::optional<CASID> SecondParsed;
+  ASSERT_THAT_ERROR(Store->parseID(FirstText).moveInto(FirstParsed),
+                    Succeeded());
+  ASSERT_THAT_ERROR(Store->parseID(SecondText).moveInto(SecondParsed),
+                    Succeeded());
+  EXPECT_EQ(FirstID, FirstParsed);
+  EXPECT_EQ(SecondID, SecondParsed);
+}
+
+// Exercises blob storage through createProxy()/getProxy():
+//  - storing the same content again yields the same ID,
+//  - every ID validates and its data can be fetched repeatedly,
+//  - a second store fails to look up IDs it was never given, and
+//  - inserting the same content into the second store yields the same IDs.
+TEST_P(CASTest, Blobs) {
+  std::unique_ptr<ObjectStore> CAS1 = createObjectStore();
+  StringRef ContentStrings[] = {
+      "word",
+      "some longer text std::string's local memory",
+      R"(multiline text multiline text multiline text multiline text
+multiline text multiline text multiline text multiline text multiline text
+multiline text multiline text multiline text multiline text multiline text
+multiline text multiline text multiline text multiline text multiline text
+multiline text multiline text multiline text multiline text multiline text
+multiline text multiline text multiline text multiline text multiline text)",
+  };
+
+  SmallVector<CASID> IDs;
+  for (StringRef Content : ContentStrings) {
+    // Use StringRef::str() to create a temporary std::string. This could cause
+    // problems if the CAS is storing references to the input string instead of
+    // copying it. The temporary is destroyed at the end of the statement, so
+    // the later data comparisons would read stale memory in that case.
+    std::optional<ObjectProxy> Blob;
+    ASSERT_THAT_ERROR(
+        CAS1->createProxy(std::nullopt, Content.str()).moveInto(Blob),
+        Succeeded());
+    IDs.push_back(Blob->getID());
+
+    // Check basic printing of IDs: printing is repeatable, and every blob
+    // after the first prints differently from the first one.
+    EXPECT_EQ(IDs.back().toString(), IDs.back().toString());
+    if (IDs.size() > 1)
+      EXPECT_NE(IDs.front().toString(), IDs.back().toString());
+  }
+
+  // Check that the blobs give the same IDs later.
+  for (size_t I = 0, E = IDs.size(); I != E; ++I) {
+    std::optional<ObjectProxy> Blob;
+    ASSERT_THAT_ERROR(
+        CAS1->createProxy(std::nullopt, ContentStrings[I]).moveInto(Blob),
+        Succeeded());
+    EXPECT_EQ(IDs[I], Blob->getID());
+  }
+
+  // Run validation on all CASIDs.
+  for (size_t I = 0, E = IDs.size(); I != E; ++I)
+    ASSERT_THAT_ERROR(CAS1->validate(IDs[I]), Succeeded());
+
+  // Check that the blobs can be retrieved multiple times.
+  for (size_t I = 0, E = IDs.size(); I != E; ++I) {
+    for (int J = 0, JE = 3; J != JE; ++J) {
+      std::optional<ObjectProxy> Buffer;
+      ASSERT_THAT_ERROR(CAS1->getProxy(IDs[I]).moveInto(Buffer), Succeeded());
+      EXPECT_EQ(ContentStrings[I], Buffer->getData());
+    }
+  }
+
+  // Confirm these blobs don't exist in a fresh CAS instance.
+  std::unique_ptr<ObjectStore> CAS2 = createObjectStore();
+  for (size_t I = 0, E = IDs.size(); I != E; ++I) {
+    std::optional<ObjectProxy> Proxy;
+    EXPECT_THAT_ERROR(CAS2->getProxy(IDs[I]).moveInto(Proxy), Failed());
+  }
+
+  // Insert into the second CAS and confirm the IDs are stable. Getting them
+  // should work now. Walk the blobs in reverse so the insertion order differs
+  // from the one used for the first CAS.
+  for (size_t I = IDs.size(); I != 0; --I) {
+    auto &ID = IDs[I - 1];
+    auto &Content = ContentStrings[I - 1];
+    std::optional<ObjectProxy> Blob;
+    ASSERT_THAT_ERROR(CAS2->createProxy(std::nullopt, Content).moveInto(Blob),
+                      Succeeded());
+    EXPECT_EQ(ID, Blob->getID());
+
+    std::optional<ObjectProxy> Buffer;
+    ASSERT_THAT_ERROR(CAS2->getProxy(ID).moveInto(Buffer), Succeeded());
+    EXPECT_EQ(Content, Buffer->getData());
+  }
+}
+
+// A little bit of validation that bigger blobs are okay: two strings are
+// grown by appending each to the other until the first reaches 1MB, then
+// sizes just around 1MB are probed individually.
+TEST_P(CASTest, BlobsBig) {
+  constexpr size_t OneMB = 1024U * 1024U;
+  std::unique_ptr<ObjectStore> Store = createObjectStore();
+
+  SmallString<256> Primary = StringRef("a few words");
+  SmallString<256> Secondary = StringRef("others");
+  while (Primary.size() < OneMB) {
+    std::optional<CASID> FirstID;
+    std::optional<CASID> SecondID;
+
+    // Storing the same bytes twice must give two valid, equal IDs.
+    ASSERT_THAT_ERROR(
+        Store->createProxy(std::nullopt, Primary).moveInto(FirstID),
+        Succeeded());
+    ASSERT_THAT_ERROR(
+        Store->createProxy(std::nullopt, Primary).moveInto(SecondID),
+        Succeeded());
+    ASSERT_THAT_ERROR(Store->validate(*FirstID), Succeeded());
+    ASSERT_THAT_ERROR(Store->validate(*SecondID), Succeeded());
+    ASSERT_EQ(FirstID, SecondID);
+
+    // Grow the first string, then run the same check on the second one.
+    Primary.append(Secondary);
+    ASSERT_THAT_ERROR(
+        Store->createProxy(std::nullopt, Secondary).moveInto(FirstID),
+        Succeeded());
+    ASSERT_THAT_ERROR(
+        Store->createProxy(std::nullopt, Secondary).moveInto(SecondID),
+        Succeeded());
+    ASSERT_THAT_ERROR(Store->validate(*FirstID), Succeeded());
+    ASSERT_THAT_ERROR(Store->validate(*SecondID), Succeeded());
+    ASSERT_EQ(FirstID, SecondID);
+
+    // Grow the second string so both keep increasing every iteration.
+    Secondary.append(Primary);
+  }
+
+  // Specifically check near 1MB for objects large enough they're likely to be
+  // stored externally in an on-disk CAS and will be near a page boundary.
+  const size_t FirstLen = OneMB - 2;
+  const size_t EndLen = OneMB + 2;
+  SmallString<0> Fill;
+  Fill.resize(EndLen, '\01');
+  for (size_t Len = FirstLen; Len < EndLen; ++Len) {
+    StringRef Data(Fill.data(), Len);
+    std::optional<ObjectProxy> Blob;
+    ASSERT_THAT_ERROR(Store->createProxy(std::nullopt, Data).moveInto(Blob),
+                      Succeeded());
+    ASSERT_EQ(Data, Blob->getData());
+    // The byte just past the returned data is expected to be zero.
+    ASSERT_EQ(0, *Blob->getData().end());
+  }
+}
+
+TEST_P(CASTest, LeafNodes) {
+ std::unique_ptr<ObjectStore> CAS1 = createObjectStore();
+ StringRef ContentStrings[] = {
+ "word",
+ "some longer text std::string's local memory",
+ R"(multiline text multiline text multiline text multiline text
+multiline text multiline text multiline text multiline text multiline text
+multiline text multiline text multiline text multiline text multiline text
+multiline text multiline text multiline text multiline text multiline text
+multiline text multiline text multiline text multiline text multiline text
+multiline text multiline text multiline text multiline text multiline text)",
+ };
+
+ SmallVector<ObjectRef> Nodes;
+ SmallVector<CASID> IDs;
+ for (StringRef Content : ContentStrings) {
+ // Use StringRef::str() to create a temporary std::string. This could cause
+ // problems if the CAS is storing references to the input string instead of
+ // copying it.
+ std::optional<ObjectRef> Node;
+ ASSERT_THAT_ERROR(
+ CAS1->store(std::nullopt, arrayRefFromStringRef<char>(Content))
+ .moveInto(Node),
+ Succeeded());
+ Nodes.push_back(*Node);
+
+ // Check basic printing of IDs.
+ IDs.push_back(CAS1->getID(*Node));
+ EXPECT_EQ(IDs.back().toString(), IDs.back().toString());
+ EXPECT_EQ(Nodes.front(), Nodes.front());
+ EXPECT_EQ(Nodes.back(), Nodes.back());
+ EXPECT_EQ(IDs.front(), IDs.front());
+ EXPECT_EQ(IDs.back(), IDs.back());
----------------
cachemeifyoucan wrote:
Good point. I switched it up a bit so it looks more meaningful.
https://github.com/llvm/llvm-project/pull/114096
More information about the llvm-commits
mailing list