[llvm] [CAS] Add LLVMCAS library with InMemoryCAS implementation (PR #114096)
Paul Kirth via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 8 15:59:10 PDT 2025
================
@@ -0,0 +1,359 @@
+//===- ObjectStoreTest.cpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CAS/ObjectStore.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/ThreadPool.h"
+#include "llvm/Testing/Support/Error.h"
+#include "gtest/gtest.h"
+
+#include "CASTestConfig.h"
+
+using namespace llvm;
+using namespace llvm::cas;
+
+/// Checks that objects with different data get different IDs, and that an ID
+/// survives a round trip through its printed form.
+TEST_P(CASTest, PrintIDs) {
+  std::unique_ptr<ObjectStore> Store = createObjectStore();
+
+  // Create two objects with different data.
+  std::optional<CASID> FirstID;
+  std::optional<CASID> SecondID;
+  ASSERT_THAT_ERROR(Store->createProxy(std::nullopt, "1").moveInto(FirstID),
+                    Succeeded());
+  ASSERT_THAT_ERROR(Store->createProxy(std::nullopt, "2").moveInto(SecondID),
+                    Succeeded());
+  EXPECT_NE(FirstID, SecondID);
+
+  // The printed forms must differ as well.
+  const std::string FirstText = FirstID->toString();
+  const std::string SecondText = SecondID->toString();
+  EXPECT_NE(FirstText, SecondText);
+
+  // Parsing a printed ID must give back the ID it was printed from.
+  std::optional<CASID> FirstParsed;
+  std::optional<CASID> SecondParsed;
+  ASSERT_THAT_ERROR(Store->parseID(FirstText).moveInto(FirstParsed),
+                    Succeeded());
+  ASSERT_THAT_ERROR(Store->parseID(SecondText).moveInto(SecondParsed),
+                    Succeeded());
+  EXPECT_EQ(FirstID, FirstParsed);
+  EXPECT_EQ(SecondID, SecondParsed);
+}
+
+/// Exercises blob creation and retrieval through ObjectProxy: IDs must be
+/// stable across repeated insertions and across store instances, and the data
+/// must be retrievable unchanged.
+TEST_P(CASTest, Blobs) {
+  std::unique_ptr<ObjectStore> CAS1 = createObjectStore();
+  StringRef ContentStrings[] = {
+      "word",
+      "some longer text std::string's local memory",
+      R"(multiline text multiline text multiline text multiline text
+multiline text multiline text multiline text multiline text multiline text
+multiline text multiline text multiline text multiline text multiline text
+multiline text multiline text multiline text multiline text multiline text
+multiline text multiline text multiline text multiline text multiline text
+multiline text multiline text multiline text multiline text multiline text)",
+  };
+
+  SmallVector<CASID> IDs;
+  for (StringRef Content : ContentStrings) {
+    // Use StringRef::str() to create a temporary std::string. This could cause
+    // problems if the CAS is storing references to the input string instead of
+    // copying it. The temporary is destroyed at the end of the statement, so
+    // every later read in this test goes through the store's own data.
+    std::optional<ObjectProxy> Blob;
+    ASSERT_THAT_ERROR(
+        CAS1->createProxy(std::nullopt, Content.str()).moveInto(Blob),
+        Succeeded());
+    IDs.push_back(Blob->getID());
+
+    // Check basic printing of IDs: printing is repeatable, and from the second
+    // blob on the newest ID prints differently from the first one.
+    EXPECT_EQ(IDs.back().toString(), IDs.back().toString());
+    if (IDs.size() > 1)
+      EXPECT_NE(IDs.front().toString(), IDs.back().toString());
+  }
+
+  // Check that the blobs give the same IDs later.
+  for (size_t I = 0, E = IDs.size(); I != E; ++I) {
+    std::optional<ObjectProxy> Blob;
+    ASSERT_THAT_ERROR(
+        CAS1->createProxy(std::nullopt, ContentStrings[I]).moveInto(Blob),
+        Succeeded());
+    EXPECT_EQ(IDs[I], Blob->getID());
+  }
+
+  // Run validation on all CASIDs.
+  for (const CASID &ID : IDs)
+    ASSERT_THAT_ERROR(CAS1->validate(ID), Succeeded());
+
+  // Check that the blobs can be retrieved multiple times.
+  for (size_t I = 0, E = IDs.size(); I != E; ++I) {
+    for (int J = 0, JE = 3; J != JE; ++J) {
+      std::optional<ObjectProxy> Buffer;
+      ASSERT_THAT_ERROR(CAS1->getProxy(IDs[I]).moveInto(Buffer), Succeeded());
+      EXPECT_EQ(ContentStrings[I], Buffer->getData());
+    }
+  }
+
+  // Confirm these blobs don't exist in a fresh CAS instance.
+  std::unique_ptr<ObjectStore> CAS2 = createObjectStore();
+  for (const CASID &ID : IDs) {
+    std::optional<ObjectProxy> Proxy;
+    EXPECT_THAT_ERROR(CAS2->getProxy(ID).moveInto(Proxy), Failed());
+  }
+
+  // Insert into the second CAS and confirm the IDs are stable. Getting them
+  // should work now. Insertion runs in reverse order relative to CAS1.
+  for (size_t I = IDs.size(); I != 0; --I) {
+    const CASID &ID = IDs[I - 1];
+    StringRef Content = ContentStrings[I - 1];
+    std::optional<ObjectProxy> Blob;
+    ASSERT_THAT_ERROR(CAS2->createProxy(std::nullopt, Content).moveInto(Blob),
+                      Succeeded());
+    EXPECT_EQ(ID, Blob->getID());
+
+    std::optional<ObjectProxy> Buffer;
+    ASSERT_THAT_ERROR(CAS2->getProxy(ID).moveInto(Buffer), Succeeded());
+    EXPECT_EQ(Content, Buffer->getData());
+  }
+}
+
+/// Stores ever-larger blobs twice each and checks that identical data yields
+/// the same, valid ID; then probes blob sizes right around 1MB.
+TEST_P(CASTest, BlobsBig) {
+  // A little bit of validation that bigger blobs are okay. Climb up to 1MB.
+  std::unique_ptr<ObjectStore> Store = createObjectStore();
+  SmallString<256> TextA = StringRef("a few words");
+  SmallString<256> TextB = StringRef("others");
+  while (TextA.size() < 1024U * 1024U) {
+    std::optional<CASID> FirstID;
+    std::optional<CASID> SecondID;
+
+    // Storing the same data twice must give two valid, equal IDs.
+    ASSERT_THAT_ERROR(Store->createProxy(std::nullopt, TextA).moveInto(FirstID),
+                      Succeeded());
+    ASSERT_THAT_ERROR(
+        Store->createProxy(std::nullopt, TextA).moveInto(SecondID),
+        Succeeded());
+    ASSERT_THAT_ERROR(Store->validate(*FirstID), Succeeded());
+    ASSERT_THAT_ERROR(Store->validate(*SecondID), Succeeded());
+    ASSERT_EQ(FirstID, SecondID);
+
+    // Grow the first string, then repeat the check with the second string.
+    TextA.append(TextB);
+    ASSERT_THAT_ERROR(Store->createProxy(std::nullopt, TextB).moveInto(FirstID),
+                      Succeeded());
+    ASSERT_THAT_ERROR(
+        Store->createProxy(std::nullopt, TextB).moveInto(SecondID),
+        Succeeded());
+    ASSERT_THAT_ERROR(Store->validate(*FirstID), Succeeded());
+    ASSERT_THAT_ERROR(Store->validate(*SecondID), Succeeded());
+    ASSERT_EQ(FirstID, SecondID);
+    TextB.append(TextA);
+  }
+
+  // Specifically check near 1MB for objects large enough they're likely to be
+  // stored externally in an on-disk CAS and will be near a page boundary.
+  const size_t InterestingSize = 1024U * 1024ULL;
+  const size_t FirstSize = InterestingSize - 2;
+  const size_t LimitSize = InterestingSize + 2;
+
+  // The backing buffer starts out empty, so fill it unconditionally.
+  SmallString<0> Backing;
+  Backing.resize(LimitSize, '\01');
+
+  for (size_t Size = FirstSize; Size < LimitSize; ++Size) {
+    StringRef Data(Backing.data(), Size);
+    std::optional<ObjectProxy> Blob;
+    ASSERT_THAT_ERROR(Store->createProxy(std::nullopt, Data).moveInto(Blob),
+                      Succeeded());
+    ASSERT_EQ(Data, Blob->getData());
+    // The byte just past the returned data is expected to be a null
+    // terminator.
+    ASSERT_EQ(0, *Blob->getData().end());
+  }
+}
+
+/// Exercises leaf nodes (objects without references) created via store():
+/// IDs and refs must be stable across repeated insertions and across store
+/// instances, and the data must be retrievable unchanged.
+TEST_P(CASTest, LeafNodes) {
+  std::unique_ptr<ObjectStore> CAS1 = createObjectStore();
+  StringRef ContentStrings[] = {
+      "word",
+      "some longer text std::string's local memory",
+      R"(multiline text multiline text multiline text multiline text
+multiline text multiline text multiline text multiline text multiline text
+multiline text multiline text multiline text multiline text multiline text
+multiline text multiline text multiline text multiline text multiline text
+multiline text multiline text multiline text multiline text multiline text
+multiline text multiline text multiline text multiline text multiline text)",
+  };
+
+  SmallVector<ObjectRef> Nodes;
+  SmallVector<CASID> IDs;
+  for (StringRef Content : ContentStrings) {
+    // Use StringRef::str() to create a temporary std::string. This could cause
+    // problems if the CAS is storing references to the input string instead of
+    // copying it. The temporary is destroyed at the end of the statement, so
+    // every later read in this test goes through the store's own data.
+    std::optional<ObjectRef> Node;
+    ASSERT_THAT_ERROR(
+        CAS1->store(std::nullopt, arrayRefFromStringRef<char>(Content.str()))
+            .moveInto(Node),
+        Succeeded());
+    Nodes.push_back(*Node);
+
+    // Check basic printing of IDs.
+    IDs.push_back(CAS1->getID(*Node));
+    auto ID = CAS1->getID(Nodes.back());
+    EXPECT_EQ(ID.toString(), IDs.back().toString());
+    EXPECT_EQ(*Node, Nodes.back());
+    EXPECT_EQ(ID, IDs.back());
+    // The first node has nothing to be compared against yet.
+    if (Nodes.size() <= 1)
+      continue;
+    EXPECT_NE(Nodes.front(), Nodes.back());
+    EXPECT_NE(IDs.front(), IDs.back());
+  }
+
+  // Check that the blobs give the same IDs later.
+  for (size_t I = 0, E = IDs.size(); I != E; ++I) {
+    std::optional<ObjectRef> Node;
+    ASSERT_THAT_ERROR(CAS1->store(std::nullopt, arrayRefFromStringRef<char>(
+                                                    ContentStrings[I]))
+                          .moveInto(Node),
+                      Succeeded());
+    EXPECT_EQ(IDs[I], CAS1->getID(*Node));
+  }
+
+  // Check that the blobs can be retrieved multiple times.
+  for (size_t I = 0, E = IDs.size(); I != E; ++I) {
+    for (int J = 0, JE = 3; J != JE; ++J) {
+      std::optional<ObjectProxy> Object;
+      ASSERT_THAT_ERROR(CAS1->getProxy(IDs[I]).moveInto(Object), Succeeded());
+      ASSERT_TRUE(Object);
+      EXPECT_EQ(ContentStrings[I], Object->getData());
+    }
+  }
+
+  // Confirm these blobs don't exist in a fresh CAS instance.
+  std::unique_ptr<ObjectStore> CAS2 = createObjectStore();
+  for (const CASID &ID : IDs) {
+    std::optional<ObjectProxy> Object;
+    EXPECT_THAT_ERROR(CAS2->getProxy(ID).moveInto(Object), Failed());
+  }
+
+  // Insert into the second CAS and confirm the IDs are stable. Getting them
+  // should work now. Insertion runs in reverse order relative to CAS1.
+  for (size_t I = IDs.size(); I != 0; --I) {
+    const CASID &ID = IDs[I - 1];
+    StringRef Content = ContentStrings[I - 1];
+    std::optional<ObjectRef> Node;
+    ASSERT_THAT_ERROR(
+        CAS2->store(std::nullopt, arrayRefFromStringRef<char>(Content))
+            .moveInto(Node),
+        Succeeded());
+    EXPECT_EQ(ID, CAS2->getID(*Node));
+
+    std::optional<ObjectProxy> Object;
+    ASSERT_THAT_ERROR(CAS2->getProxy(ID).moveInto(Object), Succeeded());
+    ASSERT_TRUE(Object);
+    EXPECT_EQ(Content, Object->getData());
+  }
+}
+
+/// Creates a series of large nodes with data sizes just below and at 1MB,
+/// where each node references every node created before it.
+TEST_P(CASTest, NodesBig) {
+  std::unique_ptr<ObjectStore> Store = createObjectStore();
+
+  // Specifically check near 1MB for objects large enough they're likely to be
+  // stored externally in an on-disk CAS, and such that one of them will be
+  // near a page boundary.
+  constexpr size_t WordSize = sizeof(void *);
+  constexpr size_t InterestingSize = 1024U * 1024ULL;
+
+  // Start much smaller to account for headers.
+  constexpr size_t FirstSize = InterestingSize - 8 * WordSize;
+  constexpr size_t LimitSize = InterestingSize + 1;
+
+  // The backing buffer starts out empty, so fill it unconditionally.
+  SmallString<0> Backing;
+  Backing.resize(LimitSize, '\01');
+
+  SmallVector<ObjectRef, 4> Refs;
+  // Avoid checking every size because this is an expensive test. Just check
+  // sizes that are a multiple of the pointer size, and one less. Every node
+  // created so far is passed as a reference to the next one, to check that
+  // references are created correctly.
+  for (size_t Aligned = FirstSize; Aligned < LimitSize; Aligned += WordSize) {
+    // One byte short of the multiple first, then the multiple itself.
+    for (size_t DataSize : {Aligned - 1, Aligned}) {
+      StringRef Data(Backing.data(), DataSize);
+      std::optional<ObjectProxy> Node;
+      ASSERT_THAT_ERROR(Store->createProxy(Refs, Data).moveInto(Node),
+                        Succeeded());
+      ASSERT_EQ(Data, Node->getData());
+      // The byte just past the returned data is expected to be a null
+      // terminator.
+      ASSERT_EQ(0, *Node->getData().end());
+      ASSERT_EQ(Node->getNumReferences(), Refs.size());
+      Refs.push_back(Node->getRef());
+    }
+  }
+
+  // Every created node must pass validation.
+  for (const ObjectRef &Ref : Refs)
+    ASSERT_THAT_ERROR(Store->validate(Store->getID(Ref)), Succeeded());
+}
+
+/// Common test functionality for creating blobs in parallel. You can vary which
+/// cas instances are the same or different, and the size of the created blobs.
+static void testBlobsParallel(ObjectStore &Read1, ObjectStore &Read2,
+ ObjectStore &Write1, ObjectStore &Write2,
+ uint64_t BlobSize) {
+ SCOPED_TRACE(testBlobsParallel);
+ unsigned BlobCount = 100;
+ std::vector<std::string> Blobs;
+ Blobs.reserve(BlobCount);
+ for (unsigned I = 0; I < BlobCount; ++I) {
+ std::string Blob;
+ Blob.reserve(BlobSize);
+ while (Blob.size() < BlobSize) {
+ auto R = sys::Process::GetRandomNumber();
+ Blob.append((char *)&R, sizeof(R));
+ }
+ assert(Blob.size() >= BlobSize);
+ Blob.resize(BlobSize);
+ Blobs.push_back(std::move(Blob));
+ }
+
+ std::mutex NodesMtx;
+ std::vector<std::optional<CASID>> CreatedNodes(BlobCount);
+
+ auto Producer = [&](unsigned I, ObjectStore *CAS) {
+ std::optional<ObjectProxy> Node;
+ EXPECT_THAT_ERROR(CAS->createProxy({}, Blobs[I]).moveInto(Node),
+ Succeeded());
+ {
+ std::lock_guard<std::mutex> L(NodesMtx);
+ CreatedNodes[I] = Node ? Node->getID() : CASID::getDenseMapTombstoneKey();
+ }
+ };
+
+ auto Consumer = [&](unsigned I, ObjectStore *CAS) {
+ std::optional<CASID> ID;
+ while (!ID) {
+ // Busy wait.
+ std::lock_guard<std::mutex> L(NodesMtx);
+ ID = CreatedNodes[I];
+ }
----------------
ilovepi wrote:
Can the loop ever execute more than once?
https://github.com/llvm/llvm-project/pull/114096
More information about the llvm-commits
mailing list