[llvm] [CAS] LLVMCAS implementation (PR #68448)

via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 6 14:00:19 PDT 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-support

<details>
<summary>Changes</summary>

Adds Content Addressable Storage implementation for LLVM, which includes:
* InMemoryCAS (non-persistent)
* OnDiskCAS (persistent)
It also comes with various levels of abstraction for creating, querying, and chaining CAS instances.

---

Patch is 383.74 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/68448.diff


59 Files Affected:

- (modified) llvm/CMakeLists.txt (+12) 
- (added) llvm/docs/ContentAddressableStorage.md (+120) 
- (modified) llvm/docs/Reference.rst (+4) 
- (added) llvm/include/llvm/ADT/TrieRawHashMap.h (+398) 
- (added) llvm/include/llvm/CAS/ActionCache.h (+149) 
- (added) llvm/include/llvm/CAS/BuiltinCASContext.h (+88) 
- (added) llvm/include/llvm/CAS/BuiltinObjectHasher.h (+84) 
- (added) llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h (+26) 
- (added) llvm/include/llvm/CAS/CASFileSystem.h (+33) 
- (added) llvm/include/llvm/CAS/CASID.h (+156) 
- (added) llvm/include/llvm/CAS/CASNodeSchema.h (+76) 
- (added) llvm/include/llvm/CAS/CASReference.h (+207) 
- (added) llvm/include/llvm/CAS/HierarchicalTreeBuilder.h (+88) 
- (added) llvm/include/llvm/CAS/MappedFileRegionBumpPtr.h (+126) 
- (added) llvm/include/llvm/CAS/ObjectStore.h (+362) 
- (added) llvm/include/llvm/CAS/OnDiskGraphDB.h (+406) 
- (added) llvm/include/llvm/CAS/OnDiskHashMappedTrie.h (+391) 
- (added) llvm/include/llvm/CAS/OnDiskKeyValueDB.h (+65) 
- (added) llvm/include/llvm/CAS/TreeEntry.h (+73) 
- (added) llvm/include/llvm/CAS/TreeSchema.h (+127) 
- (added) llvm/include/llvm/CAS/UnifiedOnDiskCache.h (+140) 
- (modified) llvm/include/llvm/Support/FileSystem.h (+6-2) 
- (added) llvm/lib/CAS/ActionCache.cpp (+60) 
- (added) llvm/lib/CAS/ActionCaches.cpp (+242) 
- (added) llvm/lib/CAS/BuiltinCAS.cpp (+108) 
- (added) llvm/lib/CAS/BuiltinCAS.h (+98) 
- (added) llvm/lib/CAS/BuiltinObjectHasher.h (+73) 
- (added) llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp (+25) 
- (added) llvm/lib/CAS/CASNodeSchema.cpp (+23) 
- (added) llvm/lib/CAS/CMakeLists.txt (+29) 
- (added) llvm/lib/CAS/HashMappedTrieIndexGenerator.h (+90) 
- (added) llvm/lib/CAS/HierarchicalTreeBuilder.cpp (+266) 
- (added) llvm/lib/CAS/InMemoryCAS.cpp (+322) 
- (added) llvm/lib/CAS/MappedFileRegionBumpPtr.cpp (+284) 
- (added) llvm/lib/CAS/ObjectStore.cpp (+259) 
- (added) llvm/lib/CAS/OnDiskCAS.cpp (+205) 
- (added) llvm/lib/CAS/OnDiskCommon.cpp (+26) 
- (added) llvm/lib/CAS/OnDiskCommon.h (+24) 
- (added) llvm/lib/CAS/OnDiskGraphDB.cpp (+1508) 
- (added) llvm/lib/CAS/OnDiskHashMappedTrie.cpp (+1356) 
- (added) llvm/lib/CAS/OnDiskKeyValueDB.cpp (+78) 
- (added) llvm/lib/CAS/TreeEntry.cpp (+47) 
- (added) llvm/lib/CAS/TreeSchema.cpp (+233) 
- (added) llvm/lib/CAS/UnifiedOnDiskCache.cpp (+339) 
- (modified) llvm/lib/CMakeLists.txt (+1) 
- (modified) llvm/lib/Support/CMakeLists.txt (+1) 
- (added) llvm/lib/Support/TrieHashIndexGenerator.h (+89) 
- (added) llvm/lib/Support/TrieRawHashMap.cpp (+493) 
- (modified) llvm/lib/Support/Unix/Path.inc (+7-4) 
- (modified) llvm/lib/Support/Windows/Path.inc (+8-4) 
- (added) llvm/tools/llvm-cas/CMakeLists.txt (+8) 
- (added) llvm/tools/llvm-cas/llvm-cas.cpp (+450) 
- (modified) llvm/unittests/ADT/CMakeLists.txt (+1) 
- (added) llvm/unittests/ADT/TrieRawHashMapTest.cpp (+345) 
- (added) llvm/unittests/CAS/CASTestConfig.cpp (+22) 
- (added) llvm/unittests/CAS/CASTestConfig.h (+36) 
- (added) llvm/unittests/CAS/CMakeLists.txt (+12) 
- (added) llvm/unittests/CAS/ObjectStoreTest.cpp (+280) 
- (modified) llvm/unittests/CMakeLists.txt (+1) 


``````````diff
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index 103c08ffbe83b38..c5f36daa0223ad3 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -758,6 +758,18 @@ option (LLVM_ENABLE_SPHINX "Use Sphinx to generate llvm documentation." OFF)
 option (LLVM_ENABLE_OCAMLDOC "Build OCaml bindings documentation." ON)
 option (LLVM_ENABLE_BINDINGS "Build bindings." ON)
 
+if(UNIX AND CMAKE_SIZEOF_VOID_P GREATER_EQUAL 8)
+  set(LLVM_ENABLE_ONDISK_CAS_default ON)
+else()
+  set(LLVM_ENABLE_ONDISK_CAS_default OFF)
+endif()
+option(LLVM_ENABLE_ONDISK_CAS "Build OnDiskCAS." ${LLVM_ENABLE_ONDISK_CAS_default})
+option(LLVM_CAS_ENABLE_REMOTE_CACHE "Build remote CAS service" OFF)
+if(LLVM_CAS_ENABLE_REMOTE_CACHE)
+  include(FindGRPC)
+endif()
+
+
 set(LLVM_INSTALL_DOXYGEN_HTML_DIR "${CMAKE_INSTALL_DOCDIR}/llvm/doxygen-html"
     CACHE STRING "Doxygen-generated HTML documentation install directory")
 set(LLVM_INSTALL_OCAMLDOC_HTML_DIR "${CMAKE_INSTALL_DOCDIR}/llvm/ocaml-html"
diff --git a/llvm/docs/ContentAddressableStorage.md b/llvm/docs/ContentAddressableStorage.md
new file mode 100644
index 000000000000000..4f2d9a6a3a91857
--- /dev/null
+++ b/llvm/docs/ContentAddressableStorage.md
@@ -0,0 +1,120 @@
+# Content Addressable Storage
+
+## Introduction to CAS
+
+Content Addressable Storage, or `CAS`, is a storage system that assigns
+unique addresses to the data stored. It is very useful for data deduplication
+and creating unique identifiers.
+
+Unlike other kinds of storage systems, such as file systems, CAS is immutable.
+It is more reliable to model a computation when representing the inputs and
+outputs of the computation using objects stored in CAS.
+
+The basic unit of the CAS library is a CASObject, which contains:
+
+* Data: arbitrary data
+* References: references to other CASObject
+
+It can be conceptually modeled as something like:
+
+```
+struct CASObject {
+  ArrayRef<char> Data;
+  ArrayRef<CASObject*> Refs;
+}
+```
+
+Such abstraction can allow simple composition of CASObjects into a DAG to
+represent complicated data structure while still allowing data deduplication.
+Note you can compare two DAGs by just comparing the CASObject hash of two
+root nodes.
+
+
+
+## LLVM CAS Library User Guide
+
+The CAS-like storage provided in LLVM is `llvm::cas::ObjectStore`.
+To reference a CASObject, there are a few different abstractions provided
+with different trade-offs:
+
+### ObjectRef
+
+`ObjectRef` is a lightweight reference to a CASObject stored in the CAS.
+This is the most commonly used abstraction and it is cheap to copy/pass
+along. It has the following properties:
+
+* `ObjectRef` is only meaningful within the `ObjectStore` that created the ref.
+`ObjectRef` created by different `ObjectStore` cannot be cross-referenced or
+compared.
+* `ObjectRef` doesn't guarantee the existence of the CASObject it points to. An
+explicit load is required before accessing the data stored in CASObject.
+This load can also fail, for reasons like but not limited to: object does
+not exist, corrupted CAS storage, operation timeout, etc.
+* If two `ObjectRef` are equal, it is guaranteed that the objects they point to
+(if they exist) are identical. If they are not equal, the underlying objects are
+guaranteed to be not the same.
+
+### ObjectProxy
+
+`ObjectProxy` represents a loaded CASObject. With an `ObjectProxy`, the
+underlying stored data and references can be accessed without the need
+of error handling. The class APIs also provide convenient methods to
+access underlying data. The lifetime of the underlying data is equal to
+the lifetime of the instance of `ObjectStore` unless explicitly copied.
+
+### CASID
+
+`CASID` is the hash identifier for CASObjects. It owns the underlying
+storage for hash value so it can be expensive to copy and compare depending
+on the hash algorithm. `CASID` is generally only useful in rare situations
+like printing raw hash value or exchanging hash values between different
+CAS instances with the same hashing schema.
+
+### ObjectStore
+
+`ObjectStore` is the CAS-like object storage. It provides API to save
+and load CASObjects, for example:
+
+```
+ObjectRef A, B, C;
+Expected<ObjectRef> Stored = ObjectStore.store("data", {A, B});
+Expected<ObjectProxy> Loaded = ObjectStore.getProxy(C);
+```
+
+It also provides APIs to convert between `ObjectRef`, `ObjectProxy` and
+`CASID`.
+
+
+
+## CAS Library Implementation Guide
+
+The LLVM ObjectStore APIs are designed so that it is easy to add
+customized CAS implementations that are interchangeable with builtin
+CAS implementations.
+
+To add your own implementation, you just need to add a subclass to
+`llvm::cas::ObjectStore` and implement all its pure virtual methods.
+To be interchangeable with LLVM ObjectStore, the new CAS implementation
+needs to conform to the following contracts:
+
+* Different CASObjects stored in the ObjectStore need to have different hashes
+and result in different `ObjectRef`s. Conversely, the same CASObject should have
+the same hash and the same `ObjectRef`. Note two different CASObjects with
+identical data but different references are considered different objects.
+* `ObjectRef`s are comparable within the same `ObjectStore` instance, and can
+be used to determine the equality of the underlying CASObjects.
+* The loaded objects from the ObjectStore need to have the lifetime to be at
+least as long as the ObjectStore itself.
+
+If not specified, the behavior can be implementation defined. For example,
+`ObjectRef` can be used to point to a loaded CASObject so
+`ObjectStore` never fails to load. It is also legal to use a stricter model
+than required. For example, an `ObjectRef` that can be used to compare
+objects between different `ObjectStore` instances is legal but user
+of the ObjectStore should not depend on this behavior.
+
+For CAS library implementers, there is also an `ObjectHandle` class that
+is an internal representation of a loaded CASObject reference.
+`ObjectProxy` is just a pair of `ObjectHandle` and `ObjectStore`, because
+just like `ObjectRef`, `ObjectHandle` is only useful when paired with
+the ObjectStore that knows about the loaded CASObject.
diff --git a/llvm/docs/Reference.rst b/llvm/docs/Reference.rst
index 3a1d1665be439e2..ddd5ffb10c6ac85 100644
--- a/llvm/docs/Reference.rst
+++ b/llvm/docs/Reference.rst
@@ -15,6 +15,7 @@ LLVM and API reference documentation.
    BranchWeightMetadata
    Bugpoint
    CommandGuide/index
+   ContentAddressableStorage
    ConvergenceAndUniformity
    ConvergentOperations
    Coroutines
@@ -228,3 +229,6 @@ Additional Topics
 :doc:`ConvergenceAndUniformity`
    A description of uniformity analysis in the presence of irreducible
    control flow, and its implementation.
+
+:doc:`ContentAddressableStorage`
+   A reference guide for using LLVM's CAS library.
diff --git a/llvm/include/llvm/ADT/TrieRawHashMap.h b/llvm/include/llvm/ADT/TrieRawHashMap.h
new file mode 100644
index 000000000000000..607f64924e75d6c
--- /dev/null
+++ b/llvm/include/llvm/ADT/TrieRawHashMap.h
@@ -0,0 +1,398 @@
+//===- TrieRawHashMap.h -----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_TRIERAWHASHMAP_H
+#define LLVM_ADT_TRIERAWHASHMAP_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Casting.h"
+#include <atomic>
+#include <optional>
+
+namespace llvm {
+
+class raw_ostream;
+
+/// TrieRawHashMap - is a lock-free thread-safe trie that can be used to
+/// store/index data based on a hash value. It can be customized to work with
+/// any hash algorithm or store any data.
+///
+/// Data structure:
+/// Data node stored in the Trie contains both hash and data:
+/// struct {
+///    HashT Hash;
+///    DataT Data;
+/// };
+///
+/// Data is stored/indexed via a prefix tree, where each node in the tree can be
+/// either the root, a sub-trie or a data node. Assuming a 4-bit hash and two
+/// data objects {0001, A} and {0100, B}, it can be stored in a trie
+/// (assuming Root has 2 bits, SubTrie has 1 bit):
+///  +--------+
+///  |Root[00]| -> {0001, A}
+///  |    [01]| -> {0100, B}
+///  |    [10]| (empty)
+///  |    [11]| (empty)
+///  +--------+
+///
+/// Inserting a new object {0010, C} will result in:
+///  +--------+    +----------+
+///  |Root[00]| -> |SubTrie[0]| -> {0001, A}
+///  |        |    |       [1]| -> {0010, C}
+///  |        |    +----------+
+///  |    [01]| -> {0100, B}
+///  |    [10]| (empty)
+///  |    [11]| (empty)
+///  +--------+
+/// Note object A is sunk down to a sub-trie during the insertion. All the
+/// nodes are inserted through compare-exchange to ensure thread-safety and
+/// lock-freedom.
+///
+/// To find an object in the trie, walk the tree with prefix of the hash until
+/// the data node is found. Then the hash is compared with the hash stored in
+/// the data node to see if it is the same object.
+///
+/// Hash collision is not allowed so it is recommended to use trie with a
+/// "strong" hashing algorithm. A well-distributed hash can also result in
+/// better performance and memory usage.
+///
+/// It currently does not support iteration and deletion.
+
+/// Base class for a lock-free thread-safe hash-mapped trie.
+class ThreadSafeTrieRawHashMapBase {
+public:
+  static constexpr size_t TrieContentBaseSize = 4;
+  static constexpr size_t DefaultNumRootBits = 6;
+  static constexpr size_t DefaultNumSubtrieBits = 4;
+
+private:
+  template <class T> struct AllocValueType {
+    char Base[TrieContentBaseSize];
+    std::aligned_union_t<sizeof(T), T> Content;
+  };
+
+protected:
+  template <class T>
+  static constexpr size_t DefaultContentAllocSize = sizeof(AllocValueType<T>);
+
+  template <class T>
+  static constexpr size_t DefaultContentAllocAlign = alignof(AllocValueType<T>);
+
+  template <class T>
+  static constexpr size_t DefaultContentOffset =
+      offsetof(AllocValueType<T>, Content);
+
+public:
+  void operator delete(void *Ptr) { ::free(Ptr); }
+
+  LLVM_DUMP_METHOD void dump() const;
+  void print(raw_ostream &OS) const;
+
+protected:
+  /// Result of a lookup. Suitable for an insertion hint. Maybe could be
+  /// expanded into an iterator of sorts, but likely not useful (visiting
+  /// everything in the trie should probably be done some way other than
+  /// through an iterator pattern).
+  class PointerBase {
+  protected:
+    void *get() const { return I == -2u ? P : nullptr; }
+
+  public:
+    PointerBase() noexcept = default;
+    PointerBase(PointerBase &&) = default;
+    PointerBase(const PointerBase &) = default;
+    PointerBase &operator=(PointerBase &&) = default;
+    PointerBase &operator=(const PointerBase &) = default;
+
+  private:
+    friend class ThreadSafeTrieRawHashMapBase;
+    explicit PointerBase(void *Content) : P(Content), I(-2u) {}
+    PointerBase(void *P, unsigned I, unsigned B) : P(P), I(I), B(B) {}
+
+    bool isHint() const { return I != -1u && I != -2u; }
+
+    void *P = nullptr;
+    unsigned I = -1u;
+    unsigned B = 0;
+  };
+
+  /// Find the stored content with hash.
+  PointerBase find(ArrayRef<uint8_t> Hash) const;
+
+  /// Insert and return the stored content.
+  PointerBase
+  insert(PointerBase Hint, ArrayRef<uint8_t> Hash,
+         function_ref<const uint8_t *(void *Mem, ArrayRef<uint8_t> Hash)>
+             Constructor);
+
+  ThreadSafeTrieRawHashMapBase() = delete;
+
+  ThreadSafeTrieRawHashMapBase(
+      size_t ContentAllocSize, size_t ContentAllocAlign, size_t ContentOffset,
+      std::optional<size_t> NumRootBits = std::nullopt,
+      std::optional<size_t> NumSubtrieBits = std::nullopt);
+
+  /// Destructor, which asserts if there's anything to do. Subclasses should
+  /// call \a destroyImpl().
+  ///
+  /// \pre \a destroyImpl() was already called.
+  ~ThreadSafeTrieRawHashMapBase();
+  void destroyImpl(function_ref<void(void *ValueMem)> Destructor);
+
+  ThreadSafeTrieRawHashMapBase(ThreadSafeTrieRawHashMapBase &&RHS);
+
+  // Move assignment can be implemented in a thread-safe way if NumRootBits and
+  // NumSubtrieBits are stored inside the Root.
+  ThreadSafeTrieRawHashMapBase &
+  operator=(ThreadSafeTrieRawHashMapBase &&RHS) = delete;
+
+  // No copy.
+  ThreadSafeTrieRawHashMapBase(const ThreadSafeTrieRawHashMapBase &) = delete;
+  ThreadSafeTrieRawHashMapBase &
+  operator=(const ThreadSafeTrieRawHashMapBase &) = delete;
+
+  // Debug functions. Implementation details and not guaranteed to be
+  // thread-safe.
+  PointerBase getRoot() const;
+  unsigned getStartBit(PointerBase P) const;
+  unsigned getNumBits(PointerBase P) const;
+  unsigned getNumSlotUsed(PointerBase P) const;
+  std::string getTriePrefixAsString(PointerBase P) const;
+  unsigned getNumTries() const;
+  // Visit next trie in the allocation chain.
+  PointerBase getNextTrie(PointerBase P) const;
+
+private:
+  friend class TrieRawHashMapTestHelper;
+  const unsigned short ContentAllocSize;
+  const unsigned short ContentAllocAlign;
+  const unsigned short ContentOffset;
+  unsigned short NumRootBits;
+  unsigned short NumSubtrieBits;
+  struct ImplType;
+  // ImplPtr is owned by ThreadSafeTrieRawHashMapBase and needs to be freed in
+  // destroyImpl.
+  std::atomic<ImplType *> ImplPtr;
+  ImplType &getOrCreateImpl();
+  ImplType *getImpl() const;
+};
+
+/// Lock-free thread-safe hash-mapped trie.
+template <class T, size_t NumHashBytes>
+class ThreadSafeTrieRawHashMap : public ThreadSafeTrieRawHashMapBase {
+public:
+  using HashT = std::array<uint8_t, NumHashBytes>;
+
+  class LazyValueConstructor;
+  struct value_type {
+    const HashT Hash;
+    T Data;
+
+    value_type(value_type &&) = default;
+    value_type(const value_type &) = default;
+
+    value_type(ArrayRef<uint8_t> Hash, const T &Data)
+        : Hash(makeHash(Hash)), Data(Data) {}
+    value_type(ArrayRef<uint8_t> Hash, T &&Data)
+        : Hash(makeHash(Hash)), Data(std::move(Data)) {}
+
+  private:
+    friend class LazyValueConstructor;
+
+    struct EmplaceTag {};
+    template <class... ArgsT>
+    value_type(ArrayRef<uint8_t> Hash, EmplaceTag, ArgsT &&...Args)
+        : Hash(makeHash(Hash)), Data(std::forward<ArgsT>(Args)...) {}
+
+    static HashT makeHash(ArrayRef<uint8_t> HashRef) {
+      HashT Hash;
+      std::copy(HashRef.begin(), HashRef.end(), Hash.data());
+      return Hash;
+    }
+  };
+
+  using ThreadSafeTrieRawHashMapBase::operator delete;
+  using HashType = HashT;
+
+  using ThreadSafeTrieRawHashMapBase::dump;
+  using ThreadSafeTrieRawHashMapBase::print;
+
+private:
+  template <class ValueT> class PointerImpl : PointerBase {
+    friend class ThreadSafeTrieRawHashMap;
+
+    ValueT *get() const {
+      if (void *B = PointerBase::get())
+        return reinterpret_cast<ValueT *>(B);
+      return nullptr;
+    }
+
+  public:
+    ValueT &operator*() const {
+      assert(get());
+      return *get();
+    }
+    ValueT *operator->() const {
+      assert(get());
+      return get();
+    }
+    explicit operator bool() const { return get(); }
+
+    PointerImpl() = default;
+    PointerImpl(PointerImpl &&) = default;
+    PointerImpl(const PointerImpl &) = default;
+    PointerImpl &operator=(PointerImpl &&) = default;
+    PointerImpl &operator=(const PointerImpl &) = default;
+
+  protected:
+    PointerImpl(PointerBase Result) : PointerBase(Result) {}
+  };
+
+public:
+  class pointer;
+  class const_pointer;
+  class pointer : public PointerImpl<value_type> {
+    friend class ThreadSafeTrieRawHashMap;
+    friend class const_pointer;
+
+  public:
+    pointer() = default;
+    pointer(pointer &&) = default;
+    pointer(const pointer &) = default;
+    pointer &operator=(pointer &&) = default;
+    pointer &operator=(const pointer &) = default;
+
+  private:
+    pointer(PointerBase Result) : pointer::PointerImpl(Result) {}
+  };
+
+  class const_pointer : public PointerImpl<const value_type> {
+    friend class ThreadSafeTrieRawHashMap;
+
+  public:
+    const_pointer() = default;
+    const_pointer(const_pointer &&) = default;
+    const_pointer(const const_pointer &) = default;
+    const_pointer &operator=(const_pointer &&) = default;
+    const_pointer &operator=(const const_pointer &) = default;
+
+    const_pointer(const pointer &P) : const_pointer::PointerImpl(P) {}
+
+  private:
+    const_pointer(PointerBase Result) : const_pointer::PointerImpl(Result) {}
+  };
+
+  class LazyValueConstructor {
+  public:
+    value_type &operator()(T &&RHS) {
+      assert(Mem && "Constructor already called, or moved away");
+      return assign(::new (Mem) value_type(Hash, std::move(RHS)));
+    }
+    value_type &operator()(const T &RHS) {
+      assert(Mem && "Constructor already called, or moved away");
+      return assign(::new (Mem) value_type(Hash, RHS));
+    }
+    template <class... ArgsT> value_type &emplace(ArgsT &&...Args) {
+      assert(Mem && "Constructor already called, or moved away");
+      return assign(::new (Mem)
+                        value_type(Hash, typename value_type::EmplaceTag{},
+                                   std::forward<ArgsT>(Args)...));
+    }
+
+    LazyValueConstructor(LazyValueConstructor &&RHS)
+        : Mem(RHS.Mem), Result(RHS.Result), Hash(RHS.Hash) {
+      RHS.Mem = nullptr; // Moved away, cannot call.
+    }
+    ~LazyValueConstructor() { assert(!Mem && "Constructor never called!"); }
+
+  private:
+    value_type &assign(value_type *V) {
+      Mem = nullptr;
+      Result = V;
+      return *V;
+    }
+    friend class ThreadSafeTrieRawHashMap;
+    LazyValueConstructor() = delete;
+    LazyValueConstructor(void *Mem, value_type *&Result, ArrayRef<uint8_t> Hash)
+        : Mem(Mem), Result(Result), Hash(Hash) {
+      assert(Hash.size() == sizeof(HashT) && "Invalid hash");
+      assert(Mem && "Invalid memory for construction");
+    }
+    void *Mem;
+    value_type *&Result;
+    ArrayRef<uint8_t> Hash;
+  };
+
+  /// Insert with a hint. Default-constructed hint will work, but it's
+  /// recommended to start with a lookup to avoid overhead in object creation
+  /// if it already exists.
+  pointer insertLazy(const_pointer Hint, ArrayRef<uint8_t> Hash,
+                     function_ref<void(LazyValueConstructor)> OnConstruct) {
+    return pointer(ThreadSafeTrieRawHashMapBase::insert(
+        Hint, Hash, [&](void *Mem, ArrayRef<uint8_t> Hash) {
+          value_type *Result = nullptr;
+          OnConstruct(LazyValueConstructor(Mem, Result, Hash));
+          return Result->Hash.data();
+        }));
+  }
+
+  pointer insertLazy(ArrayRef<uint8_t> Hash,
+                     function_ref<void(LazyValueConstructor)> OnConstruct) {
+    return insertLazy(const_pointer(), Hash, OnConstruct);
+  }
+
+  pointer insert(const_pointer Hint, value_type &&HashedData) {
+    return insertLazy(Hint, HashedData.Hash, [&](LazyValueConstructor C) {
+      C(std::move(HashedData.Data));
+    });
+  }
+
+  pointer insert(const_pointer Hint, const value_type &HashedData) {
+    return insertLazy(Hint, HashedData.Hash,
+                      [&](LazyValueConstructor C) { C(HashedData.Data); });
+  }
+
+  pointer find(ArrayRef<uint8_t> Hash) {
+    assert(Hash.size() == std::tuple_size<HashT>::value);
+    return ThreadSafeTrieRawHashMapBase::find(Hash);
+  }
+
+  const_pointer find(ArrayRef<uint8_t> Hash) const {
+    assert(Hash.size() == std::tuple_size<HashT>::value);
+    return ThreadSafeTrieRawHashMapBase::find(Hash);
+  }
+
+  ThreadSafeTrieRawHashMap(std::optional<size_t> NumRootBits = std::nullopt,
+                           std::optional<size_t> NumSubtrieBits = std::nullopt)
+      : ThreadSafeTrieRawHashMapBase(DefaultContentAllocSize<value_type>,
+                                     DefaultContentAllocAlign<value_type>,
+                                     DefaultContentOffset<value_type>,
+                                     NumRootBits, NumSubtrieBits) {}
+
+  ~ThreadSafeTrieRawHashMap() {
+    if constexpr (std::is_trivially_destructible<value_type>::value)
+      this->destroyImpl(nullptr);
+    else
+      this->destroyImpl(
+          [](void *P) { static_cast<value_type *>(P)->~value_type(); });
+  }
+
+  // Move constructor okay.
+  ThreadSafeTrieRawHashMap(ThreadSafeTrieRawHashMap...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/68448


More information about the llvm-commits mailing list