[clang] Introduce paged vector (PR #66430)

Giulio Eulisse via cfe-commits cfe-commits at lists.llvm.org
Thu Sep 28 01:58:04 PDT 2023


================
@@ -0,0 +1,301 @@
+//===- llvm/ADT/PagedVector.h - 'Lazily allocated' vectors ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PagedVector class.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_ADT_PAGEDVECTOR_H
+#define LLVM_ADT_PAGEDVECTOR_H
+
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Support/Allocator.h"
+#include <cassert>
+#include <vector>
+
+namespace llvm {
+/// A vector that allocates memory in pages.
+///
+/// Order is kept, but memory is allocated only when one element of the page is
+/// accessed. This introduces a level of indirection, but it is useful when you
+/// have a sparsely initialised vector where the full size is allocated upfront.
+///
+/// As a side effect the elements are initialised later than in a normal vector.
+/// On the first access to one of the elements of a given page, all the elements
+/// of the page are initialised. This also means that the elements of the page
+/// are initialised beyond the size of the vector.
+///
+/// Similarly on destruction the elements are destroyed only when the page is
+/// not needed anymore, delaying invoking the destructor of the elements.
+///
+/// Notice that this has iterators only on materialised elements. This
+/// is deliberately done under the assumption you would dereference the elements
+/// while iterating, therefore materialising them and losing the gains in terms
+/// of memory usage this container provides. If you have such a use case, you
+/// probably want to use a normal std::vector or a llvm::SmallVector.
+template <typename T, size_t PageSize = 1024 / sizeof(T)> class PagedVector {
+  static_assert(PageSize > 1, "PageSize must be greater than 0. Most likely "
+                              "you want it to be greater than 16.");
+  /// The actual number of elements in the vector which can be accessed.
+  size_t Size = 0;
+
+  /// For each page, a pointer to the first element of that page, or
+  /// InvalidPage (nullptr) if the page has not been allocated yet.
+  mutable SmallVector<T *, 0> PageToDataPtrs;
+  /// Allocator providing the actual page data, paired with a flag telling
+  /// whether this vector owns it. All the page elements are allocated on the
+  /// first access of any of the elements of the page. Elements are default
+  /// constructed and elements of the page are stored contiguously. The order of
+  /// the pages in memory however depends on the order of access of the pages.
+  PointerIntPair<BumpPtrAllocator *, 1, bool> Allocator;
+
+  constexpr static T *InvalidPage = nullptr;
+
+public:
+  using value_type = T;
+
+  /// Default constructor. We build our own allocator and mark it as such with
+  /// `true` in the second pair element.
+  PagedVector() : Allocator(new BumpPtrAllocator, true) {}
+  PagedVector(BumpPtrAllocator *A) : Allocator(A, false) {
+    assert(A != nullptr && "Allocator cannot be null");
+  }
+
+  ~PagedVector() {
+    clear();
+    // If we own the allocator, delete it.
+    if (Allocator.getInt())
+      delete Allocator.getPointer();
+  }
+
+  // Forbid copy and move as we do not need them for the current use case.
+  PagedVector(const PagedVector &) = delete;
+  PagedVector(PagedVector &&) = delete;
+  PagedVector &operator=(const PagedVector &) = delete;
+  PagedVector &operator=(PagedVector &&) = delete;
+
+  /// Look up an element at position `Index`.
+  /// If the associated page is not filled, it will be filled with default
+  /// constructed elements.
+  T &operator[](size_t Index) const {
+    assert(Index < Size);
+    assert(Index / PageSize < PageToDataPtrs.size());
+    T *&PagePtr = PageToDataPtrs[Index / PageSize];
+    // If the page was not yet allocated, allocate it.
+    if (PagePtr == InvalidPage) {
+      T *NewPagePtr = Allocator.getPointer()->template Allocate<T>(PageSize);
+      // We need to invoke the default constructor on all the elements of the
+      // page.
+      std::uninitialized_value_construct_n(NewPagePtr, PageSize);
+
+      PagePtr = NewPagePtr;
+    }
+    // Dereference the element in the page.
+    return PagePtr[Index % PageSize];
+  }
+
+  /// Return the capacity of the vector. I.e. the maximum size it can be
+  /// expanded to with the resize method without allocating more pages.
+  [[nodiscard]] size_t capacity() const {
+    return PageToDataPtrs.size() * PageSize;
+  }
+
+  /// Return the size of the vector. I.e. one more than the maximum index that
+  /// can be accessed, i.e. the value which was last used as argument of the
+  /// resize method.
+  [[nodiscard]] size_t size() const { return Size; }
+
+  /// Resize the vector. Notice that the constructor of the elements will not
+  /// be invoked until an element of a given page is accessed, at which point
+  /// all the elements of the page will be constructed.
+  ///
+  /// If the new size is smaller than the current size, the elements of the
+  /// pages that are not needed anymore will be destroyed, however, elements of
+  /// the last page will not be destroyed.
+  ///
+  /// For this reason the usage of this vector is discouraged if you rely
+  /// on the constructor / destructor of the elements to be invoked.
+  void resize(size_t NewSize) {
+    if (NewSize == 0) {
+      clear();
+      return;
+    }
+    // Handle shrink case: destroy the elements in the pages that are not
+    // needed anymore and deallocate the pages.
+    //
+    // On the other hand, we do not destroy the extra elements in the last page,
+    // because we might need them later and the logic is simpler if we do not
+    // destroy them. This means that elements are destroyed only when the
+    // page they belong to is destroyed. This is similar to what happens on
+    // access of the elements of a page, where all the elements of the page are
+    // constructed, not only the one actually needed.
+    size_t NewLastPage = (NewSize - 1) / PageSize;
+    if (NewSize < Size) {
+      // Destruct the elements in the pages that are not needed anymore.
+      // Notice that we need to do this only if the destructor of the elements
+      // is not trivial.
+      if constexpr (!std::is_trivially_destructible_v<T>) {
----------------
ktf wrote:

AFAICT, it actually optimises it out when using RelWithDebInfo:

```
-> 165    /// lookup index and reset the size.
-> 166    void clear() {
-> 167      Size = 0;
-> 168      for (T *Page : PageToDataPtrs) {
-> 169        if (Page == nullptr)

->  0x1003504d8 <+2800>: stur   xzr, [x29, #-0x48]

** 41       std::memcpy(&R, Data, sizeof(R));

    0x1003504dc <+2804>: ldur   x8, [x29, #-0x30]

   175      }
   176      // If we own the allocator, simply reset it.
** 177      if (Allocator.getInt() == true)

    0x1003504e0 <+2808>: tbnz   w8, #0x2, 0x1003504ec     ; <+2820> [inlined] llvm::PointerIntPairInfo<llvm::BumpPtrAllocatorImpl<llvm::MallocAllocator, 4096ul, 4096ul, 128ul>*, 1u, llvm::P
ointerLikeTypeTraits<llvm::BumpPtrAllocatorImpl<llvm::MallocAllocator, 4096ul, 4096ul, 128ul>*>>::getPointer(long) at PointerIntPair.h:192:40

   614    void clear() {
   615      this->destroy_range(this->begin(), this->end());
** 616      this->Size = 0;
   617    }
```

while I see the call with Debug:

```
** 171        std::destroy_n(Page, PageSize);
   172        // If we do not own the allocator, deallocate the pages one by one.

    0x1004c0fa0 <+124>: ldr    x0, [sp, #0x8]
    0x1004c0fa4 <+128>: mov    x1, #0xa
    0x1004c0fa8 <+132>: bl     0x1004c3f3c               ; std::__1::destroy_n[abi:v160006]<int*, unsigned long> at construct_at.h:120
    0x1004c0fac <+136>: ldr    x8, [sp]
```

If Debug-build performance for this actually matters, I am happy to guard the call with an `if constexpr`.

https://github.com/llvm/llvm-project/pull/66430


More information about the cfe-commits mailing list