[clang] Introduce paged vector (PR #66430)

Tue Sep 19 15:14:42 PDT 2023

================
@@ -0,0 +1,322 @@
+//===- llvm/ADT/PagedVector.h - 'Lazily allocated' vectors ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PagedVector class.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_ADT_PAGEDVECTOR_H
+#define LLVM_ADT_PAGEDVECTOR_H
+
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/Support/Allocator.h"
+#include <cassert>
+#include <iostream>
+#include <vector>
+
+namespace llvm {
+// A vector that allocates memory in pages.
+// Order is kept, but memory is allocated only when one element of the page is
+// accessed. This introduces a level of indirection, but it is useful when you
+// have a sparsely initialised vector where the full size is allocated upfront
+// with the default constructor and elements are initialised later, on first
+// access.
+//
+// Notice that this does not have iterators, because if you
+// have iterators it probably means you are going to touch
+// all the memory in any case, so better use a std::vector in
+// the first place.
+//
+// Pages are allocated in SLAB_SIZE chunks, using the BumpPtrAllocator.
+template <typename T, std::size_t PAGE_SIZE = 1024 / sizeof(T)>
+class PagedVector {
+  static_assert(PAGE_SIZE > 0, "PAGE_SIZE must be greater than 0. Most likely "
+                               "you want it to be greater than 16.");
+  // The actual number of elements in the vector which can be accessed.
+  std::size_t Size = 0;
+
+  // For each page, the address of its first element (stored as an integer),
+  // or InvalidPage if the page has not been allocated yet.
+  mutable std::vector<uintptr_t> PageToDataIdx;
+  // The allocator providing the storage of the pages. All the elements of a
+  // page are allocated contiguously and default constructed on the first
+  // access of any of the elements of the page. The bool is true when the
+  // allocator was created by this vector, which then has to delete it.
+  PointerIntPair<BumpPtrAllocator *, 1, bool> Allocator;
+
+  constexpr static uintptr_t InvalidPage = SIZE_MAX;
+
+public:
+  using value_type = T;
+
+  // Default constructor. We build our own allocator and flag it as owned, so
+  // that it is deleted together with the vector.
+  PagedVector() : Allocator(new BumpPtrAllocator, true) {}
+
+  // Build a vector whose pages are taken from the external allocator A.
+  // A is not owned: the vector never resets nor deletes it. A must not be
+  // null.
+  PagedVector(BumpPtrAllocator *A) : Allocator(A, false) {
+    assert(A && "Allocator cannot be nullptr");
+  }
+
+  // A copy would share the raw page pointers with the original and, when the
+  // allocator is owned, delete it twice. Forbid copies (declaring these also
+  // suppresses the implicit move operations).
+  PagedVector(const PagedVector &) = delete;
+  PagedVector &operator=(const PagedVector &) = delete;
+
+  ~PagedVector() {
+    // Destroy the elements of all the allocated pages first.
+    clear();
+    // If we own the allocator, delete it.
+    if (Allocator.getInt())
+      delete Allocator.getPointer();
+  }
+
+  // Lookup an element at position Index, which must be smaller than size().
+  // If the associated page is not allocated yet, it is allocated and all of
+  // its PAGE_SIZE elements are default constructed, not only the requested
+  // one. If the associated page is already allocated, just return the element.
+  // The method is const because the lazy allocation only touches the mutable
+  // page lookup table.
+  T &operator[](std::size_t Index) const {
+    assert(Index < Size);
+    assert(Index / PAGE_SIZE < PageToDataIdx.size());
+    uintptr_t &PagePtr = PageToDataIdx[Index / PAGE_SIZE];
+    // If the page was not yet allocated, allocate it.
+    if (PagePtr == InvalidPage) {
+      T *NewPagePtr = Allocator.getPointer()->template Allocate<T>(PAGE_SIZE);
+      // We need to invoke the default constructor on all the elements of the
+      // page.
+      for (std::size_t I = 0; I < PAGE_SIZE; ++I)
+        new (NewPagePtr + I) T();
+
+      PagePtr = reinterpret_cast<uintptr_t>(NewPagePtr);
+    }
+    // Dereference the element in the page.
+    return *((Index % PAGE_SIZE) + reinterpret_cast<T *>(PagePtr));
+  }
+
+  // Return the capacity of the vector. I.e. the maximum size it can be expanded
+  // to with the resize method without growing the page lookup table.
+  [[nodiscard]] std::size_t capacity() const {
+    return PageToDataIdx.size() * PAGE_SIZE;
+  }
+
+  // Return the size of the vector. I.e. the number of elements which can be
+  // accessed, which is the value passed to the last call of the resize method
+  // (or 0 if the vector was never resized or was cleared since).
+  [[nodiscard]] std::size_t size() const { return Size; }
+
+  // Resize the vector to NewSize elements.
+  // When growing, only the page lookup table is extended: the new pages are
+  // marked as not allocated and they are allocated lazily on first access.
+  // When shrinking, the pages which lie completely beyond NewSize are
+  // destroyed and handed back to the allocator. Elements are constructed and
+  // destroyed one whole page at a time, so the elements beyond NewSize in the
+  // last retained page stay alive, and they keep their value if the vector is
+  // grown again.
+  void resize(std::size_t NewSize) {
+    // The number of pages needed for NewSize elements, rounded up for the
+    // case in which NewSize is not a multiple of PAGE_SIZE. Computed without
+    // subtracting from NewSize, so that NewSize == 0 simply yields 0 pages.
+    std::size_t Pages = NewSize / PAGE_SIZE;
+    if (NewSize % PAGE_SIZE != 0)
+      Pages += 1;
+    // Handle shrink case: release the pages which are not needed anymore.
+    // The loop does not run when the vector grows.
+    for (std::size_t I = Pages; I < PageToDataIdx.size(); ++I) {
+      uintptr_t PagePtr = PageToDataIdx[I];
+      if (PagePtr == InvalidPage)
+        continue;
+      T *Page = reinterpret_cast<T *>(PagePtr);
+      // We need to invoke the destructor on all the elements of the page.
+      for (std::size_t J = 0; J < PAGE_SIZE; ++J)
+        Page[J].~T();
+      // Hand back the whole page, not only its first element.
+      Allocator.getPointer()->Deallocate(Page, PAGE_SIZE);
+    }
+    Size = NewSize;
+    // We use InvalidPage to indicate that a page has not been allocated yet.
+    // This cannot be 0, because 0 is a valid page id.
+    // We use InvalidPage instead of a separate bool to avoid wasting space.
+    PageToDataIdx.resize(Pages, InvalidPage);
+  }
+
+  // Return true if the vector is empty
+  [[nodiscard]] bool empty() const { return Size == 0; }
+
+  /// Clear the vector, i.e. destroy the elements of all the allocated pages,
+  /// clear the whole page lookup index and reset the size.
+  void clear() {
+    // resize(0) invokes the destructors and deallocates the pages one by
+    // one, skipping the entries of the pages which were never allocated.
+    resize(0);
+    // If we own the allocator, we can also reset it.
+    if (Allocator.getInt())
+      Allocator.getPointer()->Reset();
+  }
+
+  // Iterator on all the elements of the vector
+  // which have actually been constructed.
+  class MaterialisedIterator {
+    PagedVector const *PV;
+    std::size_t ElementIdx;
+
+  public:
+    using iterator_category = std::forward_iterator_tag;
+    using value_type = T;
+    using difference_type = std::ptrdiff_t;
+    using pointer = T *;
+    using reference = T &;
+
+    MaterialisedIterator(PagedVector const *PV, std::size_t ElementIdx)
+        : PV(PV), ElementIdx(ElementIdx) {}
+
+    // When incrementing the iterator, we skip the elements which have not
+    // been materialised yet, i.e. the ones in pages which were never
+    // allocated. The iterator first moves off the current element and then,
+    // if it landed in a page which is not allocated, jumps to the beginning of
+    // the next allocated page. It never moves past PV->Size, and incrementing
+    // an iterator which is already there is a no-op.
+    // NOTE(review): this presumes the end iterator is the one with index
+    // PV->Size; confirm against the function creating it.
+    MaterialisedIterator &operator++() {
+      if (ElementIdx >= PV->Size)
+        return *this;
+
+      // Always leave the current element before looking at any page.
+      ++ElementIdx;
+      // Pages are either fully constructed or not at all, so we can skip
+      // the missing ones one whole page at a time.
+      while (ElementIdx < PV->Size &&
+             PV->PageToDataIdx[ElementIdx / PAGE_SIZE] == InvalidPage)
+        ElementIdx = (ElementIdx / PAGE_SIZE + 1) * PAGE_SIZE;
+
+      // Skipping a trailing missing page can overshoot the size.
+      if (ElementIdx > PV->Size)
+        ElementIdx = PV->Size;
+
+      return *this;
+    }
+    // Post increment operator: advance the iterator and return the value
+    // it had before being advanced.
+    MaterialisedIterator operator++(int) {
+      MaterialisedIterator Previous(*this);
+      operator++();
+      return Previous;
+    }
+
+    std::ptrdiff_t operator-(MaterialisedIterator const &Other) const {
+      assert(PV == Other.PV);
+      // If they are on the same table we can just subtract the indices.
+      // Otherwise we have to iterate over the pages to find the difference.
+      // If a page is invalid, we skip it.
+      if (PV == Other.PV)
+        return ElementIdx - Other.ElementIdx;
+
+      std::size_t ElementMin = std::min(ElementIdx, Other.ElementIdx);
+      std::size_t ElementMax = std::max(ElementIdx, Other.ElementIdx);
+      std::size_t PageMin = ElementMin / PAGE_SIZE;
+      std::size_t PageMax = ElementMax / PAGE_SIZE;
+
+      std::size_t Count = 0ULL;
+      for (std::size_t PageIdx = PageMin; PageIdx < PageMax; ++PageIdx) {
+        if (PV->PageToDataIdx[PageIdx] == InvalidPage)
+          continue;
+
+        Count += PAGE_SIZE;
+      }
+      Count += ElementMax % PAGE_SIZE;
+      Count += PAGE_SIZE - ElementMin % PAGE_SIZE;
----------------
ktf wrote:

I dropped the associated function, as discussed below.

https://github.com/llvm/llvm-project/pull/66430