[clang] Introduce paged vector (PR #66430)

Giulio Eulisse via cfe-commits cfe-commits at lists.llvm.org
Wed Sep 27 06:19:56 PDT 2023


https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/66430

>From 47392e5996cccada1d638495a9376b0430142c8c Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Thu, 14 Sep 2023 21:58:21 +0200
Subject: [PATCH 01/16] Introduce PagedVector class

The goal of the class is to be an (almost) drop in replacement for
SmallVector and std::vector when those are presized and filled later,
as it happens in SourceManager and ASTReader.

By splitting the actual vector in pages of the same size and allocating
the pages only when they are needed, using this containers reduces the
memory usage by a factor 4 for the cases relevant to the ALICE
experiment ROOT / cling usage.
---
 clang/include/clang/Basic/SourceManager.h     |   3 +-
 clang/include/clang/Serialization/ASTReader.h |   5 +-
 clang/lib/Basic/SourceManager.cpp             |  10 +-
 clang/lib/Serialization/ASTReader.cpp         |   5 +-
 llvm/docs/ProgrammersManual.rst               |  33 ++
 llvm/include/llvm/ADT/PagedVector.h           | 323 +++++++++++++++++
 llvm/unittests/ADT/CMakeLists.txt             |   1 +
 llvm/unittests/ADT/PagedVectorTest.cpp        | 342 ++++++++++++++++++
 8 files changed, 712 insertions(+), 10 deletions(-)
 create mode 100644 llvm/include/llvm/ADT/PagedVector.h
 create mode 100644 llvm/unittests/ADT/PagedVectorTest.cpp

diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h
index 2f846502d6f3327..e37caa2252532f9 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -43,6 +43,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -699,7 +700,7 @@ class SourceManager : public RefCountedBase<SourceManager> {
   ///
   /// Negative FileIDs are indexes into this table. To get from ID to an index,
   /// use (-ID - 2).
-  SmallVector<SrcMgr::SLocEntry, 0> LoadedSLocEntryTable;
+  llvm::PagedVector<SrcMgr::SLocEntry> LoadedSLocEntryTable;
 
   /// The starting offset of the next local SLocEntry.
   ///
diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index dc1eb21c27801fe..65e19c6e44cf571 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -38,6 +38,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -487,7 +488,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the type with
   /// ID = (I + 1) << FastQual::Width has already been loaded
-  std::vector<QualType> TypesLoaded;
+  llvm::PagedVector<QualType> TypesLoaded;
 
   using GlobalTypeMapType =
       ContinuousRangeMap<serialization::TypeID, ModuleFile *, 4>;
@@ -501,7 +502,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the declaration with ID
   /// = I + 1 has already been loaded.
-  std::vector<Decl *> DeclsLoaded;
+  llvm::PagedVector<Decl *> DeclsLoaded;
 
   using GlobalDeclMapType =
       ContinuousRangeMap<serialization::DeclID, ModuleFile *, 4>;
diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp
index 0521ac7b30339ab..7fa8b8096ac4931 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -2344,11 +2344,11 @@ SourceManager::MemoryBufferSizes SourceManager::getMemoryBufferSizes() const {
 }
 
 size_t SourceManager::getDataStructureSizes() const {
-  size_t size = llvm::capacity_in_bytes(MemBufferInfos)
-    + llvm::capacity_in_bytes(LocalSLocEntryTable)
-    + llvm::capacity_in_bytes(LoadedSLocEntryTable)
-    + llvm::capacity_in_bytes(SLocEntryLoaded)
-    + llvm::capacity_in_bytes(FileInfos);
+  size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
+                llvm::capacity_in_bytes(LocalSLocEntryTable) +
+                llvm::capacity_in_bytes(LoadedSLocEntryTable) +
+                llvm::capacity_in_bytes(SLocEntryLoaded) +
+                llvm::capacity_in_bytes(FileInfos);
 
   if (OverriddenFilesInfo)
     size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles);
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 0952244d037a77c..badd54987af18dd 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -7944,9 +7944,10 @@ void ASTReader::PrintStats() {
   std::fprintf(stderr, "*** AST File Statistics:\n");
 
   unsigned NumTypesLoaded =
-      TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
+      TypesLoaded.size() - llvm::count(TypesLoaded.materialised(), QualType());
   unsigned NumDeclsLoaded =
-      DeclsLoaded.size() - llvm::count(DeclsLoaded, (Decl *)nullptr);
+      DeclsLoaded.size() -
+      llvm::count(DeclsLoaded.materialised(), (Decl *)nullptr);
   unsigned NumIdentifiersLoaded =
       IdentifiersLoaded.size() -
       llvm::count(IdentifiersLoaded, (IdentifierInfo *)nullptr);
diff --git a/llvm/docs/ProgrammersManual.rst b/llvm/docs/ProgrammersManual.rst
index 43dd985d9779ed2..c8ebeb136a7bf68 100644
--- a/llvm/docs/ProgrammersManual.rst
+++ b/llvm/docs/ProgrammersManual.rst
@@ -1625,6 +1625,39 @@ SmallVector has grown a few other minor advantages over std::vector, causing
    and is no longer "private to the implementation". A name like
    ``SmallVectorHeader`` might be more appropriate.
 
+.. _dss_pagedvector:
+
+llvm/ADT/PagedVector.h
+^^^^^^^^^^^^^^^^^^^^^^
+
+``PagedVector<Type, PageSize>`` is a random access container that allocates
+``PageSize`` elements of type ``Type`` when the first element of a page is accessed
+via the ``operator[]``.  This is useful for cases where the number of
+elements is known in advance; their actual initialization is expensive; and they are
+sparsely used. This utility uses page-granular lazily initialization when the element is accessed. When the
+number of used pages is small significant memory savings can be achieved.
+
+The main advantage is that a ``PagedVector`` allows to delay the actual
+allocation of the page until it's needed, at the extra cost of one integer per
+page and one extra indirection when accessing elements with their positional
+index. 
+
+In order to minimise the memory footprint of this container, it's important to
+balance the PageSize so that it's not too small (otherwise the overhead of the
+integer per page might become too high) and not too big (otherwise the memory is
+wasted if the page is not fully used).
+
+Moreover, while retaining the order of the elements based on their insertion
+index, like a vector, iterating over the elements via ``begin()`` and ``end()``
+is not provided in the API, due to the fact accessing the elements in order
+would allocate all the iterated pages, defeating memory savings and the purpose
+of the ``PagedVector``.
+
+Finally a ``materialised_begin()`` and ``materialised_end`` iterators are
+provided to access the elements associated to the accessed pages, which could
+speed up operations that need to iterate over initialized elements in a
+non-ordered manner.
+
 .. _dss_vector:
 
 <vector>
diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
new file mode 100644
index 000000000000000..953c21ac5f347b6
--- /dev/null
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -0,0 +1,323 @@
+//===- llvm/ADT/PagedVector.h - 'Lazyly allocated' vectors --------*- C++
+//-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PagedVector class.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_ADT_PAGEDVECTOR_H
+#define LLVM_ADT_PAGEDVECTOR_H
+
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Support/Allocator.h"
+#include <cassert>
+#include <vector>
+
+namespace llvm {
+/// A vector that allocates memory in pages.
+///
+/// Order is kept, but memory is allocated only when one element of the page is
+/// accessed. This introduces a level of indirection, but it is useful when you
+/// have a sparsely initialised vector where the full size is allocated upfront.
+///
+/// As a side effect the elements are initialised later than in a normal vector.
+/// On the first access to one of the elements of a given page all, the elements
+/// of the page are initialised. This also means that the elements of the page
+/// are initialised beyond the size of the vector.
+///
+/// Similarly on destruction the elements are destroyed only when the page is
+/// not needed anymore, delaying invoking the destructor of the elements.
+///
+/// Notice that this does not have iterators, because if you have iterators it
+/// probably means you are going to touch all the memory in any case, so better
+/// use a std::vector in the first place.
+template <typename T, size_t PageSize = 1024 / sizeof(T)> class PagedVector {
+  static_assert(PageSize > 1, "PageSize must be greater than 0. Most likely "
+                              "you want it to be greater than 16.");
+  /// The actual number of element in the vector which can be accessed.
+  size_t Size = 0;
+
+  /// The position of the initial element of the page in the Data vector.
+  /// Pages are allocated contiguously in the Data vector.
+  mutable SmallVector<T *, 0> PageToDataPtrs;
+  /// Actual page data. All the page elements are added to this vector on the
+  /// first access of any of the elements of the page. Elements default
+  /// constructed and elements of the page are stored contiguously. The order of
+  /// the elements however depends on the order of access of the pages.
+  PointerIntPair<BumpPtrAllocator *, 1, bool> Allocator;
+
+  constexpr static T *InvalidPage = nullptr;
+
+public:
+  using value_type = T;
+
+  /// Default constructor. We build our own allocator and mark it as such with
+  /// `true` in the second pair element.
+  PagedVector() : Allocator(new BumpPtrAllocator, true) {}
+  PagedVector(BumpPtrAllocator *A) : Allocator(A, false) {
+    assert(A != nullptr && "Allocator cannot be null");
+  }
+
+  ~PagedVector() {
+    clear();
+    // If we own the allocator, delete it.
+    if (Allocator.getInt())
+      delete Allocator.getPointer();
+  }
+
+  /// Look up an element at position `Index`.
+  /// If the associated page is not filled, it will be filled with default
+  /// constructed elements. If the associated page is filled, return the
+  /// element.
+  T &operator[](size_t Index) const {
+    assert(Index < Size);
+    assert(Index / PageSize < PageToDataPtrs.size());
+    T *&PagePtr = PageToDataPtrs[Index / PageSize];
+    // If the page was not yet allocated, allocate it.
+    if (PagePtr == InvalidPage) {
+      T *NewPagePtr = Allocator.getPointer()->template Allocate<T>(PageSize);
+      // We need to invoke the default constructor on all the elements of the
+      // page.
+      for (size_t I = 0; I < PageSize; ++I)
+        new (NewPagePtr + I) T();
+
+      PagePtr = NewPagePtr;
+    }
+    // Dereference the element in the page.
+    return *((Index % PageSize) + PagePtr);
+  }
+
+  /// Return the capacity of the vector. I.e. the maximum size it can be
+  /// expanded to with the resize method without allocating more pages.
+  [[nodiscard]] size_t capacity() const {
+    return PageToDataPtrs.size() * PageSize;
+  }
+
+  /// Return the size of the vector. I.e. the maximum index that can be
+  /// accessed, i.e. the maximum value which was used as argument of the
+  /// resize method.
+  [[nodiscard]] size_t size() const { return Size; }
+
+  /// Resize the vector. Notice that the constructor of the elements will not
+  /// be invoked until an element of a given page is accessed, at which point
+  /// all the elements of the page will be constructed.
+  ///
+  /// If the new size is smaller than the current size, the elements of the
+  /// pages that are not needed anymore will be destroyed, however, elements of
+  /// the last page will not be destroyed.
+  ///
+  /// For these reason the usage of this vector is discouraged if you rely
+  /// on the construction / destructor of the elements to be invoked.
+  void resize(size_t NewSize) {
+    if (NewSize == 0) {
+      clear();
+      return;
+    }
+    // Handle shrink case: destroy the elements in the pages that are not
+    // needed anymore and deallocate the pages.
+    //
+    // On the other hand, we do not destroy the extra elements in the last page,
+    // because we might need them later and the logic is simpler if we do not
+    // destroy them. This means that elements are only destroyed only when the
+    // page they belong to is destroyed. This is similar to what happens on
+    // access of the elements of a page, where all the elements of the page are
+    // constructed not only the one effectively neeeded.
+    if (NewSize < Size) {
+      size_t NewLastPage = (NewSize - 1) / PageSize;
+      for (size_t I = NewLastPage + 1, E = PageToDataPtrs.size(); I < E; ++I) {
+        T *PagePtr = PageToDataPtrs[I];
+        if (PagePtr == InvalidPage)
+          continue;
+        T *Page = PagePtr;
+        // We need to invoke the destructor on all the elements of the page.
+        for (size_t J = 0; J < PageSize; ++J)
+          Page[J].~T();
+        Allocator.getPointer()->Deallocate(Page);
+        // We mark the page invalid, to avoid double deletion.
+        PageToDataPtrs[I] = InvalidPage;
+      }
+      PageToDataPtrs.resize(NewLastPage + 1);
+    }
+    Size = NewSize;
+    // If the capacity is enough, just update the size and continue
+    // with the currently allocated pages. Notice that we do not
+    // need to default construct any new element, because that was already done
+    // when the page was allocated.
+    if (Size <= capacity())
+      return;
+    // The number of pages to allocate. The Remainder is calculated
+    // for the case in which the NewSize is not a multiple of PageSize.
+    // In that case we need one more page.
+    size_t Pages = Size / PageSize;
+    size_t Remainder = Size % PageSize;
+    if (Remainder != 0)
+      Pages += 1;
+    assert(Pages > PageToDataPtrs.size());
+    // We use InvalidPage to indicate that a page has not been allocated yet.
+    // This cannot be 0, because 0 is a valid page id.
+    // We use InvalidPage instead of a separate bool to avoid wasting space.
+    PageToDataPtrs.resize(Pages, InvalidPage);
+  }
+
+  [[nodiscard]] bool empty() const { return Size == 0; }
+
+  /// Clear the vector, i.e. clear the allocated pages, the whole page
+  /// lookup index and reset the size.
+  void clear() {
+    Size = 0;
+    for (T *Page : PageToDataPtrs) {
+      if (Page == InvalidPage)
+        continue;
+      for (size_t I = 0; I < PageSize; ++I)
+        Page[I].~T();
+      // If we do not own the allocator, deallocate the pages one by one.
+      if (!Allocator.getInt())
+        Allocator.getPointer()->Deallocate(Page);
+    }
+    // If we own the allocator, simply reset it.
+    if (Allocator.getInt() == true)
+      Allocator.getPointer()->Reset();
+    PageToDataPtrs.clear();
+  }
+
+  /// Iterator on all the elements of the vector
+  /// which have actually being constructed.
+  class MaterialisedIterator {
+    const PagedVector *PV;
+    size_t ElementIdx;
+
+  public:
+    using iterator_category = std::forward_iterator_tag;
+    using value_type = T;
+    using difference_type = std::ptrdiff_t;
+    using pointer = T *;
+    using reference = T &;
+
+    MaterialisedIterator(PagedVector const *PV, size_t ElementIdx)
+        : PV(PV), ElementIdx(ElementIdx) {}
+
+    /// Pre-increment operator.
+    ///
+    /// When incrementing the iterator, we skip the elements which have not
+    /// been materialised yet.
+    MaterialisedIterator &operator++() {
+      ++ElementIdx;
+      if (ElementIdx % PageSize == 0) {
+        while (ElementIdx < PV->Size &&
+               PV->PageToDataPtrs[ElementIdx / PageSize] == InvalidPage)
+          ElementIdx += PageSize;
+        if (ElementIdx > PV->Size)
+          ElementIdx = PV->Size;
+      }
+
+      return *this;
+    }
+
+    MaterialisedIterator operator++(int) {
+      MaterialisedIterator Copy = *this;
+      ++*this;
+      return Copy;
+    }
+
+    /// Dereference operator.
+    ///
+    /// Notice this can materialise elements if needed so there might be
+    /// a page allocation and additional construction of the elements of
+    /// such page.
+    T const &operator*() const {
+      assert(ElementIdx < PV->Size);
+      assert(PV->PageToDataPtrs[ElementIdx / PageSize] != InvalidPage);
+      T *PagePtr = PV->PageToDataPtrs[ElementIdx / PageSize];
+      return *((ElementIdx % PageSize) + PagePtr);
+    }
+
+    friend bool operator==(MaterialisedIterator const &LHS,
+                           MaterialisedIterator const &RHS);
+    friend bool operator!=(MaterialisedIterator const &LHS,
+                           MaterialisedIterator const &RHS);
+
+    [[nodiscard]] size_t getIndex() const { return ElementIdx; }
+  };
+
+  /// Equality operator.
+  friend bool operator==(MaterialisedIterator const &LHS,
+                         MaterialisedIterator const &RHS) {
+    assert(LHS.PV == RHS.PV);
+    auto *PV = LHS.PV;
+
+    // Any iterator for an empty vector is equal to any other iterator.
+    if (LHS.PV->empty())
+      return true;
+    // Get the pages of the two iterators. If between the two pages there
+    // are no valid pages, we can condider the iterators equal.
+    size_t PageMin = std::min(LHS.ElementIdx, RHS.ElementIdx) / PageSize;
+    size_t PageMax = std::max(LHS.ElementIdx, RHS.ElementIdx) / PageSize;
+    // If the two pages are past the end, the iterators are equal.
+    if (PageMin >= PV->PageToDataPtrs.size())
+      return true;
+    // If only the last page is past the end, the iterators are equal if
+    // all the pages up to the end are invalid.
+    if (PageMax >= PV->PageToDataPtrs.size()) {
+      for (size_t PageIdx = PageMin; PageIdx < PV->PageToDataPtrs.size();
+           ++PageIdx)
+        if (LHS.PV->PageToDataPtrs[PageIdx] != InvalidPage)
+          return false;
+      return true;
+    }
+
+    T *Page1 = PV->PageToDataPtrs[PageMin];
+    T *Page2 = PV->PageToDataPtrs[PageMax];
+    if (Page1 == InvalidPage && Page2 == InvalidPage)
+      return true;
+
+    // If the two pages are the same, the iterators are equal if they point
+    // to the same element.
+    if (PageMin == PageMax)
+      return LHS.ElementIdx == RHS.ElementIdx;
+
+    // If the two pages are different, the iterators are equal if all the
+    // pages between them are invalid.
+    return std::all_of(PV->PageToDataPtrs.begin() + PageMin,
+                       PV->PageToDataPtrs.begin() + PageMax,
+                       [](T *Page) { return Page == InvalidPage; });
+  }
+
+  friend bool operator!=(MaterialisedIterator const &LHS,
+                         MaterialisedIterator const &RHS) {
+    return !(LHS == RHS);
+  }
+
+  friend class MaterialisedIterator;
+
+  /// Iterators over the materialised elements of the vector.
+  ///
+  /// This includes all the elements belonging to allocated pages,
+  /// even if they have not been accessed yet. It's enough to access
+  /// one element of a page to materialise all the elements of the page.
+  MaterialisedIterator materialised_begin() const {
+    // Look for the first valid page
+    for (size_t ElementIdx = 0; ElementIdx < Size; ElementIdx += PageSize)
+      if (PageToDataPtrs[ElementIdx / PageSize] != InvalidPage)
+        return MaterialisedIterator(this, ElementIdx);
+
+    return MaterialisedIterator(this, Size);
+  }
+
+  MaterialisedIterator materialised_end() const {
+    return MaterialisedIterator(this, Size);
+  }
+
+  [[nodiscard]] llvm::iterator_range<MaterialisedIterator>
+  materialised() const {
+    return {materialised_begin(), materialised_end()};
+  }
+};
+} // namespace llvm
+#endif // LLVM_ADT_PAGEDVECTOR_H
diff --git a/llvm/unittests/ADT/CMakeLists.txt b/llvm/unittests/ADT/CMakeLists.txt
index c5190255ba7738e..9aa7120f30696da 100644
--- a/llvm/unittests/ADT/CMakeLists.txt
+++ b/llvm/unittests/ADT/CMakeLists.txt
@@ -51,6 +51,7 @@ add_llvm_unittest(ADTTests
   MapVectorTest.cpp
   MoveOnly.cpp
   PackedVectorTest.cpp
+  PagedVectorTest.cpp
   PointerEmbeddedIntTest.cpp
   PointerIntPairTest.cpp
   PointerSumTypeTest.cpp
diff --git a/llvm/unittests/ADT/PagedVectorTest.cpp b/llvm/unittests/ADT/PagedVectorTest.cpp
new file mode 100644
index 000000000000000..50c1153cdc22169
--- /dev/null
+++ b/llvm/unittests/ADT/PagedVectorTest.cpp
@@ -0,0 +1,342 @@
+//===- llvm/unittest/ADT/PagedVectorTest.cpp ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// PagedVector unit tests.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/PagedVector.h"
+#include "gtest/gtest.h"
+#include <iterator>
+
+namespace llvm {
+TEST(PagedVectorTest, EmptyTest) {
+  PagedVector<int, 10> V;
+  EXPECT_EQ(V.empty(), true);
+  EXPECT_EQ(V.size(), 0ULL);
+  EXPECT_EQ(V.capacity(), 0ULL);
+  EXPECT_EQ(V.materialised_begin().getIndex(), 0ULL);
+  EXPECT_EQ(V.materialised_end().getIndex(), 0ULL);
+  EXPECT_EQ(std::distance(V.materialised_begin(), V.materialised_end()), 0LL);
+
+  EXPECT_DEATH(V[0], "Index < Size");
+  EXPECT_DEATH(PagedVector<int>(nullptr), "Allocator cannot be null");
+}
+
+TEST(PagedVectorTest, ExpandTest) {
+  PagedVector<int, 10> V;
+  V.resize(2);
+  EXPECT_EQ(V.empty(), false);
+  EXPECT_EQ(V.size(), 2ULL);
+  EXPECT_EQ(V.capacity(), 10ULL);
+  EXPECT_EQ(V.materialised_begin().getIndex(), 2ULL);
+  EXPECT_EQ(V.materialised_end().getIndex(), 2ULL);
+  EXPECT_EQ(std::distance(V.materialised_begin(), V.materialised_end()), 0LL);
+}
+
+TEST(PagedVectorTest, FullPageFillingTest) {
+  PagedVector<int, 10> V;
+  V.resize(10);
+  EXPECT_EQ(V.empty(), false);
+  EXPECT_EQ(V.size(), 10ULL);
+  EXPECT_EQ(V.capacity(), 10ULL);
+  for (int I = 0; I < 10; ++I) {
+    V[I] = I;
+  }
+  EXPECT_EQ(V.empty(), false);
+  EXPECT_EQ(V.size(), 10ULL);
+  EXPECT_EQ(V.capacity(), 10ULL);
+  EXPECT_EQ(V.materialised_begin().getIndex(), 0ULL);
+  EXPECT_EQ(V.materialised_end().getIndex(), 10ULL);
+  EXPECT_EQ(std::distance(V.materialised_begin(), V.materialised_end()), 10LL);
+  for (int I = 0; I < 10; ++I) {
+    EXPECT_EQ(V[I], I);
+  }
+}
+
+TEST(PagedVectorTest, HalfPageFillingTest) {
+  PagedVector<int, 10> V;
+  V.resize(5);
+  EXPECT_EQ(V.empty(), false);
+  EXPECT_EQ(V.size(), 5ULL);
+  EXPECT_EQ(V.capacity(), 10ULL);
+  for (int I = 0; I < 5; ++I) {
+    V[I] = I;
+  }
+  EXPECT_EQ(std::distance(V.materialised_begin(), V.materialised_end()), 5LL);
+  for (int I = 0; I < 5; ++I) {
+    EXPECT_EQ(V[I], I);
+  }
+  for (int I = 5; I < 10; ++I) {
+    EXPECT_DEATH(V[I], "Index < Size");
+  }
+}
+
+TEST(PagedVectorTest, FillFullMultiPageTest) {
+  PagedVector<int, 10> V;
+  V.resize(20);
+  EXPECT_EQ(V.empty(), false);
+  EXPECT_EQ(V.size(), 20ULL);
+  EXPECT_EQ(V.capacity(), 20ULL);
+  for (int I = 0; I < 20; ++I) {
+    V[I] = I;
+  }
+  EXPECT_EQ(std::distance(V.materialised_begin(), V.materialised_end()), 20LL);
+  for (auto MI = V.materialised_begin(), ME = V.materialised_end(); MI != ME;
+       ++MI) {
+    EXPECT_EQ(*MI, std::distance(V.materialised_begin(), MI));
+  }
+}
+
+TEST(PagedVectorTest, FillHalfMultiPageTest) {
+  PagedVector<int, 10> V;
+  V.resize(20);
+  EXPECT_EQ(V.empty(), false);
+  EXPECT_EQ(V.size(), 20ULL);
+  EXPECT_EQ(V.capacity(), 20ULL);
+  for (int I = 0; I < 5; ++I) {
+    V[I] = I;
+  }
+  for (int I = 10; I < 15; ++I) {
+    V[I] = I;
+  }
+  EXPECT_EQ(std::distance(V.materialised_begin(), V.materialised_end()), 20LL);
+  for (int I = 0; I < 5; ++I) {
+    EXPECT_EQ(V[I], I);
+  }
+  for (int I = 10; I < 15; ++I) {
+    EXPECT_EQ(V[I], I);
+  }
+}
+
+TEST(PagedVectorTest, FillLastMultiPageTest) {
+  PagedVector<int, 10> V;
+  V.resize(20);
+  EXPECT_EQ(V.empty(), false);
+  EXPECT_EQ(V.size(), 20ULL);
+  EXPECT_EQ(V.capacity(), 20ULL);
+  for (int I = 10; I < 15; ++I) {
+    V[I] = I;
+  }
+  for (int I = 10; I < 15; ++I) {
+    EXPECT_EQ(V[I], I);
+  }
+
+  // Since we fill the last page only, the materialised vector
+  // should contain only the last page.
+  int J = 10;
+  for (auto MI = V.materialised_begin(), ME = V.materialised_end(); MI != ME;
+       ++MI) {
+    if (J < 15) {
+      EXPECT_EQ(*MI, J);
+    } else {
+      EXPECT_EQ(*MI, 0);
+    }
+    ++J;
+  }
+  EXPECT_EQ(std::distance(V.materialised_begin(), V.materialised_end()), 10LL);
+}
+
+// Filling the first element of all the pages
+// will allocate all of them
+TEST(PagedVectorTest, FillSparseMultiPageTest) {
+  PagedVector<int, 10> V;
+  V.resize(100);
+  EXPECT_EQ(V.empty(), false);
+  EXPECT_EQ(V.size(), 100ULL);
+  EXPECT_EQ(V.capacity(), 100ULL);
+  for (int I = 0; I < 10; ++I) {
+    V[I * 10] = I;
+  }
+  EXPECT_EQ(std::distance(V.materialised_begin(), V.materialised_end()), 100LL);
+  for (int I = 0; I < 100; ++I) {
+    if (I % 10 == 0) {
+      EXPECT_EQ(V[I], I / 10);
+    } else {
+      EXPECT_EQ(V[I], 0);
+    }
+  }
+}
+
+struct TestHelper {
+  int A = -1;
+};
+
+// Use this to count how many times the constructor / destructor are called
+struct TestHelper2 {
+  int A = -1;
+  static int constructed;
+  static int destroyed;
+
+  TestHelper2() { constructed++; }
+  ~TestHelper2() { destroyed++; }
+};
+
+int TestHelper2::constructed = 0;
+int TestHelper2::destroyed = 0;
+
+TEST(PagedVectorTest, FillNonTrivialConstructor) {
+  PagedVector<TestHelper, 10> V;
+  V.resize(10);
+  EXPECT_EQ(V.empty(), false);
+  EXPECT_EQ(V.size(), 10ULL);
+  EXPECT_EQ(V.capacity(), 10ULL);
+  EXPECT_EQ(std::distance(V.materialised_begin(), V.materialised_end()), 0LL);
+  for (int I = 0; I < 10; ++I) {
+    EXPECT_EQ(V[I].A, -1);
+  }
+  EXPECT_EQ(std::distance(V.materialised_begin(), V.materialised_end()), 10LL);
+}
+
+// Elements are constructed, destructed in pages, so we expect
+// the number of constructed / destructed elements to be a multiple of the
+// page size and the constructor is invoked when the page is actually accessed
+// the first time.
+TEST(PagedVectorTest, FillNonTrivialConstructorDestructor) {
+  PagedVector<TestHelper2, 10> V;
+  V.resize(19);
+  EXPECT_EQ(TestHelper2::constructed, 0);
+  EXPECT_EQ(V.empty(), false);
+  EXPECT_EQ(V.size(), 19ULL);
+  EXPECT_EQ(V.capacity(), 20ULL);
+  EXPECT_EQ(std::distance(V.materialised_begin(), V.materialised_end()), 0LL);
+  EXPECT_EQ(V[0].A, -1);
+  EXPECT_EQ(TestHelper2::constructed, 10);
+
+  for (int I = 0; I < 10; ++I) {
+    EXPECT_EQ(V[I].A, -1);
+    EXPECT_EQ(TestHelper2::constructed, 10);
+  }
+  for (int I = 10; I < 11; ++I) {
+    EXPECT_EQ(V[I].A, -1);
+    EXPECT_EQ(TestHelper2::constructed, 20);
+  }
+  for (int I = 0; I < 19; ++I) {
+    EXPECT_EQ(V[I].A, -1);
+    EXPECT_EQ(TestHelper2::constructed, 20);
+  }
+  EXPECT_EQ(std::distance(V.materialised_begin(), V.materialised_end()), 19LL);
+  // We initialise the whole page, not just the materialised part
+  // EXPECT_EQ(TestHelper2::constructed, 20);
+  V.resize(18);
+  EXPECT_EQ(TestHelper2::destroyed, 0);
+  V.resize(1);
+  EXPECT_EQ(TestHelper2::destroyed, 10);
+  V.resize(0);
+  EXPECT_EQ(TestHelper2::destroyed, 20);
+
+  // Add a few empty pages so that we can test that the destructor
+  // is called only for the materialised pages
+  V.resize(50);
+  V[49].A = 0;
+  EXPECT_EQ(TestHelper2::constructed, 30);
+  EXPECT_EQ(TestHelper2::destroyed, 20);
+  EXPECT_EQ(V[49].A, 0);
+  V.resize(0);
+  EXPECT_EQ(TestHelper2::destroyed, 30);
+}
+
+TEST(PagedVectorTest, ShrinkTest) {
+  PagedVector<int, 10> V;
+  V.resize(20);
+  EXPECT_EQ(V.empty(), false);
+  EXPECT_EQ(V.size(), 20ULL);
+  EXPECT_EQ(V.capacity(), 20ULL);
+  for (int I = 0; I < 20; ++I) {
+    V[I] = I;
+  }
+  EXPECT_EQ(std::distance(V.materialised_begin(), V.materialised_end()), 20LL);
+  V.resize(9);
+  EXPECT_EQ(V.empty(), false);
+  EXPECT_EQ(V.size(), 9ULL);
+  EXPECT_EQ(V.capacity(), 10ULL);
+  for (int I = 0; I < 9; ++I) {
+    EXPECT_EQ(V[I], I);
+  }
+  EXPECT_EQ(std::distance(V.materialised_begin(), V.materialised_end()), 9LL);
+  V.resize(0);
+  EXPECT_EQ(V.empty(), true);
+  EXPECT_EQ(V.size(), 0ULL);
+  EXPECT_EQ(V.capacity(), 0ULL);
+  EXPECT_EQ(std::distance(V.materialised_begin(), V.materialised_end()), 0LL);
+  EXPECT_DEATH(V[0], "Index < Size");
+}
+
+TEST(PagedVectorTest, FunctionalityTest) {
+  PagedVector<int, 10> V;
+  EXPECT_EQ(V.empty(), true);
+
+  // Next ten numbers are 10..19
+  V.resize(2);
+  EXPECT_EQ(V.empty(), false);
+  V.resize(10);
+  V.resize(20);
+  V.resize(30);
+  EXPECT_EQ(std::distance(V.materialised_begin(), V.materialised_end()), 0LL);
+
+  EXPECT_EQ(V.size(), 30ULL);
+  for (int I = 0; I < 10; ++I) {
+    V[I] = I;
+  }
+  for (int I = 0; I < 10; ++I) {
+    EXPECT_EQ(V[I], I);
+  }
+  EXPECT_EQ(std::distance(V.materialised_begin(), V.materialised_end()), 10LL);
+  for (int I = 20; I < 30; ++I) {
+    V[I] = I;
+  }
+  for (int I = 20; I < 30; ++I) {
+    EXPECT_EQ(V[I], I);
+  }
+  EXPECT_EQ(std::distance(V.materialised_begin(), V.materialised_end()), 20LL);
+
+  for (int I = 10; I < 20; ++I) {
+    V[I] = I;
+  }
+  for (int I = 10; I < 20; ++I) {
+    EXPECT_EQ(V[I], I);
+  }
+  EXPECT_EQ(std::distance(V.materialised_begin(), V.materialised_end()), 30LL);
+  V.resize(35);
+  EXPECT_EQ(std::distance(V.materialised_begin(), V.materialised_end()), 30LL);
+  for (int I = 30; I < 35; ++I) {
+    V[I] = I;
+  }
+  EXPECT_EQ(std::distance(V.materialised_begin(), V.materialised_end()), 35LL);
+  EXPECT_EQ(V.size(), 35ULL);
+  EXPECT_EQ(V.capacity(), 40ULL);
+  V.resize(37);
+  for (int I = 30; I < 37; ++I) {
+    V[I] = I;
+  }
+  EXPECT_EQ(V.size(), 37ULL);
+  EXPECT_EQ(V.capacity(), 40ULL);
+  for (int I = 0; I < 37; ++I) {
+    EXPECT_EQ(V[I], I);
+  }
+
+  V.resize(41);
+  V[40] = 40;
+  EXPECT_EQ(V.size(), 41ULL);
+  EXPECT_EQ(V.capacity(), 50ULL);
+  for (int I = 0; I < 36; ++I) {
+    EXPECT_EQ(V[I], I);
+  }
+  for (int I = 37; I < 40; ++I) {
+    EXPECT_EQ(V[I], 0);
+  }
+  V.resize(50);
+  EXPECT_EQ(V.capacity(), 50ULL);
+  EXPECT_EQ(V.size(), 50ULL);
+  EXPECT_EQ(V[40], 40);
+  V.resize(50ULL);
+  V.clear();
+  EXPECT_EQ(V.size(), 0ULL);
+  EXPECT_EQ(V.capacity(), 0ULL);
+}
+} // namespace llvm

>From e55458b7698e96e70ec7327b7512eaeebfa29a74 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Tue, 26 Sep 2023 21:58:33 +0200
Subject: [PATCH 02/16] Use std::destroy_n

---
 llvm/include/llvm/ADT/PagedVector.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
index 953c21ac5f347b6..8fdae6a9de1afe8 100644
--- a/llvm/include/llvm/ADT/PagedVector.h
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -137,8 +137,7 @@ template <typename T, size_t PageSize = 1024 / sizeof(T)> class PagedVector {
           continue;
         T *Page = PagePtr;
         // We need to invoke the destructor on all the elements of the page.
-        for (size_t J = 0; J < PageSize; ++J)
-          Page[J].~T();
+        std::destroy_n(Page, PageSize);
         Allocator.getPointer()->Deallocate(Page);
         // We mark the page invalid, to avoid double deletion.
         PageToDataPtrs[I] = InvalidPage;
@@ -175,8 +174,7 @@ template <typename T, size_t PageSize = 1024 / sizeof(T)> class PagedVector {
     for (T *Page : PageToDataPtrs) {
       if (Page == InvalidPage)
         continue;
-      for (size_t I = 0; I < PageSize; ++I)
-        Page[I].~T();
+      std::destroy_n(Page, PageSize);
       // If we do not own the allocator, deallocate the pages one by one.
       if (!Allocator.getInt())
         Allocator.getPointer()->Deallocate(Page);

>From 5b1e6353df7e0689ea0f15aedda8b54a3c8ad0b4 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Tue, 26 Sep 2023 22:02:47 +0200
Subject: [PATCH 03/16] Reflow

---
 llvm/docs/ProgrammersManual.rst | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/llvm/docs/ProgrammersManual.rst b/llvm/docs/ProgrammersManual.rst
index c8ebeb136a7bf68..b400f1bb575406f 100644
--- a/llvm/docs/ProgrammersManual.rst
+++ b/llvm/docs/ProgrammersManual.rst
@@ -1631,11 +1631,12 @@ llvm/ADT/PagedVector.h
 ^^^^^^^^^^^^^^^^^^^^^^
 
 ``PagedVector<Type, PageSize>`` is a random access container that allocates
-``PageSize`` elements of type ``Type`` when the first element of a page is accessed
-via the ``operator[]``.  This is useful for cases where the number of
-elements is known in advance; their actual initialization is expensive; and they are
-sparsely used. This utility uses page-granular lazily initialization when the element is accessed. When the
-number of used pages is small significant memory savings can be achieved.
+``PageSize`` elements of type ``Type`` when the first element of a page is
+accessed via the ``operator[]``.  This is useful for cases where the number of
+elements is known in advance; their actual initialization is expensive; and
+they are sparsely used. This utility uses page-granular lazily initialization
+when the element is accessed. When the number of used pages is small
+significant memory savings can be achieved.
 
 The main advantage is that a ``PagedVector`` allows to delay the actual
 allocation of the page until it's needed, at the extra cost of one integer per
@@ -1644,8 +1645,8 @@ index.
 
 In order to minimise the memory footprint of this container, it's important to
 balance the PageSize so that it's not too small (otherwise the overhead of the
-integer per page might become too high) and not too big (otherwise the memory is
-wasted if the page is not fully used).
+integer per page might become too high) and not too big (otherwise the memory
+is wasted if the page is not fully used).
 
 Moreover, while retaining the order of the elements based on their insertion
 index, like a vector, iterating over the elements via ``begin()`` and ``end()``

>From af432ee73ff4004f132cb5183c49849665300cee Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Tue, 26 Sep 2023 22:07:09 +0200
Subject: [PATCH 04/16] Array syntax

---
 llvm/include/llvm/ADT/PagedVector.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
index 8fdae6a9de1afe8..337ff85fb4fcdad 100644
--- a/llvm/include/llvm/ADT/PagedVector.h
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -91,7 +91,7 @@ template <typename T, size_t PageSize = 1024 / sizeof(T)> class PagedVector {
       PagePtr = NewPagePtr;
     }
     // Dereference the element in the page.
-    return *((Index % PageSize) + PagePtr);
+    return PagePtr[Index % PageSize];
   }
 
   /// Return the capacity of the vector. I.e. the maximum size it can be
@@ -233,7 +233,7 @@ template <typename T, size_t PageSize = 1024 / sizeof(T)> class PagedVector {
       assert(ElementIdx < PV->Size);
       assert(PV->PageToDataPtrs[ElementIdx / PageSize] != InvalidPage);
       T *PagePtr = PV->PageToDataPtrs[ElementIdx / PageSize];
-      return *((ElementIdx % PageSize) + PagePtr);
+      return PagePtr[ElementIdx % PageSize];
     }
 
     friend bool operator==(MaterialisedIterator const &LHS,

>From ba184178f98f1fae305cc0bdb434e45e72a7924c Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Tue, 26 Sep 2023 22:11:20 +0200
Subject: [PATCH 05/16] Use std::uninitialized_value_construct_n

---
 llvm/include/llvm/ADT/PagedVector.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
index 337ff85fb4fcdad..d48832ca3db3468 100644
--- a/llvm/include/llvm/ADT/PagedVector.h
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -85,8 +85,7 @@ template <typename T, size_t PageSize = 1024 / sizeof(T)> class PagedVector {
       T *NewPagePtr = Allocator.getPointer()->template Allocate<T>(PageSize);
       // We need to invoke the default constructor on all the elements of the
       // page.
-      for (size_t I = 0; I < PageSize; ++I)
-        new (NewPagePtr + I) T();
+      std::uninitialized_value_construct_n(NewPagePtr, PageSize);
 
       PagePtr = NewPagePtr;
     }

>From 0021f1500cf00fde92f45c848ae537a02f83da54 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Tue, 26 Sep 2023 22:13:03 +0200
Subject: [PATCH 06/16] Rename loop end variable

---
 llvm/include/llvm/ADT/PagedVector.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
index d48832ca3db3468..b53e830b9e2a1dd 100644
--- a/llvm/include/llvm/ADT/PagedVector.h
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -130,7 +130,7 @@ template <typename T, size_t PageSize = 1024 / sizeof(T)> class PagedVector {
     // constructed not only the one effectively neeeded.
     if (NewSize < Size) {
       size_t NewLastPage = (NewSize - 1) / PageSize;
-      for (size_t I = NewLastPage + 1, E = PageToDataPtrs.size(); I < E; ++I) {
+      for (size_t I = NewLastPage + 1, N = PageToDataPtrs.size(); I < N; ++I) {
         T *PagePtr = PageToDataPtrs[I];
         if (PagePtr == InvalidPage)
           continue;

>From 61a76c4e8b643462a14e0db2de44508886e697c9 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Wed, 27 Sep 2023 08:29:23 +0200
Subject: [PATCH 07/16] Update llvm/include/llvm/ADT/PagedVector.h

Co-authored-by: Richard Smith <richard at metafoo.co.uk>
---
 llvm/include/llvm/ADT/PagedVector.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
index b53e830b9e2a1dd..9550b0b0e2c8c55 100644
--- a/llvm/include/llvm/ADT/PagedVector.h
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -41,7 +41,7 @@ namespace llvm {
 template <typename T, size_t PageSize = 1024 / sizeof(T)> class PagedVector {
   static_assert(PageSize > 1, "PageSize must be greater than 0. Most likely "
                               "you want it to be greater than 16.");
-  /// The actual number of element in the vector which can be accessed.
+  /// The actual number of elements in the vector which can be accessed.
   size_t Size = 0;
 
   /// The position of the initial element of the page in the Data vector.

>From 4857d75c0dfb9bd9377799d8db600a83575843da Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Wed, 27 Sep 2023 08:30:10 +0200
Subject: [PATCH 08/16] Update llvm/include/llvm/ADT/PagedVector.h

Co-authored-by: Richard Smith <richard at metafoo.co.uk>
---
 llvm/include/llvm/ADT/PagedVector.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
index 9550b0b0e2c8c55..a6deeb175654519 100644
--- a/llvm/include/llvm/ADT/PagedVector.h
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -48,7 +48,7 @@ template <typename T, size_t PageSize = 1024 / sizeof(T)> class PagedVector {
   /// Pages are allocated contiguously in the Data vector.
   mutable SmallVector<T *, 0> PageToDataPtrs;
   /// Actual page data. All the page elements are added to this vector on the
-  /// first access of any of the elements of the page. Elements default
+  /// first access of any of the elements of the page. Elements are default
   /// constructed and elements of the page are stored contiguously. The order of
   /// the elements however depends on the order of access of the pages.
   PointerIntPair<BumpPtrAllocator *, 1, bool> Allocator;

>From 4796630ee0ce8fd9c857b71538953d1155c7af32 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Wed, 27 Sep 2023 08:30:29 +0200
Subject: [PATCH 09/16] Update llvm/include/llvm/ADT/PagedVector.h

Co-authored-by: Richard Smith <richard at metafoo.co.uk>
---
 llvm/include/llvm/ADT/PagedVector.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
index a6deeb175654519..df74b6070dfc946 100644
--- a/llvm/include/llvm/ADT/PagedVector.h
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -28,7 +28,7 @@ namespace llvm {
 /// have a sparsely initialised vector where the full size is allocated upfront.
 ///
 /// As a side effect the elements are initialised later than in a normal vector.
-/// On the first access to one of the elements of a given page all, the elements
+/// On the first access to one of the elements of a given page, all the elements
 /// of the page are initialised. This also means that the elements of the page
 /// are initialised beyond the size of the vector.
 ///

>From 985444327c26d5caaec416f386c41f264c08b634 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Wed, 27 Sep 2023 09:27:05 +0200
Subject: [PATCH 10/16] Assume iterators are always the same

---
 llvm/include/llvm/ADT/PagedVector.h | 48 +++++++----------------------
 1 file changed, 11 insertions(+), 37 deletions(-)

diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
index df74b6070dfc946..5a372f78f420239 100644
--- a/llvm/include/llvm/ADT/PagedVector.h
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -247,43 +247,17 @@ template <typename T, size_t PageSize = 1024 / sizeof(T)> class PagedVector {
   friend bool operator==(MaterialisedIterator const &LHS,
                          MaterialisedIterator const &RHS) {
     assert(LHS.PV == RHS.PV);
-    auto *PV = LHS.PV;
-
-    // Any iterator for an empty vector is equal to any other iterator.
-    if (LHS.PV->empty())
-      return true;
-    // Get the pages of the two iterators. If between the two pages there
-    // are no valid pages, we can condider the iterators equal.
-    size_t PageMin = std::min(LHS.ElementIdx, RHS.ElementIdx) / PageSize;
-    size_t PageMax = std::max(LHS.ElementIdx, RHS.ElementIdx) / PageSize;
-    // If the two pages are past the end, the iterators are equal.
-    if (PageMin >= PV->PageToDataPtrs.size())
-      return true;
-    // If only the last page is past the end, the iterators are equal if
-    // all the pages up to the end are invalid.
-    if (PageMax >= PV->PageToDataPtrs.size()) {
-      for (size_t PageIdx = PageMin; PageIdx < PV->PageToDataPtrs.size();
-           ++PageIdx)
-        if (LHS.PV->PageToDataPtrs[PageIdx] != InvalidPage)
-          return false;
-      return true;
-    }
-
-    T *Page1 = PV->PageToDataPtrs[PageMin];
-    T *Page2 = PV->PageToDataPtrs[PageMax];
-    if (Page1 == InvalidPage && Page2 == InvalidPage)
-      return true;
-
-    // If the two pages are the same, the iterators are equal if they point
-    // to the same element.
-    if (PageMin == PageMax)
-      return LHS.ElementIdx == RHS.ElementIdx;
-
-    // If the two pages are different, the iterators are equal if all the
-    // pages between them are invalid.
-    return std::all_of(PV->PageToDataPtrs.begin() + PageMin,
-                       PV->PageToDataPtrs.begin() + PageMax,
-                       [](T *Page) { return Page == InvalidPage; });
+    // Make sure we are comparing either end iterators or iterators pointing
+    // to materialised elements.
+    // It should not be possible to build two iterators pointing to non
+    // materialised elements.
+    assert(LHS.ElementIdx >= LHS.PV->Size ||
+           (LHS.ElementIdx / PageSize < LHS.PV->PageToDataPtrs.size() &&
+            LHS.PV->PageToDataPtrs[LHS.ElementIdx / PageSize] != InvalidPage));
+    assert(RHS.ElementIdx >= RHS.PV->Size ||
+           (RHS.ElementIdx / PageSize < RHS.PV->PageToDataPtrs.size() &&
+            RHS.PV->PageToDataPtrs[RHS.ElementIdx / PageSize] != InvalidPage));
+    return LHS.ElementIdx == RHS.ElementIdx;
   }
 
   friend bool operator!=(MaterialisedIterator const &LHS,

>From 45b55bd2374750944f7dba42a8d78f67f05aab76 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Wed, 27 Sep 2023 09:50:11 +0200
Subject: [PATCH 11/16] Explicitly remove copy / move iterators

---
 llvm/include/llvm/ADT/PagedVector.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
index 5a372f78f420239..ae1690a5955a3de 100644
--- a/llvm/include/llvm/ADT/PagedVector.h
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -72,6 +72,12 @@ template <typename T, size_t PageSize = 1024 / sizeof(T)> class PagedVector {
       delete Allocator.getPointer();
   }
 
+  // Forbid copy and move as we do not need them for the current use case.
+  PagedVector(const PagedVector &) = delete;
+  PagedVector(PagedVector &&) = delete;
+  PagedVector &operator=(const PagedVector &) = delete;
+  PagedVector &operator=(PagedVector &&) = delete;
+
   /// Look up an element at position `Index`.
   /// If the associated page is not filled, it will be filled with default
   /// constructed elements. If the associated page is filled, return the

>From 132a584d1c376754ff6e601b617ca7deee6cd24c Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Wed, 27 Sep 2023 10:00:37 +0200
Subject: [PATCH 12/16] fix comment

---
 llvm/include/llvm/ADT/PagedVector.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
index ae1690a5955a3de..e59dbc9319a9dbb 100644
--- a/llvm/include/llvm/ADT/PagedVector.h
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -47,7 +47,7 @@ template <typename T, size_t PageSize = 1024 / sizeof(T)> class PagedVector {
   /// The position of the initial element of the page in the Data vector.
   /// Pages are allocated contiguously in the Data vector.
   mutable SmallVector<T *, 0> PageToDataPtrs;
-  /// Actual page data. All the page elements are added to this vector on the
+  /// Actual page data. All the page elements are allocated on the
   /// first access of any of the elements of the page. Elements are default
   /// constructed and elements of the page are stored contiguously. The order of
   /// the elements however depends on the order of access of the pages.

>From 41c1be1fe264e7610d895b0589a56fdc86fd6833 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Wed, 27 Sep 2023 10:05:01 +0200
Subject: [PATCH 13/16] Fix comment

---
 llvm/include/llvm/ADT/PagedVector.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
index e59dbc9319a9dbb..0d098b797dfd3fa 100644
--- a/llvm/include/llvm/ADT/PagedVector.h
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -35,9 +35,11 @@ namespace llvm {
 /// Similarly on destruction the elements are destroyed only when the page is
 /// not needed anymore, delaying invoking the destructor of the elements.
 ///
-/// Notice that this does not have iterators, because if you have iterators it
-/// probably means you are going to touch all the memory in any case, so better
-/// use a std::vector in the first place.
+/// Notice that this has iterators only on materialised elements. This
+/// is deliberately done under the assumption you would dereference the elements
+/// while iterating, therefore materialising them and losing the gains in terms
+/// of memory usage this container provides. If you have such a use case, you
+/// probably want to use a normal std::vector or a llvm::SmallVector.
 template <typename T, size_t PageSize = 1024 / sizeof(T)> class PagedVector {
   static_assert(PageSize > 1, "PageSize must be greater than 0. Most likely "
                               "you want it to be greater than 16.");

>From cd9209cd48771d190de8fd3e00b90741c8a007f9 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Wed, 27 Sep 2023 11:13:54 +0200
Subject: [PATCH 14/16] Fix NewPageSize computation

---
 llvm/include/llvm/ADT/PagedVector.h | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
index 0d098b797dfd3fa..e57a671aa3dc4e3 100644
--- a/llvm/include/llvm/ADT/PagedVector.h
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -158,18 +158,12 @@ template <typename T, size_t PageSize = 1024 / sizeof(T)> class PagedVector {
     // when the page was allocated.
     if (Size <= capacity())
       return;
-    // The number of pages to allocate. The Remainder is calculated
-    // for the case in which the NewSize is not a multiple of PageSize.
-    // In that case we need one more page.
-    size_t Pages = Size / PageSize;
-    size_t Remainder = Size % PageSize;
-    if (Remainder != 0)
-      Pages += 1;
-    assert(Pages > PageToDataPtrs.size());
     // We use InvalidPage to indicate that a page has not been allocated yet.
     // This cannot be 0, because 0 is a valid page id.
     // We use InvalidPage instead of a separate bool to avoid wasting space.
-    PageToDataPtrs.resize(Pages, InvalidPage);
+    size_t NewLastPage = (NewSize - 1) / PageSize;
+    assert(NewLastPage + 1 > PageToDataPtrs.size());
+    PageToDataPtrs.resize(NewLastPage + 1, InvalidPage);
   }
 
   [[nodiscard]] bool empty() const { return Size == 0; }

>From a4076b7d723d494f8d52e796e8b049cb288b9aec Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Wed, 27 Sep 2023 14:38:49 +0200
Subject: [PATCH 15/16] Update llvm/include/llvm/ADT/PagedVector.h

Co-authored-by: Richard Smith <richard at metafoo.co.uk>
---
 llvm/include/llvm/ADT/PagedVector.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
index e57a671aa3dc4e3..acdd1b6250f52b7 100644
--- a/llvm/include/llvm/ADT/PagedVector.h
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -82,8 +82,7 @@ template <typename T, size_t PageSize = 1024 / sizeof(T)> class PagedVector {
 
   /// Look up an element at position `Index`.
   /// If the associated page is not filled, it will be filled with default
-  /// constructed elements. If the associated page is filled, return the
-  /// element.
+  /// constructed elements.
   T &operator[](size_t Index) const {
     assert(Index < Size);
     assert(Index / PageSize < PageToDataPtrs.size());

>From 2c002b3f16da1c83cd32b2d0754f270214f07063 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Wed, 27 Sep 2023 15:12:21 +0200
Subject: [PATCH 16/16] More comments on resize addressed

* NewLastPage calculated once.
* Use destroy_n only when neededed.
---
 llvm/include/llvm/ADT/PagedVector.h | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
index acdd1b6250f52b7..c22ca154ba4bc35 100644
--- a/llvm/include/llvm/ADT/PagedVector.h
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -135,16 +135,23 @@ template <typename T, size_t PageSize = 1024 / sizeof(T)> class PagedVector {
     // page they belong to is destroyed. This is similar to what happens on
     // access of the elements of a page, where all the elements of the page are
     // constructed not only the one effectively neeeded.
+    size_t NewLastPage = (NewSize - 1) / PageSize;
     if (NewSize < Size) {
-      size_t NewLastPage = (NewSize - 1) / PageSize;
+      // Destruct the elements in the pages that are not needed anymore.
+      // Notice that we need to do this only if the constructor of the elements
+      // is not trivial.
+      if constexpr (!std::is_trivially_destructible_v<T>) {
+        for (size_t I = NewLastPage + 1, N = PageToDataPtrs.size(); I < N; ++I) {
+          T *Page = PageToDataPtrs[I];
+          // We need to invoke the destructor on all the elements of the page.
+          if (Page != InvalidPage)
+            std::destroy_n(Page, PageSize);
+        }
+      }
       for (size_t I = NewLastPage + 1, N = PageToDataPtrs.size(); I < N; ++I) {
-        T *PagePtr = PageToDataPtrs[I];
-        if (PagePtr == InvalidPage)
-          continue;
-        T *Page = PagePtr;
-        // We need to invoke the destructor on all the elements of the page.
-        std::destroy_n(Page, PageSize);
-        Allocator.getPointer()->Deallocate(Page);
+        T *Page = PageToDataPtrs[I];
+        if (Page != InvalidPage)
+          Allocator.getPointer()->Deallocate(Page);
         // We mark the page invalid, to avoid double deletion.
         PageToDataPtrs[I] = InvalidPage;
       }
@@ -160,7 +167,6 @@ template <typename T, size_t PageSize = 1024 / sizeof(T)> class PagedVector {
     // We use InvalidPage to indicate that a page has not been allocated yet.
     // This cannot be 0, because 0 is a valid page id.
     // We use InvalidPage instead of a separate bool to avoid wasting space.
-    size_t NewLastPage = (NewSize - 1) / PageSize;
     assert(NewLastPage + 1 > PageToDataPtrs.size());
     PageToDataPtrs.resize(NewLastPage + 1, InvalidPage);
   }



More information about the cfe-commits mailing list