[clang] Introduce paged vector (PR #66430)

Giulio Eulisse via cfe-commits cfe-commits at lists.llvm.org
Tue Sep 19 23:35:17 PDT 2023


https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/66430

>From 5d2553755add3e7d2adb0b2119d6a0c355df1551 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Thu, 14 Sep 2023 21:58:21 +0200
Subject: [PATCH 01/14] Introduce PagedVector class

The goal of the class is to be an (almost) drop in replacement for
SmallVector and std::vector when those are presized and filled later,
as it happens in SourceManager and ASTReader.

By splitting the actual vector in pages of the same size and allocating
the pages only when they are needed, using this containers reduces the
memory usage by a factor 4 for the cases relevant to the ALICE
experiment ROOT / cling usage.
---
 clang/include/clang/Basic/SourceManager.h     |   3 +-
 clang/include/clang/Serialization/ASTReader.h |   5 +-
 clang/lib/Basic/SourceManager.cpp             |  10 +-
 clang/lib/Serialization/ASTReader.cpp         |   5 +-
 llvm/docs/ProgrammersManual.rst               |  32 ++
 llvm/include/llvm/ADT/PagedVector.h           | 322 ++++++++++++++++++
 llvm/unittests/ADT/CMakeLists.txt             |   1 +
 llvm/unittests/ADT/PagedVectorTest.cpp        | 265 ++++++++++++++
 8 files changed, 633 insertions(+), 10 deletions(-)
 create mode 100644 llvm/include/llvm/ADT/PagedVector.h
 create mode 100644 llvm/unittests/ADT/PagedVectorTest.cpp

diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h
index 2f846502d6f3327..e37caa2252532f9 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -43,6 +43,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -699,7 +700,7 @@ class SourceManager : public RefCountedBase<SourceManager> {
   ///
   /// Negative FileIDs are indexes into this table. To get from ID to an index,
   /// use (-ID - 2).
-  SmallVector<SrcMgr::SLocEntry, 0> LoadedSLocEntryTable;
+  llvm::PagedVector<SrcMgr::SLocEntry> LoadedSLocEntryTable;
 
   /// The starting offset of the next local SLocEntry.
   ///
diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index dc1eb21c27801fe..65e19c6e44cf571 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -38,6 +38,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -487,7 +488,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the type with
   /// ID = (I + 1) << FastQual::Width has already been loaded
-  std::vector<QualType> TypesLoaded;
+  llvm::PagedVector<QualType> TypesLoaded;
 
   using GlobalTypeMapType =
       ContinuousRangeMap<serialization::TypeID, ModuleFile *, 4>;
@@ -501,7 +502,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the declaration with ID
   /// = I + 1 has already been loaded.
-  std::vector<Decl *> DeclsLoaded;
+  llvm::PagedVector<Decl *> DeclsLoaded;
 
   using GlobalDeclMapType =
       ContinuousRangeMap<serialization::DeclID, ModuleFile *, 4>;
diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp
index 0521ac7b30339ab..7fa8b8096ac4931 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -2344,11 +2344,11 @@ SourceManager::MemoryBufferSizes SourceManager::getMemoryBufferSizes() const {
 }
 
 size_t SourceManager::getDataStructureSizes() const {
-  size_t size = llvm::capacity_in_bytes(MemBufferInfos)
-    + llvm::capacity_in_bytes(LocalSLocEntryTable)
-    + llvm::capacity_in_bytes(LoadedSLocEntryTable)
-    + llvm::capacity_in_bytes(SLocEntryLoaded)
-    + llvm::capacity_in_bytes(FileInfos);
+  size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
+                llvm::capacity_in_bytes(LocalSLocEntryTable) +
+                llvm::capacity_in_bytes(LoadedSLocEntryTable) +
+                llvm::capacity_in_bytes(SLocEntryLoaded) +
+                llvm::capacity_in_bytes(FileInfos);
 
   if (OverriddenFilesInfo)
     size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles);
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 0952244d037a77c..badd54987af18dd 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -7944,9 +7944,10 @@ void ASTReader::PrintStats() {
   std::fprintf(stderr, "*** AST File Statistics:\n");
 
   unsigned NumTypesLoaded =
-      TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
+      TypesLoaded.size() - llvm::count(TypesLoaded.materialised(), QualType());
   unsigned NumDeclsLoaded =
-      DeclsLoaded.size() - llvm::count(DeclsLoaded, (Decl *)nullptr);
+      DeclsLoaded.size() -
+      llvm::count(DeclsLoaded.materialised(), (Decl *)nullptr);
   unsigned NumIdentifiersLoaded =
       IdentifiersLoaded.size() -
       llvm::count(IdentifiersLoaded, (IdentifierInfo *)nullptr);
diff --git a/llvm/docs/ProgrammersManual.rst b/llvm/docs/ProgrammersManual.rst
index 43dd985d9779ed2..469be68b1dd8ba4 100644
--- a/llvm/docs/ProgrammersManual.rst
+++ b/llvm/docs/ProgrammersManual.rst
@@ -1625,6 +1625,38 @@ SmallVector has grown a few other minor advantages over std::vector, causing
    and is no longer "private to the implementation". A name like
    ``SmallVectorHeader`` might be more appropriate.
 
+.. _dss_pagedvector:
+
+llvm/ADT/PagedVector.h
+^^^^^^^^^^^^^^^^^^^^^^
+
+``PagedVector<Type, PageSize>`` is a random access container that allocates
+(PageSize) elements of type Type when the first element of a page is accessed
+via the ``operator[]``.  This is useful for the case in which the number of
+elements is known in advance and their actual initialization is expensive and
+sparse so that it's only done lazily when the element is accessed. When the
+number of used pages is small significant memory savings can be achieved.
+
+The main advantage is that a ``PagedVector`` allows to delay the actual allocation
+of the page until it's needed, at the extra cost of one integer per page and one
+extra indirection when accessing elements with their positional index. 
+
+In order to maximise the memory footprint of this container, it's important to
+balance the PageSize so that it's not too small (otherwise the overhead of the
+integer per page might become too high) and not too big (otherwise the memory is
+wasted if the page is not fully used).
+
+Moreover, while retaining the oder of the elements based on their insertion
+index, like a vector, iterating over the elements via begin() and end() is not
+provided in the API, due to the fact accessing the elements in order would
+allocate all the iterated pages, defeating memory savings and the purpose of the
+``PagedVector``.
+
+Finally a ``materialised_begin()`` and ``materialised_end`` iterators are
+provided to access the elements associated to the accessed pages, which could
+speed up operations that need to iterate over initialized elements in a
+non-ordered manner.
+
 .. _dss_vector:
 
 <vector>
diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
new file mode 100644
index 000000000000000..799b9d44777ed5b
--- /dev/null
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -0,0 +1,322 @@
+//===- llvm/ADT/PagedVector.h - 'Lazyly allocated' vectors --------*- C++
+//-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PagedVector class.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_ADT_PAGEDVECTOR_H
+#define LLVM_ADT_PAGEDVECTOR_H
+
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/Support/Allocator.h"
+#include <cassert>
+#include <iostream>
+#include <vector>
+
+namespace llvm {
+// A vector that allocates memory in pages.
+// Order is kept, but memory is allocated only when one element of the page is
+// accessed. This introduces a level of indirection, but it is useful when you
+// have a sparsely initialised vector where the full size is allocated upfront
+// with the default constructor and elements are initialised later, on first
+// access.
+//
+// Notice that this does not have iterators, because if you
+// have iterators it probably means you are going to touch
+// all the memory in any case, so better use a std::vector in
+// the first place.
+//
+// Pages are allocated in SLAB_SIZE chunks, using the BumpPtrAllocator.
+template <typename T, std::size_t PAGE_SIZE = 1024 / sizeof(T)>
+class PagedVector {
+  static_assert(PAGE_SIZE > 0, "PAGE_SIZE must be greater than 0. Most likely "
+                               "you want it to be greater than 16.");
+  // The actual number of element in the vector which can be accessed.
+  std::size_t Size = 0;
+
+  // The position of the initial element of the page in the Data vector.
+  // Pages are allocated contiguously in the Data vector.
+  mutable std::vector<uintptr_t> PageToDataIdx;
+  // Actual page data. All the page elements are added to this vector on the
+  // first access of any of the elements of the page. Elements default
+  // constructed and elements of the page are stored contiguously. The order of
+  // the elements however depends on the order of access of the pages.
+  PointerIntPair<BumpPtrAllocator *, 1, bool> Allocator;
+
+  constexpr static uintptr_t InvalidPage = SIZE_MAX;
+
+public:
+  using value_type = T;
+
+  // Default constructor. We build our own allocator.
+  PagedVector() : Allocator(new BumpPtrAllocator, true) {}
+  PagedVector(BumpPtrAllocator *A) : Allocator(A, false) {}
+
+  ~PagedVector() {
+    // If we own the allocator, delete it.
+    if (Allocator.getInt() == true)
+      delete Allocator.getPointer();
+  }
+
+  // Lookup an element at position i.
+  // If the associated page is not filled, it will be filled with default
+  // constructed elements. If the associated page is filled, return the element.
+  T &operator[](std::size_t Index) const {
+    assert(Index < Size);
+    assert(Index / PAGE_SIZE < PageToDataIdx.size());
+    uintptr_t &PagePtr = PageToDataIdx[Index / PAGE_SIZE];
+    // If the page was not yet allocated, allocate it.
+    if (PagePtr == InvalidPage) {
+      T *NewPagePtr = Allocator.getPointer()->template Allocate<T>(PAGE_SIZE);
+      // We need to invoke the default constructor on all the elements of the
+      // page.
+      for (std::size_t I = 0; I < PAGE_SIZE; ++I)
+        new (NewPagePtr + I) T();
+
+      PagePtr = reinterpret_cast<uintptr_t>(NewPagePtr);
+    }
+    // Dereference the element in the page.
+    return *((Index % PAGE_SIZE) + reinterpret_cast<T *>(PagePtr));
+  }
+
+  // Return the capacity of the vector. I.e. the maximum size it can be expanded
+  // to with the resize method without allocating more pages.
+  [[nodiscard]] std::size_t capacity() const {
+    return PageToDataIdx.size() * PAGE_SIZE;
+  }
+
+  // Return the size of the vector. I.e. the maximum index that can be
+  // accessed, i.e. the maximum value which was used as argument of the
+  // resize method.
+  [[nodiscard]] std::size_t size() const { return Size; }
+
+  // Expands the vector to the given NewSize number of elements.
+  // If the vector was smaller, allocates new pages as needed.
+  // It should be called only with NewSize >= Size.
+  void resize(std::size_t NewSize) {
+    // Handle shrink case: delete the pages and update the size.
+    if (NewSize < Size) {
+      std::size_t NewLastPage = (NewSize - 1) / PAGE_SIZE;
+      for (std::size_t I = NewLastPage + 1; I < PageToDataIdx.size(); ++I) {
+        uintptr_t PagePtr = PageToDataIdx[I];
+        if (PagePtr == InvalidPage)
+          continue;
+        T *Page = reinterpret_cast<T *>(PagePtr);
+        // We need to invoke the destructor on all the elements of the page.
+        for (std::size_t J = 0; J < PAGE_SIZE; ++J)
+          Page[J].~T();
+        Allocator.getPointer()->Deallocate(Page);
+      }
+      // Delete the extra ones in the new last page.
+      uintptr_t PagePtr = PageToDataIdx[NewLastPage];
+      if (PagePtr != InvalidPage) {
+        T *Page = reinterpret_cast<T *>(PagePtr);
+        // If the new size and the old size are on the same page, we need to
+        // delete only the elements between the new size and the old size.
+        // Otherwise we need to delete all the remaining elements in the page.
+        std::size_t OldPage = (Size - 1) / PAGE_SIZE;
+        std::size_t NewPage = (NewSize - 1) / PAGE_SIZE;
+        std::size_t LastPageElements =
+            OldPage == NewPage ? Size % PAGE_SIZE : PAGE_SIZE;
+        for (std::size_t J = NewSize % PAGE_SIZE; J < LastPageElements; ++J)
+          Page[J].~T();
+      }
+      PageToDataIdx.resize(NewLastPage + 1);
+    }
+    Size = NewSize;
+    // If the capacity is enough, just update the size and continue
+    // with the currently allocated pages.
+    if (Size <= capacity())
+      return;
+    // The number of pages to allocate. The Remainder is calculated
+    // for the case in which the NewSize is not a multiple of PAGE_SIZE.
+    // In that case we need one more page.
+    std::size_t Pages = Size / PAGE_SIZE;
+    std::size_t Remainder = Size % PAGE_SIZE;
+    if (Remainder != 0)
+      Pages += 1;
+    assert(Pages > PageToDataIdx.size());
+    // We use InvalidPage to indicate that a page has not been allocated yet.
+    // This cannot be 0, because 0 is a valid page id.
+    // We use InvalidPage instead of a separate bool to avoid wasting space.
+    PageToDataIdx.resize(Pages, InvalidPage);
+  }
+
+  // Return true if the vector is empty
+  [[nodiscard]] bool empty() const { return Size == 0; }
+
+  /// Clear the vector, i.e. clear the allocated pages, the whole page
+  /// lookup index and reset the size.
+  void clear() {
+    Size = 0;
+    // If we own the allocator, simply reset it, otherwise we
+    // deallocate the pages one by one.
+    if (Allocator.getInt() == true)
+      Allocator.getPointer()->Reset();
+    else
+      for (uintptr_t Page : PageToDataIdx)
+        Allocator.getPointer()->Deallocate(reinterpret_cast<T *>(Page));
+
+    PageToDataIdx.clear();
+  }
+
+  // Iterator on all the elements of the vector
+  // which have actually being constructed.
+  class MaterialisedIterator {
+    PagedVector const *PV;
+    std::size_t ElementIdx;
+
+  public:
+    using iterator_category = std::forward_iterator_tag;
+    using value_type = T;
+    using difference_type = std::ptrdiff_t;
+    using pointer = T *;
+    using reference = T &;
+
+    MaterialisedIterator(PagedVector const *PV, std::size_t ElementIdx)
+        : PV(PV), ElementIdx(ElementIdx) {}
+
+    // When incrementing the iterator, we skip the elements which have not
+    // been materialised yet.
+    MaterialisedIterator &operator++() {
+      while (ElementIdx < PV->Size)
+        if (PV->PageToDataIdx[ElementIdx++ / PAGE_SIZE] != InvalidPage)
+          break;
+
+      return *this;
+    }
+    // Post increment operator.
+    MaterialisedIterator operator++(int) {
+      MaterialisedIterator Copy = *this;
+      ++*this;
+      return Copy;
+    }
+
+    std::ptrdiff_t operator-(MaterialisedIterator const &Other) const {
+      assert(PV == Other.PV);
+      // If they are on the same table we can just subtract the indices.
+      // Otherwise we have to iterate over the pages to find the difference.
+      // If a page is invalid, we skip it.
+      if (PV == Other.PV)
+        return ElementIdx - Other.ElementIdx;
+
+      std::size_t ElementMin = std::min(ElementIdx, Other.ElementIdx);
+      std::size_t ElementMax = std::max(ElementIdx, Other.ElementIdx);
+      std::size_t PageMin = ElementMin / PAGE_SIZE;
+      std::size_t PageMax = ElementMax / PAGE_SIZE;
+
+      std::size_t Count = 0ULL;
+      for (std::size_t PageIdx = PageMin; PageIdx < PageMax; ++PageIdx) {
+        if (PV->PageToDataIdx[PageIdx] == InvalidPage)
+          continue;
+
+        Count += PAGE_SIZE;
+      }
+      Count += ElementMax % PAGE_SIZE;
+      Count += PAGE_SIZE - ElementMin % PAGE_SIZE;
+
+      return Count;
+    }
+
+    // When dereferencing the iterator, we materialise the page if needed.
+    T const &operator*() const {
+      assert(ElementIdx < PV->Size);
+      assert(PV->PageToDataIdx[ElementIdx / PAGE_SIZE] != InvalidPage);
+      T *PagePtr =
+          reinterpret_cast<T *>(PV->PageToDataIdx[ElementIdx / PAGE_SIZE]);
+      return *((ElementIdx % PAGE_SIZE) + PagePtr);
+    }
+
+    // Equality operator.
+    bool operator==(MaterialisedIterator const &Other) const {
+      // Iterators of two different vectors are never equal.
+      if (PV != Other.PV)
+        return false;
+      // Any iterator for an empty vector is equal to any other iterator.
+      if (PV->empty())
+        return true;
+      // Get the pages of the two iterators. If between the two pages there
+      // are no valid pages, we can condider the iterators equal.
+      std::size_t PageMin = std::min(ElementIdx, Other.ElementIdx) / PAGE_SIZE;
+      std::size_t PageMax = std::max(ElementIdx, Other.ElementIdx) / PAGE_SIZE;
+      // If the two pages are past the end, the iterators are equal.
+      if (PageMin >= PV->PageToDataIdx.size())
+        return true;
+      // If only the last page is past the end, the iterators are equal if
+      // all the pages up to the end are invalid.
+      if (PageMax >= PV->PageToDataIdx.size()) {
+        for (std::size_t PageIdx = PageMin; PageIdx < PV->PageToDataIdx.size();
+             ++PageIdx)
+          if (PV->PageToDataIdx[PageIdx] != InvalidPage)
+            return false;
+        return true;
+      }
+
+      uintptr_t Page1 = PV->PageToDataIdx[PageMin];
+      uintptr_t Page2 = PV->PageToDataIdx[PageMax];
+      if (Page1 == InvalidPage && Page2 == InvalidPage)
+        return true;
+
+      // If the two pages are the same, the iterators are equal if they point
+      // to the same element.
+      if (PageMin == PageMax)
+        return ElementIdx == Other.ElementIdx;
+
+      // If the two pages are different, the iterators are equal if all the
+      // pages between them are invalid.
+      for (std::size_t PageIdx = PageMin; PageIdx < PageMax; ++PageIdx)
+        if (PV->PageToDataIdx[PageIdx] != InvalidPage)
+          return false;
+
+      return true;
+    }
+
+    bool operator!=(MaterialisedIterator const &Other) const {
+      return !(*this == Other);
+    }
+
+    [[nodiscard]] size_t getIndex() const { return ElementIdx; }
+  };
+
+  // Iterators over the materialised elements of the vector.
+  // This includes all the elements belonging to allocated pages,
+  // even if they have not been accessed yet. It's enough to access
+  // one element of a page to materialise all the elements of the page.
+  MaterialisedIterator materialisedBegin() const {
+    // Look for the first valid page
+    for (std::size_t ElementIdx = 0ULL; ElementIdx < Size; ++ElementIdx)
+      if (PageToDataIdx[ElementIdx / PAGE_SIZE] != InvalidPage)
+        return MaterialisedIterator(this, ElementIdx);
+
+    return MaterialisedIterator(this, Size);
+  }
+
+  MaterialisedIterator materialisedEnd() const {
+    return MaterialisedIterator(this, Size);
+  }
+
+  // Range over the materialised elements of the vector.
+  // Use the MaterialisedIterator to iterate over the elements.
+  class MaterialisedRange {
+    MaterialisedIterator Begin;
+    MaterialisedIterator End;
+
+  public:
+    MaterialisedRange(MaterialisedIterator Begin, MaterialisedIterator End)
+        : Begin(Begin), End(End) {}
+    MaterialisedIterator begin() const { return Begin; }
+    MaterialisedIterator end() const { return End; }
+  };
+
+  MaterialisedRange materialised() const {
+    return MaterialisedRange(materialisedBegin(), materialisedEnd());
+  }
+};
+} // namespace llvm
+#endif // LLVM_ADT_PAGEDVECTOR_H
diff --git a/llvm/unittests/ADT/CMakeLists.txt b/llvm/unittests/ADT/CMakeLists.txt
index c5190255ba7738e..9aa7120f30696da 100644
--- a/llvm/unittests/ADT/CMakeLists.txt
+++ b/llvm/unittests/ADT/CMakeLists.txt
@@ -51,6 +51,7 @@ add_llvm_unittest(ADTTests
   MapVectorTest.cpp
   MoveOnly.cpp
   PackedVectorTest.cpp
+  PagedVectorTest.cpp
   PointerEmbeddedIntTest.cpp
   PointerIntPairTest.cpp
   PointerSumTypeTest.cpp
diff --git a/llvm/unittests/ADT/PagedVectorTest.cpp b/llvm/unittests/ADT/PagedVectorTest.cpp
new file mode 100644
index 000000000000000..c6a5da2abc210cc
--- /dev/null
+++ b/llvm/unittests/ADT/PagedVectorTest.cpp
@@ -0,0 +1,265 @@
+//===- llvm/unittest/ADT/PagedVectorTest.cpp ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// PagedVector unit tests.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/PagedVector.h"
+#include "gtest/gtest.h"
+#include <iterator>
+
+namespace llvm {
+TEST(PagedVectorTest, EmptyTest) {
+  PagedVector<int, 10> V;
+  EXPECT_EQ(V.empty(), true);
+  EXPECT_EQ(V.size(), 0ULL);
+  EXPECT_EQ(V.capacity(), 0ULL);
+  EXPECT_EQ(V.materialisedBegin().getIndex(), 0ULL);
+  EXPECT_EQ(V.materialisedEnd().getIndex(), 0ULL);
+  EXPECT_EQ(std::distance(V.materialisedBegin(), V.materialisedEnd()), 0LL);
+}
+
+TEST(PagedVectorTest, ExpandTest) {
+  PagedVector<int, 10> V;
+  V.resize(2);
+  EXPECT_EQ(V.empty(), false);
+  EXPECT_EQ(V.size(), 2ULL);
+  EXPECT_EQ(V.capacity(), 10ULL);
+  EXPECT_EQ(V.materialisedBegin().getIndex(), 2ULL);
+  EXPECT_EQ(V.materialisedEnd().getIndex(), 2ULL);
+  EXPECT_EQ(std::distance(V.materialisedBegin(), V.materialisedEnd()), 0LL);
+}
+
+TEST(PagedVectorTest, FullPageFillingTest) {
+  PagedVector<int, 10> V;
+  V.resize(10);
+  EXPECT_EQ(V.empty(), false);
+  EXPECT_EQ(V.size(), 10ULL);
+  EXPECT_EQ(V.capacity(), 10ULL);
+  for (int I = 0; I < 10; ++I) {
+    V[I] = I;
+  }
+  EXPECT_EQ(V.empty(), false);
+  EXPECT_EQ(V.size(), 10ULL);
+  EXPECT_EQ(V.capacity(), 10ULL);
+  EXPECT_EQ(V.materialisedBegin().getIndex(), 0ULL);
+  EXPECT_EQ(V.materialisedEnd().getIndex(), 10ULL);
+  EXPECT_EQ(std::distance(V.materialisedBegin(), V.materialisedEnd()), 10LL);
+  for (int I = 0; I < 10; ++I) {
+    EXPECT_EQ(V[I], I);
+  }
+}
+
+TEST(PagedVectorTest, HalfPageFillingTest) {
+  PagedVector<int, 10> V;
+  V.resize(5);
+  EXPECT_EQ(V.empty(), false);
+  EXPECT_EQ(V.size(), 5ULL);
+  EXPECT_EQ(V.capacity(), 10ULL);
+  for (int I = 0; I < 5; ++I) {
+    V[I] = I;
+  }
+  EXPECT_EQ(std::distance(V.materialisedBegin(), V.materialisedEnd()), 5LL);
+  for (int I = 0; I < 5; ++I) {
+    EXPECT_EQ(V[I], I);
+  }
+}
+
+TEST(PagedVectorTest, FillFullMultiPageTest) {
+  PagedVector<int, 10> V;
+  V.resize(20);
+  EXPECT_EQ(V.empty(), false);
+  EXPECT_EQ(V.size(), 20ULL);
+  EXPECT_EQ(V.capacity(), 20ULL);
+  for (int I = 0; I < 20; ++I) {
+    V[I] = I;
+  }
+  EXPECT_EQ(std::distance(V.materialisedBegin(), V.materialisedEnd()), 20LL);
+}
+
+TEST(PagedVectorTest, FillHalfMultiPageTest) {
+  PagedVector<int, 10> V;
+  V.resize(20);
+  EXPECT_EQ(V.empty(), false);
+  EXPECT_EQ(V.size(), 20ULL);
+  EXPECT_EQ(V.capacity(), 20ULL);
+  for (int I = 0; I < 5; ++I) {
+    V[I] = I;
+  }
+  for (int I = 10; I < 15; ++I) {
+    V[I] = I;
+  }
+  EXPECT_EQ(std::distance(V.materialisedBegin(), V.materialisedEnd()), 20LL);
+  for (int I = 0; I < 5; ++I) {
+    EXPECT_EQ(V[I], I);
+  }
+  for (int I = 10; I < 15; ++I) {
+    EXPECT_EQ(V[I], I);
+  }
+}
+
+TEST(PagedVectorTest, FillLastMultiPageTest) {
+  PagedVector<int, 10> V;
+  V.resize(20);
+  EXPECT_EQ(V.empty(), false);
+  EXPECT_EQ(V.size(), 20ULL);
+  EXPECT_EQ(V.capacity(), 20ULL);
+  for (int I = 10; I < 15; ++I) {
+    V[I] = I;
+  }
+  for (int I = 10; I < 15; ++I) {
+    EXPECT_EQ(V[I], I);
+  }
+
+  // Since we fill the last page only, the materialised vector
+  // should contain only the last page.
+  int J = 10;
+  for (auto MI = V.materialisedBegin(), ME = V.materialisedEnd(); MI != ME;
+       ++MI) {
+    if (J < 15) {
+      EXPECT_EQ(*MI, J);
+    } else {
+      EXPECT_EQ(*MI, 0);
+    }
+    ++J;
+  }
+  EXPECT_EQ(std::distance(V.materialisedBegin(), V.materialisedEnd()), 10LL);
+}
+
+// Filling the first element of all the pages
+// will allocate all of them
+TEST(PagedVectorTest, FillSparseMultiPageTest) {
+  PagedVector<int, 10> V;
+  V.resize(100);
+  EXPECT_EQ(V.empty(), false);
+  EXPECT_EQ(V.size(), 100ULL);
+  EXPECT_EQ(V.capacity(), 100ULL);
+  for (int I = 0; I < 10; ++I) {
+    V[I * 10] = I;
+  }
+  EXPECT_EQ(std::distance(V.materialisedBegin(), V.materialisedEnd()), 100LL);
+  for (int I = 0; I < 100; ++I) {
+    if (I % 10 == 0) {
+      EXPECT_EQ(V[I], I / 10);
+    } else {
+      EXPECT_EQ(V[I], 0);
+    }
+  }
+}
+
+struct TestHelper {
+  int A = -1;
+};
+
+TEST(PagedVectorTest, FillNonTrivialConstructor) {
+  PagedVector<TestHelper, 10> V;
+  V.resize(10);
+  EXPECT_EQ(V.empty(), false);
+  EXPECT_EQ(V.size(), 10ULL);
+  EXPECT_EQ(V.capacity(), 10ULL);
+  EXPECT_EQ(std::distance(V.materialisedBegin(), V.materialisedEnd()), 0LL);
+  for (int I = 0; I < 10; ++I) {
+    EXPECT_EQ(V[I].A, -1);
+  }
+  EXPECT_EQ(std::distance(V.materialisedBegin(), V.materialisedEnd()), 10LL);
+}
+
+TEST(PagedVectorTest, ShrinkTest) {
+  PagedVector<int, 10> V;
+  V.resize(20);
+  EXPECT_EQ(V.empty(), false);
+  EXPECT_EQ(V.size(), 20ULL);
+  EXPECT_EQ(V.capacity(), 20ULL);
+  for (int I = 0; I < 20; ++I) {
+    V[I] = I;
+  }
+  EXPECT_EQ(std::distance(V.materialisedBegin(), V.materialisedEnd()), 20LL);
+  V.resize(9);
+  EXPECT_EQ(V.empty(), false);
+  EXPECT_EQ(V.size(), 9ULL);
+  EXPECT_EQ(V.capacity(), 10ULL);
+  for (int I = 0; I < 9; ++I) {
+    EXPECT_EQ(V[I], I);
+  }
+  EXPECT_EQ(std::distance(V.materialisedBegin(), V.materialisedEnd()), 9LL);
+}
+
+TEST(PagedVectorTest, FunctionalityTest) {
+  PagedVector<int, 10> V;
+  EXPECT_EQ(V.empty(), true);
+
+  // Next ten numbers are 10..19
+  V.resize(2);
+  EXPECT_EQ(V.empty(), false);
+  V.resize(10);
+  V.resize(20);
+  V.resize(30);
+  EXPECT_EQ(std::distance(V.materialisedBegin(), V.materialisedEnd()), 0LL);
+
+  EXPECT_EQ(V.size(), 30ULL);
+  for (int I = 0; I < 10; ++I) {
+    V[I] = I;
+  }
+  for (int I = 0; I < 10; ++I) {
+    EXPECT_EQ(V[I], I);
+  }
+  EXPECT_EQ(std::distance(V.materialisedBegin(), V.materialisedEnd()), 10LL);
+  for (int I = 20; I < 30; ++I) {
+    V[I] = I;
+  }
+  for (int I = 20; I < 30; ++I) {
+    EXPECT_EQ(V[I], I);
+  }
+  EXPECT_EQ(std::distance(V.materialisedBegin(), V.materialisedEnd()), 20LL);
+
+  for (int I = 10; I < 20; ++I) {
+    V[I] = I;
+  }
+  for (int I = 10; I < 20; ++I) {
+    EXPECT_EQ(V[I], I);
+  }
+  EXPECT_EQ(std::distance(V.materialisedBegin(), V.materialisedEnd()), 30LL);
+  V.resize(35);
+  EXPECT_EQ(std::distance(V.materialisedBegin(), V.materialisedEnd()), 30LL);
+  for (int I = 30; I < 35; ++I) {
+    V[I] = I;
+  }
+  EXPECT_EQ(std::distance(V.materialisedBegin(), V.materialisedEnd()), 35LL);
+  EXPECT_EQ(V.size(), 35ULL);
+  EXPECT_EQ(V.capacity(), 40ULL);
+  V.resize(37);
+  for (int I = 30; I < 37; ++I) {
+    V[I] = I;
+  }
+  EXPECT_EQ(V.size(), 37ULL);
+  EXPECT_EQ(V.capacity(), 40ULL);
+  for (int I = 0; I < 37; ++I) {
+    EXPECT_EQ(V[I], I);
+  }
+
+  V.resize(41);
+  V[40] = 40;
+  EXPECT_EQ(V.size(), 41ULL);
+  EXPECT_EQ(V.capacity(), 50ULL);
+  for (int I = 0; I < 36; ++I) {
+    EXPECT_EQ(V[I], I);
+  }
+  for (int I = 37; I < 40; ++I) {
+    EXPECT_EQ(V[I], 0);
+  }
+  V.resize(50);
+  EXPECT_EQ(V.capacity(), 50ULL);
+  EXPECT_EQ(V.size(), 50ULL);
+  EXPECT_EQ(V[40], 40);
+  V.resize(50ULL);
+  V.clear();
+  EXPECT_EQ(V.size(), 0ULL);
+  EXPECT_EQ(V.capacity(), 0ULL);
+}
+} // namespace llvm

>From 5b1c72485b61ab7a21c20ba5785042e523b6e635 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Tue, 19 Sep 2023 23:50:41 +0200
Subject: [PATCH 02/14] Update llvm/include/llvm/ADT/PagedVector.h

Co-authored-by: Jakub Kuderski <kubakuderski at gmail.com>
---
 llvm/include/llvm/ADT/PagedVector.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
index 799b9d44777ed5b..899510ee66cb57e 100644
--- a/llvm/include/llvm/ADT/PagedVector.h
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -169,7 +169,7 @@ class PagedVector {
   // Iterator on all the elements of the vector
   // which have actually being constructed.
   class MaterialisedIterator {
-    PagedVector const *PV;
+    const PagedVector *PV;
     std::size_t ElementIdx;
 
   public:

>From 810d7ee128d66610fb7fdee981b1b5d1a3d6104f Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Tue, 19 Sep 2023 23:52:51 +0200
Subject: [PATCH 03/14] Update llvm/docs/ProgrammersManual.rst

Co-authored-by: Jakub Kuderski <kubakuderski at gmail.com>
---
 llvm/docs/ProgrammersManual.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/docs/ProgrammersManual.rst b/llvm/docs/ProgrammersManual.rst
index 469be68b1dd8ba4..7d5cf3f3c0a9a41 100644
--- a/llvm/docs/ProgrammersManual.rst
+++ b/llvm/docs/ProgrammersManual.rst
@@ -1647,7 +1647,7 @@ integer per page might become too high) and not too big (otherwise the memory is
 wasted if the page is not fully used).
 
 Moreover, while retaining the oder of the elements based on their insertion
-index, like a vector, iterating over the elements via begin() and end() is not
+index, like a vector, iterating over the elements via ``begin()`` and ``end()`` is not
 provided in the API, due to the fact accessing the elements in order would
 allocate all the iterated pages, defeating memory savings and the purpose of the
 ``PagedVector``.

>From d1e98e2d6df482655a3247ac4ca0414a054a2bd5 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Tue, 19 Sep 2023 23:56:48 +0200
Subject: [PATCH 04/14] Update llvm/include/llvm/ADT/PagedVector.h

Co-authored-by: Richard Smith <richard at metafoo.co.uk>
---
 llvm/include/llvm/ADT/PagedVector.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
index 899510ee66cb57e..3cfbf8c907a8c3c 100644
--- a/llvm/include/llvm/ADT/PagedVector.h
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -290,7 +290,7 @@ class PagedVector {
   // one element of a page to materialise all the elements of the page.
   MaterialisedIterator materialisedBegin() const {
     // Look for the first valid page
-    for (std::size_t ElementIdx = 0ULL; ElementIdx < Size; ++ElementIdx)
+    for (std::size_t ElementIdx = 0; ElementIdx < Size; ElementIdx += PAGE_SIZE)
       if (PageToDataIdx[ElementIdx / PAGE_SIZE] != InvalidPage)
         return MaterialisedIterator(this, ElementIdx);
 

>From 62cebd40a95b1188bb880f6e8dfa09269d6b5f0b Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Wed, 20 Sep 2023 00:10:31 +0200
Subject: [PATCH 05/14] Update llvm/include/llvm/ADT/PagedVector.h

Co-authored-by: Richard Smith <richard at metafoo.co.uk>
---
 llvm/include/llvm/ADT/PagedVector.h | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
index 3cfbf8c907a8c3c..4b73d6098ba724c 100644
--- a/llvm/include/llvm/ADT/PagedVector.h
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -185,9 +185,13 @@ class PagedVector {
     // When incrementing the iterator, we skip the elements which have not
     // been materialised yet.
     MaterialisedIterator &operator++() {
-      while (ElementIdx < PV->Size)
-        if (PV->PageToDataIdx[ElementIdx++ / PAGE_SIZE] != InvalidPage)
-          break;
+      ++ElementIdx;
+      if (ElementIdx % PAGE_SIZE == 0) {
+        while (ElementIdx < PV->Size && PV->PageToDataIdx[ElementIdx / PAGE_SIZE] == InvalidPage)
+          ElementIdx += PAGE_SIZE;
+        if (ElementIdx > PV->Size)
+          ElementIdx = PV->Size;
+      }
 
       return *this;
     }

>From b124255b70c8f79fbcb85f4ed2620438be23f499 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Wed, 20 Sep 2023 00:28:21 +0200
Subject: [PATCH 06/14] Update llvm/include/llvm/ADT/PagedVector.h

Co-authored-by: Richard Smith <richard at metafoo.co.uk>
---
 llvm/include/llvm/ADT/PagedVector.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
index 4b73d6098ba724c..7705a7140e7f12a 100644
--- a/llvm/include/llvm/ADT/PagedVector.h
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -59,6 +59,7 @@ class PagedVector {
   PagedVector(BumpPtrAllocator *A) : Allocator(A, false) {}
 
   ~PagedVector() {
+    clear();
     // If we own the allocator, delete it.
     if (Allocator.getInt() == true)
       delete Allocator.getPointer();

>From 32616faf6c776d059a23d9c1228a3278a9df6245 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Tue, 19 Sep 2023 02:09:47 +0200
Subject: [PATCH 07/14] use any_of

---
 llvm/docs/ProgrammersManual.rst     | 15 +++---
 llvm/include/llvm/ADT/PagedVector.h | 79 ++++++++++++++---------------
 2 files changed, 46 insertions(+), 48 deletions(-)

diff --git a/llvm/docs/ProgrammersManual.rst b/llvm/docs/ProgrammersManual.rst
index 7d5cf3f3c0a9a41..1096701288c033c 100644
--- a/llvm/docs/ProgrammersManual.rst
+++ b/llvm/docs/ProgrammersManual.rst
@@ -1637,9 +1637,10 @@ elements is known in advance and their actual initialization is expensive and
 sparse so that it's only done lazily when the element is accessed. When the
 number of used pages is small significant memory savings can be achieved.
 
-The main advantage is that a ``PagedVector`` allows to delay the actual allocation
-of the page until it's needed, at the extra cost of one integer per page and one
-extra indirection when accessing elements with their positional index. 
+The main advantage is that a ``PagedVector`` allows to delay the actual
+allocation of the page until it's needed, at the extra cost of one integer per
+page and one extra indirection when accessing elements with their positional
+index. 
 
 In order to maximise the memory footprint of this container, it's important to
 balance the PageSize so that it's not too small (otherwise the overhead of the
@@ -1647,10 +1648,10 @@ integer per page might become too high) and not too big (otherwise the memory is
 wasted if the page is not fully used).
 
 Moreover, while retaining the oder of the elements based on their insertion
-index, like a vector, iterating over the elements via ``begin()`` and ``end()`` is not
-provided in the API, due to the fact accessing the elements in order would
-allocate all the iterated pages, defeating memory savings and the purpose of the
-``PagedVector``.
+index, like a vector, iterating over the elements via ``begin()`` and ``end()``
+is not provided in the API, due to the fact accessing the elements in order
+would allocate all the iterated pages, defeating memory savings and the purpose
+of the ``PagedVector``.
 
 Finally a ``materialised_begin()`` and ``materialised_end`` iterators are
 provided to access the elements associated to the accessed pages, which could
diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
index 7705a7140e7f12a..dcda0eead637c38 100644
--- a/llvm/include/llvm/ADT/PagedVector.h
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -27,16 +27,15 @@ namespace llvm {
 // with the default constructor and elements are initialised later, on first
 // access.
 //
-// Notice that this does not have iterators, because if you
-// have iterators it probably means you are going to touch
-// all the memory in any case, so better use a std::vector in
-// the first place.
+// Notice that this does not have iterators, because if you have iterators it
+// probably means you are going to touch all the memory in any case, so better
+// use a std::vector in the first place.
 //
 // Pages are allocated in SLAB_SIZE chunks, using the BumpPtrAllocator.
-template <typename T, std::size_t PAGE_SIZE = 1024 / sizeof(T)>
+template <typename T, std::size_t PageSize = 1024 / sizeof(T)>
 class PagedVector {
-  static_assert(PAGE_SIZE > 0, "PAGE_SIZE must be greater than 0. Most likely "
-                               "you want it to be greater than 16.");
+  static_assert(PageSize > 0, "PageSize must be greater than 0. Most likely "
+                              "you want it to be greater than 16.");
   // The actual number of element in the vector which can be accessed.
   std::size_t Size = 0;
 
@@ -70,26 +69,26 @@ class PagedVector {
   // constructed elements. If the associated page is filled, return the element.
   T &operator[](std::size_t Index) const {
     assert(Index < Size);
-    assert(Index / PAGE_SIZE < PageToDataIdx.size());
-    uintptr_t &PagePtr = PageToDataIdx[Index / PAGE_SIZE];
+    assert(Index / PageSize < PageToDataIdx.size());
+    uintptr_t &PagePtr = PageToDataIdx[Index / PageSize];
     // If the page was not yet allocated, allocate it.
     if (PagePtr == InvalidPage) {
-      T *NewPagePtr = Allocator.getPointer()->template Allocate<T>(PAGE_SIZE);
+      T *NewPagePtr = Allocator.getPointer()->template Allocate<T>(PageSize);
       // We need to invoke the default constructor on all the elements of the
       // page.
-      for (std::size_t I = 0; I < PAGE_SIZE; ++I)
+      for (std::size_t I = 0; I < PageSize; ++I)
         new (NewPagePtr + I) T();
 
       PagePtr = reinterpret_cast<uintptr_t>(NewPagePtr);
     }
     // Dereference the element in the page.
-    return *((Index % PAGE_SIZE) + reinterpret_cast<T *>(PagePtr));
+    return *((Index % PageSize) + reinterpret_cast<T *>(PagePtr));
   }
 
   // Return the capacity of the vector. I.e. the maximum size it can be expanded
   // to with the resize method without allocating more pages.
   [[nodiscard]] std::size_t capacity() const {
-    return PageToDataIdx.size() * PAGE_SIZE;
+    return PageToDataIdx.size() * PageSize;
   }
 
   // Return the size of the vector. I.e. the maximum index that can be
@@ -103,14 +102,14 @@ class PagedVector {
   void resize(std::size_t NewSize) {
     // Handle shrink case: delete the pages and update the size.
     if (NewSize < Size) {
-      std::size_t NewLastPage = (NewSize - 1) / PAGE_SIZE;
+      std::size_t NewLastPage = (NewSize - 1) / PageSize;
       for (std::size_t I = NewLastPage + 1; I < PageToDataIdx.size(); ++I) {
         uintptr_t PagePtr = PageToDataIdx[I];
         if (PagePtr == InvalidPage)
           continue;
         T *Page = reinterpret_cast<T *>(PagePtr);
         // We need to invoke the destructor on all the elements of the page.
-        for (std::size_t J = 0; J < PAGE_SIZE; ++J)
+        for (std::size_t J = 0; J < PageSize; ++J)
           Page[J].~T();
         Allocator.getPointer()->Deallocate(Page);
       }
@@ -121,11 +120,11 @@ class PagedVector {
         // If the new size and the old size are on the same page, we need to
         // delete only the elements between the new size and the old size.
         // Otherwise we need to delete all the remaining elements in the page.
-        std::size_t OldPage = (Size - 1) / PAGE_SIZE;
-        std::size_t NewPage = (NewSize - 1) / PAGE_SIZE;
+        std::size_t OldPage = (Size - 1) / PageSize;
+        std::size_t NewPage = (NewSize - 1) / PageSize;
         std::size_t LastPageElements =
-            OldPage == NewPage ? Size % PAGE_SIZE : PAGE_SIZE;
-        for (std::size_t J = NewSize % PAGE_SIZE; J < LastPageElements; ++J)
+            OldPage == NewPage ? Size % PageSize : PageSize;
+        for (std::size_t J = NewSize % PageSize; J < LastPageElements; ++J)
           Page[J].~T();
       }
       PageToDataIdx.resize(NewLastPage + 1);
@@ -136,10 +135,10 @@ class PagedVector {
     if (Size <= capacity())
       return;
     // The number of pages to allocate. The Remainder is calculated
-    // for the case in which the NewSize is not a multiple of PAGE_SIZE.
+    // for the case in which the NewSize is not a multiple of PageSize.
     // In that case we need one more page.
-    std::size_t Pages = Size / PAGE_SIZE;
-    std::size_t Remainder = Size % PAGE_SIZE;
+    std::size_t Pages = Size / PageSize;
+    std::size_t Remainder = Size % PageSize;
     if (Remainder != 0)
       Pages += 1;
     assert(Pages > PageToDataIdx.size());
@@ -187,9 +186,9 @@ class PagedVector {
     // been materialised yet.
     MaterialisedIterator &operator++() {
       ++ElementIdx;
-      if (ElementIdx % PAGE_SIZE == 0) {
-        while (ElementIdx < PV->Size && PV->PageToDataIdx[ElementIdx / PAGE_SIZE] == InvalidPage)
-          ElementIdx += PAGE_SIZE;
+      if (ElementIdx % PageSize == 0) {
+        while (ElementIdx < PV->Size && PV->PageToDataIdx[ElementIdx / PageSize] == InvalidPage)
+          ElementIdx += PageSize;
         if (ElementIdx > PV->Size)
           ElementIdx = PV->Size;
       }
@@ -213,18 +212,18 @@ class PagedVector {
 
       std::size_t ElementMin = std::min(ElementIdx, Other.ElementIdx);
       std::size_t ElementMax = std::max(ElementIdx, Other.ElementIdx);
-      std::size_t PageMin = ElementMin / PAGE_SIZE;
-      std::size_t PageMax = ElementMax / PAGE_SIZE;
+      std::size_t PageMin = ElementMin / PageSize;
+      std::size_t PageMax = ElementMax / PageSize;
 
-      std::size_t Count = 0ULL;
+      std::size_t Count = 0;
       for (std::size_t PageIdx = PageMin; PageIdx < PageMax; ++PageIdx) {
         if (PV->PageToDataIdx[PageIdx] == InvalidPage)
           continue;
 
-        Count += PAGE_SIZE;
+        Count += PageSize;
       }
-      Count += ElementMax % PAGE_SIZE;
-      Count += PAGE_SIZE - ElementMin % PAGE_SIZE;
+      Count += ElementMax % PageSize;
+      Count += PageSize - ElementMin % PageSize;
 
       return Count;
     }
@@ -232,10 +231,10 @@ class PagedVector {
     // When dereferencing the iterator, we materialise the page if needed.
     T const &operator*() const {
       assert(ElementIdx < PV->Size);
-      assert(PV->PageToDataIdx[ElementIdx / PAGE_SIZE] != InvalidPage);
+      assert(PV->PageToDataIdx[ElementIdx / PageSize] != InvalidPage);
       T *PagePtr =
-          reinterpret_cast<T *>(PV->PageToDataIdx[ElementIdx / PAGE_SIZE]);
-      return *((ElementIdx % PAGE_SIZE) + PagePtr);
+          reinterpret_cast<T *>(PV->PageToDataIdx[ElementIdx / PageSize]);
+      return *((ElementIdx % PageSize) + PagePtr);
     }
 
     // Equality operator.
@@ -248,8 +247,8 @@ class PagedVector {
         return true;
       // Get the pages of the two iterators. If between the two pages there
       // are no valid pages, we can condider the iterators equal.
-      std::size_t PageMin = std::min(ElementIdx, Other.ElementIdx) / PAGE_SIZE;
-      std::size_t PageMax = std::max(ElementIdx, Other.ElementIdx) / PAGE_SIZE;
+      std::size_t PageMin = std::min(ElementIdx, Other.ElementIdx) / PageSize;
+      std::size_t PageMax = std::max(ElementIdx, Other.ElementIdx) / PageSize;
       // If the two pages are past the end, the iterators are equal.
       if (PageMin >= PV->PageToDataIdx.size())
         return true;
@@ -275,11 +274,9 @@ class PagedVector {
 
       // If the two pages are different, the iterators are equal if all the
       // pages between them are invalid.
-      for (std::size_t PageIdx = PageMin; PageIdx < PageMax; ++PageIdx)
-        if (PV->PageToDataIdx[PageIdx] != InvalidPage)
-          return false;
-
-      return true;
+      return std::all_of(PV->PageToDataIdx.begin() + PageMin,
+                         PV->PageToDataIdx.begin() + PageMax,
+                         [](uintptr_t Page) { return Page == InvalidPage; });
     }
 
     bool operator!=(MaterialisedIterator const &Other) const {

>From ec1226e92aa7e7ca80c8e7d8dc0f24fbdfae6b3f Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Tue, 19 Sep 2023 23:59:29 +0200
Subject: [PATCH 08/14] fix

---
 llvm/include/llvm/ADT/PagedVector.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
index dcda0eead637c38..b7e05a4a42edcfc 100644
--- a/llvm/include/llvm/ADT/PagedVector.h
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -292,8 +292,8 @@ class PagedVector {
   // one element of a page to materialise all the elements of the page.
   MaterialisedIterator materialisedBegin() const {
     // Look for the first valid page
-    for (std::size_t ElementIdx = 0; ElementIdx < Size; ElementIdx += PAGE_SIZE)
-      if (PageToDataIdx[ElementIdx / PAGE_SIZE] != InvalidPage)
+    for (std::size_t ElementIdx = 0; ElementIdx < Size; ElementIdx += PageSize)
+      if (PageToDataIdx[ElementIdx / PageSize] != InvalidPage)
         return MaterialisedIterator(this, ElementIdx);
 
     return MaterialisedIterator(this, Size);

>From 813614e01be2786e8d396be65b2fb38abfd8d3cc Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Wed, 20 Sep 2023 00:10:48 +0200
Subject: [PATCH 09/14] fix

---
 llvm/unittests/ADT/PagedVectorTest.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/llvm/unittests/ADT/PagedVectorTest.cpp b/llvm/unittests/ADT/PagedVectorTest.cpp
index c6a5da2abc210cc..b04fe37d3c0bdea 100644
--- a/llvm/unittests/ADT/PagedVectorTest.cpp
+++ b/llvm/unittests/ADT/PagedVectorTest.cpp
@@ -69,6 +69,9 @@ TEST(PagedVectorTest, HalfPageFillingTest) {
   for (int I = 0; I < 5; ++I) {
     EXPECT_EQ(V[I], I);
   }
+  for (int I = 5; I < 10; ++I) {
+    EXPECT_DEATH(V[I], "(Index < Size)");
+  }
 }
 
 TEST(PagedVectorTest, FillFullMultiPageTest) {
@@ -81,6 +84,10 @@ TEST(PagedVectorTest, FillFullMultiPageTest) {
     V[I] = I;
   }
   EXPECT_EQ(std::distance(V.materialisedBegin(), V.materialisedEnd()), 20LL);
+  for (auto MI = V.materialisedBegin(), ME = V.materialisedEnd(); MI != ME;
+       ++MI) {
+    EXPECT_EQ(*MI, std::distance(V.materialisedBegin(), MI));
+  }
 }
 
 TEST(PagedVectorTest, FillHalfMultiPageTest) {

>From c662561af572f4b7d80887fff3e9507910372ca3 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Wed, 20 Sep 2023 00:18:42 +0200
Subject: [PATCH 10/14] fix

---
 llvm/include/llvm/ADT/PagedVector.h | 26 --------------------------
 1 file changed, 26 deletions(-)

diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
index b7e05a4a42edcfc..9fe14f32eb68d37 100644
--- a/llvm/include/llvm/ADT/PagedVector.h
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -202,32 +202,6 @@ class PagedVector {
       return Copy;
     }
 
-    std::ptrdiff_t operator-(MaterialisedIterator const &Other) const {
-      assert(PV == Other.PV);
-      // If they are on the same table we can just subtract the indices.
-      // Otherwise we have to iterate over the pages to find the difference.
-      // If a page is invalid, we skip it.
-      if (PV == Other.PV)
-        return ElementIdx - Other.ElementIdx;
-
-      std::size_t ElementMin = std::min(ElementIdx, Other.ElementIdx);
-      std::size_t ElementMax = std::max(ElementIdx, Other.ElementIdx);
-      std::size_t PageMin = ElementMin / PageSize;
-      std::size_t PageMax = ElementMax / PageSize;
-
-      std::size_t Count = 0;
-      for (std::size_t PageIdx = PageMin; PageIdx < PageMax; ++PageIdx) {
-        if (PV->PageToDataIdx[PageIdx] == InvalidPage)
-          continue;
-
-        Count += PageSize;
-      }
-      Count += ElementMax % PageSize;
-      Count += PageSize - ElementMin % PageSize;
-
-      return Count;
-    }
-
     // When dereferencing the iterator, we materialise the page if needed.
     T const &operator*() const {
       assert(ElementIdx < PV->Size);

>From 5e619e93e94286b4a11cf25b31453b3f990b5e7f Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Wed, 20 Sep 2023 00:29:00 +0200
Subject: [PATCH 11/14] fix

---
 llvm/include/llvm/ADT/PagedVector.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
index 9fe14f32eb68d37..0c59b6b6316b3f7 100644
--- a/llvm/include/llvm/ADT/PagedVector.h
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -160,8 +160,11 @@ class PagedVector {
     if (Allocator.getInt() == true)
       Allocator.getPointer()->Reset();
     else
-      for (uintptr_t Page : PageToDataIdx)
+      for (uintptr_t Page : PageToDataIdx) {
+        for (std::size_t I = 0; I < PageSize; ++I)
+          reinterpret_cast<T *>(Page)[I].~T();
         Allocator.getPointer()->Deallocate(reinterpret_cast<T *>(Page));
+      }
 
     PageToDataIdx.clear();
   }

>From 3c75cc0182200b797e07f1020dfc3870255bb134 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Wed, 20 Sep 2023 00:34:33 +0200
Subject: [PATCH 12/14] fix

---
 llvm/include/llvm/ADT/PagedVector.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
index 0c59b6b6316b3f7..274d63955cfcf3d 100644
--- a/llvm/include/llvm/ADT/PagedVector.h
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -55,7 +55,9 @@ class PagedVector {
 
   // Default constructor. We build our own allocator.
   PagedVector() : Allocator(new BumpPtrAllocator, true) {}
-  PagedVector(BumpPtrAllocator *A) : Allocator(A, false) {}
+  PagedVector(BumpPtrAllocator *A) : Allocator(A, false) {
+    assert(A != nullptr && "Allocator cannot be null");
+  }
 
   ~PagedVector() {
     clear();

>From b778cc5f77e59d1afd98571ffdf510b6859b1dd0 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Wed, 20 Sep 2023 00:38:21 +0200
Subject: [PATCH 13/14] Apply suggestions from code review

Co-authored-by: Jakub Kuderski <kubakuderski at gmail.com>
Co-authored-by: Richard Smith <richard at metafoo.co.uk>
---
 llvm/include/llvm/ADT/PagedVector.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
index 274d63955cfcf3d..5f2da0399c5c0ab 100644
--- a/llvm/include/llvm/ADT/PagedVector.h
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -53,7 +53,7 @@ class PagedVector {
 public:
   using value_type = T;
 
-  // Default constructor. We build our own allocator.
+  // Default constructor. We build our own allocator and mark it as such with `true` in the second pair element.
   PagedVector() : Allocator(new BumpPtrAllocator, true) {}
   PagedVector(BumpPtrAllocator *A) : Allocator(A, false) {
     assert(A != nullptr && "Allocator cannot be null");
@@ -66,7 +66,7 @@ class PagedVector {
       delete Allocator.getPointer();
   }
 
-  // Lookup an element at position i.
+  // Look up an element at position `Index`.
   // If the associated page is not filled, it will be filled with default
   // constructed elements. If the associated page is filled, return the element.
   T &operator[](std::size_t Index) const {
@@ -105,7 +105,7 @@ class PagedVector {
     // Handle shrink case: delete the pages and update the size.
     if (NewSize < Size) {
       std::size_t NewLastPage = (NewSize - 1) / PageSize;
-      for (std::size_t I = NewLastPage + 1; I < PageToDataIdx.size(); ++I) {
+      for (std::size_t I = NewLastPage + 1, E = PageToDataIdx.size(); I < E; ++I) {
         uintptr_t PagePtr = PageToDataIdx[I];
         if (PagePtr == InvalidPage)
           continue;
@@ -115,7 +115,7 @@ class PagedVector {
           Page[J].~T();
         Allocator.getPointer()->Deallocate(Page);
       }
-      // Delete the extra ones in the new last page.
+      // Destroy the extra ones in the new last page.
       uintptr_t PagePtr = PageToDataIdx[NewLastPage];
       if (PagePtr != InvalidPage) {
         T *Page = reinterpret_cast<T *>(PagePtr);
@@ -130,6 +130,7 @@ class PagedVector {
           Page[J].~T();
       }
       PageToDataIdx.resize(NewLastPage + 1);
+      return;
     }
     Size = NewSize;
     // If the capacity is enough, just update the size and continue

>From 9fab34e734082745031d41556af7fdbfec900420 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Wed, 20 Sep 2023 08:34:46 +0200
Subject: [PATCH 14/14] Fix wrong return

---
 llvm/include/llvm/ADT/PagedVector.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
index 5f2da0399c5c0ab..4c160cc507750f6 100644
--- a/llvm/include/llvm/ADT/PagedVector.h
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -130,7 +130,6 @@ class PagedVector {
           Page[J].~T();
       }
       PageToDataIdx.resize(NewLastPage + 1);
-      return;
     }
     Size = NewSize;
     // If the capacity is enough, just update the size and continue



More information about the cfe-commits mailing list