[llvm] Introduce paged vector (PR #66430)

Giulio Eulisse via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 15 03:41:41 PDT 2023


https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/66430

>From b952f0577dfe8112f394bd5392212f843c0cc86e Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Thu, 14 Sep 2023 21:58:21 +0200
Subject: [PATCH 01/12] Introduce PagedVector class

The goal of the class is to be an (almost) drop in replacement for
SmallVector and std::vector when those are presized and filled later,
as it happens in SourceManager and ASTReader.

By splitting the actual vector in pages of the same size and allocating
the pages only when they are needed, using this containers reduces the
memory usage by a factor 4 for the cases relevant to the ALICE
experiment ROOT / cling usage.
---
 llvm/include/llvm/ADT/PagedVector.h | 96 +++++++++++++++++++++++++++++
 1 file changed, 96 insertions(+)
 create mode 100644 llvm/include/llvm/ADT/PagedVector.h

diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
new file mode 100644
index 000000000000000..dab0d249aa706e4
--- /dev/null
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -0,0 +1,96 @@
+//===- llvm/ADT/PagedVector.h - 'Lazyly allocated' vectors --------*- C++
+//-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PagedVector class.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_ADT_PAGEDVECTOR_H
+#define LLVM_ADT_PAGEDVECTOR_H
+
+#include <vector>
+
+// Notice that this does not have iterators, because if you
+// have iterators it probably means you are going to touch
+// all the memory in any case, so better use a std::vector in
+// the first place.
+template <typename T, int PAGE_SIZE = 1024 / sizeof(T)> class PagedVector {
+  size_t Size = 0;
+  // Index of where to find a given page in the data
+  mutable std::vector<int> Lookup;
+  // Actual data
+  mutable std::vector<T> Data;
+
+public:
+  // Add a range to the vector.
+  // When vector is accessed, it will call the callback to fill the range
+  // with data.
+
+  // Lookup an element at position i.
+  // If the given range is not filled, it will be filled.
+  // If the given range is filled, return the element.
+  T &operator[](int Index) const { return at(Index); }
+
+  T &at(int Index) const {
+    auto &PageId = Lookup[Index / PAGE_SIZE];
+    // If the range is not filled, fill it
+    if (PageId == -1) {
+      int OldSize = Data.size();
+      PageId = OldSize / PAGE_SIZE;
+      // Allocate the memory
+      Data.resize(OldSize + PAGE_SIZE);
+      // Fill the whole capacity with empty elements
+      for (int I = 0; I < PAGE_SIZE; ++I) {
+        Data[I + OldSize] = T();
+      }
+    }
+    // Return the element
+    return Data[Index % PAGE_SIZE + PAGE_SIZE * PageId];
+  }
+
+  // Return the size of the vector
+  size_t capacity() const { return Lookup.size() * PAGE_SIZE; }
+
+  size_t size() const { return Size; }
+
+  // Expands the vector to the given size.
+  // If the vector is already bigger, does nothing.
+  void expand(size_t NewSize) {
+    // You cannot shrink the vector, otherwise
+    // you would have to invalidate
+    assert(NewSize >= Size);
+    if (NewSize <= Size) {
+      return;
+    }
+    if (NewSize <= capacity()) {
+      Size = NewSize;
+      return;
+    }
+    auto Pages = NewSize / PAGE_SIZE;
+    auto Remainder = NewSize % PAGE_SIZE;
+    if (Remainder) {
+      Pages += 1;
+    }
+    assert(Pages > Lookup.size());
+    Lookup.resize(Pages, -1);
+    Size = NewSize;
+  }
+
+  // Return true if the vector is empty
+  bool empty() const { return Size == 0; }
+  /// Clear the vector
+  void clear() {
+    Size = 0;
+    Lookup.clear();
+    Data.clear();
+  }
+
+  std::vector<T> const &materialised() const { return Data; }
+};
+
+#endif // LLVM_ADT_PAGEDVECTOR_H

>From f0c75c8af0fea27b5b758e2e5775787f33595de3 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Thu, 14 Sep 2023 22:20:29 +0200
Subject: [PATCH 02/12] Use PagedVector to reduce memory usage of sparsely
 populated vectors

---
 clang/include/clang/Basic/SourceManager.h     |  3 ++-
 clang/include/clang/Serialization/ASTReader.h |  5 +++--
 clang/lib/Basic/SourceManager.cpp             | 12 ++++++------
 clang/lib/Serialization/ASTReader.cpp         |  9 +++++----
 4 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h
index 2f846502d6f3327..b1942a3d86afc37 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -43,6 +43,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -699,7 +700,7 @@ class SourceManager : public RefCountedBase<SourceManager> {
   ///
   /// Negative FileIDs are indexes into this table. To get from ID to an index,
   /// use (-ID - 2).
-  SmallVector<SrcMgr::SLocEntry, 0> LoadedSLocEntryTable;
+  PagedVector<SrcMgr::SLocEntry> LoadedSLocEntryTable;
 
   /// The starting offset of the next local SLocEntry.
   ///
diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index dc1eb21c27801fe..567aecc8246e761 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -38,6 +38,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -487,7 +488,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the type with
   /// ID = (I + 1) << FastQual::Width has already been loaded
-  std::vector<QualType> TypesLoaded;
+  PagedVector<QualType> TypesLoaded;
 
   using GlobalTypeMapType =
       ContinuousRangeMap<serialization::TypeID, ModuleFile *, 4>;
@@ -501,7 +502,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the declaration with ID
   /// = I + 1 has already been loaded.
-  std::vector<Decl *> DeclsLoaded;
+  PagedVector<Decl *> DeclsLoaded;
 
   using GlobalDeclMapType =
       ContinuousRangeMap<serialization::DeclID, ModuleFile *, 4>;
diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp
index 0521ac7b30339ab..c028afe63ac85ad 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -458,7 +458,7 @@ SourceManager::AllocateLoadedSLocEntries(unsigned NumSLocEntries,
       CurrentLoadedOffset - TotalSize < NextLocalOffset) {
     return std::make_pair(0, 0);
   }
-  LoadedSLocEntryTable.resize(LoadedSLocEntryTable.size() + NumSLocEntries);
+  LoadedSLocEntryTable.expand(LoadedSLocEntryTable.size() + NumSLocEntries);
   SLocEntryLoaded.resize(LoadedSLocEntryTable.size());
   CurrentLoadedOffset -= TotalSize;
   int ID = LoadedSLocEntryTable.size();
@@ -2344,11 +2344,11 @@ SourceManager::MemoryBufferSizes SourceManager::getMemoryBufferSizes() const {
 }
 
 size_t SourceManager::getDataStructureSizes() const {
-  size_t size = llvm::capacity_in_bytes(MemBufferInfos)
-    + llvm::capacity_in_bytes(LocalSLocEntryTable)
-    + llvm::capacity_in_bytes(LoadedSLocEntryTable)
-    + llvm::capacity_in_bytes(SLocEntryLoaded)
-    + llvm::capacity_in_bytes(FileInfos);
+  size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
+                llvm::capacity_in_bytes(LocalSLocEntryTable) +
+                llvm::capacity_in_bytes(LoadedSLocEntryTable.materialised()) +
+                llvm::capacity_in_bytes(SLocEntryLoaded) +
+                llvm::capacity_in_bytes(FileInfos);
 
   if (OverriddenFilesInfo)
     size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles);
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 0952244d037a77c..18b1a9a2480a73e 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -3260,7 +3260,7 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
           std::make_pair(LocalBaseTypeIndex,
                          F.BaseTypeIndex - LocalBaseTypeIndex));
 
-        TypesLoaded.resize(TypesLoaded.size() + F.LocalNumTypes);
+        TypesLoaded.expand(TypesLoaded.size() + F.LocalNumTypes);
       }
       break;
     }
@@ -3290,7 +3290,7 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
         // module.
         F.GlobalToLocalDeclIDs[&F] = LocalBaseDeclID;
 
-        DeclsLoaded.resize(DeclsLoaded.size() + F.LocalNumDecls);
+        DeclsLoaded.expand(DeclsLoaded.size() + F.LocalNumDecls);
       }
       break;
     }
@@ -7944,9 +7944,10 @@ void ASTReader::PrintStats() {
   std::fprintf(stderr, "*** AST File Statistics:\n");
 
   unsigned NumTypesLoaded =
-      TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
+      TypesLoaded.size() - llvm::count(TypesLoaded.materialised(), QualType());
   unsigned NumDeclsLoaded =
-      DeclsLoaded.size() - llvm::count(DeclsLoaded, (Decl *)nullptr);
+      DeclsLoaded.size() -
+      llvm::count(DeclsLoaded.materialised(), (Decl *)nullptr);
   unsigned NumIdentifiersLoaded =
       IdentifiersLoaded.size() -
       llvm::count(IdentifiersLoaded, (IdentifierInfo *)nullptr);

>From 6d5095809dcb95fbad4231925e4b8c08d094143c Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Fri, 15 Sep 2023 09:30:15 +0200
Subject: [PATCH 03/12] Update documentation

---
 llvm/docs/ProgrammersManual.rst | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/llvm/docs/ProgrammersManual.rst b/llvm/docs/ProgrammersManual.rst
index 43dd985d9779ed2..8c26e01728b6910 100644
--- a/llvm/docs/ProgrammersManual.rst
+++ b/llvm/docs/ProgrammersManual.rst
@@ -1625,6 +1625,35 @@ SmallVector has grown a few other minor advantages over std::vector, causing
    and is no longer "private to the implementation". A name like
    ``SmallVectorHeader`` might be more appropriate.
 
+.. _dss_smallvector:
+
+llvm/ADT/PagedVector.h
+^^^^^^^^^^^^^^^^^^^^^^
+
+``PagedVector<Type, PageSize>`` is a random access container that allocates (PageSize) elements
+of type Type when the first element of a page is accessed via the ``operator[]`` or the ``at()``
+method.  This is useful for the case in which the number of elements is known in advance and 
+their actual initialization is expensive and sparse so that it's only done lazily when the element is 
+accessed. Typical cases for this are the ASTReader::TypesLoaded and ASTReader::DeclsLoaded containers.
+
+The main advantage is that a PagedVector allows to delay the actual allocation of the page until it's needed,
+at the extra cost of one integer per page and one extra indirection when accessing elements with their positional
+index. 
+
+In order to maximise the memory footprint of this container, it's important to balance the PageSize so that 
+it's not too small (otherwise the overhead of the integer per page might become too high) and not too big 
+(otherwise the memory is wasted if the page is not fully used).
+
+The PagedVector can only be expanded at the end, and it's not possible to shrink it, to keep the implementation
+simple and not give the false hope that the resize operation is cheap.
+
+Moreover, while retaining the oder of the elements based on their insertion index, like a vector, iterating over
+the elements via begin() and end() is not provided in the API, due to the fact accessing the elements in order would
+allocate all the iterated pages, defeating memory savings and the purpose of the PagedVector.
+
+Finally a ``materialised()`` method is provided to access the elements associated to the accessed pages, which could
+speedup operations that need to iterate over initialized elements in a non-ordered manner.
+
 .. _dss_vector:
 
 <vector>

>From b5dc8ed4ae7ccd7702e5a3065b62fb4675f36084 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Fri, 15 Sep 2023 09:32:51 +0200
Subject: [PATCH 04/12] Improve comments

---
 llvm/include/llvm/ADT/PagedVector.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
index dab0d249aa706e4..635f2fd0829b36b 100644
--- a/llvm/include/llvm/ADT/PagedVector.h
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -15,6 +15,9 @@
 
 #include <vector>
 
+// A vector that allocates memory in pages.
+// Order is kept, but memory is allocated only when one element of the page is
+// accessed.
 // Notice that this does not have iterators, because if you
 // have iterators it probably means you are going to touch
 // all the memory in any case, so better use a std::vector in
@@ -23,17 +26,13 @@ template <typename T, int PAGE_SIZE = 1024 / sizeof(T)> class PagedVector {
   size_t Size = 0;
   // Index of where to find a given page in the data
   mutable std::vector<int> Lookup;
-  // Actual data
+  // Actual page data
   mutable std::vector<T> Data;
 
 public:
-  // Add a range to the vector.
-  // When vector is accessed, it will call the callback to fill the range
-  // with data.
-
   // Lookup an element at position i.
-  // If the given range is not filled, it will be filled.
-  // If the given range is filled, return the element.
+  // If the associated page is not filled, it will be filled with default
+  // constructed elements. If the associated page is filled, return the element.
   T &operator[](int Index) const { return at(Index); }
 
   T &at(int Index) const {
@@ -83,6 +82,7 @@ template <typename T, int PAGE_SIZE = 1024 / sizeof(T)> class PagedVector {
 
   // Return true if the vector is empty
   bool empty() const { return Size == 0; }
+
   /// Clear the vector
   void clear() {
     Size = 0;

>From ab3d0c107f08f89d15e710297095456ebef61755 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Fri, 15 Sep 2023 09:49:54 +0200
Subject: [PATCH 05/12] Improve comments

---
 llvm/include/llvm/ADT/PagedVector.h | 49 ++++++++++++++++++++++-------
 1 file changed, 38 insertions(+), 11 deletions(-)

diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
index 635f2fd0829b36b..4aa59e8deed7b1c 100644
--- a/llvm/include/llvm/ADT/PagedVector.h
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -17,24 +17,32 @@
 
 // A vector that allocates memory in pages.
 // Order is kept, but memory is allocated only when one element of the page is
-// accessed.
+// accessed. This introduces a level of indirection, but it is useful when you
+// have a sparsely initialised vector like for the case of ASTReader::DeclsLoaded
+// or ASTReader::TypesLoaded.
+//
 // Notice that this does not have iterators, because if you
 // have iterators it probably means you are going to touch
 // all the memory in any case, so better use a std::vector in
 // the first place.
 template <typename T, int PAGE_SIZE = 1024 / sizeof(T)> class PagedVector {
+  // The actual number of element in the vector which can be accessed.
   size_t Size = 0;
-  // Index of where to find a given page in the data
+  // The position of the initial element of the page in the Data vector.
+  // Pages are allocated contiguously in the Data vector.
   mutable std::vector<int> Lookup;
-  // Actual page data
+  // Actual page data. All the page elements are added to this vector on the
+  // first access of any of the elements of the page. Elements default constructed
+  // and elements of the page are stored contiguously. The oder of the elements however
+  // depends on the order of access of the pages.
   mutable std::vector<T> Data;
-
 public:
+  // Lookup an element at position Index.
+  T &operator[](int Index) const { return at(Index); }
+
   // Lookup an element at position i.
   // If the associated page is not filled, it will be filled with default
   // constructed elements. If the associated page is filled, return the element.
-  T &operator[](int Index) const { return at(Index); }
-
   T &at(int Index) const {
     auto &PageId = Lookup[Index / PAGE_SIZE];
     // If the range is not filled, fill it
@@ -52,30 +60,44 @@ template <typename T, int PAGE_SIZE = 1024 / sizeof(T)> class PagedVector {
     return Data[Index % PAGE_SIZE + PAGE_SIZE * PageId];
   }
 
-  // Return the size of the vector
+  // Return the capacity of the vector. I.e. the maximum size it can be expanded
+  // to with the expand method without allocating more pages.
   size_t capacity() const { return Lookup.size() * PAGE_SIZE; }
 
+  // Return the size of the vector. I.e. the maximum index that can be
+  // accessed, i.e. the maximum value which was used as argument of the
+  // expand method.
   size_t size() const { return Size; }
 
-  // Expands the vector to the given size.
-  // If the vector is already bigger, does nothing.
+  // Expands the vector to the given NewSize number of elements.
+  // If the vector was smaller, allocates new pages as needed.
+  // It should be called only with NewSize >= Size.
   void expand(size_t NewSize) {
     // You cannot shrink the vector, otherwise
-    // you would have to invalidate
+    // one would have to invalidate contents which is expensive and
+    // while giving the false hope that the resize is cheap.
     assert(NewSize >= Size);
     if (NewSize <= Size) {
       return;
     }
+    // If the capacity is enough, just update the size and continue
+    // with the currently allocated pages.
     if (NewSize <= capacity()) {
       Size = NewSize;
       return;
     }
+    // The number of pages to allocate. The Remainder is calculated
+    // for the case in which the NewSize is not a multiple of PAGE_SIZE.
+    // In that case we need one more page.
     auto Pages = NewSize / PAGE_SIZE;
     auto Remainder = NewSize % PAGE_SIZE;
     if (Remainder) {
       Pages += 1;
     }
     assert(Pages > Lookup.size());
+    // We use -1 to indicate that a page has not been allocated yet.
+    // This cannot be 0, because 0 is a valid page id.
+    // We use -1 instead of a separate bool to avoid wasting space.
     Lookup.resize(Pages, -1);
     Size = NewSize;
   }
@@ -83,13 +105,18 @@ template <typename T, int PAGE_SIZE = 1024 / sizeof(T)> class PagedVector {
   // Return true if the vector is empty
   bool empty() const { return Size == 0; }
 
-  /// Clear the vector
+  /// Clear the vector, i.e. clear the allocated pages, the whole page
+  /// lookup index and reset the size.
   void clear() {
     Size = 0;
     Lookup.clear();
     Data.clear();
   }
 
+  /// Return the materialised vector. This is useful if you want to iterate
+  /// in an efficient way over the non default constructed elements.
+  /// It's not called data() because that would be misleading, since only
+  /// elements for pages which have been accessed are actually allocated.
   std::vector<T> const &materialised() const { return Data; }
 };
 

>From fd3e94777b2ca01113da7a49a1dcd4ecfd895010 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Fri, 15 Sep 2023 10:04:34 +0200
Subject: [PATCH 06/12] Proper accounting for loaded types and decl

---
 clang/lib/Serialization/ASTReader.cpp |  5 ++---
 llvm/include/llvm/ADT/PagedVector.h   | 11 ++++++-----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 18b1a9a2480a73e..500f629a9f1e2cd 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -7944,10 +7944,9 @@ void ASTReader::PrintStats() {
   std::fprintf(stderr, "*** AST File Statistics:\n");
 
   unsigned NumTypesLoaded =
-      TypesLoaded.size() - llvm::count(TypesLoaded.materialised(), QualType());
+      llvm::count_if(TypesLoaded.materialised(), [](QualType& item) { return item != QualType(); });
   unsigned NumDeclsLoaded =
-      DeclsLoaded.size() -
-      llvm::count(DeclsLoaded.materialised(), (Decl *)nullptr);
+      llvm::count_if(DeclsLoaded.materialised(), [](Decl* item) { return item != (Decl *)nullptr);
   unsigned NumIdentifiersLoaded =
       IdentifiersLoaded.size() -
       llvm::count(IdentifiersLoaded, (IdentifierInfo *)nullptr);
diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
index 4aa59e8deed7b1c..b1d643a282339ea 100644
--- a/llvm/include/llvm/ADT/PagedVector.h
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -18,8 +18,8 @@
 // A vector that allocates memory in pages.
 // Order is kept, but memory is allocated only when one element of the page is
 // accessed. This introduces a level of indirection, but it is useful when you
-// have a sparsely initialised vector like for the case of ASTReader::DeclsLoaded
-// or ASTReader::TypesLoaded.
+// have a sparsely initialised vector like for the case of
+// ASTReader::DeclsLoaded or ASTReader::TypesLoaded.
 //
 // Notice that this does not have iterators, because if you
 // have iterators it probably means you are going to touch
@@ -32,10 +32,11 @@ template <typename T, int PAGE_SIZE = 1024 / sizeof(T)> class PagedVector {
   // Pages are allocated contiguously in the Data vector.
   mutable std::vector<int> Lookup;
   // Actual page data. All the page elements are added to this vector on the
-  // first access of any of the elements of the page. Elements default constructed
-  // and elements of the page are stored contiguously. The oder of the elements however
-  // depends on the order of access of the pages.
+  // first access of any of the elements of the page. Elements default
+  // constructed and elements of the page are stored contiguously. The oder of
+  // the elements however depends on the order of access of the pages.
   mutable std::vector<T> Data;
+
 public:
   // Lookup an element at position Index.
   T &operator[](int Index) const { return at(Index); }

>From 86a9af8b330a7799c8cd33be581bdf4177e11972 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Fri, 15 Sep 2023 10:41:08 +0200
Subject: [PATCH 07/12] Add unit test

---
 llvm/include/llvm/ADT/PagedVector.h    |  3 +-
 llvm/unittests/ADT/CMakeLists.txt      |  1 +
 llvm/unittests/ADT/PagedVectorTest.cpp | 84 ++++++++++++++++++++++++++
 3 files changed, 87 insertions(+), 1 deletion(-)
 create mode 100644 llvm/unittests/ADT/PagedVectorTest.cpp

diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
index b1d643a282339ea..ec133b8f26f048f 100644
--- a/llvm/include/llvm/ADT/PagedVector.h
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -15,6 +15,7 @@
 
 #include <vector>
 
+namespace llvm {
 // A vector that allocates memory in pages.
 // Order is kept, but memory is allocated only when one element of the page is
 // accessed. This introduces a level of indirection, but it is useful when you
@@ -120,5 +121,5 @@ template <typename T, int PAGE_SIZE = 1024 / sizeof(T)> class PagedVector {
   /// elements for pages which have been accessed are actually allocated.
   std::vector<T> const &materialised() const { return Data; }
 };
-
+} // namespace llvm
 #endif // LLVM_ADT_PAGEDVECTOR_H
diff --git a/llvm/unittests/ADT/CMakeLists.txt b/llvm/unittests/ADT/CMakeLists.txt
index c5190255ba7738e..9aa7120f30696da 100644
--- a/llvm/unittests/ADT/CMakeLists.txt
+++ b/llvm/unittests/ADT/CMakeLists.txt
@@ -51,6 +51,7 @@ add_llvm_unittest(ADTTests
   MapVectorTest.cpp
   MoveOnly.cpp
   PackedVectorTest.cpp
+  PagedVectorTest.cpp
   PointerEmbeddedIntTest.cpp
   PointerIntPairTest.cpp
   PointerSumTypeTest.cpp
diff --git a/llvm/unittests/ADT/PagedVectorTest.cpp b/llvm/unittests/ADT/PagedVectorTest.cpp
new file mode 100644
index 000000000000000..e4513c4e1a8ed5c
--- /dev/null
+++ b/llvm/unittests/ADT/PagedVectorTest.cpp
@@ -0,0 +1,84 @@
+//===- llvm/unittest/ADT/PagedVectorTest.cpp ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// PagedVector unit tests.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/PagedVector.h"
+#include "gtest/gtest.h"
+
+namespace llvm {
+TEST(PagedVectorTest, FunctionalityTest) {
+  PagedVector<int, 10> V;
+
+  // Next ten numbers are 10..19
+  V.expand(2);
+  V.expand(10);
+  V.expand(20);
+  V.expand(30);
+  EXPECT_EQ(V.materialised().size(), 0ULL);
+
+  EXPECT_EQ(V.size(), 30ULL);
+  for (int I = 0; I < 10; ++I) {
+    V[I] = I;
+  }
+  for (int I = 0; I < 10; ++I) {
+    EXPECT_EQ(V[I], I);
+  }
+  EXPECT_EQ(V.materialised().size(), 10ULL);
+  for (int I = 20; I < 30; ++I) {
+    V[I] = I;
+  }
+  for (int I = 20; I < 30; ++I) {
+    EXPECT_EQ(V[I], I);
+  }
+  EXPECT_EQ(V.materialised().size(), 20ULL);
+
+  for (int I = 10; I < 20; ++I) {
+    V[I] = I;
+  }
+  for (int I = 10; I < 20; ++I) {
+    EXPECT_EQ(V[I], I);
+  }
+  EXPECT_EQ(V.materialised().size(), 30ULL);
+  V.expand(35);
+  EXPECT_EQ(V.materialised().size(), 30ULL);
+  for (int I = 30; I < 35; ++I) {
+    V[I] = I;
+  }
+  EXPECT_EQ(V.materialised().size(), 40ULL);
+  EXPECT_EQ(V.size(), 35ULL);
+  EXPECT_EQ(V.capacity(), 40ULL);
+  V.expand(37);
+  for (int I = 30; I < 37; ++I) {
+    V[I] = I;
+  }
+  EXPECT_EQ(V.size(), 37ULL);
+  EXPECT_EQ(V.capacity(), 40ULL);
+  for (int I = 0; I < 37; ++I) {
+    EXPECT_EQ(V[I], I);
+  }
+
+  V.expand(41);
+  V[40] = 40;
+  EXPECT_EQ(V.size(), 41ULL);
+  EXPECT_EQ(V.capacity(), 50ULL);
+  for (int I = 0; I < 36; ++I) {
+    EXPECT_EQ(V[I], I);
+  }
+  for (int I = 37; I < 40; ++I) {
+    EXPECT_EQ(V[I], 0);
+  }
+  V.expand(50);
+  EXPECT_EQ(V.capacity(), 50ULL);
+  EXPECT_EQ(V.size(), 50ULL);
+  EXPECT_EQ(V[40], 40);
+  V.expand(50ULL);
+}
+} // namespace llvm

>From ca1e218a4480cf37288331d1f6c3f573a4127b91 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Fri, 15 Sep 2023 11:21:04 +0200
Subject: [PATCH 08/12] Addess comments

---
 llvm/docs/ProgrammersManual.rst     | 4 ++--
 llvm/include/llvm/ADT/PagedVector.h | 5 +++--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/llvm/docs/ProgrammersManual.rst b/llvm/docs/ProgrammersManual.rst
index 8c26e01728b6910..0dff4875972d5d8 100644
--- a/llvm/docs/ProgrammersManual.rst
+++ b/llvm/docs/ProgrammersManual.rst
@@ -1625,7 +1625,7 @@ SmallVector has grown a few other minor advantages over std::vector, causing
    and is no longer "private to the implementation". A name like
    ``SmallVectorHeader`` might be more appropriate.
 
-.. _dss_smallvector:
+.. _dss_pagedvector:
 
 llvm/ADT/PagedVector.h
 ^^^^^^^^^^^^^^^^^^^^^^
@@ -1634,7 +1634,7 @@ llvm/ADT/PagedVector.h
 of type Type when the first element of a page is accessed via the ``operator[]`` or the ``at()``
 method.  This is useful for the case in which the number of elements is known in advance and 
 their actual initialization is expensive and sparse so that it's only done lazily when the element is 
-accessed. Typical cases for this are the ASTReader::TypesLoaded and ASTReader::DeclsLoaded containers.
+accessed. When the number of used pages is small significant memory savings can be achieved.
 
 The main advantage is that a PagedVector allows to delay the actual allocation of the page until it's needed,
 at the extra cost of one integer per page and one extra indirection when accessing elements with their positional
diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
index ec133b8f26f048f..12edc17eeba956b 100644
--- a/llvm/include/llvm/ADT/PagedVector.h
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -19,8 +19,9 @@ namespace llvm {
 // A vector that allocates memory in pages.
 // Order is kept, but memory is allocated only when one element of the page is
 // accessed. This introduces a level of indirection, but it is useful when you
-// have a sparsely initialised vector like for the case of
-// ASTReader::DeclsLoaded or ASTReader::TypesLoaded.
+// have a sparsely initialised vector where the full size is allocated upfront
+// with the default constructor and elements are initialised later, on first
+// access.
 //
 // Notice that this does not have iterators, because if you
 // have iterators it probably means you are going to touch

>From f2ebc3fcefd936420ffb0ce46019c9c83bcb82df Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Fri, 15 Sep 2023 11:29:09 +0200
Subject: [PATCH 09/12] Fix missing namespaces

---
 clang/include/clang/Basic/SourceManager.h     | 2 +-
 clang/include/clang/Serialization/ASTReader.h | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h
index b1942a3d86afc37..e37caa2252532f9 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -700,7 +700,7 @@ class SourceManager : public RefCountedBase<SourceManager> {
   ///
   /// Negative FileIDs are indexes into this table. To get from ID to an index,
   /// use (-ID - 2).
-  PagedVector<SrcMgr::SLocEntry> LoadedSLocEntryTable;
+  llvm::PagedVector<SrcMgr::SLocEntry> LoadedSLocEntryTable;
 
   /// The starting offset of the next local SLocEntry.
   ///
diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index 567aecc8246e761..65e19c6e44cf571 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -488,7 +488,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the type with
   /// ID = (I + 1) << FastQual::Width has already been loaded
-  PagedVector<QualType> TypesLoaded;
+  llvm::PagedVector<QualType> TypesLoaded;
 
   using GlobalTypeMapType =
       ContinuousRangeMap<serialization::TypeID, ModuleFile *, 4>;
@@ -502,7 +502,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the declaration with ID
   /// = I + 1 has already been loaded.
-  PagedVector<Decl *> DeclsLoaded;
+  llvm::PagedVector<Decl *> DeclsLoaded;
 
   using GlobalDeclMapType =
       ContinuousRangeMap<serialization::DeclID, ModuleFile *, 4>;

>From 875e73690eecdcd529febba64b0c7f8728f58ac9 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Fri, 15 Sep 2023 11:39:09 +0200
Subject: [PATCH 10/12] Address one more round of comments

---
 clang/lib/Serialization/ASTReader.cpp |  6 ++++--
 llvm/include/llvm/ADT/PagedVector.h   | 14 ++++++++++----
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 500f629a9f1e2cd..6e8893b19519eb4 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -7944,9 +7944,11 @@ void ASTReader::PrintStats() {
   std::fprintf(stderr, "*** AST File Statistics:\n");
 
   unsigned NumTypesLoaded =
-      llvm::count_if(TypesLoaded.materialised(), [](QualType& item) { return item != QualType(); });
+      llvm::count_if(TypesLoaded.materialised(),
+                     [](QualType const &item) { return item != QualType(); });
   unsigned NumDeclsLoaded =
-      llvm::count_if(DeclsLoaded.materialised(), [](Decl* item) { return item != (Decl *)nullptr);
+      llvm::count_if(DeclsLoaded.materialised(),
+                     [](Decl const *item) { return item != (Decl *)nullptr; });
   unsigned NumIdentifiersLoaded =
       IdentifiersLoaded.size() -
       llvm::count(IdentifiersLoaded, (IdentifierInfo *)nullptr);
diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
index 12edc17eeba956b..58961e7de0a164c 100644
--- a/llvm/include/llvm/ADT/PagedVector.h
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -41,12 +41,14 @@ template <typename T, int PAGE_SIZE = 1024 / sizeof(T)> class PagedVector {
 
 public:
   // Lookup an element at position Index.
-  T &operator[](int Index) const { return at(Index); }
+  T &operator[](size_t Index) const { return at(Index); }
 
   // Lookup an element at position i.
   // If the associated page is not filled, it will be filled with default
   // constructed elements. If the associated page is filled, return the element.
-  T &at(int Index) const {
+  T &at(size_t Index) const {
+    assert(Index < Size);
+    assert(Index / PAGE_SIZE < Lookup.size());
     auto &PageId = Lookup[Index / PAGE_SIZE];
     // If the range is not filled, fill it
     if (PageId == -1) {
@@ -59,8 +61,13 @@ template <typename T, int PAGE_SIZE = 1024 / sizeof(T)> class PagedVector {
         Data[I + OldSize] = T();
       }
     }
+    // Calculate the actual position in the Data vector
+    // by taking the start of the page and adding the offset
+    // in the page.
+    size_t StoreIndex = Index % PAGE_SIZE + PAGE_SIZE * PageId;
     // Return the element
-    return Data[Index % PAGE_SIZE + PAGE_SIZE * PageId];
+    assert(StoreIndex < Data.size());
+    return Data[StoreIndex];
   }
 
   // Return the capacity of the vector. I.e. the maximum size it can be expanded
@@ -79,7 +86,6 @@ template <typename T, int PAGE_SIZE = 1024 / sizeof(T)> class PagedVector {
     // You cannot shrink the vector, otherwise
     // one would have to invalidate contents which is expensive and
     // while giving the false hope that the resize is cheap.
-    assert(NewSize >= Size);
     if (NewSize <= Size) {
       return;
     }

>From b7d30b4ca6aaa8d0589ef797c50d2a636efee5fe Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Fri, 15 Sep 2023 12:28:19 +0200
Subject: [PATCH 11/12] Fix size_t to std::size_t

---
 llvm/include/llvm/ADT/PagedVector.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
index 58961e7de0a164c..6e57467c03ef652 100644
--- a/llvm/include/llvm/ADT/PagedVector.h
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -29,7 +29,7 @@ namespace llvm {
 // the first place.
 template <typename T, int PAGE_SIZE = 1024 / sizeof(T)> class PagedVector {
   // The actual number of element in the vector which can be accessed.
-  size_t Size = 0;
+  std::size_t Size = 0;
   // The position of the initial element of the page in the Data vector.
   // Pages are allocated contiguously in the Data vector.
   mutable std::vector<int> Lookup;
@@ -41,12 +41,12 @@ template <typename T, int PAGE_SIZE = 1024 / sizeof(T)> class PagedVector {
 
 public:
   // Lookup an element at position Index.
-  T &operator[](size_t Index) const { return at(Index); }
+  T &operator[](std::size_t Index) const { return at(Index); }
 
   // Lookup an element at position i.
   // If the associated page is not filled, it will be filled with default
   // constructed elements. If the associated page is filled, return the element.
-  T &at(size_t Index) const {
+  T &at(std::size_t Index) const {
     assert(Index < Size);
     assert(Index / PAGE_SIZE < Lookup.size());
     auto &PageId = Lookup[Index / PAGE_SIZE];
@@ -64,7 +64,7 @@ template <typename T, int PAGE_SIZE = 1024 / sizeof(T)> class PagedVector {
     // Calculate the actual position in the Data vector
     // by taking the start of the page and adding the offset
     // in the page.
-    size_t StoreIndex = Index % PAGE_SIZE + PAGE_SIZE * PageId;
+    std::size_t StoreIndex = Index % PAGE_SIZE + PAGE_SIZE * PageId;
     // Return the element
     assert(StoreIndex < Data.size());
     return Data[StoreIndex];
@@ -72,17 +72,17 @@ template <typename T, int PAGE_SIZE = 1024 / sizeof(T)> class PagedVector {
 
   // Return the capacity of the vector. I.e. the maximum size it can be expanded
   // to with the expand method without allocating more pages.
-  size_t capacity() const { return Lookup.size() * PAGE_SIZE; }
+  std::size_t capacity() const { return Lookup.size() * PAGE_SIZE; }
 
   // Return the size of the vector. I.e. the maximum index that can be
   // accessed, i.e. the maximum value which was used as argument of the
   // expand method.
-  size_t size() const { return Size; }
+  std::size_t size() const { return Size; }
 
   // Expands the vector to the given NewSize number of elements.
   // If the vector was smaller, allocates new pages as needed.
   // It should be called only with NewSize >= Size.
-  void expand(size_t NewSize) {
+  void expand(std::size_t NewSize) {
     // You cannot shrink the vector, otherwise
     // one would have to invalidate contents which is expensive and
     // while giving the false hope that the resize is cheap.

>From add6e61608b677f67a76106adac7f61d42a589d5 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Fri, 15 Sep 2023 12:41:22 +0200
Subject: [PATCH 12/12] Add missing header on Linux

---
 llvm/include/llvm/ADT/PagedVector.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
index 6e57467c03ef652..a0d86e135e87ccd 100644
--- a/llvm/include/llvm/ADT/PagedVector.h
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -13,6 +13,7 @@
 #ifndef LLVM_ADT_PAGEDVECTOR_H
 #define LLVM_ADT_PAGEDVECTOR_H
 
+#include <cassert>
 #include <vector>
 
 namespace llvm {



More information about the llvm-commits mailing list