[clang] Introduce paged vector (PR #66430)

Giulio Eulisse via cfe-commits cfe-commits at lists.llvm.org
Thu Sep 14 13:48:58 PDT 2023


https://github.com/ktf created https://github.com/llvm/llvm-project/pull/66430:

None

>From b952f0577dfe8112f394bd5392212f843c0cc86e Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Thu, 14 Sep 2023 21:58:21 +0200
Subject: [PATCH 1/2] Introduce PagedVector class

The goal of the class is to be an (almost) drop in replacement for
SmallVector and std::vector when those are presized and filled later,
as it happens in SourceManager and ASTReader.

By splitting the actual vector in pages of the same size and allocating
the pages only when they are needed, using this containers reduces the
memory usage by a factor 4 for the cases relevant to the ALICE
experiment ROOT / cling usage.
---
 llvm/include/llvm/ADT/PagedVector.h | 96 +++++++++++++++++++++++++++++
 1 file changed, 96 insertions(+)
 create mode 100644 llvm/include/llvm/ADT/PagedVector.h

diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
new file mode 100644
index 000000000000000..dab0d249aa706e4
--- /dev/null
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -0,0 +1,96 @@
+//===- llvm/ADT/PagedVector.h - 'Lazyly allocated' vectors --------*- C++
+//-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PagedVector class.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_ADT_PAGEDVECTOR_H
+#define LLVM_ADT_PAGEDVECTOR_H
+
+#include <vector>
+
+// Notice that this does not have iterators, because if you
+// have iterators it probably means you are going to touch
+// all the memory in any case, so better use a std::vector in
+// the first place.
+template <typename T, int PAGE_SIZE = 1024 / sizeof(T)> class PagedVector {
+  size_t Size = 0;
+  // Index of where to find a given page in the data
+  mutable std::vector<int> Lookup;
+  // Actual data
+  mutable std::vector<T> Data;
+
+public:
+  // Add a range to the vector.
+  // When vector is accessed, it will call the callback to fill the range
+  // with data.
+
+  // Lookup an element at position i.
+  // If the given range is not filled, it will be filled.
+  // If the given range is filled, return the element.
+  T &operator[](int Index) const { return at(Index); }
+
+  T &at(int Index) const {
+    auto &PageId = Lookup[Index / PAGE_SIZE];
+    // If the range is not filled, fill it
+    if (PageId == -1) {
+      int OldSize = Data.size();
+      PageId = OldSize / PAGE_SIZE;
+      // Allocate the memory
+      Data.resize(OldSize + PAGE_SIZE);
+      // Fill the whole capacity with empty elements
+      for (int I = 0; I < PAGE_SIZE; ++I) {
+        Data[I + OldSize] = T();
+      }
+    }
+    // Return the element
+    return Data[Index % PAGE_SIZE + PAGE_SIZE * PageId];
+  }
+
+  // Return the size of the vector
+  size_t capacity() const { return Lookup.size() * PAGE_SIZE; }
+
+  size_t size() const { return Size; }
+
+  // Expands the vector to the given size.
+  // If the vector is already bigger, does nothing.
+  void expand(size_t NewSize) {
+    // You cannot shrink the vector, otherwise
+    // you would have to invalidate
+    assert(NewSize >= Size);
+    if (NewSize <= Size) {
+      return;
+    }
+    if (NewSize <= capacity()) {
+      Size = NewSize;
+      return;
+    }
+    auto Pages = NewSize / PAGE_SIZE;
+    auto Remainder = NewSize % PAGE_SIZE;
+    if (Remainder) {
+      Pages += 1;
+    }
+    assert(Pages > Lookup.size());
+    Lookup.resize(Pages, -1);
+    Size = NewSize;
+  }
+
+  // Return true if the vector is empty
+  bool empty() const { return Size == 0; }
+  /// Clear the vector
+  void clear() {
+    Size = 0;
+    Lookup.clear();
+    Data.clear();
+  }
+
+  std::vector<T> const &materialised() const { return Data; }
+};
+
+#endif // LLVM_ADT_PAGEDVECTOR_H

>From f0c75c8af0fea27b5b758e2e5775787f33595de3 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Thu, 14 Sep 2023 22:20:29 +0200
Subject: [PATCH 2/2] Use PagedVector to reduce memory usage of sparsely
 populated vectors

---
 clang/include/clang/Basic/SourceManager.h     |  3 ++-
 clang/include/clang/Serialization/ASTReader.h |  5 +++--
 clang/lib/Basic/SourceManager.cpp             | 12 ++++++------
 clang/lib/Serialization/ASTReader.cpp         |  9 +++++----
 4 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h
index 2f846502d6f3327..b1942a3d86afc37 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -43,6 +43,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -699,7 +700,7 @@ class SourceManager : public RefCountedBase<SourceManager> {
   ///
   /// Negative FileIDs are indexes into this table. To get from ID to an index,
   /// use (-ID - 2).
-  SmallVector<SrcMgr::SLocEntry, 0> LoadedSLocEntryTable;
+  PagedVector<SrcMgr::SLocEntry> LoadedSLocEntryTable;
 
   /// The starting offset of the next local SLocEntry.
   ///
diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index dc1eb21c27801fe..567aecc8246e761 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -38,6 +38,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -487,7 +488,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the type with
   /// ID = (I + 1) << FastQual::Width has already been loaded
-  std::vector<QualType> TypesLoaded;
+  PagedVector<QualType> TypesLoaded;
 
   using GlobalTypeMapType =
       ContinuousRangeMap<serialization::TypeID, ModuleFile *, 4>;
@@ -501,7 +502,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the declaration with ID
   /// = I + 1 has already been loaded.
-  std::vector<Decl *> DeclsLoaded;
+  PagedVector<Decl *> DeclsLoaded;
 
   using GlobalDeclMapType =
       ContinuousRangeMap<serialization::DeclID, ModuleFile *, 4>;
diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp
index 0521ac7b30339ab..c028afe63ac85ad 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -458,7 +458,7 @@ SourceManager::AllocateLoadedSLocEntries(unsigned NumSLocEntries,
       CurrentLoadedOffset - TotalSize < NextLocalOffset) {
     return std::make_pair(0, 0);
   }
-  LoadedSLocEntryTable.resize(LoadedSLocEntryTable.size() + NumSLocEntries);
+  LoadedSLocEntryTable.expand(LoadedSLocEntryTable.size() + NumSLocEntries);
   SLocEntryLoaded.resize(LoadedSLocEntryTable.size());
   CurrentLoadedOffset -= TotalSize;
   int ID = LoadedSLocEntryTable.size();
@@ -2344,11 +2344,11 @@ SourceManager::MemoryBufferSizes SourceManager::getMemoryBufferSizes() const {
 }
 
 size_t SourceManager::getDataStructureSizes() const {
-  size_t size = llvm::capacity_in_bytes(MemBufferInfos)
-    + llvm::capacity_in_bytes(LocalSLocEntryTable)
-    + llvm::capacity_in_bytes(LoadedSLocEntryTable)
-    + llvm::capacity_in_bytes(SLocEntryLoaded)
-    + llvm::capacity_in_bytes(FileInfos);
+  size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
+                llvm::capacity_in_bytes(LocalSLocEntryTable) +
+                llvm::capacity_in_bytes(LoadedSLocEntryTable.materialised()) +
+                llvm::capacity_in_bytes(SLocEntryLoaded) +
+                llvm::capacity_in_bytes(FileInfos);
 
   if (OverriddenFilesInfo)
     size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles);
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 0952244d037a77c..18b1a9a2480a73e 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -3260,7 +3260,7 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
           std::make_pair(LocalBaseTypeIndex,
                          F.BaseTypeIndex - LocalBaseTypeIndex));
 
-        TypesLoaded.resize(TypesLoaded.size() + F.LocalNumTypes);
+        TypesLoaded.expand(TypesLoaded.size() + F.LocalNumTypes);
       }
       break;
     }
@@ -3290,7 +3290,7 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
         // module.
         F.GlobalToLocalDeclIDs[&F] = LocalBaseDeclID;
 
-        DeclsLoaded.resize(DeclsLoaded.size() + F.LocalNumDecls);
+        DeclsLoaded.expand(DeclsLoaded.size() + F.LocalNumDecls);
       }
       break;
     }
@@ -7944,9 +7944,10 @@ void ASTReader::PrintStats() {
   std::fprintf(stderr, "*** AST File Statistics:\n");
 
   unsigned NumTypesLoaded =
-      TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
+      TypesLoaded.size() - llvm::count(TypesLoaded.materialised(), QualType());
   unsigned NumDeclsLoaded =
-      DeclsLoaded.size() - llvm::count(DeclsLoaded, (Decl *)nullptr);
+      DeclsLoaded.size() -
+      llvm::count(DeclsLoaded.materialised(), (Decl *)nullptr);
   unsigned NumIdentifiersLoaded =
       IdentifiersLoaded.size() -
       llvm::count(IdentifiersLoaded, (IdentifierInfo *)nullptr);



More information about the cfe-commits mailing list