[clang] Introduce paged vector (PR #66430)
Vassil Vassilev via cfe-commits
cfe-commits at lists.llvm.org
Thu Sep 21 02:59:15 PDT 2023
================
@@ -0,0 +1,309 @@
+//===- llvm/ADT/PagedVector.h - 'Lazily allocated' vectors --------*- C++
+//-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PagedVector class.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_ADT_PAGEDVECTOR_H
+#define LLVM_ADT_PAGEDVECTOR_H
+
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Allocator.h"
+#include <cassert>
+#include <vector>
+
+namespace llvm {
+// A vector that allocates memory in pages.
+// Order is kept, but memory is allocated only when one element of the page is
+// accessed. This introduces a level of indirection, but it is useful when you
+// have a sparsely initialised vector where the full size is allocated upfront
+// with the default constructor and elements are initialised later, on first
+// access.
+//
+// Notice that this does not have iterators, because if you have iterators it
+// probably means you are going to touch all the memory in any case, so better
+// use a std::vector in the first place.
+//
+// Pages hold PageSize elements each and are allocated using the
+// BumpPtrAllocator.
+template <typename T, size_t PageSize = 1024 / sizeof(T)> class PagedVector {
+ static_assert(PageSize > 0, "PageSize must be greater than 0. Most likely "
+ "you want it to be greater than 16.");
+ // The actual number of elements in the vector which can be accessed.
+ size_t Size = 0;
+
+ // For each page, the address of its first element stored as a uintptr_t, or
+ // InvalidPage if the page has not been allocated yet.
+ mutable SmallVector<uintptr_t, 0> PageToDataIdx;
+ // Allocator providing the page storage. All the elements of a page are
+ // default constructed on the first access of any element of that page and
+ // are stored contiguously within the page. Pages are allocated in the order
+ // in which they are first accessed. The bool is true when the allocator is
+ // owned by this PagedVector.
+ PointerIntPair<BumpPtrAllocator *, 1, bool> Allocator;
+
+ constexpr static uintptr_t InvalidPage = SIZE_MAX;
+
+public:
+ using value_type = T;
+
+ // Default constructor. We build our own allocator and mark it as such with
+ // `true` in the second pair element, so that the destructor deletes it.
+ PagedVector() : Allocator(new BumpPtrAllocator, true) {}
+
+ // Construct a vector that allocates its pages from the externally provided
+ // allocator `A`, which must not be null. The allocator is marked as not
+ // owned with `false` in the second pair element.
+ PagedVector(BumpPtrAllocator *A) : Allocator(A, false) {
+   assert(A != nullptr && "Allocator cannot be null");
+ }
+
+ // Copying or moving is disabled: a member-wise copy would share the page
+ // pointers and the (possibly owned) allocator between two instances, so the
+ // elements would be destroyed twice and an owned allocator deleted twice.
+ PagedVector(const PagedVector &) = delete;
+ PagedVector(PagedVector &&) = delete;
+ PagedVector &operator=(const PagedVector &) = delete;
+ PagedVector &operator=(PagedVector &&) = delete;
+
+ // Release the contents through clear() and then, only when the allocator was
+ // created by this instance, delete the allocator too.
+ ~PagedVector() {
+   clear();
+   // A borrowed allocator stays alive; nothing more to do.
+   if (!Allocator.getInt())
+     return;
+   delete Allocator.getPointer();
+ }
+
+ // Return a reference to the element at position `Index`, which must be
+ // smaller than the current size.
+ // The first access to any element of a page allocates that page and default
+ // constructs all of its PageSize elements; later accesses just index into
+ // the already allocated page.
+ T &operator[](size_t Index) const {
+   assert(Index < Size);
+   assert(Index / PageSize < PageToDataIdx.size());
+   uintptr_t &Slot = PageToDataIdx[Index / PageSize];
+   if (Slot == InvalidPage) {
+     // Lazily materialise the page: grab raw storage, then value-initialise
+     // every element in it.
+     T *Fresh = Allocator.getPointer()->template Allocate<T>(PageSize);
+     for (T *Elem = Fresh, *End = Fresh + PageSize; Elem != End; ++Elem)
+       new (Elem) T();
+     Slot = reinterpret_cast<uintptr_t>(Fresh);
+   }
+   // Index into the page that holds the requested element.
+   T *Page = reinterpret_cast<T *>(Slot);
+   return Page[Index % PageSize];
+ }
+
+ // Return the number of elements covered by the pages currently tracked in
+ // the page table, i.e. the number of tracked pages times PageSize.
+ [[nodiscard]] size_t capacity() const {
+   const size_t NumPages = PageToDataIdx.size();
+   return NumPages * PageSize;
+ }
+
+ // Return the number of elements in the vector, i.e. the value that was most
+ // recently established by the resize method. Valid indices are those
+ // strictly smaller than the returned value.
+ [[nodiscard]] size_t size() const {
+   return Size;
+ }
+
+ // Resize the vector to hold NewSize elements. NewSize may be larger or
+ // smaller than the current size; a NewSize of 0 is equivalent to clear().
+ //
+ // Growing only extends the page table with InvalidPage entries: the pages
+ // themselves are allocated lazily on first access.
+ //
+ // Shrinking destroys the elements of, and deallocates, every allocated page
+ // that lies entirely past the new last page. The elements of the new last
+ // page that are past NewSize are deliberately NOT destroyed: pages are
+ // constructed and destroyed as a whole (all PageSize elements at once), so
+ // destroying part of a retained page would lead to those elements being
+ // destroyed a second time when the page is eventually released, and to
+ // destroyed objects being exposed if the vector grows again within that
+ // page. As a consequence, growing again within the same page exposes the
+ // previous values of those elements rather than freshly constructed ones.
+ void resize(size_t NewSize) {
+   if (NewSize == 0) {
+     clear();
+     return;
+   }
+   // Number of pages needed to hold NewSize elements (rounded up).
+   const size_t NewPageCount = (NewSize - 1) / PageSize + 1;
+   // Release every allocated page that is no longer needed. This loop does
+   // not run when the page table is not being shrunk.
+   for (size_t I = NewPageCount, E = PageToDataIdx.size(); I < E; ++I) {
+     uintptr_t PagePtr = PageToDataIdx[I];
+     // Pages that were never accessed have no storage to release.
+     if (PagePtr == InvalidPage)
+       continue;
+     T *Page = reinterpret_cast<T *>(PagePtr);
+     // We need to invoke the destructor on all the elements of the page.
+     for (size_t J = 0; J < PageSize; ++J)
+       Page[J].~T();
+     Allocator.getPointer()->Deallocate(Page);
+   }
+   // We use InvalidPage to indicate that a page has not been allocated yet,
+   // instead of a separate bool, to avoid wasting space.
+   PageToDataIdx.resize(NewPageCount, InvalidPage);
+   Size = NewSize;
+ }
+
+ // Tell whether the vector currently holds no elements.
+ [[nodiscard]] bool empty() const {
+   return !Size;
+ }
+
+ /// Clear the vector, i.e. clear the allocated pages, the whole page
+ /// lookup index and reset the size.
+ void clear() {
+ Size = 0;
+ for (uintptr_t Page : PageToDataIdx) {
+ for (size_t I = 0; I < PageSize; ++I)
+ reinterpret_cast<T *>(Page)[I].~T();
+ // If we do not own the allocator, deallocate the pages one by one.
+ if (Allocator.getInt() == false) {
----------------
vgvassilev wrote:
```suggestion
if (!Allocator.getInt()) {
```
https://github.com/llvm/llvm-project/pull/66430
More information about the cfe-commits
mailing list