[llvm] b5f0eae - ADT: SmallVector size/capacity use word-size integers when elements are small

Andrew Browne via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 24 18:59:19 PDT 2020


Author: Andrew Browne
Date: 2020-04-24T18:57:54-07:00
New Revision: b5f0eae1dc3c09c020cdf9d07238dec9acdacf5f

URL: https://github.com/llvm/llvm-project/commit/b5f0eae1dc3c09c020cdf9d07238dec9acdacf5f
DIFF: https://github.com/llvm/llvm-project/commit/b5f0eae1dc3c09c020cdf9d07238dec9acdacf5f.diff

LOG: ADT: SmallVector size/capacity use word-size integers when elements are small

SmallVector currently uses 32bit integers for size and capacity to reduce
sizeof(SmallVector). This limits the number of elements to UINT32_MAX.

For a SmallVector<char>, this limits the SmallVector size to only 4GB.
Buffering bitcode output uses SmallVector<char>, but needs >4GB output.

This changes SmallVector size and capacity to conditionally use word-size
integers if the element type is small (<4 bytes). For larger elements types,
the vector size can reach ~16GB with 32bit size.

Making this conditional on the element type provides both the smaller
sizeof(SmallVector) for larger types which are unlikely to grow so large,
and supports larger capacities for smaller element types.

Added: 
    

Modified: 
    llvm/include/llvm/ADT/SmallVector.h
    llvm/lib/Support/SmallVector.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h
index 12a58761955b..d02d7a65b23e 100644
--- a/llvm/include/llvm/ADT/SmallVector.h
+++ b/llvm/include/llvm/ADT/SmallVector.h
@@ -16,10 +16,10 @@
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/AlignOf.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/MemAlloc.h"
 #include "llvm/Support/type_traits.h"
-#include "llvm/Support/ErrorHandling.h"
 #include <algorithm>
 #include <cassert>
 #include <cstddef>
@@ -27,6 +27,7 @@
 #include <cstring>
 #include <initializer_list>
 #include <iterator>
+#include <limits>
 #include <memory>
 #include <new>
 #include <type_traits>
@@ -34,11 +35,23 @@
 
 namespace llvm {
 
-/// This is all the non-templated stuff common to all SmallVectors.
-class SmallVectorBase {
+/// This is all the stuff common to all SmallVectors.
+///
+/// The template parameter specifies the type which should be used to hold the
+/// Size and Capacity of the SmallVector, so it can be adjusted.
+/// Using 32 bit size is desirable to shink the size of the SmallVector.
+/// Using 64 bit size is desirable for cases like SmallVector<char>, where a
+/// 32 bit size would limit the vector to ~4GB. SmallVectors are used for
+/// buffering bitcode output - which can exceed 4GB.
+template <class Size_T> class SmallVectorBase {
 protected:
   void *BeginX;
-  unsigned Size = 0, Capacity;
+  Size_T Size = 0, Capacity;
+
+  /// The maximum value of the Size_T used.
+  static constexpr size_t SizeTypeMax() {
+    return std::numeric_limits<Size_T>::max();
+  }
 
   SmallVectorBase() = delete;
   SmallVectorBase(void *FirstEl, size_t TotalCapacity)
@@ -70,9 +83,13 @@ class SmallVectorBase {
   }
 };
 
+template <class T>
+using SmallVectorSizeType =
+    typename std::conditional<sizeof(T) < 4, uintptr_t, uint32_t>::type;
+
 /// Figure out the offset of the first element.
 template <class T, typename = void> struct SmallVectorAlignmentAndSize {
-  AlignedCharArrayUnion<SmallVectorBase> Base;
+  AlignedCharArrayUnion<SmallVectorBase<SmallVectorSizeType<T>>> Base;
   AlignedCharArrayUnion<T> FirstEl;
 };
 
@@ -80,7 +97,10 @@ template <class T, typename = void> struct SmallVectorAlignmentAndSize {
 /// the type T is a POD. The extra dummy template argument is used by ArrayRef
 /// to avoid unnecessarily requiring T to be complete.
 template <typename T, typename = void>
-class SmallVectorTemplateCommon : public SmallVectorBase {
+class SmallVectorTemplateCommon
+    : public SmallVectorBase<SmallVectorSizeType<T>> {
+  using Base = SmallVectorBase<SmallVectorSizeType<T>>;
+
   /// Find the address of the first element.  For this pointer math to be valid
   /// with small-size of 0 for T with lots of alignment, it's important that
   /// SmallVectorStorage is properly-aligned even for small-size of 0.
@@ -92,21 +112,20 @@ class SmallVectorTemplateCommon : public SmallVectorBase {
   // Space after 'FirstEl' is clobbered, do not add any instance vars after it.
 
 protected:
-  SmallVectorTemplateCommon(size_t Size)
-      : SmallVectorBase(getFirstEl(), Size) {}
+  SmallVectorTemplateCommon(size_t Size) : Base(getFirstEl(), Size) {}
 
   void grow_pod(size_t MinCapacity, size_t TSize) {
-    SmallVectorBase::grow_pod(getFirstEl(), MinCapacity, TSize);
+    Base::grow_pod(getFirstEl(), MinCapacity, TSize);
   }
 
   /// Return true if this is a smallvector which has not had dynamic
   /// memory allocated for it.
-  bool isSmall() const { return BeginX == getFirstEl(); }
+  bool isSmall() const { return this->BeginX == getFirstEl(); }
 
   /// Put this vector in a state of being small.
   void resetToSmall() {
-    BeginX = getFirstEl();
-    Size = Capacity = 0; // FIXME: Setting Capacity to 0 is suspect.
+    this->BeginX = getFirstEl();
+    this->Size = this->Capacity = 0; // FIXME: Setting Capacity to 0 is suspect.
   }
 
 public:
@@ -124,6 +143,10 @@ class SmallVectorTemplateCommon : public SmallVectorBase {
   using pointer = T *;
   using const_pointer = const T *;
 
+  using Base::capacity;
+  using Base::empty;
+  using Base::size;
+
   // forward iterator creation methods.
   iterator begin() { return (iterator)this->BeginX; }
   const_iterator begin() const { return (const_iterator)this->BeginX; }
@@ -137,7 +160,9 @@ class SmallVectorTemplateCommon : public SmallVectorBase {
   const_reverse_iterator rend() const { return const_reverse_iterator(begin());}
 
   size_type size_in_bytes() const { return size() * sizeof(T); }
-  size_type max_size() const { return size_type(-1) / sizeof(T); }
+  size_type max_size() const {
+    return std::min(this->SizeTypeMax(), size_type(-1) / sizeof(T));
+  }
 
   size_t capacity_in_bytes() const { return capacity() * sizeof(T); }
 
@@ -232,18 +257,21 @@ class SmallVectorTemplateBase : public SmallVectorTemplateCommon<T> {
 // Define this out-of-line to dissuade the C++ compiler from inlining it.
 template <typename T, bool TriviallyCopyable>
 void SmallVectorTemplateBase<T, TriviallyCopyable>::grow(size_t MinSize) {
-  if (MinSize > UINT32_MAX)
+  // Ensure we can fit the new capacity.
+  // This is only going to be applicable when the capacity is 32 bit.
+  if (MinSize > this->SizeTypeMax())
     report_bad_alloc_error("SmallVector capacity overflow during allocation");
 
   // Ensure we can meet the guarantee of space for at least one more element.
   // The above check alone will not catch the case where grow is called with a
   // default MinCapacity of 0, but the current capacity cannot be increased.
-  if (this->capacity() == size_t(UINT32_MAX))
+  // This is only going to be applicable when the capacity is 32 bit.
+  if (this->capacity() == this->SizeTypeMax())
     report_bad_alloc_error("SmallVector capacity unable to grow");
 
   // Always grow, even from zero.
   size_t NewCapacity = size_t(NextPowerOf2(this->capacity() + 2));
-  NewCapacity = std::min(std::max(NewCapacity, MinSize), size_t(UINT32_MAX));
+  NewCapacity = std::min(std::max(NewCapacity, MinSize), this->SizeTypeMax());
   T *NewElts = static_cast<T*>(llvm::safe_malloc(NewCapacity*sizeof(T)));
 
   // Move the elements over.

diff  --git a/llvm/lib/Support/SmallVector.cpp b/llvm/lib/Support/SmallVector.cpp
index 9ece0c5a3b62..e267bdeadfb9 100644
--- a/llvm/lib/Support/SmallVector.cpp
+++ b/llvm/lib/Support/SmallVector.cpp
@@ -37,24 +37,30 @@ static_assert(sizeof(SmallVector<void *, 1>) ==
                   sizeof(unsigned) * 2 + sizeof(void *) * 2,
               "wasted space in SmallVector size 1");
 
-/// grow_pod - This is an implementation of the grow() method which only works
-/// on POD-like datatypes and is out of line to reduce code duplication.
-/// This function will report a fatal error if it cannot increase capacity.
-void SmallVectorBase::grow_pod(void *FirstEl, size_t MinCapacity,
-                               size_t TSize) {
-  // Ensure we can fit the new capacity in 32 bits.
-  if (MinCapacity > UINT32_MAX)
+static_assert(sizeof(SmallVector<char, 0>) ==
+                  sizeof(void *) * 2 + sizeof(void *),
+              "1 byte elements have word-sized type for size and capacity");
+
+// Note: Moving this function into the header may cause performance regression.
+template <class Size_T>
+void SmallVectorBase<Size_T>::grow_pod(void *FirstEl, size_t MinCapacity,
+                                       size_t TSize) {
+  // Ensure we can fit the new capacity.
+  // This is only going to be applicable when the capacity is 32 bit.
+  if (MinCapacity > SizeTypeMax())
     report_bad_alloc_error("SmallVector capacity overflow during allocation");
 
   // Ensure we can meet the guarantee of space for at least one more element.
   // The above check alone will not catch the case where grow is called with a
   // default MinCapacity of 0, but the current capacity cannot be increased.
-  if (capacity() == size_t(UINT32_MAX))
+  // This is only going to be applicable when the capacity is 32 bit.
+  if (capacity() == SizeTypeMax())
     report_bad_alloc_error("SmallVector capacity unable to grow");
 
+  // In theory 2*capacity can overflow if the capacity is 64 bit, but the
+  // original capacity would never be large enough for this to be a problem.
   size_t NewCapacity = 2 * capacity() + 1; // Always grow.
-  NewCapacity =
-      std::min(std::max(NewCapacity, MinCapacity), size_t(UINT32_MAX));
+  NewCapacity = std::min(std::max(NewCapacity, MinCapacity), SizeTypeMax());
 
   void *NewElts;
   if (BeginX == FirstEl) {
@@ -70,3 +76,6 @@ void SmallVectorBase::grow_pod(void *FirstEl, size_t MinCapacity,
   this->BeginX = NewElts;
   this->Capacity = NewCapacity;
 }
+
+template class llvm::SmallVectorBase<uint32_t>;
+template class llvm::SmallVectorBase<uintptr_t>;


        


More information about the llvm-commits mailing list