[llvm] d7ff003 - ADT: Fix reference invalidation in SmallVector::emplace_back and assign(N,V)
Duncan P. N. Exon Smith via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 21 12:11:51 PST 2021
Author: Duncan P. N. Exon Smith
Date: 2021-01-21T12:11:41-08:00
New Revision: d7ff0036463fbf049a240fe3792fcfcd8081c41e
URL: https://github.com/llvm/llvm-project/commit/d7ff0036463fbf049a240fe3792fcfcd8081c41e
DIFF: https://github.com/llvm/llvm-project/commit/d7ff0036463fbf049a240fe3792fcfcd8081c41e.diff
LOG: ADT: Fix reference invalidation in SmallVector::emplace_back and assign(N,V)
This fixes the final (I think?) reference invalidation in `SmallVector`
that we need to fix to align with `std::vector`. (There is still some
left in the range insert / append / assign, but the standard calls that
UB for `std::vector` so I think we don't care?)
For POD-like types, reimplement `emplace_back()` in terms of
`push_back()`, taking a copy even for large `T` rather than lose the
realloc optimization in `grow_pod()`.
For other types, split the grow operation in three and construct the new
element in the middle.
- `mallocForGrow()` calculates the new capacity and returns the result
of `safe_malloc()`. We only need a single definition per
`SmallVectorBase` so this is defined in SmallVector.cpp to avoid code
size bloat. Moving this part of non-POD grow to the source file also
allows the logic to be easily shared with `grow_pod`, and
`report_size_overflow()` and `report_at_maximum_capacity()` can move
there too.
- `moveElementsForGrow()` moves elements from the old to the new
allocation.
- `takeAllocationForGrow()` frees the old allocation and saves the
new allocation and capacity .
`SmallVector:assign(size_type, const T&)` also uses the split-grow
operations for non-POD, but it also has a semantic change when not
growing. Previously, assign would start with `clear()`, and so the old
elements were destructed and all elements of the new vector were
copy-constructed (potentially invalidating references). The new
implementation skips destruction and uses copy-assignment for the prefix
of the new vector that fits. The new semantics match what libc++ does
for `std::vector::assign()`.
Note that the following is another possible implementation:
```
void assign(size_type NumElts, ValueParamT Elt) {
std::fill_n(this->begin(), std::min(NumElts, this->size()), Elt);
this->resize(NumElts, Elt);
}
```
The downside of this simpler implementation is that if the vector has to
grow there will be `size()` redundant copy operations.
(I had planned on splitting this patch up into three for committing
(after getting performance numbers / initial review), but I've realized
that if this does for some reason need to be reverted we'll probably
want to revert the whole package...)
Differential Revision: https://reviews.llvm.org/D94739
Added:
Modified:
llvm/include/llvm/ADT/SmallVector.h
llvm/lib/Support/SmallVector.cpp
llvm/unittests/ADT/SmallVectorTest.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h
index 2e47846ee7cf..dd72937c19e2 100644
--- a/llvm/include/llvm/ADT/SmallVector.h
+++ b/llvm/include/llvm/ADT/SmallVector.h
@@ -56,18 +56,16 @@ template <class Size_T> class SmallVectorBase {
SmallVectorBase(void *FirstEl, size_t TotalCapacity)
: BeginX(FirstEl), Capacity(TotalCapacity) {}
+ /// This is a helper for \a grow() that's out of line to reduce code
+ /// duplication. This function will report a fatal error if it can't grow at
+ /// least to \p MinSize.
+ void *mallocForGrow(size_t MinSize, size_t TSize, size_t &NewCapacity);
+
/// This is an implementation of the grow() method which only works
/// on POD-like data types and is out of line to reduce code duplication.
/// This function will report a fatal error if it cannot increase capacity.
void grow_pod(void *FirstEl, size_t MinSize, size_t TSize);
- /// Report that MinSize doesn't fit into this vector's size type. Throws
- /// std::length_error or calls report_fatal_error.
- LLVM_ATTRIBUTE_NORETURN static void report_size_overflow(size_t MinSize);
- /// Report that this vector is already at maximum capacity. Throws
- /// std::length_error or calls report_fatal_error.
- LLVM_ATTRIBUTE_NORETURN static void report_at_maximum_capacity();
-
public:
size_t size() const { return Size; }
size_t capacity() const { return Capacity; }
@@ -211,15 +209,6 @@ class SmallVectorTemplateCommon
bool> = false>
void assertSafeToAddRange(ItTy, ItTy) {}
- /// Check whether any argument will be invalidated by growing for
- /// emplace_back.
- template <class ArgType1, class... ArgTypes>
- void assertSafeToEmplace(ArgType1 &Arg1, ArgTypes &... Args) {
- this->assertSafeToAdd(&Arg1);
- this->assertSafeToEmplace(Args...);
- }
- void assertSafeToEmplace() {}
-
/// Reserve enough space to add one element, and return the updated element
/// pointer in case it was a reference to the storage.
template <class U>
@@ -359,6 +348,21 @@ class SmallVectorTemplateBase : public SmallVectorTemplateCommon<T> {
/// element, or MinSize more elements if specified.
void grow(size_t MinSize = 0);
+ /// Create a new allocation big enough for \p MinSize and pass back its size
+ /// in \p NewCapacity. This is the first section of \a grow().
+ T *mallocForGrow(size_t MinSize, size_t &NewCapacity) {
+ return static_cast<T *>(
+ SmallVectorBase<SmallVectorSizeType<T>>::mallocForGrow(
+ MinSize, sizeof(T), NewCapacity));
+ }
+
+ /// Move existing elements over to the new allocation \p NewElts, the middle
+ /// section of \a grow().
+ void moveElementsForGrow(T *NewElts);
+
+ /// Transfer ownership of the allocation, finishing up \a grow().
+ void takeAllocationForGrow(T *NewElts, size_t NewCapacity);
+
/// Reserve enough space to add one element, and return the updated element
/// pointer in case it was a reference to the storage.
const T *reserveForParamAndGetAddress(const T &Elt, size_t N = 1) {
@@ -375,6 +379,27 @@ class SmallVectorTemplateBase : public SmallVectorTemplateCommon<T> {
static T &&forward_value_param(T &&V) { return std::move(V); }
static const T &forward_value_param(const T &V) { return V; }
+ void growAndAssign(size_t NumElts, const T &Elt) {
+ // Grow manually in case Elt is an internal reference.
+ size_t NewCapacity;
+ T *NewElts = mallocForGrow(NumElts, NewCapacity);
+ std::uninitialized_fill_n(NewElts, NumElts, Elt);
+ this->destroy_range(this->begin(), this->end());
+ takeAllocationForGrow(NewElts, NewCapacity);
+ this->set_size(NumElts);
+ }
+
+ template <typename... ArgTypes> T &growAndEmplaceBack(ArgTypes &&... Args) {
+ // Grow manually in case one of Args is an internal reference.
+ size_t NewCapacity;
+ T *NewElts = mallocForGrow(0, NewCapacity);
+ ::new ((void *)(NewElts + this->size())) T(std::forward<ArgTypes>(Args)...);
+ moveElementsForGrow(NewElts);
+ takeAllocationForGrow(NewElts, NewCapacity);
+ this->set_size(this->size() + 1);
+ return this->back();
+ }
+
public:
void push_back(const T &Elt) {
const T *EltPtr = reserveForParamAndGetAddress(Elt);
@@ -397,29 +422,27 @@ class SmallVectorTemplateBase : public SmallVectorTemplateCommon<T> {
// Define this out-of-line to dissuade the C++ compiler from inlining it.
template <typename T, bool TriviallyCopyable>
void SmallVectorTemplateBase<T, TriviallyCopyable>::grow(size_t MinSize) {
- // Ensure we can fit the new capacity.
- // This is only going to be applicable when the capacity is 32 bit.
- if (MinSize > this->SizeTypeMax())
- this->report_size_overflow(MinSize);
-
- // Ensure we can meet the guarantee of space for at least one more element.
- // The above check alone will not catch the case where grow is called with a
- // default MinSize of 0, but the current capacity cannot be increased.
- // This is only going to be applicable when the capacity is 32 bit.
- if (this->capacity() == this->SizeTypeMax())
- this->report_at_maximum_capacity();
-
- // Always grow, even from zero.
- size_t NewCapacity = size_t(NextPowerOf2(this->capacity() + 2));
- NewCapacity = std::min(std::max(NewCapacity, MinSize), this->SizeTypeMax());
- T *NewElts = static_cast<T*>(llvm::safe_malloc(NewCapacity*sizeof(T)));
+ size_t NewCapacity;
+ T *NewElts = mallocForGrow(MinSize, NewCapacity);
+ moveElementsForGrow(NewElts);
+ takeAllocationForGrow(NewElts, NewCapacity);
+}
+// Define this out-of-line to dissuade the C++ compiler from inlining it.
+template <typename T, bool TriviallyCopyable>
+void SmallVectorTemplateBase<T, TriviallyCopyable>::moveElementsForGrow(
+ T *NewElts) {
// Move the elements over.
this->uninitialized_move(this->begin(), this->end(), NewElts);
// Destroy the original elements.
destroy_range(this->begin(), this->end());
+}
+// Define this out-of-line to dissuade the C++ compiler from inlining it.
+template <typename T, bool TriviallyCopyable>
+void SmallVectorTemplateBase<T, TriviallyCopyable>::takeAllocationForGrow(
+ T *NewElts, size_t NewCapacity) {
// If this wasn't grown from the inline copy, deallocate the old space.
if (!this->isSmall())
free(this->begin());
@@ -502,6 +525,23 @@ class SmallVectorTemplateBase<T, true> : public SmallVectorTemplateCommon<T> {
/// Copy \p V or return a reference, depending on \a ValueParamT.
static ValueParamT forward_value_param(ValueParamT V) { return V; }
+ void growAndAssign(size_t NumElts, T Elt) {
+ // Elt has been copied in case it's an internal reference, side-stepping
+ // reference invalidation problems without losing the realloc optimization.
+ this->set_size(0);
+ this->grow(NumElts);
+ std::uninitialized_fill_n(this->begin(), NumElts, Elt);
+ this->set_size(NumElts);
+ }
+
+ template <typename... ArgTypes> T &growAndEmplaceBack(ArgTypes &&... Args) {
+ // Use push_back with a copy in case Args has an internal reference,
+ // side-stepping reference invalidation problems without losing the realloc
+ // optimization.
+ push_back(T(std::forward<ArgTypes>(Args)...));
+ return this->back();
+ }
+
public:
void push_back(ValueParamT Elt) {
const T *EltPtr = reserveForParamAndGetAddress(Elt);
@@ -624,17 +664,25 @@ class SmallVectorImpl : public SmallVectorTemplateBase<T> {
append(IL.begin(), IL.end());
}
- // FIXME: Consider assigning over existing elements, rather than clearing &
- // re-initializing them - for all assign(...) variants.
+ void assign(size_type NumElts, ValueParamT Elt) {
+ // Note that Elt could be an internal reference.
+ if (NumElts > this->capacity()) {
+ this->growAndAssign(NumElts, Elt);
+ return;
+ }
- void assign(size_type NumElts, const T &Elt) {
- this->assertSafeToReferenceAfterResize(&Elt, 0);
- clear();
- this->reserve(NumElts);
+ // Assign over existing elements.
+ std::fill_n(this->begin(), std::min(NumElts, this->size()), Elt);
+ if (NumElts > this->size())
+ std::uninitialized_fill_n(this->end(), NumElts - this->size(), Elt);
+ else if (NumElts < this->size())
+ this->destroy_range(this->begin() + NumElts, this->end());
this->set_size(NumElts);
- std::uninitialized_fill(this->begin(), this->end(), Elt);
}
+ // FIXME: Consider assigning over existing elements, rather than clearing &
+ // re-initializing them - for all assign(...) variants.
+
template <typename in_iter,
typename = std::enable_if_t<std::is_convertible<
typename std::iterator_traits<in_iter>::iterator_category,
@@ -854,9 +902,9 @@ class SmallVectorImpl : public SmallVectorTemplateBase<T> {
}
template <typename... ArgTypes> reference emplace_back(ArgTypes &&... Args) {
- this->assertSafeToEmplace(Args...);
if (LLVM_UNLIKELY(this->size() >= this->capacity()))
- this->grow();
+ return this->growAndEmplaceBack(std::forward<ArgTypes>(Args)...);
+
::new ((void *)this->end()) T(std::forward<ArgTypes>(Args)...);
this->set_size(this->size() + 1);
return this->back();
diff --git a/llvm/lib/Support/SmallVector.cpp b/llvm/lib/Support/SmallVector.cpp
index 7e3d01fe6864..0005f7840912 100644
--- a/llvm/lib/Support/SmallVector.cpp
+++ b/llvm/lib/Support/SmallVector.cpp
@@ -45,12 +45,15 @@ static_assert(sizeof(SmallVector<char, 0>) ==
sizeof(void *) * 2 + sizeof(void *),
"1 byte elements have word-sized type for size and capacity");
-template <class Size_T>
-void SmallVectorBase<Size_T>::report_size_overflow(size_t MinSize) {
+/// Report that MinSize doesn't fit into this vector's size type. Throws
+/// std::length_error or calls report_fatal_error.
+LLVM_ATTRIBUTE_NORETURN
+static void report_size_overflow(size_t MinSize, size_t MaxSize);
+static void report_size_overflow(size_t MinSize, size_t MaxSize) {
std::string Reason = "SmallVector unable to grow. Requested capacity (" +
std::to_string(MinSize) +
") is larger than maximum value for size type (" +
- std::to_string(SizeTypeMax()) + ")";
+ std::to_string(MaxSize) + ")";
#ifdef LLVM_ENABLE_EXCEPTIONS
throw std::length_error(Reason);
#else
@@ -58,10 +61,13 @@ void SmallVectorBase<Size_T>::report_size_overflow(size_t MinSize) {
#endif
}
-template <class Size_T> void SmallVectorBase<Size_T>::report_at_maximum_capacity() {
+/// Report that this vector is already at maximum capacity. Throws
+/// std::length_error or calls report_fatal_error.
+LLVM_ATTRIBUTE_NORETURN static void report_at_maximum_capacity(size_t MaxSize);
+static void report_at_maximum_capacity(size_t MaxSize) {
std::string Reason =
"SmallVector capacity unable to grow. Already at maximum size " +
- std::to_string(SizeTypeMax());
+ std::to_string(MaxSize);
#ifdef LLVM_ENABLE_EXCEPTIONS
throw std::length_error(Reason);
#else
@@ -71,25 +77,40 @@ template <class Size_T> void SmallVectorBase<Size_T>::report_at_maximum_capacity
// Note: Moving this function into the header may cause performance regression.
template <class Size_T>
-void SmallVectorBase<Size_T>::grow_pod(void *FirstEl, size_t MinSize,
- size_t TSize) {
+static size_t getNewCapacity(size_t MinSize, size_t TSize, size_t OldCapacity) {
+ constexpr size_t MaxSize = std::numeric_limits<Size_T>::max();
+
// Ensure we can fit the new capacity.
// This is only going to be applicable when the capacity is 32 bit.
- if (MinSize > SizeTypeMax())
- report_size_overflow(MinSize);
+ if (MinSize > MaxSize)
+ report_size_overflow(MinSize, MaxSize);
// Ensure we can meet the guarantee of space for at least one more element.
// The above check alone will not catch the case where grow is called with a
// default MinSize of 0, but the current capacity cannot be increased.
// This is only going to be applicable when the capacity is 32 bit.
- if (capacity() == SizeTypeMax())
- report_at_maximum_capacity();
+ if (OldCapacity == MaxSize)
+ report_at_maximum_capacity(MaxSize);
// In theory 2*capacity can overflow if the capacity is 64 bit, but the
// original capacity would never be large enough for this to be a problem.
- size_t NewCapacity = 2 * capacity() + 1; // Always grow.
- NewCapacity = std::min(std::max(NewCapacity, MinSize), SizeTypeMax());
+ size_t NewCapacity = 2 * OldCapacity + 1; // Always grow.
+ return std::min(std::max(NewCapacity, MinSize), MaxSize);
+}
+// Note: Moving this function into the header may cause performance regression.
+template <class Size_T>
+void *SmallVectorBase<Size_T>::mallocForGrow(size_t MinSize, size_t TSize,
+ size_t &NewCapacity) {
+ NewCapacity = getNewCapacity<Size_T>(MinSize, TSize, this->capacity());
+ return llvm::safe_malloc(NewCapacity * TSize);
+}
+
+// Note: Moving this function into the header may cause performance regression.
+template <class Size_T>
+void SmallVectorBase<Size_T>::grow_pod(void *FirstEl, size_t MinSize,
+ size_t TSize) {
+ size_t NewCapacity = getNewCapacity<Size_T>(MinSize, TSize, this->capacity());
void *NewElts;
if (BeginX == FirstEl) {
NewElts = safe_malloc(NewCapacity * TSize);
diff --git a/llvm/unittests/ADT/SmallVectorTest.cpp b/llvm/unittests/ADT/SmallVectorTest.cpp
index 1f6c7db99fa8..b2cccc1a1e56 100644
--- a/llvm/unittests/ADT/SmallVectorTest.cpp
+++ b/llvm/unittests/ADT/SmallVectorTest.cpp
@@ -1179,16 +1179,41 @@ TYPED_TEST(SmallVectorReferenceInvalidationTest, AppendRange) {
}
TYPED_TEST(SmallVectorReferenceInvalidationTest, Assign) {
+ // Note: setup adds [1, 2, ...] to V until it's at capacity in small mode.
auto &V = this->V;
(void)V;
-#if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST
- // Regardless of capacity, assign should never reference an internal element.
- EXPECT_DEATH(V.assign(1, V.back()), this->AssertionMessage);
- EXPECT_DEATH(V.assign(this->NumBuiltinElts(V), V.back()),
- this->AssertionMessage);
- EXPECT_DEATH(V.assign(this->NumBuiltinElts(V) + 1, V.back()),
- this->AssertionMessage);
-#endif
+ int N = this->NumBuiltinElts(V);
+ ASSERT_EQ(unsigned(N), V.size());
+ ASSERT_EQ(unsigned(N), V.capacity());
+
+ // Check assign that shrinks in small mode.
+ V.assign(1, V.back());
+ EXPECT_EQ(1u, V.size());
+ EXPECT_EQ(N, V[0]);
+
+ // Check assign that grows within small mode.
+ ASSERT_LT(V.size(), V.capacity());
+ V.assign(V.capacity(), V.back());
+ for (int I = 0, E = V.size(); I != E; ++I) {
+ EXPECT_EQ(N, V[I]);
+
+ // Reset to [1, 2, ...].
+ V[I] = I + 1;
+ }
+
+ // Check assign that grows to large mode.
+ ASSERT_EQ(2, V[1]);
+ V.assign(V.capacity() + 1, V[1]);
+ for (int I = 0, E = V.size(); I != E; ++I) {
+ EXPECT_EQ(2, V[I]);
+
+ // Reset to [1, 2, ...].
+ V[I] = I + 1;
+ }
+
+ // Check assign that shrinks in large mode.
+ V.assign(1, V[1]);
+ EXPECT_EQ(2, V[0]);
}
TYPED_TEST(SmallVectorReferenceInvalidationTest, AssignRange) {
@@ -1289,11 +1314,25 @@ TYPED_TEST(SmallVectorReferenceInvalidationTest, InsertRange) {
}
TYPED_TEST(SmallVectorReferenceInvalidationTest, EmplaceBack) {
+ // Note: setup adds [1, 2, ...] to V until it's at capacity in small mode.
auto &V = this->V;
- (void)V;
-#if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST
- EXPECT_DEATH(V.emplace_back(V.back()), this->AssertionMessage);
-#endif
+ int N = this->NumBuiltinElts(V);
+
+ // Push back a reference to last element when growing from small storage.
+ V.emplace_back(V.back());
+ EXPECT_EQ(N, V.back());
+
+ // Check that the old value is still there (not moved away).
+ EXPECT_EQ(N, V[V.size() - 2]);
+
+ // Fill storage again.
+ V.back() = V.size();
+ while (V.size() < V.capacity())
+ V.push_back(V.size() + 1);
+
+ // Push back a reference to last element when growing from large storage.
+ V.emplace_back(V.back());
+ EXPECT_EQ(int(V.size()) - 1, V.back());
}
template <class VectorT>
@@ -1315,25 +1354,42 @@ class SmallVectorInternalReferenceInvalidationTest
SmallVectorTestBase::SetUp();
// Fill up the small size so that insertions move the elements.
- V.push_back(std::make_pair(0, 0));
+ for (int I = 0, E = NumBuiltinElts(V); I != E; ++I)
+ V.emplace_back(I + 1, I + 1);
}
};
// Test pairs of the same types from SmallVectorReferenceInvalidationTestTypes.
using SmallVectorInternalReferenceInvalidationTestTypes =
- ::testing::Types<SmallVector<std::pair<int, int>, 1>,
- SmallVector<std::pair<Constructable, Constructable>, 1>>;
+ ::testing::Types<SmallVector<std::pair<int, int>, 3>,
+ SmallVector<std::pair<Constructable, Constructable>, 3>>;
TYPED_TEST_CASE(SmallVectorInternalReferenceInvalidationTest,
SmallVectorInternalReferenceInvalidationTestTypes);
TYPED_TEST(SmallVectorInternalReferenceInvalidationTest, EmplaceBack) {
+ // Note: setup adds [1, 2, ...] to V until it's at capacity in small mode.
auto &V = this->V;
- (void)V;
-#if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST
- EXPECT_DEATH(V.emplace_back(V.back().first, 0), this->AssertionMessage);
- EXPECT_DEATH(V.emplace_back(0, V.back().second), this->AssertionMessage);
-#endif
+ int N = this->NumBuiltinElts(V);
+
+ // Push back a reference to last element when growing from small storage.
+ V.emplace_back(V.back().first, V.back().second);
+ EXPECT_EQ(N, V.back().first);
+ EXPECT_EQ(N, V.back().second);
+
+ // Check that the old value is still there (not moved away).
+ EXPECT_EQ(N, V[V.size() - 2].first);
+ EXPECT_EQ(N, V[V.size() - 2].second);
+
+ // Fill storage again.
+ V.back().first = V.back().second = V.size();
+ while (V.size() < V.capacity())
+ V.emplace_back(V.size() + 1, V.size() + 1);
+
+ // Push back a reference to last element when growing from large storage.
+ V.emplace_back(V.back().first, V.back().second);
+ EXPECT_EQ(int(V.size()) - 1, V.back().first);
+ EXPECT_EQ(int(V.size()) - 1, V.back().second);
}
} // end namespace
More information about the llvm-commits
mailing list