[llvm] [SmallPtrSet] Don't leave tombstones in small mode (PR #96762)
Nikita Popov via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 27 00:28:17 PDT 2024
https://github.com/nikic updated https://github.com/llvm/llvm-project/pull/96762
>From 64f81fea91561b22a84ca7fa6f3c5d889ef99aa1 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Tue, 25 Jun 2024 10:27:15 +0200
Subject: [PATCH 1/2] [SmallPtrSet] Don't leave tombstones in small mode
When erasing elements in small mode, we currently leave behind
tombstones. This means that insertion into the SmallPtrSet also
has to check for these, making the operation more expensive.
We don't really need the tombstones in small mode, because we can
just replace with the last element in the set instead. This changes
the order, but SmallPtrSet order is fundamentally unstable anyway.
However, not leaving tombstones means that the erase() operation
now invalidates iterators. This means that consumers that want
to remove elements while iterating over the set have to use
remove_if() instead. If they fail to do so, there will be an
assertion failure thanks to debug epochs, so any such cases are
easy to detect (and I have already fixed any inside llvm at least).
---
llvm/include/llvm/ADT/SmallPtrSet.h | 55 +++++++++++++++++---------
llvm/unittests/ADT/SmallPtrSetTest.cpp | 39 ------------------
2 files changed, 37 insertions(+), 57 deletions(-)
diff --git a/llvm/include/llvm/ADT/SmallPtrSet.h b/llvm/include/llvm/ADT/SmallPtrSet.h
index 7bf3d825fd03c..c9c7a10fd018b 100644
--- a/llvm/include/llvm/ADT/SmallPtrSet.h
+++ b/llvm/include/llvm/ADT/SmallPtrSet.h
@@ -127,22 +127,11 @@ class SmallPtrSetImplBase : public DebugEpochBase {
std::pair<const void *const *, bool> insert_imp(const void *Ptr) {
if (isSmall()) {
// Check to see if it is already in the set.
- const void **LastTombstone = nullptr;
for (const void **APtr = SmallArray, **E = SmallArray + NumNonEmpty;
APtr != E; ++APtr) {
const void *Value = *APtr;
if (Value == Ptr)
return std::make_pair(APtr, false);
- if (Value == getTombstoneMarker())
- LastTombstone = APtr;
- }
-
- // Did we find any tombstone marker?
- if (LastTombstone != nullptr) {
- *LastTombstone = Ptr;
- --NumTombstones;
- incrementEpoch();
- return std::make_pair(LastTombstone, true);
}
// Nope, there isn't. If we stay small, just 'pushback' now.
@@ -161,14 +150,27 @@ class SmallPtrSetImplBase : public DebugEpochBase {
/// that the derived class can check that the right type of pointer is passed
/// in.
bool erase_imp(const void * Ptr) {
- const void *const *P = find_imp(Ptr);
- if (P == EndPointer())
+ if (isSmall()) {
+ for (const void **APtr = SmallArray, **E = SmallArray + NumNonEmpty;
+ APtr != E; ++APtr) {
+ if (*APtr == Ptr) {
+ *APtr = SmallArray[--NumNonEmpty];
+ incrementEpoch();
+ return true;
+ }
+ }
return false;
+ }
- const void **Loc = const_cast<const void **>(P);
- assert(*Loc == Ptr && "broken find!");
- *Loc = getTombstoneMarker();
+ auto *Bucket = FindBucketFor(Ptr);
+ if (*Bucket != Ptr)
+ return false;
+
+ *const_cast<const void **>(Bucket) = getTombstoneMarker();
NumTombstones++;
+ // Treat this consistently from an API perspective, even if we don't
+ // actually invalidate iterators here.
+ incrementEpoch();
return true;
}
@@ -192,9 +194,9 @@ class SmallPtrSetImplBase : public DebugEpochBase {
return EndPointer();
}
-private:
bool isSmall() const { return CurArray == SmallArray; }
+private:
std::pair<const void *const *, bool> insert_imp_big(const void *Ptr);
const void * const *FindBucketFor(const void *Ptr) const;
@@ -352,7 +354,7 @@ class SmallPtrSetImpl : public SmallPtrSetImplBase {
}
/// erase - If the set contains the specified pointer, remove it and return
- /// true, otherwise return false.
+ /// true, otherwise return false. Invalidates iterators if it returns true.
bool erase(PtrType Ptr) {
return erase_imp(PtrTraits::getAsVoidPointer(Ptr));
}
@@ -362,6 +364,22 @@ class SmallPtrSetImpl : public SmallPtrSetImplBase {
template <typename UnaryPredicate>
bool remove_if(UnaryPredicate P) {
bool Removed = false;
+ if (isSmall()) {
+ const void **APtr = SmallArray, **E = SmallArray + NumNonEmpty;
+ while (APtr != E) {
+ PtrType Ptr = PtrTraits::getFromVoidPointer(const_cast<void *>(*APtr));
+ if (P(Ptr)) {
+ *APtr = *--E;
+ --NumNonEmpty;
+ incrementEpoch();
+ Removed = true;
+ } else {
+ ++APtr;
+ }
+ }
+ return Removed;
+ }
+
for (const void **APtr = CurArray, **E = EndPointer(); APtr != E; ++APtr) {
const void *Value = *APtr;
if (Value == getTombstoneMarker() || Value == getEmptyMarker())
@@ -370,6 +388,7 @@ class SmallPtrSetImpl : public SmallPtrSetImplBase {
if (P(Ptr)) {
*APtr = getTombstoneMarker();
++NumTombstones;
+ incrementEpoch();
Removed = true;
}
}
diff --git a/llvm/unittests/ADT/SmallPtrSetTest.cpp b/llvm/unittests/ADT/SmallPtrSetTest.cpp
index 1ed9133c2b551..b45318d076a3d 100644
--- a/llvm/unittests/ADT/SmallPtrSetTest.cpp
+++ b/llvm/unittests/ADT/SmallPtrSetTest.cpp
@@ -243,45 +243,6 @@ TEST(SmallPtrSetTest, SwapTest) {
EXPECT_TRUE(a.count(&buf[3]));
}
-void checkEraseAndIterators(SmallPtrSetImpl<int*> &S) {
- int buf[3];
-
- S.insert(&buf[0]);
- S.insert(&buf[1]);
- S.insert(&buf[2]);
-
- // Iterators must still be valid after erase() calls;
- auto B = S.begin();
- auto M = std::next(B);
- auto E = S.end();
- EXPECT_TRUE(*B == &buf[0] || *B == &buf[1] || *B == &buf[2]);
- EXPECT_TRUE(*M == &buf[0] || *M == &buf[1] || *M == &buf[2]);
- EXPECT_TRUE(*B != *M);
- int *Removable = *std::next(M);
- // No iterator points to Removable now.
- EXPECT_TRUE(Removable == &buf[0] || Removable == &buf[1] ||
- Removable == &buf[2]);
- EXPECT_TRUE(Removable != *B && Removable != *M);
-
- S.erase(Removable);
-
- // B,M,E iterators should still be valid
- EXPECT_EQ(B, S.begin());
- EXPECT_EQ(M, std::next(B));
- EXPECT_EQ(E, S.end());
- EXPECT_EQ(std::next(M), E);
-}
-
-TEST(SmallPtrSetTest, EraseTest) {
- // Test when set stays small.
- SmallPtrSet<int *, 8> B;
- checkEraseAndIterators(B);
-
- // Test when set grows big.
- SmallPtrSet<int *, 2> A;
- checkEraseAndIterators(A);
-}
-
// Verify that dereferencing and iteration work.
TEST(SmallPtrSetTest, dereferenceAndIterate) {
int Ints[] = {0, 1, 2, 3, 4, 5, 6, 7};
>From 2d6b05ae3b313d0e68f9d59b537f20c56f3e3823 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Thu, 27 Jun 2024 09:27:16 +0200
Subject: [PATCH 2/2] improve docs
---
llvm/docs/ProgrammersManual.rst | 6 ++++--
llvm/include/llvm/ADT/SmallPtrSet.h | 21 +++++++++++++++++----
2 files changed, 21 insertions(+), 6 deletions(-)
diff --git a/llvm/docs/ProgrammersManual.rst b/llvm/docs/ProgrammersManual.rst
index f4a16c43d4499..e00165caae0c9 100644
--- a/llvm/docs/ProgrammersManual.rst
+++ b/llvm/docs/ProgrammersManual.rst
@@ -2066,8 +2066,10 @@ insertion/deleting/queries with low constant factors) and is very stingy with
malloc traffic.
Note that, unlike :ref:`std::set <dss_set>`, the iterators of ``SmallPtrSet``
-are invalidated whenever an insertion occurs. Also, the values visited by the
-iterators are not visited in sorted order.
+are invalidated whenever an insertion or erasure occurs. The ``remove_if``
+method can be used to remove elements while iterating over the set.
+
+Also, the values visited by the iterators are not visited in sorted order.
.. _dss_stringset:
diff --git a/llvm/include/llvm/ADT/SmallPtrSet.h b/llvm/include/llvm/ADT/SmallPtrSet.h
index c9c7a10fd018b..597cad8444f69 100644
--- a/llvm/include/llvm/ADT/SmallPtrSet.h
+++ b/llvm/include/llvm/ADT/SmallPtrSet.h
@@ -353,14 +353,27 @@ class SmallPtrSetImpl : public SmallPtrSetImplBase {
return insert(Ptr).first;
}
- /// erase - If the set contains the specified pointer, remove it and return
- /// true, otherwise return false. Invalidates iterators if it returns true.
+ /// Remove pointer from the set.
+ ///
+ /// Returns whether the pointer was in the set. Invalidates iterators if
+ /// true is returned. To remove elements while iterating over the set, use
+ /// remove_if() instead.
bool erase(PtrType Ptr) {
return erase_imp(PtrTraits::getAsVoidPointer(Ptr));
}
- /// Remove elements that match the given predicate. Returns whether anything
- /// was removed.
+ /// Remove elements that match the given predicate.
+ ///
+ /// This method is a safe replacement for the following pattern, which is not
+ /// valid, because the erase() calls would invalidate the iterator:
+ ///
+ /// for (PtrType *Ptr : Set)
+ /// if (Pred(P))
+ /// Set.erase(P);
+ ///
+ /// Returns whether anything was removed. It is safe to read the set inside
+ /// the predicate function. However, the predicate must not modify the set
+ /// itself, only indicate a removal by returning true.
template <typename UnaryPredicate>
bool remove_if(UnaryPredicate P) {
bool Removed = false;
More information about the llvm-commits
mailing list