[llvm-branch-commits] [llvm] 7aa76e1 - Revert "[DenseMap] Replace tombstone deletion with TAOCP 6.4 Algorithm R (#19…"
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri May 29 07:36:27 PDT 2026
Author: Martin Storsjö
Date: 2026-05-29T17:36:22+03:00
New Revision: 7aa76e1f19ed6cfa046f87b5abe8f5034ac6edd3
URL: https://github.com/llvm/llvm-project/commit/7aa76e1f19ed6cfa046f87b5abe8f5034ac6edd3
DIFF: https://github.com/llvm/llvm-project/commit/7aa76e1f19ed6cfa046f87b5abe8f5034ac6edd3.diff
LOG: Revert "[DenseMap] Replace tombstone deletion with TAOCP 6.4 Algorithm R (#19…"
This reverts commit c4d820ce311b74acf70e52ec04856fc89b503ab9.
Added:
Modified:
llvm/include/llvm/ADT/DenseMap.h
llvm/lib/IR/Value.cpp
llvm/unittests/ADT/BitVectorTest.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/ADT/DenseMap.h b/llvm/include/llvm/ADT/DenseMap.h
index b52906a215a7c..081d7e198e94c 100644
--- a/llvm/include/llvm/ADT/DenseMap.h
+++ b/llvm/include/llvm/ADT/DenseMap.h
@@ -53,10 +53,6 @@ struct DenseMapPair : std::pair<KeyT, ValueT> {
} // end namespace detail
-// Befriended below so DenseMapBase can expose its bucket-relocation callback
-// erase to ValueHandleBase, the only caller that caches bucket pointers.
-class ValueHandleBase;
-
template <typename KeyT, typename ValueT,
typename KeyInfoT = DenseMapInfo<KeyT>,
typename Bucket = llvm::detail::DenseMapPair<KeyT, ValueT>,
@@ -124,7 +120,7 @@ class DenseMapBase : public DebugEpochBase {
void clear() {
incrementEpoch();
- if (getNumEntries() == 0)
+ if (getNumEntries() == 0 && getNumTombstones() == 0)
return;
// If the capacity of the array is huge, and the # elements used is small,
@@ -140,11 +136,14 @@ class DenseMapBase : public DebugEpochBase {
for (BucketT &B : buckets())
B.getFirst() = EmptyKey;
} else {
+ const KeyT TombstoneKey = KeyInfoT::getTombstoneKey();
unsigned NumEntries = getNumEntries();
for (BucketT &B : buckets()) {
if (!KeyInfoT::isEqual(B.getFirst(), EmptyKey)) {
- B.getSecond().~ValueT();
- --NumEntries;
+ if (!KeyInfoT::isEqual(B.getFirst(), TombstoneKey)) {
+ B.getSecond().~ValueT();
+ --NumEntries;
+ }
B.getFirst() = EmptyKey;
}
}
@@ -152,6 +151,7 @@ class DenseMapBase : public DebugEpochBase {
(void)NumEntries;
}
setNumEntries(0);
+ setNumTombstones(0);
}
void shrink_and_clear() {
@@ -325,19 +325,26 @@ class DenseMapBase : public DebugEpochBase {
return Ret;
}
- void eraseFromFilledBucket(BucketT *TheBucket) {
- eraseFromFilledBucket(TheBucket, [](BucketT &) {});
- }
-
bool erase(const KeyT &Val) {
BucketT *TheBucket = doFind(Val);
if (!TheBucket)
return false; // not in map.
- eraseFromFilledBucket(TheBucket);
+ incrementEpoch();
+ TheBucket->getSecond().~ValueT();
+ TheBucket->getFirst() = KeyInfoT::getTombstoneKey();
+ decrementNumEntries();
+ incrementNumTombstones();
return true;
}
- void erase(iterator I) { eraseFromFilledBucket(&*I); }
+ void erase(iterator I) {
+ BucketT *TheBucket = &*I;
+ incrementEpoch();
+ TheBucket->getSecond().~ValueT();
+ TheBucket->getFirst() = KeyInfoT::getTombstoneKey();
+ decrementNumEntries();
+ incrementNumTombstones();
+ }
/// Remove entries that match the given predicate. \p Pred is invoked
/// with a reference to each live bucket and must not access the map being
@@ -347,22 +354,22 @@ class DenseMapBase : public DebugEpochBase {
/// into the map are invalidated.
template <typename Predicate> bool remove_if(Predicate Pred) {
const KeyT EmptyKey = KeyInfoT::getEmptyKey();
- unsigned NumBuckets = getNumBuckets();
+ const KeyT TombstoneKey = KeyInfoT::getTombstoneKey();
bool Removed = false;
for (BucketT &B : buckets()) {
- if (KeyInfoT::isEqual(B.getFirst(), EmptyKey))
+ if (KeyInfoT::isEqual(B.getFirst(), EmptyKey) ||
+ KeyInfoT::isEqual(B.getFirst(), TombstoneKey))
continue;
if (Pred(B)) {
B.getSecond().~ValueT();
- B.getFirst() = EmptyKey;
+ B.getFirst() = TombstoneKey;
decrementNumEntries();
+ incrementNumTombstones();
Removed = true;
}
}
- if (Removed) {
+ if (Removed)
incrementEpoch();
- this->grow(NumBuckets);
- }
return Removed;
}
@@ -399,10 +406,12 @@ class DenseMapBase : public DebugEpochBase {
struct ExactBucketCount {};
void initWithExactBucketCount(unsigned NewNumBuckets) {
- if (derived().allocateBuckets(NewNumBuckets))
+ if (derived().allocateBuckets(NewNumBuckets)) {
initEmpty();
- else
+ } else {
setNumEntries(0);
+ setNumTombstones(0);
+ }
}
void destroyAll() {
@@ -416,8 +425,10 @@ class DenseMapBase : public DebugEpochBase {
return;
const KeyT EmptyKey = KeyInfoT::getEmptyKey();
+ const KeyT TombstoneKey = KeyInfoT::getTombstoneKey();
for (BucketT &B : buckets()) {
- if (!KeyInfoT::isEqual(B.getFirst(), EmptyKey))
+ if (!KeyInfoT::isEqual(B.getFirst(), EmptyKey) &&
+ !KeyInfoT::isEqual(B.getFirst(), TombstoneKey))
B.getSecond().~ValueT();
B.getFirst().~KeyT();
}
@@ -427,6 +438,7 @@ class DenseMapBase : public DebugEpochBase {
static_assert(std::is_base_of_v<DenseMapBase, DerivedT>,
"Must pass the derived type to this template!");
setNumEntries(0);
+ setNumTombstones(0);
assert((getNumBuckets() & (getNumBuckets() - 1)) == 0 &&
"# initial buckets must be a power of two!");
@@ -451,8 +463,10 @@ class DenseMapBase : public DebugEpochBase {
void moveFrom(DerivedT &Other) {
// Insert all the old elements.
const KeyT EmptyKey = KeyInfoT::getEmptyKey();
+ const KeyT TombstoneKey = KeyInfoT::getTombstoneKey();
for (BucketT &B : Other.buckets()) {
- if (!KeyInfoT::isEqual(B.getFirst(), EmptyKey)) {
+ if (!KeyInfoT::isEqual(B.getFirst(), EmptyKey) &&
+ !KeyInfoT::isEqual(B.getFirst(), TombstoneKey)) {
// Insert the key/value into the new table.
BucketT *DestBucket;
bool FoundVal = LookupBucketFor(B.getFirst(), DestBucket);
@@ -474,6 +488,7 @@ class DenseMapBase : public DebugEpochBase {
this->destroyAll();
derived().deallocateBuckets();
setNumEntries(0);
+ setNumTombstones(0);
if (!derived().allocateBuckets(other.getNumBuckets())) {
// The bucket list is empty. No work to do.
return;
@@ -483,6 +498,7 @@ class DenseMapBase : public DebugEpochBase {
assert(getNumBuckets() == other.getNumBuckets());
setNumEntries(other.getNumEntries());
+ setNumTombstones(other.getNumTombstones());
BucketT *Buckets = getBuckets();
const BucketT *OtherBuckets = other.getBuckets();
@@ -493,66 +509,17 @@ class DenseMapBase : public DebugEpochBase {
NumBuckets * sizeof(BucketT));
} else {
const KeyT EmptyKey = KeyInfoT::getEmptyKey();
+ const KeyT TombstoneKey = KeyInfoT::getTombstoneKey();
for (size_t I = 0; I < NumBuckets; ++I) {
::new (&Buckets[I].getFirst()) KeyT(OtherBuckets[I].getFirst());
- if (!KeyInfoT::isEqual(Buckets[I].getFirst(), EmptyKey))
+ if (!KeyInfoT::isEqual(Buckets[I].getFirst(), EmptyKey) &&
+ !KeyInfoT::isEqual(Buckets[I].getFirst(), TombstoneKey))
::new (&Buckets[I].getSecond()) ValueT(OtherBuckets[I].getSecond());
}
}
}
private:
- // ValueHandleBase caches pointers into the bucket array, so it needs the
- // callback erase below to fix them up as entries shift. It is the only
- // intended caller; do not add new ones.
- friend class ValueHandleBase;
-
- /// Erase the entry at \p TheBucket and close the resulting hole via Knuth
- /// TAOCP 6.4 Algorithm R. For callers that cache pointers into the bucket
- /// array, call \p OnMoved per shifted bucket.
- template <typename OnMovedT>
- void eraseFromFilledBucket(BucketT *TheBucket, OnMovedT &&OnMoved) {
- incrementEpoch();
- TheBucket->getSecond().~ValueT();
- decrementNumEntries();
-
- BucketT *BucketsPtr = getBuckets();
- const unsigned NumBuckets = getNumBuckets();
- const unsigned Mask = NumBuckets - 1;
- const KeyT EmptyKey = KeyInfoT::getEmptyKey();
- unsigned I = static_cast<unsigned>(TheBucket - BucketsPtr);
- unsigned J = I;
- while (true) {
- J = (J + 1) & Mask;
- BucketT &BJ = BucketsPtr[J];
- if (KeyInfoT::isEqual(BJ.getFirst(), EmptyKey))
- break;
- auto Ideal = KeyInfoT::getHashValue(BJ.getFirst());
- // If the hole (I) lies on the linear-probe chain from the home bucket
- // (Ideal) to J, shift J into the hole and make J the new hole.
- if (((I - Ideal) & Mask) < ((J - Ideal) & Mask)) {
- BucketT &BI = BucketsPtr[I];
- BI.getFirst() = std::move(BJ.getFirst());
- ::new (&BI.getSecond()) ValueT(std::move(BJ.getSecond()));
- BJ.getSecond().~ValueT();
- OnMoved(BI);
- I = J;
- }
- }
- BucketsPtr[I].getFirst() = EmptyKey;
- }
-
- /// Erase \p Val and close the resulting hole by potentially shifting other
- /// entries into it. For callers that cache pointers into the bucket array,
- /// call \p OnMoved per shifted bucket.
- template <typename OnMovedT> bool erase(const KeyT &Val, OnMovedT &&OnMoved) {
- BucketT *TheBucket = doFind(Val);
- if (!TheBucket)
- return false;
- eraseFromFilledBucket(TheBucket, std::forward<OnMovedT>(OnMoved));
- return true;
- }
-
DerivedT &derived() { return *static_cast<DerivedT *>(this); }
const DerivedT &derived() const {
return *static_cast<const DerivedT *>(this);
@@ -595,6 +562,14 @@ class DenseMapBase : public DebugEpochBase {
void decrementNumEntries() { setNumEntries(getNumEntries() - 1); }
+ unsigned getNumTombstones() const { return derived().getNumTombstones(); }
+
+ void setNumTombstones(unsigned Num) { derived().setNumTombstones(Num); }
+
+ void incrementNumTombstones() { setNumTombstones(getNumTombstones() + 1); }
+
+ void decrementNumTombstones() { setNumTombstones(getNumTombstones() - 1); }
+
const BucketT *getBuckets() const { return derived().getBuckets(); }
BucketT *getBuckets() { return derived().getBuckets(); }
@@ -630,21 +605,37 @@ class DenseMapBase : public DebugEpochBase {
BucketT *TheBucket) {
incrementEpoch();
- // Grow the table if the load factor would exceed 3/4 after insertion.
- // Linear probing with gap-closing deletion (Knuth Algorithm R) keeps
- // every chain compact and bounded by the table's empty-bucket count,
- // so no tombstone-driven resize is needed.
+ // If the load of the hash table is more than 3/4, or if fewer than 1/8 of
+ // the buckets are empty (meaning that many are filled with tombstones),
+ // grow the table.
+ //
+ // The later case is tricky. For example, if we had one empty bucket with
+ // tons of tombstones, failing lookups (e.g. for insertion) would have to
+ // probe almost the entire table until it found the empty bucket. If the
+ // table completely filled with tombstones, no lookup would ever succeed,
+ // causing infinite loops in lookup.
unsigned NewNumEntries = getNumEntries() + 1;
unsigned NumBuckets = getNumBuckets();
if (LLVM_UNLIKELY(NewNumEntries * 4 >= NumBuckets * 3)) {
this->grow(NumBuckets * 2);
LookupBucketFor(Lookup, TheBucket);
+ } else if (LLVM_UNLIKELY(NumBuckets -
+ (NewNumEntries + getNumTombstones()) <=
+ NumBuckets / 8)) {
+ this->grow(NumBuckets);
+ LookupBucketFor(Lookup, TheBucket);
}
assert(TheBucket);
// Only update the state after we've grown our bucket space appropriately
// so that when growing buckets we have self-consistent entry count.
incrementNumEntries();
+
+ // If we are writing over a tombstone, remember this.
+ const KeyT EmptyKey = KeyInfoT::getEmptyKey();
+ if (!KeyInfoT::isEqual(TheBucket->getFirst(), EmptyKey))
+ decrementNumTombstones();
+
return TheBucket;
}
@@ -657,6 +648,7 @@ class DenseMapBase : public DebugEpochBase {
const KeyT EmptyKey = KeyInfoT::getEmptyKey();
unsigned BucketNo = KeyInfoT::getHashValue(Val) & (NumBuckets - 1);
+ unsigned ProbeAmt = 1;
while (true) {
const BucketT *Bucket = BucketsPtr + BucketNo;
if (LLVM_LIKELY(KeyInfoT::isEqual(Val, Bucket->getFirst())))
@@ -664,8 +656,10 @@ class DenseMapBase : public DebugEpochBase {
if (LLVM_LIKELY(KeyInfoT::isEqual(Bucket->getFirst(), EmptyKey)))
return nullptr;
- // Hash collision: continue linear probing.
- BucketNo = (BucketNo + 1) & (NumBuckets - 1);
+ // Otherwise, it's a hash collision or a tombstone, continue quadratic
+ // probing.
+ BucketNo += ProbeAmt++;
+ BucketNo &= NumBuckets - 1;
}
}
@@ -676,7 +670,7 @@ class DenseMapBase : public DebugEpochBase {
/// Lookup the appropriate bucket for Val, returning it in FoundBucket. If the
/// bucket contains the key and a value, this returns true, otherwise it
- /// returns a bucket with an empty marker and returns false.
+ /// returns a bucket with an empty marker or tombstone and returns false.
template <typename LookupKeyT>
bool LookupBucketFor(const LookupKeyT &Val, BucketT *&FoundBucket) {
BucketT *BucketsPtr = getBuckets();
@@ -687,11 +681,16 @@ class DenseMapBase : public DebugEpochBase {
return false;
}
+ // FoundTombstone - Keep track of whether we find a tombstone while probing.
+ BucketT *FoundTombstone = nullptr;
const KeyT EmptyKey = KeyInfoT::getEmptyKey();
+ const KeyT TombstoneKey = KeyInfoT::getTombstoneKey();
assert(!KeyInfoT::isEqual(Val, EmptyKey) &&
- "Empty value shouldn't be inserted into map!");
+ !KeyInfoT::isEqual(Val, TombstoneKey) &&
+ "Empty/Tombstone value shouldn't be inserted into map!");
unsigned BucketNo = KeyInfoT::getHashValue(Val) & (NumBuckets - 1);
+ unsigned ProbeAmt = 1;
while (true) {
BucketT *ThisBucket = BucketsPtr + BucketNo;
// Found Val's bucket? If so, return it.
@@ -701,14 +700,24 @@ class DenseMapBase : public DebugEpochBase {
}
// If we found an empty bucket, the key doesn't exist in the set.
- // Return it as the insertion point.
+ // Insert it and return the default value.
if (LLVM_LIKELY(KeyInfoT::isEqual(ThisBucket->getFirst(), EmptyKey))) {
- FoundBucket = ThisBucket;
+ // If we've already seen a tombstone while probing, fill it in instead
+ // of the empty bucket we eventually probed to.
+ FoundBucket = FoundTombstone ? FoundTombstone : ThisBucket;
return false;
}
- // Hash collision: continue linear probing.
- BucketNo = (BucketNo + 1) & (NumBuckets - 1);
+ // If this is a tombstone, remember it. If Val ends up not in the map, we
+ // prefer to return it than something that would require more probing.
+ if (KeyInfoT::isEqual(ThisBucket->getFirst(), TombstoneKey) &&
+ !FoundTombstone)
+ FoundTombstone = ThisBucket; // Remember the first tombstone found.
+
+ // Otherwise, it's a hash collision or a tombstone, continue quadratic
+ // probing.
+ BucketNo += ProbeAmt++;
+ BucketNo &= (NumBuckets - 1);
}
}
@@ -769,6 +778,7 @@ class DenseMap : public DenseMapBase<DenseMap<KeyT, ValueT, KeyInfoT, BucketT>,
BucketT *Buckets = nullptr;
unsigned NumEntries = 0;
+ unsigned NumTombstones = 0;
unsigned NumBuckets = 0;
explicit DenseMap(unsigned NumBuckets, typename BaseT::ExactBucketCount) {
@@ -821,6 +831,7 @@ class DenseMap : public DenseMapBase<DenseMap<KeyT, ValueT, KeyInfoT, BucketT>,
void swapImpl(DenseMap &RHS) {
std::swap(Buckets, RHS.Buckets);
std::swap(NumEntries, RHS.NumEntries);
+ std::swap(NumTombstones, RHS.NumTombstones);
std::swap(NumBuckets, RHS.NumBuckets);
}
@@ -828,6 +839,10 @@ class DenseMap : public DenseMapBase<DenseMap<KeyT, ValueT, KeyInfoT, BucketT>,
void setNumEntries(unsigned Num) { NumEntries = Num; }
+ unsigned getNumTombstones() const { return NumTombstones; }
+
+ void setNumTombstones(unsigned Num) { NumTombstones = Num; }
+
BucketT *getBuckets() const { return Buckets; }
unsigned getNumBuckets() const { return NumBuckets; }
@@ -896,6 +911,7 @@ class SmallDenseMap
unsigned Small : 1;
unsigned NumEntries : 31;
+ unsigned NumTombstones;
struct LargeRep {
BucketT *Buckets;
@@ -962,8 +978,10 @@ class SmallDenseMap
unsigned TmpNumEntries = RHS.NumEntries;
RHS.NumEntries = NumEntries;
NumEntries = TmpNumEntries;
+ std::swap(NumTombstones, RHS.NumTombstones);
const KeyT EmptyKey = KeyInfoT::getEmptyKey();
+ const KeyT TombstoneKey = KeyInfoT::getTombstoneKey();
if (Small && RHS.Small) {
// If we're swapping inline bucket arrays, we have to cope with some of
// the tricky bits of DenseMap's storage system: the buckets are not
@@ -972,8 +990,10 @@ class SmallDenseMap
for (unsigned i = 0, e = InlineBuckets; i != e; ++i) {
BucketT *LHSB = &getInlineBuckets()[i],
*RHSB = &RHS.getInlineBuckets()[i];
- bool hasLHSValue = !KeyInfoT::isEqual(LHSB->getFirst(), EmptyKey);
- bool hasRHSValue = !KeyInfoT::isEqual(RHSB->getFirst(), EmptyKey);
+ bool hasLHSValue = (!KeyInfoT::isEqual(LHSB->getFirst(), EmptyKey) &&
+ !KeyInfoT::isEqual(LHSB->getFirst(), TombstoneKey));
+ bool hasRHSValue = (!KeyInfoT::isEqual(RHSB->getFirst(), EmptyKey) &&
+ !KeyInfoT::isEqual(RHSB->getFirst(), TombstoneKey));
if (hasLHSValue && hasRHSValue) {
// Swap together if we can...
std::swap(*LHSB, *RHSB);
@@ -1012,7 +1032,8 @@ class SmallDenseMap
*OldB = &SmallSide.getInlineBuckets()[i];
::new (&NewB->getFirst()) KeyT(std::move(OldB->getFirst()));
OldB->getFirst().~KeyT();
- if (!KeyInfoT::isEqual(NewB->getFirst(), EmptyKey)) {
+ if (!KeyInfoT::isEqual(NewB->getFirst(), EmptyKey) &&
+ !KeyInfoT::isEqual(NewB->getFirst(), TombstoneKey)) {
::new (&NewB->getSecond()) ValueT(std::move(OldB->getSecond()));
OldB->getSecond().~ValueT();
}
@@ -1032,6 +1053,10 @@ class SmallDenseMap
NumEntries = Num;
}
+ unsigned getNumTombstones() const { return NumTombstones; }
+
+ void setNumTombstones(unsigned Num) { NumTombstones = Num; }
+
const BucketT *getInlineBuckets() const {
assert(Small);
// Note that this cast does not violate aliasing rules as we assert that
@@ -1119,6 +1144,7 @@ class SmallDenseMap
Small = false;
NumEntries = Other.NumEntries;
+ NumTombstones = Other.NumTombstones;
*getLargeRep() = std::move(*Other.getLargeRep());
Other.getLargeRep()->NumBuckets = 0;
return true;
@@ -1247,8 +1273,10 @@ class DenseMapIterator : DebugEpochBase::HandleBase {
void AdvancePastEmptyBuckets() {
assert(Ptr <= End);
const KeyT Empty = KeyInfoT::getEmptyKey();
+ const KeyT Tombstone = KeyInfoT::getTombstoneKey();
- while (Ptr != End && KeyInfoT::isEqual(Ptr->getFirst(), Empty))
+ while (Ptr != End && (KeyInfoT::isEqual(Ptr->getFirst(), Empty) ||
+ KeyInfoT::isEqual(Ptr->getFirst(), Tombstone)))
++Ptr;
}
diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp
index f5a501897f0bb..074d42d728c95 100644
--- a/llvm/lib/IR/Value.cpp
+++ b/llvm/lib/IR/Value.cpp
@@ -1222,10 +1222,7 @@ void ValueHandleBase::RemoveFromUseList() {
LLVMContextImpl *pImpl = getValPtr()->getContext().pImpl;
DenseMap<Value*, ValueHandleBase*> &Handles = pImpl->ValueHandles;
if (Handles.isPointerIntoBucketsArray(PrevPtr)) {
- // TODO: Remove the only user of DenseMap's callback erase.
- Handles.erase(getValPtr(), [](auto &Bucket) {
- Bucket.second->setPrevPtr(&Bucket.second);
- });
+ Handles.erase(getValPtr());
getValPtr()->HasValueHandle = false;
}
}
diff --git a/llvm/unittests/ADT/BitVectorTest.cpp b/llvm/unittests/ADT/BitVectorTest.cpp
index adf788b8f9661..d3200b7722ee3 100644
--- a/llvm/unittests/ADT/BitVectorTest.cpp
+++ b/llvm/unittests/ADT/BitVectorTest.cpp
@@ -1434,7 +1434,8 @@ TYPED_TEST(BitVectorTest, DenseSet) {
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
TypeParam D;
- EXPECT_DEATH(Set.insert(D), "Empty value shouldn't be inserted into map!");
+ EXPECT_DEATH(Set.insert(D),
+ "Empty/Tombstone value shouldn't be inserted into map!");
#endif
EXPECT_EQ(3U, Set.size());
More information about the llvm-branch-commits
mailing list