[llvm] [SSAUpdater] Don't use large SmallSets for IDFcalc (PR #97823)
Jeremy Morse via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 30 07:35:01 PDT 2024
https://github.com/jmorse updated https://github.com/llvm/llvm-project/pull/97823
>From 33666499d5b99b9e50305047ce52cf440fd5dfb0 Mon Sep 17 00:00:00 2001
From: Jeremy Morse <jeremy.morse at sony.com>
Date: Fri, 5 Jul 2024 13:15:43 +0100
Subject: [PATCH 1/5] [SSAUpdater] Don't use large SmallSets for IDFcalc
As per the LLVM programmers manual, SmallSets do linear scans on insertion
and then turn into a hash-table if the set gets big. Here in the
IDFCalculator, the SmallSets have been configured to have 32 elements in
each static allocation... which means that we linearly scan for all
problems with up to 32 elements.
Replace these SmallSets with a plain DenseSet (and use reserve to avoid any
repeated allocations). Doing this yields a 0.13% compile-time improvement
for debug-info builds, as we hit IDFCalculator pretty hard in
InstrRefBasedLDV.
---
.../llvm/Support/GenericIteratedDominanceFrontier.h | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/llvm/include/llvm/Support/GenericIteratedDominanceFrontier.h b/llvm/include/llvm/Support/GenericIteratedDominanceFrontier.h
index 0dc58e37c821d..c218d33c3eb30 100644
--- a/llvm/include/llvm/Support/GenericIteratedDominanceFrontier.h
+++ b/llvm/include/llvm/Support/GenericIteratedDominanceFrontier.h
@@ -144,8 +144,12 @@ void IDFCalculatorBase<NodeTy, IsPostDom>::calculate(
DT.updateDFSNumbers();
SmallVector<DomTreeNodeBase<NodeTy> *, 32> Worklist;
- SmallPtrSet<DomTreeNodeBase<NodeTy> *, 32> VisitedPQ;
- SmallPtrSet<DomTreeNodeBase<NodeTy> *, 32> VisitedWorklist;
+ SmallDenseSet<DomTreeNodeBase<NodeTy> *, 16> VisitedPQ;
+ SmallDenseSet<DomTreeNodeBase<NodeTy> *, 16> VisitedWorklist;
+ if (useLiveIn) {
+ VisitedPQ.reserve(LiveInBlocks->size());
+ VisitedWorklist.reserve(LiveInBlocks->size());
+ }
for (NodeTy *BB : *DefBlocks)
if (DomTreeNodeBase<NodeTy> *Node = DT.getNode(BB)) {
>From 3ab1e7d9559ddd3b5b809824978e721181d51a42 Mon Sep 17 00:00:00 2001
From: Jeremy Morse <jeremy.morse at sony.com>
Date: Tue, 16 Jul 2024 14:23:33 +0100
Subject: [PATCH 2/5] Implement a reserve call for SmallPtrSet
---
llvm/include/llvm/ADT/SmallPtrSet.h | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/llvm/include/llvm/ADT/SmallPtrSet.h b/llvm/include/llvm/ADT/SmallPtrSet.h
index 597cad8444f69..dd6d5ee72867e 100644
--- a/llvm/include/llvm/ADT/SmallPtrSet.h
+++ b/llvm/include/llvm/ADT/SmallPtrSet.h
@@ -17,6 +17,7 @@
#include "llvm/ADT/EpochTracker.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/ReverseIteration.h"
#include "llvm/Support/type_traits.h"
#include <cassert>
@@ -108,6 +109,21 @@ class SmallPtrSetImplBase : public DebugEpochBase {
NumTombstones = 0;
}
+ void reserve(size_type NumEntries) {
+ incrementEpoch();
+ // No need to expand if we're small and NumEntries will fit in the space.
+ if (isSmall() && NumEntries < CurArraySize)
+ return;
+ // insert_imp_big will reallocate if stores is more than 75% full.
+ if (!isSmall() && (NumEntries * 4) <= (CurArraySize * 3))
+ return;
+ // We must Grow -- find the size where we'd be 75% full, then round up to
+ // the next power of two.
+ size_type NewSize = NumEntries + (NumEntries / 3);
+ NewSize = 1 << (Log2_32_Ceil(NewSize) + 1);
+ Grow(NewSize);
+ }
+
protected:
static void *getTombstoneMarker() { return reinterpret_cast<void*>(-2); }
>From ab267cc31f01dcc088c4839a4fadd3eca92b902f Mon Sep 17 00:00:00 2001
From: Jeremy Morse <jeremy.morse at sony.com>
Date: Tue, 16 Jul 2024 14:23:43 +0100
Subject: [PATCH 3/5] Switch back to using SmallPtrSets in IDF calculator
---
llvm/include/llvm/Support/GenericIteratedDominanceFrontier.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/include/llvm/Support/GenericIteratedDominanceFrontier.h b/llvm/include/llvm/Support/GenericIteratedDominanceFrontier.h
index c218d33c3eb30..e08a75c137d00 100644
--- a/llvm/include/llvm/Support/GenericIteratedDominanceFrontier.h
+++ b/llvm/include/llvm/Support/GenericIteratedDominanceFrontier.h
@@ -144,8 +144,8 @@ void IDFCalculatorBase<NodeTy, IsPostDom>::calculate(
DT.updateDFSNumbers();
SmallVector<DomTreeNodeBase<NodeTy> *, 32> Worklist;
- SmallDenseSet<DomTreeNodeBase<NodeTy> *, 16> VisitedPQ;
- SmallDenseSet<DomTreeNodeBase<NodeTy> *, 16> VisitedWorklist;
+ SmallPtrSet<DomTreeNodeBase<NodeTy> *, 16> VisitedPQ;
+ SmallPtrSet<DomTreeNodeBase<NodeTy> *, 16> VisitedWorklist;
if (useLiveIn) {
VisitedPQ.reserve(LiveInBlocks->size());
VisitedWorklist.reserve(LiveInBlocks->size());
>From 79b2456e9d2741291b4e44546179b1c20646d422 Mon Sep 17 00:00:00 2001
From: Jeremy Morse <jeremy.morse at sony.com>
Date: Tue, 30 Jul 2024 12:10:06 +0100
Subject: [PATCH 4/5] Add some unit tests, just checking for stupidity
Also adjust two off-by-ones in the reserve implementation:
* We should be willing to fill the small array up to 100%
* The test for reallocating a big array happens before the "last" element
that we reserve for is inserted, so apply the filter off-by-one.
---
llvm/include/llvm/ADT/SmallPtrSet.h | 7 ++--
llvm/unittests/ADT/SmallPtrSetTest.cpp | 46 ++++++++++++++++++++++++++
2 files changed, 50 insertions(+), 3 deletions(-)
diff --git a/llvm/include/llvm/ADT/SmallPtrSet.h b/llvm/include/llvm/ADT/SmallPtrSet.h
index dd6d5ee72867e..672c296dde9dc 100644
--- a/llvm/include/llvm/ADT/SmallPtrSet.h
+++ b/llvm/include/llvm/ADT/SmallPtrSet.h
@@ -112,10 +112,11 @@ class SmallPtrSetImplBase : public DebugEpochBase {
void reserve(size_type NumEntries) {
incrementEpoch();
// No need to expand if we're small and NumEntries will fit in the space.
- if (isSmall() && NumEntries < CurArraySize)
+ if (isSmall() && NumEntries <= CurArraySize)
return;
- // insert_imp_big will reallocate if stores is more than 75% full.
- if (!isSmall() && (NumEntries * 4) <= (CurArraySize * 3))
+ // insert_imp_big will reallocate if stores is more than 75% full, on the
+ // /final/ insertion.
+ if (!isSmall() && ((NumEntries-1) * 4) < (CurArraySize * 3))
return;
// We must Grow -- find the size where we'd be 75% full, then round up to
// the next power of two.
diff --git a/llvm/unittests/ADT/SmallPtrSetTest.cpp b/llvm/unittests/ADT/SmallPtrSetTest.cpp
index b45318d076a3d..9894198e85d06 100644
--- a/llvm/unittests/ADT/SmallPtrSetTest.cpp
+++ b/llvm/unittests/ADT/SmallPtrSetTest.cpp
@@ -408,3 +408,49 @@ TEST(SmallPtrSetTest, RemoveIf) {
Removed = Set.remove_if([](int *Ptr) { return false; });
EXPECT_FALSE(Removed);
}
+
+TEST(SmallPtrSetTest, Reserve) {
+ // Check that we don't do anything silly when using reserve().
+ SmallPtrSet<int *, 4> Set;
+ int Vals[8] = {0, 1, 2, 3, 4, 5, 6, 7};
+
+ Set.insert(&Vals[0]);
+
+ // We shouldn't reallocate when this happens.
+ Set.reserve(4);
+
+ Set.insert(&Vals[1]);
+ Set.insert(&Vals[2]);
+ Set.insert(&Vals[3]);
+
+ // We shouldn't reallocate this time either.
+ Set.reserve(4);
+ EXPECT_EQ(Set.size(), 4u);
+ EXPECT_TRUE(Set.contains(&Vals[0]));
+ EXPECT_TRUE(Set.contains(&Vals[1]));
+ EXPECT_TRUE(Set.contains(&Vals[2]));
+ EXPECT_TRUE(Set.contains(&Vals[3]));
+
+ // Reserving further should lead to a reallocation.
+ Set.reserve(5);
+ EXPECT_EQ(Set.size(), 4u);
+ EXPECT_TRUE(Set.contains(&Vals[0]));
+ EXPECT_TRUE(Set.contains(&Vals[1]));
+ EXPECT_TRUE(Set.contains(&Vals[2]));
+ EXPECT_TRUE(Set.contains(&Vals[3]));
+
+ // And we should be able to insert another two or three elements without
+ // reallocating.
+ Set.insert(&Vals[4]);
+ Set.insert(&Vals[5]);
+
+ // Calling a smaller reserve size should have no effect.
+ Set.reserve(1);
+ EXPECT_EQ(Set.size(), 6u);
+ EXPECT_TRUE(Set.contains(&Vals[0]));
+ EXPECT_TRUE(Set.contains(&Vals[1]));
+ EXPECT_TRUE(Set.contains(&Vals[2]));
+ EXPECT_TRUE(Set.contains(&Vals[3]));
+ EXPECT_TRUE(Set.contains(&Vals[4]));
+ EXPECT_TRUE(Set.contains(&Vals[5]));
+}
>From e28791adb59da9a4c515adc914e94ca4e0429a9a Mon Sep 17 00:00:00 2001
From: Jeremy Morse <jeremy.morse at sony.com>
Date: Tue, 30 Jul 2024 15:23:18 +0100
Subject: [PATCH 5/5] Add a capacity method to SmallPtrSet, re-wrangle tests
This involves adding a filter for zero-sized reserves, which break the
off-by-one size test I'd added earlier. To maintain consistency with
insert_imp_big, the first allocation always jumps to 128 elements.
---
llvm/include/llvm/ADT/SmallPtrSet.h | 8 ++++++-
llvm/unittests/ADT/SmallPtrSetTest.cpp | 31 +++++++++++++-------------
2 files changed, 23 insertions(+), 16 deletions(-)
diff --git a/llvm/include/llvm/ADT/SmallPtrSet.h b/llvm/include/llvm/ADT/SmallPtrSet.h
index 672c296dde9dc..6a24520041c99 100644
--- a/llvm/include/llvm/ADT/SmallPtrSet.h
+++ b/llvm/include/llvm/ADT/SmallPtrSet.h
@@ -93,6 +93,7 @@ class SmallPtrSetImplBase : public DebugEpochBase {
[[nodiscard]] bool empty() const { return size() == 0; }
size_type size() const { return NumNonEmpty - NumTombstones; }
+ size_type capacity() const { return CurArraySize; }
void clear() {
incrementEpoch();
@@ -111,17 +112,22 @@ class SmallPtrSetImplBase : public DebugEpochBase {
void reserve(size_type NumEntries) {
incrementEpoch();
+ // Do nothing if we're given zero as a reservation size.
+ if (!NumEntries)
+ return;
// No need to expand if we're small and NumEntries will fit in the space.
if (isSmall() && NumEntries <= CurArraySize)
return;
// insert_imp_big will reallocate if stores is more than 75% full, on the
// /final/ insertion.
- if (!isSmall() && ((NumEntries-1) * 4) < (CurArraySize * 3))
+ if (!isSmall() && ((NumEntries - 1) * 4) < (CurArraySize * 3))
return;
// We must Grow -- find the size where we'd be 75% full, then round up to
// the next power of two.
size_type NewSize = NumEntries + (NumEntries / 3);
NewSize = 1 << (Log2_32_Ceil(NewSize) + 1);
+ // Like insert_imp_big, always allocate at least 128 elements.
+ NewSize = std::max(128u, NewSize);
Grow(NewSize);
}
diff --git a/llvm/unittests/ADT/SmallPtrSetTest.cpp b/llvm/unittests/ADT/SmallPtrSetTest.cpp
index 9894198e85d06..09961e0d20b30 100644
--- a/llvm/unittests/ADT/SmallPtrSetTest.cpp
+++ b/llvm/unittests/ADT/SmallPtrSetTest.cpp
@@ -14,9 +14,11 @@
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/PointerLikeTypeTraits.h"
+#include "gmock/gmock.h"
#include "gtest/gtest.h"
using namespace llvm;
+using testing::UnorderedElementsAre;
TEST(SmallPtrSetTest, Assignment) {
int buf[8];
@@ -418,6 +420,7 @@ TEST(SmallPtrSetTest, Reserve) {
// We shouldn't reallocate when this happens.
Set.reserve(4);
+ EXPECT_EQ(Set.capacity(), 4u);
Set.insert(&Vals[1]);
Set.insert(&Vals[2]);
@@ -425,19 +428,16 @@ TEST(SmallPtrSetTest, Reserve) {
// We shouldn't reallocate this time either.
Set.reserve(4);
+ EXPECT_EQ(Set.capacity(), 4u);
EXPECT_EQ(Set.size(), 4u);
- EXPECT_TRUE(Set.contains(&Vals[0]));
- EXPECT_TRUE(Set.contains(&Vals[1]));
- EXPECT_TRUE(Set.contains(&Vals[2]));
- EXPECT_TRUE(Set.contains(&Vals[3]));
+ EXPECT_THAT(Set, UnorderedElementsAre(&Vals[0], &Vals[1], &Vals[2], &Vals[3]));
- // Reserving further should lead to a reallocation.
+ // Reserving further should lead to a reallocation. And matching the existing
+ // insertion approach, we immediately allocate up to 128 elements.
Set.reserve(5);
+ EXPECT_EQ(Set.capacity(), 128u);
EXPECT_EQ(Set.size(), 4u);
- EXPECT_TRUE(Set.contains(&Vals[0]));
- EXPECT_TRUE(Set.contains(&Vals[1]));
- EXPECT_TRUE(Set.contains(&Vals[2]));
- EXPECT_TRUE(Set.contains(&Vals[3]));
+ EXPECT_THAT(Set, UnorderedElementsAre(&Vals[0], &Vals[1], &Vals[2], &Vals[3]));
// And we should be able to insert another two or three elements without
// reallocating.
@@ -446,11 +446,12 @@ TEST(SmallPtrSetTest, Reserve) {
// Calling a smaller reserve size should have no effect.
Set.reserve(1);
+ EXPECT_EQ(Set.capacity(), 128u);
EXPECT_EQ(Set.size(), 6u);
- EXPECT_TRUE(Set.contains(&Vals[0]));
- EXPECT_TRUE(Set.contains(&Vals[1]));
- EXPECT_TRUE(Set.contains(&Vals[2]));
- EXPECT_TRUE(Set.contains(&Vals[3]));
- EXPECT_TRUE(Set.contains(&Vals[4]));
- EXPECT_TRUE(Set.contains(&Vals[5]));
+
+ // Reserving zero should have no effect either.
+ Set.reserve(0);
+ EXPECT_EQ(Set.capacity(), 128u);
+ EXPECT_EQ(Set.size(), 6u);
+ EXPECT_THAT(Set, UnorderedElementsAre(&Vals[0], &Vals[1], &Vals[2], &Vals[3], &Vals[4], &Vals[5]));
}
More information about the llvm-commits
mailing list