[llvm] [ADT] Simplify SparseBitVectorIterator. NFCI. (PR #143885)

Jay Foad via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 13 09:04:14 PDT 2025


https://github.com/jayfoad updated https://github.com/llvm/llvm-project/pull/143885

>From b5e202767931f2e331d34380117cd5b4225b937c Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Wed, 11 Jun 2025 20:39:01 +0100
Subject: [PATCH 1/3] [ADT] Simplify SparseBitVectorIterator

The old implementation admitted to being "a lot uglier than it would be,
in order to be efficient". The new implementation aims to gain
efficiency through simplicity.
---
 llvm/include/llvm/ADT/SparseBitVector.h | 128 +++++-------------------
 1 file changed, 24 insertions(+), 104 deletions(-)

diff --git a/llvm/include/llvm/ADT/SparseBitVector.h b/llvm/include/llvm/ADT/SparseBitVector.h
index 7151af6146e6e..d3ac388871d9d 100644
--- a/llvm/include/llvm/ADT/SparseBitVector.h
+++ b/llvm/include/llvm/ADT/SparseBitVector.h
@@ -145,19 +145,14 @@ template <unsigned ElementSize = 128> struct SparseBitVectorElement {
 
   /// find_next - Returns the index of the next set bit starting from the
   /// "Curr" bit. Returns -1 if the next set bit is not found.
-  int find_next(unsigned Curr) const {
-    if (Curr >= BITS_PER_ELEMENT)
-      return -1;
+  int find_next(int Curr) const {
+    assert(Curr >= 0 && Curr < BITS_PER_ELEMENT);
 
     unsigned WordPos = Curr / BITWORD_SIZE;
     unsigned BitPos = Curr % BITWORD_SIZE;
-    BitWord Copy = Bits[WordPos];
-    assert(WordPos <= BITWORDS_PER_ELEMENT
-           && "Word Position outside of element");
 
     // Mask off previous bits.
-    Copy &= ~0UL << BitPos;
-
+    BitWord Copy = Bits[WordPos] & ~1UL << BitPos;
     if (Copy != 0)
       return WordPos * BITWORD_SIZE + llvm::countr_zero(Copy);
 
@@ -314,101 +309,34 @@ class SparseBitVector {
     return FindLowerBoundImpl(ElementIndex);
   }
 
-  // Iterator to walk set bits in the bitmap.  This iterator is a lot uglier
-  // than it would be, in order to be efficient.
+  // Iterator to walk set bits in the bitvector.
   class SparseBitVectorIterator {
   private:
-    bool AtEnd;
-
-    const SparseBitVector<ElementSize> *BitVector = nullptr;
+    // Current bit number within the current element, or -1 if we are at the
+    // end.
+    int BitPos = -1;
 
-    // Current element inside of bitmap.
+    // Iterators to the current element and the end of the bitvector. These are
+    // only valid when BitPos >= 0.
     ElementListConstIter Iter;
-
-    // Current bit number inside of our bitmap.
-    unsigned BitNumber;
-
-    // Current word number inside of our element.
-    unsigned WordNumber;
-
-    // Current bits from the element.
-    typename SparseBitVectorElement<ElementSize>::BitWord Bits;
-
-    // Move our iterator to the first non-zero bit in the bitmap.
-    void AdvanceToFirstNonZero() {
-      if (AtEnd)
-        return;
-      if (BitVector->Elements.empty()) {
-        AtEnd = true;
-        return;
-      }
-      Iter = BitVector->Elements.begin();
-      BitNumber = Iter->index() * ElementSize;
-      unsigned BitPos = Iter->find_first();
-      BitNumber += BitPos;
-      WordNumber = (BitNumber % ElementSize) / BITWORD_SIZE;
-      Bits = Iter->word(WordNumber);
-      Bits >>= BitPos % BITWORD_SIZE;
-    }
-
-    // Move our iterator to the next non-zero bit.
-    void AdvanceToNextNonZero() {
-      if (AtEnd)
-        return;
-
-      while (Bits && !(Bits & 1)) {
-        Bits >>= 1;
-        BitNumber += 1;
-      }
-
-      // See if we ran out of Bits in this word.
-      if (!Bits) {
-        int NextSetBitNumber = Iter->find_next(BitNumber % ElementSize) ;
-        // If we ran out of set bits in this element, move to next element.
-        if (NextSetBitNumber == -1 || (BitNumber % ElementSize == 0)) {
-          ++Iter;
-          WordNumber = 0;
-
-          // We may run out of elements in the bitmap.
-          if (Iter == BitVector->Elements.end()) {
-            AtEnd = true;
-            return;
-          }
-          // Set up for next non-zero word in bitmap.
-          BitNumber = Iter->index() * ElementSize;
-          NextSetBitNumber = Iter->find_first();
-          BitNumber += NextSetBitNumber;
-          WordNumber = (BitNumber % ElementSize) / BITWORD_SIZE;
-          Bits = Iter->word(WordNumber);
-          Bits >>= NextSetBitNumber % BITWORD_SIZE;
-        } else {
-          WordNumber = (NextSetBitNumber % ElementSize) / BITWORD_SIZE;
-          Bits = Iter->word(WordNumber);
-          Bits >>= NextSetBitNumber % BITWORD_SIZE;
-          BitNumber = Iter->index() * ElementSize;
-          BitNumber += NextSetBitNumber;
-        }
-      }
-    }
+    ElementListConstIter End;
 
   public:
     SparseBitVectorIterator() = default;
 
-    SparseBitVectorIterator(const SparseBitVector<ElementSize> *RHS,
-                            bool end = false):BitVector(RHS) {
-      Iter = BitVector->Elements.begin();
-      BitNumber = 0;
-      Bits = 0;
-      WordNumber = ~0;
-      AtEnd = end;
-      AdvanceToFirstNonZero();
+    SparseBitVectorIterator(const ElementList &Elements) {
+      if (Elements.empty())
+        return;
+      Iter = Elements.begin();
+      End = Elements.end();
+      BitPos = Iter->find_first();
     }
 
     // Preincrement.
     inline SparseBitVectorIterator& operator++() {
-      ++BitNumber;
-      Bits >>= 1;
-      AdvanceToNextNonZero();
+      BitPos = Iter->find_next(BitPos);
+      if (BitPos < 0 && ++Iter != End)
+        BitPos = Iter->find_first();
       return *this;
     }
 
@@ -421,16 +349,12 @@ class SparseBitVector {
 
     // Return the current set bit number.
     unsigned operator*() const {
-      return BitNumber;
+      assert(BitPos >= 0);
+      return Iter->index() * ElementSize + BitPos;
     }
 
     bool operator==(const SparseBitVectorIterator &RHS) const {
-      // If they are both at the end, ignore the rest of the fields.
-      if (AtEnd && RHS.AtEnd)
-        return true;
-      // Otherwise they are the same if they have the same bit number and
-      // bitmap.
-      return AtEnd == RHS.AtEnd && RHS.BitNumber == BitNumber;
+      return BitPos == RHS.BitPos;
     }
 
     bool operator!=(const SparseBitVectorIterator &RHS) const {
@@ -807,13 +731,9 @@ class SparseBitVector {
     return BitCount;
   }
 
-  iterator begin() const {
-    return iterator(this);
-  }
+  iterator begin() const { return iterator(Elements); }
 
-  iterator end() const {
-    return iterator(this, true);
-  }
+  iterator end() const { return iterator(); }
 };
 
 // Convenience functions to allow Or and And without dereferencing in the user

>From 1063eca4cc0270b8897c050c3d18838fde9c78d5 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Thu, 12 Jun 2025 14:17:11 +0100
Subject: [PATCH 2/3] benchmark

---
 llvm/benchmarks/CMakeLists.txt      |  2 +-
 llvm/benchmarks/SparseBitVector.cpp | 29 +++++++++++++++++++++++++++++
 2 files changed, 30 insertions(+), 1 deletion(-)
 create mode 100644 llvm/benchmarks/SparseBitVector.cpp

diff --git a/llvm/benchmarks/CMakeLists.txt b/llvm/benchmarks/CMakeLists.txt
index 1078efa55f497..e362da3558349 100644
--- a/llvm/benchmarks/CMakeLists.txt
+++ b/llvm/benchmarks/CMakeLists.txt
@@ -10,4 +10,4 @@ add_benchmark(GetIntrinsicForClangBuiltin GetIntrinsicForClangBuiltin.cpp PARTIA
 add_benchmark(FormatVariadicBM FormatVariadicBM.cpp PARTIAL_SOURCES_INTENDED)
 add_benchmark(GetIntrinsicInfoTableEntriesBM GetIntrinsicInfoTableEntriesBM.cpp PARTIAL_SOURCES_INTENDED)
 add_benchmark(SandboxIRBench SandboxIRBench.cpp PARTIAL_SOURCES_INTENDED)
-
+add_benchmark(SparseBitVector SparseBitVector.cpp PARTIAL_SOURCES_INTENDED)
diff --git a/llvm/benchmarks/SparseBitVector.cpp b/llvm/benchmarks/SparseBitVector.cpp
new file mode 100644
index 0000000000000..4887ce673c280
--- /dev/null
+++ b/llvm/benchmarks/SparseBitVector.cpp
@@ -0,0 +1,29 @@
+#include "llvm/ADT/SparseBitVector.h"
+#include "benchmark/benchmark.h"
+using namespace llvm;
+
+static unsigned xorshift(unsigned State) {
+  State ^= State << 13;
+  State ^= State >> 17;
+  State ^= State << 5;
+  return State;
+}
+
+static void BM_SparseBitVectorIterator(benchmark::State &State) {
+  SparseBitVector<> BV;
+
+  unsigned Prev = 0xcafebabe;
+  for (unsigned I = 0, E = State.range(0); I != E; ++I)
+    BV.set((Prev = xorshift(Prev)) % 10000);
+
+  for (auto _ : State) {
+    unsigned Total = 0;
+    for (auto I = BV.begin(), E = BV.end(); I != E; ++I)
+      Total += *I;
+    benchmark::DoNotOptimize(Total);
+  }
+}
+
+BENCHMARK(BM_SparseBitVectorIterator)->Arg(10)->Arg(100)->Arg(1000)->Arg(10000);
+
+BENCHMARK_MAIN();

>From defcb19487103ec11d30f9f8d848eaf6d80aeb2f Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Fri, 13 Jun 2025 17:03:11 +0100
Subject: [PATCH 3/3] Some micro-optimizations

---
 llvm/include/llvm/ADT/SparseBitVector.h | 36 ++++++++++++++-----------
 1 file changed, 21 insertions(+), 15 deletions(-)

diff --git a/llvm/include/llvm/ADT/SparseBitVector.h b/llvm/include/llvm/ADT/SparseBitVector.h
index d3ac388871d9d..d3c488fa3619f 100644
--- a/llvm/include/llvm/ADT/SparseBitVector.h
+++ b/llvm/include/llvm/ADT/SparseBitVector.h
@@ -143,24 +143,31 @@ template <unsigned ElementSize = 128> struct SparseBitVectorElement {
     llvm_unreachable("Illegal empty element");
   }
 
-  /// find_next - Returns the index of the next set bit starting from the
-  /// "Curr" bit. Returns -1 if the next set bit is not found.
-  int find_next(int Curr) const {
-    assert(Curr >= 0 && Curr < BITS_PER_ELEMENT);
+  /// find_next - Sets Curr to the index of the next set bit starting after the
+  /// Curr bit and returns false, or sets Curr to ~0U and returns true if no
+  /// next bit is found.
+  bool find_next(unsigned &Curr) const {
+    assert(Curr < BITS_PER_ELEMENT);
 
     unsigned WordPos = Curr / BITWORD_SIZE;
     unsigned BitPos = Curr % BITWORD_SIZE;
 
     // Mask off previous bits.
     BitWord Copy = Bits[WordPos] & ~1UL << BitPos;
-    if (Copy != 0)
-      return WordPos * BITWORD_SIZE + llvm::countr_zero(Copy);
+    if (Copy != 0) {
+      Curr = WordPos * BITWORD_SIZE + llvm::countr_zero(Copy);
+      return false;
+    }
 
     // Check subsequent words.
-    for (unsigned i = WordPos+1; i < BITWORDS_PER_ELEMENT; ++i)
-      if (Bits[i] != 0)
-        return i * BITWORD_SIZE + llvm::countr_zero(Bits[i]);
-    return -1;
+    for (unsigned i = WordPos + 1; i < BITWORDS_PER_ELEMENT; ++i) {
+      if (Bits[i] != 0) {
+        Curr = i * BITWORD_SIZE + llvm::countr_zero(Bits[i]);
+        return false;
+      }
+    }
+    Curr = ~0U;
+    return true;
   }
 
   // Union this element with RHS and return true if this one changed.
@@ -312,9 +319,9 @@ class SparseBitVector {
   // Iterator to walk set bits in the bitvector.
   class SparseBitVectorIterator {
   private:
-    // Current bit number within the current element, or -1 if we are at the
+    // Current bit number within the current element, or ~0U if we are at the
     // end.
-    int BitPos = -1;
+    unsigned BitPos = ~0U;
 
     // Iterators to the current element and the end of the bitvector. These are
     // only valid when BitPos >= 0.
@@ -334,8 +341,7 @@ class SparseBitVector {
 
     // Preincrement.
     inline SparseBitVectorIterator& operator++() {
-      BitPos = Iter->find_next(BitPos);
-      if (BitPos < 0 && ++Iter != End)
+      if (Iter->find_next(BitPos) && ++Iter != End)
         BitPos = Iter->find_first();
       return *this;
     }
@@ -349,7 +355,7 @@ class SparseBitVector {
 
     // Return the current set bit number.
     unsigned operator*() const {
-      assert(BitPos >= 0);
+      assert(BitPos != ~0U);
       return Iter->index() * ElementSize + BitPos;
     }
 



More information about the llvm-commits mailing list