[llvm] 096b8a8 - [ADT] Fix an indexing bug in PackedVector (#158785)

Tue Sep 16 09:07:32 PDT 2025

Author: Kazu Hirata
Date: 2025-09-16T09:07:28-07:00
New Revision: 096b8a8b89f3234972770e0581e059073d106b7c

URL: https://github.com/llvm/llvm-project/commit/096b8a8b89f3234972770e0581e059073d106b7c
DIFF: https://github.com/llvm/llvm-project/commit/096b8a8b89f3234972770e0581e059073d106b7c.diff

LOG: [ADT] Fix an indexing bug in PackedVector (#158785)

PackedVector is like std::vector<int> except that we can store small
elements (e.g. 2-bit elements) in a packed manner using a BitVector as
the underlying storage.

The problem is that for bit size 3 and beyond, the calculation of
indices into the underlying BitVector is not correct.  For example,
around line 50, we see a "for" loop to retrieve an unsigned integer
value:

  for (unsigned i = 0; i != BitNum-1; ++i)
    val = T(val | ((Bits[(Idx << (BitNum-1)) + i] ? 1UL : 0UL) << i));

Suppose that BitNum is 4 (that is, 4-bit item).  Here is the mapping
between the PackedVector index and the corresponding BitVector
indices.

  Idx 0:  0, 1, 2, 3
  Idx 1:  8, 9, 10, 11
  Idx 2:  16, 17, 18, 19

That is, we use 4 bits out of every 8 bits.  This is because the index
calculation uses "<<".  The index should really be Idx * BitNum + i.
FWIW, all the methods in PackedVector consistently use the shift-based
index calculation, so the user would never encounter a bug except
possibly as excessive storage use.

This patch fixes the index calculation.  Now, in size(), I didn't want
to do integer division:

  return Bits.size() / BitNum;

so this patch adds a separate variable NumElements to keep track of
the number of elements.

The unit test checks for the expected size of the underlying
BitVector.

Added: 
    

Modified: 
    llvm/include/llvm/ADT/PackedVector.h
    llvm/unittests/ADT/PackedVectorTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/ADT/PackedVector.h b/llvm/include/llvm/ADT/PackedVector.h
index b6bb6a4738067..1146cc4bd6d23 100644

--- a/llvm/include/llvm/ADT/PackedVector.h
+++ b/llvm/include/llvm/ADT/PackedVector.h
@@ -31,14 +31,14 @@ class PackedVectorBase<T, BitNum, BitVectorTy, false> {
   static T getValue(const BitVectorTy &Bits, unsigned Idx) {
     T val = T();
     for (unsigned i = 0; i != BitNum; ++i)
-      val = T(val | ((Bits[(Idx << (BitNum-1)) + i] ? 1UL : 0UL) << i));
+      val = T(val | ((Bits[(Idx * BitNum) + i] ? 1UL : 0UL) << i));
     return val;
   }
 
   static void setValue(BitVectorTy &Bits, unsigned Idx, T val) {
     assert((val >> BitNum) == 0 && "value is too big");
     for (unsigned i = 0; i != BitNum; ++i)
-      Bits[(Idx << (BitNum-1)) + i] = val & (T(1) << i);
+      Bits[(Idx * BitNum) + i] = val & (T(1) << i);
   }
 };
 
@@ -48,8 +48,8 @@ class PackedVectorBase<T, BitNum, BitVectorTy, true> {
   static T getValue(const BitVectorTy &Bits, unsigned Idx) {
     T val = T();
     for (unsigned i = 0; i != BitNum-1; ++i)
-      val = T(val | ((Bits[(Idx << (BitNum-1)) + i] ? 1UL : 0UL) << i));
-    if (Bits[(Idx << (BitNum-1)) + BitNum-1])
+      val = T(val | ((Bits[(Idx * BitNum) + i] ? 1UL : 0UL) << i));
+    if (Bits[(Idx * BitNum) + BitNum - 1])
       val = ~val;
     return val;
   }
@@ -57,11 +57,11 @@ class PackedVectorBase<T, BitNum, BitVectorTy, true> {
   static void setValue(BitVectorTy &Bits, unsigned Idx, T val) {
     if (val < 0) {
       val = ~val;
-      Bits.set((Idx << (BitNum-1)) + BitNum-1);
+      Bits.set((Idx * BitNum) + BitNum - 1);
     }
     assert((val >> (BitNum-1)) == 0 && "value is too big");
     for (unsigned i = 0; i != BitNum-1; ++i)
-      Bits[(Idx << (BitNum-1)) + i] = val & (T(1) << i);
+      Bits[(Idx * BitNum) + i] = val & (T(1) << i);
   }
 };
 
@@ -76,6 +76,10 @@ template <typename T, unsigned BitNum, typename BitVectorTy = BitVector>
 class PackedVector : public PackedVectorBase<T, BitNum, BitVectorTy,
                                             std::numeric_limits<T>::is_signed> {
   BitVectorTy Bits;
+  // Keep track of the number of elements on our own.
+  // We always maintain Bits.size() == NumElements * BitNum.
+  // Used to avoid an integer division in size().
+  unsigned NumElements = 0;
   using base = PackedVectorBase<T, BitNum, BitVectorTy,
                                 std::numeric_limits<T>::is_signed>;
 
@@ -99,17 +103,24 @@ class PackedVector : public PackedVectorBase<T, BitNum, BitVectorTy,
   };
 
   PackedVector() = default;
-  explicit PackedVector(unsigned size) : Bits(size << (BitNum-1)) {}
+  explicit PackedVector(unsigned size)
+      : Bits(size * BitNum), NumElements(size) {}
 
-  bool empty() const { return Bits.empty(); }
+  bool empty() const { return NumElements == 0; }
 
-  unsigned size() const { return Bits.size() >> (BitNum - 1); }
+  unsigned size() const { return NumElements; }
 
-  void clear() { Bits.clear(); }
+  void clear() {
+    Bits.clear();
+    NumElements = 0;
+  }
 
-  void resize(unsigned N) { Bits.resize(N << (BitNum - 1)); }
+  void resize(unsigned N) {
+    Bits.resize(N * BitNum);
+    NumElements = N;
+  }
 
-  void reserve(unsigned N) { Bits.reserve(N << (BitNum-1)); }
+  void reserve(unsigned N) { Bits.reserve(N * BitNum); }
 
   PackedVector &reset() {
     Bits.reset();

diff  --git a/llvm/unittests/ADT/PackedVectorTest.cpp b/llvm/unittests/ADT/PackedVectorTest.cpp
index b4e017971efac..30fc7c0b6d07f 100644
--- a/llvm/unittests/ADT/PackedVectorTest.cpp
+++ b/llvm/unittests/ADT/PackedVectorTest.cpp
@@ -61,6 +61,16 @@ TEST(PackedVectorTest, Operation) {
   EXPECT_EQ(3U, Vec[1]);
 }
 
+TEST(PackedVectorTest, RawBitsSize) {
+  PackedVector<unsigned, 3> Vec;
+  EXPECT_EQ(0u, Vec.raw_bits().size());
+  Vec.push_back(2);
+  Vec.push_back(0);
+  Vec.push_back(1);
+  Vec.push_back(3);
+  EXPECT_EQ(12u, Vec.raw_bits().size());
+}
+
 #ifdef EXPECT_DEBUG_DEATH
 
 TEST(PackedVectorTest, UnsignedValues) {