[llvm-commits] [llvm] r147294 - in /llvm/trunk: include/llvm/ADT/StringMap.h lib/Support/StringMap.cpp

Tue Dec 27 12:35:07 PST 2011

Author: d0k
Date: Tue Dec 27 14:35:07 2011
New Revision: 147294

URL: http://llvm.org/viewvc/llvm-project?rev=147294&view=rev
Log:
Switch StringMap from an array of structures to a structure of arrays.

- 25% lower memory usage for the main table on x86_64 (the difference was previously wasted on struct padding).
- no significant performance change.

Modified:
    llvm/trunk/include/llvm/ADT/StringMap.h
    llvm/trunk/lib/Support/StringMap.cpp

Modified: llvm/trunk/include/llvm/ADT/StringMap.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/StringMap.h?rev=147294&r1=147293&r2=147294&view=diff
==============================================================================

--- llvm/trunk/include/llvm/ADT/StringMap.h (original)
+++ llvm/trunk/include/llvm/ADT/StringMap.h Tue Dec 27 14:35:07 2011
@@ -51,20 +51,11 @@
 /// StringMapImpl - This is the base class of StringMap that is shared among
 /// all of its instantiations.
 class StringMapImpl {
-public:
-  /// ItemBucket - The hash table consists of an array of these.  If Item is
-  /// non-null, this is an extant entry, otherwise, it is a hole.
-  struct ItemBucket {
-    /// FullHashValue - This remembers the full hash value of the key for
-    /// easy scanning.
-    unsigned FullHashValue;
-
-    /// Item - This is a pointer to the actual item object.
-    StringMapEntryBase *Item;
-  };
-
 protected:
-  ItemBucket *TheTable;
+  // Array of NumBuckets pointers to entries, null pointers are holes.
+  // TheTable[NumBuckets] contains a sentinel value for easy iteration. Followed
+  // by an array of the actual hash values as unsigned integers.
+  StringMapEntryBase **TheTable;
   unsigned NumBuckets;
   unsigned NumItems;
   unsigned NumTombstones;
@@ -320,13 +311,13 @@
   /// insert it and return true.
   bool insert(MapEntryTy *KeyValue) {
     unsigned BucketNo = LookupBucketFor(KeyValue->getKey());
-    ItemBucket &Bucket = TheTable[BucketNo];
-    if (Bucket.Item && Bucket.Item != getTombstoneVal())
+    StringMapEntryBase *&Bucket = TheTable[BucketNo];
+    if (Bucket && Bucket != getTombstoneVal())
       return false;  // Already exists in map.
 
-    if (Bucket.Item == getTombstoneVal())
+    if (Bucket == getTombstoneVal())
       --NumTombstones;
-    Bucket.Item = KeyValue;
+    Bucket = KeyValue;
     ++NumItems;
     assert(NumItems + NumTombstones <= NumBuckets);
 
@@ -340,10 +331,11 @@
 
     // Zap all values, resetting the keys back to non-present (not tombstone),
     // which is safe because we're removing all elements.
-    for (ItemBucket *I = TheTable, *E = TheTable+NumBuckets; I != E; ++I) {
-      if (I->Item && I->Item != getTombstoneVal()) {
-        static_cast<MapEntryTy*>(I->Item)->Destroy(Allocator);
-        I->Item = 0;
+    for (unsigned I = 0, E = NumBuckets; I != E; ++I) {
+      StringMapEntryBase *&Bucket = TheTable[I];
+      if (Bucket && Bucket != getTombstoneVal()) {
+        static_cast<MapEntryTy*>(Bucket)->Destroy(Allocator);
+        Bucket = 0;
       }
     }
 
@@ -357,21 +349,21 @@
   template <typename InitTy>
   MapEntryTy &GetOrCreateValue(StringRef Key, InitTy Val) {
     unsigned BucketNo = LookupBucketFor(Key);
-    ItemBucket &Bucket = TheTable[BucketNo];
-    if (Bucket.Item && Bucket.Item != getTombstoneVal())
-      return *static_cast<MapEntryTy*>(Bucket.Item);
+    StringMapEntryBase *&Bucket = TheTable[BucketNo];
+    if (Bucket && Bucket != getTombstoneVal())
+      return *static_cast<MapEntryTy*>(Bucket);
 
     MapEntryTy *NewItem =
       MapEntryTy::Create(Key.begin(), Key.end(), Allocator, Val);
 
-    if (Bucket.Item == getTombstoneVal())
+    if (Bucket == getTombstoneVal())
       --NumTombstones;
     ++NumItems;
     assert(NumItems + NumTombstones <= NumBuckets);
 
     // Fill in the bucket for the hash table.  The FullHashValue was already
     // filled in by LookupBucketFor.
-    Bucket.Item = NewItem;
+    Bucket = NewItem;
 
     RehashTable();
     return *NewItem;
@@ -410,21 +402,21 @@
 template<typename ValueTy>
 class StringMapConstIterator {
 protected:
-  StringMapImpl::ItemBucket *Ptr;
+  StringMapEntryBase **Ptr;
 public:
   typedef StringMapEntry<ValueTy> value_type;
 
-  explicit StringMapConstIterator(StringMapImpl::ItemBucket *Bucket,
+  explicit StringMapConstIterator(StringMapEntryBase **Bucket,
                                   bool NoAdvance = false)
   : Ptr(Bucket) {
     if (!NoAdvance) AdvancePastEmptyBuckets();
   }
 
   const value_type &operator*() const {
-    return *static_cast<StringMapEntry<ValueTy>*>(Ptr->Item);
+    return *static_cast<StringMapEntry<ValueTy>*>(*Ptr);
   }
   const value_type *operator->() const {
-    return static_cast<StringMapEntry<ValueTy>*>(Ptr->Item);
+    return static_cast<StringMapEntry<ValueTy>*>(*Ptr);
   }
 
   bool operator==(const StringMapConstIterator &RHS) const {
@@ -445,7 +437,7 @@
 
 private:
   void AdvancePastEmptyBuckets() {
-    while (Ptr->Item == 0 || Ptr->Item == StringMapImpl::getTombstoneVal())
+    while (*Ptr == 0 || *Ptr == StringMapImpl::getTombstoneVal())
       ++Ptr;
   }
 };
@@ -453,15 +445,15 @@
 template<typename ValueTy>
 class StringMapIterator : public StringMapConstIterator<ValueTy> {
 public:
-  explicit StringMapIterator(StringMapImpl::ItemBucket *Bucket,
+  explicit StringMapIterator(StringMapEntryBase **Bucket,
                              bool NoAdvance = false)
     : StringMapConstIterator<ValueTy>(Bucket, NoAdvance) {
   }
   StringMapEntry<ValueTy> &operator*() const {
-    return *static_cast<StringMapEntry<ValueTy>*>(this->Ptr->Item);
+    return *static_cast<StringMapEntry<ValueTy>*>(*this->Ptr);
   }
   StringMapEntry<ValueTy> *operator->() const {
-    return static_cast<StringMapEntry<ValueTy>*>(this->Ptr->Item);
+    return static_cast<StringMapEntry<ValueTy>*>(*this->Ptr);
   }
 };
 

Modified: llvm/trunk/lib/Support/StringMap.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/StringMap.cpp?rev=147294&r1=147293&r2=147294&view=diff
==============================================================================
--- llvm/trunk/lib/Support/StringMap.cpp (original)
+++ llvm/trunk/lib/Support/StringMap.cpp Tue Dec 27 14:35:07 2011
@@ -39,11 +39,13 @@
   NumItems = 0;
   NumTombstones = 0;
   
-  TheTable = (ItemBucket*)calloc(NumBuckets+1, sizeof(ItemBucket));
-  
+  TheTable = (StringMapEntryBase **)calloc(NumBuckets+1,
+                                           sizeof(StringMapEntryBase **) +
+                                           sizeof(unsigned));
+
   // Allocate one extra bucket, set it to look filled so the iterators stop at
   // end.
-  TheTable[NumBuckets].Item = (StringMapEntryBase*)2;
+  TheTable[NumBuckets] = (StringMapEntryBase*)2;
 }
 
 
@@ -60,29 +62,29 @@
   }
   unsigned FullHashValue = HashString(Name);
   unsigned BucketNo = FullHashValue & (HTSize-1);
-  
+  unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
+
   unsigned ProbeAmt = 1;
   int FirstTombstone = -1;
   while (1) {
-    ItemBucket &Bucket = TheTable[BucketNo];
-    StringMapEntryBase *BucketItem = Bucket.Item;
+    StringMapEntryBase *BucketItem = TheTable[BucketNo];
     // If we found an empty bucket, this key isn't in the table yet, return it.
     if (BucketItem == 0) {
       // If we found a tombstone, we want to reuse the tombstone instead of an
       // empty bucket.  This reduces probing.
       if (FirstTombstone != -1) {
-        TheTable[FirstTombstone].FullHashValue = FullHashValue;
+        HashTable[FirstTombstone] = FullHashValue;
         return FirstTombstone;
       }
       
-      Bucket.FullHashValue = FullHashValue;
+      HashTable[BucketNo] = FullHashValue;
       return BucketNo;
     }
     
     if (BucketItem == getTombstoneVal()) {
       // Skip over tombstones.  However, remember the first one we see.
       if (FirstTombstone == -1) FirstTombstone = BucketNo;
-    } else if (Bucket.FullHashValue == FullHashValue) {
+    } else if (HashTable[BucketNo] == FullHashValue) {
       // If the full hash value matches, check deeply for a match.  The common
       // case here is that we are only looking at the buckets (for item info
       // being non-null and for the full hash value) not at the items.  This
@@ -115,18 +117,18 @@
   if (HTSize == 0) return -1;  // Really empty table?
   unsigned FullHashValue = HashString(Key);
   unsigned BucketNo = FullHashValue & (HTSize-1);
-  
+  unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
+
   unsigned ProbeAmt = 1;
   while (1) {
-    ItemBucket &Bucket = TheTable[BucketNo];
-    StringMapEntryBase *BucketItem = Bucket.Item;
+    StringMapEntryBase *BucketItem = TheTable[BucketNo];
     // If we found an empty bucket, this key isn't in the table yet, return.
     if (BucketItem == 0)
       return -1;
     
     if (BucketItem == getTombstoneVal()) {
       // Ignore tombstones.
-    } else if (Bucket.FullHashValue == FullHashValue) {
+    } else if (HashTable[BucketNo] == FullHashValue) {
       // If the full hash value matches, check deeply for a match.  The common
       // case here is that we are only looking at the buckets (for item info
       // being non-null and for the full hash value) not at the items.  This
@@ -165,8 +167,8 @@
   int Bucket = FindKey(Key);
   if (Bucket == -1) return 0;
   
-  StringMapEntryBase *Result = TheTable[Bucket].Item;
-  TheTable[Bucket].Item = getTombstoneVal();
+  StringMapEntryBase *Result = TheTable[Bucket];
+  TheTable[Bucket] = getTombstoneVal();
   --NumItems;
   ++NumTombstones;
   assert(NumItems + NumTombstones <= NumBuckets);
@@ -180,6 +182,7 @@
 /// the appropriate mod-of-hashtable-size.
 void StringMapImpl::RehashTable() {
   unsigned NewSize;
+  unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
 
   // If the hash table is now more than 3/4 full, or if fewer than 1/8 of
   // the buckets are empty (meaning that many are filled with tombstones),
@@ -194,19 +197,23 @@
 
   // Allocate one extra bucket which will always be non-empty.  This allows the
   // iterators to stop at end.
-  ItemBucket *NewTableArray =(ItemBucket*)calloc(NewSize+1, sizeof(ItemBucket));
-  NewTableArray[NewSize].Item = (StringMapEntryBase*)2;
-  
+  StringMapEntryBase **NewTableArray =
+    (StringMapEntryBase **)calloc(NewSize+1, sizeof(StringMapEntryBase *) +
+                                             sizeof(unsigned));
+  unsigned *NewHashArray = (unsigned *)(NewTableArray + NewSize + 1);
+  NewTableArray[NewSize] = (StringMapEntryBase*)2;
+
   // Rehash all the items into their new buckets.  Luckily :) we already have
   // the hash values available, so we don't have to rehash any strings.
-  for (ItemBucket *IB = TheTable, *E = TheTable+NumBuckets; IB != E; ++IB) {
-    if (IB->Item && IB->Item != getTombstoneVal()) {
+  for (unsigned I = 0, E = NumBuckets; I != E; ++I) {
+    StringMapEntryBase *Bucket = TheTable[I];
+    if (Bucket && Bucket != getTombstoneVal()) {
       // Fast case, bucket available.
-      unsigned FullHash = IB->FullHashValue;
+      unsigned FullHash = HashTable[I];
       unsigned NewBucket = FullHash & (NewSize-1);
-      if (NewTableArray[NewBucket].Item == 0) {
-        NewTableArray[FullHash & (NewSize-1)].Item = IB->Item;
-        NewTableArray[FullHash & (NewSize-1)].FullHashValue = FullHash;
+      if (NewTableArray[NewBucket] == 0) {
+        NewTableArray[FullHash & (NewSize-1)] = Bucket;
+        NewHashArray[FullHash & (NewSize-1)] = FullHash;
         continue;
       }
       
@@ -214,11 +221,11 @@
       unsigned ProbeSize = 1;
       do {
         NewBucket = (NewBucket + ProbeSize++) & (NewSize-1);
-      } while (NewTableArray[NewBucket].Item);
+      } while (NewTableArray[NewBucket]);
       
       // Finally found a slot.  Fill it in.
-      NewTableArray[NewBucket].Item = IB->Item;
-      NewTableArray[NewBucket].FullHashValue = FullHash;
+      NewTableArray[NewBucket] = Bucket;
+      NewHashArray[NewBucket] = FullHash;
     }
   }