[llvm] r326081 - [Support] Replace HashString with djbHash.

Mon Feb 26 03:30:14 PST 2018

Author: jdevlieghere
Date: Mon Feb 26 03:30:13 2018
New Revision: 326081

URL: http://llvm.org/viewvc/llvm-project?rev=326081&view=rev
Log:
[Support] Replace HashString with djbHash.

This removes the HashString function from StringExtraces and replaces
its uses with calls to djbHash from DJB.h

This is *almost* NFC. While the algorithm is identical, the djbHash
implementation in StringExtras used 0 as its seed while the
implementation in DJB uses 5381. The latter has been shown to result in
less collisions and improved avalanching.

https://reviews.llvm.org/D43615
(cherry picked from commit 77f7f965bc9499a9ae768a296ca5a1f7347d1d2c)

Modified:
    llvm/trunk/include/llvm/ADT/StringExtras.h
    llvm/trunk/lib/Support/StringMap.cpp

Modified: llvm/trunk/include/llvm/ADT/StringExtras.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/StringExtras.h?rev=326081&r1=326080&r2=326081&view=diff
==============================================================================

--- llvm/trunk/include/llvm/ADT/StringExtras.h (original)
+++ llvm/trunk/include/llvm/ADT/StringExtras.h Mon Feb 26 03:30:13 2018
@@ -232,19 +232,6 @@ void SplitString(StringRef Source,
                  SmallVectorImpl<StringRef> &OutFragments,
                  StringRef Delimiters = " \t\n\v\f\r");
 
-/// HashString - Hash function for strings.
-///
-/// This is the Bernstein hash function.
-//
-// FIXME: Investigate whether a modified bernstein hash function performs
-// better: http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx
-//   X*33+c -> X*33^c
-inline unsigned HashString(StringRef Str, unsigned Result = 0) {
-  for (StringRef::size_type i = 0, e = Str.size(); i != e; ++i)
-    Result = Result * 33 + (unsigned char)Str[i];
-  return Result;
-}
-
 /// Returns the English suffix for an ordinal integer (-st, -nd, -rd, -th).
 inline StringRef getOrdinalSuffix(unsigned Val) {
   // It is critically important that we do this perfectly for

Modified: llvm/trunk/lib/Support/StringMap.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/StringMap.cpp?rev=326081&r1=326080&r2=326081&view=diff
==============================================================================
--- llvm/trunk/lib/Support/StringMap.cpp (original)
+++ llvm/trunk/lib/Support/StringMap.cpp Mon Feb 26 03:30:13 2018
@@ -14,6 +14,7 @@
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/DJB.h"
 #include "llvm/Support/MathExtras.h"
 #include <cassert>
 
@@ -32,7 +33,7 @@ static unsigned getMinBucketToReserveFor
 
 StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) {
   ItemSize = itemSize;
-  
+
   // If a size is specified, initialize the table with that many buckets.
   if (InitSize) {
     // The table will grow when the number of entries reach 3/4 of the number of
@@ -41,7 +42,7 @@ StringMapImpl::StringMapImpl(unsigned In
     init(getMinBucketToReserveForEntries(InitSize));
     return;
   }
-  
+
   // Otherwise, initialize it with zero buckets to avoid the allocation.
   TheTable = nullptr;
   NumBuckets = 0;
@@ -56,7 +57,7 @@ void StringMapImpl::init(unsigned InitSi
   unsigned NewNumBuckets = InitSize ? InitSize : 16;
   NumItems = 0;
   NumTombstones = 0;
-  
+
   TheTable = static_cast<StringMapEntryBase **>(
       std::calloc(NewNumBuckets+1,
                   sizeof(StringMapEntryBase **) + sizeof(unsigned)));
@@ -82,7 +83,7 @@ unsigned StringMapImpl::LookupBucketFor(
     init(16);
     HTSize = NumBuckets;
   }
-  unsigned FullHashValue = HashString(Name);
+  unsigned FullHashValue = djbHash(Name);
   unsigned BucketNo = FullHashValue & (HTSize-1);
   unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
 
@@ -98,11 +99,11 @@ unsigned StringMapImpl::LookupBucketFor(
         HashTable[FirstTombstone] = FullHashValue;
         return FirstTombstone;
       }
-      
+
       HashTable[BucketNo] = FullHashValue;
       return BucketNo;
     }
-    
+
     if (BucketItem == getTombstoneVal()) {
       // Skip over tombstones.  However, remember the first one we see.
       if (FirstTombstone == -1) FirstTombstone = BucketNo;
@@ -111,7 +112,7 @@ unsigned StringMapImpl::LookupBucketFor(
       // case here is that we are only looking at the buckets (for item info
       // being non-null and for the full hash value) not at the items.  This
       // is important for cache locality.
-      
+
       // Do the comparison like this because Name isn't necessarily
       // null-terminated!
       char *ItemStr = (char*)BucketItem+ItemSize;
@@ -120,10 +121,10 @@ unsigned StringMapImpl::LookupBucketFor(
         return BucketNo;
       }
     }
-    
+
     // Okay, we didn't find the item.  Probe to the next bucket.
     BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
-    
+
     // Use quadratic probing, it has fewer clumping artifacts than linear
     // probing and has good cache behavior in the common case.
     ++ProbeAmt;
@@ -136,7 +137,7 @@ unsigned StringMapImpl::LookupBucketFor(
 int StringMapImpl::FindKey(StringRef Key) const {
   unsigned HTSize = NumBuckets;
   if (HTSize == 0) return -1;  // Really empty table?
-  unsigned FullHashValue = HashString(Key);
+  unsigned FullHashValue = djbHash(Key);
   unsigned BucketNo = FullHashValue & (HTSize-1);
   unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
 
@@ -146,7 +147,7 @@ int StringMapImpl::FindKey(StringRef Key
     // If we found an empty bucket, this key isn't in the table yet, return.
     if (LLVM_LIKELY(!BucketItem))
       return -1;
-    
+
     if (BucketItem == getTombstoneVal()) {
       // Ignore tombstones.
     } else if (LLVM_LIKELY(HashTable[BucketNo] == FullHashValue)) {
@@ -154,7 +155,7 @@ int StringMapImpl::FindKey(StringRef Key
       // case here is that we are only looking at the buckets (for item info
       // being non-null and for the full hash value) not at the items.  This
       // is important for cache locality.
-      
+
       // Do the comparison like this because NameStart isn't necessarily
       // null-terminated!
       char *ItemStr = (char*)BucketItem+ItemSize;
@@ -163,10 +164,10 @@ int StringMapImpl::FindKey(StringRef Key
         return BucketNo;
       }
     }
-    
+
     // Okay, we didn't find the item.  Probe to the next bucket.
     BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
-    
+
     // Use quadratic probing, it has fewer clumping artifacts than linear
     // probing and has good cache behavior in the common case.
     ++ProbeAmt;
@@ -187,7 +188,7 @@ void StringMapImpl::RemoveKey(StringMapE
 StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) {
   int Bucket = FindKey(Key);
   if (Bucket == -1) return nullptr;
-  
+
   StringMapEntryBase *Result = TheTable[Bucket];
   TheTable[Bucket] = getTombstoneVal();
   --NumItems;
@@ -241,13 +242,13 @@ unsigned StringMapImpl::RehashTable(unsi
           NewBucketNo = NewBucket;
         continue;
       }
-      
+
       // Otherwise probe for a spot.
       unsigned ProbeSize = 1;
       do {
         NewBucket = (NewBucket + ProbeSize++) & (NewSize-1);
       } while (NewTableArray[NewBucket]);
-      
+
       // Finally found a slot.  Fill it in.
       NewTableArray[NewBucket] = Bucket;
       NewHashArray[NewBucket] = FullHash;
@@ -255,9 +256,9 @@ unsigned StringMapImpl::RehashTable(unsi
         NewBucketNo = NewBucket;
     }
   }
-  
+
   free(TheTable);
-  
+
   TheTable = NewTableArray;
   NumBuckets = NewSize;
   NumTombstones = 0;