[llvm] r326091 - Re-land: "[Support] Replace HashString with djbHash."

Mon Feb 26 07:16:43 PST 2018

Author: jdevlieghere
Date: Mon Feb 26 07:16:42 2018
New Revision: 326091

URL: http://llvm.org/viewvc/llvm-project?rev=326091&view=rev
Log:
Re-land: "[Support] Replace HashString with djbHash."

This patch removes the HashString function from StringExtraces and
replaces its uses with calls to djbHash from DJB.h.

This change is *almost* NFC. While the algorithm is identical, the
djbHash implementation in StringExtras used 0 as its default seed while
the implementation in DJB uses 5381. The latter has been shown to result
in less collisions and improved avalanching and is used by the DWARF
accelerator tables.

Because some test were implicitly relying on the hash order, I've
reverted to using zero as a seed for the following two files:

  lld/include/lld/Core/SymbolTable.h
  llvm/lib/Support/StringMap.cpp

Differential revision: https://reviews.llvm.org/D43615

Modified:
    llvm/trunk/include/llvm/ADT/StringExtras.h
    llvm/trunk/lib/Support/StringMap.cpp

Modified: llvm/trunk/include/llvm/ADT/StringExtras.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/StringExtras.h?rev=326091&r1=326090&r2=326091&view=diff
==============================================================================

--- llvm/trunk/include/llvm/ADT/StringExtras.h (original)
+++ llvm/trunk/include/llvm/ADT/StringExtras.h Mon Feb 26 07:16:42 2018
@@ -232,19 +232,6 @@ void SplitString(StringRef Source,
                  SmallVectorImpl<StringRef> &OutFragments,
                  StringRef Delimiters = " \t\n\v\f\r");
 
-/// HashString - Hash function for strings.
-///
-/// This is the Bernstein hash function.
-//
-// FIXME: Investigate whether a modified bernstein hash function performs
-// better: http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx
-//   X*33+c -> X*33^c
-inline unsigned HashString(StringRef Str, unsigned Result = 0) {
-  for (StringRef::size_type i = 0, e = Str.size(); i != e; ++i)
-    Result = Result * 33 + (unsigned char)Str[i];
-  return Result;
-}
-
 /// Returns the English suffix for an ordinal integer (-st, -nd, -rd, -th).
 inline StringRef getOrdinalSuffix(unsigned Val) {
   // It is critically important that we do this perfectly for

Modified: llvm/trunk/lib/Support/StringMap.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/StringMap.cpp?rev=326091&r1=326090&r2=326091&view=diff
==============================================================================
--- llvm/trunk/lib/Support/StringMap.cpp (original)
+++ llvm/trunk/lib/Support/StringMap.cpp Mon Feb 26 07:16:42 2018
@@ -14,6 +14,7 @@
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/DJB.h"
 #include "llvm/Support/MathExtras.h"
 #include <cassert>
 
@@ -32,7 +33,7 @@ static unsigned getMinBucketToReserveFor
 
 StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) {
   ItemSize = itemSize;
-  
+
   // If a size is specified, initialize the table with that many buckets.
   if (InitSize) {
     // The table will grow when the number of entries reach 3/4 of the number of
@@ -41,7 +42,7 @@ StringMapImpl::StringMapImpl(unsigned In
     init(getMinBucketToReserveForEntries(InitSize));
     return;
   }
-  
+
   // Otherwise, initialize it with zero buckets to avoid the allocation.
   TheTable = nullptr;
   NumBuckets = 0;
@@ -56,7 +57,7 @@ void StringMapImpl::init(unsigned InitSi
   unsigned NewNumBuckets = InitSize ? InitSize : 16;
   NumItems = 0;
   NumTombstones = 0;
-  
+
   TheTable = static_cast<StringMapEntryBase **>(
       std::calloc(NewNumBuckets+1,
                   sizeof(StringMapEntryBase **) + sizeof(unsigned)));
@@ -82,7 +83,7 @@ unsigned StringMapImpl::LookupBucketFor(
     init(16);
     HTSize = NumBuckets;
   }
-  unsigned FullHashValue = HashString(Name);
+  unsigned FullHashValue = djbHash(Name, 0);
   unsigned BucketNo = FullHashValue & (HTSize-1);
   unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
 
@@ -98,11 +99,11 @@ unsigned StringMapImpl::LookupBucketFor(
         HashTable[FirstTombstone] = FullHashValue;
         return FirstTombstone;
       }
-      
+
       HashTable[BucketNo] = FullHashValue;
       return BucketNo;
     }
-    
+
     if (BucketItem == getTombstoneVal()) {
       // Skip over tombstones.  However, remember the first one we see.
       if (FirstTombstone == -1) FirstTombstone = BucketNo;
@@ -111,7 +112,7 @@ unsigned StringMapImpl::LookupBucketFor(
       // case here is that we are only looking at the buckets (for item info
       // being non-null and for the full hash value) not at the items.  This
       // is important for cache locality.
-      
+
       // Do the comparison like this because Name isn't necessarily
       // null-terminated!
       char *ItemStr = (char*)BucketItem+ItemSize;
@@ -120,10 +121,10 @@ unsigned StringMapImpl::LookupBucketFor(
         return BucketNo;
       }
     }
-    
+
     // Okay, we didn't find the item.  Probe to the next bucket.
     BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
-    
+
     // Use quadratic probing, it has fewer clumping artifacts than linear
     // probing and has good cache behavior in the common case.
     ++ProbeAmt;
@@ -136,7 +137,7 @@ unsigned StringMapImpl::LookupBucketFor(
 int StringMapImpl::FindKey(StringRef Key) const {
   unsigned HTSize = NumBuckets;
   if (HTSize == 0) return -1;  // Really empty table?
-  unsigned FullHashValue = HashString(Key);
+  unsigned FullHashValue = djbHash(Key, 0);
   unsigned BucketNo = FullHashValue & (HTSize-1);
   unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
 
@@ -146,7 +147,7 @@ int StringMapImpl::FindKey(StringRef Key
     // If we found an empty bucket, this key isn't in the table yet, return.
     if (LLVM_LIKELY(!BucketItem))
       return -1;
-    
+
     if (BucketItem == getTombstoneVal()) {
       // Ignore tombstones.
     } else if (LLVM_LIKELY(HashTable[BucketNo] == FullHashValue)) {
@@ -154,7 +155,7 @@ int StringMapImpl::FindKey(StringRef Key
       // case here is that we are only looking at the buckets (for item info
       // being non-null and for the full hash value) not at the items.  This
       // is important for cache locality.
-      
+
       // Do the comparison like this because NameStart isn't necessarily
       // null-terminated!
       char *ItemStr = (char*)BucketItem+ItemSize;
@@ -163,10 +164,10 @@ int StringMapImpl::FindKey(StringRef Key
         return BucketNo;
       }
     }
-    
+
     // Okay, we didn't find the item.  Probe to the next bucket.
     BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
-    
+
     // Use quadratic probing, it has fewer clumping artifacts than linear
     // probing and has good cache behavior in the common case.
     ++ProbeAmt;
@@ -187,7 +188,7 @@ void StringMapImpl::RemoveKey(StringMapE
 StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) {
   int Bucket = FindKey(Key);
   if (Bucket == -1) return nullptr;
-  
+
   StringMapEntryBase *Result = TheTable[Bucket];
   TheTable[Bucket] = getTombstoneVal();
   --NumItems;
@@ -241,13 +242,13 @@ unsigned StringMapImpl::RehashTable(unsi
           NewBucketNo = NewBucket;
         continue;
       }
-      
+
       // Otherwise probe for a spot.
       unsigned ProbeSize = 1;
       do {
         NewBucket = (NewBucket + ProbeSize++) & (NewSize-1);
       } while (NewTableArray[NewBucket]);
-      
+
       // Finally found a slot.  Fill it in.
       NewTableArray[NewBucket] = Bucket;
       NewHashArray[NewBucket] = FullHash;
@@ -255,9 +256,9 @@ unsigned StringMapImpl::RehashTable(unsi
         NewBucketNo = NewBucket;
     }
   }
-  
+
   free(TheTable);
-  
+
   TheTable = NewTableArray;
   NumBuckets = NewSize;
   NumTombstones = 0;