[llvm] r326081 - [Support] Replace HashString with djbHash.
Jonas Devlieghere via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 26 03:30:14 PST 2018
Author: jdevlieghere
Date: Mon Feb 26 03:30:13 2018
New Revision: 326081
URL: http://llvm.org/viewvc/llvm-project?rev=326081&view=rev
Log:
[Support] Replace HashString with djbHash.
This removes the HashString function from StringExtraces and replaces
its uses with calls to djbHash from DJB.h
This is *almost* NFC. While the algorithm is identical, the djbHash
implementation in StringExtras used 0 as its seed while the
implementation in DJB uses 5381. The latter has been shown to result in
less collisions and improved avalanching.
https://reviews.llvm.org/D43615
(cherry picked from commit 77f7f965bc9499a9ae768a296ca5a1f7347d1d2c)
Modified:
llvm/trunk/include/llvm/ADT/StringExtras.h
llvm/trunk/lib/Support/StringMap.cpp
Modified: llvm/trunk/include/llvm/ADT/StringExtras.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/StringExtras.h?rev=326081&r1=326080&r2=326081&view=diff
==============================================================================
--- llvm/trunk/include/llvm/ADT/StringExtras.h (original)
+++ llvm/trunk/include/llvm/ADT/StringExtras.h Mon Feb 26 03:30:13 2018
@@ -232,19 +232,6 @@ void SplitString(StringRef Source,
SmallVectorImpl<StringRef> &OutFragments,
StringRef Delimiters = " \t\n\v\f\r");
-/// HashString - Hash function for strings.
-///
-/// This is the Bernstein hash function.
-//
-// FIXME: Investigate whether a modified bernstein hash function performs
-// better: http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx
-// X*33+c -> X*33^c
-inline unsigned HashString(StringRef Str, unsigned Result = 0) {
- for (StringRef::size_type i = 0, e = Str.size(); i != e; ++i)
- Result = Result * 33 + (unsigned char)Str[i];
- return Result;
-}
-
/// Returns the English suffix for an ordinal integer (-st, -nd, -rd, -th).
inline StringRef getOrdinalSuffix(unsigned Val) {
// It is critically important that we do this perfectly for
Modified: llvm/trunk/lib/Support/StringMap.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/StringMap.cpp?rev=326081&r1=326080&r2=326081&view=diff
==============================================================================
--- llvm/trunk/lib/Support/StringMap.cpp (original)
+++ llvm/trunk/lib/Support/StringMap.cpp Mon Feb 26 03:30:13 2018
@@ -14,6 +14,7 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/DJB.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
@@ -32,7 +33,7 @@ static unsigned getMinBucketToReserveFor
StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) {
ItemSize = itemSize;
-
+
// If a size is specified, initialize the table with that many buckets.
if (InitSize) {
// The table will grow when the number of entries reach 3/4 of the number of
@@ -41,7 +42,7 @@ StringMapImpl::StringMapImpl(unsigned In
init(getMinBucketToReserveForEntries(InitSize));
return;
}
-
+
// Otherwise, initialize it with zero buckets to avoid the allocation.
TheTable = nullptr;
NumBuckets = 0;
@@ -56,7 +57,7 @@ void StringMapImpl::init(unsigned InitSi
unsigned NewNumBuckets = InitSize ? InitSize : 16;
NumItems = 0;
NumTombstones = 0;
-
+
TheTable = static_cast<StringMapEntryBase **>(
std::calloc(NewNumBuckets+1,
sizeof(StringMapEntryBase **) + sizeof(unsigned)));
@@ -82,7 +83,7 @@ unsigned StringMapImpl::LookupBucketFor(
init(16);
HTSize = NumBuckets;
}
- unsigned FullHashValue = HashString(Name);
+ unsigned FullHashValue = djbHash(Name);
unsigned BucketNo = FullHashValue & (HTSize-1);
unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
@@ -98,11 +99,11 @@ unsigned StringMapImpl::LookupBucketFor(
HashTable[FirstTombstone] = FullHashValue;
return FirstTombstone;
}
-
+
HashTable[BucketNo] = FullHashValue;
return BucketNo;
}
-
+
if (BucketItem == getTombstoneVal()) {
// Skip over tombstones. However, remember the first one we see.
if (FirstTombstone == -1) FirstTombstone = BucketNo;
@@ -111,7 +112,7 @@ unsigned StringMapImpl::LookupBucketFor(
// case here is that we are only looking at the buckets (for item info
// being non-null and for the full hash value) not at the items. This
// is important for cache locality.
-
+
// Do the comparison like this because Name isn't necessarily
// null-terminated!
char *ItemStr = (char*)BucketItem+ItemSize;
@@ -120,10 +121,10 @@ unsigned StringMapImpl::LookupBucketFor(
return BucketNo;
}
}
-
+
// Okay, we didn't find the item. Probe to the next bucket.
BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
-
+
// Use quadratic probing, it has fewer clumping artifacts than linear
// probing and has good cache behavior in the common case.
++ProbeAmt;
@@ -136,7 +137,7 @@ unsigned StringMapImpl::LookupBucketFor(
int StringMapImpl::FindKey(StringRef Key) const {
unsigned HTSize = NumBuckets;
if (HTSize == 0) return -1; // Really empty table?
- unsigned FullHashValue = HashString(Key);
+ unsigned FullHashValue = djbHash(Key);
unsigned BucketNo = FullHashValue & (HTSize-1);
unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
@@ -146,7 +147,7 @@ int StringMapImpl::FindKey(StringRef Key
// If we found an empty bucket, this key isn't in the table yet, return.
if (LLVM_LIKELY(!BucketItem))
return -1;
-
+
if (BucketItem == getTombstoneVal()) {
// Ignore tombstones.
} else if (LLVM_LIKELY(HashTable[BucketNo] == FullHashValue)) {
@@ -154,7 +155,7 @@ int StringMapImpl::FindKey(StringRef Key
// case here is that we are only looking at the buckets (for item info
// being non-null and for the full hash value) not at the items. This
// is important for cache locality.
-
+
// Do the comparison like this because NameStart isn't necessarily
// null-terminated!
char *ItemStr = (char*)BucketItem+ItemSize;
@@ -163,10 +164,10 @@ int StringMapImpl::FindKey(StringRef Key
return BucketNo;
}
}
-
+
// Okay, we didn't find the item. Probe to the next bucket.
BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
-
+
// Use quadratic probing, it has fewer clumping artifacts than linear
// probing and has good cache behavior in the common case.
++ProbeAmt;
@@ -187,7 +188,7 @@ void StringMapImpl::RemoveKey(StringMapE
StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) {
int Bucket = FindKey(Key);
if (Bucket == -1) return nullptr;
-
+
StringMapEntryBase *Result = TheTable[Bucket];
TheTable[Bucket] = getTombstoneVal();
--NumItems;
@@ -241,13 +242,13 @@ unsigned StringMapImpl::RehashTable(unsi
NewBucketNo = NewBucket;
continue;
}
-
+
// Otherwise probe for a spot.
unsigned ProbeSize = 1;
do {
NewBucket = (NewBucket + ProbeSize++) & (NewSize-1);
} while (NewTableArray[NewBucket]);
-
+
// Finally found a slot. Fill it in.
NewTableArray[NewBucket] = Bucket;
NewHashArray[NewBucket] = FullHash;
@@ -255,9 +256,9 @@ unsigned StringMapImpl::RehashTable(unsi
NewBucketNo = NewBucket;
}
}
-
+
free(TheTable);
-
+
TheTable = NewTableArray;
NumBuckets = NewSize;
NumTombstones = 0;
More information about the llvm-commits
mailing list