[llvm] 67c631d - [ADT][StringMap] Add ability to precompute and reuse the string hash

David Blaikie via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 11 16:07:16 PST 2023


Author: David Blaikie
Date: 2023-12-12T00:07:08Z
New Revision: 67c631d283fc96d652304199cd625be426b98f8e

URL: https://github.com/llvm/llvm-project/commit/67c631d283fc96d652304199cd625be426b98f8e
DIFF: https://github.com/llvm/llvm-project/commit/67c631d283fc96d652304199cd625be426b98f8e.diff

LOG: [ADT][StringMap] Add ability to precompute and reuse the string hash

Useful for lldb's const string pool, using the hash to determine which
string map to lock and query/insert.

Derived from https://reviews.llvm.org/D122974 by Luboš Luňák

Added: 
    

Modified: 
    llvm/include/llvm/ADT/StringMap.h
    llvm/lib/Support/StringMap.cpp
    llvm/unittests/ADT/StringMapTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/ADT/StringMap.h b/llvm/include/llvm/ADT/StringMap.h
index 466f95254d102e..32e611467ac58e 100644
--- a/llvm/include/llvm/ADT/StringMap.h
+++ b/llvm/include/llvm/ADT/StringMap.h
@@ -17,6 +17,7 @@
 #include "llvm/ADT/StringMapEntry.h"
 #include "llvm/ADT/iterator.h"
 #include "llvm/Support/AllocatorBase.h"
+#include "llvm/Support/DJB.h"
 #include "llvm/Support/PointerLikeTypeTraits.h"
 #include <initializer_list>
 #include <iterator>
@@ -60,12 +61,20 @@ class StringMapImpl {
   /// specified bucket will be non-null.  Otherwise, it will be null.  In either
   /// case, the FullHashValue field of the bucket will be set to the hash value
   /// of the string.
-  unsigned LookupBucketFor(StringRef Key);
+  unsigned LookupBucketFor(StringRef Key) {
+    return LookupBucketFor(Key, hash(Key));
+  }
+
+  /// Overload that explicitly takes precomputed hash(Key).
+  unsigned LookupBucketFor(StringRef Key, uint32_t FullHashValue);
 
   /// FindKey - Look up the bucket that contains the specified key. If it exists
   /// in the map, return the bucket number of the key.  Otherwise return -1.
   /// This does not modify the map.
-  int FindKey(StringRef Key) const;
+  int FindKey(StringRef Key) const { return FindKey(Key, hash(Key)); }
+
+  /// Overload that explicitly takes precomputed hash(Key).
+  int FindKey(StringRef Key, uint32_t FullHashValue) const;
 
   /// RemoveKey - Remove the specified StringMapEntry from the table, but do not
   /// delete it.  This aborts if the value isn't in the table.
@@ -94,6 +103,13 @@ class StringMapImpl {
   bool empty() const { return NumItems == 0; }
   unsigned size() const { return NumItems; }
 
+  /// Returns the hash value that will be used for the given string.
+  /// This allows precomputing the value and passing it explicitly
+  /// to some of the functions.
+  /// The implementation of this function is not guaranteed to be stable
+  /// and may change.
+  static uint32_t hash(StringRef Key);
+
   void swap(StringMapImpl &Other) {
     std::swap(TheTable, Other.TheTable);
     std::swap(NumBuckets, Other.NumBuckets);
@@ -215,15 +231,19 @@ class LLVM_ALLOCATORHOLDER_EMPTYBASE StringMap
                       StringMapKeyIterator<ValueTy>(end()));
   }
 
-  iterator find(StringRef Key) {
-    int Bucket = FindKey(Key);
+  iterator find(StringRef Key) { return find(Key, hash(Key)); }
+
+  iterator find(StringRef Key, uint32_t FullHashValue) {
+    int Bucket = FindKey(Key, FullHashValue);
     if (Bucket == -1)
       return end();
     return iterator(TheTable + Bucket, true);
   }
 
-  const_iterator find(StringRef Key) const {
-    int Bucket = FindKey(Key);
+  const_iterator find(StringRef Key) const { return find(Key, hash(Key)); }
+
+  const_iterator find(StringRef Key, uint32_t FullHashValue) const {
+    int Bucket = FindKey(Key, FullHashValue);
     if (Bucket == -1)
       return end();
     return const_iterator(TheTable + Bucket, true);
@@ -305,7 +325,13 @@ class LLVM_ALLOCATORHOLDER_EMPTYBASE StringMap
   /// if and only if the insertion takes place, and the iterator component of
   /// the pair points to the element with key equivalent to the key of the pair.
   std::pair<iterator, bool> insert(std::pair<StringRef, ValueTy> KV) {
-    return try_emplace(KV.first, std::move(KV.second));
+    return try_emplace_with_hash(KV.first, hash(KV.first),
+                                 std::move(KV.second));
+  }
+
+  std::pair<iterator, bool> insert(std::pair<StringRef, ValueTy> KV,
+                                   uint32_t FullHashValue) {
+    return try_emplace_with_hash(KV.first, FullHashValue, std::move(KV.second));
   }
 
   /// Inserts elements from range [first, last). If multiple elements in the
@@ -339,7 +365,14 @@ class LLVM_ALLOCATORHOLDER_EMPTYBASE StringMap
   /// the pair points to the element with key equivalent to the key of the pair.
   template <typename... ArgsTy>
   std::pair<iterator, bool> try_emplace(StringRef Key, ArgsTy &&...Args) {
-    unsigned BucketNo = LookupBucketFor(Key);
+    return try_emplace_with_hash(Key, hash(Key), std::forward<ArgsTy>(Args)...);
+  }
+
+  template <typename... ArgsTy>
+  std::pair<iterator, bool> try_emplace_with_hash(StringRef Key,
+                                                  uint32_t FullHashValue,
+                                                  ArgsTy &&...Args) {
+    unsigned BucketNo = LookupBucketFor(Key, FullHashValue);
     StringMapEntryBase *&Bucket = TheTable[BucketNo];
     if (Bucket && Bucket != getTombstoneVal())
       return std::make_pair(iterator(TheTable + BucketNo, false),

diff  --git a/llvm/lib/Support/StringMap.cpp b/llvm/lib/Support/StringMap.cpp
index 67c05a87959cf0..451108d01d38a4 100644
--- a/llvm/lib/Support/StringMap.cpp
+++ b/llvm/lib/Support/StringMap.cpp
@@ -43,6 +43,8 @@ static inline unsigned *getHashTable(StringMapEntryBase **TheTable,
   return reinterpret_cast<unsigned *>(TheTable + NumBuckets + 1);
 }
 
+uint32_t StringMapImpl::hash(StringRef Key) { return xxh3_64bits(Key); }
+
 StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) {
   ItemSize = itemSize;
 
@@ -81,11 +83,14 @@ void StringMapImpl::init(unsigned InitSize) {
 /// specified bucket will be non-null.  Otherwise, it will be null.  In either
 /// case, the FullHashValue field of the bucket will be set to the hash value
 /// of the string.
-unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
+unsigned StringMapImpl::LookupBucketFor(StringRef Name,
+                                        uint32_t FullHashValue) {
+#ifdef EXPENSIVE_CHECKS
+  assert(FullHashValue == hash(Name));
+#endif
   // Hash table unallocated so far?
   if (NumBuckets == 0)
     init(16);
-  unsigned FullHashValue = xxh3_64bits(Name);
   if (shouldReverseIterate())
     FullHashValue = ~FullHashValue;
   unsigned BucketNo = FullHashValue & (NumBuckets - 1);
@@ -139,10 +144,12 @@ unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
 /// FindKey - Look up the bucket that contains the specified key. If it exists
 /// in the map, return the bucket number of the key.  Otherwise return -1.
 /// This does not modify the map.
-int StringMapImpl::FindKey(StringRef Key) const {
+int StringMapImpl::FindKey(StringRef Key, uint32_t FullHashValue) const {
   if (NumBuckets == 0)
     return -1; // Really empty table?
-  unsigned FullHashValue = xxh3_64bits(Key);
+#ifdef EXPENSIVE_CHECKS
+  assert(FullHashValue == hash(Key);
+#endif
   if (shouldReverseIterate())
     FullHashValue = ~FullHashValue;
   unsigned BucketNo = FullHashValue & (NumBuckets - 1);

diff  --git a/llvm/unittests/ADT/StringMapTest.cpp b/llvm/unittests/ADT/StringMapTest.cpp
index f9b138e9a47213..c9ef3f8a096ee9 100644
--- a/llvm/unittests/ADT/StringMapTest.cpp
+++ b/llvm/unittests/ADT/StringMapTest.cpp
@@ -487,6 +487,17 @@ TEST_F(StringMapTest, NotEqualWithDifferentValues) {
   ASSERT_TRUE(B != A);
 }
 
+TEST_F(StringMapTest, PrecomputedHash) {
+  StringMap<int> A;
+  StringRef Key = "foo";
+  int Value = 42;
+  uint64_t Hash = StringMap<int>::hash(Key);
+  A.insert({"foo", Value}, Hash);
+  auto I = A.find(Key, Hash);
+  ASSERT_NE(I, A.end());
+  ASSERT_EQ(I->second, Value);
+}
+
 struct Countable {
   int &InstanceCount;
   int Number;


        


More information about the llvm-commits mailing list