[llvm] r284871 - [ADT] Add CachedHashString.

Justin Lebar via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 21 13:10:48 PDT 2016


Author: jlebar
Date: Fri Oct 21 15:10:48 2016
New Revision: 284871

URL: http://llvm.org/viewvc/llvm-project?rev=284871&view=rev
Log:
[ADT] Add CachedHashString.

Summary:
This is like CachedHashStringRef, but owns its data.

This lets us use strings inside of DenseMaps.

Reviewers: timshen

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D25645

Modified:
    llvm/trunk/include/llvm/ADT/CachedHashString.h

Modified: llvm/trunk/include/llvm/ADT/CachedHashString.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/CachedHashString.h?rev=284871&r1=284870&r2=284871&view=diff
==============================================================================
--- llvm/trunk/include/llvm/ADT/CachedHashString.h (original)
+++ llvm/trunk/include/llvm/ADT/CachedHashString.h Fri Oct 21 15:10:48 2016
@@ -7,16 +7,14 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file defines CachedHashString and CachedHashStringRef.  These are like
-// std::string and StringRef, except they store their hash in addition to their
-// string data.
+// This file defines CachedHashString and CachedHashStringRef.  These are owning
+// and non-owning string types that store their hash in addition to their string
+// data.
 //
 // Unlike std::string, CachedHashString can be used in DenseSet/DenseMap
 // (because, unlike std::string, CachedHashString lets us have empty and
 // tombstone values).
 //
-// TODO: Add CachedHashString.
-//
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_ADT_CACHED_HASH_STRING_H
@@ -24,6 +22,7 @@
 
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/raw_ostream.h"
 
 namespace llvm {
 
@@ -66,6 +65,114 @@ template <> struct DenseMapInfo<CachedHa
   }
 };
 
+/// An owning string (a heap buffer P of Size bytes) plus a precomputed hash.
+///
+/// The string is not null-terminated, so always pair P with Size.
+class CachedHashString {
+  friend struct DenseMapInfo<CachedHashString>;
+
+  char *P; // owned heap buffer, or an empty/tombstone sentinel pointer
+  uint32_t Size; // length of the buffer in bytes
+  uint32_t Hash; // hash cached at construction time
+
+  static char *getEmptyKeyPtr() { return DenseMapInfo<char *>::getEmptyKey(); }
+  static char *getTombstoneKeyPtr() {
+    return DenseMapInfo<char *>::getTombstoneKey();
+  }
+
+  bool isEmptyOrTombstone() const {
+    return P == getEmptyKeyPtr() || P == getTombstoneKeyPtr();
+  }
+
+  explicit CachedHashString(char *EmptyOrTombstonePtr)
+      : P(EmptyOrTombstonePtr), Size(0), Hash(0) {
+    assert(isEmptyOrTombstone()); // only sentinel pointers may be passed here
+  }
+
+  // TODO: Use small-string optimization to avoid allocating.
+
+public:
+  // Explicit because this allocates, copies S, and computes its hash.
+  explicit CachedHashString(StringRef S)
+      : CachedHashString(S, DenseMapInfo<StringRef>::getHashValue(S)) {}
+
+  CachedHashString(StringRef S, uint32_t Hash)
+      : P(new char[S.size()]), Size(S.size()), Hash(Hash) {
+    memcpy(P, S.data(), S.size());
+  } // NOTE(review): Size is uint32_t, so S.size() is narrowed -- confirm OK.
+
+  // Ideally this class would not be copyable.  But SetVector requires copyable
+  // keys, and we want this to be usable there.
+  CachedHashString(const CachedHashString &Other)
+      : Size(Other.Size), Hash(Other.Hash) {
+    if (Other.isEmptyOrTombstone()) {
+      P = Other.P; // sentinels own nothing; just copy the pointer value
+    } else {
+      P = new char[Size]; // deep-copy the owned bytes
+      memcpy(P, Other.P, Size);
+    }
+  }
+
+  CachedHashString &operator=(CachedHashString Other) {
+    swap(*this, Other); // by-value Other now holds, and will free, our old P
+    return *this;
+  }
+
+  CachedHashString(CachedHashString &&Other) LLVM_NOEXCEPT : P(Other.P),
+                                                             Size(Other.Size),
+                                                             Hash(Other.Hash) {
+    Other.P = getEmptyKeyPtr(); // so Other's destructor skips delete[]
+  }
+
+  ~CachedHashString() {
+    if (!isEmptyOrTombstone()) // sentinel pointers were never allocated
+      delete[] P;
+  }
+
+  StringRef val() const { return StringRef(P, Size); } // non-owning view
+  uint32_t size() const { return Size; }
+  uint32_t hash() const { return Hash; }
+
+  operator StringRef() const { return val(); }
+  operator CachedHashStringRef() const {
+    return CachedHashStringRef(val(), Hash);
+  }
+
+  friend void swap(CachedHashString &LHS, CachedHashString &RHS) {
+    using std::swap;
+    swap(LHS.P, RHS.P);
+    swap(LHS.Size, RHS.Size);
+    swap(LHS.Hash, RHS.Hash);
+  }
+};
+
+template <> struct DenseMapInfo<CachedHashString> { // DenseMap key traits
+  static CachedHashString getEmptyKey() {
+    return CachedHashString(CachedHashString::getEmptyKeyPtr());
+  }
+  static CachedHashString getTombstoneKey() {
+    return CachedHashString(CachedHashString::getTombstoneKeyPtr());
+  }
+  static unsigned getHashValue(const CachedHashString &S) {
+    assert(!isEqual(S, getEmptyKey()) && "Cannot hash the empty key!");
+    assert(!isEqual(S, getTombstoneKey()) && "Cannot hash the tombstone key!");
+    return S.hash(); // returns the cached value; nothing is recomputed
+  }
+  static bool isEqual(const CachedHashString &LHS,
+                      const CachedHashString &RHS) {
+    if (LHS.hash() != RHS.hash()) // cheap reject before touching the bytes
+      return false;
+    if (LHS.P == CachedHashString::getEmptyKeyPtr()) // sentinels: compare by P
+      return RHS.P == CachedHashString::getEmptyKeyPtr();
+    if (LHS.P == CachedHashString::getTombstoneKeyPtr())
+      return RHS.P == CachedHashString::getTombstoneKeyPtr();
+
+    // RHS may still be a sentinel: its length is 0, so RHS.P is never read.
+    // NOTE(review): a real "" whose hash is 0 then equals it -- confirm.
+    return LHS.val() == RHS.val();
+  }
+};
+
 } // namespace llvm
 
 #endif




More information about the llvm-commits mailing list