[llvm] r284871 - [ADT] Add CachedHashString.

Justin Lebar via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 24 09:12:33 PDT 2016


> This loses std::string's small string optimization

Right, I left a TODO in the code to this effect.

> is that OK?

At the moment, I think yes.  Nobody is using this data structure in
performance-critical code, and every place that uses it previously
used an STL associative container of std::string (either std::map or
std::unordered_set), which performs at least as many allocations as
DenseMap<CachedHashString>.

That said, I do think it would be useful to implement SSO (the
small-string optimization) at some point in the future.



On Mon, Oct 24, 2016 at 8:56 AM, David Blaikie <dblaikie at gmail.com> wrote:
>
>
> On Fri, Oct 21, 2016 at 1:20 PM Justin Lebar via llvm-commits
> <llvm-commits at lists.llvm.org> wrote:
>>
>> Author: jlebar
>> Date: Fri Oct 21 15:10:48 2016
>> New Revision: 284871
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=284871&view=rev
>> Log:
>> [ADT] Add CachedHashString.
>>
>> Summary:
>> This is like CachedHashStringRef, but owns its data.
>>
>> This lets us use strings inside of DenseMaps.
>>
>> Reviewers: timshen
>>
>> Subscribers: llvm-commits
>>
>> Differential Revision: https://reviews.llvm.org/D25645
>>
>> Modified:
>>     llvm/trunk/include/llvm/ADT/CachedHashString.h
>>
>> Modified: llvm/trunk/include/llvm/ADT/CachedHashString.h
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/CachedHashString.h?rev=284871&r1=284870&r2=284871&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/include/llvm/ADT/CachedHashString.h (original)
>> +++ llvm/trunk/include/llvm/ADT/CachedHashString.h Fri Oct 21 15:10:48
>> 2016
>> @@ -7,16 +7,14 @@
>>  //
>>
>> //===----------------------------------------------------------------------===//
>>  //
>> -// This file defines CachedHashString and CachedHashStringRef.  These are
>> like
>> -// std::string and StringRef, except they store their hash in addition to
>> their
>> -// string data.
>> +// This file defines CachedHashString and CachedHashStringRef.  These are
>> owning
>> +// and not-owning string types that store their hash in addition to their
>> string
>> +// data.
>>  //
>>  // Unlike std::string, CachedHashString can be used in DenseSet/DenseMap
>>  // (because, unlike std::string, CachedHashString lets us have empty and
>>  // tombstone values).
>>  //
>> -// TODO: Add CachedHashString.
>> -//
>>
>> //===----------------------------------------------------------------------===//
>>
>>  #ifndef LLVM_ADT_CACHED_HASH_STRING_H
>> @@ -24,6 +22,7 @@
>>
>>  #include "llvm/ADT/DenseMap.h"
>>  #include "llvm/ADT/StringRef.h"
>> +#include "llvm/Support/raw_ostream.h"
>>
>>  namespace llvm {
>>
>> @@ -66,6 +65,114 @@ template <> struct DenseMapInfo<CachedHa
>>    }
>>  };
>>
>> +/// A container which contains a string, which it owns, plus a
>> precomputed hash.
>> +///
>> +/// We do not null-terminate the string.
>> +class CachedHashString {
>> +  friend struct DenseMapInfo<CachedHashString>;
>> +
>> +  char *P;
>> +  uint32_t Size;
>> +  uint32_t Hash;
>
>
> This loses std::string's small string optimization - is that OK?
>
>>
>> +
>> +  static char *getEmptyKeyPtr() { return DenseMapInfo<char
>> *>::getEmptyKey(); }
>> +  static char *getTombstoneKeyPtr() {
>> +    return DenseMapInfo<char *>::getTombstoneKey();
>> +  }
>> +
>> +  bool isEmptyOrTombstone() const {
>> +    return P == getEmptyKeyPtr() || P == getTombstoneKeyPtr();
>> +  }
>> +
>> +  explicit CachedHashString(char *EmptyOrTombstonePtr)
>> +      : P(EmptyOrTombstonePtr), Size(0), Hash(0) {
>> +    assert(isEmptyOrTombstone());
>> +  }
>> +
>> +  // TODO: Use small-string optimization to avoid allocating.
>> +
>> +public:
>> +  // Explicit because copying and hashing a string isn't free.
>> +  explicit CachedHashString(StringRef S)
>> +      : CachedHashString(S, DenseMapInfo<StringRef>::getHashValue(S)) {}
>> +
>> +  CachedHashString(StringRef S, uint32_t Hash)
>> +      : P(new char[S.size()]), Size(S.size()), Hash(Hash) {
>> +    memcpy(P, S.data(), S.size());
>> +  }
>> +
>> +  // Ideally this class would not be copyable.  But SetVector requires
>> copyable
>> +  // keys, and we want this to be usable there.
>> +  CachedHashString(const CachedHashString &Other)
>> +      : Size(Other.Size), Hash(Other.Hash) {
>> +    if (Other.isEmptyOrTombstone()) {
>> +      P = Other.P;
>> +    } else {
>> +      P = new char[Size];
>> +      memcpy(P, Other.P, Size);
>> +    }
>> +  }
>> +
>> +  CachedHashString &operator=(CachedHashString Other) {
>> +    swap(*this, Other);
>> +    return *this;
>> +  }
>> +
>> +  CachedHashString(CachedHashString &&Other) LLVM_NOEXCEPT : P(Other.P),
>> +
>> Size(Other.Size),
>> +
>> Hash(Other.Hash) {
>> +    Other.P = getEmptyKeyPtr();
>> +  }
>> +
>> +  ~CachedHashString() {
>> +    if (!isEmptyOrTombstone())
>> +      delete[] P;
>> +  }
>> +
>> +  StringRef val() const { return StringRef(P, Size); }
>> +  uint32_t size() const { return Size; }
>> +  uint32_t hash() const { return Hash; }
>> +
>> +  operator StringRef() const { return val(); }
>> +  operator CachedHashStringRef() const {
>> +    return CachedHashStringRef(val(), Hash);
>> +  }
>> +
>> +  friend void swap(CachedHashString &LHS, CachedHashString &RHS) {
>> +    using std::swap;
>> +    swap(LHS.P, RHS.P);
>> +    swap(LHS.Size, RHS.Size);
>> +    swap(LHS.Hash, RHS.Hash);
>> +  }
>> +};
>> +
>> +template <> struct DenseMapInfo<CachedHashString> {
>> +  static CachedHashString getEmptyKey() {
>> +    return CachedHashString(CachedHashString::getEmptyKeyPtr());
>> +  }
>> +  static CachedHashString getTombstoneKey() {
>> +    return CachedHashString(CachedHashString::getTombstoneKeyPtr());
>> +  }
>> +  static unsigned getHashValue(const CachedHashString &S) {
>> +    assert(!isEqual(S, getEmptyKey()) && "Cannot hash the empty key!");
>> +    assert(!isEqual(S, getTombstoneKey()) && "Cannot hash the tombstone
>> key!");
>> +    return S.hash();
>> +  }
>> +  static bool isEqual(const CachedHashString &LHS,
>> +                      const CachedHashString &RHS) {
>> +    if (LHS.hash() != RHS.hash())
>> +      return false;
>> +    if (LHS.P == CachedHashString::getEmptyKeyPtr())
>> +      return RHS.P == CachedHashString::getEmptyKeyPtr();
>> +    if (LHS.P == CachedHashString::getTombstoneKeyPtr())
>> +      return RHS.P == CachedHashString::getTombstoneKeyPtr();
>> +
>> +    // This is safe because if RHS.P is the empty or tombstone key, it
>> will have
>> +    // length 0, so we'll never dereference its pointer.
>> +    return LHS.val() == RHS.val();
>> +  }
>> +};
>> +
>>  } // namespace llvm
>>
>>  #endif
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at lists.llvm.org
>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits


More information about the llvm-commits mailing list