[llvm] r284871 - [ADT] Add CachedHashString.
Justin Lebar via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 24 09:12:33 PDT 2016
> This loses std::string's small string optimization
Right, I left a TODO in the code to this effect.
> is that OK?
At the moment, I think yes. Nobody is using this data structure in
performance-critical code, and every place where we use it previously
used an STL associative container of std::string (either std::map or
std::unordered_set), which would have at least as many allocations as
DenseMap<CachedHashString>.
That said, I do think it would probably be useful to implement SSO
(the small-string optimization) at some point in the future.
On Mon, Oct 24, 2016 at 8:56 AM, David Blaikie <dblaikie at gmail.com> wrote:
>
>
> On Fri, Oct 21, 2016 at 1:20 PM Justin Lebar via llvm-commits
> <llvm-commits at lists.llvm.org> wrote:
>>
>> Author: jlebar
>> Date: Fri Oct 21 15:10:48 2016
>> New Revision: 284871
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=284871&view=rev
>> Log:
>> [ADT] Add CachedHashString.
>>
>> Summary:
>> This is like CachedHashStringRef, but owns its data.
>>
>> This lets us use strings inside of DenseMaps.
>>
>> Reviewers: timshen
>>
>> Subscribers: llvm-commits
>>
>> Differential Revision: https://reviews.llvm.org/D25645
>>
>> Modified:
>> llvm/trunk/include/llvm/ADT/CachedHashString.h
>>
>> Modified: llvm/trunk/include/llvm/ADT/CachedHashString.h
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/CachedHashString.h?rev=284871&r1=284870&r2=284871&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/include/llvm/ADT/CachedHashString.h (original)
>> +++ llvm/trunk/include/llvm/ADT/CachedHashString.h Fri Oct 21 15:10:48 2016
>> @@ -7,16 +7,14 @@
>> //
>>
>> //===----------------------------------------------------------------------===//
>> //
>> -// This file defines CachedHashString and CachedHashStringRef. These are like
>> -// std::string and StringRef, except they store their hash in addition to their
>> -// string data.
>> +// This file defines CachedHashString and CachedHashStringRef. These are owning
>> +// and not-owning string types that store their hash in addition to their string
>> +// data.
>> //
>> // Unlike std::string, CachedHashString can be used in DenseSet/DenseMap
>> // (because, unlike std::string, CachedHashString lets us have empty and
>> // tombstone values).
>> //
>> -// TODO: Add CachedHashString.
>> -//
>>
>> //===----------------------------------------------------------------------===//
>>
>> #ifndef LLVM_ADT_CACHED_HASH_STRING_H
>> @@ -24,6 +22,7 @@
>>
>> #include "llvm/ADT/DenseMap.h"
>> #include "llvm/ADT/StringRef.h"
>> +#include "llvm/Support/raw_ostream.h"
>>
>> namespace llvm {
>>
>> @@ -66,6 +65,114 @@ template <> struct DenseMapInfo<CachedHa
>> }
>> };
>>
>> +/// A container which contains a string, which it owns, plus a precomputed hash.
>> +///
>> +/// We do not null-terminate the string.
>> +class CachedHashString {
>> + friend struct DenseMapInfo<CachedHashString>;
>> +
>> + char *P;
>> + uint32_t Size;
>> + uint32_t Hash;
>
>
> This loses std::string's small string optimization - is that OK?
>
>>
>> +
>> + static char *getEmptyKeyPtr() { return DenseMapInfo<char *>::getEmptyKey(); }
>> + static char *getTombstoneKeyPtr() {
>> + return DenseMapInfo<char *>::getTombstoneKey();
>> + }
>> +
>> + bool isEmptyOrTombstone() const {
>> + return P == getEmptyKeyPtr() || P == getTombstoneKeyPtr();
>> + }
>> +
>> + explicit CachedHashString(char *EmptyOrTombstonePtr)
>> + : P(EmptyOrTombstonePtr), Size(0), Hash(0) {
>> + assert(isEmptyOrTombstone());
>> + }
>> +
>> + // TODO: Use small-string optimization to avoid allocating.
>> +
>> +public:
>> + // Explicit because copying and hashing a string isn't free.
>> + explicit CachedHashString(StringRef S)
>> + : CachedHashString(S, DenseMapInfo<StringRef>::getHashValue(S)) {}
>> +
>> + CachedHashString(StringRef S, uint32_t Hash)
>> + : P(new char[S.size()]), Size(S.size()), Hash(Hash) {
>> + memcpy(P, S.data(), S.size());
>> + }
>> +
>> + // Ideally this class would not be copyable. But SetVector requires copyable
>> + // keys, and we want this to be usable there.
>> + CachedHashString(const CachedHashString &Other)
>> + : Size(Other.Size), Hash(Other.Hash) {
>> + if (Other.isEmptyOrTombstone()) {
>> + P = Other.P;
>> + } else {
>> + P = new char[Size];
>> + memcpy(P, Other.P, Size);
>> + }
>> + }
>> +
>> + CachedHashString &operator=(CachedHashString Other) {
>> + swap(*this, Other);
>> + return *this;
>> + }
>> +
>> + CachedHashString(CachedHashString &&Other) LLVM_NOEXCEPT : P(Other.P),
>> + Size(Other.Size),
>> + Hash(Other.Hash) {
>> + Other.P = getEmptyKeyPtr();
>> + }
>> +
>> + ~CachedHashString() {
>> + if (!isEmptyOrTombstone())
>> + delete[] P;
>> + }
>> +
>> + StringRef val() const { return StringRef(P, Size); }
>> + uint32_t size() const { return Size; }
>> + uint32_t hash() const { return Hash; }
>> +
>> + operator StringRef() const { return val(); }
>> + operator CachedHashStringRef() const {
>> + return CachedHashStringRef(val(), Hash);
>> + }
>> +
>> + friend void swap(CachedHashString &LHS, CachedHashString &RHS) {
>> + using std::swap;
>> + swap(LHS.P, RHS.P);
>> + swap(LHS.Size, RHS.Size);
>> + swap(LHS.Hash, RHS.Hash);
>> + }
>> +};
>> +
>> +template <> struct DenseMapInfo<CachedHashString> {
>> + static CachedHashString getEmptyKey() {
>> + return CachedHashString(CachedHashString::getEmptyKeyPtr());
>> + }
>> + static CachedHashString getTombstoneKey() {
>> + return CachedHashString(CachedHashString::getTombstoneKeyPtr());
>> + }
>> + static unsigned getHashValue(const CachedHashString &S) {
>> + assert(!isEqual(S, getEmptyKey()) && "Cannot hash the empty key!");
>> + assert(!isEqual(S, getTombstoneKey()) && "Cannot hash the tombstone key!");
>> + return S.hash();
>> + }
>> + static bool isEqual(const CachedHashString &LHS,
>> + const CachedHashString &RHS) {
>> + if (LHS.hash() != RHS.hash())
>> + return false;
>> + if (LHS.P == CachedHashString::getEmptyKeyPtr())
>> + return RHS.P == CachedHashString::getEmptyKeyPtr();
>> + if (LHS.P == CachedHashString::getTombstoneKeyPtr())
>> + return RHS.P == CachedHashString::getTombstoneKeyPtr();
>> +
>> + // This is safe because if RHS.P is the empty or tombstone key, it will have
>> + // length 0, so we'll never dereference its pointer.
>> + return LHS.val() == RHS.val();
>> + }
>> +};
>> +
>> } // namespace llvm
>>
>> #endif
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at lists.llvm.org
>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
More information about the llvm-commits mailing list