[llvm-commits] [llvm] r150890 - in /llvm/trunk: include/llvm/ADT/Hashing.h lib/Support/CMakeLists.txt lib/Support/Hashing.cpp unittests/ADT/HashingTest.cpp unittests/CMakeLists.txt

Jay Foad jay.foad at gmail.com
Tue Feb 21 03:37:51 PST 2012


On 18 February 2012 21:00, Talin <viridia at gmail.com> wrote:
> Author: talin
> Date: Sat Feb 18 15:00:49 2012
> New Revision: 150890
>
> URL: http://llvm.org/viewvc/llvm-project?rev=150890&view=rev
> Log:
> Hashing.h - utilities for hashing various data types.
>
>
> Added:
>    llvm/trunk/include/llvm/ADT/Hashing.h
>    llvm/trunk/lib/Support/Hashing.cpp
>    llvm/trunk/unittests/ADT/HashingTest.cpp
> Modified:
>    llvm/trunk/lib/Support/CMakeLists.txt
>    llvm/trunk/unittests/CMakeLists.txt
>
> Added: llvm/trunk/include/llvm/ADT/Hashing.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/Hashing.h?rev=150890&view=auto
> ==============================================================================
> --- llvm/trunk/include/llvm/ADT/Hashing.h (added)
> +++ llvm/trunk/include/llvm/ADT/Hashing.h Sat Feb 18 15:00:49 2012
> @@ -0,0 +1,180 @@
> +//===-- llvm/ADT/Hashing.h - Utilities for hashing --------------*- C++ -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file defines utilities for computing hash values for various data types.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef LLVM_ADT_HASHING_H
> +#define LLVM_ADT_HASHING_H
> +
> +#include "llvm/ADT/ArrayRef.h"
> +#include "llvm/ADT/StringRef.h"
> +#include "llvm/Support/AlignOf.h"
> +#include "llvm/Support/Compiler.h"
> +#include "llvm/Support/DataTypes.h"
> +
> +namespace llvm {
> +
> +/// Class to compute a hash value from multiple data fields of arbitrary
> +/// types. Note that if you are hashing a single data type, such as a
> +/// string, it may be cheaper to use a hash algorithm that is tailored
> +/// for that specific data type.
> +/// Typical Usage:
> +///    GeneralHash Hash;
> +///    Hash.add(someValue);
> +///    Hash.add(someOtherValue);
> +///    return Hash.finish();
> +/// Adapted from MurmurHash2 by Austin Appleby
> +class GeneralHash {
> +private:
> +  enum {
> +    M = 0x5bd1e995
> +  };
> +  unsigned Hash;
> +  unsigned Count;
> +public:
> +  GeneralHash(unsigned Seed = 0) : Hash(Seed), Count(0) {}
> +
> +  /// Add a pointer value.
> +  /// Note: this adds pointers to the hash using sizes and endianness that
> +  /// depend on the host.  It doesn't matter however, because hashing on
> +  /// pointer values is inherently unstable.
> +  template<typename T>
> +  GeneralHash& add(const T *PtrVal) {
> +    addBits(&PtrVal, &PtrVal + 1);
> +    return *this;
> +  }
> +
> +  /// Add an ArrayRef of arbitrary data.
> +  template<typename T>
> +  GeneralHash& add(ArrayRef<T> ArrayVal) {
> +    addBits(ArrayVal.begin(), ArrayVal.end());
> +    return *this;
> +  }
> +
> +  /// Add a string
> +  GeneralHash& add(StringRef StrVal) {
> +    addBits(StrVal.begin(), StrVal.end());
> +    return *this;
> +  }
> +
> +  /// Add a signed 32-bit integer.
> +  GeneralHash& add(int32_t Data) {
> +    addInt(uint32_t(Data));
> +    return *this;
> +  }
> +
> +  /// Add an unsigned 32-bit integer.
> +  GeneralHash& add(uint32_t Data) {
> +    addInt(Data);
> +    return *this;
> +  }
> +
> +  /// Add a signed 64-bit integer.
> +  GeneralHash& add(int64_t Data) {
> +    addInt(uint64_t(Data));
> +    return *this;
> +  }
> +
> +  /// Add an unsigned 64-bit integer.
> +  GeneralHash& add(uint64_t Data) {
> +    addInt(Data);
> +    return *this;
> +  }
> +
> +  /// Add a float
> +  GeneralHash& add(float Data) {
> +    union {
> +      float D; uint32_t I;
> +    };
> +    D = Data;
> +    addInt(I);
> +    return *this;
> +  }
> +
> +  /// Add a double
> +  GeneralHash& add(double Data) {
> +    union {
> +      double D; uint64_t I;
> +    };
> +    D = Data;
> +    addInt(I);
> +    return *this;
> +  }
> +
> +  // Do a few final mixes of the hash to ensure the last few
> +  // bytes are well-incorporated.
> +  unsigned finish() {
> +    mix(Count);
> +    Hash ^= Hash >> 13;
> +    Hash *= M;
> +    Hash ^= Hash >> 15;
> +    return Hash;
> +  }
> +
> +private:
> +  void mix(uint32_t Data) {
> +    ++Count;
> +    Data *= M;
> +    Data ^= Data >> 24;
> +    Data *= M;
> +    Hash *= M;
> +    Hash ^= Data;
> +  }
> +
> +  // Add a single uint32 value
> +  void addInt(uint32_t Val) {
> +    mix(Val);
> +  }
> +
> +  // Add a uint64 value
> +  void addInt(uint64_t Val) {
> +    mix(uint32_t(Val >> 32));
> +    mix(uint32_t(Val));
> +  }
> +
> +  template<typename T, bool isAligned>
> +  struct addBitsImpl {
> +    static void add(GeneralHash &Hash, const T *I, const T *E) {
> +      Hash.addUnaligned(
> +        reinterpret_cast<const uint8_t *>(I),
> +        reinterpret_cast<const uint8_t *>(E));
> +    }
> +  };
> +
> +  template<typename T>
> +  struct addBitsImpl<T, true> {
> +    static void add(GeneralHash &Hash, const T *I, const T *E) {
> +      Hash.addAligned(
> +        reinterpret_cast<const uint32_t *>(I),
> +        reinterpret_cast<const uint32_t *>(E));
> +    }
> +  };
> +
> +  // Add a range of bits from I to E.
> +  template<typename T>
> +  void addBits(const T *I, const T *E) {
> +    addBitsImpl<T, AlignOf<T>::Alignment_GreaterEqual_4Bytes>::add(*this, I, E);
> +  }
> +
> +  // Add a range of uint32s
> +  void addAligned(const uint32_t *I, const uint32_t *E) {
> +    while (I < E) {
> +      mix(*I++);
> +    }
> +  }
> +
> +  // Add a possibly unaligned sequence of bytes.
> +  void addUnaligned(const uint8_t *I, const uint8_t *E);
> +};
> +
> +} // end namespace llvm
> +
> +#endif
>
> Modified: llvm/trunk/lib/Support/CMakeLists.txt
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/CMakeLists.txt?rev=150890&r1=150889&r2=150890&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Support/CMakeLists.txt (original)
> +++ llvm/trunk/lib/Support/CMakeLists.txt Sat Feb 18 15:00:49 2012
> @@ -26,6 +26,7 @@
>   FoldingSet.cpp
>   FormattedStream.cpp
>   GraphWriter.cpp
> +  Hashing.cpp
>   IntEqClasses.cpp
>   IntervalMap.cpp
>   IntrusiveRefCntPtr.cpp
>
> Added: llvm/trunk/lib/Support/Hashing.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Hashing.cpp?rev=150890&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Support/Hashing.cpp (added)
> +++ llvm/trunk/lib/Support/Hashing.cpp Sat Feb 18 15:00:49 2012
> @@ -0,0 +1,46 @@
> +//===-- llvm/ADT/Hashing.cpp - Utilities for hashing ------------*- C++ -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include "llvm/ADT/Hashing.h"
> +
> +namespace llvm {
> +
> +// Add a possibly unaligned sequence of bytes.
> +void GeneralHash::addUnaligned(const uint8_t *I, const uint8_t *E) {
> +  ptrdiff_t Length = E - I;
> +  if (uintptr_t(I) & 3 == 0) {
> +    while (Length > 3) {
> +      mix(*reinterpret_cast<const uint32_t *>(I));
> +      I += 4;
> +      Length -= 4;
> +    }
> +  } else {
> +    while (Length > 3) {
> +      mix(
> +        uint32_t(I[0]) +
> +        (uint32_t(I[1]) << 8) +
> +        (uint32_t(I[2]) << 16) +
> +        (uint32_t(I[3]) << 24));
> +      I += 4;
> +      Length -= 4;
> +    }
> +  }
> +
> +  if (Length & 3) {
> +    uint32_t Data = 0;
> +    switch (Length & 3) {
> +      case 3: Data |= uint32_t(I[2]) << 16;   // fall through
> +      case 2: Data |= uint32_t(I[1]) << 8;    // fall through
> +      case 1: Data |= uint32_t(I[0]); break;
> +    }
> +    mix(Data);
> +  }
> +}
> +
> +}
>
> Added: llvm/trunk/unittests/ADT/HashingTest.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/ADT/HashingTest.cpp?rev=150890&view=auto
> ==============================================================================
> --- llvm/trunk/unittests/ADT/HashingTest.cpp (added)
> +++ llvm/trunk/unittests/ADT/HashingTest.cpp Sat Feb 18 15:00:49 2012
> @@ -0,0 +1,57 @@
> +//===- llvm/unittest/ADT/HashingTest.cpp ----------------------------------===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// Hashing.h unit tests.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include "gtest/gtest.h"
> +#include "llvm/ADT/Hashing.h"
> +
> +using namespace llvm;
> +
> +namespace {
> +
> +TEST(HashingTest, EmptyHashTest) {
> +  GeneralHash Hash;
> +  ASSERT_EQ(0u, Hash.finish());
> +}
> +
> +TEST(HashingTest, IntegerHashTest) {
> +  ASSERT_TRUE(GeneralHash().add(1).finish() == GeneralHash().add(1).finish());
> +  ASSERT_TRUE(GeneralHash().add(1).finish() != GeneralHash().add(2).finish());
> +}
> +
> +TEST(HashingTest, StringHashTest) {
> +  ASSERT_TRUE(
> +    GeneralHash().add("abc").finish() == GeneralHash().add("abc").finish());
> +  ASSERT_TRUE(
> +    GeneralHash().add("abc").finish() != GeneralHash().add("abcd").finish());
> +}
> +
> +TEST(HashingTest, FloatHashTest) {
> +  ASSERT_TRUE(
> +    GeneralHash().add(1.0f).finish() == GeneralHash().add(1.0f).finish());
> +  ASSERT_TRUE(
> +    GeneralHash().add(1.0f).finish() != GeneralHash().add(2.0f).finish());
> +}
> +
> +TEST(HashingTest, DoubleHashTest) {
> +  ASSERT_TRUE(GeneralHash().add(1.).finish() == GeneralHash().add(1.).finish());
> +  ASSERT_TRUE(GeneralHash().add(1.).finish() != GeneralHash().add(2.).finish());
> +}
> +
> +TEST(HashingTest, IntegerArrayHashTest) {
> +  int a[] = { 1, 2 };
> +  int b[] = { 1, 3 };
> +  ASSERT_TRUE(GeneralHash().add(a).finish() == GeneralHash().add(a).finish());
> +  ASSERT_TRUE(GeneralHash().add(a).finish() != GeneralHash().add(b).finish());
> +}
> +
> +}
>
> Modified: llvm/trunk/unittests/CMakeLists.txt
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/CMakeLists.txt?rev=150890&r1=150889&r2=150890&view=diff
> ==============================================================================
> --- llvm/trunk/unittests/CMakeLists.txt (original)
> +++ llvm/trunk/unittests/CMakeLists.txt Sat Feb 18 15:00:49 2012
> @@ -60,6 +60,7 @@
>   ADT/DenseMapTest.cpp
>   ADT/DenseSetTest.cpp
>   ADT/FoldingSet.cpp
> +  ADT/HashingTest.cpp
>   ADT/ilistTest.cpp
>   ADT/ImmutableSetTest.cpp
>   ADT/IntEqClassesTest.cpp
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits


After committing Meador's patch to use this for uniquing struct and
function types, I get this GCC warning about strict aliasing:

In file included from
/home/baldrick/osuosl/slave/clang-x86_64-debian-fnt/llvm.src/lib/VMCore/ConstantsContext.h:18,
                 from
/home/baldrick/osuosl/slave/clang-x86_64-debian-fnt/llvm.src/lib/VMCore/LLVMContextImpl.h:19,
                 from
/home/baldrick/osuosl/slave/clang-x86_64-debian-fnt/llvm.src/lib/VMCore/Type.cpp:14:
/home/baldrick/osuosl/slave/clang-x86_64-debian-fnt/llvm.src/include/llvm/ADT/Hashing.h:
In member function 'void llvm::DenseMap<KeyT, ValueT, KeyInfoT,
ValueInfoT>::grow(unsigned int) [with KeyT = llvm::FunctionType*,
ValueT = bool, KeyInfoT = llvm::FunctionTypeKeyInfo, ValueInfoT =
llvm::DenseMapInfo<bool>]':
/home/baldrick/osuosl/slave/clang-x86_64-debian-fnt/llvm.src/include/llvm/ADT/Hashing.h:170:
warning: dereferencing pointer 'I' does break strict-aliasing rules
/home/baldrick/osuosl/slave/clang-x86_64-debian-fnt/llvm.src/include/llvm/ADT/Hashing.h:155:
note: initialized from here

Jay.




More information about the llvm-commits mailing list