[llvm-commits] [llvm] r150890 - in /llvm/trunk: include/llvm/ADT/Hashing.h lib/Support/CMakeLists.txt lib/Support/Hashing.cpp unittests/ADT/HashingTest.cpp unittests/CMakeLists.txt
Jay Foad
jay.foad at gmail.com
Tue Feb 21 03:37:51 PST 2012
On 18 February 2012 21:00, Talin <viridia at gmail.com> wrote:
> Author: talin
> Date: Sat Feb 18 15:00:49 2012
> New Revision: 150890
>
> URL: http://llvm.org/viewvc/llvm-project?rev=150890&view=rev
> Log:
> Hashing.h - utilities for hashing various data types.
>
>
> Added:
> llvm/trunk/include/llvm/ADT/Hashing.h
> llvm/trunk/lib/Support/Hashing.cpp
> llvm/trunk/unittests/ADT/HashingTest.cpp
> Modified:
> llvm/trunk/lib/Support/CMakeLists.txt
> llvm/trunk/unittests/CMakeLists.txt
>
> Added: llvm/trunk/include/llvm/ADT/Hashing.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/Hashing.h?rev=150890&view=auto
> ==============================================================================
> --- llvm/trunk/include/llvm/ADT/Hashing.h (added)
> +++ llvm/trunk/include/llvm/ADT/Hashing.h Sat Feb 18 15:00:49 2012
> @@ -0,0 +1,180 @@
> +//===-- llvm/ADT/Hashing.h - Utilities for hashing --------------*- C++ -*-===//
> +//
> +// The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file defines utilities for computing hash values for various data types.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef LLVM_ADT_HASHING_H
> +#define LLVM_ADT_HASHING_H
> +
> +#include "llvm/ADT/ArrayRef.h"
> +#include "llvm/ADT/StringRef.h"
> +#include "llvm/Support/AlignOf.h"
> +#include "llvm/Support/Compiler.h"
> +#include "llvm/Support/DataTypes.h"
> +
> +namespace llvm {
> +
> +/// Class to compute a hash value from multiple data fields of arbitrary
> +/// types. Note that if you are hashing a single data type, such as a
> +/// string, it may be cheaper to use a hash algorithm that is tailored
> +/// for that specific data type.
> +/// Typical Usage:
> +/// GeneralHash Hash;
> +/// Hash.add(someValue);
> +/// Hash.add(someOtherValue);
> +/// return Hash.finish();
> +/// Adapted from MurmurHash2 by Austin Appleby
> +class GeneralHash {
> +private:
> + enum {
> + M = 0x5bd1e995
> + };
> + unsigned Hash;
> + unsigned Count;
> +public:
> + GeneralHash(unsigned Seed = 0) : Hash(Seed), Count(0) {}
> +
> + /// Add a pointer value.
> + /// Note: this adds pointers to the hash using sizes and endianness that
> + /// depend on the host. It doesn't matter however, because hashing on
> + /// pointer values is inherently unstable.
> + template<typename T>
> + GeneralHash& add(const T *PtrVal) {
> + addBits(&PtrVal, &PtrVal + 1);
> + return *this;
> + }
> +
> + /// Add an ArrayRef of arbitrary data.
> + template<typename T>
> + GeneralHash& add(ArrayRef<T> ArrayVal) {
> + addBits(ArrayVal.begin(), ArrayVal.end());
> + return *this;
> + }
> +
> + /// Add a string
> + GeneralHash& add(StringRef StrVal) {
> + addBits(StrVal.begin(), StrVal.end());
> + return *this;
> + }
> +
> + /// Add a signed 32-bit integer.
> + GeneralHash& add(int32_t Data) {
> + addInt(uint32_t(Data));
> + return *this;
> + }
> +
> + /// Add an unsigned 32-bit integer.
> + GeneralHash& add(uint32_t Data) {
> + addInt(Data);
> + return *this;
> + }
> +
> + /// Add a signed 64-bit integer.
> + GeneralHash& add(int64_t Data) {
> + addInt(uint64_t(Data));
> + return *this;
> + }
> +
> + /// Add an unsigned 64-bit integer.
> + GeneralHash& add(uint64_t Data) {
> + addInt(Data);
> + return *this;
> + }
> +
> + /// Add a float
> + GeneralHash& add(float Data) {
> + union {
> + float D; uint32_t I;
> + };
> + D = Data;
> + addInt(I);
> + return *this;
> + }
> +
> + /// Add a double
> + GeneralHash& add(double Data) {
> + union {
> + double D; uint64_t I;
> + };
> + D = Data;
> + addInt(I);
> + return *this;
> + }
> +
> + // Do a few final mixes of the hash to ensure the last few
> + // bytes are well-incorporated.
> + unsigned finish() {
> + mix(Count);
> + Hash ^= Hash >> 13;
> + Hash *= M;
> + Hash ^= Hash >> 15;
> + return Hash;
> + }
> +
> +private:
> + void mix(uint32_t Data) {
> + ++Count;
> + Data *= M;
> + Data ^= Data >> 24;
> + Data *= M;
> + Hash *= M;
> + Hash ^= Data;
> + }
> +
> + // Add a single uint32 value
> + void addInt(uint32_t Val) {
> + mix(Val);
> + }
> +
> + // Add a uint64 value
> + void addInt(uint64_t Val) {
> + mix(uint32_t(Val >> 32));
> + mix(uint32_t(Val));
> + }
> +
> + template<typename T, bool isAligned>
> + struct addBitsImpl {
> + static void add(GeneralHash &Hash, const T *I, const T *E) {
> + Hash.addUnaligned(
> + reinterpret_cast<const uint8_t *>(I),
> + reinterpret_cast<const uint8_t *>(E));
> + }
> + };
> +
> + template<typename T>
> + struct addBitsImpl<T, true> {
> + static void add(GeneralHash &Hash, const T *I, const T *E) {
> + Hash.addAligned(
> + reinterpret_cast<const uint32_t *>(I),
> + reinterpret_cast<const uint32_t *>(E));
> + }
> + };
> +
> + // Add a range of bits from I to E.
> + template<typename T>
> + void addBits(const T *I, const T *E) {
> + addBitsImpl<T, AlignOf<T>::Alignment_GreaterEqual_4Bytes>::add(*this, I, E);
> + }
> +
> + // Add a range of uint32s
> + void addAligned(const uint32_t *I, const uint32_t *E) {
> + while (I < E) {
> + mix(*I++);
> + }
> + }
> +
> + // Add a possibly unaligned sequence of bytes.
> + void addUnaligned(const uint8_t *I, const uint8_t *E);
> +};
> +
> +} // end namespace llvm
> +
> +#endif
>
> Modified: llvm/trunk/lib/Support/CMakeLists.txt
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/CMakeLists.txt?rev=150890&r1=150889&r2=150890&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Support/CMakeLists.txt (original)
> +++ llvm/trunk/lib/Support/CMakeLists.txt Sat Feb 18 15:00:49 2012
> @@ -26,6 +26,7 @@
> FoldingSet.cpp
> FormattedStream.cpp
> GraphWriter.cpp
> + Hashing.cpp
> IntEqClasses.cpp
> IntervalMap.cpp
> IntrusiveRefCntPtr.cpp
>
> Added: llvm/trunk/lib/Support/Hashing.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Hashing.cpp?rev=150890&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Support/Hashing.cpp (added)
> +++ llvm/trunk/lib/Support/Hashing.cpp Sat Feb 18 15:00:49 2012
> @@ -0,0 +1,46 @@
> +//===-- llvm/ADT/Hashing.cpp - Utilities for hashing ------------*- C++ -*-===//
> +//
> +// The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include "llvm/ADT/Hashing.h"
> +
> +namespace llvm {
> +
> +// Add a possibly unaligned sequence of bytes.
> +void GeneralHash::addUnaligned(const uint8_t *I, const uint8_t *E) {
> + ptrdiff_t Length = E - I;
> + if (uintptr_t(I) & 3 == 0) {
> + while (Length > 3) {
> + mix(*reinterpret_cast<const uint32_t *>(I));
> + I += 4;
> + Length -= 4;
> + }
> + } else {
> + while (Length > 3) {
> + mix(
> + uint32_t(I[0]) +
> + (uint32_t(I[1]) << 8) +
> + (uint32_t(I[2]) << 16) +
> + (uint32_t(I[3]) << 24));
> + I += 4;
> + Length -= 4;
> + }
> + }
> +
> + if (Length & 3) {
> + uint32_t Data = 0;
> + switch (Length & 3) {
> + case 3: Data |= uint32_t(I[2]) << 16; // fall through
> + case 2: Data |= uint32_t(I[1]) << 8; // fall through
> + case 1: Data |= uint32_t(I[0]); break;
> + }
> + mix(Data);
> + }
> +}
> +
> +}
>
> Added: llvm/trunk/unittests/ADT/HashingTest.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/ADT/HashingTest.cpp?rev=150890&view=auto
> ==============================================================================
> --- llvm/trunk/unittests/ADT/HashingTest.cpp (added)
> +++ llvm/trunk/unittests/ADT/HashingTest.cpp Sat Feb 18 15:00:49 2012
> @@ -0,0 +1,57 @@
> +//===- llvm/unittest/ADT/HashingTest.cpp ----------------------------------===//
> +//
> +// The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// Hashing.h unit tests.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include "gtest/gtest.h"
> +#include "llvm/ADT/Hashing.h"
> +
> +using namespace llvm;
> +
> +namespace {
> +
> +TEST(HashingTest, EmptyHashTest) {
> + GeneralHash Hash;
> + ASSERT_EQ(0u, Hash.finish());
> +}
> +
> +TEST(HashingTest, IntegerHashTest) {
> + ASSERT_TRUE(GeneralHash().add(1).finish() == GeneralHash().add(1).finish());
> + ASSERT_TRUE(GeneralHash().add(1).finish() != GeneralHash().add(2).finish());
> +}
> +
> +TEST(HashingTest, StringHashTest) {
> + ASSERT_TRUE(
> + GeneralHash().add("abc").finish() == GeneralHash().add("abc").finish());
> + ASSERT_TRUE(
> + GeneralHash().add("abc").finish() != GeneralHash().add("abcd").finish());
> +}
> +
> +TEST(HashingTest, FloatHashTest) {
> + ASSERT_TRUE(
> + GeneralHash().add(1.0f).finish() == GeneralHash().add(1.0f).finish());
> + ASSERT_TRUE(
> + GeneralHash().add(1.0f).finish() != GeneralHash().add(2.0f).finish());
> +}
> +
> +TEST(HashingTest, DoubleHashTest) {
> + ASSERT_TRUE(GeneralHash().add(1.).finish() == GeneralHash().add(1.).finish());
> + ASSERT_TRUE(GeneralHash().add(1.).finish() != GeneralHash().add(2.).finish());
> +}
> +
> +TEST(HashingTest, IntegerArrayHashTest) {
> + int a[] = { 1, 2 };
> + int b[] = { 1, 3 };
> + ASSERT_TRUE(GeneralHash().add(a).finish() == GeneralHash().add(a).finish());
> + ASSERT_TRUE(GeneralHash().add(a).finish() != GeneralHash().add(b).finish());
> +}
> +
> +}
>
> Modified: llvm/trunk/unittests/CMakeLists.txt
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/CMakeLists.txt?rev=150890&r1=150889&r2=150890&view=diff
> ==============================================================================
> --- llvm/trunk/unittests/CMakeLists.txt (original)
> +++ llvm/trunk/unittests/CMakeLists.txt Sat Feb 18 15:00:49 2012
> @@ -60,6 +60,7 @@
> ADT/DenseMapTest.cpp
> ADT/DenseSetTest.cpp
> ADT/FoldingSet.cpp
> + ADT/HashingTest.cpp
> ADT/ilistTest.cpp
> ADT/ImmutableSetTest.cpp
> ADT/IntEqClassesTest.cpp
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
After committing Meador's patch to use this for uniquing struct and
function types, I get this GCC warning about strict aliasing:
In file included from
/home/baldrick/osuosl/slave/clang-x86_64-debian-fnt/llvm.src/lib/VMCore/ConstantsContext.h:18,
from
/home/baldrick/osuosl/slave/clang-x86_64-debian-fnt/llvm.src/lib/VMCore/LLVMContextImpl.h:19,
from
/home/baldrick/osuosl/slave/clang-x86_64-debian-fnt/llvm.src/lib/VMCore/Type.cpp:14:
/home/baldrick/osuosl/slave/clang-x86_64-debian-fnt/llvm.src/include/llvm/ADT/Hashing.h:
In member function 'void llvm::DenseMap<KeyT, ValueT, KeyInfoT,
ValueInfoT>::grow(unsigned int) [with KeyT = llvm::FunctionType*,
ValueT = bool, KeyInfoT = llvm::FunctionTypeKeyInfo, ValueInfoT =
llvm::DenseMapInfo<bool>]':
/home/baldrick/osuosl/slave/clang-x86_64-debian-fnt/llvm.src/include/llvm/ADT/Hashing.h:170:
warning: dereferencing pointer 'I' does break strict-aliasing rules
/home/baldrick/osuosl/slave/clang-x86_64-debian-fnt/llvm.src/include/llvm/ADT/Hashing.h:155:
note: initialized from here
Jay.
More information about the llvm-commits
mailing list