[llvm] 1076082 - [Support]: Introduce the `HashBuilder` interface.

Alexandre Rames via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 26 09:20:55 PDT 2021


Author: Alexandre Rames
Date: 2021-08-26T09:20:50-07:00
New Revision: 1076082a0d97bd5c16a25ee7cf3dbb6ee4b5a9fe

URL: https://github.com/llvm/llvm-project/commit/1076082a0d97bd5c16a25ee7cf3dbb6ee4b5a9fe
DIFF: https://github.com/llvm/llvm-project/commit/1076082a0d97bd5c16a25ee7cf3dbb6ee4b5a9fe.diff

LOG: [Support]: Introduce the `HashBuilder` interface.

The `HashBuilder` interface allows conveniently building hashes of various data
types, without relying on the underlying hasher type to know about hashed data
types.

Reviewed By: dexonsmith

Differential Revision: https://reviews.llvm.org/D106910

Added: 
    llvm/include/llvm/Support/HashBuilder.h
    llvm/unittests/Support/HashBuilderTest.cpp

Modified: 
    llvm/unittests/Support/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Support/HashBuilder.h b/llvm/include/llvm/Support/HashBuilder.h
new file mode 100644
index 0000000000000..23f73f1b659dc
--- /dev/null
+++ b/llvm/include/llvm/Support/HashBuilder.h
@@ -0,0 +1,404 @@
+//===- llvm/Support/HashBuilder.h - Convenient hashing interface-*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an interface allowing to conveniently build hashes of
+// various data types, without relying on the underlying hasher type to know
+// about hashed data types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_HASHBUILDER_H
+#define LLVM_SUPPORT_HASHBUILDER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/type_traits.h"
+
+#include <iterator>
+#include <utility>
+
+namespace llvm {
+
+/// Base of the hash builders: holds the hasher (owned or borrowed) and exposes
+/// the member functions that forward straight to it.
+template <typename HasherT> class HashBuilderBase {
+public:
+  /// Access the hasher this builder feeds.
+  HasherT &getHasher() { return Hasher; }
+
+  /// Forward to `HasherT::update(ArrayRef<uint8_t>)`.
+  ///
+  /// This may not take the size of `Data` into account.
+  /// Users of this function should pay attention to respect endianness
+  /// constraints.
+  void update(ArrayRef<uint8_t> Data) { Hasher.update(Data); }
+
+  /// View the characters of `Data` as bytes and forward them to
+  /// `HasherT::update(ArrayRef<uint8_t>)`.
+  ///
+  /// This may not take the size of `Data` into account.
+  /// Users of this function should pay attention to respect endianness
+  /// constraints.
+  void update(StringRef Data) {
+    const auto *Bytes = reinterpret_cast<const uint8_t *>(Data.data());
+    update(ArrayRef<uint8_t>(Bytes, Data.size()));
+  }
+
+  /// Forward to `HasherT::final()` if available.
+  ///
+  /// Being a member template, the body is only instantiated when called.
+  template <typename HasherT_ = HasherT> StringRef final() {
+    return Hasher.final();
+  }
+
+  /// Forward to `HasherT::result()` if available.
+  ///
+  /// Being a member template, the body is only instantiated when called.
+  template <typename HasherT_ = HasherT> StringRef result() {
+    return Hasher.result();
+  }
+
+protected:
+  /// Borrow `ExternalHasher`; `OptionalHasher` stays empty.
+  explicit HashBuilderBase(HasherT &ExternalHasher) : Hasher(ExternalHasher) {}
+
+  /// Construct an owned hasher in place from `Args` and refer to it.
+  template <typename... ArgTypes>
+  explicit HashBuilderBase(ArgTypes &&...Args)
+      : OptionalHasher(in_place, std::forward<ArgTypes>(Args)...),
+        Hasher(*OptionalHasher) {}
+
+private:
+  // Declared before `Hasher` so that it is initialized first: the in-place
+  // constructor binds `Hasher` to the value stored here.
+  Optional<HasherT> OptionalHasher;
+  HasherT &Hasher;
+};
+
+/// Implementation of the `HashBuilder` interface.
+///
+/// `support::endianness::native` is not supported. `HashBuilder` is
+/// expected to canonicalize `support::endianness::native` to one of
+/// `support::endianness::big` or `support::endianness::little`.
+template <typename HasherT, support::endianness Endianness>
+class HashBuilderImpl : public HashBuilderBase<HasherT> {
+  static_assert(Endianness != support::endianness::native,
+                "HashBuilder should canonicalize endianness");
+  /// Trait to indicate whether a type's bits can be hashed directly (after
+  /// endianness correction).
+  template <typename U>
+  struct IsHashableData
+      : std::integral_constant<bool, is_integral_or_enum<U>::value> {};
+
+public:
+  /// Build on top of an existing hasher, which is referenced, not copied.
+  explicit HashBuilderImpl(HasherT &Hasher)
+      : HashBuilderBase<HasherT>(Hasher) {}
+
+  /// Construct a hasher owned by the builder from `Args`.
+  ///
+  /// The arguments are perfectly forwarded. Passing them on as lvalues would
+  /// make an rvalue `HasherT` argument select the base class
+  /// `HashBuilderBase(HasherT &)` overload, leaving the builder referencing
+  /// the caller's temporary instead of owning a hasher.
+  template <typename... ArgTypes>
+  explicit HashBuilderImpl(ArgTypes &&...Args)
+      : HashBuilderBase<HasherT>(std::forward<ArgTypes>(Args)...) {}
+
+  /// Implement hashing for hashable data types, e.g. integral or enum values.
+  template <typename T>
+  std::enable_if_t<IsHashableData<T>::value, HashBuilderImpl &> add(T Value) {
+    return adjustForEndiannessAndAdd(Value);
+  }
+
+  /// Support hashing `ArrayRef`.
+  ///
+  /// `Value.size()` is taken into account to ensure cases like
+  /// ```
+  /// builder.add({1});
+  /// builder.add({2, 3});
+  /// ```
+  /// and
+  /// ```
+  /// builder.add({1, 2});
+  /// builder.add({3});
+  /// ```
+  /// do not collide.
+  template <typename T> HashBuilderImpl &add(ArrayRef<T> Value) {
+    // As of implementation time, simply calling `addRange(Value)` would also go
+    // through the `update` fast path. But that would rely on the implementation
+    // details of `ArrayRef::begin()` and `ArrayRef::end()`. Explicitly call
+    // `update` to guarantee the fast path.
+    add(Value.size());
+    if (IsHashableData<T>::value &&
+        Endianness == support::endian::system_endianness()) {
+      this->update(
+          makeArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()),
+                       Value.size() * sizeof(T)));
+    } else {
+      for (auto &V : Value)
+        add(V);
+    }
+    return *this;
+  }
+
+  /// Support hashing `StringRef`.
+  ///
+  /// `Value.size()` is taken into account to ensure cases like
+  /// ```
+  /// builder.add("a");
+  /// builder.add("bc");
+  /// ```
+  /// and
+  /// ```
+  /// builder.add("ab");
+  /// builder.add("c");
+  /// ```
+  /// do not collide.
+  HashBuilderImpl &add(StringRef Value) {
+    // As of implementation time, simply calling `addRange(Value)` would also go
+    // through `update`. But that would rely on the implementation of
+    // `StringRef::begin()` and `StringRef::end()`. Explicitly call `update` to
+    // guarantee the fast path.
+    add(Value.size());
+    this->update(makeArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()),
+                              Value.size()));
+    return *this;
+  }
+
+  /// Detects whether `addHash(HashBuilderImpl &, T &)` is a valid call.
+  template <typename T>
+  using HasAddHashT =
+      decltype(addHash(std::declval<HashBuilderImpl &>(), std::declval<T &>()));
+  /// Implement hashing for user-defined `struct`s.
+  ///
+  /// Any user-defined `struct` can participate in hashing via `HashBuilder` by
+  /// providing an `addHash` templated function. The function must take a
+  /// `HashBuilderImpl`: in the `HashBuilder` alias, `Endianness` only appears
+  /// inside a conditional expression and cannot be deduced.
+  ///
+  /// ```
+  /// template <typename HasherT, support::endianness Endianness>
+  /// void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder,
+  ///              const UserDefinedStruct &Value);
+  /// ```
+  ///
+  /// For example:
+  /// ```
+  /// struct SimpleStruct {
+  ///   char c;
+  ///   int i;
+  /// };
+  ///
+  /// template <typename HasherT, support::endianness Endianness>
+  /// void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder,
+  ///              const SimpleStruct &Value) {
+  ///   HBuilder.add(Value.c);
+  ///   HBuilder.add(Value.i);
+  /// }
+  /// ```
+  ///
+  /// To avoid endianness issues, specializations of `addHash` should
+  /// generally rely on existing `add`, `addRange`, and `addRangeElements`
+  /// functions. If directly using `update`, an implementation must correctly
+  /// handle endianness.
+  ///
+  /// ```
+  /// struct __attribute__ ((packed)) StructWithFastHash {
+  ///   int I;
+  ///   char C;
+  ///
+  ///   // If possible, we want to hash both `I` and `C` in a single
+  ///   // `update` call for performance concerns.
+  ///   template <typename HasherT, support::endianness Endianness>
+  ///   friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder,
+  ///                       const StructWithFastHash &Value) {
+  ///     if (Endianness == support::endian::system_endianness()) {
+  ///       HBuilder.update(makeArrayRef(
+  ///           reinterpret_cast<const uint8_t *>(&Value), sizeof(Value)));
+  ///     } else {
+  ///       // Rely on existing `add` methods to handle endianness.
+  ///       HBuilder.add(Value.I);
+  ///       HBuilder.add(Value.C);
+  ///     }
+  ///   }
+  /// };
+  /// ```
+  ///
+  /// To avoid collisions, specialization of `addHash` for variable-size
+  /// types must take the size into account.
+  ///
+  /// For example:
+  /// ```
+  /// struct CustomContainer {
+  /// private:
+  ///   size_t Size;
+  ///   int Elements[100];
+  ///
+  /// public:
+  ///   CustomContainer(size_t Size) : Size(Size) {
+  ///     for (size_t I = 0; I != Size; ++I)
+  ///       Elements[I] = I;
+  ///   }
+  ///   template <typename HasherT, support::endianness Endianness>
+  ///   friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder,
+  ///                       const CustomContainer &Value) {
+  ///     if (Endianness == support::endian::system_endianness()) {
+  ///       HBuilder.update(makeArrayRef(
+  ///           reinterpret_cast<const uint8_t *>(&Value.Size),
+  ///           sizeof(Value.Size) + Value.Size * sizeof(Value.Elements[0])));
+  ///     } else {
+  ///       // `addRange` will take care of encoding the size.
+  ///       HBuilder.addRange(&Value.Elements[0],
+  ///                         &Value.Elements[0] + Value.Size);
+  ///     }
+  ///   }
+  /// };
+  /// ```
+  template <typename T>
+  std::enable_if_t<is_detected<HasAddHashT, T>::value &&
+                       !IsHashableData<T>::value,
+                   HashBuilderImpl &>
+  add(const T &Value) {
+    addHash(*this, Value);
+    return *this;
+  }
+
+  /// Hash a `std::pair` by adding `first`, then `second`.
+  template <typename T1, typename T2>
+  HashBuilderImpl &add(const std::pair<T1, T2> &Value) {
+    add(Value.first);
+    add(Value.second);
+    return *this;
+  }
+
+  /// Hash a `std::tuple` by adding its elements in index order.
+  template <typename... Ts> HashBuilderImpl &add(const std::tuple<Ts...> &Arg) {
+    return addTupleHelper(Arg, typename std::index_sequence_for<Ts...>());
+  }
+
+  /// A convenience variadic helper.
+  /// It simply iterates over its arguments, in order.
+  /// ```
+  /// add(Arg1, Arg2);
+  /// ```
+  /// is equivalent to
+  /// ```
+  /// add(Arg1)
+  /// add(Arg2)
+  /// ```
+  template <typename T, typename... Ts>
+  std::enable_if_t<(sizeof...(Ts) >= 1), HashBuilderImpl &>
+  add(const T &FirstArg, const Ts &...Args) {
+    add(FirstArg);
+    add(Args...);
+    return *this;
+  }
+
+  /// Hash the number of elements in `[First, Last)`, then each element.
+  template <typename ForwardIteratorT>
+  HashBuilderImpl &addRange(ForwardIteratorT First, ForwardIteratorT Last) {
+    add(std::distance(First, Last));
+    return addRangeElements(First, Last);
+  }
+
+  /// Same as `addRange(First, Last)` over the iterators found by
+  /// `adl_begin` / `adl_end`.
+  template <typename RangeT> HashBuilderImpl &addRange(const RangeT &Range) {
+    return addRange(adl_begin(Range), adl_end(Range));
+  }
+
+  /// Hash each element of `[First, Last)` without hashing the element count.
+  template <typename ForwardIteratorT>
+  HashBuilderImpl &addRangeElements(ForwardIteratorT First,
+                                    ForwardIteratorT Last) {
+    return addRangeElementsImpl(
+        First, Last,
+        typename std::iterator_traits<ForwardIteratorT>::iterator_category());
+  }
+
+  /// Same as `addRangeElements(First, Last)` over the iterators found by
+  /// `adl_begin` / `adl_end`.
+  template <typename RangeT>
+  HashBuilderImpl &addRangeElements(const RangeT &Range) {
+    return addRangeElements(adl_begin(Range), adl_end(Range));
+  }
+
+  /// Detects whether `support::endian::byte_swap` accepts a `T`.
+  template <typename T>
+  using HasByteSwapT = decltype(support::endian::byte_swap(
+      std::declval<T &>(), support::endianness::little));
+  /// Adjust `Value` for the target endianness and add it to the hash.
+  template <typename T>
+  std::enable_if_t<is_detected<HasByteSwapT, T>::value, HashBuilderImpl &>
+  adjustForEndiannessAndAdd(const T &Value) {
+    T SwappedValue = support::endian::byte_swap(Value, Endianness);
+    this->update(makeArrayRef(reinterpret_cast<const uint8_t *>(&SwappedValue),
+                              sizeof(SwappedValue)));
+    return *this;
+  }
+
+private:
+  /// Expand the tuple into a variadic `add` call, one argument per element.
+  template <typename... Ts, std::size_t... Indices>
+  HashBuilderImpl &addTupleHelper(const std::tuple<Ts...> &Arg,
+                                  std::index_sequence<Indices...>) {
+    add(std::get<Indices>(Arg)...);
+    return *this;
+  }
+
+  // FIXME: Once available, specialize this function for `contiguous_iterator`s,
+  // and use it for `ArrayRef` and `StringRef`.
+  /// Generic path: add the elements one at a time.
+  template <typename ForwardIteratorT>
+  HashBuilderImpl &addRangeElementsImpl(ForwardIteratorT First,
+                                        ForwardIteratorT Last,
+                                        std::forward_iterator_tag) {
+    for (auto It = First; It != Last; ++It)
+      add(*It);
+    return *this;
+  }
+
+  /// Fast path for pointers to hashable data when no byte swapping is needed:
+  /// hash the whole range with a single `update`.
+  template <typename T>
+  std::enable_if_t<IsHashableData<T>::value &&
+                       Endianness == support::endian::system_endianness(),
+                   HashBuilderImpl &>
+  addRangeElementsImpl(T *First, T *Last, std::forward_iterator_tag) {
+    this->update(makeArrayRef(reinterpret_cast<const uint8_t *>(First),
+                              (Last - First) * sizeof(T)));
+    return *this;
+  }
+};
+
+/// Interface to help hash various types through a hasher type.
+///
+/// Via provided specializations of `add`, `addRange`, and `addRangeElements`
+/// functions, various types (e.g. `ArrayRef`, `StringRef`, etc.) can be hashed
+/// without requiring any knowledge of hashed types from the hasher type.
+///
+/// The only method expected from the templated hasher type `HasherT` is:
+/// * void update(ArrayRef<uint8_t> Data)
+///
+/// Additionally, the following methods will be forwarded to the hasher type:
+/// * decltype(std::declval<HasherT &>().final()) final()
+/// * decltype(std::declval<HasherT &>().result()) result()
+///
+/// From a user point of view, the interface provides the following:
+/// * `template<typename T> add(const T &Value)`
+///   The `add` function implements hashing of various types.
+/// * `template <typename ItT> void addRange(ItT First, ItT Last)`
+///   The `addRange` function is designed to aid hashing a range of values.
+///   It explicitly adds the size of the range in the hash.
+/// * `template <typename ItT> void addRangeElements(ItT First, ItT Last)`
+///   The `addRangeElements` function is also designed to aid hashing a range of
+///   values. In contrast to `addRange`, it **ignores** the size of the range,
+///   behaving as if elements were added one at a time with `add`.
+///
+/// User-defined `struct` types can participate in this interface by providing
+/// an `addHash` templated function. See the associated template specialization
+/// for details.
+///
+/// This interface does not impose requirements on the hasher
+/// `update(ArrayRef<uint8_t> Data)` method. We want to avoid collisions for
+/// variable-size types; for example for
+/// ```
+/// builder.add({1});
+/// builder.add({2, 3});
+/// ```
+/// and
+/// ```
+/// builder.add({1, 2});
+/// builder.add({3});
+/// ```
+/// . Thus, specializations of `add` and `addHash` for variable-size types must
+/// not assume that the hasher type considers the size as part of the hash; they
+/// must explicitly add the size to the hash. See for example specializations
+/// for `ArrayRef` and `StringRef`.
+///
+/// Additionally, since types are eventually forwarded to the hasher's
+/// `void update(ArrayRef<uint8_t>)` method, endianness plays a role in the hash
+/// computation (for example when computing `add((int)123)`).
+/// Specifying a non-`native` `Endianness` template parameter allows computing
+/// a stable hash across platforms with different endianness.
+template <typename HasherT, support::endianness Endianness>
+using HashBuilder = HashBuilderImpl<
+    HasherT,
+    // Canonicalize `native` to the value reported by `system_endianness()`.
+    (Endianness != support::endianness::native
+         ? Endianness
+         : support::endian::system_endianness())>;
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_HASHBUILDER_H

diff  --git a/llvm/unittests/Support/CMakeLists.txt b/llvm/unittests/Support/CMakeLists.txt
index f1145681389e9..b44d9ee27188d 100644
--- a/llvm/unittests/Support/CMakeLists.txt
+++ b/llvm/unittests/Support/CMakeLists.txt
@@ -39,6 +39,7 @@ add_llvm_unittest(SupportTests
   FormatVariadicTest.cpp
   FSUniqueIDTest.cpp
   GlobPatternTest.cpp
+  HashBuilderTest.cpp
   Host.cpp
   IndexedAccessorTest.cpp
   InstructionCostTest.cpp

diff  --git a/llvm/unittests/Support/HashBuilderTest.cpp b/llvm/unittests/Support/HashBuilderTest.cpp
new file mode 100644
index 0000000000000..48e25558f0781
--- /dev/null
+++ b/llvm/unittests/Support/HashBuilderTest.cpp
@@ -0,0 +1,336 @@
+//===- llvm/unittest/Support/HashBuilderTest.cpp - HashBuilder unit tests -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/HashBuilder.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/MD5.h"
+#include "llvm/Support/SHA1.h"
+#include "llvm/Support/SHA256.h"
+#include "gtest/gtest.h"
+
+#include <list>
+#include <string>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+// gtest utilities and macros rely on using a single type. So wrap both the
+// hasher type and endianness. (Template parameter names avoid the reserved
+// underscore-plus-uppercase form.)
+template <typename HasherType, llvm::support::endianness EndiannessValue>
+struct HasherTAndEndianness {
+  using HasherT = HasherType;
+  static constexpr llvm::support::endianness Endianness = EndiannessValue;
+};
+// Every hasher under test (MD5, SHA1, SHA256) paired with each endianness
+// (big, little, native). The typed test suite below is instantiated once per
+// entry of this list.
+using HasherTAndEndiannessToTest =
+    ::testing::Types<HasherTAndEndianness<llvm::MD5, llvm::support::big>,
+                     HasherTAndEndianness<llvm::MD5, llvm::support::little>,
+                     HasherTAndEndianness<llvm::MD5, llvm::support::native>,
+                     HasherTAndEndianness<llvm::SHA1, llvm::support::big>,
+                     HasherTAndEndianness<llvm::SHA1, llvm::support::little>,
+                     HasherTAndEndianness<llvm::SHA1, llvm::support::native>,
+                     HasherTAndEndianness<llvm::SHA256, llvm::support::big>,
+                     HasherTAndEndianness<llvm::SHA256, llvm::support::little>,
+                     HasherTAndEndianness<llvm::SHA256, llvm::support::native>>;
+// Empty fixture; its template argument is one of the wrapper types listed
+// above.
+template <typename HasherT> class HashBuilderTest : public testing::Test {};
+TYPED_TEST_SUITE(HashBuilderTest, HasherTAndEndiannessToTest);
+
+// Shorthand: the `llvm::HashBuilder` described by a `HasherTAndEndianness`
+// wrapper type.
+template <typename WrapperT>
+using HashBuilder =
+    llvm::HashBuilder<typename WrapperT::HasherT, WrapperT::Endianness>;
+
+// Hash `Args` through a single variadic `add` on a fresh builder and return
+// the final hash as a string.
+template <typename HasherTAndEndianness, typename... Ts>
+static std::string hashWithBuilder(const Ts &...Args) {
+  HashBuilder<HasherTAndEndianness> Builder;
+  Builder.add(Args...);
+  return Builder.final().str();
+}
+
+// Hash `Args` through `addRange` on a fresh builder and return the final hash
+// as a string.
+template <typename HasherTAndEndianness, typename... Ts>
+static std::string hashRangeWithBuilder(const Ts &...Args) {
+  HashBuilder<HasherTAndEndianness> Builder;
+  Builder.addRange(Args...);
+  return Builder.final().str();
+}
+
+// All the test infrastructure relies on the variadic helpers. Test them first.
+TYPED_TEST(HashBuilderTest, VariadicHelpers) {
+  // Adding values one call at a time must match a single variadic `add`.
+  {
+    HashBuilder<TypeParam> Builder;
+    Builder.add(100).add('c').add("string");
+
+    EXPECT_EQ(Builder.final(), hashWithBuilder<TypeParam>(100, 'c', "string"));
+  }
+
+  // `addRange` over a whole container must match the range helper.
+  {
+    std::vector<int> Values{100, 101, 102};
+    HashBuilder<TypeParam> Builder;
+    Builder.addRange(Values);
+
+    EXPECT_EQ(Builder.final(), hashRangeWithBuilder<TypeParam>(Values));
+  }
+
+  // Same check for the iterator-pair flavor of `addRange`.
+  {
+    std::vector<int> Values{200, 201, 202};
+    HashBuilder<TypeParam> Builder;
+    Builder.addRange(Values.begin(), Values.end());
+
+    EXPECT_EQ(Builder.final(),
+              hashRangeWithBuilder<TypeParam>(Values.begin(), Values.end()));
+  }
+}
+
+// `addRangeElements` does not hash the range size, so it must match adding
+// the same elements one by one.
+TYPED_TEST(HashBuilderTest, AddRangeElements) {
+  int Values[] = {1, 2, 3};
+  llvm::ArrayRef<int> Elements(Values);
+
+  HashBuilder<TypeParam> Builder;
+  Builder.addRangeElements(Elements);
+  EXPECT_EQ(Builder.final(), hashWithBuilder<TypeParam>(1, 2, 3));
+}
+
+// Integral and enum values must hash like their endianness-adjusted raw bytes.
+TYPED_TEST(HashBuilderTest, AddHashableData) {
+  using HE = TypeParam;
+
+  // Reference implementation: byte-swap the value to the endianness under
+  // test and feed its raw bytes straight to the hasher.
+  auto HashRawBytes = [](auto Value) {
+    using RawHasherT = typename HE::HasherT;
+    constexpr auto TargetEndianness = HE::Endianness;
+    RawHasherT RawHasher;
+    auto Swapped = llvm::support::endian::byte_swap(Value, TargetEndianness);
+    RawHasher.update(llvm::makeArrayRef(
+        reinterpret_cast<const uint8_t *>(&Swapped), sizeof(Value)));
+    return static_cast<std::string>(RawHasher.final());
+  };
+
+  enum TestEnumeration : uint16_t { TE_One = 1, TE_Two = 2 };
+
+  char CharValue = 'c';
+  int32_t Int32Value = 0x12345678;
+  uint64_t UInt64Value = static_cast<uint64_t>(1) << 50;
+  TestEnumeration EnumValue = TE_Two;
+
+  EXPECT_EQ(HashRawBytes(CharValue), hashWithBuilder<HE>(CharValue));
+  EXPECT_EQ(HashRawBytes(Int32Value), hashWithBuilder<HE>(Int32Value));
+  EXPECT_EQ(HashRawBytes(UInt64Value), hashWithBuilder<HE>(UInt64Value));
+  EXPECT_EQ(HashRawBytes(EnumValue), hashWithBuilder<HE>(EnumValue));
+}
+
+// Minimal user-defined type that takes part in hashing through a free
+// `addHash` function.
+struct SimpleStruct {
+  char C;
+  int I;
+};
+
+// Hash `C`, then `I`, using the builder's own `add` overloads.
+template <typename HasherT, llvm::support::endianness Endianness>
+void addHash(llvm::HashBuilderImpl<HasherT, Endianness> &HBuilder,
+             const SimpleStruct &Value) {
+  HBuilder.add(Value.C).add(Value.I);
+}
+
+// Type whose copy constructor and copy assignment are deleted, used to check
+// that hashing takes its argument by reference.
+struct StructWithoutCopyOrMove {
+  int I;
+  StructWithoutCopyOrMove() = default;
+  StructWithoutCopyOrMove(const StructWithoutCopyOrMove &) = delete;
+  StructWithoutCopyOrMove &operator=(const StructWithoutCopyOrMove &) = delete;
+
+  // Hash hook declared as a friend inside the class; it only hashes `I`.
+  template <typename HasherT, llvm::support::endianness Endianness>
+  friend void addHash(llvm::HashBuilderImpl<HasherT, Endianness> &HBuilder,
+                      const StructWithoutCopyOrMove &Value) {
+    HBuilder.add(Value.I);
+  }
+};
+
+// The struct and associated tests are simplified to avoid failures caused by
+// different alignments on different platforms.
+struct /* __attribute__((packed)) */ StructWithFastHash {
+  int I;
+  // char C;
+
+  // When the requested endianness matches the system's, hash the raw bytes of
+  // the struct in a single `update` call for performance. Otherwise go through
+  // `add`, which handles endianness.
+  template <typename HasherT, llvm::support::endianness Endianness>
+  friend void addHash(llvm::HashBuilderImpl<HasherT, Endianness> &HBuilder,
+                      const StructWithFastHash &Value) {
+    if (Endianness != llvm::support::endian::system_endianness()) {
+      HBuilder.add(Value.I);
+      // HBuilder.add(Value.C);
+      return;
+    }
+    HBuilder.update(llvm::makeArrayRef(
+        reinterpret_cast<const uint8_t *>(&Value), sizeof(Value)));
+  }
+};
+
+// Variable-size container whose hash covers its size and its used elements.
+struct CustomContainer {
+private:
+  size_t Size;
+  int Elements[100];
+
+public:
+  // Fill the first `Size` slots with 0, 1, ..., Size - 1; the remaining slots
+  // are left untouched.
+  CustomContainer(size_t Size) : Size(Size) {
+    for (size_t Index = 0; Index != Size; ++Index)
+      Elements[Index] = Index;
+  }
+
+  template <typename HasherT, llvm::support::endianness Endianness>
+  friend void addHash(llvm::HashBuilderImpl<HasherT, Endianness> &HBuilder,
+                      const CustomContainer &Value) {
+    if (Endianness != llvm::support::endian::system_endianness()) {
+      // `addRange` hashes the element count before the elements.
+      const int *First = Value.Elements;
+      HBuilder.addRange(First, First + Value.Size);
+      return;
+    }
+    // Matching endianness: hash `Size` and the used elements with a single
+    // `update`, starting at the address of `Size`.
+    const size_t NumBytes =
+        sizeof(Value.Size) + Value.Size * sizeof(Value.Elements[0]);
+    HBuilder.update(llvm::makeArrayRef(
+        reinterpret_cast<const uint8_t *>(&Value.Size), NumBytes));
+  }
+};
+
+// Each user-defined struct must hash like the sequence of values its `addHash`
+// is expected to contribute.
+TYPED_TEST(HashBuilderTest, HashUserDefinedStruct) {
+  using HE = TypeParam;
+
+  const std::string SimpleHash = hashWithBuilder<HE>(SimpleStruct{'c', 123});
+  EXPECT_EQ(SimpleHash, hashWithBuilder<HE>('c', 123));
+
+  const std::string NoCopyHash =
+      hashWithBuilder<HE>(StructWithoutCopyOrMove{1});
+  EXPECT_EQ(NoCopyHash, hashWithBuilder<HE>(1));
+
+  const std::string FastHash = hashWithBuilder<HE>(StructWithFastHash{123});
+  EXPECT_EQ(FastHash, hashWithBuilder<HE>(123));
+
+  const std::string ContainerHash = hashWithBuilder<HE>(CustomContainer(3));
+  EXPECT_EQ(ContainerHash,
+            hashWithBuilder<HE>(static_cast<size_t>(3), 0, 1, 2));
+}
+
+// An `ArrayRef` of hashable data hashes like `addRange` over the same
+// elements, and unlike the bare elements (the size is part of the hash).
+TYPED_TEST(HashBuilderTest, HashArrayRefHashableDataTypes) {
+  using HE = TypeParam;
+  int Values[] = {1, 20, 0x12345678};
+  llvm::ArrayRef<int> Array(Values);
+  const std::string ArrayHash = hashWithBuilder<HE>(Array);
+
+  EXPECT_NE(ArrayHash, hashWithBuilder<HE>(1, 20, 0x12345678));
+  EXPECT_EQ(ArrayHash, hashRangeWithBuilder<HE>(Array.begin(), Array.end()));
+  EXPECT_EQ(ArrayHash, hashRangeWithBuilder<HE>(Array.data(),
+                                                Array.data() + Array.size()));
+}
+
+// Splitting the same elements differently across two `ArrayRef`s must give
+// pairwise different hashes.
+TYPED_TEST(HashBuilderTest, HashArrayRef) {
+  using HE = TypeParam;
+  int Values[] = {1, 2, 3};
+  int *const Base = Values;
+
+  llvm::ArrayRef<int> Empty(Base, static_cast<size_t>(0));
+  llvm::ArrayRef<int> First1(Base, 1);
+  llvm::ArrayRef<int> First2(Base, 2);
+  llvm::ArrayRef<int> All3(Base, 3);
+  llvm::ArrayRef<int> Last2(Base + 1, 2);
+  llvm::ArrayRef<int> Last1(Base + 2, 1);
+
+  const auto HashAllThenEmpty = hashWithBuilder<HE>(All3, Empty);
+  const auto HashTwoThenOne = hashWithBuilder<HE>(First2, Last1);
+  const auto HashOneThenTwo = hashWithBuilder<HE>(First1, Last2);
+  const auto HashEmptyThenAll = hashWithBuilder<HE>(Empty, All3);
+
+  EXPECT_NE(HashAllThenEmpty, HashTwoThenOne);
+  EXPECT_NE(HashAllThenEmpty, HashOneThenTwo);
+  EXPECT_NE(HashAllThenEmpty, HashEmptyThenAll);
+  EXPECT_NE(HashTwoThenOne, HashOneThenTwo);
+  EXPECT_NE(HashTwoThenOne, HashEmptyThenAll);
+  EXPECT_NE(HashOneThenTwo, HashEmptyThenAll);
+}
+
+// An `ArrayRef` of user-defined structs must not hash like the bare sequence
+// of its elements.
+TYPED_TEST(HashBuilderTest, HashArrayRefNonHashableDataTypes) {
+  using HE = TypeParam;
+  SimpleStruct Values[] = {{'a', 100}, {'b', 200}};
+  llvm::ArrayRef<SimpleStruct> Array(Values);
+
+  const std::string ArrayHash = hashWithBuilder<HE>(Array);
+  const std::string ElementsHash =
+      hashWithBuilder<HE>(SimpleStruct{'a', 100}, SimpleStruct{'b', 200});
+  EXPECT_NE(ArrayHash, ElementsHash);
+}
+
+// Splitting the same characters differently across two `StringRef`s must give
+// pairwise different hashes.
+TYPED_TEST(HashBuilderTest, HashStringRef) {
+  using HE = TypeParam;
+  llvm::StringRef Empty("");
+  llvm::StringRef First1("1");
+  llvm::StringRef First2("12");
+  llvm::StringRef All3("123");
+  llvm::StringRef Last2("23");
+  llvm::StringRef Last1("3");
+
+  const auto HashAllThenEmpty = hashWithBuilder<HE>(All3, Empty);
+  const auto HashTwoThenOne = hashWithBuilder<HE>(First2, Last1);
+  const auto HashOneThenTwo = hashWithBuilder<HE>(First1, Last2);
+  const auto HashEmptyThenAll = hashWithBuilder<HE>(Empty, All3);
+
+  EXPECT_NE(HashAllThenEmpty, HashTwoThenOne);
+  EXPECT_NE(HashAllThenEmpty, HashOneThenTwo);
+  EXPECT_NE(HashAllThenEmpty, HashEmptyThenAll);
+  EXPECT_NE(HashTwoThenOne, HashOneThenTwo);
+  EXPECT_NE(HashTwoThenOne, HashEmptyThenAll);
+  EXPECT_NE(HashOneThenTwo, HashEmptyThenAll);
+}
+
+// A `std::string` must hash like the `StringRef` with the same contents.
+TYPED_TEST(HashBuilderTest, HashStdString) {
+  using HE = TypeParam;
+  const std::string FromStdString = hashWithBuilder<HE>(std::string("123"));
+  const std::string FromStringRef =
+      hashWithBuilder<HE>(llvm::StringRef("123"));
+  EXPECT_EQ(FromStdString, FromStringRef);
+}
+
+// A `std::pair` must hash like its two members added in order.
+TYPED_TEST(HashBuilderTest, HashStdPair) {
+  using HE = TypeParam;
+  const std::string Expected = hashWithBuilder<HE>(1, "string");
+
+  EXPECT_EQ(hashWithBuilder<HE>(std::make_pair(1, "string")), Expected);
+
+  // Also exercise a pair whose first member has a deleted copy constructor.
+  std::pair<StructWithoutCopyOrMove, std::string> NoCopyPair;
+  NoCopyPair.first.I = 1;
+  NoCopyPair.second = "string";
+  EXPECT_EQ(hashWithBuilder<HE>(NoCopyPair), Expected);
+}
+
+// A `std::tuple` must hash like its elements added in index order.
+TYPED_TEST(HashBuilderTest, HashStdTuple) {
+  using HE = TypeParam;
+
+  // Single-element tuples.
+  EXPECT_EQ(hashWithBuilder<HE>(std::make_tuple(1)), hashWithBuilder<HE>(1));
+  EXPECT_EQ(hashWithBuilder<HE>(std::make_tuple(2ULL)),
+            hashWithBuilder<HE>(2ULL));
+  EXPECT_EQ(hashWithBuilder<HE>(std::make_tuple("three")),
+            hashWithBuilder<HE>("three"));
+
+  // Multi-element tuples.
+  EXPECT_EQ(hashWithBuilder<HE>(std::make_tuple(1, 2ULL)),
+            hashWithBuilder<HE>(1, 2ULL));
+  EXPECT_EQ(hashWithBuilder<HE>(std::make_tuple(1, 2ULL, "three")),
+            hashWithBuilder<HE>(1, 2ULL, "three"));
+
+  // A tuple whose first element has a deleted copy constructor.
+  std::tuple<StructWithoutCopyOrMove, std::string> NoCopyTuple;
+  std::get<0>(NoCopyTuple).I = 1;
+  std::get<1>(NoCopyTuple) = "two";
+
+  EXPECT_EQ(hashWithBuilder<HE>(NoCopyTuple), hashWithBuilder<HE>(1, "two"));
+}
+
+// `addRange` over a `std::list` (no pointer iterators) hashes the size too, so
+// it must differ from hashing the bare elements.
+TYPED_TEST(HashBuilderTest, HashRangeWithForwardIterator) {
+  using HE = TypeParam;
+  std::list<int> List{1, 2, 3};
+  EXPECT_NE(hashRangeWithBuilder<HE>(List), hashWithBuilder<HE>(1, 2, 3));
+}
+
+// The builder works with any hasher providing `update(ArrayRef<uint8_t>)`, and
+// forwards its constructor arguments to the hasher.
+TEST(CustomHasher, CustomHasher) {
+  // Toy hasher: the hash is the 8-bit wrapping sum of both seeds and every
+  // byte passed to `update`.
+  struct SumHash {
+    explicit SumHash(uint8_t Seed1, uint8_t Seed2) : Hash(Seed1 + Seed2) {}
+    void update(llvm::ArrayRef<uint8_t> Data) {
+      for (uint8_t Byte : Data)
+        Hash += Byte;
+    }
+    uint8_t Hash;
+  };
+  using SumHashBuilder =
+      llvm::HashBuilder<SumHash, llvm::support::endianness::little>;
+
+  {
+    // Seeds 0 + 1, then the bytes of 0x02, 0x03 and 0x400 (which sum to 4).
+    SumHashBuilder Builder(0, 1);
+    EXPECT_EQ(Builder.add(0x02, 0x03, 0x400).getHasher().Hash, 0xa);
+  }
+  {
+    SumHashBuilder Builder(2, 3);
+    EXPECT_EQ(Builder.add("ab", 'c').getHasher().Hash,
+              static_cast<uint8_t>(/*seeds*/ 2 + 3 + /*range size*/ 2 +
+                                   /*characters*/ 'a' + 'b' + 'c'));
+  }
+}


        


More information about the llvm-commits mailing list