[llvm] 7615c0b - [StableHash] Implement with xxh3_64bits (#105849)

via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 23 21:53:47 PDT 2024


Author: Kyungwoo Lee
Date: 2024-08-23T21:53:43-07:00
New Revision: 7615c0b2eb52b8c5d8e6dfc7f265a87a7a9f3ef5

URL: https://github.com/llvm/llvm-project/commit/7615c0b2eb52b8c5d8e6dfc7f265a87a7a9f3ef5
DIFF: https://github.com/llvm/llvm-project/commit/7615c0b2eb52b8c5d8e6dfc7f265a87a7a9f3ef5.diff

LOG: [StableHash] Implement with xxh3_64bits (#105849)

This is a follow-up to address a suggestion from
https://github.com/llvm/llvm-project/pull/105619.
The main goal of this change is to efficiently implement stable hash
functions using the xxh3 64bits API.
`stable_hash_combine_range` and `stable_hash_combine_array` functions
are removed and consolidated into a more general `stable_hash_combine`
function that takes an `ArrayRef<stable_hash>` as input.

Added: 
    

Modified: 
    llvm/include/llvm/ADT/StableHashing.h
    llvm/lib/CodeGen/MachineOperand.cpp
    llvm/lib/CodeGen/MachineStableHash.cpp

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/ADT/StableHashing.h b/llvm/include/llvm/ADT/StableHashing.h
index f675f828f702e5..7778f5d7c3a1c3 100644
--- a/llvm/include/llvm/ADT/StableHashing.h
+++ b/llvm/include/llvm/ADT/StableHashing.h
@@ -8,7 +8,10 @@
 //
 // This file provides types and functions for computing and combining stable
 // hashes. Stable hashes can be useful for hashing across different modules,
-// processes, or compiler runs.
+// processes, machines, or compiler runs for a specific compiler version. It
+// currently employs the xxh3_64bits hashing algorithm. Be aware that this
+// implementation may be adjusted or updated as improvements to the compiler are
+// made.
 //
 //===----------------------------------------------------------------------===//
 
@@ -16,6 +19,7 @@
 #define LLVM_ADT_STABLEHASHING_H
 
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/xxhash.h"
 
 namespace llvm {
 
@@ -23,78 +27,29 @@ namespace llvm {
 /// deserialized, and is stable across processes and executions.
 using stable_hash = uint64_t;
 
-// Implementation details
-namespace hashing {
-namespace detail {
-
-// Stable hashes are based on the 64-bit FNV-1 hash:
-// https://en.wikipedia.org/wiki/Fowler-Noll-Vo_hash_function
-
-const uint64_t FNV_PRIME_64 = 1099511628211u;
-const uint64_t FNV_OFFSET_64 = 14695981039346656037u;
-
-inline void stable_hash_append(stable_hash &Hash, const char Value) {
-  Hash = Hash ^ (Value & 0xFF);
-  Hash = Hash * FNV_PRIME_64;
-}
-
-inline void stable_hash_append(stable_hash &Hash, stable_hash Value) {
-  for (unsigned I = 0; I < 8; ++I) {
-    stable_hash_append(Hash, static_cast<char>(Value));
-    Value >>= 8;
-  }
+inline stable_hash stable_hash_combine(ArrayRef<stable_hash> Buffer) {
+  const uint8_t *Ptr = reinterpret_cast<const uint8_t *>(Buffer.data());
+  size_t Size = Buffer.size() * sizeof(stable_hash);
+  return xxh3_64bits(ArrayRef<uint8_t>(Ptr, Size));
 }
 
-} // namespace detail
-} // namespace hashing
-
 inline stable_hash stable_hash_combine(stable_hash A, stable_hash B) {
-  stable_hash Hash = hashing::detail::FNV_OFFSET_64;
-  hashing::detail::stable_hash_append(Hash, A);
-  hashing::detail::stable_hash_append(Hash, B);
-  return Hash;
+  stable_hash Hashes[2] = {A, B};
+  return stable_hash_combine(Hashes);
 }
 
 inline stable_hash stable_hash_combine(stable_hash A, stable_hash B,
                                        stable_hash C) {
-  stable_hash Hash = hashing::detail::FNV_OFFSET_64;
-  hashing::detail::stable_hash_append(Hash, A);
-  hashing::detail::stable_hash_append(Hash, B);
-  hashing::detail::stable_hash_append(Hash, C);
-  return Hash;
+  stable_hash Hashes[3] = {A, B, C};
+  return stable_hash_combine(Hashes);
 }
 
 inline stable_hash stable_hash_combine(stable_hash A, stable_hash B,
                                        stable_hash C, stable_hash D) {
-  stable_hash Hash = hashing::detail::FNV_OFFSET_64;
-  hashing::detail::stable_hash_append(Hash, A);
-  hashing::detail::stable_hash_append(Hash, B);
-  hashing::detail::stable_hash_append(Hash, C);
-  hashing::detail::stable_hash_append(Hash, D);
-  return Hash;
-}
-
-/// Compute a stable_hash for a sequence of values.
-///
-/// This hashes a sequence of values. It produces the same stable_hash as
-/// 'stable_hash_combine(a, b, c, ...)', but can run over arbitrary sized
-/// sequences and is significantly faster given pointers and types which
-/// can be hashed as a sequence of bytes.
-template <typename InputIteratorT>
-stable_hash stable_hash_combine_range(InputIteratorT First,
-                                      InputIteratorT Last) {
-  stable_hash Hash = hashing::detail::FNV_OFFSET_64;
-  for (auto I = First; I != Last; ++I)
-    hashing::detail::stable_hash_append(Hash, *I);
-  return Hash;
+  stable_hash Hashes[4] = {A, B, C, D};
+  return stable_hash_combine(Hashes);
 }
 
-inline stable_hash stable_hash_combine_array(const stable_hash *P, size_t C) {
-  stable_hash Hash = hashing::detail::FNV_OFFSET_64;
-  for (size_t I = 0; I < C; ++I)
-    hashing::detail::stable_hash_append(Hash, P[I]);
-  return Hash;
-}
 } // namespace llvm
 
 #endif

diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp
index ace05902d5df79..a0726ca64910ea 100644
--- a/llvm/lib/CodeGen/MachineOperand.cpp
+++ b/llvm/lib/CodeGen/MachineOperand.cpp
@@ -424,8 +424,7 @@ hash_code llvm::hash_value(const MachineOperand &MO) {
       const uint32_t *RegMask = MO.getRegMask();
       std::vector<stable_hash> RegMaskHashes(RegMask, RegMask + RegMaskSize);
       return hash_combine(MO.getType(), MO.getTargetFlags(),
-                          stable_hash_combine_array(RegMaskHashes.data(),
-                                                    RegMaskHashes.size()));
+                          stable_hash_combine(RegMaskHashes));
     }
 
     assert(0 && "MachineOperand not associated with any MachineFunction");

diff --git a/llvm/lib/CodeGen/MachineStableHash.cpp b/llvm/lib/CodeGen/MachineStableHash.cpp
index fb5e9a37d9b997..916acbf2d2cbf9 100644
--- a/llvm/lib/CodeGen/MachineStableHash.cpp
+++ b/llvm/lib/CodeGen/MachineStableHash.cpp
@@ -66,7 +66,7 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
       SmallVector<stable_hash> DefOpcodes;
       for (auto &Def : MRI.def_instructions(MO.getReg()))
         DefOpcodes.push_back(Def.getOpcode());
-      return stable_hash_combine_range(DefOpcodes.begin(), DefOpcodes.end());
+      return stable_hash_combine(DefOpcodes);
     }
 
     // Register operands don't have target flags.
@@ -78,8 +78,8 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
   case MachineOperand::MO_FPImmediate: {
     auto Val = MO.isCImm() ? MO.getCImm()->getValue()
                            : MO.getFPImm()->getValueAPF().bitcastToAPInt();
-    auto ValHash =
-        stable_hash_combine_array(Val.getRawData(), Val.getNumWords());
+    auto ValHash = stable_hash_combine(
+        ArrayRef<stable_hash>(Val.getRawData(), Val.getNumWords()));
     return stable_hash_combine(MO.getType(), MO.getTargetFlags(), ValHash);
   }
 
@@ -126,10 +126,8 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
           const uint32_t *RegMask = MO.getRegMask();
           std::vector<llvm::stable_hash> RegMaskHashes(RegMask,
                                                        RegMask + RegMaskSize);
-          return stable_hash_combine(
-              MO.getType(), MO.getTargetFlags(),
-              stable_hash_combine_array(RegMaskHashes.data(),
-                                        RegMaskHashes.size()));
+          return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
+                                     stable_hash_combine(RegMaskHashes));
         }
       }
     }
@@ -145,10 +143,8 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
         MO.getShuffleMask(), std::back_inserter(ShuffleMaskHashes),
         [](int S) -> llvm::stable_hash { return llvm::stable_hash(S); });
 
-    return stable_hash_combine(
-        MO.getType(), MO.getTargetFlags(),
-        stable_hash_combine_array(ShuffleMaskHashes.data(),
-                                  ShuffleMaskHashes.size()));
+    return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
+                               stable_hash_combine(ShuffleMaskHashes));
   }
   case MachineOperand::MO_MCSymbol: {
     auto SymbolName = MO.getMCSymbol()->getName();
@@ -212,8 +208,7 @@ stable_hash llvm::stableHashValue(const MachineInstr &MI, bool HashVRegs,
     HashComponents.push_back(static_cast<unsigned>(Op->getFailureOrdering()));
   }
 
-  return stable_hash_combine_range(HashComponents.begin(),
-                                   HashComponents.end());
+  return stable_hash_combine(HashComponents);
 }
 
 stable_hash llvm::stableHashValue(const MachineBasicBlock &MBB) {
@@ -221,8 +216,7 @@ stable_hash llvm::stableHashValue(const MachineBasicBlock &MBB) {
   // TODO: Hash more stuff like block alignment and branch probabilities.
   for (const auto &MI : MBB)
     HashComponents.push_back(stableHashValue(MI));
-  return stable_hash_combine_range(HashComponents.begin(),
-                                   HashComponents.end());
+  return stable_hash_combine(HashComponents);
 }
 
 stable_hash llvm::stableHashValue(const MachineFunction &MF) {
@@ -230,6 +224,5 @@ stable_hash llvm::stableHashValue(const MachineFunction &MF) {
   // TODO: Hash lots more stuff like function alignment and stack objects.
   for (const auto &MBB : MF)
     HashComponents.push_back(stableHashValue(MBB));
-  return stable_hash_combine_range(HashComponents.begin(),
-                                   HashComponents.end());
+  return stable_hash_combine(HashComponents);
 }


        


More information about the llvm-commits mailing list