[llvm] 64da0be - Reland "[NFCi][MergeFunctions] Consolidate Hashing Functions"

Sat Aug 19 17:16:19 PDT 2023

Author: Aiden Grossman
Date: 2023-08-19T17:14:08-07:00
New Revision: 64da0be1fc06ee2199bd27c980736986e0eccd9d

URL: https://github.com/llvm/llvm-project/commit/64da0be1fc06ee2199bd27c980736986e0eccd9d
DIFF: https://github.com/llvm/llvm-project/commit/64da0be1fc06ee2199bd27c980736986e0eccd9d.diff

LOG: Reland "[NFCi][MergeFunctions] Consolidate Hashing Functions"

This is a reland of 28134a29fdedd8972acdfb39223571ddcc15dc59 which was
reverted due to behavioral differences between 32 and 64 bit builds that
have since been fixed.

Differential Revision: https://reviews.llvm.org/D158217

Added: 
    

Modified: 
    llvm/include/llvm/IR/StructuralHash.h
    llvm/include/llvm/Transforms/Utils/FunctionComparator.h
    llvm/lib/IR/StructuralHash.cpp
    llvm/lib/Transforms/IPO/MergeFunctions.cpp
    llvm/lib/Transforms/Utils/FunctionComparator.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/IR/StructuralHash.h b/llvm/include/llvm/IR/StructuralHash.h
index 1bdeb85afa3c50..8739c599a7ac33 100644

--- a/llvm/include/llvm/IR/StructuralHash.h
+++ b/llvm/include/llvm/IR/StructuralHash.h
@@ -21,8 +21,10 @@ namespace llvm {
 class Function;
 class Module;
 
-uint64_t StructuralHash(const Function &F);
-uint64_t StructuralHash(const Module &M);
+using IRHash = uint64_t;
+
+IRHash StructuralHash(const Function &F);
+IRHash StructuralHash(const Module &M);
 
 } // end namespace llvm
 

diff  --git a/llvm/include/llvm/Transforms/Utils/FunctionComparator.h b/llvm/include/llvm/Transforms/Utils/FunctionComparator.h
index 78761fc78fee8c..c28f868039a1f7 100644
--- a/llvm/include/llvm/Transforms/Utils/FunctionComparator.h
+++ b/llvm/include/llvm/Transforms/Utils/FunctionComparator.h
@@ -99,11 +99,6 @@ class FunctionComparator {
   /// Test whether the two functions have equivalent behaviour.
   int compare();
 
-  /// Hash a function. Equivalent functions will have the same hash, and unequal
-  /// functions will have 
diff erent hashes with high probability.
-  using FunctionHash = uint64_t;
-  static FunctionHash functionHash(Function &);
-
 protected:
   /// Start the comparison.
   void beginCompare() {

diff  --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp
index 6ea108d831a165..13cd77151adf4a 100644
--- a/llvm/lib/IR/StructuralHash.cpp
+++ b/llvm/lib/IR/StructuralHash.cpp
@@ -20,19 +20,35 @@ namespace {
 // llvm/lib/Transforms/Utils/FunctionComparator.cpp
 
 class StructuralHashImpl {
-  hash_code Hash;
+  uint64_t Hash;
 
-  template <typename T> void hash(const T &V) { Hash = hash_combine(Hash, V); }
+  void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
 
 public:
   StructuralHashImpl() : Hash(4) {}
 
+  // A function hash is calculated by considering only the number of arguments
+  // and whether a function is varargs, the order of basic blocks (given by the
+  // successors of each basic block in depth first order), and the order of
+  // opcodes of each instruction within each of these basic blocks. This mirrors
+  // the strategy FunctionComparator::compare() uses to compare functions by
+  // walking the BBs in depth first order and comparing each instruction in
+  // sequence. Because this hash currently does not look at the operands, it is
+  // insensitive to things such as the target of calls and the constants used in
+  // the function, which makes it useful when possibly merging functions which
+  // are the same modulo constants and call targets.
+  //
+  // Note that 
diff erent users of StructuralHash will want 
diff erent behavior
+  // out of it (i.e., MergeFunctions will want something 
diff erent from PM
+  // expensive checks for pass modification status). When modifying this
+  // function, most changes should be gated behind an option and enabled
+  // selectively.
   void update(const Function &F) {
     // Declarations don't affect analyses.
     if (F.isDeclaration())
       return;
 
-    hash(12345); // Function header
+    hash(0x62642d6b6b2d6b72); // Function header
 
     hash(F.isVarArg());
     hash(F.arg_size());
@@ -40,11 +56,18 @@ class StructuralHashImpl {
     SmallVector<const BasicBlock *, 8> BBs;
     SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
 
+    // Walk the blocks in the same order as
+    // FunctionComparator::cmpBasicBlocks(), accumulating the hash of the
+    // function "structure." (BB and opcode sequence)
     BBs.push_back(&F.getEntryBlock());
     VisitedBBs.insert(BBs[0]);
     while (!BBs.empty()) {
       const BasicBlock *BB = BBs.pop_back_val();
-      hash(45798); // Block header
+
+      // This random value acts as a block header, as otherwise the partition of
+      // opcodes into BBs wouldn't affect the hash, only the order of the
+      // opcodes
+      hash(45798);
       for (auto &Inst : *BB)
         hash(Inst.getOpcode());
 
@@ -79,13 +102,13 @@ class StructuralHashImpl {
 
 } // namespace
 
-uint64_t llvm::StructuralHash(const Function &F) {
+IRHash llvm::StructuralHash(const Function &F) {
   StructuralHashImpl H;
   H.update(F);
   return H.getHash();
 }
 
-uint64_t llvm::StructuralHash(const Module &M) {
+IRHash llvm::StructuralHash(const Module &M) {
   StructuralHashImpl H;
   H.update(M);
   return H.getHash();

diff  --git a/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index feda5d6459cb47..4a64580b3cb329 100644
--- a/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -107,6 +107,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/StructuralHash.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Use.h"
 #include "llvm/IR/User.h"
@@ -171,15 +172,14 @@ namespace {
 
 class FunctionNode {
   mutable AssertingVH<Function> F;
-  FunctionComparator::FunctionHash Hash;
+  IRHash Hash;
 
 public:
   // Note the hash is recalculated potentially multiple times, but it is cheap.
-  FunctionNode(Function *F)
-    : F(F), Hash(FunctionComparator::functionHash(*F))  {}
+  FunctionNode(Function *F) : F(F), Hash(StructuralHash(*F)) {}
 
   Function *getFunc() const { return F; }
-  FunctionComparator::FunctionHash getHash() const { return Hash; }
+  IRHash getHash() const { return Hash; }
 
   /// Replace the reference to the function F by the function G, assuming their
   /// implementations are equal.
@@ -390,11 +390,10 @@ bool MergeFunctions::runOnModule(Module &M) {
 
   // All functions in the module, ordered by hash. Functions with a unique
   // hash value are easily eliminated.
-  std::vector<std::pair<FunctionComparator::FunctionHash, Function *>>
-    HashedFuncs;
+  std::vector<std::pair<IRHash, Function *>> HashedFuncs;
   for (Function &Func : M) {
     if (isEligibleForMerging(Func)) {
-      HashedFuncs.push_back({FunctionComparator::functionHash(Func), &Func});
+      HashedFuncs.push_back({StructuralHash(Func), &Func});
     }
   }
 

diff  --git a/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/llvm/lib/Transforms/Utils/FunctionComparator.cpp
index 8daeb92130ba3c..b1d74f67377e27 100644
--- a/llvm/lib/Transforms/Utils/FunctionComparator.cpp
+++ b/llvm/lib/Transforms/Utils/FunctionComparator.cpp
@@ -958,67 +958,3 @@ int FunctionComparator::compare() {
   }
   return 0;
 }
-
-namespace {
-
-// Accumulate the hash of a sequence of 64-bit integers. This is similar to a
-// hash of a sequence of 64bit ints, but the entire input does not need to be
-// available at once. This interface is necessary for functionHash because it
-// needs to accumulate the hash as the structure of the function is traversed
-// without saving these values to an intermediate buffer. This form of hashing
-// is not often needed, as usually the object to hash is just read from a
-// buffer.
-class HashAccumulator64 {
-  uint64_t Hash;
-
-public:
-  // Initialize to random constant, so the state isn't zero.
-  HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; }
-
-  void add(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
-
-  // No finishing is required, because the entire hash value is used.
-  uint64_t getHash() { return Hash; }
-};
-
-} // end anonymous namespace
-
-// A function hash is calculated by considering only the number of arguments and
-// whether a function is varargs, the order of basic blocks (given by the
-// successors of each basic block in depth first order), and the order of
-// opcodes of each instruction within each of these basic blocks. This mirrors
-// the strategy compare() uses to compare functions by walking the BBs in depth
-// first order and comparing each instruction in sequence. Because this hash
-// does not look at the operands, it is insensitive to things such as the
-// target of calls and the constants used in the function, which makes it useful
-// when possibly merging functions which are the same modulo constants and call
-// targets.
-FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) {
-  HashAccumulator64 H;
-  H.add(F.isVarArg());
-  H.add(F.arg_size());
-
-  SmallVector<const BasicBlock *, 8> BBs;
-  SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
-
-  // Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(),
-  // accumulating the hash of the function "structure." (BB and opcode sequence)
-  BBs.push_back(&F.getEntryBlock());
-  VisitedBBs.insert(BBs[0]);
-  while (!BBs.empty()) {
-    const BasicBlock *BB = BBs.pop_back_val();
-    // This random value acts as a block header, as otherwise the partition of
-    // opcodes into BBs wouldn't affect the hash, only the order of the opcodes
-    H.add(45798);
-    for (const auto &Inst : *BB) {
-      H.add(Inst.getOpcode());
-    }
-    const Instruction *Term = BB->getTerminator();
-    for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
-      if (!VisitedBBs.insert(Term->getSuccessor(i)).second)
-        continue;
-      BBs.push_back(Term->getSuccessor(i));
-    }
-  }
-  return H.getHash();
-}