[llvm] 98ca9a6 - Reland [StructuralHash] Refactor (#112621)

Kyungwoo Lee via llvm-commits llvm-commits at lists.llvm.org
Sat Oct 26 12:13:56 PDT 2024


Author: Kyungwoo Lee
Date: 2024-10-26T12:07:57-07:00
New Revision: 98ca9a635bd2fb98cee473a9558687a5b522e219

URL: https://github.com/llvm/llvm-project/commit/98ca9a635bd2fb98cee473a9558687a5b522e219
DIFF: https://github.com/llvm/llvm-project/commit/98ca9a635bd2fb98cee473a9558687a5b522e219.diff

LOG: Reland [StructuralHash] Refactor (#112621)

This is largely NFC, and it prepares for #112638.
 - Use stable_hash instead of uint64_t
 - Rename update* to hash* functions. They compute stable_hash locally and return it.

This is a patch for
https://discourse.llvm.org/t/rfc-global-function-merging/82608.

Added: 
    

Modified: 
    llvm/include/llvm/IR/StructuralHash.h
    llvm/lib/IR/StructuralHash.cpp
    llvm/lib/Transforms/IPO/MergeFunctions.cpp
    llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges-attr.ll
    llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll
    llvm/test/Transforms/MergeFunc/inline-asm.ll

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/IR/StructuralHash.h b/llvm/include/llvm/IR/StructuralHash.h
index 57fb45db849110..e2e192cc9501b3 100644
--- a/llvm/include/llvm/IR/StructuralHash.h
+++ b/llvm/include/llvm/IR/StructuralHash.h
@@ -14,6 +14,7 @@
 #ifndef LLVM_IR_STRUCTURALHASH_H
 #define LLVM_IR_STRUCTURALHASH_H
 
+#include "llvm/ADT/StableHashing.h"
 #include <cstdint>
 
 namespace llvm {
@@ -21,20 +22,18 @@ namespace llvm {
 class Function;
 class Module;
 
-using IRHash = uint64_t;
-
 /// Returns a hash of the function \p F.
 /// \param F The function to hash.
 /// \param DetailedHash Whether or not to encode additional information in the
 /// hash. The additional information added into the hash when this flag is set
 /// to true includes instruction and operand type information.
-IRHash StructuralHash(const Function &F, bool DetailedHash = false);
+stable_hash StructuralHash(const Function &F, bool DetailedHash = false);
 
 /// Returns a hash of the module \p M by hashing all functions and global
 /// variables contained within. \param M The module to hash. \param DetailedHash
 /// Whether or not to encode additional information in the function hashes that
 /// composed the module hash.
-IRHash StructuralHash(const Module &M, bool DetailedHash = false);
+stable_hash StructuralHash(const Module &M, bool DetailedHash = false);
 
 } // end namespace llvm
 

diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp
index fb4f33a021a96b..267a085c5af705 100644
--- a/llvm/lib/IR/StructuralHash.cpp
+++ b/llvm/lib/IR/StructuralHash.cpp
@@ -24,61 +24,93 @@ namespace {
 // by the MergeFunctions pass.
 
 class StructuralHashImpl {
-  uint64_t Hash = 4;
+  stable_hash Hash = 4;
 
-  void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
+  bool DetailedHash;
+
+  // This random value acts as a block header, as otherwise the partition of
+  // opcodes into BBs wouldn't affect the hash, only the order of the opcodes.
+  static constexpr stable_hash BlockHeaderHash = 45798;
+  static constexpr stable_hash FunctionHeaderHash = 0x62642d6b6b2d6b72;
+  static constexpr stable_hash GlobalHeaderHash = 23456;
 
   // This will produce different values on 32-bit and 64-bit systens as
   // hash_combine returns a size_t. However, this is only used for
   // detailed hashing which, in-tree, only needs to distinguish between
   // differences in functions.
-  template <typename T> void hashArbitaryType(const T &V) {
-    hash(hash_combine(V));
+  // TODO: This is not stable.
+  template <typename T> stable_hash hashArbitaryType(const T &V) {
+    return hash_combine(V);
   }
 
-  void hashType(Type *ValueType) {
-    hash(ValueType->getTypeID());
+  stable_hash hashType(Type *ValueType) {
+    SmallVector<stable_hash> Hashes;
+    Hashes.emplace_back(ValueType->getTypeID());
     if (ValueType->isIntegerTy())
-      hash(ValueType->getIntegerBitWidth());
+      Hashes.emplace_back(ValueType->getIntegerBitWidth());
+    return stable_hash_combine(Hashes);
   }
 
 public:
-  StructuralHashImpl() = default;
-
-  void updateOperand(Value *Operand) {
-    hashType(Operand->getType());
-
-    // The cases enumerated below are not exhaustive and are only aimed to
-    // get decent coverage over the function.
-    if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(Operand)) {
-      hashArbitaryType(ConstInt->getValue());
-    } else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(Operand)) {
-      hashArbitaryType(ConstFP->getValue());
-    } else if (Argument *Arg = dyn_cast<Argument>(Operand)) {
-      hash(Arg->getArgNo());
-    } else if (Function *Func = dyn_cast<Function>(Operand)) {
+  StructuralHashImpl() = delete;
+  explicit StructuralHashImpl(bool DetailedHash) : DetailedHash(DetailedHash) {}
+
+  stable_hash hashConstant(Constant *C) {
+    SmallVector<stable_hash> Hashes;
+    // TODO: hashArbitaryType() is not stable.
+    if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(C)) {
+      Hashes.emplace_back(hashArbitaryType(ConstInt->getValue()));
+    } else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(C)) {
+      Hashes.emplace_back(hashArbitaryType(ConstFP->getValue()));
+    } else if (Function *Func = dyn_cast<Function>(C)) {
       // Hashing the name will be deterministic as LLVM's hashing infrastructure
       // has explicit support for hashing strings and will not simply hash
       // the pointer.
-      hashArbitaryType(Func->getName());
+      Hashes.emplace_back(hashArbitaryType(Func->getName()));
     }
+
+    return stable_hash_combine(Hashes);
+  }
+
+  stable_hash hashValue(Value *V) {
+    // Check constant and return its hash.
+    Constant *C = dyn_cast<Constant>(V);
+    if (C)
+      return hashConstant(C);
+
+    // Hash argument number.
+    SmallVector<stable_hash> Hashes;
+    if (Argument *Arg = dyn_cast<Argument>(V))
+      Hashes.emplace_back(Arg->getArgNo());
+
+    return stable_hash_combine(Hashes);
   }
 
-  void updateInstruction(const Instruction &Inst, bool DetailedHash) {
-    hash(Inst.getOpcode());
+  stable_hash hashOperand(Value *Operand) {
+    SmallVector<stable_hash> Hashes;
+    Hashes.emplace_back(hashType(Operand->getType()));
+    Hashes.emplace_back(hashValue(Operand));
+    return stable_hash_combine(Hashes);
+  }
+
+  stable_hash hashInstruction(const Instruction &Inst) {
+    SmallVector<stable_hash> Hashes;
+    Hashes.emplace_back(Inst.getOpcode());
 
     if (!DetailedHash)
-      return;
+      return stable_hash_combine(Hashes);
 
-    hashType(Inst.getType());
+    Hashes.emplace_back(hashType(Inst.getType()));
 
     // Handle additional properties of specific instructions that cause
     // semantic differences in the IR.
     if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
-      hash(ComparisonInstruction->getPredicate());
+      Hashes.emplace_back(ComparisonInstruction->getPredicate());
 
     for (const auto &Op : Inst.operands())
-      updateOperand(Op);
+      Hashes.emplace_back(hashOperand(Op));
+
+    return stable_hash_combine(Hashes);
   }
 
   // A function hash is calculated by considering only the number of arguments
@@ -97,15 +129,17 @@ class StructuralHashImpl {
   // expensive checks for pass modification status). When modifying this
   // function, most changes should be gated behind an option and enabled
   // selectively.
-  void update(const Function &F, bool DetailedHash) {
+  void update(const Function &F) {
     // Declarations don't affect analyses.
     if (F.isDeclaration())
       return;
 
-    hash(0x62642d6b6b2d6b72); // Function header
+    SmallVector<stable_hash> Hashes;
+    Hashes.emplace_back(Hash);
+    Hashes.emplace_back(FunctionHeaderHash);
 
-    hash(F.isVarArg());
-    hash(F.arg_size());
+    Hashes.emplace_back(F.isVarArg());
+    Hashes.emplace_back(F.arg_size());
 
     SmallVector<const BasicBlock *, 8> BBs;
     SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
@@ -118,17 +152,17 @@ class StructuralHashImpl {
     while (!BBs.empty()) {
       const BasicBlock *BB = BBs.pop_back_val();
 
-      // This random value acts as a block header, as otherwise the partition of
-      // opcodes into BBs wouldn't affect the hash, only the order of the
-      // opcodes
-      hash(45798);
+      Hashes.emplace_back(BlockHeaderHash);
       for (auto &Inst : *BB)
-        updateInstruction(Inst, DetailedHash);
+        Hashes.emplace_back(hashInstruction(Inst));
 
       for (const BasicBlock *Succ : successors(BB))
         if (VisitedBBs.insert(Succ).second)
           BBs.push_back(Succ);
     }
+
+    // Update the combined hash in place.
+    Hash = stable_hash_combine(Hashes);
   }
 
   void update(const GlobalVariable &GV) {
@@ -137,15 +171,20 @@ class StructuralHashImpl {
     // we ignore anything with the `.llvm` prefix
     if (GV.isDeclaration() || GV.getName().starts_with("llvm."))
       return;
-    hash(23456); // Global header
-    hash(GV.getValueType()->getTypeID());
+    SmallVector<stable_hash> Hashes;
+    Hashes.emplace_back(Hash);
+    Hashes.emplace_back(GlobalHeaderHash);
+    Hashes.emplace_back(GV.getValueType()->getTypeID());
+
+    // Update the combined hash in place.
+    Hash = stable_hash_combine(Hashes);
   }
 
-  void update(const Module &M, bool DetailedHash) {
+  void update(const Module &M) {
     for (const GlobalVariable &GV : M.globals())
       update(GV);
     for (const Function &F : M)
-      update(F, DetailedHash);
+      update(F);
   }
 
   uint64_t getHash() const { return Hash; }
@@ -153,14 +192,14 @@ class StructuralHashImpl {
 
 } // namespace
 
-IRHash llvm::StructuralHash(const Function &F, bool DetailedHash) {
-  StructuralHashImpl H;
-  H.update(F, DetailedHash);
+stable_hash llvm::StructuralHash(const Function &F, bool DetailedHash) {
+  StructuralHashImpl H(DetailedHash);
+  H.update(F);
   return H.getHash();
 }
 
-IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) {
-  StructuralHashImpl H;
-  H.update(M, DetailedHash);
+stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) {
+  StructuralHashImpl H(DetailedHash);
+  H.update(M);
   return H.getHash();
 }

diff --git a/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index b50a700e09038f..ad16b0b3501495 100644
--- a/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -172,14 +172,14 @@ namespace {
 
 class FunctionNode {
   mutable AssertingVH<Function> F;
-  IRHash Hash;
+  stable_hash Hash;
 
 public:
   // Note the hash is recalculated potentially multiple times, but it is cheap.
   FunctionNode(Function *F) : F(F), Hash(StructuralHash(*F)) {}
 
   Function *getFunc() const { return F; }
-  IRHash getHash() const { return Hash; }
+  stable_hash getHash() const { return Hash; }
 
   /// Replace the reference to the function F by the function G, assuming their
   /// implementations are equal.
@@ -420,7 +420,7 @@ bool MergeFunctions::runOnModule(Module &M) {
 
   // All functions in the module, ordered by hash. Functions with a unique
   // hash value are easily eliminated.
-  std::vector<std::pair<IRHash, Function *>> HashedFuncs;
+  std::vector<std::pair<stable_hash, Function *>> HashedFuncs;
   for (Function &Func : M) {
     if (isEligibleForMerging(Func)) {
       HashedFuncs.push_back({StructuralHash(Func), &Func});

diff --git a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges-attr.ll b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges-attr.ll
index e5d62319bf9db7..cbf14165548ec5 100644
--- a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges-attr.ll
+++ b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges-attr.ll
@@ -80,8 +80,8 @@ lpad:
 }
 
 define i8 @invoke_with_same_range() personality ptr undef {
-; CHECK-LABEL: @invoke_with_same_range()
-; CHECK: tail call i8 @invoke_with_range()
+; CHECK-DAG: @invoke_with_same_range()
+; CHECK-DAG: tail call i8 @invoke_with_range()
   %out = invoke range(i8 0, 2) i8 @dummy() to label %next unwind label %lpad
 
 next:
@@ -93,15 +93,15 @@ lpad:
 }
 
 define i8 @call_with_same_range() {
-; CHECK-LABEL: @call_with_same_range
-; CHECK: tail call i8 @call_with_range
+; CHECK-DAG: @call_with_same_range
+; CHECK-DAG: tail call i8 @call_with_range
   %out = call range(i8 0, 2) i8 @dummy()
   ret i8 %out
 }
 
 define i8 @call_with_same_range_attr(i8 range(i8 0, 2) %v) {
-; CHECK-LABEL: @call_with_same_range_attr
-; CHECK: tail call i8 @call_with_range_attr
+; CHECK-DAG: @call_with_same_range_attr
+; CHECK-DAG: tail call i8 @call_with_range_attr
   %out = call i8 @dummy2(i8 %v)
   ret i8 %out
 }

diff --git a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll
index e7718ca84d3165..39e5a11181a4f0 100644
--- a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll
+++ b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll
@@ -64,8 +64,8 @@ lpad:
 }
 
 define i8 @invoke_with_same_range() personality ptr undef {
-; CHECK-LABEL: @invoke_with_same_range()
-; CHECK: tail call i8 @invoke_with_range()
+; CHECK-DAG: @invoke_with_same_range()
+; CHECK-DAG: tail call i8 @invoke_with_range()
   %out = invoke i8 @dummy() to label %next unwind label %lpad, !range !0
 
 next:
@@ -77,8 +77,8 @@ lpad:
 }
 
 define i8 @call_with_same_range() {
-; CHECK-LABEL: @call_with_same_range
-; CHECK: tail call i8 @call_with_range
+; CHECK-DAG: @call_with_same_range
+; CHECK-DAG: tail call i8 @call_with_range
   bitcast i8 0 to i8
   %out = call i8 @dummy(), !range !0
   ret i8 %out

diff --git a/llvm/test/Transforms/MergeFunc/inline-asm.ll b/llvm/test/Transforms/MergeFunc/inline-asm.ll
index 7cc6afd2f8f7bd..970757e8d53afb 100644
--- a/llvm/test/Transforms/MergeFunc/inline-asm.ll
+++ b/llvm/test/Transforms/MergeFunc/inline-asm.ll
@@ -3,11 +3,11 @@
 ; CHECK-LABEL: @int_ptr_arg_different
 ; CHECK-NEXT: call void asm
 
-; CHECK-LABEL: @int_ptr_null
-; CHECK-NEXT: tail call void @float_ptr_null()
+; CHECK-DAG: @int_ptr_null
+; CHECK-DAG: tail call void @float_ptr_null()
 
-; CHECK-LABEL: @int_ptr_arg_same
-; CHECK-NEXT: tail call void @float_ptr_arg_same(ptr %0)
+; CHECK-DAG: @int_ptr_arg_same
+; CHECK-DAG: tail call void @float_ptr_arg_same(ptr %0)
 
 ; Used to satisfy minimum size limit
 declare void @stuff()


        


More information about the llvm-commits mailing list