[llvm] f532d61 - [IR] Add more details to StructuralHash

Aiden Grossman via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 25 14:33:49 PDT 2023


Author: Aiden Grossman
Date: 2023-08-25T14:32:47-07:00
New Revision: f532d61de02befccd429c0d260d37275b72f2aa2

URL: https://github.com/llvm/llvm-project/commit/f532d61de02befccd429c0d260d37275b72f2aa2
DIFF: https://github.com/llvm/llvm-project/commit/f532d61de02befccd429c0d260d37275b72f2aa2.diff

LOG: [IR] Add more details to StructuralHash

This pass extends StructuralHash to include much more information about
the IR under analysis. This is done with a flag (Detailed) to configure
the behavior. The detailed behavior is intended for use in expensive
checks and downstream.

Differential Revision: https://reviews.llvm.org/D158250

Reviewed-By: aeubanks, nikic

Added: 
    

Modified: 
    llvm/include/llvm/IR/StructuralHash.h
    llvm/lib/IR/Pass.cpp
    llvm/lib/IR/StructuralHash.cpp
    llvm/unittests/IR/StructuralHashTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/IR/StructuralHash.h b/llvm/include/llvm/IR/StructuralHash.h
index 8739c599a7ac33..57fb45db849110 100644
--- a/llvm/include/llvm/IR/StructuralHash.h
+++ b/llvm/include/llvm/IR/StructuralHash.h
@@ -23,8 +23,18 @@ class Module;
 
 using IRHash = uint64_t;
 
-IRHash StructuralHash(const Function &F);
-IRHash StructuralHash(const Module &M);
+/// Returns a hash of the function \p F.
+/// \param F The function to hash.
+/// \param DetailedHash Whether or not to encode additional information in the
+/// hash. The additional information added into the hash when this flag is set
+/// to true includes instruction and operand type information.
+IRHash StructuralHash(const Function &F, bool DetailedHash = false);
+
+/// Returns a hash of the module \p M by hashing all functions and global
+/// variables contained within. \param M The module to hash. \param DetailedHash
+/// Whether or not to encode additional information in the function hashes that
+/// composed the module hash.
+IRHash StructuralHash(const Module &M, bool DetailedHash = false);
 
 } // end namespace llvm
 

diff  --git a/llvm/lib/IR/Pass.cpp b/llvm/lib/IR/Pass.cpp
index 716d9d546f4f06..b92838d2e50ac7 100644
--- a/llvm/lib/IR/Pass.cpp
+++ b/llvm/lib/IR/Pass.cpp
@@ -139,9 +139,15 @@ LLVM_DUMP_METHOD void Pass::dump() const {
 #endif
 
 #ifdef EXPENSIVE_CHECKS
-uint64_t Pass::structuralHash(Module &M) const { return StructuralHash(M); }
+// TODO: Use detailed structural hashing once exposed bugs have been fixed/
+// (https://github.com/llvm/llvm-project/issues/64938)
+uint64_t Pass::structuralHash(Module &M) const {
+  return StructuralHash(M, false);
+}
 
-uint64_t Pass::structuralHash(Function &F) const { return StructuralHash(F); }
+uint64_t Pass::structuralHash(Function &F) const {
+  return StructuralHash(F, false);
+}
 #endif
 
 //===----------------------------------------------------------------------===//

diff  --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp
index 13cd77151adf4a..90f13772da75a6 100644
--- a/llvm/lib/IR/StructuralHash.cpp
+++ b/llvm/lib/IR/StructuralHash.cpp
@@ -7,8 +7,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/IR/StructuralHash.h"
+#include "llvm/ADT/Hashing.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
 
 using namespace llvm;
@@ -24,9 +28,59 @@ class StructuralHashImpl {
 
   void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
 
+  // This will produce 
diff erent values on 32-bit and 64-bit systens as
+  // hash_combine returns a size_t. However, this is only used for
+  // detailed hashing which, in-tree, only needs to distinguish between
+  // 
diff erences in functions.
+  template <typename T> void hashArbitaryType(const T &V) {
+    hash(hash_combine(V));
+  }
+
+  void hashType(Type *ValueType) {
+    hash(ValueType->getTypeID());
+    if (ValueType->isIntegerTy())
+      hash(ValueType->getIntegerBitWidth());
+  }
+
 public:
   StructuralHashImpl() : Hash(4) {}
 
+  void updateOperand(Value *Operand) {
+    hashType(Operand->getType());
+
+    // The cases enumerated below are not exhaustive and are only aimed to
+    // get decent coverage over the function.
+    if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(Operand)) {
+      hashArbitaryType(ConstInt->getValue());
+    } else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(Operand)) {
+      hashArbitaryType(ConstFP->getValue());
+    } else if (Argument *Arg = dyn_cast<Argument>(Operand)) {
+      hash(Arg->getArgNo());
+    } else if (Function *Func = dyn_cast<Function>(Operand)) {
+      // Hashing the name will be deterministic as LLVM's hashing infrastructure
+      // has explicit support for hashing strings and will not simply hash
+      // the pointer.
+      hashArbitaryType(Func->getName());
+    }
+  }
+
+  void updateInstruction(const Instruction &Inst, bool DetailedHash) {
+    hash(Inst.getOpcode());
+
+    if (!DetailedHash)
+      return;
+
+    hashType(Inst.getType());
+
+    // Handle additional properties of specific instructions that cause
+    // semantic 
diff erences in the IR.
+    if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
+      hash(ComparisonInstruction->getPredicate());
+
+    for (const auto &Op : Inst.operands())
+      updateOperand(Op);
+  }
+
   // A function hash is calculated by considering only the number of arguments
   // and whether a function is varargs, the order of basic blocks (given by the
   // successors of each basic block in depth first order), and the order of
@@ -43,7 +97,7 @@ class StructuralHashImpl {
   // expensive checks for pass modification status). When modifying this
   // function, most changes should be gated behind an option and enabled
   // selectively.
-  void update(const Function &F) {
+  void update(const Function &F, bool DetailedHash) {
     // Declarations don't affect analyses.
     if (F.isDeclaration())
       return;
@@ -69,7 +123,7 @@ class StructuralHashImpl {
       // opcodes
       hash(45798);
       for (auto &Inst : *BB)
-        hash(Inst.getOpcode());
+        updateInstruction(Inst, DetailedHash);
 
       const Instruction *Term = BB->getTerminator();
       for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
@@ -90,11 +144,11 @@ class StructuralHashImpl {
     hash(GV.getValueType()->getTypeID());
   }
 
-  void update(const Module &M) {
+  void update(const Module &M, bool DetailedHash) {
     for (const GlobalVariable &GV : M.globals())
       update(GV);
     for (const Function &F : M)
-      update(F);
+      update(F, DetailedHash);
   }
 
   uint64_t getHash() const { return Hash; }
@@ -102,14 +156,14 @@ class StructuralHashImpl {
 
 } // namespace
 
-IRHash llvm::StructuralHash(const Function &F) {
+IRHash llvm::StructuralHash(const Function &F, bool DetailedHash) {
   StructuralHashImpl H;
-  H.update(F);
+  H.update(F, DetailedHash);
   return H.getHash();
 }
 
-IRHash llvm::StructuralHash(const Module &M) {
+IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) {
   StructuralHashImpl H;
-  H.update(M);
+  H.update(M, DetailedHash);
   return H.getHash();
 }

diff  --git a/llvm/unittests/IR/StructuralHashTest.cpp b/llvm/unittests/IR/StructuralHashTest.cpp
index 4c16144a91df9f..64e66aa5f97a6d 100644
--- a/llvm/unittests/IR/StructuralHashTest.cpp
+++ b/llvm/unittests/IR/StructuralHashTest.cpp
@@ -91,8 +91,8 @@ TEST(StructuralHashTest, FunctionRetType) {
   LLVMContext Ctx;
   std::unique_ptr<Module> M1 = parseIR(Ctx, "define void @f() { ret void }");
   std::unique_ptr<Module> M2 = parseIR(Ctx, "define i32 @f() { ret i32 0 }");
-  // FIXME: should be 
diff erent
   EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2));
+  EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true));
 }
 
 TEST(StructuralHashTest, InstructionOpCode) {
@@ -109,7 +109,7 @@ TEST(StructuralHashTest, InstructionOpCode) {
   EXPECT_NE(StructuralHash(*M1), StructuralHash(*M2));
 }
 
-TEST(StructuralHashTest, InstructionType) {
+TEST(StructuralHashTest, InstructionSubType) {
   LLVMContext Ctx;
   std::unique_ptr<Module> M1 = parseIR(Ctx, "define void @f(ptr %p) {\n"
                                             "  %a = load i32, ptr %p\n"
@@ -119,8 +119,22 @@ TEST(StructuralHashTest, InstructionType) {
                                             "  %a = load i64, ptr %p\n"
                                             "  ret void\n"
                                             "}\n");
-  // FIXME: should be 
diff erent
   EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2));
+  EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true));
+}
+
+TEST(StructuralHashTest, InstructionType) {
+  LLVMContext Ctx;
+  std::unique_ptr<Module> M1 = parseIR(Ctx, "define void @f(ptr %p) {\n"
+                                            "  %1 = load i32, ptr %p\n"
+                                            "  ret void\n"
+                                            "}\n");
+  std::unique_ptr<Module> M2 = parseIR(Ctx, "define void @f(ptr %p) {\n"
+                                            "  %1 = load float, ptr %p\n"
+                                            "  ret void\n"
+                                            "}\n");
+  EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2));
+  EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true));
 }
 
 TEST(StructuralHashTest, IgnoredMetadata) {
@@ -138,6 +152,91 @@ TEST(StructuralHashTest, IgnoredMetadata) {
         !0 = !{}
         !1 = !{ptr @llvm.embedded.object, !".llvm.lto"}
         )");
+  // clang-format on
+  EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2));
+}
+
+TEST(StructuralHashTest, ComparisonInstructionPredicate) {
+  LLVMContext Ctx;
+  std::unique_ptr<Module> M1 = parseIR(Ctx, "define i1 @f(i64 %a, i64 %b) {\n"
+                                            "  %1 = icmp eq i64 %a, %b\n"
+                                            "  ret i1 %1\n"
+                                            "}\n");
+  std::unique_ptr<Module> M2 = parseIR(Ctx, "define i1 @f(i64 %a, i64 %b) {\n"
+                                            "  %1 = icmp ne i64 %a, %b\n"
+                                            " ret i1 %1\n"
+                                            "}\n");
+  EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2));
+  EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true));
+}
+
+TEST(StructuralHashTest, IntrinsicInstruction) {
+  LLVMContext Ctx;
+  std::unique_ptr<Module> M1 =
+      parseIR(Ctx, "define float @f(float %a) {\n"
+                   "  %b = call float @llvm.sin.f32(float %a)\n"
+                   "  ret float %b\n"
+                   "}\n"
+                   "declare float @llvm.sin.f32(float)\n");
+  std::unique_ptr<Module> M2 =
+      parseIR(Ctx, "define float @f(float %a) {\n"
+                   "  %b = call float @llvm.cos.f32(float %a)\n"
+                   "  ret float %b\n"
+                   "}\n"
+                   "declare float @llvm.cos.f32(float)\n");
+  EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2));
+  EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true));
+}
+
+TEST(StructuralHashTest, CallInstruction) {
+  LLVMContext Ctx;
+  std::unique_ptr<Module> M1 = parseIR(Ctx, "define i64 @f(i64 %a) {\n"
+                                            "  %b = call i64 @f1(i64 %a)\n"
+                                            "  ret i64 %b\n"
+                                            "}\n"
+                                            "declare i64 @f1(i64)");
+  std::unique_ptr<Module> M2 = parseIR(Ctx, "define i64 @f(i64 %a) {\n"
+                                            "  %b = call i64 @f2(i64 %a)\n"
+                                            "  ret i64 %b\n"
+                                            "}\n"
+                                            "declare i64 @f2(i64)");
+  EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2));
+  EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true));
+}
+
+TEST(StructuralHashTest, ConstantInteger) {
+  LLVMContext Ctx;
+  std::unique_ptr<Module> M1 = parseIR(Ctx, "define i64 @f1() {\n"
+                                            "  ret i64 1\n"
+                                            "}\n");
+  std::unique_ptr<Module> M2 = parseIR(Ctx, "define i64 @f2() {\n"
+                                            "  ret i64 2\n"
+                                            "}\n");
+  EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2));
+  EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true));
+}
+
+TEST(StructuralHashTest, BigConstantInteger) {
+  LLVMContext Ctx;
+  std::unique_ptr<Module> M1 = parseIR(Ctx, "define i128 @f1() {\n"
+                                            "  ret i128 18446744073709551616\n"
+                                            "}\n");
+  std::unique_ptr<Module> M2 = parseIR(Ctx, "define i128 @f2() {\n"
+                                            "  ret i128 18446744073709551617\n"
+                                            "}\n");
+  EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2));
+  EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true));
+}
+
+TEST(StructuralHashTest, ArgumentNumber) {
+  LLVMContext Ctx;
+  std::unique_ptr<Module> M1 = parseIR(Ctx, "define i64 @f1(i64 %a, i64 %b) {\n"
+                                            "  ret i64 %a\n"
+                                            "}\n");
+  std::unique_ptr<Module> M2 = parseIR(Ctx, "define i64 @f2(i64 %a, i64 %b) {\n"
+                                            "  ret i64 %b\n"
+                                            "}\n");
   EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2));
+  EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true));
 }
 } // end anonymous namespace


        


More information about the llvm-commits mailing list