[llvm] f532d61 - [IR] Add more details to StructuralHash
Aiden Grossman via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 25 14:33:49 PDT 2023
Author: Aiden Grossman
Date: 2023-08-25T14:32:47-07:00
New Revision: f532d61de02befccd429c0d260d37275b72f2aa2
URL: https://github.com/llvm/llvm-project/commit/f532d61de02befccd429c0d260d37275b72f2aa2
DIFF: https://github.com/llvm/llvm-project/commit/f532d61de02befccd429c0d260d37275b72f2aa2.diff
LOG: [IR] Add more details to StructuralHash
This pass extends StructuralHash to include much more information about
the IR under analysis. This is done with a flag (Detailed) to configure
the behavior. The detailed behavior is intended for use in expensive
checks and downstream.
Differential Revision: https://reviews.llvm.org/D158250
Reviewed-By: aeubanks, nikic
Added:
Modified:
llvm/include/llvm/IR/StructuralHash.h
llvm/lib/IR/Pass.cpp
llvm/lib/IR/StructuralHash.cpp
llvm/unittests/IR/StructuralHashTest.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/StructuralHash.h b/llvm/include/llvm/IR/StructuralHash.h
index 8739c599a7ac33..57fb45db849110 100644
--- a/llvm/include/llvm/IR/StructuralHash.h
+++ b/llvm/include/llvm/IR/StructuralHash.h
@@ -23,8 +23,18 @@ class Module;
using IRHash = uint64_t;
-IRHash StructuralHash(const Function &F);
-IRHash StructuralHash(const Module &M);
+/// Returns a hash of the function \p F.
+/// \param F The function to hash.
+/// \param DetailedHash Whether or not to encode additional information in the
+/// hash. The additional information added into the hash when this flag is set
+/// to true includes instruction and operand type information.
+IRHash StructuralHash(const Function &F, bool DetailedHash = false);
+
+/// Returns a hash of the module \p M by hashing all functions and global
+/// variables contained within. \param M The module to hash. \param DetailedHash
+/// Whether or not to encode additional information in the function hashes that
+/// composed the module hash.
+IRHash StructuralHash(const Module &M, bool DetailedHash = false);
} // end namespace llvm
diff --git a/llvm/lib/IR/Pass.cpp b/llvm/lib/IR/Pass.cpp
index 716d9d546f4f06..b92838d2e50ac7 100644
--- a/llvm/lib/IR/Pass.cpp
+++ b/llvm/lib/IR/Pass.cpp
@@ -139,9 +139,15 @@ LLVM_DUMP_METHOD void Pass::dump() const {
#endif
#ifdef EXPENSIVE_CHECKS
-uint64_t Pass::structuralHash(Module &M) const { return StructuralHash(M); }
+// TODO: Use detailed structural hashing once exposed bugs have been fixed
+// (https://github.com/llvm/llvm-project/issues/64938)
+uint64_t Pass::structuralHash(Module &M) const {
+ return StructuralHash(M, false);
+}
-uint64_t Pass::structuralHash(Function &F) const { return StructuralHash(F); }
+uint64_t Pass::structuralHash(Function &F) const {
+ return StructuralHash(F, false);
+}
#endif
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp
index 13cd77151adf4a..90f13772da75a6 100644
--- a/llvm/lib/IR/StructuralHash.cpp
+++ b/llvm/lib/IR/StructuralHash.cpp
@@ -7,8 +7,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/StructuralHash.h"
+#include "llvm/ADT/Hashing.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
using namespace llvm;
@@ -24,9 +28,59 @@ class StructuralHashImpl {
void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
+ // This will produce different values on 32-bit and 64-bit systems as
+ // hash_combine returns a size_t. However, this is only used for
+ // detailed hashing which, in-tree, only needs to distinguish between
+ // differences in functions.
+ template <typename T> void hashArbitaryType(const T &V) {
+ hash(hash_combine(V));
+ }
+
+ void hashType(Type *ValueType) {
+ hash(ValueType->getTypeID());
+ if (ValueType->isIntegerTy())
+ hash(ValueType->getIntegerBitWidth());
+ }
+
public:
StructuralHashImpl() : Hash(4) {}
+ void updateOperand(Value *Operand) {
+ hashType(Operand->getType());
+
+ // The cases enumerated below are not exhaustive and are only aimed to
+ // get decent coverage over the function.
+ if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(Operand)) {
+ hashArbitaryType(ConstInt->getValue());
+ } else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(Operand)) {
+ hashArbitaryType(ConstFP->getValue());
+ } else if (Argument *Arg = dyn_cast<Argument>(Operand)) {
+ hash(Arg->getArgNo());
+ } else if (Function *Func = dyn_cast<Function>(Operand)) {
+ // Hashing the name will be deterministic as LLVM's hashing infrastructure
+ // has explicit support for hashing strings and will not simply hash
+ // the pointer.
+ hashArbitaryType(Func->getName());
+ }
+ }
+
+ void updateInstruction(const Instruction &Inst, bool DetailedHash) {
+ hash(Inst.getOpcode());
+
+ if (!DetailedHash)
+ return;
+
+ hashType(Inst.getType());
+
+ // Handle additional properties of specific instructions that cause
+ // semantic differences in the IR.
+ if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
+ hash(ComparisonInstruction->getPredicate());
+
+ for (const auto &Op : Inst.operands())
+ updateOperand(Op);
+ }
+
// A function hash is calculated by considering only the number of arguments
// and whether a function is varargs, the order of basic blocks (given by the
// successors of each basic block in depth first order), and the order of
@@ -43,7 +97,7 @@ class StructuralHashImpl {
// expensive checks for pass modification status). When modifying this
// function, most changes should be gated behind an option and enabled
// selectively.
- void update(const Function &F) {
+ void update(const Function &F, bool DetailedHash) {
// Declarations don't affect analyses.
if (F.isDeclaration())
return;
@@ -69,7 +123,7 @@ class StructuralHashImpl {
// opcodes
hash(45798);
for (auto &Inst : *BB)
- hash(Inst.getOpcode());
+ updateInstruction(Inst, DetailedHash);
const Instruction *Term = BB->getTerminator();
for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
@@ -90,11 +144,11 @@ class StructuralHashImpl {
hash(GV.getValueType()->getTypeID());
}
- void update(const Module &M) {
+ void update(const Module &M, bool DetailedHash) {
for (const GlobalVariable &GV : M.globals())
update(GV);
for (const Function &F : M)
- update(F);
+ update(F, DetailedHash);
}
uint64_t getHash() const { return Hash; }
@@ -102,14 +156,14 @@ class StructuralHashImpl {
} // namespace
-IRHash llvm::StructuralHash(const Function &F) {
+IRHash llvm::StructuralHash(const Function &F, bool DetailedHash) {
StructuralHashImpl H;
- H.update(F);
+ H.update(F, DetailedHash);
return H.getHash();
}
-IRHash llvm::StructuralHash(const Module &M) {
+IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) {
StructuralHashImpl H;
- H.update(M);
+ H.update(M, DetailedHash);
return H.getHash();
}
diff --git a/llvm/unittests/IR/StructuralHashTest.cpp b/llvm/unittests/IR/StructuralHashTest.cpp
index 4c16144a91df9f..64e66aa5f97a6d 100644
--- a/llvm/unittests/IR/StructuralHashTest.cpp
+++ b/llvm/unittests/IR/StructuralHashTest.cpp
@@ -91,8 +91,8 @@ TEST(StructuralHashTest, FunctionRetType) {
LLVMContext Ctx;
std::unique_ptr<Module> M1 = parseIR(Ctx, "define void @f() { ret void }");
std::unique_ptr<Module> M2 = parseIR(Ctx, "define i32 @f() { ret i32 0 }");
- // FIXME: should be different
EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2));
+ EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true));
}
TEST(StructuralHashTest, InstructionOpCode) {
@@ -109,7 +109,7 @@ TEST(StructuralHashTest, InstructionOpCode) {
EXPECT_NE(StructuralHash(*M1), StructuralHash(*M2));
}
-TEST(StructuralHashTest, InstructionType) {
+TEST(StructuralHashTest, InstructionSubType) {
LLVMContext Ctx;
std::unique_ptr<Module> M1 = parseIR(Ctx, "define void @f(ptr %p) {\n"
" %a = load i32, ptr %p\n"
@@ -119,8 +119,22 @@ TEST(StructuralHashTest, InstructionType) {
" %a = load i64, ptr %p\n"
" ret void\n"
"}\n");
- // FIXME: should be different
EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2));
+ EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true));
+}
+
+TEST(StructuralHashTest, InstructionType) {
+ LLVMContext Ctx;
+ std::unique_ptr<Module> M1 = parseIR(Ctx, "define void @f(ptr %p) {\n"
+ " %1 = load i32, ptr %p\n"
+ " ret void\n"
+ "}\n");
+ std::unique_ptr<Module> M2 = parseIR(Ctx, "define void @f(ptr %p) {\n"
+ " %1 = load float, ptr %p\n"
+ " ret void\n"
+ "}\n");
+ EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2));
+ EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true));
}
TEST(StructuralHashTest, IgnoredMetadata) {
@@ -138,6 +152,91 @@ TEST(StructuralHashTest, IgnoredMetadata) {
!0 = !{}
!1 = !{ptr @llvm.embedded.object, !".llvm.lto"}
)");
+ // clang-format on
+ EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2));
+}
+
+TEST(StructuralHashTest, ComparisonInstructionPredicate) {
+ LLVMContext Ctx;
+ std::unique_ptr<Module> M1 = parseIR(Ctx, "define i1 @f(i64 %a, i64 %b) {\n"
+ " %1 = icmp eq i64 %a, %b\n"
+ " ret i1 %1\n"
+ "}\n");
+ std::unique_ptr<Module> M2 = parseIR(Ctx, "define i1 @f(i64 %a, i64 %b) {\n"
+ " %1 = icmp ne i64 %a, %b\n"
+ " ret i1 %1\n"
+ "}\n");
+ EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2));
+ EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true));
+}
+
+TEST(StructuralHashTest, IntrinsicInstruction) {
+ LLVMContext Ctx;
+ std::unique_ptr<Module> M1 =
+ parseIR(Ctx, "define float @f(float %a) {\n"
+ " %b = call float @llvm.sin.f32(float %a)\n"
+ " ret float %b\n"
+ "}\n"
+ "declare float @llvm.sin.f32(float)\n");
+ std::unique_ptr<Module> M2 =
+ parseIR(Ctx, "define float @f(float %a) {\n"
+ " %b = call float @llvm.cos.f32(float %a)\n"
+ " ret float %b\n"
+ "}\n"
+ "declare float @llvm.cos.f32(float)\n");
+ EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2));
+ EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true));
+}
+
+TEST(StructuralHashTest, CallInstruction) {
+ LLVMContext Ctx;
+ std::unique_ptr<Module> M1 = parseIR(Ctx, "define i64 @f(i64 %a) {\n"
+ " %b = call i64 @f1(i64 %a)\n"
+ " ret i64 %b\n"
+ "}\n"
+ "declare i64 @f1(i64)");
+ std::unique_ptr<Module> M2 = parseIR(Ctx, "define i64 @f(i64 %a) {\n"
+ " %b = call i64 @f2(i64 %a)\n"
+ " ret i64 %b\n"
+ "}\n"
+ "declare i64 @f2(i64)");
+ EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2));
+ EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true));
+}
+
+TEST(StructuralHashTest, ConstantInteger) {
+ LLVMContext Ctx;
+ std::unique_ptr<Module> M1 = parseIR(Ctx, "define i64 @f1() {\n"
+ " ret i64 1\n"
+ "}\n");
+ std::unique_ptr<Module> M2 = parseIR(Ctx, "define i64 @f2() {\n"
+ " ret i64 2\n"
+ "}\n");
+ EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2));
+ EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true));
+}
+
+TEST(StructuralHashTest, BigConstantInteger) {
+ LLVMContext Ctx;
+ std::unique_ptr<Module> M1 = parseIR(Ctx, "define i128 @f1() {\n"
+ " ret i128 18446744073709551616\n"
+ "}\n");
+ std::unique_ptr<Module> M2 = parseIR(Ctx, "define i128 @f2() {\n"
+ " ret i128 18446744073709551617\n"
+ "}\n");
+ EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2));
+ EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true));
+}
+
+TEST(StructuralHashTest, ArgumentNumber) {
+ LLVMContext Ctx;
+ std::unique_ptr<Module> M1 = parseIR(Ctx, "define i64 @f1(i64 %a, i64 %b) {\n"
+ " ret i64 %a\n"
+ "}\n");
+ std::unique_ptr<Module> M2 = parseIR(Ctx, "define i64 @f2(i64 %a, i64 %b) {\n"
+ " ret i64 %b\n"
+ "}\n");
EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2));
+ EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true));
}
} // end anonymous namespace
More information about the llvm-commits mailing list