[llvm] [StructuralHash] Refactor (PR #112621)
Kyungwoo Lee via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 21 18:19:11 PDT 2024
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/112621
>From e715fc6b8fb299d7973ec2a63509277918b88131 Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Wed, 16 Oct 2024 14:36:38 -0700
Subject: [PATCH 1/3] [StructuralHash] Refactor
- Use stable_hash instead of uint64_t
- Rename update* to hash* functions. They compute stable_hash locally
and return it.
---
llvm/include/llvm/IR/StructuralHash.h | 3 +-
llvm/lib/IR/StructuralHash.cpp | 121 +++++++++++-------
.../MergeFunc/call-and-invoke-with-ranges.ll | 17 ++-
3 files changed, 88 insertions(+), 53 deletions(-)
diff --git a/llvm/include/llvm/IR/StructuralHash.h b/llvm/include/llvm/IR/StructuralHash.h
index 57fb45db849110..aa292bc3446799 100644
--- a/llvm/include/llvm/IR/StructuralHash.h
+++ b/llvm/include/llvm/IR/StructuralHash.h
@@ -14,6 +14,7 @@
#ifndef LLVM_IR_STRUCTURALHASH_H
#define LLVM_IR_STRUCTURALHASH_H
+#include "llvm/ADT/StableHashing.h"
#include <cstdint>
namespace llvm {
@@ -21,7 +22,7 @@ namespace llvm {
class Function;
class Module;
-using IRHash = uint64_t;
+using IRHash = stable_hash;
/// Returns a hash of the function \p F.
/// \param F The function to hash.
diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp
index fb4f33a021a96b..a1fabab77d52b2 100644
--- a/llvm/lib/IR/StructuralHash.cpp
+++ b/llvm/lib/IR/StructuralHash.cpp
@@ -24,61 +24,86 @@ namespace {
// by the MergeFunctions pass.
class StructuralHashImpl {
- uint64_t Hash = 4;
+ stable_hash Hash = 4;
- void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
+ bool DetailedHash;
// This will produce different values on 32-bit and 64-bit systens as
// hash_combine returns a size_t. However, this is only used for
// detailed hashing which, in-tree, only needs to distinguish between
// differences in functions.
- template <typename T> void hashArbitaryType(const T &V) {
- hash(hash_combine(V));
+ // TODO: This is not stable.
+ template <typename T> stable_hash hashArbitaryType(const T &V) {
+ return hash_combine(V);
}
- void hashType(Type *ValueType) {
- hash(ValueType->getTypeID());
+ stable_hash hashType(Type *ValueType) {
+ SmallVector<stable_hash> Hashes;
+ Hashes.emplace_back(ValueType->getTypeID());
if (ValueType->isIntegerTy())
- hash(ValueType->getIntegerBitWidth());
+ Hashes.emplace_back(ValueType->getIntegerBitWidth());
+ return stable_hash_combine(Hashes);
}
public:
- StructuralHashImpl() = default;
-
- void updateOperand(Value *Operand) {
- hashType(Operand->getType());
-
- // The cases enumerated below are not exhaustive and are only aimed to
- // get decent coverage over the function.
- if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(Operand)) {
- hashArbitaryType(ConstInt->getValue());
- } else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(Operand)) {
- hashArbitaryType(ConstFP->getValue());
- } else if (Argument *Arg = dyn_cast<Argument>(Operand)) {
- hash(Arg->getArgNo());
- } else if (Function *Func = dyn_cast<Function>(Operand)) {
+ StructuralHashImpl() = delete;
+ explicit StructuralHashImpl(bool DetailedHash) : DetailedHash(DetailedHash) {}
+
+ stable_hash hashConstant(Constant *C) {
+ SmallVector<stable_hash> Hashes;
+ // TODO: hashArbitaryType() is not stable.
+ if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(C)) {
+ Hashes.emplace_back(hashArbitaryType(ConstInt->getValue()));
+ } else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(C)) {
+ Hashes.emplace_back(hashArbitaryType(ConstFP->getValue()));
+ } else if (Function *Func = dyn_cast<Function>(C))
// Hashing the name will be deterministic as LLVM's hashing infrastructure
// has explicit support for hashing strings and will not simply hash
// the pointer.
- hashArbitaryType(Func->getName());
- }
+ Hashes.emplace_back(hashArbitaryType(Func->getName()));
+
+ return stable_hash_combine(Hashes);
+ }
+
+ stable_hash hashValue(Value *V) {
+ // Check constant and return its hash.
+ Constant *C = dyn_cast<Constant>(V);
+ if (C)
+ return hashConstant(C);
+
+ // Hash argument number.
+ SmallVector<stable_hash> Hashes;
+ if (Argument *Arg = dyn_cast<Argument>(V))
+ Hashes.emplace_back(Arg->getArgNo());
+
+ return stable_hash_combine(Hashes);
}
- void updateInstruction(const Instruction &Inst, bool DetailedHash) {
- hash(Inst.getOpcode());
+ stable_hash hashOperand(Value *Operand) {
+ SmallVector<stable_hash> Hashes;
+ Hashes.emplace_back(hashType(Operand->getType()));
+ Hashes.emplace_back(hashValue(Operand));
+ return stable_hash_combine(Hashes);
+ }
+
+ stable_hash hashInstruction(const Instruction &Inst) {
+ SmallVector<stable_hash> Hashes;
+ Hashes.emplace_back(Inst.getOpcode());
if (!DetailedHash)
- return;
+ return stable_hash_combine(Hashes);
- hashType(Inst.getType());
+ Hashes.emplace_back(hashType(Inst.getType()));
// Handle additional properties of specific instructions that cause
// semantic differences in the IR.
if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
- hash(ComparisonInstruction->getPredicate());
+ Hashes.emplace_back(ComparisonInstruction->getPredicate());
for (const auto &Op : Inst.operands())
- updateOperand(Op);
+ Hashes.emplace_back(hashOperand(Op));
+
+ return stable_hash_combine(Hashes);
}
// A function hash is calculated by considering only the number of arguments
@@ -97,15 +122,17 @@ class StructuralHashImpl {
// expensive checks for pass modification status). When modifying this
// function, most changes should be gated behind an option and enabled
// selectively.
- void update(const Function &F, bool DetailedHash) {
+ void update(const Function &F) {
// Declarations don't affect analyses.
if (F.isDeclaration())
return;
- hash(0x62642d6b6b2d6b72); // Function header
+ SmallVector<stable_hash> Hashes;
+ Hashes.emplace_back(Hash);
+ Hashes.emplace_back(0x62642d6b6b2d6b72); // Function header
- hash(F.isVarArg());
- hash(F.arg_size());
+ Hashes.emplace_back(F.isVarArg());
+ Hashes.emplace_back(F.arg_size());
SmallVector<const BasicBlock *, 8> BBs;
SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
@@ -121,14 +148,17 @@ class StructuralHashImpl {
// This random value acts as a block header, as otherwise the partition of
// opcodes into BBs wouldn't affect the hash, only the order of the
// opcodes
- hash(45798);
+ Hashes.emplace_back(45798);
for (auto &Inst : *BB)
- updateInstruction(Inst, DetailedHash);
+ Hashes.emplace_back(hashInstruction(Inst));
for (const BasicBlock *Succ : successors(BB))
if (VisitedBBs.insert(Succ).second)
BBs.push_back(Succ);
}
+
+ // Update the combined hash in place.
+ Hash = stable_hash_combine(Hashes);
}
void update(const GlobalVariable &GV) {
@@ -137,15 +167,20 @@ class StructuralHashImpl {
// we ignore anything with the `.llvm` prefix
if (GV.isDeclaration() || GV.getName().starts_with("llvm."))
return;
- hash(23456); // Global header
- hash(GV.getValueType()->getTypeID());
+ SmallVector<stable_hash> Hashes;
+ Hashes.emplace_back(Hash);
+ Hashes.emplace_back(23456); // Global header
+ Hashes.emplace_back(GV.getValueType()->getTypeID());
+
+ // Update the combined hash in place.
+ Hash = stable_hash_combine(Hashes);
}
- void update(const Module &M, bool DetailedHash) {
+ void update(const Module &M) {
for (const GlobalVariable &GV : M.globals())
update(GV);
for (const Function &F : M)
- update(F, DetailedHash);
+ update(F);
}
uint64_t getHash() const { return Hash; }
@@ -154,13 +189,13 @@ class StructuralHashImpl {
} // namespace
IRHash llvm::StructuralHash(const Function &F, bool DetailedHash) {
- StructuralHashImpl H;
- H.update(F, DetailedHash);
+ StructuralHashImpl H(DetailedHash);
+ H.update(F);
return H.getHash();
}
IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) {
- StructuralHashImpl H;
- H.update(M, DetailedHash);
+ StructuralHashImpl H(DetailedHash);
+ H.update(M);
return H.getHash();
}
diff --git a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll
index e7718ca84d3165..0ceb363a67b1fa 100644
--- a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll
+++ b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll
@@ -63,6 +63,14 @@ lpad:
resume { ptr, i32 } zeroinitializer
}
+define i8 @call_with_same_range() {
+; CHECK-LABEL: @call_with_same_range
+; CHECK: tail call i8 @call_with_range
+ bitcast i8 0 to i8
+ %out = call i8 @dummy(), !range !0
+ ret i8 %out
+}
+
define i8 @invoke_with_same_range() personality ptr undef {
; CHECK-LABEL: @invoke_with_same_range()
; CHECK: tail call i8 @invoke_with_range()
@@ -76,15 +84,6 @@ lpad:
resume { ptr, i32 } zeroinitializer
}
-define i8 @call_with_same_range() {
-; CHECK-LABEL: @call_with_same_range
-; CHECK: tail call i8 @call_with_range
- bitcast i8 0 to i8
- %out = call i8 @dummy(), !range !0
- ret i8 %out
-}
-
-
declare i8 @dummy();
declare i32 @__gxx_personality_v0(...)
>From f1060da949a5620d4bb064679d226a808f47dd12 Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Mon, 21 Oct 2024 16:13:48 -0700
Subject: [PATCH 2/3] Address comments from ellishg
---
llvm/lib/IR/StructuralHash.cpp | 9 +++---
.../call-and-invoke-with-ranges-attr.ll | 28 +++++++++----------
llvm/test/Transforms/MergeFunc/inline-asm.ll | 6 ++--
3 files changed, 22 insertions(+), 21 deletions(-)
diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp
index a1fabab77d52b2..943bb06d16851d 100644
--- a/llvm/lib/IR/StructuralHash.cpp
+++ b/llvm/lib/IR/StructuralHash.cpp
@@ -56,11 +56,12 @@ class StructuralHashImpl {
Hashes.emplace_back(hashArbitaryType(ConstInt->getValue()));
} else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(C)) {
Hashes.emplace_back(hashArbitaryType(ConstFP->getValue()));
- } else if (Function *Func = dyn_cast<Function>(C))
+ } else if (Function *Func = dyn_cast<Function>(C)) {
// Hashing the name will be deterministic as LLVM's hashing infrastructure
// has explicit support for hashing strings and will not simply hash
// the pointer.
Hashes.emplace_back(hashArbitaryType(Func->getName()));
+ }
return stable_hash_combine(Hashes);
}
@@ -129,7 +130,7 @@ class StructuralHashImpl {
SmallVector<stable_hash> Hashes;
Hashes.emplace_back(Hash);
- Hashes.emplace_back(0x62642d6b6b2d6b72); // Function header
+ Hashes.emplace_back(stable_hash_name("Function Header"));
Hashes.emplace_back(F.isVarArg());
Hashes.emplace_back(F.arg_size());
@@ -148,7 +149,7 @@ class StructuralHashImpl {
// This random value acts as a block header, as otherwise the partition of
// opcodes into BBs wouldn't affect the hash, only the order of the
// opcodes
- Hashes.emplace_back(45798);
+ Hashes.emplace_back(stable_hash_name("Block Header"));
for (auto &Inst : *BB)
Hashes.emplace_back(hashInstruction(Inst));
@@ -169,7 +170,7 @@ class StructuralHashImpl {
return;
SmallVector<stable_hash> Hashes;
Hashes.emplace_back(Hash);
- Hashes.emplace_back(23456); // Global header
+ Hashes.emplace_back(stable_hash_name("Global Header"));
Hashes.emplace_back(GV.getValueType()->getTypeID());
// Update the combined hash in place.
diff --git a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges-attr.ll b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges-attr.ll
index e5d62319bf9db7..5ed14b2ef1008d 100644
--- a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges-attr.ll
+++ b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges-attr.ll
@@ -79,17 +79,11 @@ lpad:
resume { ptr, i32 } zeroinitializer
}
-define i8 @invoke_with_same_range() personality ptr undef {
-; CHECK-LABEL: @invoke_with_same_range()
-; CHECK: tail call i8 @invoke_with_range()
- %out = invoke range(i8 0, 2) i8 @dummy() to label %next unwind label %lpad
-
-next:
+define i8 @call_with_same_range_attr(i8 range(i8 0, 2) %v) {
+; CHECK-LABEL: @call_with_same_range_attr
+; CHECK: tail call i8 @call_with_range_attr
+ %out = call i8 @dummy2(i8 %v)
ret i8 %out
-
-lpad:
- %pad = landingpad { ptr, i32 } cleanup
- resume { ptr, i32 } zeroinitializer
}
define i8 @call_with_same_range() {
@@ -99,11 +93,17 @@ define i8 @call_with_same_range() {
ret i8 %out
}
-define i8 @call_with_same_range_attr(i8 range(i8 0, 2) %v) {
-; CHECK-LABEL: @call_with_same_range_attr
-; CHECK: tail call i8 @call_with_range_attr
- %out = call i8 @dummy2(i8 %v)
+define i8 @invoke_with_same_range() personality ptr undef {
+; CHECK-LABEL: @invoke_with_same_range()
+; CHECK: tail call i8 @invoke_with_range()
+ %out = invoke range(i8 0, 2) i8 @dummy() to label %next unwind label %lpad
+
+next:
ret i8 %out
+
+lpad:
+ %pad = landingpad { ptr, i32 } cleanup
+ resume { ptr, i32 } zeroinitializer
}
declare i8 @dummy();
diff --git a/llvm/test/Transforms/MergeFunc/inline-asm.ll b/llvm/test/Transforms/MergeFunc/inline-asm.ll
index 7cc6afd2f8f7bd..0c0d487d9de6a8 100644
--- a/llvm/test/Transforms/MergeFunc/inline-asm.ll
+++ b/llvm/test/Transforms/MergeFunc/inline-asm.ll
@@ -3,12 +3,12 @@
; CHECK-LABEL: @int_ptr_arg_different
; CHECK-NEXT: call void asm
-; CHECK-LABEL: @int_ptr_null
-; CHECK-NEXT: tail call void @float_ptr_null()
-
; CHECK-LABEL: @int_ptr_arg_same
; CHECK-NEXT: tail call void @float_ptr_arg_same(ptr %0)
+; CHECK-LABEL: @int_ptr_null
+; CHECK-NEXT: tail call void @float_ptr_null()
+
; Used to satisfy minimum size limit
declare void @stuff()
>From 68d1ec115588e555fcc40e3f6d3af1d5561d3364 Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Mon, 21 Oct 2024 18:18:44 -0700
Subject: [PATCH 3/3] Address comments from boomanaiden154
---
llvm/lib/IR/StructuralHash.cpp | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp
index 943bb06d16851d..1294706b954a3b 100644
--- a/llvm/lib/IR/StructuralHash.cpp
+++ b/llvm/lib/IR/StructuralHash.cpp
@@ -28,6 +28,10 @@ class StructuralHashImpl {
bool DetailedHash;
+ const stable_hash GlobalHeaderHash = stable_hash_name("Global Header");
+ const stable_hash FunctionHeaderHash = stable_hash_name("Function Header");
+ const stable_hash BlockHeaderHash = stable_hash_name("Block Header");
+
// This will produce different values on 32-bit and 64-bit systens as
// hash_combine returns a size_t. However, this is only used for
// detailed hashing which, in-tree, only needs to distinguish between
@@ -130,7 +134,7 @@ class StructuralHashImpl {
SmallVector<stable_hash> Hashes;
Hashes.emplace_back(Hash);
- Hashes.emplace_back(stable_hash_name("Function Header"));
+ Hashes.emplace_back(FunctionHeaderHash);
Hashes.emplace_back(F.isVarArg());
Hashes.emplace_back(F.arg_size());
@@ -149,7 +153,7 @@ class StructuralHashImpl {
// This random value acts as a block header, as otherwise the partition of
// opcodes into BBs wouldn't affect the hash, only the order of the
// opcodes
- Hashes.emplace_back(stable_hash_name("Block Header"));
+ Hashes.emplace_back(BlockHeaderHash);
for (auto &Inst : *BB)
Hashes.emplace_back(hashInstruction(Inst));
@@ -170,7 +174,7 @@ class StructuralHashImpl {
return;
SmallVector<stable_hash> Hashes;
Hashes.emplace_back(Hash);
- Hashes.emplace_back(stable_hash_name("Global Header"));
+ Hashes.emplace_back(GlobalHeaderHash);
Hashes.emplace_back(GV.getValueType()->getTypeID());
// Update the combined hash in place.
More information about the llvm-commits
mailing list