[llvm] d104b8e - Revert "Reland [StructuralHash] Refactor (#112621)"
Kyungwoo Lee via llvm-commits
llvm-commits at lists.llvm.org
Sat Oct 26 13:56:16 PDT 2024
Author: Kyungwoo Lee
Date: 2024-10-26T13:55:46-07:00
New Revision: d104b8e827ef5c3cb723aee92af4adfc8af18e9a
URL: https://github.com/llvm/llvm-project/commit/d104b8e827ef5c3cb723aee92af4adfc8af18e9a
DIFF: https://github.com/llvm/llvm-project/commit/d104b8e827ef5c3cb723aee92af4adfc8af18e9a.diff
LOG: Revert "Reland [StructuralHash] Refactor (#112621)"
This reverts commit 98ca9a635bd2fb98cee473a9558687a5b522e219.
Added:
Modified:
llvm/include/llvm/IR/StructuralHash.h
llvm/lib/IR/StructuralHash.cpp
llvm/lib/Transforms/IPO/MergeFunctions.cpp
llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges-attr.ll
llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll
llvm/test/Transforms/MergeFunc/inline-asm.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/StructuralHash.h b/llvm/include/llvm/IR/StructuralHash.h
index e2e192cc9501b3..57fb45db849110 100644
--- a/llvm/include/llvm/IR/StructuralHash.h
+++ b/llvm/include/llvm/IR/StructuralHash.h
@@ -14,7 +14,6 @@
#ifndef LLVM_IR_STRUCTURALHASH_H
#define LLVM_IR_STRUCTURALHASH_H
-#include "llvm/ADT/StableHashing.h"
#include <cstdint>
namespace llvm {
@@ -22,18 +21,20 @@ namespace llvm {
class Function;
class Module;
+using IRHash = uint64_t;
+
/// Returns a hash of the function \p F.
/// \param F The function to hash.
/// \param DetailedHash Whether or not to encode additional information in the
/// hash. The additional information added into the hash when this flag is set
/// to true includes instruction and operand type information.
-stable_hash StructuralHash(const Function &F, bool DetailedHash = false);
+IRHash StructuralHash(const Function &F, bool DetailedHash = false);
/// Returns a hash of the module \p M by hashing all functions and global
/// variables contained within. \param M The module to hash. \param DetailedHash
/// Whether or not to encode additional information in the function hashes that
/// composed the module hash.
-stable_hash StructuralHash(const Module &M, bool DetailedHash = false);
+IRHash StructuralHash(const Module &M, bool DetailedHash = false);
} // end namespace llvm
diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp
index 267a085c5af705..fb4f33a021a96b 100644
--- a/llvm/lib/IR/StructuralHash.cpp
+++ b/llvm/lib/IR/StructuralHash.cpp
@@ -24,93 +24,61 @@ namespace {
// by the MergeFunctions pass.
class StructuralHashImpl {
- stable_hash Hash = 4;
+ uint64_t Hash = 4;
- bool DetailedHash;
-
- // This random value acts as a block header, as otherwise the partition of
- // opcodes into BBs wouldn't affect the hash, only the order of the opcodes.
- static constexpr stable_hash BlockHeaderHash = 45798;
- static constexpr stable_hash FunctionHeaderHash = 0x62642d6b6b2d6b72;
- static constexpr stable_hash GlobalHeaderHash = 23456;
+ void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
// This will produce
diff erent values on 32-bit and 64-bit systens as
// hash_combine returns a size_t. However, this is only used for
// detailed hashing which, in-tree, only needs to distinguish between
//
diff erences in functions.
- // TODO: This is not stable.
- template <typename T> stable_hash hashArbitaryType(const T &V) {
- return hash_combine(V);
+ template <typename T> void hashArbitaryType(const T &V) {
+ hash(hash_combine(V));
}
- stable_hash hashType(Type *ValueType) {
- SmallVector<stable_hash> Hashes;
- Hashes.emplace_back(ValueType->getTypeID());
+ void hashType(Type *ValueType) {
+ hash(ValueType->getTypeID());
if (ValueType->isIntegerTy())
- Hashes.emplace_back(ValueType->getIntegerBitWidth());
- return stable_hash_combine(Hashes);
+ hash(ValueType->getIntegerBitWidth());
}
public:
- StructuralHashImpl() = delete;
- explicit StructuralHashImpl(bool DetailedHash) : DetailedHash(DetailedHash) {}
-
- stable_hash hashConstant(Constant *C) {
- SmallVector<stable_hash> Hashes;
- // TODO: hashArbitaryType() is not stable.
- if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(C)) {
- Hashes.emplace_back(hashArbitaryType(ConstInt->getValue()));
- } else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(C)) {
- Hashes.emplace_back(hashArbitaryType(ConstFP->getValue()));
- } else if (Function *Func = dyn_cast<Function>(C)) {
+ StructuralHashImpl() = default;
+
+ void updateOperand(Value *Operand) {
+ hashType(Operand->getType());
+
+ // The cases enumerated below are not exhaustive and are only aimed to
+ // get decent coverage over the function.
+ if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(Operand)) {
+ hashArbitaryType(ConstInt->getValue());
+ } else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(Operand)) {
+ hashArbitaryType(ConstFP->getValue());
+ } else if (Argument *Arg = dyn_cast<Argument>(Operand)) {
+ hash(Arg->getArgNo());
+ } else if (Function *Func = dyn_cast<Function>(Operand)) {
// Hashing the name will be deterministic as LLVM's hashing infrastructure
// has explicit support for hashing strings and will not simply hash
// the pointer.
- Hashes.emplace_back(hashArbitaryType(Func->getName()));
+ hashArbitaryType(Func->getName());
}
-
- return stable_hash_combine(Hashes);
- }
-
- stable_hash hashValue(Value *V) {
- // Check constant and return its hash.
- Constant *C = dyn_cast<Constant>(V);
- if (C)
- return hashConstant(C);
-
- // Hash argument number.
- SmallVector<stable_hash> Hashes;
- if (Argument *Arg = dyn_cast<Argument>(V))
- Hashes.emplace_back(Arg->getArgNo());
-
- return stable_hash_combine(Hashes);
}
- stable_hash hashOperand(Value *Operand) {
- SmallVector<stable_hash> Hashes;
- Hashes.emplace_back(hashType(Operand->getType()));
- Hashes.emplace_back(hashValue(Operand));
- return stable_hash_combine(Hashes);
- }
-
- stable_hash hashInstruction(const Instruction &Inst) {
- SmallVector<stable_hash> Hashes;
- Hashes.emplace_back(Inst.getOpcode());
+ void updateInstruction(const Instruction &Inst, bool DetailedHash) {
+ hash(Inst.getOpcode());
if (!DetailedHash)
- return stable_hash_combine(Hashes);
+ return;
- Hashes.emplace_back(hashType(Inst.getType()));
+ hashType(Inst.getType());
// Handle additional properties of specific instructions that cause
// semantic
diff erences in the IR.
if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
- Hashes.emplace_back(ComparisonInstruction->getPredicate());
+ hash(ComparisonInstruction->getPredicate());
for (const auto &Op : Inst.operands())
- Hashes.emplace_back(hashOperand(Op));
-
- return stable_hash_combine(Hashes);
+ updateOperand(Op);
}
// A function hash is calculated by considering only the number of arguments
@@ -129,17 +97,15 @@ class StructuralHashImpl {
// expensive checks for pass modification status). When modifying this
// function, most changes should be gated behind an option and enabled
// selectively.
- void update(const Function &F) {
+ void update(const Function &F, bool DetailedHash) {
// Declarations don't affect analyses.
if (F.isDeclaration())
return;
- SmallVector<stable_hash> Hashes;
- Hashes.emplace_back(Hash);
- Hashes.emplace_back(FunctionHeaderHash);
+ hash(0x62642d6b6b2d6b72); // Function header
- Hashes.emplace_back(F.isVarArg());
- Hashes.emplace_back(F.arg_size());
+ hash(F.isVarArg());
+ hash(F.arg_size());
SmallVector<const BasicBlock *, 8> BBs;
SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
@@ -152,17 +118,17 @@ class StructuralHashImpl {
while (!BBs.empty()) {
const BasicBlock *BB = BBs.pop_back_val();
- Hashes.emplace_back(BlockHeaderHash);
+ // This random value acts as a block header, as otherwise the partition of
+ // opcodes into BBs wouldn't affect the hash, only the order of the
+ // opcodes
+ hash(45798);
for (auto &Inst : *BB)
- Hashes.emplace_back(hashInstruction(Inst));
+ updateInstruction(Inst, DetailedHash);
for (const BasicBlock *Succ : successors(BB))
if (VisitedBBs.insert(Succ).second)
BBs.push_back(Succ);
}
-
- // Update the combined hash in place.
- Hash = stable_hash_combine(Hashes);
}
void update(const GlobalVariable &GV) {
@@ -171,20 +137,15 @@ class StructuralHashImpl {
// we ignore anything with the `.llvm` prefix
if (GV.isDeclaration() || GV.getName().starts_with("llvm."))
return;
- SmallVector<stable_hash> Hashes;
- Hashes.emplace_back(Hash);
- Hashes.emplace_back(GlobalHeaderHash);
- Hashes.emplace_back(GV.getValueType()->getTypeID());
-
- // Update the combined hash in place.
- Hash = stable_hash_combine(Hashes);
+ hash(23456); // Global header
+ hash(GV.getValueType()->getTypeID());
}
- void update(const Module &M) {
+ void update(const Module &M, bool DetailedHash) {
for (const GlobalVariable &GV : M.globals())
update(GV);
for (const Function &F : M)
- update(F);
+ update(F, DetailedHash);
}
uint64_t getHash() const { return Hash; }
@@ -192,14 +153,14 @@ class StructuralHashImpl {
} // namespace
-stable_hash llvm::StructuralHash(const Function &F, bool DetailedHash) {
- StructuralHashImpl H(DetailedHash);
- H.update(F);
+IRHash llvm::StructuralHash(const Function &F, bool DetailedHash) {
+ StructuralHashImpl H;
+ H.update(F, DetailedHash);
return H.getHash();
}
-stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) {
- StructuralHashImpl H(DetailedHash);
- H.update(M);
+IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) {
+ StructuralHashImpl H;
+ H.update(M, DetailedHash);
return H.getHash();
}
diff --git a/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index ad16b0b3501495..b50a700e09038f 100644
--- a/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -172,14 +172,14 @@ namespace {
class FunctionNode {
mutable AssertingVH<Function> F;
- stable_hash Hash;
+ IRHash Hash;
public:
// Note the hash is recalculated potentially multiple times, but it is cheap.
FunctionNode(Function *F) : F(F), Hash(StructuralHash(*F)) {}
Function *getFunc() const { return F; }
- stable_hash getHash() const { return Hash; }
+ IRHash getHash() const { return Hash; }
/// Replace the reference to the function F by the function G, assuming their
/// implementations are equal.
@@ -420,7 +420,7 @@ bool MergeFunctions::runOnModule(Module &M) {
// All functions in the module, ordered by hash. Functions with a unique
// hash value are easily eliminated.
- std::vector<std::pair<stable_hash, Function *>> HashedFuncs;
+ std::vector<std::pair<IRHash, Function *>> HashedFuncs;
for (Function &Func : M) {
if (isEligibleForMerging(Func)) {
HashedFuncs.push_back({StructuralHash(Func), &Func});
diff --git a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges-attr.ll b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges-attr.ll
index cbf14165548ec5..e5d62319bf9db7 100644
--- a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges-attr.ll
+++ b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges-attr.ll
@@ -80,8 +80,8 @@ lpad:
}
define i8 @invoke_with_same_range() personality ptr undef {
-; CHECK-DAG: @invoke_with_same_range()
-; CHECK-DAG: tail call i8 @invoke_with_range()
+; CHECK-LABEL: @invoke_with_same_range()
+; CHECK: tail call i8 @invoke_with_range()
%out = invoke range(i8 0, 2) i8 @dummy() to label %next unwind label %lpad
next:
@@ -93,15 +93,15 @@ lpad:
}
define i8 @call_with_same_range() {
-; CHECK-DAG: @call_with_same_range
-; CHECK-DAG: tail call i8 @call_with_range
+; CHECK-LABEL: @call_with_same_range
+; CHECK: tail call i8 @call_with_range
%out = call range(i8 0, 2) i8 @dummy()
ret i8 %out
}
define i8 @call_with_same_range_attr(i8 range(i8 0, 2) %v) {
-; CHECK-DAG: @call_with_same_range_attr
-; CHECK-DAG: tail call i8 @call_with_range_attr
+; CHECK-LABEL: @call_with_same_range_attr
+; CHECK: tail call i8 @call_with_range_attr
%out = call i8 @dummy2(i8 %v)
ret i8 %out
}
diff --git a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll
index 39e5a11181a4f0..e7718ca84d3165 100644
--- a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll
+++ b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll
@@ -64,8 +64,8 @@ lpad:
}
define i8 @invoke_with_same_range() personality ptr undef {
-; CHECK-DAG: @invoke_with_same_range()
-; CHECK-DAG: tail call i8 @invoke_with_range()
+; CHECK-LABEL: @invoke_with_same_range()
+; CHECK: tail call i8 @invoke_with_range()
%out = invoke i8 @dummy() to label %next unwind label %lpad, !range !0
next:
@@ -77,8 +77,8 @@ lpad:
}
define i8 @call_with_same_range() {
-; CHECK-DAG: @call_with_same_range
-; CHECK-DAG: tail call i8 @call_with_range
+; CHECK-LABEL: @call_with_same_range
+; CHECK: tail call i8 @call_with_range
bitcast i8 0 to i8
%out = call i8 @dummy(), !range !0
ret i8 %out
diff --git a/llvm/test/Transforms/MergeFunc/inline-asm.ll b/llvm/test/Transforms/MergeFunc/inline-asm.ll
index 970757e8d53afb..7cc6afd2f8f7bd 100644
--- a/llvm/test/Transforms/MergeFunc/inline-asm.ll
+++ b/llvm/test/Transforms/MergeFunc/inline-asm.ll
@@ -3,11 +3,11 @@
; CHECK-LABEL: @int_ptr_arg_
diff erent
; CHECK-NEXT: call void asm
-; CHECK-DAG: @int_ptr_null
-; CHECK-DAG: tail call void @float_ptr_null()
+; CHECK-LABEL: @int_ptr_null
+; CHECK-NEXT: tail call void @float_ptr_null()
-; CHECK-DAG: @int_ptr_arg_same
-; CHECK-DAG: tail call void @float_ptr_arg_same(ptr %0)
+; CHECK-LABEL: @int_ptr_arg_same
+; CHECK-NEXT: tail call void @float_ptr_arg_same(ptr %0)
; Used to satisfy minimum size limit
declare void @stuff()
More information about the llvm-commits
mailing list