[llvm] 7ff7df1 - Revert "[NFCi][MergeFunctions] Consolidate Hashing Functions"
Aiden Grossman via llvm-commits
llvm-commits at lists.llvm.org
Sat Aug 19 12:23:53 PDT 2023
Author: Aiden Grossman
Date: 2023-08-19T12:23:16-07:00
New Revision: 7ff7df1c629a78f6c87a0bfbc7e5d7c2714283fe
URL: https://github.com/llvm/llvm-project/commit/7ff7df1c629a78f6c87a0bfbc7e5d7c2714283fe
DIFF: https://github.com/llvm/llvm-project/commit/7ff7df1c629a78f6c87a0bfbc7e5d7c2714283fe.diff
LOG: Revert "[NFCi][MergeFunctions] Consolidate Hashing Functions"
This reverts commit 28134a29fdedd8972acdfb39223571ddcc15dc59.
This patch was causing build failures on multiple buildbots on 32-bit
architectures. Reverting now so I can deboug out-of-trunk and resubmit
later.
Added:
Modified:
llvm/include/llvm/IR/StructuralHash.h
llvm/include/llvm/Transforms/Utils/FunctionComparator.h
llvm/lib/IR/StructuralHash.cpp
llvm/lib/Transforms/IPO/MergeFunctions.cpp
llvm/lib/Transforms/Utils/FunctionComparator.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/StructuralHash.h b/llvm/include/llvm/IR/StructuralHash.h
index 8739c599a7ac33..1bdeb85afa3c50 100644
--- a/llvm/include/llvm/IR/StructuralHash.h
+++ b/llvm/include/llvm/IR/StructuralHash.h
@@ -21,10 +21,8 @@ namespace llvm {
class Function;
class Module;
-using IRHash = uint64_t;
-
-IRHash StructuralHash(const Function &F);
-IRHash StructuralHash(const Module &M);
+uint64_t StructuralHash(const Function &F);
+uint64_t StructuralHash(const Module &M);
} // end namespace llvm
diff --git a/llvm/include/llvm/Transforms/Utils/FunctionComparator.h b/llvm/include/llvm/Transforms/Utils/FunctionComparator.h
index c28f868039a1f7..78761fc78fee8c 100644
--- a/llvm/include/llvm/Transforms/Utils/FunctionComparator.h
+++ b/llvm/include/llvm/Transforms/Utils/FunctionComparator.h
@@ -99,6 +99,11 @@ class FunctionComparator {
/// Test whether the two functions have equivalent behaviour.
int compare();
+ /// Hash a function. Equivalent functions will have the same hash, and unequal
+ /// functions will have
diff erent hashes with high probability.
+ using FunctionHash = uint64_t;
+ static FunctionHash functionHash(Function &);
+
protected:
/// Start the comparison.
void beginCompare() {
diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp
index 5ba64e0cc0d35f..6ea108d831a165 100644
--- a/llvm/lib/IR/StructuralHash.cpp
+++ b/llvm/lib/IR/StructuralHash.cpp
@@ -27,28 +27,12 @@ class StructuralHashImpl {
public:
StructuralHashImpl() : Hash(4) {}
- // A function hash is calculated by considering only the number of arguments
- // and whether a function is varargs, the order of basic blocks (given by the
- // successors of each basic block in depth first order), and the order of
- // opcodes of each instruction within each of these basic blocks. This mirrors
- // the strategy FunctionComparator::compare() uses to compare functions by
- // walking the BBs in depth first order and comparing each instruction in
- // sequence. Because this hash currently does not look at the operands, it is
- // insensitive to things such as the target of calls and the constants used in
- // the function, which makes it useful when possibly merging functions which
- // are the same modulo constants and call targets.
- //
- // Note that
diff erent users of StructuralHash will want
diff erent behavior
- // out of it (i.e., MergeFunctions will want something
diff erent from PM
- // expensive checks for pass modification status). When modifying this
- // function, most changes should be gated behind an option and enabled
- // selectively.
void update(const Function &F) {
// Declarations don't affect analyses.
if (F.isDeclaration())
return;
- hash(0x6acaa36bef8325c5ULL); // Function header
+ hash(12345); // Function header
hash(F.isVarArg());
hash(F.arg_size());
@@ -56,18 +40,11 @@ class StructuralHashImpl {
SmallVector<const BasicBlock *, 8> BBs;
SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
- // Walk the blocks in the same order as
- // FunctionComparator::cmpBasicBlocks(), accumulating the hash of the
- // function "structure." (BB and opcode sequence)
BBs.push_back(&F.getEntryBlock());
VisitedBBs.insert(BBs[0]);
while (!BBs.empty()) {
const BasicBlock *BB = BBs.pop_back_val();
-
- // This random value acts as a block header, as otherwise the partition of
- // opcodes into BBs wouldn't affect the hash, only the order of the
- // opcodes
- hash(45798);
+ hash(45798); // Block header
for (auto &Inst : *BB)
hash(Inst.getOpcode());
@@ -102,13 +79,13 @@ class StructuralHashImpl {
} // namespace
-IRHash llvm::StructuralHash(const Function &F) {
+uint64_t llvm::StructuralHash(const Function &F) {
StructuralHashImpl H;
H.update(F);
return H.getHash();
}
-IRHash llvm::StructuralHash(const Module &M) {
+uint64_t llvm::StructuralHash(const Module &M) {
StructuralHashImpl H;
H.update(M);
return H.getHash();
diff --git a/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index 4a64580b3cb329..feda5d6459cb47 100644
--- a/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -107,7 +107,6 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/StructuralHash.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
@@ -172,14 +171,15 @@ namespace {
class FunctionNode {
mutable AssertingVH<Function> F;
- IRHash Hash;
+ FunctionComparator::FunctionHash Hash;
public:
// Note the hash is recalculated potentially multiple times, but it is cheap.
- FunctionNode(Function *F) : F(F), Hash(StructuralHash(*F)) {}
+ FunctionNode(Function *F)
+ : F(F), Hash(FunctionComparator::functionHash(*F)) {}
Function *getFunc() const { return F; }
- IRHash getHash() const { return Hash; }
+ FunctionComparator::FunctionHash getHash() const { return Hash; }
/// Replace the reference to the function F by the function G, assuming their
/// implementations are equal.
@@ -390,10 +390,11 @@ bool MergeFunctions::runOnModule(Module &M) {
// All functions in the module, ordered by hash. Functions with a unique
// hash value are easily eliminated.
- std::vector<std::pair<IRHash, Function *>> HashedFuncs;
+ std::vector<std::pair<FunctionComparator::FunctionHash, Function *>>
+ HashedFuncs;
for (Function &Func : M) {
if (isEligibleForMerging(Func)) {
- HashedFuncs.push_back({StructuralHash(Func), &Func});
+ HashedFuncs.push_back({FunctionComparator::functionHash(Func), &Func});
}
}
diff --git a/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/llvm/lib/Transforms/Utils/FunctionComparator.cpp
index b1d74f67377e27..8daeb92130ba3c 100644
--- a/llvm/lib/Transforms/Utils/FunctionComparator.cpp
+++ b/llvm/lib/Transforms/Utils/FunctionComparator.cpp
@@ -958,3 +958,67 @@ int FunctionComparator::compare() {
}
return 0;
}
+
+namespace {
+
+// Accumulate the hash of a sequence of 64-bit integers. This is similar to a
+// hash of a sequence of 64bit ints, but the entire input does not need to be
+// available at once. This interface is necessary for functionHash because it
+// needs to accumulate the hash as the structure of the function is traversed
+// without saving these values to an intermediate buffer. This form of hashing
+// is not often needed, as usually the object to hash is just read from a
+// buffer.
+class HashAccumulator64 {
+ uint64_t Hash;
+
+public:
+ // Initialize to random constant, so the state isn't zero.
+ HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; }
+
+ void add(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
+
+ // No finishing is required, because the entire hash value is used.
+ uint64_t getHash() { return Hash; }
+};
+
+} // end anonymous namespace
+
+// A function hash is calculated by considering only the number of arguments and
+// whether a function is varargs, the order of basic blocks (given by the
+// successors of each basic block in depth first order), and the order of
+// opcodes of each instruction within each of these basic blocks. This mirrors
+// the strategy compare() uses to compare functions by walking the BBs in depth
+// first order and comparing each instruction in sequence. Because this hash
+// does not look at the operands, it is insensitive to things such as the
+// target of calls and the constants used in the function, which makes it useful
+// when possibly merging functions which are the same modulo constants and call
+// targets.
+FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) {
+ HashAccumulator64 H;
+ H.add(F.isVarArg());
+ H.add(F.arg_size());
+
+ SmallVector<const BasicBlock *, 8> BBs;
+ SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
+
+ // Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(),
+ // accumulating the hash of the function "structure." (BB and opcode sequence)
+ BBs.push_back(&F.getEntryBlock());
+ VisitedBBs.insert(BBs[0]);
+ while (!BBs.empty()) {
+ const BasicBlock *BB = BBs.pop_back_val();
+ // This random value acts as a block header, as otherwise the partition of
+ // opcodes into BBs wouldn't affect the hash, only the order of the opcodes
+ H.add(45798);
+ for (const auto &Inst : *BB) {
+ H.add(Inst.getOpcode());
+ }
+ const Instruction *Term = BB->getTerminator();
+ for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
+ if (!VisitedBBs.insert(Term->getSuccessor(i)).second)
+ continue;
+ BBs.push_back(Term->getSuccessor(i));
+ }
+ }
+ return H.getHash();
+}
More information about the llvm-commits
mailing list