[llvm] [StructuralHash] Support Differences (PR #112638)
Kyungwoo Lee via llvm-commits
llvm-commits at lists.llvm.org
Sat Oct 26 12:37:44 PDT 2024
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/112638
>From 14f0691bbf62959290a2eb2203d7eb3a133615f4 Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Wed, 16 Oct 2024 17:09:07 -0700
Subject: [PATCH 1/5] [StructuralHash] Support Differences
This comutes a structural hash while allowing for selective ignoring of
certain operands based on a custom function that is provided.
Instead of a single hash value, it now returns FunctionHashInfo which
includes a hash value, an instruction mapping, and a map to track the
operand location and its corresponding hash value that is ignored.
---
llvm/include/llvm/IR/StructuralHash.h | 45 ++++++
llvm/lib/IR/StructuralHash.cpp | 194 ++++++++++++++++++++---
llvm/unittests/IR/StructuralHashTest.cpp | 55 +++++++
3 files changed, 272 insertions(+), 22 deletions(-)
diff --git a/llvm/include/llvm/IR/StructuralHash.h b/llvm/include/llvm/IR/StructuralHash.h
index e2e192cc9501b3..071575137ff572 100644
--- a/llvm/include/llvm/IR/StructuralHash.h
+++ b/llvm/include/llvm/IR/StructuralHash.h
@@ -14,7 +14,9 @@
#ifndef LLVM_IR_STRUCTURALHASH_H
#define LLVM_IR_STRUCTURALHASH_H
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StableHashing.h"
+#include "llvm/IR/Instruction.h"
#include <cstdint>
namespace llvm {
@@ -35,6 +37,49 @@ stable_hash StructuralHash(const Function &F, bool DetailedHash = false);
/// composed the module hash.
stable_hash StructuralHash(const Module &M, bool DetailedHash = false);
+/// The pair of an instruction index and a operand index.
+using IndexPair = std::pair<unsigned, unsigned>;
+
+/// A map from an instruction index to an instruction pointer.
+using IndexInstrMap = MapVector<unsigned, Instruction *>;
+
+/// A map from an IndexPair to a stable hash.
+using IndexOperandHashMapType = DenseMap<IndexPair, stable_hash>;
+
+/// A function that takes an instruction and an operand index and returns true
+/// if the operand should be ignored in the function hash computation.
+using IgnoreOperandFunc = std::function<bool(const Instruction *, unsigned)>;
+
+struct FunctionHashInfo {
+ /// A hash value representing the structural content of the function
+ stable_hash FunctionHash;
+ /// A mapping from instruction indices to instruction pointers
+ std::unique_ptr<IndexInstrMap> IndexInstruction;
+ /// A mapping from pairs of instruction indices and operand indices
+ /// to the hashes of the operands. This can be used to analyze or
+ /// reconstruct the differences in ignored operands
+ std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap;
+
+ FunctionHashInfo(stable_hash FuntionHash,
+ std::unique_ptr<IndexInstrMap> IndexInstruction,
+ std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap)
+ : FunctionHash(FuntionHash),
+ IndexInstruction(std::move(IndexInstruction)),
+ IndexOperandHashMap(std::move(IndexOperandHashMap)) {}
+};
+
+/// Computes a structural hash of a given function, considering the structure
+/// and content of the function's instructions while allowing for selective
+/// ignoring of certain operands based on custom criteria. This hash can be used
+/// to identify functions that are structurally similar or identical, which is
+/// useful in optimizations, deduplication, or analysis tasks.
+/// \param F The function to hash.
+/// \param IgnoreOp A callable that takes an instruction and an operand index,
+/// and returns true if the operand should be ignored in the hash computation.
+/// \return A FunctionHashInfo structure
+FunctionHashInfo StructuralHashWithDifferences(const Function &F,
+ IgnoreOperandFunc IgnoreOp);
+
} // end namespace llvm
#endif
diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp
index 267a085c5af705..db54a848fd50dd 100644
--- a/llvm/lib/IR/StructuralHash.cpp
+++ b/llvm/lib/IR/StructuralHash.cpp
@@ -34,14 +34,18 @@ class StructuralHashImpl {
static constexpr stable_hash FunctionHeaderHash = 0x62642d6b6b2d6b72;
static constexpr stable_hash GlobalHeaderHash = 23456;
- // This will produce different values on 32-bit and 64-bit systens as
- // hash_combine returns a size_t. However, this is only used for
- // detailed hashing which, in-tree, only needs to distinguish between
- // differences in functions.
- // TODO: This is not stable.
- template <typename T> stable_hash hashArbitaryType(const T &V) {
- return hash_combine(V);
- }
+ /// IgnoreOp is a function that returns true if the operand should be ignored.
+ IgnoreOperandFunc IgnoreOp = nullptr;
+ /// A mapping from instruction indices to instruction pointers.
+ /// The index represents the position of an instruction based on the order in
+ /// which it is first encountered.
+ std::unique_ptr<IndexInstrMap> IndexInstruction = nullptr;
+ /// A mapping from pairs of instruction indices and operand indices
+ /// to the hashes of the operands.
+ std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap = nullptr;
+
+ /// Assign a unique ID to each Value in the order they are first seen.
+ DenseMap<const Value *, int> ValueToId;
stable_hash hashType(Type *ValueType) {
SmallVector<stable_hash> Hashes;
@@ -53,23 +57,138 @@ class StructuralHashImpl {
public:
StructuralHashImpl() = delete;
- explicit StructuralHashImpl(bool DetailedHash) : DetailedHash(DetailedHash) {}
+ explicit StructuralHashImpl(bool DetailedHash,
+ IgnoreOperandFunc IgnoreOp = nullptr)
+ : DetailedHash(DetailedHash), IgnoreOp(IgnoreOp) {
+ if (IgnoreOp) {
+ IndexInstruction = std::make_unique<IndexInstrMap>();
+ IndexOperandHashMap = std::make_unique<IndexOperandHashMapType>();
+ }
+ }
+
+ stable_hash hashAPInt(const APInt &I) {
+ SmallVector<stable_hash> Hashes;
+ Hashes.emplace_back(I.getBitWidth());
+ for (unsigned J = 0; J < I.getNumWords(); ++J)
+ Hashes.emplace_back((I.getRawData())[J]);
+ return stable_hash_combine(Hashes);
+ }
+
+ stable_hash hashAPFloat(const APFloat &F) {
+ SmallVector<stable_hash> Hashes;
+ const fltSemantics &S = F.getSemantics();
+ Hashes.emplace_back(APFloat::semanticsPrecision(S));
+ Hashes.emplace_back(APFloat::semanticsMaxExponent(S));
+ Hashes.emplace_back(APFloat::semanticsMinExponent(S));
+ Hashes.emplace_back(APFloat::semanticsSizeInBits(S));
+ Hashes.emplace_back(hashAPInt(F.bitcastToAPInt()));
+ return stable_hash_combine(Hashes);
+ }
+
+ stable_hash hashGlobalValue(const GlobalValue *GV) {
+ if (!GV->hasName())
+ return 0;
+ return stable_hash_name(GV->getName());
+ }
+ // Compute a hash for a Constant. This function is logically similar to
+ // FunctionComparator::cmpConstants() in FunctionComparator.cpp, but here
+ // we're interested in computing a hash rather than comparing two Constants.
+ // Some of the logic is simplified, e.g, we don't expand GEPOperator.
stable_hash hashConstant(Constant *C) {
SmallVector<stable_hash> Hashes;
- // TODO: hashArbitaryType() is not stable.
- if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(C)) {
- Hashes.emplace_back(hashArbitaryType(ConstInt->getValue()));
- } else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(C)) {
- Hashes.emplace_back(hashArbitaryType(ConstFP->getValue()));
- } else if (Function *Func = dyn_cast<Function>(C)) {
- // Hashing the name will be deterministic as LLVM's hashing infrastructure
- // has explicit support for hashing strings and will not simply hash
- // the pointer.
- Hashes.emplace_back(hashArbitaryType(Func->getName()));
+
+ Type *Ty = C->getType();
+ Hashes.emplace_back(hashType(Ty));
+
+ if (C->isNullValue()) {
+ Hashes.emplace_back(static_cast<stable_hash>('N'));
+ return stable_hash_combine(Hashes);
}
- return stable_hash_combine(Hashes);
+ auto *G = dyn_cast<GlobalValue>(C);
+ if (G) {
+ Hashes.emplace_back(hashGlobalValue(G));
+ return stable_hash_combine(Hashes);
+ }
+
+ if (const auto *Seq = dyn_cast<ConstantDataSequential>(C)) {
+ Hashes.emplace_back(xxh3_64bits(Seq->getRawDataValues()));
+ return stable_hash_combine(Hashes);
+ }
+
+ switch (C->getValueID()) {
+ case Value::UndefValueVal:
+ case Value::PoisonValueVal:
+ case Value::ConstantTokenNoneVal: {
+ return stable_hash_combine(Hashes);
+ }
+ case Value::ConstantIntVal: {
+ const APInt &Int = cast<ConstantInt>(C)->getValue();
+ Hashes.emplace_back(hashAPInt(Int));
+ return stable_hash_combine(Hashes);
+ }
+ case Value::ConstantFPVal: {
+ const APFloat &APF = cast<ConstantFP>(C)->getValueAPF();
+ Hashes.emplace_back(hashAPFloat(APF));
+ return stable_hash_combine(Hashes);
+ }
+ case Value::ConstantArrayVal: {
+ const ConstantArray *A = cast<ConstantArray>(C);
+ uint64_t NumElements = cast<ArrayType>(Ty)->getNumElements();
+ Hashes.emplace_back(NumElements);
+ for (auto &Op : A->operands()) {
+ auto H = hashConstant(cast<Constant>(Op));
+ Hashes.emplace_back(H);
+ }
+ return stable_hash_combine(Hashes);
+ }
+ case Value::ConstantStructVal: {
+ const ConstantStruct *S = cast<ConstantStruct>(C);
+ unsigned NumElements = cast<StructType>(Ty)->getNumElements();
+ Hashes.emplace_back(NumElements);
+ for (auto &Op : S->operands()) {
+ auto H = hashConstant(cast<Constant>(Op));
+ Hashes.emplace_back(H);
+ }
+ return stable_hash_combine(Hashes);
+ }
+ case Value::ConstantVectorVal: {
+ const ConstantVector *V = cast<ConstantVector>(C);
+ unsigned NumElements = cast<FixedVectorType>(Ty)->getNumElements();
+ Hashes.emplace_back(NumElements);
+ for (auto &Op : V->operands()) {
+ auto H = hashConstant(cast<Constant>(Op));
+ Hashes.emplace_back(H);
+ }
+ return stable_hash_combine(Hashes);
+ }
+ case Value::ConstantExprVal: {
+ const ConstantExpr *E = cast<ConstantExpr>(C);
+ unsigned NumOperands = E->getNumOperands();
+ Hashes.emplace_back(NumOperands);
+ for (auto &Op : E->operands()) {
+ auto H = hashConstant(cast<Constant>(Op));
+ Hashes.emplace_back(H);
+ }
+ return stable_hash_combine(Hashes);
+ }
+ case Value::BlockAddressVal: {
+ const BlockAddress *BA = cast<BlockAddress>(C);
+ auto H = hashGlobalValue(BA->getFunction());
+ Hashes.emplace_back(H);
+ return stable_hash_combine(Hashes);
+ }
+ case Value::DSOLocalEquivalentVal: {
+ const auto *Equiv = cast<DSOLocalEquivalent>(C);
+ auto H = hashGlobalValue(Equiv->getGlobalValue());
+ Hashes.emplace_back(H);
+ return stable_hash_combine(Hashes);
+ }
+ default: // Unknown constant, abort.
+ llvm_unreachable("Constant ValueID not recognized.");
+ }
+ return Hash;
}
stable_hash hashValue(Value *V) {
@@ -83,6 +202,10 @@ class StructuralHashImpl {
if (Argument *Arg = dyn_cast<Argument>(V))
Hashes.emplace_back(Arg->getArgNo());
+ // Get an index (an insertion order) for the non-constant value.
+ auto I = ValueToId.insert({V, ValueToId.size()});
+ Hashes.emplace_back(I.first->second);
+
return stable_hash_combine(Hashes);
}
@@ -107,8 +230,20 @@ class StructuralHashImpl {
if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
Hashes.emplace_back(ComparisonInstruction->getPredicate());
- for (const auto &Op : Inst.operands())
- Hashes.emplace_back(hashOperand(Op));
+ unsigned InstIdx = 0;
+ if (IndexInstruction) {
+ InstIdx = IndexInstruction->size();
+ IndexInstruction->insert({InstIdx, const_cast<Instruction *>(&Inst)});
+ }
+
+ for (const auto [OpndIdx, Op] : enumerate(Inst.operands())) {
+ auto OpndHash = hashOperand(Op);
+ if (IgnoreOp && IgnoreOp(&Inst, OpndIdx)) {
+ assert(IndexOperandHashMap);
+ IndexOperandHashMap->insert({{InstIdx, OpndIdx}, OpndHash});
+ } else
+ Hashes.emplace_back(OpndHash);
+ }
return stable_hash_combine(Hashes);
}
@@ -188,6 +323,12 @@ class StructuralHashImpl {
}
uint64_t getHash() const { return Hash; }
+ std::unique_ptr<IndexInstrMap> getIndexInstrMap() {
+ return std::move(IndexInstruction);
+ }
+ std::unique_ptr<IndexOperandHashMapType> getIndexPairOpndHashMap() {
+ return std::move(IndexOperandHashMap);
+ }
};
} // namespace
@@ -203,3 +344,12 @@ stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) {
H.update(M);
return H.getHash();
}
+
+FunctionHashInfo
+llvm::StructuralHashWithDifferences(const Function &F,
+ IgnoreOperandFunc IgnoreOp) {
+ StructuralHashImpl H(/*DetailedHash=*/true, IgnoreOp);
+ H.update(F);
+ return FunctionHashInfo(H.getHash(), H.getIndexInstrMap(),
+ H.getIndexPairOpndHashMap());
+}
diff --git a/llvm/unittests/IR/StructuralHashTest.cpp b/llvm/unittests/IR/StructuralHashTest.cpp
index 64e66aa5f97a6d..e9f18ed40bc65b 100644
--- a/llvm/unittests/IR/StructuralHashTest.cpp
+++ b/llvm/unittests/IR/StructuralHashTest.cpp
@@ -239,4 +239,59 @@ TEST(StructuralHashTest, ArgumentNumber) {
EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2));
EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true));
}
+
+TEST(StructuralHashTest, Differences) {
+ LLVMContext Ctx;
+ std::unique_ptr<Module> M1 = parseIR(Ctx, "define i64 @f(i64 %a) {\n"
+ " %c = add i64 %a, 1\n"
+ " %b = call i64 @f1(i64 %c)\n"
+ " ret i64 %b\n"
+ "}\n"
+ "declare i64 @f1(i64)");
+ auto *F1 = M1->getFunction("f");
+ std::unique_ptr<Module> M2 = parseIR(Ctx, "define i64 @g(i64 %a) {\n"
+ " %c = add i64 %a, 1\n"
+ " %b = call i64 @f2(i64 %c)\n"
+ " ret i64 %b\n"
+ "}\n"
+ "declare i64 @f2(i64)");
+ auto *F2 = M2->getFunction("g");
+
+ // They are originally different when not ignoring any operand.
+ EXPECT_NE(StructuralHash(*F1, true), StructuralHash(*F2, true));
+ EXPECT_NE(StructuralHashWithDifferences(*F1, nullptr).FunctionHash,
+ StructuralHashWithDifferences(*F2, nullptr).FunctionHash);
+
+ // When we ignore the call target f1 vs f2, they have the same hash.
+ auto IgnoreOp = [&](const Instruction *I, unsigned OpndIdx) {
+ return I->getOpcode() == Instruction::Call && OpndIdx == 1;
+ };
+ auto FuncHashInfo1 = StructuralHashWithDifferences(*F1, IgnoreOp);
+ auto FuncHashInfo2 = StructuralHashWithDifferences(*F2, IgnoreOp);
+ EXPECT_EQ(FuncHashInfo1.FunctionHash, FuncHashInfo2.FunctionHash);
+
+ // There are total 3 instructions.
+ EXPECT_EQ(FuncHashInfo1.IndexInstruction->size(), 3u);
+ EXPECT_EQ(FuncHashInfo2.IndexInstruction->size(), 3u);
+
+ // The only 1 operand (the call target) has been ignored.
+ EXPECT_EQ(FuncHashInfo1.IndexOperandHashMap->size(), 1u);
+ EXPECT_EQ(FuncHashInfo2.IndexOperandHashMap->size(), 1u);
+
+ // The index pair of instruction and operand (1, 1) is a key in the map.
+ ASSERT_TRUE(FuncHashInfo1.IndexOperandHashMap->count({1, 1}));
+ ASSERT_TRUE(FuncHashInfo2.IndexOperandHashMap->count({1, 1}));
+
+ // The indexed instruciton must be the call instruction as shown in the
+ // IgnoreOp above.
+ EXPECT_EQ(FuncHashInfo1.IndexInstruction->lookup(1)->getOpcode(),
+ Instruction::Call);
+ EXPECT_EQ(FuncHashInfo2.IndexInstruction->lookup(1)->getOpcode(),
+ Instruction::Call);
+
+ // The ignored operand hashes (for f1 vs. f2) are different.
+ EXPECT_NE(FuncHashInfo1.IndexOperandHashMap->lookup({1, 1}),
+ FuncHashInfo2.IndexOperandHashMap->lookup({1, 1}));
+}
+
} // end anonymous namespace
>From 6c35be80841db518618e2247166312a369683f37 Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Sat, 19 Oct 2024 00:41:14 -0700
Subject: [PATCH 2/5] Address comments from ellishg
---
llvm/lib/IR/StructuralHash.cpp | 38 +++++++-----------------
llvm/unittests/IR/StructuralHashTest.cpp | 18 +++++++----
2 files changed, 23 insertions(+), 33 deletions(-)
diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp
index db54a848fd50dd..5a643702bd8379 100644
--- a/llvm/lib/IR/StructuralHash.cpp
+++ b/llvm/lib/IR/StructuralHash.cpp
@@ -69,20 +69,13 @@ class StructuralHashImpl {
stable_hash hashAPInt(const APInt &I) {
SmallVector<stable_hash> Hashes;
Hashes.emplace_back(I.getBitWidth());
- for (unsigned J = 0; J < I.getNumWords(); ++J)
- Hashes.emplace_back((I.getRawData())[J]);
+ auto RawVals = ArrayRef<uint64_t>(I.getRawData(), I.getNumWords());
+ Hashes.append(RawVals.begin(), RawVals.end());
return stable_hash_combine(Hashes);
}
stable_hash hashAPFloat(const APFloat &F) {
- SmallVector<stable_hash> Hashes;
- const fltSemantics &S = F.getSemantics();
- Hashes.emplace_back(APFloat::semanticsPrecision(S));
- Hashes.emplace_back(APFloat::semanticsMaxExponent(S));
- Hashes.emplace_back(APFloat::semanticsMinExponent(S));
- Hashes.emplace_back(APFloat::semanticsSizeInBits(S));
- Hashes.emplace_back(hashAPInt(F.bitcastToAPInt()));
- return stable_hash_combine(Hashes);
+ return hashAPInt(F.bitcastToAPInt());
}
stable_hash hashGlobalValue(const GlobalValue *GV) {
@@ -106,8 +99,7 @@ class StructuralHashImpl {
return stable_hash_combine(Hashes);
}
- auto *G = dyn_cast<GlobalValue>(C);
- if (G) {
+ if (auto *G = dyn_cast<GlobalValue>(C)) {
Hashes.emplace_back(hashGlobalValue(G));
return stable_hash_combine(Hashes);
}
@@ -135,8 +127,6 @@ class StructuralHashImpl {
}
case Value::ConstantArrayVal: {
const ConstantArray *A = cast<ConstantArray>(C);
- uint64_t NumElements = cast<ArrayType>(Ty)->getNumElements();
- Hashes.emplace_back(NumElements);
for (auto &Op : A->operands()) {
auto H = hashConstant(cast<Constant>(Op));
Hashes.emplace_back(H);
@@ -145,8 +135,6 @@ class StructuralHashImpl {
}
case Value::ConstantStructVal: {
const ConstantStruct *S = cast<ConstantStruct>(C);
- unsigned NumElements = cast<StructType>(Ty)->getNumElements();
- Hashes.emplace_back(NumElements);
for (auto &Op : S->operands()) {
auto H = hashConstant(cast<Constant>(Op));
Hashes.emplace_back(H);
@@ -155,8 +143,6 @@ class StructuralHashImpl {
}
case Value::ConstantVectorVal: {
const ConstantVector *V = cast<ConstantVector>(C);
- unsigned NumElements = cast<FixedVectorType>(Ty)->getNumElements();
- Hashes.emplace_back(NumElements);
for (auto &Op : V->operands()) {
auto H = hashConstant(cast<Constant>(Op));
Hashes.emplace_back(H);
@@ -165,8 +151,6 @@ class StructuralHashImpl {
}
case Value::ConstantExprVal: {
const ConstantExpr *E = cast<ConstantExpr>(C);
- unsigned NumOperands = E->getNumOperands();
- Hashes.emplace_back(NumOperands);
for (auto &Op : E->operands()) {
auto H = hashConstant(cast<Constant>(Op));
Hashes.emplace_back(H);
@@ -185,10 +169,10 @@ class StructuralHashImpl {
Hashes.emplace_back(H);
return stable_hash_combine(Hashes);
}
- default: // Unknown constant, abort.
- llvm_unreachable("Constant ValueID not recognized.");
+ default:
+ // Skip other types of constants for simplicity.
+ return stable_hash_combine(Hashes);
}
- return Hash;
}
stable_hash hashValue(Value *V) {
@@ -203,8 +187,8 @@ class StructuralHashImpl {
Hashes.emplace_back(Arg->getArgNo());
// Get an index (an insertion order) for the non-constant value.
- auto I = ValueToId.insert({V, ValueToId.size()});
- Hashes.emplace_back(I.first->second);
+ auto [It, WasInserted] = ValueToId.try_emplace(V, ValueToId.size());
+ Hashes.emplace_back(It->second);
return stable_hash_combine(Hashes);
}
@@ -233,14 +217,14 @@ class StructuralHashImpl {
unsigned InstIdx = 0;
if (IndexInstruction) {
InstIdx = IndexInstruction->size();
- IndexInstruction->insert({InstIdx, const_cast<Instruction *>(&Inst)});
+ IndexInstruction->try_emplace(InstIdx, const_cast<Instruction *>(&Inst));
}
for (const auto [OpndIdx, Op] : enumerate(Inst.operands())) {
auto OpndHash = hashOperand(Op);
if (IgnoreOp && IgnoreOp(&Inst, OpndIdx)) {
assert(IndexOperandHashMap);
- IndexOperandHashMap->insert({{InstIdx, OpndIdx}, OpndHash});
+ IndexOperandHashMap->try_emplace({InstIdx, OpndIdx}, OpndHash);
} else
Hashes.emplace_back(OpndHash);
}
diff --git a/llvm/unittests/IR/StructuralHashTest.cpp b/llvm/unittests/IR/StructuralHashTest.cpp
index e9f18ed40bc65b..81c17120a1f6ff 100644
--- a/llvm/unittests/IR/StructuralHashTest.cpp
+++ b/llvm/unittests/IR/StructuralHashTest.cpp
@@ -10,6 +10,7 @@
#include "llvm/AsmParser/Parser.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/SourceMgr.h"
+#include "gmock/gmock-matchers.h"
#include "gtest/gtest.h"
#include <memory>
@@ -18,6 +19,11 @@ using namespace llvm;
namespace {
+using testing::Contains;
+using testing::Key;
+using testing::Pair;
+using testing::SizeIs;
+
std::unique_ptr<Module> parseIR(LLVMContext &Context, const char *IR) {
SMDiagnostic Err;
std::unique_ptr<Module> M = parseAssemblyString(IR, Err, Context);
@@ -271,16 +277,16 @@ TEST(StructuralHashTest, Differences) {
EXPECT_EQ(FuncHashInfo1.FunctionHash, FuncHashInfo2.FunctionHash);
// There are total 3 instructions.
- EXPECT_EQ(FuncHashInfo1.IndexInstruction->size(), 3u);
- EXPECT_EQ(FuncHashInfo2.IndexInstruction->size(), 3u);
+ EXPECT_THAT(*FuncHashInfo1.IndexInstruction, SizeIs(3));
+ EXPECT_THAT(*FuncHashInfo2.IndexInstruction, SizeIs(3));
// The only 1 operand (the call target) has been ignored.
- EXPECT_EQ(FuncHashInfo1.IndexOperandHashMap->size(), 1u);
- EXPECT_EQ(FuncHashInfo2.IndexOperandHashMap->size(), 1u);
+ EXPECT_THAT(*FuncHashInfo1.IndexOperandHashMap, SizeIs(1u));
+ EXPECT_THAT(*FuncHashInfo2.IndexOperandHashMap, SizeIs(1u));
// The index pair of instruction and operand (1, 1) is a key in the map.
- ASSERT_TRUE(FuncHashInfo1.IndexOperandHashMap->count({1, 1}));
- ASSERT_TRUE(FuncHashInfo2.IndexOperandHashMap->count({1, 1}));
+ ASSERT_THAT(*FuncHashInfo1.IndexOperandHashMap, Contains(Key(Pair(1, 1))));
+ ASSERT_THAT(*FuncHashInfo2.IndexOperandHashMap, Contains(Key(Pair(1, 1))));
// The indexed instruciton must be the call instruction as shown in the
// IgnoreOp above.
>From 24768bbfe139a0f703310c9c57897ecdb743e653 Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Sat, 19 Oct 2024 09:18:45 -0700
Subject: [PATCH 3/5] Address comments from ilovepi
---
llvm/include/llvm/Analysis/StructuralHash.h | 9 +++--
llvm/lib/Analysis/StructuralHash.cpp | 27 ++++++++++++--
llvm/lib/IR/StructuralHash.cpp | 37 +++----------------
llvm/lib/Passes/PassBuilder.cpp | 16 ++++++--
llvm/lib/Passes/PassRegistry.def | 7 ++--
.../StructuralHash/structural-hash-printer.ll | 24 +++++++++---
6 files changed, 71 insertions(+), 49 deletions(-)
diff --git a/llvm/include/llvm/Analysis/StructuralHash.h b/llvm/include/llvm/Analysis/StructuralHash.h
index 9f33c69aed345c..58846511807b6e 100644
--- a/llvm/include/llvm/Analysis/StructuralHash.h
+++ b/llvm/include/llvm/Analysis/StructuralHash.h
@@ -13,15 +13,18 @@
namespace llvm {
+enum class StructuralHashOptions { None, Detailed, CallTargetIgnored };
+
/// Printer pass for StructuralHashes
class StructuralHashPrinterPass
: public PassInfoMixin<StructuralHashPrinterPass> {
raw_ostream &OS;
- bool EnableDetailedStructuralHash;
+ const StructuralHashOptions Options;
public:
- explicit StructuralHashPrinterPass(raw_ostream &OS, bool Detailed)
- : OS(OS), EnableDetailedStructuralHash(Detailed) {}
+ explicit StructuralHashPrinterPass(raw_ostream &OS,
+ StructuralHashOptions Options)
+ : OS(OS), Options(Options) {}
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
diff --git a/llvm/lib/Analysis/StructuralHash.cpp b/llvm/lib/Analysis/StructuralHash.cpp
index 3a2341fe59ad9c..4f2e003148b606 100644
--- a/llvm/lib/Analysis/StructuralHash.cpp
+++ b/llvm/lib/Analysis/StructuralHash.cpp
@@ -21,14 +21,33 @@ using namespace llvm;
PreservedAnalyses StructuralHashPrinterPass::run(Module &M,
ModuleAnalysisManager &MAM) {
OS << "Module Hash: "
- << format("%016" PRIx64, StructuralHash(M, EnableDetailedStructuralHash))
+ << format("%016" PRIx64,
+ StructuralHash(M, Options != StructuralHashOptions::None))
<< "\n";
for (Function &F : M) {
if (F.isDeclaration())
continue;
- OS << "Function " << F.getName() << " Hash: "
- << format("%016" PRIx64, StructuralHash(F, EnableDetailedStructuralHash))
- << "\n";
+ if (Options == StructuralHashOptions::CallTargetIgnored) {
+ auto IgnoreOp = [&](const Instruction *I, unsigned OpndIdx) {
+ return I->getOpcode() == Instruction::Call &&
+ isa<Constant>(I->getOperand(OpndIdx));
+ };
+ auto FuncHashInfo = StructuralHashWithDifferences(F, IgnoreOp);
+ OS << "Function " << F.getName()
+ << " Hash: " << format("%016" PRIx64, FuncHashInfo.FunctionHash)
+ << "\n";
+ for (auto &[IndexPair, OpndHash] : *FuncHashInfo.IndexOperandHashMap) {
+ auto [InstIndex, OpndIndex] = IndexPair;
+ OS << "\tIgnored Operand Hash: " << format("%016" PRIx64, OpndHash)
+ << " at (" << InstIndex << "," << OpndIndex << ")\n";
+ }
+ } else {
+ OS << "Function " << F.getName() << " Hash: "
+ << format(
+ "%016" PRIx64,
+ StructuralHash(F, Options == StructuralHashOptions::Detailed))
+ << "\n";
+ }
}
return PreservedAnalyses::all();
}
diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp
index 5a643702bd8379..a51f9124af04da 100644
--- a/llvm/lib/IR/StructuralHash.cpp
+++ b/llvm/lib/IR/StructuralHash.cpp
@@ -110,11 +110,6 @@ class StructuralHashImpl {
}
switch (C->getValueID()) {
- case Value::UndefValueVal:
- case Value::PoisonValueVal:
- case Value::ConstantTokenNoneVal: {
- return stable_hash_combine(Hashes);
- }
case Value::ConstantIntVal: {
const APInt &Int = cast<ConstantInt>(C)->getValue();
Hashes.emplace_back(hashAPInt(Int));
@@ -125,33 +120,11 @@ class StructuralHashImpl {
Hashes.emplace_back(hashAPFloat(APF));
return stable_hash_combine(Hashes);
}
- case Value::ConstantArrayVal: {
- const ConstantArray *A = cast<ConstantArray>(C);
- for (auto &Op : A->operands()) {
- auto H = hashConstant(cast<Constant>(Op));
- Hashes.emplace_back(H);
- }
- return stable_hash_combine(Hashes);
- }
- case Value::ConstantStructVal: {
- const ConstantStruct *S = cast<ConstantStruct>(C);
- for (auto &Op : S->operands()) {
- auto H = hashConstant(cast<Constant>(Op));
- Hashes.emplace_back(H);
- }
- return stable_hash_combine(Hashes);
- }
- case Value::ConstantVectorVal: {
- const ConstantVector *V = cast<ConstantVector>(C);
- for (auto &Op : V->operands()) {
- auto H = hashConstant(cast<Constant>(Op));
- Hashes.emplace_back(H);
- }
- return stable_hash_combine(Hashes);
- }
+ case Value::ConstantArrayVal:
+ case Value::ConstantStructVal:
+ case Value::ConstantVectorVal:
case Value::ConstantExprVal: {
- const ConstantExpr *E = cast<ConstantExpr>(C);
- for (auto &Op : E->operands()) {
+ for (const auto &Op : C->operands()) {
auto H = hashConstant(cast<Constant>(Op));
Hashes.emplace_back(H);
}
@@ -307,9 +280,11 @@ class StructuralHashImpl {
}
uint64_t getHash() const { return Hash; }
+
std::unique_ptr<IndexInstrMap> getIndexInstrMap() {
return std::move(IndexInstruction);
}
+
std::unique_ptr<IndexOperandHashMapType> getIndexPairOpndHashMap() {
return std::move(IndexOperandHashMap);
}
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index f5ce405ab8d961..7f3b766a7c4018 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -1175,9 +1175,19 @@ Expected<std::string> parseMemProfUsePassOptions(StringRef Params) {
return Result;
}
-Expected<bool> parseStructuralHashPrinterPassOptions(StringRef Params) {
- return PassBuilder::parseSinglePassOption(Params, "detailed",
- "StructuralHashPrinterPass");
+Expected<StructuralHashOptions>
+parseStructuralHashPrinterPassOptions(StringRef Params) {
+ if (Params.empty())
+ return StructuralHashOptions::None;
+ else if (Params == "detailed")
+ return StructuralHashOptions::Detailed;
+ else if (Params == "call-target-ignored")
+ return StructuralHashOptions::CallTargetIgnored;
+ else
+ return make_error<StringError>(
+ formatv("invalid structural hash printer parameter '{0}' ", Params)
+ .str(),
+ inconvertibleErrorCode());
}
Expected<bool> parseWinEHPrepareOptions(StringRef Params) {
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 549c1359b5852c..017ae311c55eb4 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -220,10 +220,11 @@ MODULE_PASS_WITH_PARAMS(
parseMSanPassOptions, "recover;kernel;eager-checks;track-origins=N")
MODULE_PASS_WITH_PARAMS(
"print<structural-hash>", "StructuralHashPrinterPass",
- [](bool EnableDetailedStructuralHash) {
- return StructuralHashPrinterPass(dbgs(), EnableDetailedStructuralHash);
+ [](StructuralHashOptions Options) {
+ return StructuralHashPrinterPass(dbgs(), Options);
},
- parseStructuralHashPrinterPassOptions, "detailed")
+ parseStructuralHashPrinterPassOptions, "detailed;call-target-ignored")
+
#undef MODULE_PASS_WITH_PARAMS
#ifndef CGSCC_ANALYSIS
diff --git a/llvm/test/Analysis/StructuralHash/structural-hash-printer.ll b/llvm/test/Analysis/StructuralHash/structural-hash-printer.ll
index 5936199bf32f43..3c23b54d297369 100644
--- a/llvm/test/Analysis/StructuralHash/structural-hash-printer.ll
+++ b/llvm/test/Analysis/StructuralHash/structural-hash-printer.ll
@@ -1,17 +1,21 @@
; RUN: opt -passes='print<structural-hash>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -passes='print<structural-hash><detailed>' -disable-output %s 2>&1 | FileCheck %s -check-prefix=DETAILED-HASH
+; RUN: opt -passes='print<structural-hash><call-target-ignored>' -disable-output %s 2>&1 | FileCheck %s -check-prefix=CALLTARGETIGNORED-HASH
; Add a declaration so that we can test we skip it.
-declare i64 @d1()
+declare i64 @d1(i64)
+declare i64 @e1(i64)
define i64 @f1(i64 %a) {
%b = add i64 %a, 1
- ret i64 %b
+ %c = call i64 @d1(i64 %b)
+ ret i64 %c
}
-define i32 @f2(i32 %a) {
- %b = add i32 %a, 2
- ret i32 %b
+define i64 @f2(i64 %a) {
+ %b = add i64 %a, 1
+ %c = call i64 @e1(i64 %b)
+ ret i64 %c
}
; CHECK: Module Hash: {{([a-f0-9]{16,})}}
@@ -22,3 +26,13 @@ define i32 @f2(i32 %a) {
; DETAILED-HASH-NEXT: Function f1 Hash: [[DF1H:([a-f0-9]{16,})]]
; DETAILED-HASH-NOT: [[DF1H]]
; DETAILED-HASH-NEXT: Function f2 Hash: {{([a-f0-9]{16,})}}
+
+; When ignoring the call target, check if `f1` and `f2` produce the same function hash.
+; The index for the call instruction is 1, and the index of the call target operand is 1.
+; The ignored operand hashes for different call targets should be different.
+; CALLTARGETIGNORED-HASH: Module Hash: {{([a-f0-9]{16,})}}
+; CALLTARGETIGNORED-HASH-NEXT: Function f1 Hash: [[IF1H:([a-f0-9]{16,})]]
+; CALLTARGETIGNORED-HASH-NEXT: Ignored Operand Hash: [[IO1H:([a-f0-9]{16,})]] at (1,1)
+; CALLTARGETIGNORED-HASH-NEXT: Function f2 Hash: [[IF1H]]
+; CALLTARGETIGNORED-HASH-NOT: [[IO1H]]
+; CALLTARGETIGNORED-HASH-NEXT: Ignored Operand Hash: {{([a-f0-9]{16,})}} at (1,1)
>From 14aa138267c26239cbd746c50f222a4cb46ab559 Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Mon, 21 Oct 2024 16:38:16 -0700
Subject: [PATCH 4/5] Address 2nd comments from ellishg
---
llvm/lib/Passes/PassBuilder.cpp | 12 +++++-------
1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 7f3b766a7c4018..d1f75dfb5350a0 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -1179,15 +1179,13 @@ Expected<StructuralHashOptions>
parseStructuralHashPrinterPassOptions(StringRef Params) {
if (Params.empty())
return StructuralHashOptions::None;
- else if (Params == "detailed")
+ if (Params == "detailed")
return StructuralHashOptions::Detailed;
- else if (Params == "call-target-ignored")
+ if (Params == "call-target-ignored")
return StructuralHashOptions::CallTargetIgnored;
- else
- return make_error<StringError>(
- formatv("invalid structural hash printer parameter '{0}' ", Params)
- .str(),
- inconvertibleErrorCode());
+ return make_error<StringError>(
+ formatv("invalid structural hash printer parameter '{0}' ", Params).str(),
+ inconvertibleErrorCode());
}
Expected<bool> parseWinEHPrepareOptions(StringRef Params) {
>From d3c35a765780eedf9ccc86949d639afede480dfb Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Fri, 25 Oct 2024 23:29:43 -0700
Subject: [PATCH 5/5] Address 3rd comments from ellishg
---
llvm/include/llvm/Analysis/StructuralHash.h | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/llvm/include/llvm/Analysis/StructuralHash.h b/llvm/include/llvm/Analysis/StructuralHash.h
index 58846511807b6e..4c9f063bc7d2c8 100644
--- a/llvm/include/llvm/Analysis/StructuralHash.h
+++ b/llvm/include/llvm/Analysis/StructuralHash.h
@@ -13,7 +13,11 @@
namespace llvm {
-enum class StructuralHashOptions { None, Detailed, CallTargetIgnored };
+enum class StructuralHashOptions {
+ None, /// Hash with opcode only.
+ Detailed, /// Hash with opcode and operands.
+ CallTargetIgnored, /// Ignore call target operand when computing hash.
+};
/// Printer pass for StructuralHashes
class StructuralHashPrinterPass
More information about the llvm-commits
mailing list