[llvm] [StableHash] Implement stable global name for the hash computation (PR #106156)
Kyungwoo Lee via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 27 13:40:38 PDT 2024
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/106156
>From 9eff590051775a7e4cad04fdddb9d9be8db0e86d Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Mon, 26 Aug 2024 14:03:56 -0700
Subject: [PATCH 1/2] [StableHash] Implement stable global name for the hash
computation
LLVM often extends global names by adding suffixes to distinguish unique identities.
However, these suffixes are not always stable across different runs and build environments.
To address this issue, I implemented `get_stable_name` to ignore such suffixes and obtain the original name. This approach is not new, as PGO or Bolt already handle this issue similarly. Using the stable name obtained from `get_stable_name`, I implemented `stable_hash_name` while utilizing the same underlying `xxh3_64bit` algorithm as before.
---
llvm/include/llvm/ADT/StableHashing.h | 16 +++
llvm/lib/CodeGen/MachineStableHash.cpp | 21 ++-
llvm/unittests/MIR/CMakeLists.txt | 1 +
llvm/unittests/MIR/MachineStableHashTest.cpp | 143 +++++++++++++++++++
4 files changed, 175 insertions(+), 6 deletions(-)
create mode 100644 llvm/unittests/MIR/MachineStableHashTest.cpp
diff --git a/llvm/include/llvm/ADT/StableHashing.h b/llvm/include/llvm/ADT/StableHashing.h
index 7778f5d7c3a1c3..23a878b264da88 100644
--- a/llvm/include/llvm/ADT/StableHashing.h
+++ b/llvm/include/llvm/ADT/StableHashing.h
@@ -50,6 +50,22 @@ inline stable_hash stable_hash_combine(stable_hash A, stable_hash B,
return stable_hash_combine(Hashes);
}
+// Removes suffixes introduced by LLVM from the name to enhance stability and
+// maintain closeness to the original name across different builds.
+inline StringRef get_stable_name(StringRef Name) {
+ auto [P1, S1] = Name.rsplit(".llvm.");
+ auto [P2, S2] = P1.rsplit(".__uniq.");
+ return P2;
+}
+
+// Generates a consistent hash value for a given input name across different
+// program executions and environments. This function first converts the input
+// name into a stable form using the `get_stable_name` function, and then
+// computes a hash of this stable name.
+inline stable_hash stable_hash_name(StringRef Name) {
+ return xxh3_64bits(get_stable_name(Name));
+}
+
} // namespace llvm
#endif
diff --git a/llvm/lib/CodeGen/MachineStableHash.cpp b/llvm/lib/CodeGen/MachineStableHash.cpp
index 916acbf2d2cbf9..e88c6f37bbebd0 100644
--- a/llvm/lib/CodeGen/MachineStableHash.cpp
+++ b/llvm/lib/CodeGen/MachineStableHash.cpp
@@ -95,13 +95,21 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
case MachineOperand::MO_Metadata:
StableHashBailingMetadataUnsupported++;
return 0;
- case MachineOperand::MO_GlobalAddress:
- StableHashBailingGlobalAddress++;
- return 0;
+ case MachineOperand::MO_GlobalAddress: {
+ const GlobalValue *GV = MO.getGlobal();
+ if (!GV->hasName()) {
+ StableHashBailingGlobalAddress++;
+ return 0;
+ }
+ auto Name = GV->getName();
+ return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
+ stable_hash_name(Name), MO.getOffset());
+ }
+
case MachineOperand::MO_TargetIndex: {
if (const char *Name = MO.getTargetIndexName())
return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
- xxh3_64bits(Name), MO.getOffset());
+ stable_hash_name(Name), MO.getOffset());
StableHashBailingTargetIndexNoName++;
return 0;
}
@@ -113,7 +121,8 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
case MachineOperand::MO_ExternalSymbol:
return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
- MO.getOffset(), xxh3_64bits(MO.getSymbolName()));
+ MO.getOffset(),
+ stable_hash_name(MO.getSymbolName()));
case MachineOperand::MO_RegisterMask:
case MachineOperand::MO_RegisterLiveOut: {
@@ -149,7 +158,7 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
case MachineOperand::MO_MCSymbol: {
auto SymbolName = MO.getMCSymbol()->getName();
return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
- xxh3_64bits(SymbolName));
+ stable_hash_name(SymbolName));
}
case MachineOperand::MO_CFIIndex:
return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
diff --git a/llvm/unittests/MIR/CMakeLists.txt b/llvm/unittests/MIR/CMakeLists.txt
index d6aff46eff07b7..206094266ba148 100644
--- a/llvm/unittests/MIR/CMakeLists.txt
+++ b/llvm/unittests/MIR/CMakeLists.txt
@@ -15,6 +15,7 @@ set(LLVM_LINK_COMPONENTS
add_llvm_unittest(MIRTests
MachineMetadata.cpp
+ MachineStableHashTest.cpp
)
target_link_libraries(MIRTests PRIVATE LLVMTestingSupport)
diff --git a/llvm/unittests/MIR/MachineStableHashTest.cpp b/llvm/unittests/MIR/MachineStableHashTest.cpp
new file mode 100644
index 00000000000000..19cb7ebb25cdfd
--- /dev/null
+++ b/llvm/unittests/MIR/MachineStableHashTest.cpp
@@ -0,0 +1,143 @@
+//===- MachineStableHashTest.cpp ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineStableHash.h"
+#include "llvm/CodeGen/MIRParser/MIRParser.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/FileCheck/FileCheck.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetMachine.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+class MachineStableHashTest : public testing::Test {
+public:
+ MachineStableHashTest() {}
+
+protected:
+ LLVMContext Context;
+ std::unique_ptr<Module> M;
+ std::unique_ptr<MIRParser> MIR;
+
+ static void SetUpTestCase() {
+ InitializeAllTargetInfos();
+ InitializeAllTargets();
+ InitializeAllTargetMCs();
+ }
+
+ void SetUp() override { M = std::make_unique<Module>("Dummy", Context); }
+
+ std::unique_ptr<LLVMTargetMachine>
+ createTargetMachine(std::string TT, StringRef CPU, StringRef FS) {
+ std::string Error;
+ const Target *T = TargetRegistry::lookupTarget(TT, Error);
+ if (!T)
+ return nullptr;
+ TargetOptions Options;
+ return std::unique_ptr<LLVMTargetMachine>(
+ static_cast<LLVMTargetMachine *>(T->createTargetMachine(
+ TT, CPU, FS, Options, std::nullopt, std::nullopt)));
+ }
+
+ std::unique_ptr<Module> parseMIR(const TargetMachine &TM, StringRef MIRCode,
+ MachineModuleInfo &MMI) {
+ SMDiagnostic Diagnostic;
+ std::unique_ptr<MemoryBuffer> MBuffer = MemoryBuffer::getMemBuffer(MIRCode);
+ MIR = createMIRParser(std::move(MBuffer), Context);
+ if (!MIR)
+ return nullptr;
+
+ std::unique_ptr<Module> Mod = MIR->parseIRModule();
+ if (!Mod)
+ return nullptr;
+
+ Mod->setDataLayout(TM.createDataLayout());
+
+ if (MIR->parseMachineFunctions(*Mod, MMI)) {
+ M.reset();
+ return nullptr;
+ }
+
+ return Mod;
+ }
+};
+
+TEST_F(MachineStableHashTest, StableGlobalName) {
+ auto TM = createTargetMachine(("aarch64--"), "", "");
+ if (!TM)
+ GTEST_SKIP();
+ StringRef MIRString = R"MIR(
+--- |
+ define void @f1() { ret void }
+ define void @f2() { ret void }
+ define void @f3() { ret void }
+ define void @f4() { ret void }
+ declare void @goo()
+ declare void @goo.llvm.123()
+ declare void @goo.__uniq.456()
+ declare void @goo.invalid.789()
+...
+---
+name: f1
+alignment: 16
+tracksRegLiveness: true
+frameInfo:
+ maxAlignment: 16
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $lr
+ BL @goo
+ RET undef $lr
+
+...
+---
+name: f2
+body: |
+ bb.0:
+ liveins: $lr
+ BL @goo.llvm.123
+ RET undef $lr
+...
+---
+name: f3
+body: |
+ bb.0:
+ liveins: $lr
+ BL @goo.__uniq.456
+ RET undef $lr
+...
+---
+name: f4
+body: |
+ bb.0:
+ liveins: $lr
+ BL @goo.invalid.789
+ RET undef $lr
+...
+)MIR";
+ MachineModuleInfo MMI(TM.get());
+ M = parseMIR(*TM, MIRString, MMI);
+ ASSERT_TRUE(M);
+ auto *MF1 = MMI.getMachineFunction(*M->getFunction("f1"));
+ auto *MF2 = MMI.getMachineFunction(*M->getFunction("f2"));
+ auto *MF3 = MMI.getMachineFunction(*M->getFunction("f3"));
+ auto *MF4 = MMI.getMachineFunction(*M->getFunction("f4"));
+
+ // Expect the suffix, `.llvm.{number}` to be ignored.
+ EXPECT_EQ(stableHashValue(*MF1), stableHashValue(*MF2));
+ // Expect the suffix, `.__uniq.{number}` to be ignored.
+ EXPECT_EQ(stableHashValue(*MF1), stableHashValue(*MF3));
+ // Do not ignore `.invalid.{number}`.
+ EXPECT_NE(stableHashValue(*MF1), stableHashValue(*MF4));
+}
>From 39e65594c2c10db2e0c7be2e49b9e515b141ba3b Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Tue, 27 Aug 2024 13:22:56 -0700
Subject: [PATCH 2/2] Address comments from ellishg
---
llvm/include/llvm/ADT/StableHashing.h | 3 ++-
llvm/lib/CodeGen/MachineStableHash.cpp | 12 ++++++------
llvm/unittests/MIR/MachineStableHashTest.cpp | 8 ++++----
3 files changed, 12 insertions(+), 11 deletions(-)
diff --git a/llvm/include/llvm/ADT/StableHashing.h b/llvm/include/llvm/ADT/StableHashing.h
index 23a878b264da88..7852199f8b0a00 100644
--- a/llvm/include/llvm/ADT/StableHashing.h
+++ b/llvm/include/llvm/ADT/StableHashing.h
@@ -61,7 +61,8 @@ inline StringRef get_stable_name(StringRef Name) {
// Generates a consistent hash value for a given input name across different
// program executions and environments. This function first converts the input
// name into a stable form using the `get_stable_name` function, and then
-// computes a hash of this stable name.
+// computes a hash of this stable name. For instance, `foo.llvm.1234` would have
+// the same hash as `foo.llvm.5678.
inline stable_hash stable_hash_name(StringRef Name) {
return xxh3_64bits(get_stable_name(Name));
}
diff --git a/llvm/lib/CodeGen/MachineStableHash.cpp b/llvm/lib/CodeGen/MachineStableHash.cpp
index e88c6f37bbebd0..51348c095411cf 100644
--- a/llvm/lib/CodeGen/MachineStableHash.cpp
+++ b/llvm/lib/CodeGen/MachineStableHash.cpp
@@ -84,21 +84,21 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
}
case MachineOperand::MO_MachineBasicBlock:
- StableHashBailingMachineBasicBlock++;
+ ++StableHashBailingMachineBasicBlock;
return 0;
case MachineOperand::MO_ConstantPoolIndex:
- StableHashBailingConstantPoolIndex++;
+ ++StableHashBailingConstantPoolIndex;
return 0;
case MachineOperand::MO_BlockAddress:
- StableHashBailingBlockAddress++;
+ ++StableHashBailingBlockAddress;
return 0;
case MachineOperand::MO_Metadata:
- StableHashBailingMetadataUnsupported++;
+ ++StableHashBailingMetadataUnsupported;
return 0;
case MachineOperand::MO_GlobalAddress: {
const GlobalValue *GV = MO.getGlobal();
if (!GV->hasName()) {
- StableHashBailingGlobalAddress++;
+ ++StableHashBailingGlobalAddress;
return 0;
}
auto Name = GV->getName();
@@ -110,7 +110,7 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
if (const char *Name = MO.getTargetIndexName())
return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
stable_hash_name(Name), MO.getOffset());
- StableHashBailingTargetIndexNoName++;
+ ++StableHashBailingTargetIndexNoName;
return 0;
}
diff --git a/llvm/unittests/MIR/MachineStableHashTest.cpp b/llvm/unittests/MIR/MachineStableHashTest.cpp
index 19cb7ebb25cdfd..c6b99123d4bd2a 100644
--- a/llvm/unittests/MIR/MachineStableHashTest.cpp
+++ b/llvm/unittests/MIR/MachineStableHashTest.cpp
@@ -134,10 +134,10 @@ body: |
auto *MF3 = MMI.getMachineFunction(*M->getFunction("f3"));
auto *MF4 = MMI.getMachineFunction(*M->getFunction("f4"));
- // Expect the suffix, `.llvm.{number}` to be ignored.
- EXPECT_EQ(stableHashValue(*MF1), stableHashValue(*MF2));
- // Expect the suffix, `.__uniq.{number}` to be ignored.
- EXPECT_EQ(stableHashValue(*MF1), stableHashValue(*MF3));
+ EXPECT_EQ(stableHashValue(*MF1), stableHashValue(*MF2))
+ << "Expect the suffix, `.llvm.{number}` to be ignored.";
+ EXPECT_EQ(stableHashValue(*MF1), stableHashValue(*MF3))
+ << "Expect the suffix, `.__uniq.{number}` to be ignored.";
// Do not ignore `.invalid.{number}`.
EXPECT_NE(stableHashValue(*MF1), stableHashValue(*MF4));
}
More information about the llvm-commits
mailing list