[llvm] f9ad249 - [StableHash] Implement stable global name for the hash computation (#106156)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 27 15:09:10 PDT 2024
Author: Kyungwoo Lee
Date: 2024-08-27T15:09:06-07:00
New Revision: f9ad24946026c45e22c445f8ebc79b8b651e1dad
URL: https://github.com/llvm/llvm-project/commit/f9ad24946026c45e22c445f8ebc79b8b651e1dad
DIFF: https://github.com/llvm/llvm-project/commit/f9ad24946026c45e22c445f8ebc79b8b651e1dad.diff
LOG: [StableHash] Implement stable global name for the hash computation (#106156)
LLVM often extends global names by adding suffixes to distinguish unique
identities. However, these suffixes are not always stable across
different runs and build environments. To address this issue, I
implemented `get_stable_name` to ignore such suffixes and obtain the
original name. This approach is not new, as PGO or Bolt already handle
this issue similarly. Using the stable name obtained from
`get_stable_name`, I implemented `stable_hash_name` while utilizing the
same underlying `xxh3_64bit` algorithm as before.
Added:
llvm/unittests/MIR/MachineStableHashTest.cpp
Modified:
llvm/include/llvm/ADT/StableHashing.h
llvm/lib/CodeGen/MachineStableHash.cpp
llvm/unittests/MIR/CMakeLists.txt
Removed:
################################################################################
diff --git a/llvm/include/llvm/ADT/StableHashing.h b/llvm/include/llvm/ADT/StableHashing.h
index 7778f5d7c3a1c3..7852199f8b0a00 100644
--- a/llvm/include/llvm/ADT/StableHashing.h
+++ b/llvm/include/llvm/ADT/StableHashing.h
@@ -50,6 +50,23 @@ inline stable_hash stable_hash_combine(stable_hash A, stable_hash B,
return stable_hash_combine(Hashes);
}
+// Removes suffixes introduced by LLVM from the name to enhance stability and
+// maintain closeness to the original name across different builds.
+inline StringRef get_stable_name(StringRef Name) {
+ auto [P1, S1] = Name.rsplit(".llvm.");
+ auto [P2, S2] = P1.rsplit(".__uniq.");
+ return P2;
+}
+
+// Generates a consistent hash value for a given input name across different
+// program executions and environments. This function first converts the input
+// name into a stable form using the `get_stable_name` function, and then
+// computes a hash of this stable name. For instance, `foo.llvm.1234` would have
+// the same hash as `foo.llvm.5678.
+inline stable_hash stable_hash_name(StringRef Name) {
+ return xxh3_64bits(get_stable_name(Name));
+}
+
} // namespace llvm
#endif
diff --git a/llvm/lib/CodeGen/MachineStableHash.cpp b/llvm/lib/CodeGen/MachineStableHash.cpp
index 916acbf2d2cbf9..51348c095411cf 100644
--- a/llvm/lib/CodeGen/MachineStableHash.cpp
+++ b/llvm/lib/CodeGen/MachineStableHash.cpp
@@ -84,25 +84,33 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
}
case MachineOperand::MO_MachineBasicBlock:
- StableHashBailingMachineBasicBlock++;
+ ++StableHashBailingMachineBasicBlock;
return 0;
case MachineOperand::MO_ConstantPoolIndex:
- StableHashBailingConstantPoolIndex++;
+ ++StableHashBailingConstantPoolIndex;
return 0;
case MachineOperand::MO_BlockAddress:
- StableHashBailingBlockAddress++;
+ ++StableHashBailingBlockAddress;
return 0;
case MachineOperand::MO_Metadata:
- StableHashBailingMetadataUnsupported++;
- return 0;
- case MachineOperand::MO_GlobalAddress:
- StableHashBailingGlobalAddress++;
+ ++StableHashBailingMetadataUnsupported;
return 0;
+ case MachineOperand::MO_GlobalAddress: {
+ const GlobalValue *GV = MO.getGlobal();
+ if (!GV->hasName()) {
+ ++StableHashBailingGlobalAddress;
+ return 0;
+ }
+ auto Name = GV->getName();
+ return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
+ stable_hash_name(Name), MO.getOffset());
+ }
+
case MachineOperand::MO_TargetIndex: {
if (const char *Name = MO.getTargetIndexName())
return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
- xxh3_64bits(Name), MO.getOffset());
- StableHashBailingTargetIndexNoName++;
+ stable_hash_name(Name), MO.getOffset());
+ ++StableHashBailingTargetIndexNoName;
return 0;
}
@@ -113,7 +121,8 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
case MachineOperand::MO_ExternalSymbol:
return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
- MO.getOffset(), xxh3_64bits(MO.getSymbolName()));
+ MO.getOffset(),
+ stable_hash_name(MO.getSymbolName()));
case MachineOperand::MO_RegisterMask:
case MachineOperand::MO_RegisterLiveOut: {
@@ -149,7 +158,7 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
case MachineOperand::MO_MCSymbol: {
auto SymbolName = MO.getMCSymbol()->getName();
return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
- xxh3_64bits(SymbolName));
+ stable_hash_name(SymbolName));
}
case MachineOperand::MO_CFIIndex:
return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
diff --git a/llvm/unittests/MIR/CMakeLists.txt b/llvm/unittests/MIR/CMakeLists.txt
index d6aff46eff07b7..206094266ba148 100644
--- a/llvm/unittests/MIR/CMakeLists.txt
+++ b/llvm/unittests/MIR/CMakeLists.txt
@@ -15,6 +15,7 @@ set(LLVM_LINK_COMPONENTS
add_llvm_unittest(MIRTests
MachineMetadata.cpp
+ MachineStableHashTest.cpp
)
target_link_libraries(MIRTests PRIVATE LLVMTestingSupport)
diff --git a/llvm/unittests/MIR/MachineStableHashTest.cpp b/llvm/unittests/MIR/MachineStableHashTest.cpp
new file mode 100644
index 00000000000000..c6b99123d4bd2a
--- /dev/null
+++ b/llvm/unittests/MIR/MachineStableHashTest.cpp
@@ -0,0 +1,143 @@
+//===- MachineStableHashTest.cpp ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineStableHash.h"
+#include "llvm/CodeGen/MIRParser/MIRParser.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/FileCheck/FileCheck.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetMachine.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+class MachineStableHashTest : public testing::Test {
+public:
+ MachineStableHashTest() {}
+
+protected:
+ LLVMContext Context;
+ std::unique_ptr<Module> M;
+ std::unique_ptr<MIRParser> MIR;
+
+ static void SetUpTestCase() {
+ InitializeAllTargetInfos();
+ InitializeAllTargets();
+ InitializeAllTargetMCs();
+ }
+
+ void SetUp() override { M = std::make_unique<Module>("Dummy", Context); }
+
+ std::unique_ptr<LLVMTargetMachine>
+ createTargetMachine(std::string TT, StringRef CPU, StringRef FS) {
+ std::string Error;
+ const Target *T = TargetRegistry::lookupTarget(TT, Error);
+ if (!T)
+ return nullptr;
+ TargetOptions Options;
+ return std::unique_ptr<LLVMTargetMachine>(
+ static_cast<LLVMTargetMachine *>(T->createTargetMachine(
+ TT, CPU, FS, Options, std::nullopt, std::nullopt)));
+ }
+
+ std::unique_ptr<Module> parseMIR(const TargetMachine &TM, StringRef MIRCode,
+ MachineModuleInfo &MMI) {
+ SMDiagnostic Diagnostic;
+ std::unique_ptr<MemoryBuffer> MBuffer = MemoryBuffer::getMemBuffer(MIRCode);
+ MIR = createMIRParser(std::move(MBuffer), Context);
+ if (!MIR)
+ return nullptr;
+
+ std::unique_ptr<Module> Mod = MIR->parseIRModule();
+ if (!Mod)
+ return nullptr;
+
+ Mod->setDataLayout(TM.createDataLayout());
+
+ if (MIR->parseMachineFunctions(*Mod, MMI)) {
+ M.reset();
+ return nullptr;
+ }
+
+ return Mod;
+ }
+};
+
+TEST_F(MachineStableHashTest, StableGlobalName) {
+ auto TM = createTargetMachine(("aarch64--"), "", "");
+ if (!TM)
+ GTEST_SKIP();
+ StringRef MIRString = R"MIR(
+--- |
+ define void @f1() { ret void }
+ define void @f2() { ret void }
+ define void @f3() { ret void }
+ define void @f4() { ret void }
+ declare void @goo()
+ declare void @goo.llvm.123()
+ declare void @goo.__uniq.456()
+ declare void @goo.invalid.789()
+...
+---
+name: f1
+alignment: 16
+tracksRegLiveness: true
+frameInfo:
+ maxAlignment: 16
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $lr
+ BL @goo
+ RET undef $lr
+
+...
+---
+name: f2
+body: |
+ bb.0:
+ liveins: $lr
+ BL @goo.llvm.123
+ RET undef $lr
+...
+---
+name: f3
+body: |
+ bb.0:
+ liveins: $lr
+ BL @goo.__uniq.456
+ RET undef $lr
+...
+---
+name: f4
+body: |
+ bb.0:
+ liveins: $lr
+ BL @goo.invalid.789
+ RET undef $lr
+...
+)MIR";
+ MachineModuleInfo MMI(TM.get());
+ M = parseMIR(*TM, MIRString, MMI);
+ ASSERT_TRUE(M);
+ auto *MF1 = MMI.getMachineFunction(*M->getFunction("f1"));
+ auto *MF2 = MMI.getMachineFunction(*M->getFunction("f2"));
+ auto *MF3 = MMI.getMachineFunction(*M->getFunction("f3"));
+ auto *MF4 = MMI.getMachineFunction(*M->getFunction("f4"));
+
+ EXPECT_EQ(stableHashValue(*MF1), stableHashValue(*MF2))
+ << "Expect the suffix, `.llvm.{number}` to be ignored.";
+ EXPECT_EQ(stableHashValue(*MF1), stableHashValue(*MF3))
+ << "Expect the suffix, `.__uniq.{number}` to be ignored.";
+ // Do not ignore `.invalid.{number}`.
+ EXPECT_NE(stableHashValue(*MF1), stableHashValue(*MF4));
+}
More information about the llvm-commits
mailing list