[llvm] [StableHash] Implement stable global name for the hash computation (PR #106156)

via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 26 18:28:49 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-adt

Author: Kyungwoo Lee (kyulee-com)

<details>
<summary>Changes</summary>

LLVM often extends global names by adding suffixes to distinguish unique identities. However, these suffixes are not always stable across different runs and build environments. To address this issue, I implemented `get_stable_name` to ignore such suffixes and obtain the original name. This approach is not new, as PGO or Bolt already handle this issue similarly. Using the stable name obtained from `get_stable_name`, I implemented `stable_hash_name` while utilizing the same underlying `xxh3_64bit` algorithm as before.

---
Full diff: https://github.com/llvm/llvm-project/pull/106156.diff


4 Files Affected:

- (modified) llvm/include/llvm/ADT/StableHashing.h (+16) 
- (modified) llvm/lib/CodeGen/MachineStableHash.cpp (+15-6) 
- (modified) llvm/unittests/MIR/CMakeLists.txt (+1) 
- (added) llvm/unittests/MIR/MachineStableHashTest.cpp (+143) 


``````````diff
diff --git a/llvm/include/llvm/ADT/StableHashing.h b/llvm/include/llvm/ADT/StableHashing.h
index 7778f5d7c3a1c3..23a878b264da88 100644
--- a/llvm/include/llvm/ADT/StableHashing.h
+++ b/llvm/include/llvm/ADT/StableHashing.h
@@ -50,6 +50,22 @@ inline stable_hash stable_hash_combine(stable_hash A, stable_hash B,
   return stable_hash_combine(Hashes);
 }
 
+// Removes suffixes introduced by LLVM from the name to enhance stability and
+// maintain closeness to the original name across different builds.
+inline StringRef get_stable_name(StringRef Name) {
+  auto [P1, S1] = Name.rsplit(".llvm.");
+  auto [P2, S2] = P1.rsplit(".__uniq.");
+  return P2;
+}
+
+// Generates a consistent hash value for a given input name across different
+// program executions and environments. This function first converts the input
+// name into a stable form using the `get_stable_name` function, and then
+// computes a hash of this stable name.
+inline stable_hash stable_hash_name(StringRef Name) {
+  return xxh3_64bits(get_stable_name(Name));
+}
+
 } // namespace llvm
 
 #endif
diff --git a/llvm/lib/CodeGen/MachineStableHash.cpp b/llvm/lib/CodeGen/MachineStableHash.cpp
index 916acbf2d2cbf9..e88c6f37bbebd0 100644
--- a/llvm/lib/CodeGen/MachineStableHash.cpp
+++ b/llvm/lib/CodeGen/MachineStableHash.cpp
@@ -95,13 +95,21 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
   case MachineOperand::MO_Metadata:
     StableHashBailingMetadataUnsupported++;
     return 0;
-  case MachineOperand::MO_GlobalAddress:
-    StableHashBailingGlobalAddress++;
-    return 0;
+  case MachineOperand::MO_GlobalAddress: {
+    const GlobalValue *GV = MO.getGlobal();
+    if (!GV->hasName()) {
+      StableHashBailingGlobalAddress++;
+      return 0;
+    }
+    auto Name = GV->getName();
+    return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
+                               stable_hash_name(Name), MO.getOffset());
+  }
+
   case MachineOperand::MO_TargetIndex: {
     if (const char *Name = MO.getTargetIndexName())
       return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
-                                 xxh3_64bits(Name), MO.getOffset());
+                                 stable_hash_name(Name), MO.getOffset());
     StableHashBailingTargetIndexNoName++;
     return 0;
   }
@@ -113,7 +121,8 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
 
   case MachineOperand::MO_ExternalSymbol:
     return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
-                               MO.getOffset(), xxh3_64bits(MO.getSymbolName()));
+                               MO.getOffset(),
+                               stable_hash_name(MO.getSymbolName()));
 
   case MachineOperand::MO_RegisterMask:
   case MachineOperand::MO_RegisterLiveOut: {
@@ -149,7 +158,7 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
   case MachineOperand::MO_MCSymbol: {
     auto SymbolName = MO.getMCSymbol()->getName();
     return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
-                               xxh3_64bits(SymbolName));
+                               stable_hash_name(SymbolName));
   }
   case MachineOperand::MO_CFIIndex:
     return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
diff --git a/llvm/unittests/MIR/CMakeLists.txt b/llvm/unittests/MIR/CMakeLists.txt
index d6aff46eff07b7..206094266ba148 100644
--- a/llvm/unittests/MIR/CMakeLists.txt
+++ b/llvm/unittests/MIR/CMakeLists.txt
@@ -15,6 +15,7 @@ set(LLVM_LINK_COMPONENTS
 
 add_llvm_unittest(MIRTests
   MachineMetadata.cpp
+  MachineStableHashTest.cpp
   )
 
 target_link_libraries(MIRTests PRIVATE LLVMTestingSupport)
diff --git a/llvm/unittests/MIR/MachineStableHashTest.cpp b/llvm/unittests/MIR/MachineStableHashTest.cpp
new file mode 100644
index 00000000000000..19cb7ebb25cdfd
--- /dev/null
+++ b/llvm/unittests/MIR/MachineStableHashTest.cpp
@@ -0,0 +1,143 @@
+//===- MachineStableHashTest.cpp ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineStableHash.h"
+#include "llvm/CodeGen/MIRParser/MIRParser.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/FileCheck/FileCheck.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetMachine.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+class MachineStableHashTest : public testing::Test {
+public:
+  MachineStableHashTest() {}
+
+protected:
+  LLVMContext Context;
+  std::unique_ptr<Module> M;
+  std::unique_ptr<MIRParser> MIR;
+
+  static void SetUpTestCase() {
+    InitializeAllTargetInfos();
+    InitializeAllTargets();
+    InitializeAllTargetMCs();
+  }
+
+  void SetUp() override { M = std::make_unique<Module>("Dummy", Context); }
+
+  std::unique_ptr<LLVMTargetMachine>
+  createTargetMachine(std::string TT, StringRef CPU, StringRef FS) {
+    std::string Error;
+    const Target *T = TargetRegistry::lookupTarget(TT, Error);
+    if (!T)
+      return nullptr;
+    TargetOptions Options;
+    return std::unique_ptr<LLVMTargetMachine>(
+        static_cast<LLVMTargetMachine *>(T->createTargetMachine(
+            TT, CPU, FS, Options, std::nullopt, std::nullopt)));
+  }
+
+  std::unique_ptr<Module> parseMIR(const TargetMachine &TM, StringRef MIRCode,
+                                   MachineModuleInfo &MMI) {
+    SMDiagnostic Diagnostic;
+    std::unique_ptr<MemoryBuffer> MBuffer = MemoryBuffer::getMemBuffer(MIRCode);
+    MIR = createMIRParser(std::move(MBuffer), Context);
+    if (!MIR)
+      return nullptr;
+
+    std::unique_ptr<Module> Mod = MIR->parseIRModule();
+    if (!Mod)
+      return nullptr;
+
+    Mod->setDataLayout(TM.createDataLayout());
+
+    if (MIR->parseMachineFunctions(*Mod, MMI)) {
+      M.reset();
+      return nullptr;
+    }
+
+    return Mod;
+  }
+};
+
+TEST_F(MachineStableHashTest, StableGlobalName) {
+  auto TM = createTargetMachine(("aarch64--"), "", "");
+  if (!TM)
+    GTEST_SKIP();
+  StringRef MIRString = R"MIR(
+--- |
+  define void @f1() { ret void }
+  define void @f2() { ret void }
+  define void @f3() { ret void }
+  define void @f4() { ret void }
+  declare void @goo()
+  declare void @goo.llvm.123()
+  declare void @goo.__uniq.456()
+  declare void @goo.invalid.789()
+...
+---
+name:            f1
+alignment:       16
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    16
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+  liveins: $lr
+    BL @goo
+  RET undef $lr
+
+...
+---
+name:            f2
+body:             |
+  bb.0:
+  liveins: $lr
+    BL @goo.llvm.123
+  RET undef $lr
+...
+---
+name:            f3
+body:             |
+  bb.0:
+  liveins: $lr
+    BL @goo.__uniq.456
+  RET undef $lr
+...
+---
+name:            f4
+body:             |
+  bb.0:
+  liveins: $lr
+    BL @goo.invalid.789
+  RET undef $lr
+...
+)MIR";
+  MachineModuleInfo MMI(TM.get());
+  M = parseMIR(*TM, MIRString, MMI);
+  ASSERT_TRUE(M);
+  auto *MF1 = MMI.getMachineFunction(*M->getFunction("f1"));
+  auto *MF2 = MMI.getMachineFunction(*M->getFunction("f2"));
+  auto *MF3 = MMI.getMachineFunction(*M->getFunction("f3"));
+  auto *MF4 = MMI.getMachineFunction(*M->getFunction("f4"));
+
+  // Expect the suffix, `.llvm.{number}` to be ignored.
+  EXPECT_EQ(stableHashValue(*MF1), stableHashValue(*MF2));
+  // Expect the suffix, `.__uniq.{number}` to be ignored.
+  EXPECT_EQ(stableHashValue(*MF1), stableHashValue(*MF3));
+  // Do not ignore `.invalid.{number}`.
+  EXPECT_NE(stableHashValue(*MF1), stableHashValue(*MF4));
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/106156


More information about the llvm-commits mailing list