[llvm] [StableHash] Implement stable global name for the hash computation (PR #106156)

Kyungwoo Lee via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 27 13:38:04 PDT 2024


https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/106156

>From 918459ecc481e6ba8373fb28015737aaabdc4386 Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Mon, 26 Aug 2024 14:03:56 -0700
Subject: [PATCH 1/2] [StableHash] Implement stable global name for the hash
 computation

LLVM often extends global names by adding suffixes to distinguish unique identities.
However, these suffixes are not always stable across different runs and build environments.
To address this issue, I implemented `get_stable_name` to ignore such suffixes and obtain the original name. This approach is not new, as PGO or Bolt already handle this issue similarly. Using the stable name obtained from `get_stable_name`, I implemented `stable_hash_name` while utilizing the same underlying `xxh3_64bit` algorithm as before.
---
 llvm/include/llvm/ADT/StableHashing.h        |  16 +++
 llvm/lib/CodeGen/MachineStableHash.cpp       |  21 ++-
 llvm/unittests/MIR/CMakeLists.txt            |   1 +
 llvm/unittests/MIR/MachineStableHashTest.cpp | 143 +++++++++++++++++++
 4 files changed, 175 insertions(+), 6 deletions(-)
 create mode 100644 llvm/unittests/MIR/MachineStableHashTest.cpp

diff --git a/llvm/include/llvm/ADT/StableHashing.h b/llvm/include/llvm/ADT/StableHashing.h
index 7778f5d7c3a1c3..23a878b264da88 100644
--- a/llvm/include/llvm/ADT/StableHashing.h
+++ b/llvm/include/llvm/ADT/StableHashing.h
@@ -50,6 +50,22 @@ inline stable_hash stable_hash_combine(stable_hash A, stable_hash B,
   return stable_hash_combine(Hashes);
 }
 
+// Removes suffixes introduced by LLVM from the name to enhance stability and
+// maintain closeness to the original name across different builds.
+inline StringRef get_stable_name(StringRef Name) {
+  auto [P1, S1] = Name.rsplit(".llvm.");
+  auto [P2, S2] = P1.rsplit(".__uniq.");
+  return P2;
+}
+
+// Generates a consistent hash value for a given input name across different
+// program executions and environments. This function first converts the input
+// name into a stable form using the `get_stable_name` function, and then
+// computes a hash of this stable name.
+inline stable_hash stable_hash_name(StringRef Name) {
+  return xxh3_64bits(get_stable_name(Name));
+}
+
 } // namespace llvm
 
 #endif
diff --git a/llvm/lib/CodeGen/MachineStableHash.cpp b/llvm/lib/CodeGen/MachineStableHash.cpp
index 916acbf2d2cbf9..e88c6f37bbebd0 100644
--- a/llvm/lib/CodeGen/MachineStableHash.cpp
+++ b/llvm/lib/CodeGen/MachineStableHash.cpp
@@ -95,13 +95,21 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
   case MachineOperand::MO_Metadata:
     StableHashBailingMetadataUnsupported++;
     return 0;
-  case MachineOperand::MO_GlobalAddress:
-    StableHashBailingGlobalAddress++;
-    return 0;
+  case MachineOperand::MO_GlobalAddress: {
+    const GlobalValue *GV = MO.getGlobal();
+    if (!GV->hasName()) {
+      StableHashBailingGlobalAddress++;
+      return 0;
+    }
+    auto Name = GV->getName();
+    return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
+                               stable_hash_name(Name), MO.getOffset());
+  }
+
   case MachineOperand::MO_TargetIndex: {
     if (const char *Name = MO.getTargetIndexName())
       return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
-                                 xxh3_64bits(Name), MO.getOffset());
+                                 stable_hash_name(Name), MO.getOffset());
     StableHashBailingTargetIndexNoName++;
     return 0;
   }
@@ -113,7 +121,8 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
 
   case MachineOperand::MO_ExternalSymbol:
     return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
-                               MO.getOffset(), xxh3_64bits(MO.getSymbolName()));
+                               MO.getOffset(),
+                               stable_hash_name(MO.getSymbolName()));
 
   case MachineOperand::MO_RegisterMask:
   case MachineOperand::MO_RegisterLiveOut: {
@@ -149,7 +158,7 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
   case MachineOperand::MO_MCSymbol: {
     auto SymbolName = MO.getMCSymbol()->getName();
     return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
-                               xxh3_64bits(SymbolName));
+                               stable_hash_name(SymbolName));
   }
   case MachineOperand::MO_CFIIndex:
     return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
diff --git a/llvm/unittests/MIR/CMakeLists.txt b/llvm/unittests/MIR/CMakeLists.txt
index d6aff46eff07b7..206094266ba148 100644
--- a/llvm/unittests/MIR/CMakeLists.txt
+++ b/llvm/unittests/MIR/CMakeLists.txt
@@ -15,6 +15,7 @@ set(LLVM_LINK_COMPONENTS
 
 add_llvm_unittest(MIRTests
   MachineMetadata.cpp
+  MachineStableHashTest.cpp
   )
 
 target_link_libraries(MIRTests PRIVATE LLVMTestingSupport)
diff --git a/llvm/unittests/MIR/MachineStableHashTest.cpp b/llvm/unittests/MIR/MachineStableHashTest.cpp
new file mode 100644
index 00000000000000..19cb7ebb25cdfd
--- /dev/null
+++ b/llvm/unittests/MIR/MachineStableHashTest.cpp
@@ -0,0 +1,143 @@
+//===- MachineStableHashTest.cpp ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineStableHash.h"
+#include "llvm/CodeGen/MIRParser/MIRParser.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/FileCheck/FileCheck.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetMachine.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+class MachineStableHashTest : public testing::Test {
+public:
+  MachineStableHashTest() {}
+
+protected:
+  LLVMContext Context;
+  std::unique_ptr<Module> M;
+  std::unique_ptr<MIRParser> MIR;
+
+  static void SetUpTestCase() {
+    InitializeAllTargetInfos();
+    InitializeAllTargets();
+    InitializeAllTargetMCs();
+  }
+
+  void SetUp() override { M = std::make_unique<Module>("Dummy", Context); }
+
+  std::unique_ptr<LLVMTargetMachine>
+  createTargetMachine(std::string TT, StringRef CPU, StringRef FS) {
+    std::string Error;
+    const Target *T = TargetRegistry::lookupTarget(TT, Error);
+    if (!T)
+      return nullptr;
+    TargetOptions Options;
+    return std::unique_ptr<LLVMTargetMachine>(
+        static_cast<LLVMTargetMachine *>(T->createTargetMachine(
+            TT, CPU, FS, Options, std::nullopt, std::nullopt)));
+  }
+
+  std::unique_ptr<Module> parseMIR(const TargetMachine &TM, StringRef MIRCode,
+                                   MachineModuleInfo &MMI) {
+    SMDiagnostic Diagnostic;
+    std::unique_ptr<MemoryBuffer> MBuffer = MemoryBuffer::getMemBuffer(MIRCode);
+    MIR = createMIRParser(std::move(MBuffer), Context);
+    if (!MIR)
+      return nullptr;
+
+    std::unique_ptr<Module> Mod = MIR->parseIRModule();
+    if (!Mod)
+      return nullptr;
+
+    Mod->setDataLayout(TM.createDataLayout());
+
+    if (MIR->parseMachineFunctions(*Mod, MMI)) {
+      M.reset();
+      return nullptr;
+    }
+
+    return Mod;
+  }
+};
+
+TEST_F(MachineStableHashTest, StableGlobalName) {
+  auto TM = createTargetMachine(("aarch64--"), "", "");
+  if (!TM)
+    GTEST_SKIP();
+  StringRef MIRString = R"MIR(
+--- |
+  define void @f1() { ret void }
+  define void @f2() { ret void }
+  define void @f3() { ret void }
+  define void @f4() { ret void }
+  declare void @goo()
+  declare void @goo.llvm.123()
+  declare void @goo.__uniq.456()
+  declare void @goo.invalid.789()
+...
+---
+name:            f1
+alignment:       16
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    16
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+  liveins: $lr
+    BL @goo
+  RET undef $lr
+
+...
+---
+name:            f2
+body:             |
+  bb.0:
+  liveins: $lr
+    BL @goo.llvm.123
+  RET undef $lr
+...
+---
+name:            f3
+body:             |
+  bb.0:
+  liveins: $lr
+    BL @goo.__uniq.456
+  RET undef $lr
+...
+---
+name:            f4
+body:             |
+  bb.0:
+  liveins: $lr
+    BL @goo.invalid.789
+  RET undef $lr
+...
+)MIR";
+  MachineModuleInfo MMI(TM.get());
+  M = parseMIR(*TM, MIRString, MMI);
+  ASSERT_TRUE(M);
+  auto *MF1 = MMI.getMachineFunction(*M->getFunction("f1"));
+  auto *MF2 = MMI.getMachineFunction(*M->getFunction("f2"));
+  auto *MF3 = MMI.getMachineFunction(*M->getFunction("f3"));
+  auto *MF4 = MMI.getMachineFunction(*M->getFunction("f4"));
+
+  // Expect the suffix, `.llvm.{number}` to be ignored.
+  EXPECT_EQ(stableHashValue(*MF1), stableHashValue(*MF2));
+  // Expect the suffix, `.__uniq.{number}` to be ignored.
+  EXPECT_EQ(stableHashValue(*MF1), stableHashValue(*MF3));
+  // Do not ignore `.invalid.{number}`.
+  EXPECT_NE(stableHashValue(*MF1), stableHashValue(*MF4));
+}

>From 612853fe38e40308f8a45f90595ad9b3d2961222 Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Tue, 27 Aug 2024 13:22:56 -0700
Subject: [PATCH 2/2] Address comments from ellishg

---
 llvm/lib/CodeGen/MachineStableHash.cpp       | 12 ++++++------
 llvm/unittests/MIR/MachineStableHashTest.cpp |  8 ++++----
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/CodeGen/MachineStableHash.cpp b/llvm/lib/CodeGen/MachineStableHash.cpp
index e88c6f37bbebd0..51348c095411cf 100644
--- a/llvm/lib/CodeGen/MachineStableHash.cpp
+++ b/llvm/lib/CodeGen/MachineStableHash.cpp
@@ -84,21 +84,21 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
   }
 
   case MachineOperand::MO_MachineBasicBlock:
-    StableHashBailingMachineBasicBlock++;
+    ++StableHashBailingMachineBasicBlock;
     return 0;
   case MachineOperand::MO_ConstantPoolIndex:
-    StableHashBailingConstantPoolIndex++;
+    ++StableHashBailingConstantPoolIndex;
     return 0;
   case MachineOperand::MO_BlockAddress:
-    StableHashBailingBlockAddress++;
+    ++StableHashBailingBlockAddress;
     return 0;
   case MachineOperand::MO_Metadata:
-    StableHashBailingMetadataUnsupported++;
+    ++StableHashBailingMetadataUnsupported;
     return 0;
   case MachineOperand::MO_GlobalAddress: {
     const GlobalValue *GV = MO.getGlobal();
     if (!GV->hasName()) {
-      StableHashBailingGlobalAddress++;
+      ++StableHashBailingGlobalAddress;
       return 0;
     }
     auto Name = GV->getName();
@@ -110,7 +110,7 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
     if (const char *Name = MO.getTargetIndexName())
       return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
                                  stable_hash_name(Name), MO.getOffset());
-    StableHashBailingTargetIndexNoName++;
+    ++StableHashBailingTargetIndexNoName;
     return 0;
   }
 
diff --git a/llvm/unittests/MIR/MachineStableHashTest.cpp b/llvm/unittests/MIR/MachineStableHashTest.cpp
index 19cb7ebb25cdfd..c6b99123d4bd2a 100644
--- a/llvm/unittests/MIR/MachineStableHashTest.cpp
+++ b/llvm/unittests/MIR/MachineStableHashTest.cpp
@@ -134,10 +134,10 @@ body:             |
   auto *MF3 = MMI.getMachineFunction(*M->getFunction("f3"));
   auto *MF4 = MMI.getMachineFunction(*M->getFunction("f4"));
 
-  // Expect the suffix, `.llvm.{number}` to be ignored.
-  EXPECT_EQ(stableHashValue(*MF1), stableHashValue(*MF2));
-  // Expect the suffix, `.__uniq.{number}` to be ignored.
-  EXPECT_EQ(stableHashValue(*MF1), stableHashValue(*MF3));
+  EXPECT_EQ(stableHashValue(*MF1), stableHashValue(*MF2))
+      << "Expect the suffix, `.llvm.{number}` to be ignored.";
+  EXPECT_EQ(stableHashValue(*MF1), stableHashValue(*MF3))
+      << "Expect the suffix, `.__uniq.{number}` to be ignored.";
   // Do not ignore `.invalid.{number}`.
   EXPECT_NE(stableHashValue(*MF1), stableHashValue(*MF4));
 }



More information about the llvm-commits mailing list