[llvm] f9ad249 - [StableHash] Implement stable global name for the hash computation (#106156)

via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 27 15:09:10 PDT 2024


Author: Kyungwoo Lee
Date: 2024-08-27T15:09:06-07:00
New Revision: f9ad24946026c45e22c445f8ebc79b8b651e1dad

URL: https://github.com/llvm/llvm-project/commit/f9ad24946026c45e22c445f8ebc79b8b651e1dad
DIFF: https://github.com/llvm/llvm-project/commit/f9ad24946026c45e22c445f8ebc79b8b651e1dad.diff

LOG: [StableHash] Implement stable global name for the hash computation (#106156)

LLVM often extends global names by adding suffixes to distinguish unique
identities. However, these suffixes are not always stable across
different runs and build environments. To address this issue, I
implemented `get_stable_name` to ignore such suffixes and obtain the
original name. This approach is not new, as PGO or Bolt already handle
this issue similarly. Using the stable name obtained from
`get_stable_name`, I implemented `stable_hash_name` while utilizing the
same underlying `xxh3_64bit` algorithm as before.

Added: 
    llvm/unittests/MIR/MachineStableHashTest.cpp

Modified: 
    llvm/include/llvm/ADT/StableHashing.h
    llvm/lib/CodeGen/MachineStableHash.cpp
    llvm/unittests/MIR/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/ADT/StableHashing.h b/llvm/include/llvm/ADT/StableHashing.h
index 7778f5d7c3a1c3..7852199f8b0a00 100644
--- a/llvm/include/llvm/ADT/StableHashing.h
+++ b/llvm/include/llvm/ADT/StableHashing.h
@@ -50,6 +50,23 @@ inline stable_hash stable_hash_combine(stable_hash A, stable_hash B,
   return stable_hash_combine(Hashes);
 }
 
+// Removes suffixes introduced by LLVM from the name to enhance stability and
+// maintain closeness to the original name across 
diff erent builds.
+inline StringRef get_stable_name(StringRef Name) {
+  auto [P1, S1] = Name.rsplit(".llvm.");
+  auto [P2, S2] = P1.rsplit(".__uniq.");
+  return P2;
+}
+
+// Generates a consistent hash value for a given input name across 
diff erent
+// program executions and environments. This function first converts the input
+// name into a stable form using the `get_stable_name` function, and then
+// computes a hash of this stable name. For instance, `foo.llvm.1234` would have
+// the same hash as `foo.llvm.5678.
+inline stable_hash stable_hash_name(StringRef Name) {
+  return xxh3_64bits(get_stable_name(Name));
+}
+
 } // namespace llvm
 
 #endif

diff  --git a/llvm/lib/CodeGen/MachineStableHash.cpp b/llvm/lib/CodeGen/MachineStableHash.cpp
index 916acbf2d2cbf9..51348c095411cf 100644
--- a/llvm/lib/CodeGen/MachineStableHash.cpp
+++ b/llvm/lib/CodeGen/MachineStableHash.cpp
@@ -84,25 +84,33 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
   }
 
   case MachineOperand::MO_MachineBasicBlock:
-    StableHashBailingMachineBasicBlock++;
+    ++StableHashBailingMachineBasicBlock;
     return 0;
   case MachineOperand::MO_ConstantPoolIndex:
-    StableHashBailingConstantPoolIndex++;
+    ++StableHashBailingConstantPoolIndex;
     return 0;
   case MachineOperand::MO_BlockAddress:
-    StableHashBailingBlockAddress++;
+    ++StableHashBailingBlockAddress;
     return 0;
   case MachineOperand::MO_Metadata:
-    StableHashBailingMetadataUnsupported++;
-    return 0;
-  case MachineOperand::MO_GlobalAddress:
-    StableHashBailingGlobalAddress++;
+    ++StableHashBailingMetadataUnsupported;
     return 0;
+  case MachineOperand::MO_GlobalAddress: {
+    const GlobalValue *GV = MO.getGlobal();
+    if (!GV->hasName()) {
+      ++StableHashBailingGlobalAddress;
+      return 0;
+    }
+    auto Name = GV->getName();
+    return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
+                               stable_hash_name(Name), MO.getOffset());
+  }
+
   case MachineOperand::MO_TargetIndex: {
     if (const char *Name = MO.getTargetIndexName())
       return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
-                                 xxh3_64bits(Name), MO.getOffset());
-    StableHashBailingTargetIndexNoName++;
+                                 stable_hash_name(Name), MO.getOffset());
+    ++StableHashBailingTargetIndexNoName;
     return 0;
   }
 
@@ -113,7 +121,8 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
 
   case MachineOperand::MO_ExternalSymbol:
     return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
-                               MO.getOffset(), xxh3_64bits(MO.getSymbolName()));
+                               MO.getOffset(),
+                               stable_hash_name(MO.getSymbolName()));
 
   case MachineOperand::MO_RegisterMask:
   case MachineOperand::MO_RegisterLiveOut: {
@@ -149,7 +158,7 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
   case MachineOperand::MO_MCSymbol: {
     auto SymbolName = MO.getMCSymbol()->getName();
     return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
-                               xxh3_64bits(SymbolName));
+                               stable_hash_name(SymbolName));
   }
   case MachineOperand::MO_CFIIndex:
     return stable_hash_combine(MO.getType(), MO.getTargetFlags(),

diff  --git a/llvm/unittests/MIR/CMakeLists.txt b/llvm/unittests/MIR/CMakeLists.txt
index d6aff46eff07b7..206094266ba148 100644
--- a/llvm/unittests/MIR/CMakeLists.txt
+++ b/llvm/unittests/MIR/CMakeLists.txt
@@ -15,6 +15,7 @@ set(LLVM_LINK_COMPONENTS
 
 add_llvm_unittest(MIRTests
   MachineMetadata.cpp
+  MachineStableHashTest.cpp
   )
 
 target_link_libraries(MIRTests PRIVATE LLVMTestingSupport)

diff  --git a/llvm/unittests/MIR/MachineStableHashTest.cpp b/llvm/unittests/MIR/MachineStableHashTest.cpp
new file mode 100644
index 00000000000000..c6b99123d4bd2a
--- /dev/null
+++ b/llvm/unittests/MIR/MachineStableHashTest.cpp
@@ -0,0 +1,143 @@
+//===- MachineStableHashTest.cpp ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineStableHash.h"
+#include "llvm/CodeGen/MIRParser/MIRParser.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/FileCheck/FileCheck.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetMachine.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+class MachineStableHashTest : public testing::Test {
+public:
+  MachineStableHashTest() {}
+
+protected:
+  LLVMContext Context;
+  std::unique_ptr<Module> M;
+  std::unique_ptr<MIRParser> MIR;
+
+  static void SetUpTestCase() {
+    InitializeAllTargetInfos();
+    InitializeAllTargets();
+    InitializeAllTargetMCs();
+  }
+
+  void SetUp() override { M = std::make_unique<Module>("Dummy", Context); }
+
+  std::unique_ptr<LLVMTargetMachine>
+  createTargetMachine(std::string TT, StringRef CPU, StringRef FS) {
+    std::string Error;
+    const Target *T = TargetRegistry::lookupTarget(TT, Error);
+    if (!T)
+      return nullptr;
+    TargetOptions Options;
+    return std::unique_ptr<LLVMTargetMachine>(
+        static_cast<LLVMTargetMachine *>(T->createTargetMachine(
+            TT, CPU, FS, Options, std::nullopt, std::nullopt)));
+  }
+
+  std::unique_ptr<Module> parseMIR(const TargetMachine &TM, StringRef MIRCode,
+                                   MachineModuleInfo &MMI) {
+    SMDiagnostic Diagnostic;
+    std::unique_ptr<MemoryBuffer> MBuffer = MemoryBuffer::getMemBuffer(MIRCode);
+    MIR = createMIRParser(std::move(MBuffer), Context);
+    if (!MIR)
+      return nullptr;
+
+    std::unique_ptr<Module> Mod = MIR->parseIRModule();
+    if (!Mod)
+      return nullptr;
+
+    Mod->setDataLayout(TM.createDataLayout());
+
+    if (MIR->parseMachineFunctions(*Mod, MMI)) {
+      M.reset();
+      return nullptr;
+    }
+
+    return Mod;
+  }
+};
+
+TEST_F(MachineStableHashTest, StableGlobalName) {
+  auto TM = createTargetMachine(("aarch64--"), "", "");
+  if (!TM)
+    GTEST_SKIP();
+  StringRef MIRString = R"MIR(
+--- |
+  define void @f1() { ret void }
+  define void @f2() { ret void }
+  define void @f3() { ret void }
+  define void @f4() { ret void }
+  declare void @goo()
+  declare void @goo.llvm.123()
+  declare void @goo.__uniq.456()
+  declare void @goo.invalid.789()
+...
+---
+name:            f1
+alignment:       16
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    16
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+  liveins: $lr
+    BL @goo
+  RET undef $lr
+
+...
+---
+name:            f2
+body:             |
+  bb.0:
+  liveins: $lr
+    BL @goo.llvm.123
+  RET undef $lr
+...
+---
+name:            f3
+body:             |
+  bb.0:
+  liveins: $lr
+    BL @goo.__uniq.456
+  RET undef $lr
+...
+---
+name:            f4
+body:             |
+  bb.0:
+  liveins: $lr
+    BL @goo.invalid.789
+  RET undef $lr
+...
+)MIR";
+  MachineModuleInfo MMI(TM.get());
+  M = parseMIR(*TM, MIRString, MMI);
+  ASSERT_TRUE(M);
+  auto *MF1 = MMI.getMachineFunction(*M->getFunction("f1"));
+  auto *MF2 = MMI.getMachineFunction(*M->getFunction("f2"));
+  auto *MF3 = MMI.getMachineFunction(*M->getFunction("f3"));
+  auto *MF4 = MMI.getMachineFunction(*M->getFunction("f4"));
+
+  EXPECT_EQ(stableHashValue(*MF1), stableHashValue(*MF2))
+      << "Expect the suffix, `.llvm.{number}` to be ignored.";
+  EXPECT_EQ(stableHashValue(*MF1), stableHashValue(*MF3))
+      << "Expect the suffix, `.__uniq.{number}` to be ignored.";
+  // Do not ignore `.invalid.{number}`.
+  EXPECT_NE(stableHashValue(*MF1), stableHashValue(*MF4));
+}


        


More information about the llvm-commits mailing list