[llvm] [StructuralHash] Global Variable (PR #118412)
Kyungwoo Lee via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 3 09:39:37 PST 2024
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/118412
>From fb58d52684282d50d3baab88a5d25a1a9b94111b Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Mon, 2 Dec 2024 01:20:33 -0800
Subject: [PATCH 1/5] structural hash for global var
---
llvm/include/llvm/IR/StructuralHash.h | 3 +
llvm/lib/CodeGen/MachineStableHash.cpp | 13 ++-
llvm/lib/IR/StructuralHash.cpp | 51 +++++++++--
.../test/CodeGen/AArch64/cgdata-merge-gvar.ll | 91 +++++++++++++++++++
.../CodeGen/AArch64/cgdata-outline-gvar.ll | 52 +++++++++++
5 files changed, 200 insertions(+), 10 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/cgdata-merge-gvar.ll
create mode 100644 llvm/test/CodeGen/AArch64/cgdata-outline-gvar.ll
diff --git a/llvm/include/llvm/IR/StructuralHash.h b/llvm/include/llvm/IR/StructuralHash.h
index 071575137ff572..514dd6f174b903 100644
--- a/llvm/include/llvm/IR/StructuralHash.h
+++ b/llvm/include/llvm/IR/StructuralHash.h
@@ -31,6 +31,9 @@ class Module;
/// to true includes instruction and operand type information.
stable_hash StructuralHash(const Function &F, bool DetailedHash = false);
+/// Returns a hash of the global variable \p G.
+stable_hash StructuralHash(const GlobalVariable &G);
+
/// Returns a hash of the module \p M by hashing all functions and global
/// variables contained within. \param M The module to hash. \param DetailedHash
/// Whether or not to encode additional information in the function hashes that
diff --git a/llvm/lib/CodeGen/MachineStableHash.cpp b/llvm/lib/CodeGen/MachineStableHash.cpp
index facda7a59e2f86..09a81cb318ecb7 100644
--- a/llvm/lib/CodeGen/MachineStableHash.cpp
+++ b/llvm/lib/CodeGen/MachineStableHash.cpp
@@ -27,6 +27,8 @@
#include "llvm/CodeGen/Register.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/StructuralHash.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/ErrorHandling.h"
@@ -97,9 +99,14 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
++StableHashBailingGlobalAddress;
return 0;
}
- auto Name = GV->getName();
- return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
- stable_hash_name(Name), MO.getOffset());
+ stable_hash GVHash = 0;
+ if (auto *GVar = dyn_cast<GlobalVariable>(GV))
+ GVHash = StructuralHash(*GVar);
+ if (!GVHash)
+ GVHash = stable_hash_name(GV->getName());
+
+ return stable_hash_combine(MO.getType(), MO.getTargetFlags(), GVHash,
+ MO.getOffset());
}
case MachineOperand::MO_TargetIndex: {
diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp
index ccc534a8904191..de883f81a8e4a5 100644
--- a/llvm/lib/IR/StructuralHash.cpp
+++ b/llvm/lib/IR/StructuralHash.cpp
@@ -46,7 +46,7 @@ class StructuralHashImpl {
/// Assign a unique ID to each Value in the order they are first seen.
DenseMap<const Value *, int> ValueToId;
- stable_hash hashType(Type *ValueType) {
+ static stable_hash hashType(Type *ValueType) {
SmallVector<stable_hash> Hashes;
Hashes.emplace_back(ValueType->getTypeID());
if (ValueType->isIntegerTy())
@@ -65,7 +65,7 @@ class StructuralHashImpl {
}
}
- stable_hash hashAPInt(const APInt &I) {
+ static stable_hash hashAPInt(const APInt &I) {
SmallVector<stable_hash> Hashes;
Hashes.emplace_back(I.getBitWidth());
auto RawVals = ArrayRef<uint64_t>(I.getRawData(), I.getNumWords());
@@ -73,11 +73,36 @@ class StructuralHashImpl {
return stable_hash_combine(Hashes);
}
- stable_hash hashAPFloat(const APFloat &F) {
+ static stable_hash hashAPFloat(const APFloat &F) {
return hashAPInt(F.bitcastToAPInt());
}
- stable_hash hashGlobalValue(const GlobalValue *GV) {
+ static stable_hash hashGlobalVariable(const GlobalVariable &GVar) {
+ if (!GVar.hasInitializer())
+ return hashGlobalValue(&GVar);
+
+ // Hash the contents of a string.
+ if (GVar.getName().starts_with(".str"))
+ return hashConstant(GVar.getInitializer());
+
+ // Hash structural contents of Objective-C metadata in specific sections.
+ // This can be extended to other metadata if needed.
+ static constexpr const char *SectionNames[] = {
+ "__cfstring", "__cstring", "__objc_classrefs",
+ "__objc_methname", "__objc_selrefs",
+ };
+ if (GVar.hasSection()) {
+ StringRef SectionName = GVar.getSection();
+ for (const char *Name : SectionNames) {
+ if (SectionName.contains(Name))
+ return hashConstant(GVar.getInitializer());
+ }
+ }
+
+ return hashGlobalValue(&GVar);
+ }
+
+ static stable_hash hashGlobalValue(const GlobalValue *GV) {
if (!GV->hasName())
return 0;
return stable_hash_name(GV->getName());
@@ -87,7 +112,7 @@ class StructuralHashImpl {
// FunctionComparator::cmpConstants() in FunctionComparator.cpp, but here
// we're interested in computing a hash rather than comparing two Constants.
// Some of the logic is simplified, e.g, we don't expand GEPOperator.
- stable_hash hashConstant(Constant *C) {
+ static stable_hash hashConstant(const Constant *C) {
SmallVector<stable_hash> Hashes;
Type *Ty = C->getType();
@@ -98,14 +123,21 @@ class StructuralHashImpl {
return stable_hash_combine(Hashes);
}
+ if (auto *GVar = dyn_cast<GlobalVariable>(C)) {
+ Hashes.emplace_back(hashGlobalVariable(*GVar));
+ return stable_hash_combine(Hashes);
+ }
+
if (auto *G = dyn_cast<GlobalValue>(C)) {
Hashes.emplace_back(hashGlobalValue(G));
return stable_hash_combine(Hashes);
}
if (const auto *Seq = dyn_cast<ConstantDataSequential>(C)) {
- Hashes.emplace_back(xxh3_64bits(Seq->getRawDataValues()));
- return stable_hash_combine(Hashes);
+ if (Seq->isString()) {
+ Hashes.emplace_back(stable_hash_name(Seq->getAsString()));
+ return stable_hash_combine(Hashes);
+ }
}
switch (C->getValueID()) {
@@ -266,6 +298,7 @@ class StructuralHashImpl {
Hashes.emplace_back(Hash);
Hashes.emplace_back(GlobalHeaderHash);
Hashes.emplace_back(GV.getValueType()->getTypeID());
+ Hashes.emplace_back(hashGlobalVariable(GV));
// Update the combined hash in place.
Hash = stable_hash_combine(Hashes);
@@ -297,6 +330,10 @@ stable_hash llvm::StructuralHash(const Function &F, bool DetailedHash) {
return H.getHash();
}
+stable_hash llvm::StructuralHash(const GlobalVariable &GVar) {
+ return StructuralHashImpl::hashGlobalVariable(GVar);
+}
+
stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) {
StructuralHashImpl H(DetailedHash);
H.update(M);
diff --git a/llvm/test/CodeGen/AArch64/cgdata-merge-gvar.ll b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar.ll
new file mode 100644
index 00000000000000..f1f5209abe3507
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar.ll
@@ -0,0 +1,91 @@
+; This test verifies that global variables are hashed based on their initial contents,
+; allowing them to be merged even if they appear different due to their names.
+; Now they become identical functions that can be merged without creating a paramter.
+
+; RUN: rm -rf %t && split-file %s %t
+
+; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %t/string.ll | FileCheck %s
+; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %t/ns-const.ll | FileCheck %s
+; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %t/objc-ref.ll | FileCheck %s
+
+; CHECK: _f1.Tgm
+; CHECK: _f2.Tgm
+
+;--- string.ll
+
+@.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1
+@.str.1 = private unnamed_addr constant [6 x i8] c"hello\00", align 1
+
+declare noundef i32 @goo(ptr noundef)
+
+define i32 @f1() {
+entry:
+ %call = tail call noundef i32 @goo(ptr noundef nonnull @.str)
+ %add = add nsw i32 %call, 1
+ ret i32 %add
+}
+
+define i32 @f2() {
+entry:
+ %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.1)
+ %add = add nsw i32 %call, 1
+ ret i32 %add
+}
+
+;--- ns-const.ll
+
+%struct.__NSConstantString_tag = type { ptr, i32, ptr, i64 }
+@__CFConstantStringClassReference = external global [0 x i32]
+@.str.2 = private unnamed_addr constant [9 x i8] c"cfstring\00", section "__TEXT,__cstring,cstring_literals", align 1
+@_unnamed_cfstring_ = private global %struct.__NSConstantString_tag { ptr @__CFConstantStringClassReference, i32 1992, ptr @.str.2, i64 8 }, section "__DATA,__cfstring", align 8
+
+@.str.3 = private unnamed_addr constant [9 x i8] c"cfstring\00", section "__TEXT,__cstring,cstring_literals", align 1
+@_unnamed_cfstring_.2 = private global %struct.__NSConstantString_tag { ptr @__CFConstantStringClassReference, i32 1992, ptr @.str.3, i64 8 }, section "__DATA,__cfstring", align 8
+
+declare noundef i32 @hoo(ptr noundef)
+
+define i32 @f1() {
+entry:
+ %call = tail call i32 @hoo(ptr noundef nonnull @_unnamed_cfstring_)
+ %add = sub nsw i32 %call, 1
+ ret i32 %add
+}
+
+define i32 @f2() {
+entry:
+ %call = tail call i32 @hoo(ptr noundef nonnull @_unnamed_cfstring_.2)
+ %add = sub nsw i32 %call, 1
+ ret i32 %add
+}
+
+;--- objc-ref.ll
+
+%struct._class_t = type { ptr, ptr, ptr, ptr, ptr }
+
+@"OBJC_CLASS_$_MyClass" = external global %struct._class_t
+@"OBJC_CLASSLIST_REFERENCES_$_" = internal global ptr @"OBJC_CLASS_$_MyClass", section "__DATA,__objc_classrefs,regular,no_dead_strip", align 8
+@"OBJC_CLASSLIST_REFERENCES_$_.1" = internal global ptr @"OBJC_CLASS_$_MyClass", section "__DATA,__objc_classrefs,regular,no_dead_strip", align 8
+
+@OBJC_METH_VAR_NAME_ = private unnamed_addr constant [6 x i8] c"hello\00", section "__TEXT,__objc_methname,cstring_literals", align 1
+@OBJC_METH_VAR_NAME_.1 = private unnamed_addr constant [6 x i8] c"hello\00", section "__TEXT,__objc_methname,cstring_literals", align 1
+
+@OBJC_SELECTOR_REFERENCES_ = internal externally_initialized global ptr @OBJC_METH_VAR_NAME_, section "__DATA,__objc_selrefs,literal_pointers,no_dead_strip", align 8
+@OBJC_SELECTOR_REFERENCES_.1 = internal externally_initialized global ptr @OBJC_METH_VAR_NAME_.1, section "__DATA,__objc_selrefs,literal_pointers,no_dead_strip", align 8
+
+define i32 @f1() {
+entry:
+ %0 = load ptr, ptr @"OBJC_CLASSLIST_REFERENCES_$_", align 8
+ %1 = load ptr, ptr @OBJC_SELECTOR_REFERENCES_, align 8
+ %call = tail call noundef i32 @objc_msgSend(ptr noundef %0, ptr noundef %1)
+ ret i32 %call
+}
+
+declare ptr @objc_msgSend(ptr, ptr, ...)
+
+define i32 @f2() {
+entry:
+ %0 = load ptr, ptr @"OBJC_CLASSLIST_REFERENCES_$_.1", align 8
+ %1 = load ptr, ptr @OBJC_SELECTOR_REFERENCES_.1, align 8
+ %call = tail call noundef i32 @objc_msgSend(ptr noundef %0, ptr noundef %1)
+ ret i32 %call
+}
diff --git a/llvm/test/CodeGen/AArch64/cgdata-outline-gvar.ll b/llvm/test/CodeGen/AArch64/cgdata-outline-gvar.ll
new file mode 100644
index 00000000000000..447928dfa07245
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/cgdata-outline-gvar.ll
@@ -0,0 +1,52 @@
+; This test verifies that global variables are hashed based on their initial contents,
+; allowing them to be outlined even if they appear different due to their names.
+
+; RUN: split-file %s %t
+
+; Check if the outlined function is created locally.
+; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate=true -aarch64-enable-collect-loh=false -filetype=obj %t/local-two.ll -o %t_write_base
+; RUN: llvm-objdump -d %t_write_base | FileCheck %s
+
+; RUN: llvm-cgdata --merge %t_write_base -o %t_cgdata_base
+
+; Read the cgdata in the machine outliner for optimistically outlining in local-one.ll.
+; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-use-path=%t_cgdata_base -aarch64-enable-collect-loh=false -append-content-hash-outlined-name=false -filetype=obj %t/local-one.ll -o %t_read_base
+; RUN: llvm-objdump -d %t_read_base | FileCheck %s
+
+; The names of globals `.str` and `.str.4` are different, but their initial contents are identical.
+; The outlined function now starts with a reference to that global ("hello\00").
+; CHECK: _OUTLINED_FUNCTION_{{.*}}:
+; CHECK-NEXT: adrp x1
+; CHECK-NEXT: add x1, x1
+; CHECK-NEXT: mov w2
+; CHECK-NEXT: mov w3
+; CHECK-NEXT: mov w4
+; CHECK-NEXT: b
+
+;--- local-two.ll
+@.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1
+@.str.1 = private unnamed_addr constant [3 x i8] c"f1\00", align 1
+@.str.2 = private unnamed_addr constant [3 x i8] c"f2\00", align 1
+
+declare noundef i32 @goo(ptr noundef, ptr noundef, i32, i32, i32)
+define i32 @f1() minsize {
+entry:
+ %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.1, ptr noundef nonnull @.str, i32 1, i32 2, i32 3)
+ ret i32 %call
+}
+define i32 @f2() minsize {
+entry:
+ %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.2, ptr noundef nonnull @.str, i32 1, i32 2, i32 3)
+ ret i32 %call
+}
+
+;--- local-one.ll
+@.str.3 = private unnamed_addr constant [3 x i8] c"f3\00", align 1
+@.str.4 = private unnamed_addr constant [6 x i8] c"hello\00", align 1
+
+declare noundef i32 @goo(ptr noundef, ptr noundef, i32, i32, i32)
+define i32 @f1() minsize {
+entry:
+ %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.3, ptr noundef nonnull @.str.4, i32 1, i32 2, i32 3)
+ ret i32 %call
+}
>From b08aab824b5215a5c3e2fccac2eed7b33451d187 Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Mon, 2 Dec 2024 18:53:50 -0800
Subject: [PATCH 2/5] Address comments from mingmingl-llvm
---
llvm/test/CodeGen/AArch64/cgdata-merge-gvar.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AArch64/cgdata-merge-gvar.ll b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar.ll
index f1f5209abe3507..9e867d37f65865 100644
--- a/llvm/test/CodeGen/AArch64/cgdata-merge-gvar.ll
+++ b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar.ll
@@ -1,6 +1,6 @@
; This test verifies that global variables are hashed based on their initial contents,
; allowing them to be merged even if they appear different due to their names.
-; Now they become identical functions that can be merged without creating a paramter.
+; Now they become identical functions that can be merged without creating a parameter.
; RUN: rm -rf %t && split-file %s %t
>From 58f729aa9729248b13a47db21b0d670bec6f03e4 Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Mon, 2 Dec 2024 22:32:09 -0800
Subject: [PATCH 3/5] Address comments from ellishg
---
llvm/lib/IR/StructuralHash.cpp | 11 ++-
.../AArch64/cgdata-merge-gvar-nsconst.ll | 32 +++++++
.../CodeGen/AArch64/cgdata-merge-gvar-objc.ll | 40 ++++++++
.../AArch64/cgdata-merge-gvar-string.ll | 46 ++++++++++
.../test/CodeGen/AArch64/cgdata-merge-gvar.ll | 91 -------------------
.../CodeGen/AArch64/cgdata-outline-gvar.ll | 54 ++++++-----
6 files changed, 158 insertions(+), 116 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/cgdata-merge-gvar-nsconst.ll
create mode 100644 llvm/test/CodeGen/AArch64/cgdata-merge-gvar-objc.ll
create mode 100644 llvm/test/CodeGen/AArch64/cgdata-merge-gvar-string.ll
delete mode 100644 llvm/test/CodeGen/AArch64/cgdata-merge-gvar.ll
diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp
index de883f81a8e4a5..56b925626d8450 100644
--- a/llvm/lib/IR/StructuralHash.cpp
+++ b/llvm/lib/IR/StructuralHash.cpp
@@ -82,8 +82,12 @@ class StructuralHashImpl {
return hashGlobalValue(&GVar);
// Hash the contents of a string.
- if (GVar.getName().starts_with(".str"))
- return hashConstant(GVar.getInitializer());
+ if (GVar.getName().starts_with(".str")) {
+ auto *C = GVar.getInitializer();
+ if (const auto *Seq = dyn_cast<ConstantDataSequential>(C))
+ if (Seq->isString())
+ return stable_hash_name(Seq->getAsString());
+ }
// Hash structural contents of Objective-C metadata in specific sections.
// This can be extended to other metadata if needed.
@@ -93,10 +97,9 @@ class StructuralHashImpl {
};
if (GVar.hasSection()) {
StringRef SectionName = GVar.getSection();
- for (const char *Name : SectionNames) {
+ for (const char *Name : SectionNames)
if (SectionName.contains(Name))
return hashConstant(GVar.getInitializer());
- }
}
return hashGlobalValue(&GVar);
diff --git a/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-nsconst.ll b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-nsconst.ll
new file mode 100644
index 00000000000000..490a778f69e263
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-nsconst.ll
@@ -0,0 +1,32 @@
+; This test verifies that global variables (ns constant) are hashed based on their initial contents,
+; allowing them to be merged even if they appear different due to their names.
+; Now they become identical functions that can be merged without creating a parameter
+
+; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %s | FileCheck %s
+
+; CHECK: _f1.Tgm
+; CHECK: _f2.Tgm
+
+%struct.__NSConstantString_tag = type { ptr, i32, ptr, i64 }
+@__CFConstantStringClassReference = external global [0 x i32]
+@.str.2 = private unnamed_addr constant [9 x i8] c"cfstring\00", section "__TEXT,__cstring,cstring_literals", align 1
+@_unnamed_cfstring_ = private global %struct.__NSConstantString_tag { ptr @__CFConstantStringClassReference, i32 1992, ptr @.str.2, i64 8 }, section "__DATA,__cfstring", align 8
+
+@.str.3 = private unnamed_addr constant [9 x i8] c"cfstring\00", section "__TEXT,__cstring,cstring_literals", align 1
+@_unnamed_cfstring_.2 = private global %struct.__NSConstantString_tag { ptr @__CFConstantStringClassReference, i32 1992, ptr @.str.3, i64 8 }, section "__DATA,__cfstring", align 8
+
+declare i32 @hoo(ptr noundef)
+
+define i32 @f1() {
+entry:
+ %call = tail call i32 @hoo(ptr noundef nonnull @_unnamed_cfstring_)
+ %add = sub nsw i32 %call, 1
+ ret i32 %add
+}
+
+define i32 @f2() {
+entry:
+ %call = tail call i32 @hoo(ptr noundef nonnull @_unnamed_cfstring_.2)
+ %add = sub nsw i32 %call, 1
+ ret i32 %add
+}
diff --git a/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-objc.ll b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-objc.ll
new file mode 100644
index 00000000000000..e0d28721f2afb7
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-objc.ll
@@ -0,0 +1,40 @@
+
+; This test verifies that global variables (objc metadata) are hashed based on their initial contents,
+; allowing them to be merged even if they appear different due to their names.
+; Now they become identical functions that can be merged without creating a parameter
+
+; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %s | FileCheck %s
+
+; CHECK: _f1.Tgm
+; CHECK: _f2.Tgm
+
+
+%struct._class_t = type { ptr, ptr, ptr, ptr, ptr }
+
+@"OBJC_CLASS_$_MyClass" = external global %struct._class_t
+@"OBJC_CLASSLIST_REFERENCES_$_" = internal global ptr @"OBJC_CLASS_$_MyClass", section "__DATA,__objc_classrefs,regular,no_dead_strip", align 8
+@"OBJC_CLASSLIST_REFERENCES_$_.1" = internal global ptr @"OBJC_CLASS_$_MyClass", section "__DATA,__objc_classrefs,regular,no_dead_strip", align 8
+
+@OBJC_METH_VAR_NAME_ = private unnamed_addr constant [6 x i8] c"hello\00", section "__TEXT,__objc_methname,cstring_literals", align 1
+@OBJC_METH_VAR_NAME_.1 = private unnamed_addr constant [6 x i8] c"hello\00", section "__TEXT,__objc_methname,cstring_literals", align 1
+
+@OBJC_SELECTOR_REFERENCES_ = internal externally_initialized global ptr @OBJC_METH_VAR_NAME_, section "__DATA,__objc_selrefs,literal_pointers,no_dead_strip", align 8
+@OBJC_SELECTOR_REFERENCES_.1 = internal externally_initialized global ptr @OBJC_METH_VAR_NAME_.1, section "__DATA,__objc_selrefs,literal_pointers,no_dead_strip", align 8
+
+declare ptr @objc_msgSend(ptr, ptr, ...)
+
+define i32 @f1() {
+entry:
+ %0 = load ptr, ptr @"OBJC_CLASSLIST_REFERENCES_$_", align 8
+ %1 = load ptr, ptr @OBJC_SELECTOR_REFERENCES_, align 8
+ %call = tail call i32 @objc_msgSend(ptr noundef %0, ptr noundef %1)
+ ret i32 %call
+}
+
+define i32 @f2() {
+entry:
+ %0 = load ptr, ptr @"OBJC_CLASSLIST_REFERENCES_$_.1", align 8
+ %1 = load ptr, ptr @OBJC_SELECTOR_REFERENCES_.1, align 8
+ %call = tail call i32 @objc_msgSend(ptr noundef %0, ptr noundef %1)
+ ret i32 %call
+}
diff --git a/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-string.ll b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-string.ll
new file mode 100644
index 00000000000000..1e67425f0b8475
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-string.ll
@@ -0,0 +1,46 @@
+; This test verifies that global variables (string) are hashed based on their initial contents,
+; allowing them to be merged even if they appear different due to their names.
+; Now they become identical functions that can be merged without creating a parameter.
+
+; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %s | FileCheck %s
+
+; CHECK: _f1.Tgm
+; CHECK: _f2.Tgm
+; CHECK-NOT: _f3.Tgm
+; CHECK-NOT: _f4.Tgm
+
+; The initial contents of `.str` and `.str.1` are identical, but not with those of `.str.2` and `.str.3`.
+@.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1
+@.str.1 = private unnamed_addr constant [6 x i8] c"hello\00", align 1
+@.str.2 = private unnamed_addr constant [6 x i8] c"diff2\00", align 1
+@.str.3 = private unnamed_addr constant [6 x i8] c"diff3\00", align 1
+
+declare i32 @goo(ptr noundef)
+
+define i32 @f1() {
+entry:
+ %call = tail call i32 @goo(ptr noundef nonnull @.str)
+ %add = add nsw i32 %call, 1
+ ret i32 %add
+}
+
+define i32 @f2() {
+entry:
+ %call = tail call i32 @goo(ptr noundef nonnull @.str.1)
+ %add = add nsw i32 %call, 1
+ ret i32 %add
+}
+
+define i32 @f3() {
+entry:
+ %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.2)
+ %add = sub nsw i32 %call, 1
+ ret i32 %add
+}
+
+define i32 @f4() {
+entry:
+ %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.3)
+ %add = sub nsw i32 %call, 1
+ ret i32 %add
+}
diff --git a/llvm/test/CodeGen/AArch64/cgdata-merge-gvar.ll b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar.ll
deleted file mode 100644
index 9e867d37f65865..00000000000000
--- a/llvm/test/CodeGen/AArch64/cgdata-merge-gvar.ll
+++ /dev/null
@@ -1,91 +0,0 @@
-; This test verifies that global variables are hashed based on their initial contents,
-; allowing them to be merged even if they appear different due to their names.
-; Now they become identical functions that can be merged without creating a parameter.
-
-; RUN: rm -rf %t && split-file %s %t
-
-; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %t/string.ll | FileCheck %s
-; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %t/ns-const.ll | FileCheck %s
-; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %t/objc-ref.ll | FileCheck %s
-
-; CHECK: _f1.Tgm
-; CHECK: _f2.Tgm
-
-;--- string.ll
-
-@.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1
-@.str.1 = private unnamed_addr constant [6 x i8] c"hello\00", align 1
-
-declare noundef i32 @goo(ptr noundef)
-
-define i32 @f1() {
-entry:
- %call = tail call noundef i32 @goo(ptr noundef nonnull @.str)
- %add = add nsw i32 %call, 1
- ret i32 %add
-}
-
-define i32 @f2() {
-entry:
- %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.1)
- %add = add nsw i32 %call, 1
- ret i32 %add
-}
-
-;--- ns-const.ll
-
-%struct.__NSConstantString_tag = type { ptr, i32, ptr, i64 }
-@__CFConstantStringClassReference = external global [0 x i32]
-@.str.2 = private unnamed_addr constant [9 x i8] c"cfstring\00", section "__TEXT,__cstring,cstring_literals", align 1
-@_unnamed_cfstring_ = private global %struct.__NSConstantString_tag { ptr @__CFConstantStringClassReference, i32 1992, ptr @.str.2, i64 8 }, section "__DATA,__cfstring", align 8
-
-@.str.3 = private unnamed_addr constant [9 x i8] c"cfstring\00", section "__TEXT,__cstring,cstring_literals", align 1
-@_unnamed_cfstring_.2 = private global %struct.__NSConstantString_tag { ptr @__CFConstantStringClassReference, i32 1992, ptr @.str.3, i64 8 }, section "__DATA,__cfstring", align 8
-
-declare noundef i32 @hoo(ptr noundef)
-
-define i32 @f1() {
-entry:
- %call = tail call i32 @hoo(ptr noundef nonnull @_unnamed_cfstring_)
- %add = sub nsw i32 %call, 1
- ret i32 %add
-}
-
-define i32 @f2() {
-entry:
- %call = tail call i32 @hoo(ptr noundef nonnull @_unnamed_cfstring_.2)
- %add = sub nsw i32 %call, 1
- ret i32 %add
-}
-
-;--- objc-ref.ll
-
-%struct._class_t = type { ptr, ptr, ptr, ptr, ptr }
-
-@"OBJC_CLASS_$_MyClass" = external global %struct._class_t
-@"OBJC_CLASSLIST_REFERENCES_$_" = internal global ptr @"OBJC_CLASS_$_MyClass", section "__DATA,__objc_classrefs,regular,no_dead_strip", align 8
-@"OBJC_CLASSLIST_REFERENCES_$_.1" = internal global ptr @"OBJC_CLASS_$_MyClass", section "__DATA,__objc_classrefs,regular,no_dead_strip", align 8
-
-@OBJC_METH_VAR_NAME_ = private unnamed_addr constant [6 x i8] c"hello\00", section "__TEXT,__objc_methname,cstring_literals", align 1
-@OBJC_METH_VAR_NAME_.1 = private unnamed_addr constant [6 x i8] c"hello\00", section "__TEXT,__objc_methname,cstring_literals", align 1
-
-@OBJC_SELECTOR_REFERENCES_ = internal externally_initialized global ptr @OBJC_METH_VAR_NAME_, section "__DATA,__objc_selrefs,literal_pointers,no_dead_strip", align 8
-@OBJC_SELECTOR_REFERENCES_.1 = internal externally_initialized global ptr @OBJC_METH_VAR_NAME_.1, section "__DATA,__objc_selrefs,literal_pointers,no_dead_strip", align 8
-
-define i32 @f1() {
-entry:
- %0 = load ptr, ptr @"OBJC_CLASSLIST_REFERENCES_$_", align 8
- %1 = load ptr, ptr @OBJC_SELECTOR_REFERENCES_, align 8
- %call = tail call noundef i32 @objc_msgSend(ptr noundef %0, ptr noundef %1)
- ret i32 %call
-}
-
-declare ptr @objc_msgSend(ptr, ptr, ...)
-
-define i32 @f2() {
-entry:
- %0 = load ptr, ptr @"OBJC_CLASSLIST_REFERENCES_$_.1", align 8
- %1 = load ptr, ptr @OBJC_SELECTOR_REFERENCES_.1, align 8
- %call = tail call noundef i32 @objc_msgSend(ptr noundef %0, ptr noundef %1)
- ret i32 %call
-}
diff --git a/llvm/test/CodeGen/AArch64/cgdata-outline-gvar.ll b/llvm/test/CodeGen/AArch64/cgdata-outline-gvar.ll
index 447928dfa07245..63ba1d491f9c73 100644
--- a/llvm/test/CodeGen/AArch64/cgdata-outline-gvar.ll
+++ b/llvm/test/CodeGen/AArch64/cgdata-outline-gvar.ll
@@ -3,50 +3,62 @@
; RUN: split-file %s %t
-; Check if the outlined function is created locally.
-; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate=true -aarch64-enable-collect-loh=false -filetype=obj %t/local-two.ll -o %t_write_base
-; RUN: llvm-objdump -d %t_write_base | FileCheck %s
+; The outlined function is created locally.
+; Note that `.str.3` is commonly used in both `f1()` and `f2()`.
+; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate -aarch64-enable-collect-loh=false \
+; RUN: %t/local-two.ll -o - | FileCheck %s --check-prefix=WRITE
+; WRITE-LABEL: _OUTLINED_FUNCTION_{{.*}}:
+; WRITE: adrp x1, l_.str.3
+; WRITE-NEXT: add x1, x1, l_.str.3
+; WRITE-NEXT: mov w2
+; WRITE-NEXT: mov w3
+; WRITE-NEXT: mov w4
+; WRITE-NEXT: b
+
+; Create an object file and merge it into the cgdata.
+; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate -aarch64-enable-collect-loh=false \
+; RUN: -filetype=obj %t/local-two.ll -o %t_write_base
; RUN: llvm-cgdata --merge %t_write_base -o %t_cgdata_base
; Read the cgdata in the machine outliner for optimistically outlining in local-one.ll.
-; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-use-path=%t_cgdata_base -aarch64-enable-collect-loh=false -append-content-hash-outlined-name=false -filetype=obj %t/local-one.ll -o %t_read_base
-; RUN: llvm-objdump -d %t_read_base | FileCheck %s
-
-; The names of globals `.str` and `.str.4` are different, but their initial contents are identical.
-; The outlined function now starts with a reference to that global ("hello\00").
-; CHECK: _OUTLINED_FUNCTION_{{.*}}:
-; CHECK-NEXT: adrp x1
-; CHECK-NEXT: add x1, x1
-; CHECK-NEXT: mov w2
-; CHECK-NEXT: mov w3
-; CHECK-NEXT: mov w4
-; CHECK-NEXT: b
+; Note that the hash of `.str.5` in local-one.ll matches that of `.str.3` in an outlined tree in the cgdata.
+
+; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-use-path=%t_cgdata_base -aarch64-enable-collect-loh=false \
+; RUN: %t/local-one.ll -o - | FileCheck %s --check-prefix=READ
+
+; READ-LABEL: _OUTLINED_FUNCTION_{{.*}}:
+; READ: adrp x1, l_.str.5
+; READ-NEXT: add x1, x1, l_.str.5
+; READ-NEXT: mov w2
+; READ-NEXT: mov w3
+; READ-NEXT: mov w4
+; READ-NEXT: b
;--- local-two.ll
-@.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1
@.str.1 = private unnamed_addr constant [3 x i8] c"f1\00", align 1
@.str.2 = private unnamed_addr constant [3 x i8] c"f2\00", align 1
+@.str.3 = private unnamed_addr constant [6 x i8] c"hello\00", align 1
declare noundef i32 @goo(ptr noundef, ptr noundef, i32, i32, i32)
define i32 @f1() minsize {
entry:
- %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.1, ptr noundef nonnull @.str, i32 1, i32 2, i32 3)
+ %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.1, ptr noundef nonnull @.str.3, i32 1, i32 2, i32 3)
ret i32 %call
}
define i32 @f2() minsize {
entry:
- %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.2, ptr noundef nonnull @.str, i32 1, i32 2, i32 3)
+ %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.2, ptr noundef nonnull @.str.3, i32 1, i32 2, i32 3)
ret i32 %call
}
;--- local-one.ll
-@.str.3 = private unnamed_addr constant [3 x i8] c"f3\00", align 1
-@.str.4 = private unnamed_addr constant [6 x i8] c"hello\00", align 1
+@.str.4 = private unnamed_addr constant [3 x i8] c"f3\00", align 1
+@.str.5 = private unnamed_addr constant [6 x i8] c"hello\00", align 1
declare noundef i32 @goo(ptr noundef, ptr noundef, i32, i32, i32)
define i32 @f1() minsize {
entry:
- %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.3, ptr noundef nonnull @.str.4, i32 1, i32 2, i32 3)
+ %call = tail call noundef i32 @goo(ptr noundef nonnull @.str.4, ptr noundef nonnull @.str.5, i32 1, i32 2, i32 3)
ret i32 %call
}
>From d8d6cb70b65097751ac43c80c931b1995a6d8142 Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Mon, 2 Dec 2024 22:34:12 -0800
Subject: [PATCH 4/5] Address comments from nocchijiang
---
llvm/lib/CodeGen/MachineStableHash.cpp | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/CodeGen/MachineStableHash.cpp b/llvm/lib/CodeGen/MachineStableHash.cpp
index 09a81cb318ecb7..5ab589acee4135 100644
--- a/llvm/lib/CodeGen/MachineStableHash.cpp
+++ b/llvm/lib/CodeGen/MachineStableHash.cpp
@@ -95,15 +95,16 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
return 0;
case MachineOperand::MO_GlobalAddress: {
const GlobalValue *GV = MO.getGlobal();
- if (!GV->hasName()) {
- ++StableHashBailingGlobalAddress;
- return 0;
- }
stable_hash GVHash = 0;
if (auto *GVar = dyn_cast<GlobalVariable>(GV))
GVHash = StructuralHash(*GVar);
- if (!GVHash)
+ if (!GVHash) {
+ if (!GV->hasName()) {
+ ++StableHashBailingGlobalAddress;
+ return 0;
+ }
GVHash = stable_hash_name(GV->getName());
+ }
return stable_hash_combine(MO.getType(), MO.getTargetFlags(), GVHash,
MO.getOffset());
>From c00f96376a35a7784a1c3a7b3443e2808dec79ae Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Tue, 3 Dec 2024 09:39:14 -0800
Subject: [PATCH 5/5] remove spaces
---
llvm/test/CodeGen/AArch64/cgdata-merge-gvar-objc.ll | 2 --
1 file changed, 2 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-objc.ll b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-objc.ll
index e0d28721f2afb7..0073114941501a 100644
--- a/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-objc.ll
+++ b/llvm/test/CodeGen/AArch64/cgdata-merge-gvar-objc.ll
@@ -1,4 +1,3 @@
-
; This test verifies that global variables (objc metadata) are hashed based on their initial contents,
; allowing them to be merged even if they appear different due to their names.
; Now they become identical functions that can be merged without creating a parameter
@@ -8,7 +7,6 @@
; CHECK: _f1.Tgm
; CHECK: _f2.Tgm
-
%struct._class_t = type { ptr, ptr, ptr, ptr, ptr }
@"OBJC_CLASS_$_MyClass" = external global %struct._class_t
More information about the llvm-commits
mailing list