[llvm] [BOLT] Store FileSymRefs in a multimap (PR #98992)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 16 18:14:29 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-bolt
Author: Amir Ayupov (aaupov)
<details>
<summary>Changes</summary>
With aggressive ICF, it's possible to have different local symbols
(under different FILE symbols) to be mapped to the same address.
FileSymRefs only keeps a single SymbolRef per address, which prevents
fragment matching from finding the correct symbol to perform parent
function lookup.
Work around this issue by switching FileSymRefs to a multimap. In
future, uses of FileSymRefs can be replaced with SortedSymbols which
keeps essentially the same information.
Test Plan: added ambiguous_fragment.test
---
Full diff: https://github.com/llvm/llvm-project/pull/98992.diff
5 Files Affected:
- (modified) bolt/include/bolt/Rewrite/RewriteInstance.h (+1-1)
- (modified) bolt/lib/Rewrite/RewriteInstance.cpp (+9-3)
- (added) bolt/test/X86/Inputs/ambiguous_fragment.s (+54)
- (added) bolt/test/X86/Inputs/ambiguous_fragment.script (+6)
- (added) bolt/test/X86/ambiguous_fragment.test (+28)
``````````diff
diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h
index af1d9b4b70a3d..16a82d5687de9 100644
--- a/bolt/include/bolt/Rewrite/RewriteInstance.h
+++ b/bolt/include/bolt/Rewrite/RewriteInstance.h
@@ -490,7 +490,7 @@ class RewriteInstance {
std::unordered_map<const MCSymbol *, uint32_t> SymbolIndex;
/// Store all non-zero symbols in this map for a quick address lookup.
- std::map<uint64_t, llvm::object::SymbolRef> FileSymRefs;
+ std::multimap<uint64_t, llvm::object::SymbolRef> FileSymRefs;
/// FILE symbols used for disambiguating split function parents.
std::vector<ELFSymbolRef> FileSymbols;
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index ded2f577237fe..6815bae47a007 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -886,7 +886,7 @@ void RewriteInstance::discoverFileObjects() {
if (SymName == "__hot_start" || SymName == "__hot_end")
continue;
- FileSymRefs[SymbolAddress] = Symbol;
+ FileSymRefs.emplace(SymbolAddress, Symbol);
// Skip section symbols that will be registered by disassemblePLT().
if (SymbolType == SymbolRef::ST_Debug) {
@@ -1052,7 +1052,9 @@ void RewriteInstance::discoverFileObjects() {
// Remove the symbol from FileSymRefs so that we can skip it from
// in the future.
- auto SI = FileSymRefs.find(SymbolAddress);
+ auto SI = llvm::find_if(
+ llvm::make_range(FileSymRefs.equal_range(SymbolAddress)),
+ [&](auto SymIt) { return SymIt.second == Symbol; });
assert(SI != FileSymRefs.end() && "symbol expected to be present");
assert(SI->second == Symbol && "wrong symbol found");
FileSymRefs.erase(SI);
@@ -1433,7 +1435,11 @@ void RewriteInstance::registerFragments() {
const uint64_t Address = BF->getAddress();
// Get fragment's own symbol
- const auto SymIt = FileSymRefs.find(Address);
+ const auto SymIt = llvm::find_if(
+ llvm::make_range(FileSymRefs.equal_range(Address)), [&](auto SI) {
+ StringRef Name = cantFail(SI.second.getName());
+ return Name.contains(ParentName);
+ });
if (SymIt == FileSymRefs.end()) {
BC->errs()
<< "BOLT-ERROR: symbol lookup failed for function at address 0x"
diff --git a/bolt/test/X86/Inputs/ambiguous_fragment.s b/bolt/test/X86/Inputs/ambiguous_fragment.s
new file mode 100644
index 0000000000000..05346ffd7c344
--- /dev/null
+++ b/bolt/test/X86/Inputs/ambiguous_fragment.s
@@ -0,0 +1,54 @@
+#--- file1
+.file "file1.cpp"
+.section .text.cold
+.type __func.cold.0, @function
+__func.cold.0:
+ ud2
+ .size __func.cold.0, .-__func.cold.0
+.section .text
+.type __func, @function
+__func:
+ ud2
+ .size __func, .-__func
+
+#--- file2
+.file "file2.cpp"
+.section .text.cold
+.type __func.cold.0, @function
+__func.cold.0:
+ ud2
+ .size __func.cold.0, .-__func.cold.0
+.section .text
+.type __func, @function
+__func:
+ ud2
+ .size __func, .-__func
+
+#--- file3
+.file "file3.cpp"
+.section .text.cold
+.type __func.cold.0, @function
+__func.cold.0:
+ ud2
+ .size __func.cold.0, .-__func.cold.0
+.section .text
+.type __func, @function
+__func:
+ ud2
+ .size __func, .-__func
+
+#--- file4
+.file "file4.cpp"
+.section .text.cold
+.type __func.cold.0, @function
+__func.cold.0:
+ ud2
+ .size __func.cold.0, .-__func.cold.0
+.section .text
+.type __func, @function
+__func:
+ ud2
+ .size __func, .-__func
+
+#--- file5
+.file "bolt-pseudo.o"
diff --git a/bolt/test/X86/Inputs/ambiguous_fragment.script b/bolt/test/X86/Inputs/ambiguous_fragment.script
new file mode 100644
index 0000000000000..00129b8887641
--- /dev/null
+++ b/bolt/test/X86/Inputs/ambiguous_fragment.script
@@ -0,0 +1,6 @@
+SECTIONS {
+ . = 0x10000;
+ .text : { *(.text) }
+ . = 0x20000;
+ .text.cold : { *(.text.cold) }
+}
diff --git a/bolt/test/X86/ambiguous_fragment.test b/bolt/test/X86/ambiguous_fragment.test
new file mode 100644
index 0000000000000..e8170584bc697
--- /dev/null
+++ b/bolt/test/X86/ambiguous_fragment.test
@@ -0,0 +1,28 @@
+## This reproduces a bug with misidentification of a parent fragment.
+
+RUN: split-file %p/Inputs/ambiguous_fragment.s %t
+
+RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %t/file1 -o %t1.o
+RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %t/file2 -o %t2.o
+RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %t/file3 -o %t3.o
+RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %t/file4 -o %t4.o
+RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %t/file5 -o %t5.o
+
+RUN: ld.lld %t1.o %t2.o %t3.o %t4.o %t5.o -o %t.exe \
+RUN: --script %p/Inputs/ambiguous_fragment.script
+
+RUN: llvm-objcopy %t.exe %t.exe2 \
+RUN: --add-symbol=_Zfunc.cold.0=.text.cold:0x4,local,function \
+RUN: --add-symbol=_Zfunc=.text:0xc,function
+
+RUN: llvm-objdump --syms %t.exe2 | FileCheck %s --check-prefix=CHECK-SYMS
+
+RUN: link_fdata %s %t.exe2 %t.preagg PREAGG
+RUN: perf2bolt -v=1 %t.exe2 -p %t.preagg --pa -o %t.fdata -w %t.yaml | FileCheck %s
+
+# PREAGG: B X:0 #__func# 1 0
+
+CHECK-SYMS: 0000000000020004 {{.*}} __func.cold.0
+CHECK-SYMS: 0000000000020004 {{.*}} _Zfunc.cold.0
+
+CHECK: BOLT-INFO
``````````
</details>
https://github.com/llvm/llvm-project/pull/98992
More information about the llvm-commits
mailing list