[llvm] [BOLT] Store FileSymRefs in a multimap (PR #98992)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 16 18:14:29 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-bolt

Author: Amir Ayupov (aaupov)

<details>
<summary>Changes</summary>

With aggressive ICF, it's possible for different local symbols
(under different FILE symbols) to be mapped to the same address.

FileSymRefs only keeps a single SymbolRef per address, which prevents
fragment matching from finding the correct symbol to perform parent
function lookup.

Work around this issue by switching FileSymRefs to a multimap. In the
future, uses of FileSymRefs can be replaced with SortedSymbols, which
keeps essentially the same information.

Test Plan: added ambiguous_fragment.test


---
Full diff: https://github.com/llvm/llvm-project/pull/98992.diff


5 Files Affected:

- (modified) bolt/include/bolt/Rewrite/RewriteInstance.h (+1-1) 
- (modified) bolt/lib/Rewrite/RewriteInstance.cpp (+9-3) 
- (added) bolt/test/X86/Inputs/ambiguous_fragment.s (+54) 
- (added) bolt/test/X86/Inputs/ambiguous_fragment.script (+6) 
- (added) bolt/test/X86/ambiguous_fragment.test (+28) 


``````````diff
diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h
index af1d9b4b70a3d..16a82d5687de9 100644
--- a/bolt/include/bolt/Rewrite/RewriteInstance.h
+++ b/bolt/include/bolt/Rewrite/RewriteInstance.h
@@ -490,7 +490,7 @@ class RewriteInstance {
   std::unordered_map<const MCSymbol *, uint32_t> SymbolIndex;
 
   /// Store all non-zero symbols in this map for a quick address lookup.
-  std::map<uint64_t, llvm::object::SymbolRef> FileSymRefs;
+  std::multimap<uint64_t, llvm::object::SymbolRef> FileSymRefs;
 
   /// FILE symbols used for disambiguating split function parents.
   std::vector<ELFSymbolRef> FileSymbols;
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index ded2f577237fe..6815bae47a007 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -886,7 +886,7 @@ void RewriteInstance::discoverFileObjects() {
     if (SymName == "__hot_start" || SymName == "__hot_end")
       continue;
 
-    FileSymRefs[SymbolAddress] = Symbol;
+    FileSymRefs.emplace(SymbolAddress, Symbol);
 
     // Skip section symbols that will be registered by disassemblePLT().
     if (SymbolType == SymbolRef::ST_Debug) {
@@ -1052,7 +1052,9 @@ void RewriteInstance::discoverFileObjects() {
 
         // Remove the symbol from FileSymRefs so that we can skip it from
         // in the future.
-        auto SI = FileSymRefs.find(SymbolAddress);
+        auto SI = llvm::find_if(
+            llvm::make_range(FileSymRefs.equal_range(SymbolAddress)),
+            [&](auto SymIt) { return SymIt.second == Symbol; });
         assert(SI != FileSymRefs.end() && "symbol expected to be present");
         assert(SI->second == Symbol && "wrong symbol found");
         FileSymRefs.erase(SI);
@@ -1433,7 +1435,11 @@ void RewriteInstance::registerFragments() {
     const uint64_t Address = BF->getAddress();
 
     // Get fragment's own symbol
-    const auto SymIt = FileSymRefs.find(Address);
+    const auto SymIt = llvm::find_if(
+        llvm::make_range(FileSymRefs.equal_range(Address)), [&](auto SI) {
+          StringRef Name = cantFail(SI.second.getName());
+          return Name.contains(ParentName);
+        });
     if (SymIt == FileSymRefs.end()) {
       BC->errs()
           << "BOLT-ERROR: symbol lookup failed for function at address 0x"
diff --git a/bolt/test/X86/Inputs/ambiguous_fragment.s b/bolt/test/X86/Inputs/ambiguous_fragment.s
new file mode 100644
index 0000000000000..05346ffd7c344
--- /dev/null
+++ b/bolt/test/X86/Inputs/ambiguous_fragment.s
@@ -0,0 +1,54 @@
+#--- file1
+.file "file1.cpp"
+.section .text.cold
+.type __func.cold.0, @function
+__func.cold.0:
+  ud2
+  .size __func.cold.0, .-__func.cold.0
+.section .text
+.type __func, @function
+__func:
+  ud2
+  .size __func, .-__func
+
+#--- file2
+.file "file2.cpp"
+.section .text.cold
+.type __func.cold.0, @function
+__func.cold.0:
+  ud2
+  .size __func.cold.0, .-__func.cold.0
+.section .text
+.type __func, @function
+__func:
+  ud2
+  .size __func, .-__func
+
+#--- file3
+.file "file3.cpp"
+.section .text.cold
+.type __func.cold.0, @function
+__func.cold.0:
+  ud2
+  .size __func.cold.0, .-__func.cold.0
+.section .text
+.type __func, @function
+__func:
+  ud2
+  .size __func, .-__func
+
+#--- file4
+.file "file4.cpp"
+.section .text.cold
+.type __func.cold.0, @function
+__func.cold.0:
+  ud2
+  .size __func.cold.0, .-__func.cold.0
+.section .text
+.type __func, @function
+__func:
+  ud2
+  .size __func, .-__func
+
+#--- file5
+.file "bolt-pseudo.o"
diff --git a/bolt/test/X86/Inputs/ambiguous_fragment.script b/bolt/test/X86/Inputs/ambiguous_fragment.script
new file mode 100644
index 0000000000000..00129b8887641
--- /dev/null
+++ b/bolt/test/X86/Inputs/ambiguous_fragment.script
@@ -0,0 +1,6 @@
+SECTIONS {
+  . = 0x10000;
+  .text : { *(.text) }
+  . = 0x20000;
+  .text.cold : { *(.text.cold) }
+}
diff --git a/bolt/test/X86/ambiguous_fragment.test b/bolt/test/X86/ambiguous_fragment.test
new file mode 100644
index 0000000000000..e8170584bc697
--- /dev/null
+++ b/bolt/test/X86/ambiguous_fragment.test
@@ -0,0 +1,28 @@
+## This reproduces a bug with misidentification of a parent fragment.
+
+RUN: split-file %p/Inputs/ambiguous_fragment.s %t
+
+RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %t/file1 -o %t1.o
+RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %t/file2 -o %t2.o
+RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %t/file3 -o %t3.o
+RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %t/file4 -o %t4.o
+RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %t/file5 -o %t5.o
+
+RUN: ld.lld %t1.o %t2.o %t3.o %t4.o %t5.o -o %t.exe \
+RUN:   --script %p/Inputs/ambiguous_fragment.script
+
+RUN: llvm-objcopy %t.exe %t.exe2 \
+RUN:   --add-symbol=_Zfunc.cold.0=.text.cold:0x4,local,function \
+RUN:   --add-symbol=_Zfunc=.text:0xc,function
+
+RUN: llvm-objdump --syms %t.exe2 | FileCheck %s --check-prefix=CHECK-SYMS
+
+RUN: link_fdata %s %t.exe2 %t.preagg PREAGG
+RUN: perf2bolt -v=1 %t.exe2 -p %t.preagg --pa -o %t.fdata -w %t.yaml | FileCheck %s
+
+# PREAGG: B X:0 #__func# 1 0
+
+CHECK-SYMS: 0000000000020004 {{.*}} __func.cold.0
+CHECK-SYMS: 0000000000020004 {{.*}} _Zfunc.cold.0
+
+CHECK: BOLT-INFO

``````````

</details>


https://github.com/llvm/llvm-project/pull/98992


More information about the llvm-commits mailing list