[llvm] [BOLT] Store FileSymRefs in a multimap (PR #98992)

Amir Ayupov via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 16 21:52:13 PDT 2024


https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/98992

>From c893e49a13f59af26ad027b944baa32fb3213085 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Mon, 15 Jul 2024 22:16:33 -0700
Subject: [PATCH 1/4] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?=
 =?UTF-8?q?itial=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4
---
 bolt/include/bolt/Rewrite/RewriteInstance.h |  2 +-
 bolt/lib/Rewrite/RewriteInstance.cpp        | 13 +++++++++++--
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h
index af1d9b4b70a3d..16a82d5687de9 100644
--- a/bolt/include/bolt/Rewrite/RewriteInstance.h
+++ b/bolt/include/bolt/Rewrite/RewriteInstance.h
@@ -490,7 +490,7 @@ class RewriteInstance {
   std::unordered_map<const MCSymbol *, uint32_t> SymbolIndex;
 
   /// Store all non-zero symbols in this map for a quick address lookup.
-  std::map<uint64_t, llvm::object::SymbolRef> FileSymRefs;
+  std::multimap<uint64_t, llvm::object::SymbolRef> FileSymRefs;
 
   /// FILE symbols used for disambiguating split function parents.
   std::vector<ELFSymbolRef> FileSymbols;
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index ded2f577237fe..205be34c2336a 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -886,7 +886,7 @@ void RewriteInstance::discoverFileObjects() {
     if (SymName == "__hot_start" || SymName == "__hot_end")
       continue;
 
-    FileSymRefs[SymbolAddress] = Symbol;
+    FileSymRefs.emplace(SymbolAddress, Symbol);
 
     // Skip section symbols that will be registered by disassemblePLT().
     if (SymbolType == SymbolRef::ST_Debug) {
@@ -1433,7 +1433,16 @@ void RewriteInstance::registerFragments() {
     const uint64_t Address = BF->getAddress();
 
     // Get fragment's own symbol
-    const auto SymIt = FileSymRefs.find(Address);
+    auto SymIt = FileSymRefs.end();
+    auto EqualAddressSymRange = FileSymRefs.equal_range(Address);
+    while (EqualAddressSymRange.first != EqualAddressSymRange.second) {
+      auto EqualAddressSymIt = EqualAddressSymRange.first;
+      StringRef Name = cantFail(EqualAddressSymIt->second.getName());
+      if (Name.contains(ParentName)) {
+        SymIt = EqualAddressSymIt;
+        break;
+      }
+    }
     if (SymIt == FileSymRefs.end()) {
       BC->errs()
           << "BOLT-ERROR: symbol lookup failed for function at address 0x"

>From bab720dea72f5683fdb8e893ed60d336d5338895 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Tue, 16 Jul 2024 18:09:27 -0700
Subject: [PATCH 2/4] jf submit -n

Created using spr 1.3.4
---
 bolt/lib/Rewrite/RewriteInstance.cpp          | 19 +++----
 bolt/test/X86/Inputs/ambiguous_fragment.s     | 54 +++++++++++++++++++
 .../test/X86/Inputs/ambiguous_fragment.script |  6 +++
 bolt/test/X86/ambiguous_fragment.test         | 28 ++++++++++
 4 files changed, 96 insertions(+), 11 deletions(-)
 create mode 100644 bolt/test/X86/Inputs/ambiguous_fragment.s
 create mode 100644 bolt/test/X86/Inputs/ambiguous_fragment.script
 create mode 100644 bolt/test/X86/ambiguous_fragment.test

diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 205be34c2336a..6815bae47a007 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -1052,7 +1052,9 @@ void RewriteInstance::discoverFileObjects() {
 
         // Remove the symbol from FileSymRefs so that we can skip it from
         // in the future.
-        auto SI = FileSymRefs.find(SymbolAddress);
+        auto SI = llvm::find_if(
+            llvm::make_range(FileSymRefs.equal_range(SymbolAddress)),
+            [&](auto SymIt) { return SymIt.second == Symbol; });
         assert(SI != FileSymRefs.end() && "symbol expected to be present");
         assert(SI->second == Symbol && "wrong symbol found");
         FileSymRefs.erase(SI);
@@ -1433,16 +1435,11 @@ void RewriteInstance::registerFragments() {
     const uint64_t Address = BF->getAddress();
 
     // Get fragment's own symbol
-    auto SymIt = FileSymRefs.end();
-    auto EqualAddressSymRange = FileSymRefs.equal_range(Address);
-    while (EqualAddressSymRange.first != EqualAddressSymRange.second) {
-      auto EqualAddressSymIt = EqualAddressSymRange.first;
-      StringRef Name = cantFail(EqualAddressSymIt->second.getName());
-      if (Name.contains(ParentName)) {
-        SymIt = EqualAddressSymIt;
-        break;
-      }
-    }
+    const auto SymIt = llvm::find_if(
+        llvm::make_range(FileSymRefs.equal_range(Address)), [&](auto SI) {
+          StringRef Name = cantFail(SI.second.getName());
+          return Name.contains(ParentName);
+        });
     if (SymIt == FileSymRefs.end()) {
       BC->errs()
           << "BOLT-ERROR: symbol lookup failed for function at address 0x"
diff --git a/bolt/test/X86/Inputs/ambiguous_fragment.s b/bolt/test/X86/Inputs/ambiguous_fragment.s
new file mode 100644
index 0000000000000..05346ffd7c344
--- /dev/null
+++ b/bolt/test/X86/Inputs/ambiguous_fragment.s
@@ -0,0 +1,54 @@
+#--- file1
+.file "file1.cpp"
+.section .text.cold
+.type __func.cold.0, @function
+__func.cold.0:
+  ud2
+  .size __func.cold.0, .-__func.cold.0
+.section .text
+.type __func, @function
+__func:
+  ud2
+  .size __func, .-__func
+
+#--- file2
+.file "file2.cpp"
+.section .text.cold
+.type __func.cold.0, @function
+__func.cold.0:
+  ud2
+  .size __func.cold.0, .-__func.cold.0
+.section .text
+.type __func, @function
+__func:
+  ud2
+  .size __func, .-__func
+
+#--- file3
+.file "file3.cpp"
+.section .text.cold
+.type __func.cold.0, @function
+__func.cold.0:
+  ud2
+  .size __func.cold.0, .-__func.cold.0
+.section .text
+.type __func, @function
+__func:
+  ud2
+  .size __func, .-__func
+
+#--- file4
+.file "file4.cpp"
+.section .text.cold
+.type __func.cold.0, @function
+__func.cold.0:
+  ud2
+  .size __func.cold.0, .-__func.cold.0
+.section .text
+.type __func, @function
+__func:
+  ud2
+  .size __func, .-__func
+
+#--- file5
+.file "bolt-pseudo.o"
diff --git a/bolt/test/X86/Inputs/ambiguous_fragment.script b/bolt/test/X86/Inputs/ambiguous_fragment.script
new file mode 100644
index 0000000000000..00129b8887641
--- /dev/null
+++ b/bolt/test/X86/Inputs/ambiguous_fragment.script
@@ -0,0 +1,6 @@
+SECTIONS {
+  . = 0x10000;
+  .text : { *(.text) }
+  . = 0x20000;
+  .text.cold : { *(.text.cold) }
+}
diff --git a/bolt/test/X86/ambiguous_fragment.test b/bolt/test/X86/ambiguous_fragment.test
new file mode 100644
index 0000000000000..e8170584bc697
--- /dev/null
+++ b/bolt/test/X86/ambiguous_fragment.test
@@ -0,0 +1,28 @@
+## This reproduces a bug with misidentification of a parent fragment.
+
+RUN: split-file %p/Inputs/ambiguous_fragment.s %t
+
+RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %t/file1 -o %t1.o
+RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %t/file2 -o %t2.o
+RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %t/file3 -o %t3.o
+RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %t/file4 -o %t4.o
+RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %t/file5 -o %t5.o
+
+RUN: ld.lld %t1.o %t2.o %t3.o %t4.o %t5.o -o %t.exe \
+RUN:   --script %p/Inputs/ambiguous_fragment.script
+
+RUN: llvm-objcopy %t.exe %t.exe2 \
+RUN:   --add-symbol=_Zfunc.cold.0=.text.cold:0x4,local,function \
+RUN:   --add-symbol=_Zfunc=.text:0xc,function
+
+RUN: llvm-objdump --syms %t.exe2 | FileCheck %s --check-prefix=CHECK-SYMS
+
+RUN: link_fdata %s %t.exe2 %t.preagg PREAGG
+RUN: perf2bolt -v=1 %t.exe2 -p %t.preagg --pa -o %t.fdata -w %t.yaml | FileCheck %s
+
+# PREAGG: B X:0 #__func# 1 0
+
+CHECK-SYMS: 0000000000020004 {{.*}} __func.cold.0
+CHECK-SYMS: 0000000000020004 {{.*}} _Zfunc.cold.0
+
+CHECK: BOLT-INFO

>From 03a66981b9652cbc8ac89d6d007c4b1d290d5050 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Tue, 16 Jul 2024 18:18:00 -0700
Subject: [PATCH 3/4] update test

Created using spr 1.3.4
---
 bolt/test/X86/ambiguous_fragment.test | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/bolt/test/X86/ambiguous_fragment.test b/bolt/test/X86/ambiguous_fragment.test
index e8170584bc697..e7d32c0a680a3 100644
--- a/bolt/test/X86/ambiguous_fragment.test
+++ b/bolt/test/X86/ambiguous_fragment.test
@@ -25,4 +25,9 @@ RUN: perf2bolt -v=1 %t.exe2 -p %t.preagg --pa -o %t.fdata -w %t.yaml | FileCheck
 CHECK-SYMS: 0000000000020004 {{.*}} __func.cold.0
 CHECK-SYMS: 0000000000020004 {{.*}} _Zfunc.cold.0
 
-CHECK: BOLT-INFO
+CHECK-NOT:  BOLT-ERROR: parent function not found for __func.cold.0
+CHECK:      BOLT-INFO: marking __func.cold.0/3(*4) as a fragment of __func/4(*3)
+CHECK-NEXT: BOLT-INFO: marking __func.cold.0/1(*2) as a fragment of __func/1(*2)
+CHECK-NEXT: BOLT-INFO: marking __func.cold.0/2(*2) as a fragment of __func/2(*2)
+CHECK-NEXT: BOLT-INFO: marking __func.cold.0/3(*4) as a fragment of __func/3(*2)
+CHECK-NEXT: BOLT-INFO: marking __func.cold.0/4(*2) as a fragment of __func/4(*3)

>From a78792f052adc776b9fbbeb39971101eb8646bd2 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Tue, 16 Jul 2024 21:52:02 -0700
Subject: [PATCH 4/4] Clear FileSymRefs at the end of discoverFileObjects

Created using spr 1.3.4
---
 bolt/lib/Rewrite/RewriteInstance.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 6815bae47a007..32562ccb6b345 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -1262,6 +1262,7 @@ void RewriteInstance::discoverFileObjects() {
 
   registerFragments();
   FileSymbols.clear();
+  FileSymRefs.clear();
 
   discoverBOLTReserved();
 }



More information about the llvm-commits mailing list