[llvm] [BOLT] Use symbol table info in registerFragment (PR #89648)

Amir Ayupov via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 22 18:39:18 PDT 2024


https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/89648

>From 4245b120999b4a1c23a1bfcdb21eb72ec842a897 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Sun, 21 Apr 2024 01:15:30 -0700
Subject: [PATCH 1/8] [BOLT] Use symbol table info in registerFragment

Fragment matching relies on symbol names to identify and register split
function fragments. However, as split fragments are local symbols,
name aliasing is possible. For such cases, use symbol table to resolve
ambiguities:
- find FILE symbol the fragment belongs to,
- iterate over local symbols belonging to the same file and check if
  parent symbol is present,
- if a local parent symbol is found, use it, otherwise use global
  parent symbol.

This requires the presence of FILE symbols in the input binary. As BOLT
requires non-stripped binary, this is a reasonable assumption. Note
however that `strip -g` removes FILE symbols by default, but
`--keep-file-symbols` can be used to preserve these.

We opted to resolve ambiguous cases one by one because there are
typically very few such cases and symbol table parsing is relatively
expensive:

python3.8.6:
Total symbols: 17522
Function symbols: 7877 (45% total)
Fragments: 2551 (15% total)
Fragments with ambiguous parent: 31 (0.1% total)

Large binary:
Total symbols: 3032180
Function symbols: 1637209 (54% total)
Fragments: 43077 (0.1% total)
Fragments with ambiguous parent: 180 (0.006% total)

Test Plan:
Updated X86/fragment-lite.s
---
 bolt/include/bolt/Rewrite/RewriteInstance.h |   3 +
 bolt/include/bolt/Utils/NameResolver.h      |   7 ++
 bolt/lib/Rewrite/RewriteInstance.cpp        | 105 +++++++++++++++-----
 bolt/test/X86/fragment-lite.s               |  54 ++++++++--
 4 files changed, 135 insertions(+), 34 deletions(-)

diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h
index af832b4c7c84cf..d35c85567c33e3 100644
--- a/bolt/include/bolt/Rewrite/RewriteInstance.h
+++ b/bolt/include/bolt/Rewrite/RewriteInstance.h
@@ -490,6 +490,9 @@ class RewriteInstance {
   /// Store all non-zero symbols in this map for a quick address lookup.
   std::map<uint64_t, llvm::object::SymbolRef> FileSymRefs;
 
+  /// FILE symbols.
+  std::vector<object::DataRefImpl> FileSymbols;
+
   std::unique_ptr<DWARFRewriter> DebugInfoRewriter;
 
   std::unique_ptr<BoltAddressTranslation> BAT;
diff --git a/bolt/include/bolt/Utils/NameResolver.h b/bolt/include/bolt/Utils/NameResolver.h
index 2e3ac20a532d76..b6312642e72c8c 100644
--- a/bolt/include/bolt/Utils/NameResolver.h
+++ b/bolt/include/bolt/Utils/NameResolver.h
@@ -28,6 +28,13 @@ class NameResolver {
   static constexpr char Sep = '/';
 
 public:
+  /// Return the number of duplicates registered for a given \p Name.
+  uint64_t getNumDuplicates(StringRef Name) {
+    if (Counters.contains(Name))
+      return Counters.at(Name);
+    return 0;
+  }
+
   /// Return unique version of the \p Name in the form "Name<Sep><Number>".
   std::string uniquify(StringRef Name) {
     const uint64_t ID = ++Counters[Name];
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 4e0096cf988aed..dc2105d090b6a9 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -840,6 +840,7 @@ void RewriteInstance::discoverFileObjects() {
       continue;
 
     if (cantFail(Symbol.getType()) == SymbolRef::ST_File) {
+      FileSymbols.emplace_back(Symbol.getRawDataRefImpl());
       StringRef Name =
           cantFail(std::move(NameOrError), "cannot get symbol name for file");
       // Ignore Clang LTO artificial FILE symbol as it is not always generated,
@@ -1417,50 +1418,106 @@ void RewriteInstance::registerFragments() {
   if (!BC->HasSplitFunctions)
     return;
 
+  std::vector<std::pair<StringRef, BinaryFunction *>> AmbiguousFragments;
   for (auto &BFI : BC->getBinaryFunctions()) {
     BinaryFunction &Function = BFI.second;
     if (!Function.isFragment())
       continue;
-    unsigned ParentsFound = 0;
-    for (StringRef Name : Function.getNames()) {
+    for (MCSymbol *Symbol : Function.Symbols) {
+      StringRef Name = Symbol->getName();
       StringRef BaseName, Suffix;
       std::tie(BaseName, Suffix) = Name.split('/');
       const size_t ColdSuffixPos = BaseName.find(".cold");
       if (ColdSuffixPos == StringRef::npos)
         continue;
-      // For cold function with local (foo.cold/1) symbol, prefer a parent with
-      // local symbol as well (foo/1) over global symbol (foo).
-      std::string ParentName = BaseName.substr(0, ColdSuffixPos).str();
+      StringRef ParentName = BaseName.substr(0, ColdSuffixPos);
       const BinaryData *BD = BC->getBinaryDataByName(ParentName);
-      if (Suffix != "") {
-        ParentName.append(Twine("/", Suffix).str());
-        const BinaryData *BDLocal = BC->getBinaryDataByName(ParentName);
-        if (BDLocal || !BD)
-          BD = BDLocal;
-      }
-      if (!BD) {
-        if (opts::Verbosity >= 1)
-          BC->outs() << "BOLT-INFO: parent function not found for " << Name
-                     << "\n";
+      uint64_t NumPossibleLocalParents = NR.getNumDuplicates(ParentName);
+      // The most common case: single local parent fragment.
+      if (!BD && NumPossibleLocalParents == 1) {
+        BD = BC->getBinaryDataByName((Twine(ParentName) + "/1").str());
+      } else if (BD && (!NumPossibleLocalParents || Suffix.empty())) {
+        // Global parent and either no local candidates (second most common), or
+        // the fragment is global as well (uncommon).
+      } else {
+        // Any other case: need to disambiguate using FILE symbols.
+        AmbiguousFragments.emplace_back(ParentName, &Function);
         continue;
       }
+      assert(BD);
       const uint64_t Address = BD->getAddress();
       BinaryFunction *BF = BC->getBinaryFunctionAtAddress(Address);
       if (!BF) {
-        if (opts::Verbosity >= 1)
-          BC->outs() << formatv(
-              "BOLT-INFO: parent function not found at {0:x}\n", Address);
-        continue;
+        BC->errs() << "BOLT-ERROR: parent function not found for " << Function
+                   << '\n';
+        exit(1);
       }
       BC->registerFragment(Function, *BF);
-      ++ParentsFound;
     }
-    if (!ParentsFound) {
-      BC->errs() << "BOLT-ERROR: parent function not found for " << Function
-                 << '\n';
-      exit(1);
+  }
+
+  if (AmbiguousFragments.empty())
+    return;
+
+  if (!BC->hasSymbolsWithFileName()) {
+    BC->errs() << "BOLT-ERROR: input file has split functions but does not "
+                  "have FILE symbols. If the binary was stripped, preserve "
+                  "FILE symbols with --keep-file-symbols strip option.";
+    exit(1);
+  }
+
+  // The first global symbol is identified by the symbol table sh_info value.
+  // Used as local symbol search stopping point.
+  uint32_t FirstGlobal{0};
+  auto *ELF64LEFile = cast<ELF64LEObjectFile>(InputFile);
+  const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile();
+  for (const auto &Sec : cantFail(Obj.sections())) {
+    if (Sec.sh_type == ELF::SHT_SYMTAB) {
+      FirstGlobal = Sec.sh_info;
+      break;
     }
   }
+
+  for (auto &[ParentName, BF]: AmbiguousFragments) {
+    const uint64_t Address = BF->getAddress();
+
+    // Get fragment's own symbol
+    const auto SymIt = FileSymRefs.find(Address);
+    assert(SymIt != FileSymRefs.end());
+
+    // Find containing FILE symbol
+    auto FSI =
+        llvm::upper_bound(FileSymbols, SymIt->second.getRawDataRefImpl(),
+                          [](const DataRefImpl &A, const DataRefImpl &B) {
+                            return A.d.b < B.d.b;
+                          });
+    assert(FSI != FileSymbols.begin());
+
+    uint32_t StopSymbolNum = FirstGlobal;
+    if (FSI != FileSymbols.end())
+      StopSymbolNum = FSI->d.b;
+
+    uint64_t ParentAddress{0};
+    // Iterate over local file symbols and check symbol names to match parent.
+    for (SymbolRef Symbol(FSI[-1], InputFile);
+         Symbol.getRawDataRefImpl().d.b != StopSymbolNum; Symbol.moveNext()) {
+      if (cantFail(Symbol.getName()) == ParentName) {
+        ParentAddress = cantFail(Symbol.getAddress());
+        break;
+      }
+    }
+
+    // No local parent is found, use global parent function.
+    if (!ParentAddress) {
+      BinaryData *ParentBD = BC->getBinaryDataByName(ParentName);
+      assert(ParentBD);
+      ParentAddress = ParentBD->getAddress();
+    }
+
+    BinaryFunction *ParentBF = BC->getBinaryFunctionAtAddress(ParentAddress);
+    assert(ParentBF);
+    BC->registerFragment(*BF, *ParentBF);
+  }
 }
 
 void RewriteInstance::createPLTBinaryFunction(uint64_t TargetAddress,
diff --git a/bolt/test/X86/fragment-lite.s b/bolt/test/X86/fragment-lite.s
index 97069bf8096e1c..32d1f5a98b64a3 100644
--- a/bolt/test/X86/fragment-lite.s
+++ b/bolt/test/X86/fragment-lite.s
@@ -3,35 +3,42 @@
 # RUN: split-file %s %t
 # RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o
 # RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/baz.s -o %t.baz.o
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/baz2.s -o %t.baz2.o
 # RUN: link_fdata %s %t.o %t.main.fdata
 # RUN: link_fdata %s %t.baz.o %t.baz.fdata
-# RUN: merge-fdata %t.main.fdata %t.baz.fdata > %t.fdata
-# RUN: %clang %cflags %t.o %t.baz.o -o %t.exe -Wl,-q
+# RUN: link_fdata %s %t.baz2.o %t.baz2.fdata
+# RUN: merge-fdata %t.main.fdata %t.baz.fdata %t.baz2.fdata > %t.fdata
+# RUN: %clang %cflags %t.o %t.baz.o %t.baz2.o -o %t.exe -Wl,-q
 # RUN: llvm-bolt %t.exe -o %t.out --lite=1 --data %t.fdata -v=1 -print-cfg \
 # RUN:   2>&1 | FileCheck %s
 
 # CHECK: BOLT-INFO: processing main.cold.1 as a sibling of non-ignored function
-# CHECK: BOLT-INFO: processing foo.cold.1/1 as a sibling of non-ignored function
-# CHECK: BOLT-INFO: processing bar.cold.1/1 as a sibling of non-ignored function
+# CHECK: BOLT-INFO: processing foo.cold.1/1(*2) as a sibling of non-ignored function
+# CHECK: BOLT-INFO: processing bar.cold.1/1(*2) as a sibling of non-ignored function
 # CHECK: BOLT-INFO: processing baz.cold.1 as a sibling of non-ignored function
-# CHECK: BOLT-INFO: processing baz.cold.1/1 as a sibling of non-ignored function
+# CHECK: BOLT-INFO: processing baz.cold.1/1(*2) as a sibling of non-ignored function
+# CHECK: BOLT-INFO: processing baz.cold.1/2(*2) as a sibling of non-ignored function
 
 # CHECK: Binary Function "main.cold.1" after building cfg
 # CHECK: Parent : main
 
-# CHECK: Binary Function "foo.cold.1/1" after building cfg
+# CHECK: Binary Function "foo.cold.1/1(*2)" after building cfg
 # CHECK: Parent : foo
 
-# CHECK: Binary Function "bar.cold.1/1" after building cfg
-# CHECK: Parent : bar/1
+# CHECK: Binary Function "bar.cold.1/1(*2)" after building cfg
+# CHECK: Parent : bar/1(*2)
 
 # CHECK: Binary Function "baz.cold.1" after building cfg
 # CHECK: Parent : baz{{$}}
 
-# CHECK: Binary Function "baz.cold.1/1" after building cfg
-# CHECK: Parent : baz/1
+# CHECK: Binary Function "baz.cold.1/1(*2)" after building cfg
+# CHECK: Parent : baz/1(*2)
+
+# CHECK: Binary Function "baz.cold.1/2(*2)" after building cfg
+# CHECK: Parent : baz/2(*2)
 
 #--- main.s
+.file "main.s"
   .globl main
   .type main, %function
 main:
@@ -126,6 +133,7 @@ baz.cold.1:
 .size baz.cold.1, .-baz.cold.1
 
 #--- baz.s
+.file "baz.s"
   .local baz
   .type baz, %function
 baz:
@@ -149,3 +157,29 @@ baz.cold.1:
   retq
   .cfi_endproc
 .size baz.cold.1, .-baz.cold.1
+
+#--- baz2.s
+.file "baz2.s"
+  .local baz
+  .type baz, %function
+baz:
+  .cfi_startproc
+# FDATA: 0 [unknown] 0 1 baz/2 0 1 0
+  cmpl	$0x0, %eax
+  je	baz.cold.1
+  retq
+  .cfi_endproc
+.size baz, .-baz
+
+  .section .text.cold
+  .local baz.cold.1
+  .type baz.cold.1, %function
+baz.cold.1:
+  .cfi_startproc
+  pushq	%rbp
+  movq	%rsp, %rbp
+  movl	$0x0, %eax
+  popq	%rbp
+  retq
+  .cfi_endproc
+.size baz.cold.1, .-baz.cold.1

>From 325ae69c00c5e171d963387672942f0e76f475cc Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Mon, 22 Apr 2024 11:41:55 -0700
Subject: [PATCH 2/8] fixup! [BOLT] Use symbol table info in registerFragment

---
 bolt/lib/Rewrite/RewriteInstance.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index dc2105d090b6a9..f7b11684b48a69 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -1448,7 +1448,7 @@ void RewriteInstance::registerFragments() {
       const uint64_t Address = BD->getAddress();
       BinaryFunction *BF = BC->getBinaryFunctionAtAddress(Address);
       if (!BF) {
-        BC->errs() << "BOLT-ERROR: parent function not found for " << Function
+        BC->errs() << "BOLT-ERROR: parent function not found for " << Name
                    << '\n';
         exit(1);
       }
@@ -1478,7 +1478,7 @@ void RewriteInstance::registerFragments() {
     }
   }
 
-  for (auto &[ParentName, BF]: AmbiguousFragments) {
+  for (auto &[ParentName, BF] : AmbiguousFragments) {
     const uint64_t Address = BF->getAddress();
 
     // Get fragment's own symbol

>From 163178767f0486cc2d8c4545b8c01b8dadfd1b73 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Mon, 22 Apr 2024 13:44:48 -0700
Subject: [PATCH 3/8] Also check function aliases

---
 bolt/lib/Rewrite/RewriteInstance.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index f7b11684b48a69..332e12bd87e07e 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -1423,8 +1423,7 @@ void RewriteInstance::registerFragments() {
     BinaryFunction &Function = BFI.second;
     if (!Function.isFragment())
       continue;
-    for (MCSymbol *Symbol : Function.Symbols) {
-      StringRef Name = Symbol->getName();
+    for (StringRef Name : Function.getNames()) {
       StringRef BaseName, Suffix;
       std::tie(BaseName, Suffix) = Name.split('/');
       const size_t ColdSuffixPos = BaseName.find(".cold");

>From 972faefb142a3a95e27cae70ecbfea523cd1c1e6 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Mon, 22 Apr 2024 14:02:00 -0700
Subject: [PATCH 4/8] Switch to exit(1) in BD/BF checks

---
 bolt/include/bolt/Rewrite/RewriteInstance.h |  2 +-
 bolt/lib/Rewrite/RewriteInstance.cpp        | 34 +++++++++++----------
 2 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h
index d35c85567c33e3..5148528594d541 100644
--- a/bolt/include/bolt/Rewrite/RewriteInstance.h
+++ b/bolt/include/bolt/Rewrite/RewriteInstance.h
@@ -490,7 +490,7 @@ class RewriteInstance {
   /// Store all non-zero symbols in this map for a quick address lookup.
   std::map<uint64_t, llvm::object::SymbolRef> FileSymRefs;
 
-  /// FILE symbols.
+  /// FILE symbols used for disambiguating split function parents.
   std::vector<object::DataRefImpl> FileSymbols;
 
   std::unique_ptr<DWARFRewriter> DebugInfoRewriter;
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 332e12bd87e07e..38c722309418ba 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -1443,15 +1443,16 @@ void RewriteInstance::registerFragments() {
         AmbiguousFragments.emplace_back(ParentName, &Function);
         continue;
       }
-      assert(BD);
-      const uint64_t Address = BD->getAddress();
-      BinaryFunction *BF = BC->getBinaryFunctionAtAddress(Address);
-      if (!BF) {
-        BC->errs() << "BOLT-ERROR: parent function not found for " << Name
-                   << '\n';
-        exit(1);
+      if (BD) {
+        const uint64_t Address = BD->getAddress();
+        if (BinaryFunction *BF = BC->getBinaryFunctionAtAddress(Address)) {
+          BC->registerFragment(Function, *BF);
+          continue;
+        }
       }
-      BC->registerFragment(Function, *BF);
+      BC->errs() << "BOLT-ERROR: parent function not found for " << Name
+                 << '\n';
+      exit(1);
     }
   }
 
@@ -1507,15 +1508,16 @@ void RewriteInstance::registerFragments() {
     }
 
     // No local parent is found, use global parent function.
-    if (!ParentAddress) {
-      BinaryData *ParentBD = BC->getBinaryDataByName(ParentName);
-      assert(ParentBD);
-      ParentAddress = ParentBD->getAddress();
-    }
+    if (!ParentAddress)
+      if (BinaryData *ParentBD = BC->getBinaryDataByName(ParentName))
+        ParentAddress = ParentBD->getAddress();
 
-    BinaryFunction *ParentBF = BC->getBinaryFunctionAtAddress(ParentAddress);
-    assert(ParentBF);
-    BC->registerFragment(*BF, *ParentBF);
+    if (BinaryFunction *ParentBF = BC->getBinaryFunctionAtAddress(ParentAddress)) {
+      BC->registerFragment(*BF, *ParentBF);
+      continue;
+    }
+    BC->errs() << "BOLT-ERROR: parent function not found for " << *BF << '\n';
+    exit(1);
   }
 }
 

>From b9573ce35cb93260fb57f71127544ebdfa36d028 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Mon, 22 Apr 2024 14:05:28 -0700
Subject: [PATCH 5/8] fixup! Also check function aliases

---
 bolt/lib/Rewrite/RewriteInstance.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 38c722309418ba..4b84cf56ec9d2b 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -1512,7 +1512,8 @@ void RewriteInstance::registerFragments() {
       if (BinaryData *ParentBD = BC->getBinaryDataByName(ParentName))
         ParentAddress = ParentBD->getAddress();
 
-    if (BinaryFunction *ParentBF = BC->getBinaryFunctionAtAddress(ParentAddress)) {
+    if (BinaryFunction *ParentBF =
+            BC->getBinaryFunctionAtAddress(ParentAddress)) {
       BC->registerFragment(*BF, *ParentBF);
       continue;
     }

>From c830ca6b2946e61c8724a1f1b55d5e4c8c731e6a Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Mon, 22 Apr 2024 14:08:43 -0700
Subject: [PATCH 6/8] reduce changes

---
 bolt/lib/Rewrite/RewriteInstance.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 4b84cf56ec9d2b..dca109f26d562e 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -1450,7 +1450,7 @@ void RewriteInstance::registerFragments() {
           continue;
         }
       }
-      BC->errs() << "BOLT-ERROR: parent function not found for " << Name
+      BC->errs() << "BOLT-ERROR: parent function not found for " << Function
                  << '\n';
       exit(1);
     }

>From 40c226e8c4e4b0c74893508367981fec27b1272b Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Mon, 22 Apr 2024 17:26:18 -0700
Subject: [PATCH 7/8] Add a comment with reasoning behind handling ambiguous
 fragments one by one

---
 bolt/lib/Rewrite/RewriteInstance.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index dca109f26d562e..839d28297a55bd 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -1418,6 +1418,8 @@ void RewriteInstance::registerFragments() {
   if (!BC->HasSplitFunctions)
     return;
 
+  // Process fragments with ambiguous parents separately as they are typically a
+  // vanishing minority of cases and require expensive symbol table lookups.
   std::vector<std::pair<StringRef, BinaryFunction *>> AmbiguousFragments;
   for (auto &BFI : BC->getBinaryFunctions()) {
     BinaryFunction &Function = BFI.second;
@@ -1445,7 +1447,8 @@ void RewriteInstance::registerFragments() {
       }
       if (BD) {
         const uint64_t Address = BD->getAddress();
-        if (BinaryFunction *BF = BC->getBinaryFunctionAtAddress(Address)) {
+        BinaryFunction *BF = BC->getBinaryFunctionAtAddress(Address);
+        if (BF) {
           BC->registerFragment(Function, *BF);
           continue;
         }

>From 710c6c34430ec30f8dbc0e9787d882afa1e63971 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Mon, 22 Apr 2024 18:39:01 -0700
Subject: [PATCH 8/8] Address comments

---
 bolt/include/bolt/Utils/NameResolver.h | 12 ++++++---
 bolt/lib/Rewrite/RewriteInstance.cpp   | 34 ++++++++++++++++++--------
 2 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/bolt/include/bolt/Utils/NameResolver.h b/bolt/include/bolt/Utils/NameResolver.h
index b6312642e72c8c..2af304e02e2496 100644
--- a/bolt/include/bolt/Utils/NameResolver.h
+++ b/bolt/include/bolt/Utils/NameResolver.h
@@ -28,17 +28,23 @@ class NameResolver {
   static constexpr char Sep = '/';
 
 public:
-  /// Return the number of duplicates registered for a given \p Name.
-  uint64_t getNumDuplicates(StringRef Name) {
+  /// Return the counter for a given \p Name.
+  uint64_t getCounter(StringRef Name) const {
     if (Counters.contains(Name))
       return Counters.at(Name);
     return 0;
   }
 
   /// Return unique version of the \p Name in the form "Name<Sep><Number>".
+  std::string getUniqueName(StringRef Name, const uint64_t ID) const {
+    return (Name + Twine(Sep) + Twine(ID)).str();
+  }
+
+  /// Register new version of \p Name and return unique version in the form
+  /// "Name<Sep><Number>".
   std::string uniquify(StringRef Name) {
     const uint64_t ID = ++Counters[Name];
-    return (Name + Twine(Sep) + Twine(ID)).str();
+    return getUniqueName(Name, ID);
   }
 
   /// For uniquified \p Name, return the original form (that may no longer be
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 839d28297a55bd..5386ee1eaa7223 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -1433,10 +1433,10 @@ void RewriteInstance::registerFragments() {
         continue;
       StringRef ParentName = BaseName.substr(0, ColdSuffixPos);
       const BinaryData *BD = BC->getBinaryDataByName(ParentName);
-      uint64_t NumPossibleLocalParents = NR.getNumDuplicates(ParentName);
+      const uint64_t NumPossibleLocalParents = NR.getCounter(ParentName);
       // The most common case: single local parent fragment.
       if (!BD && NumPossibleLocalParents == 1) {
-        BD = BC->getBinaryDataByName((Twine(ParentName) + "/1").str());
+        BD = BC->getBinaryDataByName(NR.getUniqueName(ParentName, 1));
       } else if (BD && (!NumPossibleLocalParents || Suffix.empty())) {
         // Global parent and either no local candidates (second most common), or
         // the fragment is global as well (uncommon).
@@ -1465,7 +1465,7 @@ void RewriteInstance::registerFragments() {
   if (!BC->hasSymbolsWithFileName()) {
     BC->errs() << "BOLT-ERROR: input file has split functions but does not "
                   "have FILE symbols. If the binary was stripped, preserve "
-                  "FILE symbols with --keep-file-symbols strip option.";
+                  "FILE symbols with --keep-file-symbols strip option";
     exit(1);
   }
 
@@ -1480,21 +1480,35 @@ void RewriteInstance::registerFragments() {
       break;
     }
   }
+  if (FirstGlobal == 0) {
+    BC->errs() << "BOLT-ERROR: malformed SYMTAB sh_info\n";
+    exit(1);
+  }
 
   for (auto &[ParentName, BF] : AmbiguousFragments) {
     const uint64_t Address = BF->getAddress();
 
     // Get fragment's own symbol
     const auto SymIt = FileSymRefs.find(Address);
-    assert(SymIt != FileSymRefs.end());
+    if (SymIt == FileSymRefs.end()) {
+      BC->errs()
+          << "BOLT-ERROR: symbol lookup failed for function at address 0x"
+          << Twine::utohexstr(Address) << '\n';
+      exit(1);
+    }
 
     // Find containing FILE symbol
-    auto FSI =
-        llvm::upper_bound(FileSymbols, SymIt->second.getRawDataRefImpl(),
-                          [](const DataRefImpl &A, const DataRefImpl &B) {
-                            return A.d.b < B.d.b;
-                          });
-    assert(FSI != FileSymbols.begin());
+    SymbolRef Symbol = SymIt->second;
+    DataRefImpl DRI = Symbol.getRawDataRefImpl();
+    auto FSI = llvm::upper_bound(
+        FileSymbols, DRI, [](const DataRefImpl &A, const DataRefImpl &B) {
+          return A.d.b < B.d.b;
+        });
+    if (FSI == FileSymbols.begin()) {
+      BC->errs() << "BOLT-ERROR: owning FILE symbol not found for symbol "
+                 << cantFail(Symbol.getName()) << '\n';
+      exit(1);
+    }
 
     uint32_t StopSymbolNum = FirstGlobal;
     if (FSI != FileSymbols.end())



More information about the llvm-commits mailing list