[llvm] [BOLT] Register fragments using symbol table information (PR #89320)

Amir Ayupov via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 18 15:53:00 PDT 2024


https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/89320

>From 84ea9bfa35225d49841c743a7a40329c460923e7 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Thu, 18 Apr 2024 14:47:40 -0700
Subject: [PATCH 1/2] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?=
 =?UTF-8?q?itial=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4
---
 bolt/include/bolt/Rewrite/RewriteInstance.h |   2 +-
 bolt/lib/Rewrite/RewriteInstance.cpp        | 128 +++++++++++++-------
 2 files changed, 82 insertions(+), 48 deletions(-)

diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h
index 826677cd63b22b..8a53c0d6905629 100644
--- a/bolt/include/bolt/Rewrite/RewriteInstance.h
+++ b/bolt/include/bolt/Rewrite/RewriteInstance.h
@@ -110,7 +110,7 @@ class RewriteInstance {
   void initializeMetadataManager();
 
   /// Process fragments, locate parent functions.
-  void registerFragments();
+  Error registerFragments();
 
   /// Read info from special sections. E.g. eh_frame and .gcc_except_table
   /// for exception and stack unwinding information.
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index f22bede002da58..e6da8c476f2aca 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -1339,7 +1339,7 @@ void RewriteInstance::discoverFileObjects() {
     processRelocations();
   }
 
-  registerFragments();
+  BC->logBOLTErrorsAndQuitOnFatal(registerFragments());
 }
 
 Error RewriteInstance::discoverRtFiniAddress() {
@@ -1413,54 +1413,88 @@ void RewriteInstance::updateRtFiniReloc() {
       /*Addend*/ RT->getRuntimeFiniAddress(), /*Value*/ 0});
 }
 
-void RewriteInstance::registerFragments() {
+Error RewriteInstance::registerFragments() {
   if (!BC->HasSplitFunctions)
-    return;
+    return Error::success();
 
-  for (auto &BFI : BC->getBinaryFunctions()) {
-    BinaryFunction &Function = BFI.second;
-    if (!Function.isFragment())
-      continue;
-    unsigned ParentsFound = 0;
-    for (StringRef Name : Function.getNames()) {
-      StringRef BaseName, Suffix;
-      std::tie(BaseName, Suffix) = Name.split('/');
-      const size_t ColdSuffixPos = BaseName.find(".cold");
-      if (ColdSuffixPos == StringRef::npos)
-        continue;
-      // For cold function with local (foo.cold/1) symbol, prefer a parent with
-      // local symbol as well (foo/1) over global symbol (foo).
-      std::string ParentName = BaseName.substr(0, ColdSuffixPos).str();
-      const BinaryData *BD = BC->getBinaryDataByName(ParentName);
-      if (Suffix != "") {
-        ParentName.append(Twine("/", Suffix).str());
-        const BinaryData *BDLocal = BC->getBinaryDataByName(ParentName);
-        if (BDLocal || !BD)
-          BD = BDLocal;
-      }
-      if (!BD) {
-        if (opts::Verbosity >= 1)
-          BC->outs() << "BOLT-INFO: parent function not found for " << Name
-                     << "\n";
-        continue;
-      }
-      const uint64_t Address = BD->getAddress();
-      BinaryFunction *BF = BC->getBinaryFunctionAtAddress(Address);
-      if (!BF) {
-        if (opts::Verbosity >= 1)
-          BC->outs() << formatv(
-              "BOLT-INFO: parent function not found at {0:x}\n", Address);
-        continue;
-      }
-      BC->registerFragment(Function, *BF);
-      ++ParentsFound;
-    }
-    if (!ParentsFound) {
-      BC->errs() << "BOLT-ERROR: parent function not found for " << Function
-                 << '\n';
-      exit(1);
-    }
-  }
+  std::vector<DataRefImpl> FileSymbols;
+  std::multimap<StringRef, DataRefImpl> FunctionSymbols;
+  for (const ELFSymbolRef &Symbol : InputFile->symbols()) {
+    if (cantFail(Symbol.getType()) == SymbolRef::ST_File)
+      FileSymbols.emplace_back(Symbol.getRawDataRefImpl());
+    if (cantFail(Symbol.getFlags()) & SymbolRef::ST_Function)
+      FunctionSymbols.emplace(cantFail(Symbol.getName()),
+                              Symbol.getRawDataRefImpl());
+  }
+
+  auto registerBD = [&](BinaryFunction &Function, BinaryData *BD) -> Error {
+    if (BD)
+      if (BinaryFunction *BF = BC->getBinaryFunctionAtAddress(BD->getAddress()))
+        if (BC->registerFragment(Function, *BF))
+          return Error::success();
+    return createFatalBOLTError("parent not found for " +
+                                Twine(Function.getOneName()));
+  };
+
+  auto handleFragment = [&](StringRef Name, DataRefImpl DRI) -> Error {
+    StringRef ParentName = Name.split(".cold").first;
+    if (ParentName == Name)
+      return Error::success();
+
+    SymbolRef Symbol(DRI, InputFile);
+    const uint64_t Address = cantFail(Symbol.getAddress());
+    BinaryFunction *BF = BC->getBinaryFunctionAtAddress(Address);
+    assert(BF);
+
+    // Fragment has global symbol: lookup global parent
+    if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Global)
+      return registerBD(*BF, BC->getBinaryDataByName(ParentName));
+
+    // Fragment has local symbol: check number of matching symbols
+    size_t ParentSymsCount = FunctionSymbols.count(ParentName);
+    if (ParentSymsCount == 0)
+      return createFatalBOLTError("parent not found for " + Twine(Name));
+
+    if (ParentSymsCount == 1)
+      return registerBD(*BF, BC->getBinaryDataByName(ParentName));
+
+    // Multiple possible parent symbols.
+    // Need FILE symbols to resolve ambiguity.
+    if (!BC->hasSymbolsWithFileName())
+      return createFatalBOLTError(
+          "input file has split functions but does not have FILE symbols. If "
+          "the binary was stripped, preserve FILE symbols with "
+          "--keep-file-symbols strip option.");
+
+    // Check local symbols belonging to the same file first.
+    auto FSI = llvm::upper_bound(FileSymbols, DRI);
+    // Symbol table index bounds to look for local parent.
+    uint32_t FSII = FSI != FileSymbols.begin() ? FSI[-1].d.b : 0;
+    uint32_t FSIE = FSI != FileSymbols.end() ? FSI->d.b : UINT32_MAX;
+    // All symbols with ParentName.
+    auto Range = llvm::make_range(FunctionSymbols.equal_range(ParentName));
+    // Filter to only the local file.
+    auto LocalFileSymbols = llvm::make_filter_range(
+        llvm::make_second_range(Range), [&](const DataRefImpl &DRI) {
+          return DRI.d.b > FSII && DRI.d.b < FSIE;
+        });
+    // No local symbol with ParentName: use global symbol.
+    if (LocalFileSymbols.empty())
+      return registerBD(*BF, BC->getBinaryDataByName(ParentName));
+
+    auto LocalFileSymbol = LocalFileSymbols.begin();
+    if (std::next(LocalFileSymbol) != LocalFileSymbols.end())
+      return createFatalBOLTError("duplicate local symbols " +
+                                  Twine(ParentName) + " from the same file");
+    SymbolRef ParentSymbol(*LocalFileSymbol, InputFile);
+    const uint64_t ParentAddress = cantFail(ParentSymbol.getAddress());
+    return registerBD(*BF, BC->getBinaryDataAtAddress(ParentAddress));
+  };
+
+  for (const auto &[Name, DRI] : FunctionSymbols)
+    if (Error E = handleFragment(Name, DRI))
+      return E;
+  return Error::success();
 }
 
 void RewriteInstance::createPLTBinaryFunction(uint64_t TargetAddress,

>From 501200e061b53591f591cc91f1313b8bb16c54de Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Thu, 18 Apr 2024 15:52:49 -0700
Subject: [PATCH 2/2] Fix issues, tests pass

Created using spr 1.3.4
---
 bolt/lib/Rewrite/RewriteInstance.cpp | 13 ++++++++++---
 bolt/test/X86/fragment-lite.s        | 14 ++++++++------
 2 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index e6da8c476f2aca..76526d864b88cd 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -1422,7 +1422,7 @@ Error RewriteInstance::registerFragments() {
   for (const ELFSymbolRef &Symbol : InputFile->symbols()) {
     if (cantFail(Symbol.getType()) == SymbolRef::ST_File)
       FileSymbols.emplace_back(Symbol.getRawDataRefImpl());
-    if (cantFail(Symbol.getFlags()) & SymbolRef::ST_Function)
+    if (cantFail(Symbol.getType()) == SymbolRef::ST_Function)
       FunctionSymbols.emplace(cantFail(Symbol.getName()),
                               Symbol.getRawDataRefImpl());
   }
@@ -1455,8 +1455,12 @@ Error RewriteInstance::registerFragments() {
     if (ParentSymsCount == 0)
       return createFatalBOLTError("parent not found for " + Twine(Name));
 
-    if (ParentSymsCount == 1)
-      return registerBD(*BF, BC->getBinaryDataByName(ParentName));
+    if (ParentSymsCount == 1) {
+      auto ParentSymbolIt = FunctionSymbols.find(ParentName);
+      SymbolRef ParentSymbol(ParentSymbolIt->second, InputFile);
+      const uint64_t ParentAddress = cantFail(ParentSymbol.getAddress());
+      return registerBD(*BF, BC->getBinaryDataAtAddress(ParentAddress));
+    }
 
     // Multiple possible parent symbols.
     // Need FILE symbols to resolve ambiguity.
@@ -1476,6 +1480,9 @@ Error RewriteInstance::registerFragments() {
     // Filter to only the local file.
     auto LocalFileSymbols = llvm::make_filter_range(
         llvm::make_second_range(Range), [&](const DataRefImpl &DRI) {
+          SymbolRef Symbol(DRI, InputFile);
+          if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Global)
+            return false;
           return DRI.d.b > FSII && DRI.d.b < FSIE;
         });
     // No local symbol with ParentName: use global symbol.
diff --git a/bolt/test/X86/fragment-lite.s b/bolt/test/X86/fragment-lite.s
index 97069bf8096e1c..c3920fe2aaade4 100644
--- a/bolt/test/X86/fragment-lite.s
+++ b/bolt/test/X86/fragment-lite.s
@@ -11,27 +11,28 @@
 # RUN:   2>&1 | FileCheck %s
 
 # CHECK: BOLT-INFO: processing main.cold.1 as a sibling of non-ignored function
-# CHECK: BOLT-INFO: processing foo.cold.1/1 as a sibling of non-ignored function
-# CHECK: BOLT-INFO: processing bar.cold.1/1 as a sibling of non-ignored function
+# CHECK: BOLT-INFO: processing foo.cold.1/1(*2) as a sibling of non-ignored function
+# CHECK: BOLT-INFO: processing bar.cold.1/1(*2) as a sibling of non-ignored function
 # CHECK: BOLT-INFO: processing baz.cold.1 as a sibling of non-ignored function
-# CHECK: BOLT-INFO: processing baz.cold.1/1 as a sibling of non-ignored function
+# CHECK: BOLT-INFO: processing baz.cold.1/1(*2) as a sibling of non-ignored function
 
 # CHECK: Binary Function "main.cold.1" after building cfg
 # CHECK: Parent : main
 
-# CHECK: Binary Function "foo.cold.1/1" after building cfg
+# CHECK: Binary Function "foo.cold.1/1(*2)" after building cfg
 # CHECK: Parent : foo
 
-# CHECK: Binary Function "bar.cold.1/1" after building cfg
+# CHECK: Binary Function "bar.cold.1/1(*2)" after building cfg
 # CHECK: Parent : bar/1
 
 # CHECK: Binary Function "baz.cold.1" after building cfg
 # CHECK: Parent : baz{{$}}
 
-# CHECK: Binary Function "baz.cold.1/1" after building cfg
+# CHECK: Binary Function "baz.cold.1/1(*2)" after building cfg
 # CHECK: Parent : baz/1
 
 #--- main.s
+.file "main.s"
   .globl main
   .type main, %function
 main:
@@ -126,6 +127,7 @@ baz.cold.1:
 .size baz.cold.1, .-baz.cold.1
 
 #--- baz.s
+.file "baz.s"
   .local baz
   .type baz, %function
 baz:



More information about the llvm-commits mailing list