[llvm] [BOLT] Register fragments using symbol table information (PR #89320)
Amir Ayupov via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 18 15:53:00 PDT 2024
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/89320
>From 84ea9bfa35225d49841c743a7a40329c460923e7 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Thu, 18 Apr 2024 14:47:40 -0700
Subject: [PATCH 1/2] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?=
=?UTF-8?q?itial=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.4
---
bolt/include/bolt/Rewrite/RewriteInstance.h | 2 +-
bolt/lib/Rewrite/RewriteInstance.cpp | 128 +++++++++++++-------
2 files changed, 82 insertions(+), 48 deletions(-)
diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h
index 826677cd63b22b..8a53c0d6905629 100644
--- a/bolt/include/bolt/Rewrite/RewriteInstance.h
+++ b/bolt/include/bolt/Rewrite/RewriteInstance.h
@@ -110,7 +110,7 @@ class RewriteInstance {
void initializeMetadataManager();
/// Process fragments, locate parent functions.
- void registerFragments();
+ Error registerFragments();
/// Read info from special sections. E.g. eh_frame and .gcc_except_table
/// for exception and stack unwinding information.
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index f22bede002da58..e6da8c476f2aca 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -1339,7 +1339,7 @@ void RewriteInstance::discoverFileObjects() {
processRelocations();
}
- registerFragments();
+ BC->logBOLTErrorsAndQuitOnFatal(registerFragments());
}
Error RewriteInstance::discoverRtFiniAddress() {
@@ -1413,54 +1413,88 @@ void RewriteInstance::updateRtFiniReloc() {
/*Addend*/ RT->getRuntimeFiniAddress(), /*Value*/ 0});
}
-void RewriteInstance::registerFragments() {
+Error RewriteInstance::registerFragments() {
if (!BC->HasSplitFunctions)
- return;
+ return Error::success();
- for (auto &BFI : BC->getBinaryFunctions()) {
- BinaryFunction &Function = BFI.second;
- if (!Function.isFragment())
- continue;
- unsigned ParentsFound = 0;
- for (StringRef Name : Function.getNames()) {
- StringRef BaseName, Suffix;
- std::tie(BaseName, Suffix) = Name.split('/');
- const size_t ColdSuffixPos = BaseName.find(".cold");
- if (ColdSuffixPos == StringRef::npos)
- continue;
- // For cold function with local (foo.cold/1) symbol, prefer a parent with
- // local symbol as well (foo/1) over global symbol (foo).
- std::string ParentName = BaseName.substr(0, ColdSuffixPos).str();
- const BinaryData *BD = BC->getBinaryDataByName(ParentName);
- if (Suffix != "") {
- ParentName.append(Twine("/", Suffix).str());
- const BinaryData *BDLocal = BC->getBinaryDataByName(ParentName);
- if (BDLocal || !BD)
- BD = BDLocal;
- }
- if (!BD) {
- if (opts::Verbosity >= 1)
- BC->outs() << "BOLT-INFO: parent function not found for " << Name
- << "\n";
- continue;
- }
- const uint64_t Address = BD->getAddress();
- BinaryFunction *BF = BC->getBinaryFunctionAtAddress(Address);
- if (!BF) {
- if (opts::Verbosity >= 1)
- BC->outs() << formatv(
- "BOLT-INFO: parent function not found at {0:x}\n", Address);
- continue;
- }
- BC->registerFragment(Function, *BF);
- ++ParentsFound;
- }
- if (!ParentsFound) {
- BC->errs() << "BOLT-ERROR: parent function not found for " << Function
- << '\n';
- exit(1);
- }
- }
+ std::vector<DataRefImpl> FileSymbols;
+ std::multimap<StringRef, DataRefImpl> FunctionSymbols;
+ for (const ELFSymbolRef &Symbol : InputFile->symbols()) {
+ if (cantFail(Symbol.getType()) == SymbolRef::ST_File)
+ FileSymbols.emplace_back(Symbol.getRawDataRefImpl());
+ if (cantFail(Symbol.getFlags()) & SymbolRef::ST_Function)
+ FunctionSymbols.emplace(cantFail(Symbol.getName()),
+ Symbol.getRawDataRefImpl());
+ }
+
+ auto registerBD = [&](BinaryFunction &Function, BinaryData *BD) -> Error {
+ if (BD)
+ if (BinaryFunction *BF = BC->getBinaryFunctionAtAddress(BD->getAddress()))
+ if (BC->registerFragment(Function, *BF))
+ return Error::success();
+ return createFatalBOLTError("parent not found for " +
+ Twine(Function.getOneName()));
+ };
+
+ auto handleFragment = [&](StringRef Name, DataRefImpl DRI) -> Error {
+ StringRef ParentName = Name.split(".cold").first;
+ if (ParentName == Name)
+ return Error::success();
+
+ SymbolRef Symbol(DRI, InputFile);
+ const uint64_t Address = cantFail(Symbol.getAddress());
+ BinaryFunction *BF = BC->getBinaryFunctionAtAddress(Address);
+ assert(BF);
+
+ // Fragment has global symbol: lookup global parent
+ if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Global)
+ return registerBD(*BF, BC->getBinaryDataByName(ParentName));
+
+ // Fragment has local symbol: check number of matching symbols
+ size_t ParentSymsCount = FunctionSymbols.count(ParentName);
+ if (ParentSymsCount == 0)
+ return createFatalBOLTError("parent not found for " + Twine(Name));
+
+ if (ParentSymsCount == 1)
+ return registerBD(*BF, BC->getBinaryDataByName(ParentName));
+
+ // Multiple possible parent symbols.
+ // Need FILE symbols to resolve ambiguity.
+ if (!BC->hasSymbolsWithFileName())
+ return createFatalBOLTError(
+ "input file has split functions but does not have FILE symbols. If "
+ "the binary was stripped, preserve FILE symbols with "
+ "--keep-file-symbols strip option.");
+
+ // Check local symbols belonging to the same file first.
+ auto FSI = llvm::upper_bound(FileSymbols, DRI);
+ // Symbol table index bounds to look for local parent.
+ uint32_t FSII = FSI != FileSymbols.begin() ? FSI[-1].d.b : 0;
+ uint32_t FSIE = FSI != FileSymbols.end() ? FSI->d.b : UINT32_MAX;
+ // All symbols with ParentName.
+ auto Range = llvm::make_range(FunctionSymbols.equal_range(ParentName));
+ // Filter to only the local file.
+ auto LocalFileSymbols = llvm::make_filter_range(
+ llvm::make_second_range(Range), [&](const DataRefImpl &DRI) {
+ return DRI.d.b > FSII && DRI.d.b < FSIE;
+ });
+ // No local symbol with ParentName: use global symbol.
+ if (LocalFileSymbols.empty())
+ return registerBD(*BF, BC->getBinaryDataByName(ParentName));
+
+ auto LocalFileSymbol = LocalFileSymbols.begin();
+ if (std::next(LocalFileSymbol) != LocalFileSymbols.end())
+ return createFatalBOLTError("duplicate local symbols " +
+ Twine(ParentName) + " from the same file");
+ SymbolRef ParentSymbol(*LocalFileSymbol, InputFile);
+ const uint64_t ParentAddress = cantFail(ParentSymbol.getAddress());
+ return registerBD(*BF, BC->getBinaryDataAtAddress(ParentAddress));
+ };
+
+ for (const auto &[Name, DRI] : FunctionSymbols)
+ if (Error E = handleFragment(Name, DRI))
+ return E;
+ return Error::success();
}
void RewriteInstance::createPLTBinaryFunction(uint64_t TargetAddress,
>From 501200e061b53591f591cc91f1313b8bb16c54de Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Thu, 18 Apr 2024 15:52:49 -0700
Subject: [PATCH 2/2] Fix issues, tests pass
Created using spr 1.3.4
---
bolt/lib/Rewrite/RewriteInstance.cpp | 13 ++++++++++---
bolt/test/X86/fragment-lite.s | 14 ++++++++------
2 files changed, 18 insertions(+), 9 deletions(-)
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index e6da8c476f2aca..76526d864b88cd 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -1422,7 +1422,7 @@ Error RewriteInstance::registerFragments() {
for (const ELFSymbolRef &Symbol : InputFile->symbols()) {
if (cantFail(Symbol.getType()) == SymbolRef::ST_File)
FileSymbols.emplace_back(Symbol.getRawDataRefImpl());
- if (cantFail(Symbol.getFlags()) & SymbolRef::ST_Function)
+ if (cantFail(Symbol.getType()) == SymbolRef::ST_Function)
FunctionSymbols.emplace(cantFail(Symbol.getName()),
Symbol.getRawDataRefImpl());
}
@@ -1455,8 +1455,12 @@ Error RewriteInstance::registerFragments() {
if (ParentSymsCount == 0)
return createFatalBOLTError("parent not found for " + Twine(Name));
- if (ParentSymsCount == 1)
- return registerBD(*BF, BC->getBinaryDataByName(ParentName));
+ if (ParentSymsCount == 1) {
+ auto ParentSymbolIt = FunctionSymbols.find(ParentName);
+ SymbolRef ParentSymbol(ParentSymbolIt->second, InputFile);
+ const uint64_t ParentAddress = cantFail(ParentSymbol.getAddress());
+ return registerBD(*BF, BC->getBinaryDataAtAddress(ParentAddress));
+ }
// Multiple possible parent symbols.
// Need FILE symbols to resolve ambiguity.
@@ -1476,6 +1480,9 @@ Error RewriteInstance::registerFragments() {
// Filter to only the local file.
auto LocalFileSymbols = llvm::make_filter_range(
llvm::make_second_range(Range), [&](const DataRefImpl &DRI) {
+ SymbolRef Symbol(DRI, InputFile);
+ if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Global)
+ return false;
return DRI.d.b > FSII && DRI.d.b < FSIE;
});
// No local symbol with ParentName: use global symbol.
diff --git a/bolt/test/X86/fragment-lite.s b/bolt/test/X86/fragment-lite.s
index 97069bf8096e1c..c3920fe2aaade4 100644
--- a/bolt/test/X86/fragment-lite.s
+++ b/bolt/test/X86/fragment-lite.s
@@ -11,27 +11,28 @@
# RUN: 2>&1 | FileCheck %s
# CHECK: BOLT-INFO: processing main.cold.1 as a sibling of non-ignored function
-# CHECK: BOLT-INFO: processing foo.cold.1/1 as a sibling of non-ignored function
-# CHECK: BOLT-INFO: processing bar.cold.1/1 as a sibling of non-ignored function
+# CHECK: BOLT-INFO: processing foo.cold.1/1(*2) as a sibling of non-ignored function
+# CHECK: BOLT-INFO: processing bar.cold.1/1(*2) as a sibling of non-ignored function
# CHECK: BOLT-INFO: processing baz.cold.1 as a sibling of non-ignored function
-# CHECK: BOLT-INFO: processing baz.cold.1/1 as a sibling of non-ignored function
+# CHECK: BOLT-INFO: processing baz.cold.1/1(*2) as a sibling of non-ignored function
# CHECK: Binary Function "main.cold.1" after building cfg
# CHECK: Parent : main
-# CHECK: Binary Function "foo.cold.1/1" after building cfg
+# CHECK: Binary Function "foo.cold.1/1(*2)" after building cfg
# CHECK: Parent : foo
-# CHECK: Binary Function "bar.cold.1/1" after building cfg
+# CHECK: Binary Function "bar.cold.1/1(*2)" after building cfg
# CHECK: Parent : bar/1
# CHECK: Binary Function "baz.cold.1" after building cfg
# CHECK: Parent : baz{{$}}
-# CHECK: Binary Function "baz.cold.1/1" after building cfg
+# CHECK: Binary Function "baz.cold.1/1(*2)" after building cfg
# CHECK: Parent : baz/1
#--- main.s
+.file "main.s"
.globl main
.type main, %function
main:
@@ -126,6 +127,7 @@ baz.cold.1:
.size baz.cold.1, .-baz.cold.1
#--- baz.s
+.file "baz.s"
.local baz
.type baz, %function
baz:
More information about the llvm-commits
mailing list