[llvm] [BOLT] Use symbol table info in registerFragment (PR #89648)
Maksim Panchenko via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 29 09:19:59 PDT 2024
================
@@ -1417,50 +1418,125 @@ void RewriteInstance::registerFragments() {
if (!BC->HasSplitFunctions)
return;
+ // Process fragments with ambiguous parents separately as they are typically a
+ // vanishing minority of cases and require expensive symbol table lookups.
+ std::vector<std::pair<StringRef, BinaryFunction *>> AmbiguousFragments;
for (auto &BFI : BC->getBinaryFunctions()) {
BinaryFunction &Function = BFI.second;
if (!Function.isFragment())
continue;
- unsigned ParentsFound = 0;
for (StringRef Name : Function.getNames()) {
- StringRef BaseName, Suffix;
- std::tie(BaseName, Suffix) = Name.split('/');
+ StringRef BaseName = NR.restore(Name);
+ const bool IsGlobal = BaseName == Name;
const size_t ColdSuffixPos = BaseName.find(".cold");
if (ColdSuffixPos == StringRef::npos)
continue;
- // For cold function with local (foo.cold/1) symbol, prefer a parent with
- // local symbol as well (foo/1) over global symbol (foo).
- std::string ParentName = BaseName.substr(0, ColdSuffixPos).str();
+ StringRef ParentName = BaseName.substr(0, ColdSuffixPos);
const BinaryData *BD = BC->getBinaryDataByName(ParentName);
- if (Suffix != "") {
- ParentName.append(Twine("/", Suffix).str());
- const BinaryData *BDLocal = BC->getBinaryDataByName(ParentName);
- if (BDLocal || !BD)
- BD = BDLocal;
- }
- if (!BD) {
- if (opts::Verbosity >= 1)
- BC->outs() << "BOLT-INFO: parent function not found for " << Name
- << "\n";
+ const uint64_t NumPossibleLocalParents =
+ NR.getUniquifiedNameCount(ParentName);
+ // The most common case: single local parent fragment.
+ if (!BD && NumPossibleLocalParents == 1) {
+ BD = BC->getBinaryDataByName(NR.getUniqueName(ParentName, 1));
+ } else if (BD && (!NumPossibleLocalParents || IsGlobal)) {
+ // Global parent and either no local candidates (second most common), or
+ // the fragment is global as well (uncommon).
+ } else {
+ // Any other case: need to disambiguate using FILE symbols.
+ AmbiguousFragments.emplace_back(ParentName, &Function);
continue;
}
- const uint64_t Address = BD->getAddress();
- BinaryFunction *BF = BC->getBinaryFunctionAtAddress(Address);
- if (!BF) {
- if (opts::Verbosity >= 1)
- BC->outs() << formatv(
- "BOLT-INFO: parent function not found at {0:x}\n", Address);
- continue;
+ if (BD) {
+ BinaryFunction *BF = BC->getFunctionForSymbol(BD->getSymbol());
+ if (BF) {
+ BC->registerFragment(Function, *BF);
+ continue;
+ }
}
- BC->registerFragment(Function, *BF);
- ++ParentsFound;
- }
- if (!ParentsFound) {
BC->errs() << "BOLT-ERROR: parent function not found for " << Function
<< '\n';
exit(1);
}
}
+
+ if (AmbiguousFragments.empty())
+ return;
+
+ if (!BC->hasSymbolsWithFileName()) {
+ BC->errs() << "BOLT-ERROR: input file has split functions but does not "
+ "have FILE symbols. If the binary was stripped, preserve "
+ "FILE symbols with --keep-file-symbols strip option";
+ exit(1);
+ }
+
+ // The first global symbol is identified by the symbol table sh_info value.
+ // Used as local symbol search stopping point.
+ uint32_t FirstGlobal{0};
+ auto *ELF64LEFile = cast<ELF64LEObjectFile>(InputFile);
+ const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile();
+ for (const auto &Sec : cantFail(Obj.sections())) {
+ if (Sec.sh_type == ELF::SHT_SYMTAB) {
+ FirstGlobal = Sec.sh_info;
+ break;
+ }
+ }
+ if (FirstGlobal == 0) {
----------------
maksfb wrote:
Is it possible to have a symbol table without globals? Even if that is unlikely, such a file is still valid, so we shouldn’t fail to process it.
https://github.com/llvm/llvm-project/pull/89648
More information about the llvm-commits
mailing list