[llvm] [BOLT][NFCI] Use FileSymbols for local symbol disambiguation (PR #89088)
Amir Ayupov via llvm-commits
llvm-commits at lists.llvm.org
Sun May 5 13:54:02 PDT 2024
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/89088
>From cef8fc71eff387a530e23747f1f92fb6bb02bf24 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Wed, 17 Apr 2024 16:14:46 +0200
Subject: [PATCH 1/3] [BOLT][NFC] Keep FILE symbols in a vector
In discoverFileObjects, replace mapping from every local symbol to
associated file name with a vector of symbol data for FILE symbols
only. This cuts down on memory needed to resolve local file names.
Test Plan: NFC
---
bolt/lib/Rewrite/RewriteInstance.cpp | 29 ++++++++++++----------------
1 file changed, 12 insertions(+), 17 deletions(-)
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 0c8ee0d417233b..463249bb789259 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -813,14 +813,8 @@ void RewriteInstance::discoverFileObjects() {
// For local symbols we want to keep track of associated FILE symbol name for
// disambiguation by combined name.
- StringRef FileSymbolName;
bool SeenFileName = false;
- struct SymbolRefHash {
- size_t operator()(SymbolRef const &S) const {
- return std::hash<decltype(DataRefImpl::p)>{}(S.getRawDataRefImpl().p);
- }
- };
- std::unordered_map<SymbolRef, StringRef, SymbolRefHash> SymbolToFileName;
+ std::vector<std::pair<DataRefImpl, StringRef>> FileSymbols;
for (const ELFSymbolRef &Symbol : InputFile->symbols()) {
Expected<StringRef> NameOrError = Symbol.getName();
if (NameOrError && NameOrError->starts_with("__asan_init")) {
@@ -846,13 +840,10 @@ void RewriteInstance::discoverFileObjects() {
// and this uncertainty is causing havoc in function name matching.
if (Name == "ld-temp.o")
continue;
- FileSymbolName = Name;
+ FileSymbols.emplace_back(Symbol.getRawDataRefImpl(), Name);
SeenFileName = true;
continue;
}
- if (!FileSymbolName.empty() &&
- !(cantFail(Symbol.getFlags()) & SymbolRef::SF_Global))
- SymbolToFileName[Symbol] = FileSymbolName;
}
// Sort symbols in the file by value. Ignore symbols from non-allocatable
@@ -1027,14 +1018,18 @@ void RewriteInstance::discoverFileObjects() {
// The <id> field is used for disambiguation of local symbols since there
// could be identical function names coming from identical file names
// (e.g. from different directories).
- std::string AltPrefix;
- auto SFI = SymbolToFileName.find(Symbol);
- if (SymbolType == SymbolRef::ST_Function && SFI != SymbolToFileName.end())
- AltPrefix = Name + "/" + std::string(SFI->second);
+ auto CompareSymsByIdx = [](const std::pair<DataRefImpl, StringRef> &A,
+ const std::pair<DataRefImpl, StringRef> &B) {
+ return A.first.d.b < B.first.d.b;
+ };
+ DataRefImpl SymDataRef = Symbol.getRawDataRefImpl();
+ auto SFI = llvm::upper_bound(FileSymbols,
+ std::make_pair(SymDataRef, StringRef()),
+ CompareSymsByIdx);
+ if (SymbolType == SymbolRef::ST_Function && SFI != FileSymbols.begin())
+ AlternativeName = NR.uniquify(Name + "/" + SFI[-1].second.str());
UniqueName = NR.uniquify(Name);
- if (!AltPrefix.empty())
- AlternativeName = NR.uniquify(AltPrefix);
}
uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize();
>From bcca1d20d4c4e7ecf73babc493c5c475215bbede Mon Sep 17 00:00:00 2001
From: Amir Ayupov <amir.aupov at gmail.com>
Date: Sun, 5 May 2024 13:49:52 -0700
Subject: [PATCH 2/3] Rebase on top of PR#89648
---
bolt/lib/Rewrite/RewriteInstance.cpp | 29 ++++++----------------------
1 file changed, 6 insertions(+), 23 deletions(-)
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 8578c2c6c8edcc..800a568db001cf 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -813,8 +813,6 @@ void RewriteInstance::discoverFileObjects() {
// For local symbols we want to keep track of associated FILE symbol name for
// disambiguation by combined name.
- bool SeenFileName = false;
- std::vector<std::pair<DataRefImpl, StringRef>> FileSymbols;
for (const ELFSymbolRef &Symbol : InputFile->symbols()) {
Expected<StringRef> NameOrError = Symbol.getName();
if (NameOrError && NameOrError->starts_with("__asan_init")) {
@@ -833,18 +831,8 @@ void RewriteInstance::discoverFileObjects() {
if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Undefined)
continue;
- if (cantFail(Symbol.getType()) == SymbolRef::ST_File) {
+ if (cantFail(Symbol.getType()) == SymbolRef::ST_File)
FileSymbols.emplace_back(Symbol);
- StringRef Name =
- cantFail(std::move(NameOrError), "cannot get symbol name for file");
- // Ignore Clang LTO artificial FILE symbol as it is not always generated,
- // and this uncertainty is causing havoc in function name matching.
- if (Name == "ld-temp.o")
- continue;
- FileSymbols.emplace_back(Symbol.getRawDataRefImpl(), Name);
- SeenFileName = true;
- continue;
- }
}
// Sort symbols in the file by value. Ignore symbols from non-allocatable
@@ -1019,16 +1007,11 @@ void RewriteInstance::discoverFileObjects() {
// The <id> field is used for disambiguation of local symbols since there
// could be identical function names coming from identical file names
// (e.g. from different directories).
- auto CompareSymsByIdx = [](const std::pair<DataRefImpl, StringRef> &A,
- const std::pair<DataRefImpl, StringRef> &B) {
- return A.first.d.b < B.first.d.b;
- };
- DataRefImpl SymDataRef = Symbol.getRawDataRefImpl();
- auto SFI = llvm::upper_bound(FileSymbols,
- std::make_pair(SymDataRef, StringRef()),
- CompareSymsByIdx);
- if (SymbolType == SymbolRef::ST_Function && SFI != FileSymbols.begin())
- AlternativeName = NR.uniquify(Name + "/" + SFI[-1].second.str());
+ auto SFI = llvm::upper_bound(FileSymbols, ELFSymbolRef(Symbol));
+ if (SymbolType == SymbolRef::ST_Function && SFI != FileSymbols.begin()) {
+ StringRef FileSymbolName = cantFail(SFI[-1].getName());
+ AlternativeName = NR.uniquify(Name + "/" + FileSymbolName.str());
+ }
UniqueName = NR.uniquify(Name);
}
>From dfabf57e84fc898336126b7e61f5e6e6e459c21e Mon Sep 17 00:00:00 2001
From: Amir Ayupov <amir.aupov at gmail.com>
Date: Sun, 5 May 2024 13:52:45 -0700
Subject: [PATCH 3/3] Fix omission
---
bolt/lib/Rewrite/RewriteInstance.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 800a568db001cf..2ab2cc332d94b0 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -1264,7 +1264,7 @@ void RewriteInstance::discoverFileObjects() {
FDE->getAddressRange());
}
- BC->setHasSymbolsWithFileName(SeenFileName);
+ BC->setHasSymbolsWithFileName(FileSymbols.size());
// Now that all the functions were created - adjust their boundaries.
adjustFunctionBoundaries();
More information about the llvm-commits
mailing list