[llvm] [BOLT][NFCI] Use FileSymbols for local symbol disambiguation (PR #89088)

Amir Ayupov via llvm-commits llvm-commits at lists.llvm.org
Sun May 5 13:54:02 PDT 2024


https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/89088

>From cef8fc71eff387a530e23747f1f92fb6bb02bf24 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Wed, 17 Apr 2024 16:14:46 +0200
Subject: [PATCH 1/3] [BOLT][NFC] Keep FILE symbols in a vector

In discoverFileObjects, replace mapping from every local symbol to
associated file name with a vector of symbol data for FILE symbols
only. This cuts down on memory needed to resolve local file names.

Test Plan: NFC
---
 bolt/lib/Rewrite/RewriteInstance.cpp | 29 ++++++++++++----------------
 1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 0c8ee0d417233b..463249bb789259 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -813,14 +813,8 @@ void RewriteInstance::discoverFileObjects() {
 
   // For local symbols we want to keep track of associated FILE symbol name for
   // disambiguation by combined name.
-  StringRef FileSymbolName;
   bool SeenFileName = false;
-  struct SymbolRefHash {
-    size_t operator()(SymbolRef const &S) const {
-      return std::hash<decltype(DataRefImpl::p)>{}(S.getRawDataRefImpl().p);
-    }
-  };
-  std::unordered_map<SymbolRef, StringRef, SymbolRefHash> SymbolToFileName;
+  std::vector<std::pair<DataRefImpl, StringRef>> FileSymbols;
   for (const ELFSymbolRef &Symbol : InputFile->symbols()) {
     Expected<StringRef> NameOrError = Symbol.getName();
     if (NameOrError && NameOrError->starts_with("__asan_init")) {
@@ -846,13 +840,10 @@ void RewriteInstance::discoverFileObjects() {
       // and this uncertainty is causing havoc in function name matching.
       if (Name == "ld-temp.o")
         continue;
-      FileSymbolName = Name;
+      FileSymbols.emplace_back(Symbol.getRawDataRefImpl(), Name);
       SeenFileName = true;
       continue;
     }
-    if (!FileSymbolName.empty() &&
-        !(cantFail(Symbol.getFlags()) & SymbolRef::SF_Global))
-      SymbolToFileName[Symbol] = FileSymbolName;
   }
 
   // Sort symbols in the file by value. Ignore symbols from non-allocatable
@@ -1027,14 +1018,18 @@ void RewriteInstance::discoverFileObjects() {
       // The <id> field is used for disambiguation of local symbols since there
       // could be identical function names coming from identical file names
       // (e.g. from different directories).
-      std::string AltPrefix;
-      auto SFI = SymbolToFileName.find(Symbol);
-      if (SymbolType == SymbolRef::ST_Function && SFI != SymbolToFileName.end())
-        AltPrefix = Name + "/" + std::string(SFI->second);
+      auto CompareSymsByIdx = [](const std::pair<DataRefImpl, StringRef> &A,
+                                 const std::pair<DataRefImpl, StringRef> &B) {
+        return A.first.d.b < B.first.d.b;
+      };
+      DataRefImpl SymDataRef = Symbol.getRawDataRefImpl();
+      auto SFI = llvm::upper_bound(FileSymbols,
+                                   std::make_pair(SymDataRef, StringRef()),
+                                   CompareSymsByIdx);
+      if (SymbolType == SymbolRef::ST_Function && SFI != FileSymbols.begin())
+        AlternativeName = NR.uniquify(Name + "/" + SFI[-1].second.str());
 
       UniqueName = NR.uniquify(Name);
-      if (!AltPrefix.empty())
-        AlternativeName = NR.uniquify(AltPrefix);
     }
 
     uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize();

>From bcca1d20d4c4e7ecf73babc493c5c475215bbede Mon Sep 17 00:00:00 2001
From: Amir Ayupov <amir.aupov at gmail.com>
Date: Sun, 5 May 2024 13:49:52 -0700
Subject: [PATCH 2/3] Rebase on top of PR#89648

---
 bolt/lib/Rewrite/RewriteInstance.cpp | 29 ++++++----------------------
 1 file changed, 6 insertions(+), 23 deletions(-)

diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 8578c2c6c8edcc..800a568db001cf 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -813,8 +813,6 @@ void RewriteInstance::discoverFileObjects() {
 
   // For local symbols we want to keep track of associated FILE symbol name for
   // disambiguation by combined name.
-  bool SeenFileName = false;
-  std::vector<std::pair<DataRefImpl, StringRef>> FileSymbols;
   for (const ELFSymbolRef &Symbol : InputFile->symbols()) {
     Expected<StringRef> NameOrError = Symbol.getName();
     if (NameOrError && NameOrError->starts_with("__asan_init")) {
@@ -833,18 +831,8 @@ void RewriteInstance::discoverFileObjects() {
     if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Undefined)
       continue;
 
-    if (cantFail(Symbol.getType()) == SymbolRef::ST_File) {
+    if (cantFail(Symbol.getType()) == SymbolRef::ST_File)
       FileSymbols.emplace_back(Symbol);
-      StringRef Name =
-          cantFail(std::move(NameOrError), "cannot get symbol name for file");
-      // Ignore Clang LTO artificial FILE symbol as it is not always generated,
-      // and this uncertainty is causing havoc in function name matching.
-      if (Name == "ld-temp.o")
-        continue;
-      FileSymbols.emplace_back(Symbol.getRawDataRefImpl(), Name);
-      SeenFileName = true;
-      continue;
-    }
   }
 
   // Sort symbols in the file by value. Ignore symbols from non-allocatable
@@ -1019,16 +1007,11 @@ void RewriteInstance::discoverFileObjects() {
       // The <id> field is used for disambiguation of local symbols since there
       // could be identical function names coming from identical file names
       // (e.g. from different directories).
-      auto CompareSymsByIdx = [](const std::pair<DataRefImpl, StringRef> &A,
-                                 const std::pair<DataRefImpl, StringRef> &B) {
-        return A.first.d.b < B.first.d.b;
-      };
-      DataRefImpl SymDataRef = Symbol.getRawDataRefImpl();
-      auto SFI = llvm::upper_bound(FileSymbols,
-                                   std::make_pair(SymDataRef, StringRef()),
-                                   CompareSymsByIdx);
-      if (SymbolType == SymbolRef::ST_Function && SFI != FileSymbols.begin())
-        AlternativeName = NR.uniquify(Name + "/" + SFI[-1].second.str());
+      auto SFI = llvm::upper_bound(FileSymbols, ELFSymbolRef(Symbol));
+      if (SymbolType == SymbolRef::ST_Function && SFI != FileSymbols.begin()) {
+        StringRef FileSymbolName = cantFail(SFI[-1].getName());
+        AlternativeName = NR.uniquify(Name + "/" + FileSymbolName.str());
+      }
 
       UniqueName = NR.uniquify(Name);
     }

>From dfabf57e84fc898336126b7e61f5e6e6e459c21e Mon Sep 17 00:00:00 2001
From: Amir Ayupov <amir.aupov at gmail.com>
Date: Sun, 5 May 2024 13:52:45 -0700
Subject: [PATCH 3/3] Fix omission

---
 bolt/lib/Rewrite/RewriteInstance.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 800a568db001cf..2ab2cc332d94b0 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -1264,7 +1264,7 @@ void RewriteInstance::discoverFileObjects() {
                              FDE->getAddressRange());
   }
 
-  BC->setHasSymbolsWithFileName(SeenFileName);
+  BC->setHasSymbolsWithFileName(FileSymbols.size());
 
   // Now that all the functions were created - adjust their boundaries.
   adjustFunctionBoundaries();



More information about the llvm-commits mailing list