[lld] [llvm] Re-apply "[NFCI][LTO][lld] Optimize away symbol copies within LTO global resolution in ELF" (PR #107792)

Mingming Liu via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 9 10:14:21 PDT 2024


https://github.com/minglotus-6 updated https://github.com/llvm/llvm-project/pull/107792

>From a01e4c9bc1fb4788d3d0a566c2f91cac489a13e6 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Tue, 27 Aug 2024 00:12:13 -0700
Subject: [PATCH 1/8] [NFCI]Use DenseMap<StringRef, ValueType> for global
 resolution

---
 llvm/include/llvm/LTO/LTO.h | 6 +++++-
 llvm/lib/LTO/LTO.cpp        | 3 ++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h
index 30eda34cd7ba54..79902db353443e 100644
--- a/llvm/include/llvm/LTO/LTO.h
+++ b/llvm/include/llvm/LTO/LTO.h
@@ -15,6 +15,9 @@
 #ifndef LLVM_LTO_LTO_H
 #define LLVM_LTO_LTO_H
 
+#include <memory>
+
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/Bitcode/BitcodeReader.h"
@@ -408,7 +411,8 @@ class LTO {
   // Global mapping from mangled symbol names to resolutions.
   // Make this an optional to guard against accessing after it has been reset
   // (to reduce memory after we're done with it).
-  std::optional<StringMap<GlobalResolution>> GlobalResolutions;
+  std::unique_ptr<llvm::DenseMap<StringRef, GlobalResolution>>
+      GlobalResolutions;
 
   void addModuleToGlobalRes(ArrayRef<InputFile::Symbol> Syms,
                             ArrayRef<SymbolResolution> Res, unsigned Partition,
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index bb3c9f7acdb8e5..ba3336bdd31206 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -607,7 +607,8 @@ LTO::LTO(Config Conf, ThinBackend Backend,
     : Conf(std::move(Conf)),
       RegularLTO(ParallelCodeGenParallelismLevel, this->Conf),
       ThinLTO(std::move(Backend)),
-      GlobalResolutions(std::make_optional<StringMap<GlobalResolution>>()),
+      GlobalResolutions(
+          std::make_unique<DenseMap<StringRef, GlobalResolution>>()),
       LTOMode(LTOMode) {}
 
 // Requires a destructor for MapVector<BitcodeModule>.

>From 6d50637f8ee1028b8350cc82e23eec4510ea1a28 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Tue, 27 Aug 2024 15:57:00 -0700
Subject: [PATCH 2/8] Make it optional for LTO to keep copies of symbol strings

---
 lld/ELF/LTO.cpp                       |  1 +
 llvm/include/llvm/LTO/Config.h        |  5 +++++
 llvm/include/llvm/LTO/LTO.h           | 10 +++++++++-
 llvm/include/llvm/Support/Allocator.h |  1 +
 llvm/lib/LTO/LTO.cpp                  |  9 ++++++++-
 5 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp
index 935d0a9eab9ee0..f339f1c2c0ec21 100644
--- a/lld/ELF/LTO.cpp
+++ b/lld/ELF/LTO.cpp
@@ -135,6 +135,7 @@ static lto::Config createConfig() {
       config->ltoValidateAllVtablesHaveTypeInfos;
   c.AllVtablesHaveTypeInfos = ctx.ltoAllVtablesHaveTypeInfos;
   c.AlwaysEmitRegularLTOObj = !config->ltoObjPath.empty();
+  c.KeepSymbolNameCopies = false;
 
   for (const llvm::StringRef &name : config->thinLTOModulesToCompile)
     c.ThinLTOModulesToCompile.emplace_back(name);
diff --git a/llvm/include/llvm/LTO/Config.h b/llvm/include/llvm/LTO/Config.h
index 482b6e55a19d35..a49cce9f30e20c 100644
--- a/llvm/include/llvm/LTO/Config.h
+++ b/llvm/include/llvm/LTO/Config.h
@@ -88,6 +88,11 @@ struct Config {
   /// want to know a priori all possible output files.
   bool AlwaysEmitRegularLTOObj = false;
 
+  /// If true, the LTO instance creates copies of the symbol names for LTO::run.
+  /// The lld linker uses string saver to keep symbol names alive and doesn't
+  /// need to create copies, so it can set this field to false.
+  bool KeepSymbolNameCopies = true;
+
   /// Allows non-imported definitions to get the potentially more constraining
   /// visibility from the prevailing definition. FromPrevailing is the default
   /// because it works for many binary formats. ELF can use the more optimized
diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h
index 79902db353443e..f2dc146c0bc501 100644
--- a/llvm/include/llvm/LTO/LTO.h
+++ b/llvm/include/llvm/LTO/LTO.h
@@ -26,6 +26,7 @@
 #include "llvm/Object/IRSymtab.h"
 #include "llvm/Support/Caching.h"
 #include "llvm/Support/Error.h"
+#include "llvm/Support/StringSaver.h"
 #include "llvm/Support/thread.h"
 #include "llvm/Transforms/IPO/FunctionAttrs.h"
 #include "llvm/Transforms/IPO/FunctionImport.h"
@@ -39,6 +40,7 @@ class MemoryBufferRef;
 class Module;
 class raw_pwrite_stream;
 class ToolOutputFile;
+class UniqueStringSaver;
 
 /// Resolve linkage for prevailing symbols in the \p Index. Linkage changes
 /// recorded in the index and the ThinLTO backends must apply the changes to
@@ -351,6 +353,12 @@ class LTO {
     DenseMap<GlobalValue::GUID, StringRef> PrevailingModuleForGUID;
   } ThinLTO;
 
+  std::unique_ptr<llvm::BumpPtrAllocator> Alloc =
+      std::make_unique<BumpPtrAllocator>();
+
+  std::unique_ptr<llvm::UniqueStringSaver> UniqueSymbolSaver =
+      std::make_unique<llvm::UniqueStringSaver>(*Alloc);
+
   // The global resolution for a particular (mangled) symbol name. This is in
   // particular necessary to track whether each symbol can be internalized.
   // Because any input file may introduce a new cross-partition reference, we
@@ -409,7 +417,7 @@ class LTO {
   };
 
   // Global mapping from mangled symbol names to resolutions.
-  // Make this an optional to guard against accessing after it has been reset
+  // Make this an unique_ptr to guard against accessing after it has been reset
   // (to reduce memory after we're done with it).
   std::unique_ptr<llvm::DenseMap<StringRef, GlobalResolution>>
       GlobalResolutions;
diff --git a/llvm/include/llvm/Support/Allocator.h b/llvm/include/llvm/Support/Allocator.h
index 568f0d34032fa2..3c54556bfefd34 100644
--- a/llvm/include/llvm/Support/Allocator.h
+++ b/llvm/include/llvm/Support/Allocator.h
@@ -22,6 +22,7 @@
 #include "llvm/Support/AllocatorBase.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cassert>
 #include <cstddef>
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index ba3336bdd31206..1ad209db056f0a 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -627,7 +627,11 @@ void LTO::addModuleToGlobalRes(ArrayRef<InputFile::Symbol> Syms,
     assert(ResI != ResE);
     SymbolResolution Res = *ResI++;
 
-    auto &GlobalRes = (*GlobalResolutions)[Sym.getName()];
+    StringRef SymbolName = Sym.getName();
+    if (Conf.KeepSymbolNameCopies)
+      SymbolName = UniqueSymbolSaver->save(SymbolName);
+
+    auto &GlobalRes = (*GlobalResolutions)[SymbolName];
     GlobalRes.UnnamedAddr &= Sym.isUnnamedAddr();
     if (Res.Prevailing) {
       assert(!GlobalRes.Prevailing &&
@@ -1798,6 +1802,9 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
   // cross module importing, which adds to peak memory via the computed import
   // and export lists.
   GlobalResolutions.reset();
+  // Reset the bump pointer allocator to release its memory.
+  UniqueSymbolSaver.reset();
+  Alloc.reset();
 
   if (Conf.OptLevel > 0)
     ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,

>From c59fff7a80f6e00d26f07808028101ea7cf3d5d7 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Thu, 29 Aug 2024 22:39:38 -0700
Subject: [PATCH 3/8] Re-use GlobalResolution map for key de-duplication

---
 llvm/include/llvm/LTO/LTO.h |  7 +++----
 llvm/lib/LTO/LTO.cpp        | 18 ++++++++++++++----
 2 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h
index f2dc146c0bc501..7282e7bbcf451c 100644
--- a/llvm/include/llvm/LTO/LTO.h
+++ b/llvm/include/llvm/LTO/LTO.h
@@ -353,11 +353,10 @@ class LTO {
     DenseMap<GlobalValue::GUID, StringRef> PrevailingModuleForGUID;
   } ThinLTO;
 
-  std::unique_ptr<llvm::BumpPtrAllocator> Alloc =
-      std::make_unique<BumpPtrAllocator>();
+  std::unique_ptr<llvm::BumpPtrAllocator> Alloc;
 
-  std::unique_ptr<llvm::UniqueStringSaver> UniqueSymbolSaver =
-      std::make_unique<llvm::UniqueStringSaver>(*Alloc);
+  // Symbol saver for global resolution map.
+  std::unique_ptr<llvm::StringSaver> GlobalResolutionSymbolSaver;
 
   // The global resolution for a particular (mangled) symbol name. This is in
   // particular necessary to track whether each symbol can be internalized.
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 1ad209db056f0a..dcba01d4cc05ee 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -78,6 +78,10 @@ cl::opt<bool> EnableLTOInternalization(
     "enable-lto-internalization", cl::init(true), cl::Hidden,
     cl::desc("Enable global value internalization in LTO"));
 
+static cl::opt<bool>
+    LTOKeepSymbolCopies("lto-keep-symbol-copies", cl::init(false), cl::Hidden,
+                        cl::desc("Keep copies of symbols in LTO indexing"));
+
 /// Indicate we are linking with an allocator that supports hot/cold operator
 /// new interfaces.
 extern cl::opt<bool> SupportsHotColdNew;
@@ -609,7 +613,12 @@ LTO::LTO(Config Conf, ThinBackend Backend,
       ThinLTO(std::move(Backend)),
       GlobalResolutions(
           std::make_unique<DenseMap<StringRef, GlobalResolution>>()),
-      LTOMode(LTOMode) {}
+      LTOMode(LTOMode) {
+  if (Conf.KeepSymbolNameCopies || LTOKeepSymbolCopies) {
+    Alloc = std::make_unique<BumpPtrAllocator>();
+    GlobalResolutionSymbolSaver = std::make_unique<llvm::StringSaver>(*Alloc);
+  }
+}
 
 // Requires a destructor for MapVector<BitcodeModule>.
 LTO::~LTO() = default;
@@ -628,8 +637,9 @@ void LTO::addModuleToGlobalRes(ArrayRef<InputFile::Symbol> Syms,
     SymbolResolution Res = *ResI++;
 
     StringRef SymbolName = Sym.getName();
-    if (Conf.KeepSymbolNameCopies)
-      SymbolName = UniqueSymbolSaver->save(SymbolName);
+    // Keep copies of symbols if the client of LTO says so.
+    if (GlobalResolutionSymbolSaver && !GlobalResolutions->contains(SymbolName))
+      SymbolName = GlobalResolutionSymbolSaver->save(SymbolName);
 
     auto &GlobalRes = (*GlobalResolutions)[SymbolName];
     GlobalRes.UnnamedAddr &= Sym.isUnnamedAddr();
@@ -1803,7 +1813,7 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
   // and export lists.
   GlobalResolutions.reset();
   // Reset the bump pointer allocator to release its memory.
-  UniqueSymbolSaver.reset();
+  GlobalResolutionSymbolSaver.reset();
   Alloc.reset();
 
   if (Conf.OptLevel > 0)

>From b169ccbe36d0fe37cbb40e53d15beaf3f33c6ee3 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Fri, 30 Aug 2024 10:06:50 -0700
Subject: [PATCH 4/8] resolve comments and update code comment

---
 llvm/include/llvm/LTO/LTO.h           | 1 -
 llvm/include/llvm/Support/Allocator.h | 1 -
 llvm/lib/LTO/LTO.cpp                  | 2 +-
 3 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h
index 7282e7bbcf451c..8f8af8ed6027de 100644
--- a/llvm/include/llvm/LTO/LTO.h
+++ b/llvm/include/llvm/LTO/LTO.h
@@ -40,7 +40,6 @@ class MemoryBufferRef;
 class Module;
 class raw_pwrite_stream;
 class ToolOutputFile;
-class UniqueStringSaver;
 
 /// Resolve linkage for prevailing symbols in the \p Index. Linkage changes
 /// recorded in the index and the ThinLTO backends must apply the changes to
diff --git a/llvm/include/llvm/Support/Allocator.h b/llvm/include/llvm/Support/Allocator.h
index 3c54556bfefd34..568f0d34032fa2 100644
--- a/llvm/include/llvm/Support/Allocator.h
+++ b/llvm/include/llvm/Support/Allocator.h
@@ -22,7 +22,6 @@
 #include "llvm/Support/AllocatorBase.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cassert>
 #include <cstddef>
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index dcba01d4cc05ee..41e73e222c5e20 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -1812,7 +1812,7 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
   // cross module importing, which adds to peak memory via the computed import
   // and export lists.
   GlobalResolutions.reset();
-  // Reset the bump pointer allocator to release its memory.
+  // Release the string saver memory.
   GlobalResolutionSymbolSaver.reset();
   Alloc.reset();
 

>From dbfb00c119888ccab09fbdb6d201301049174200 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Thu, 5 Sep 2024 09:28:27 -0700
Subject: [PATCH 5/8] resolve review comments

---
 lld/ELF/InputFiles.cpp                       |  9 +++++++--
 lld/include/lld/Common/CommonLinkerContext.h |  4 ++++
 llvm/include/llvm/LTO/LTO.h                  |  2 ++
 llvm/lib/LTO/LTO.cpp                         | 13 +++++++++----
 4 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index 7adc35f20984a5..563fb226ba9fbf 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -1745,7 +1745,7 @@ createBitcodeSymbol(Symbol *&sym, const std::vector<bool> &keptComdats,
   uint8_t visibility = mapVisibility(objSym.getVisibility());
 
   if (!sym)
-    sym = symtab.insert(saver().save(objSym.getName()));
+    sym = symtab.insert(unique_saver().save(objSym.getName()));
 
   int c = objSym.getComdatIndex();
   if (objSym.isUndefined() || (c != -1 && !keptComdats[c])) {
@@ -1797,9 +1797,14 @@ void BitcodeFile::parseLazy() {
   symbols = std::make_unique<Symbol *[]>(numSymbols);
   for (auto [i, irSym] : llvm::enumerate(obj->symbols()))
     if (!irSym.isUndefined()) {
-      auto *sym = symtab.insert(saver().save(irSym.getName()));
+      auto *sym = symtab.insert(unique_saver().save(irSym.getName()));
       sym->resolve(LazySymbol{*this});
       symbols[i] = sym;
+    } else {
+      // Keep copies of per-module undefined symbols for LTO::GlobalResolutions
+      // usage.
+      [[maybe_unused]] StringRef SymbolRef =
+          unique_saver().save(irSym.getName());
     }
 }
 
diff --git a/lld/include/lld/Common/CommonLinkerContext.h b/lld/include/lld/Common/CommonLinkerContext.h
index 0627bbdc8bd877..c8f55a781e4265 100644
--- a/lld/include/lld/Common/CommonLinkerContext.h
+++ b/lld/include/lld/Common/CommonLinkerContext.h
@@ -38,6 +38,7 @@ class CommonLinkerContext {
 
   llvm::BumpPtrAllocator bAlloc;
   llvm::StringSaver saver{bAlloc};
+  llvm::UniqueStringSaver unique_saver{bAlloc};
   llvm::DenseMap<void *, SpecificAllocBase *> instances;
 
   ErrorHandler e;
@@ -55,6 +56,9 @@ template <typename T = CommonLinkerContext> T &context() {
 bool hasContext();
 
 inline llvm::StringSaver &saver() { return context().saver; }
+inline llvm::UniqueStringSaver &unique_saver() {
+  return context().unique_saver;
+}
 inline llvm::BumpPtrAllocator &bAlloc() { return context().bAlloc; }
 } // namespace lld
 
diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h
index 8f8af8ed6027de..e8ab94f84f3426 100644
--- a/llvm/include/llvm/LTO/LTO.h
+++ b/llvm/include/llvm/LTO/LTO.h
@@ -420,6 +420,8 @@ class LTO {
   std::unique_ptr<llvm::DenseMap<StringRef, GlobalResolution>>
       GlobalResolutions;
 
+  void releaseGlobalResolutionsMemory();
+
   void addModuleToGlobalRes(ArrayRef<InputFile::Symbol> Syms,
                             ArrayRef<SymbolResolution> Res, unsigned Partition,
                             bool InSummary);
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 41e73e222c5e20..13324db1f9be27 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -695,6 +695,14 @@ void LTO::addModuleToGlobalRes(ArrayRef<InputFile::Symbol> Syms,
   }
 }
 
+void LTO::releaseGlobalResolutionsMemory() {
+  // Release GlobalResolutions dense-map itself.
+  GlobalResolutions.reset();
+  // Release the string saver memory.
+  GlobalResolutionSymbolSaver.reset();
+  Alloc.reset();
+}
+
 static void writeToResolutionFile(raw_ostream &OS, InputFile *Input,
                                   ArrayRef<SymbolResolution> Res) {
   StringRef Path = Input->getName();
@@ -1811,10 +1819,7 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
   // are no further accesses. We specifically want to do this before computing
   // cross module importing, which adds to peak memory via the computed import
   // and export lists.
-  GlobalResolutions.reset();
-  // Release the string saver memory.
-  GlobalResolutionSymbolSaver.reset();
-  Alloc.reset();
+  releaseGlobalResolutionsMemory();
 
   if (Conf.OptLevel > 0)
     ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,

>From 106f91c2f252208547c5669f45e6e68f66ebc54c Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Fri, 6 Sep 2024 17:57:29 -0700
Subject: [PATCH 6/8] resolve comments

---
 lld/ELF/InputFiles.cpp      |  3 +--
 llvm/include/llvm/LTO/LTO.h | 11 ++++++-----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index 47ca4eac044684..da69c4882ead21 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -1807,8 +1807,7 @@ void BitcodeFile::parseLazy() {
     } else {
       // Keep copies of per-module undefined symbols for LTO::GlobalResolutions
       // usage.
-      [[maybe_unused]] StringRef SymbolRef =
-          unique_saver().save(irSym.getName());
+      unique_saver().save(irSym.getName());
     }
 }
 
diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h
index e970a6cb8e3cdb..782f37dc8d4404 100644
--- a/llvm/include/llvm/LTO/LTO.h
+++ b/llvm/include/llvm/LTO/LTO.h
@@ -350,11 +350,6 @@ class LTO {
     DenseMap<GlobalValue::GUID, StringRef> PrevailingModuleForGUID;
   } ThinLTO;
 
-  std::unique_ptr<llvm::BumpPtrAllocator> Alloc;
-
-  // Symbol saver for global resolution map.
-  std::unique_ptr<llvm::StringSaver> GlobalResolutionSymbolSaver;
-
   // The global resolution for a particular (mangled) symbol name. This is in
   // particular necessary to track whether each symbol can be internalized.
   // Because any input file may introduce a new cross-partition reference, we
@@ -412,6 +407,12 @@ class LTO {
     };
   };
 
+  // GlobalResolutionSymbolSaver allocator.
+  std::unique_ptr<llvm::BumpPtrAllocator> Alloc;
+
+  // Symbol saver for global resolution map.
+  std::unique_ptr<llvm::StringSaver> GlobalResolutionSymbolSaver;
+
   // Global mapping from mangled symbol names to resolutions.
   // Make this an unique_ptr to guard against accessing after it has been reset
   // (to reduce memory after we're done with it).

>From 9776ed44cfb26172480145aed8f59ba78a6fa2ea Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Sun, 8 Sep 2024 17:49:01 -0700
Subject: [PATCH 7/8] fix use-after-free

---
 lld/ELF/InputFiles.cpp              | 18 ++++++++++--------
 llvm/include/llvm/LTO/LTO.h         |  2 +-
 llvm/include/llvm/Object/IRSymtab.h |  3 ++-
 3 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index da69c4882ead21..cad9ec322d1385 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -1746,8 +1746,10 @@ createBitcodeSymbol(Symbol *&sym, const std::vector<bool> &keptComdats,
 
   // Symbols can be duplicated in bitcode files because of '#include' and
   // linkonce_odr. Use unique_saver to save symbol names for de-duplication.
-  if (!sym)
-    sym = symtab.insert(unique_saver().save(objSym.getName()));
+  if (!sym) {
+    objSym.Name = unique_saver().save(objSym.getName());
+    sym = symtab.insert(objSym.getName());
+  }
 
   int c = objSym.getComdatIndex();
   if (objSym.isUndefined() || (c != -1 && !keptComdats[c])) {
@@ -1797,18 +1799,18 @@ void BitcodeFile::parse() {
 void BitcodeFile::parseLazy() {
   numSymbols = obj->symbols().size();
   symbols = std::make_unique<Symbol *[]>(numSymbols);
-  for (auto [i, irSym] : llvm::enumerate(obj->symbols()))
+  for (auto [i, irSym] : llvm::enumerate(obj->symbols())) {
+    // Keep copies of per-module undefined symbols for LTO::GlobalResolutions
+    // usage.
+    irSym.Name = unique_saver().save(irSym.getName());
     if (!irSym.isUndefined()) {
       // Symbols can be duplicated in bitcode files because of '#include' and
       // linkonce_odr. Use unique_saver to save symbol names for de-duplication.
-      auto *sym = symtab.insert(unique_saver().save(irSym.getName()));
+      auto *sym = symtab.insert(irSym.getName());
       sym->resolve(LazySymbol{*this});
       symbols[i] = sym;
-    } else {
-      // Keep copies of per-module undefined symbols for LTO::GlobalResolutions
-      // usage.
-      unique_saver().save(irSym.getName());
     }
+  }
 }
 
 void BitcodeFile::postParse() {
diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h
index 782f37dc8d4404..e08dfbe1f236ee 100644
--- a/llvm/include/llvm/LTO/LTO.h
+++ b/llvm/include/llvm/LTO/LTO.h
@@ -138,7 +138,7 @@ class InputFile {
 
   /// The purpose of this class is to only expose the symbol information that an
   /// LTO client should need in order to do symbol resolution.
-  class Symbol : irsymtab::Symbol {
+  struct Symbol : irsymtab::Symbol {
     friend LTO;
 
   public:
diff --git a/llvm/include/llvm/Object/IRSymtab.h b/llvm/include/llvm/Object/IRSymtab.h
index 72a51ffa1022de..4e0013ea767e3e 100644
--- a/llvm/include/llvm/Object/IRSymtab.h
+++ b/llvm/include/llvm/Object/IRSymtab.h
@@ -169,7 +169,8 @@ Error build(ArrayRef<Module *> Mods, SmallVector<char, 0> &Symtab,
 /// possibly a storage::Uncommon.
 struct Symbol {
   // Copied from storage::Symbol.
-  StringRef Name, IRName;
+  mutable StringRef Name;
+  StringRef IRName;
   int ComdatIndex;
   uint32_t Flags;
 

>From b700ca1f10d1a0de7fd8080f37d4571b7aa17211 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Mon, 9 Sep 2024 10:13:57 -0700
Subject: [PATCH 8/8] resolve comments

---
 lld/ELF/InputFiles.cpp      | 16 ++++++++++------
 llvm/include/llvm/LTO/LTO.h |  4 ++--
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index cad9ec322d1385..db520178f3f534 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -1744,9 +1744,12 @@ createBitcodeSymbol(Symbol *&sym, const std::vector<bool> &keptComdats,
   uint8_t type = objSym.isTLS() ? STT_TLS : STT_NOTYPE;
   uint8_t visibility = mapVisibility(objSym.getVisibility());
 
-  // Symbols can be duplicated in bitcode files because of '#include' and
-  // linkonce_odr. Use unique_saver to save symbol names for de-duplication.
   if (!sym) {
+    // Symbols can be duplicated in bitcode files because of '#include' and
+    // linkonce_odr. Use unique_saver to save symbol names for de-duplication.
+    // Update objSym.Name to reference (via StringRef) the string saver's copy;
+    // this way LTO can reference the same string saver's copy rather than
+    // keeping copies of its own.
     objSym.Name = unique_saver().save(objSym.getName());
     sym = symtab.insert(objSym.getName());
   }
@@ -1800,12 +1803,13 @@ void BitcodeFile::parseLazy() {
   numSymbols = obj->symbols().size();
   symbols = std::make_unique<Symbol *[]>(numSymbols);
   for (auto [i, irSym] : llvm::enumerate(obj->symbols())) {
-    // Keep copies of per-module undefined symbols for LTO::GlobalResolutions
-    // usage.
+    // Symbols can be duplicated in bitcode files because of '#include' and
+    // linkonce_odr. Use unique_saver to save symbol names for de-duplication.
+    // Update objSym.Name to reference (via StringRef) the string saver's copy;
+    // this way LTO can reference the same string saver's copy rather than
+    // keeping copies of its own.
     irSym.Name = unique_saver().save(irSym.getName());
     if (!irSym.isUndefined()) {
-      // Symbols can be duplicated in bitcode files because of '#include' and
-      // linkonce_odr. Use unique_saver to save symbol names for de-duplication.
       auto *sym = symtab.insert(irSym.getName());
       sym->resolve(LazySymbol{*this});
       symbols[i] = sym;
diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h
index e08dfbe1f236ee..119f872b26c4ff 100644
--- a/llvm/include/llvm/LTO/LTO.h
+++ b/llvm/include/llvm/LTO/LTO.h
@@ -136,8 +136,8 @@ class InputFile {
   /// Create an InputFile.
   static Expected<std::unique_ptr<InputFile>> create(MemoryBufferRef Object);
 
-  /// The purpose of this class is to only expose the symbol information that an
-  /// LTO client should need in order to do symbol resolution.
+  /// The purpose of this struct is to only expose the symbol information that
+  /// an LTO client should need in order to do symbol resolution.
   struct Symbol : irsymtab::Symbol {
     friend LTO;
 



More information about the llvm-commits mailing list