[lld] [LTO][ELF][lld] Use unique saver in ELF bitcode symbol parsing (PR #106670)

via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 29 23:48:55 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-lld-elf

Author: Mingming Liu (minglotus-6)

<details>
<summary>Changes</summary>

lld ELF [BitcodeFile](https://github.com/llvm/llvm-project/blob/a527248a3c2d638b0c92a06992f3f1c1f80842ad/lld/ELF/InputFiles.h#L328) uses a [string saver](https://github.com/llvm/llvm-project/blob/a527248a3c2d638b0c92a06992f3f1c1f80842ad/lld/include/lld/Common/CommonLinkerContext.h#L57) to keep copies of bitcode symbols. Symbol duplication is very common when compiling application binaries, so the same symbol name ends up being saved many times.

This change introduces a `UniqueStringSaver` in the lld context and uses it for bitcode symbol parsing. The implementation covers ELF only; similar opportunities should exist for the other formats (COFF, MachO, wasm).

For an internal production binary where LTO indexing originally takes ~10GiB, this change optimizes away ~800MiB (~7.8%), as measured by https://github.com/google/pprof. The flame graph, which breaks down memory usage by call stack, agrees with this measurement.


---
Full diff: https://github.com/llvm/llvm-project/pull/106670.diff


3 Files Affected:

- (modified) lld/ELF/InputFiles.cpp (+20-4) 
- (modified) lld/ELF/InputFiles.h (+10) 
- (modified) lld/include/lld/Common/CommonLinkerContext.h (+5) 


``````````diff
diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index 7adc35f20984a5..ed946a91404088 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -1695,6 +1695,22 @@ static uint8_t getOsAbi(const Triple &t) {
   }
 }
 
+StringRef BitcodeFile::saved_symbol(const char *S) {
+  return unique_saver().save(S);
+}
+
+StringRef BitcodeFile::saved_symbol(StringRef S) {
+  return unique_saver().save(S);
+}
+
+StringRef BitcodeFile::saved_symbol(const Twine &S) {
+  return unique_saver().save(S);
+}
+
+StringRef BitcodeFile::saved_symbol(const std::string &S) {
+  return unique_saver().save(S);
+}
+
 BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
                          uint64_t offsetInArchive, bool lazy)
     : InputFile(BitcodeKind, mb) {
@@ -1712,8 +1728,8 @@ BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
   // symbols later in the link stage). So we append file offset to make
   // filename unique.
   StringRef name = archiveName.empty()
-                       ? saver().save(path)
-                       : saver().save(archiveName + "(" + path::filename(path) +
+                       ? saved_symbol(path)
+                       : saved_symbol(archiveName + "(" + path::filename(path) +
                                       " at " + utostr(offsetInArchive) + ")");
   MemoryBufferRef mbref(mb.getBuffer(), name);
 
@@ -1745,7 +1761,7 @@ createBitcodeSymbol(Symbol *&sym, const std::vector<bool> &keptComdats,
   uint8_t visibility = mapVisibility(objSym.getVisibility());
 
   if (!sym)
-    sym = symtab.insert(saver().save(objSym.getName()));
+    sym = symtab.insert(unique_saver().save(objSym.getName()));
 
   int c = objSym.getComdatIndex();
   if (objSym.isUndefined() || (c != -1 && !keptComdats[c])) {
@@ -1797,7 +1813,7 @@ void BitcodeFile::parseLazy() {
   symbols = std::make_unique<Symbol *[]>(numSymbols);
   for (auto [i, irSym] : llvm::enumerate(obj->symbols()))
     if (!irSym.isUndefined()) {
-      auto *sym = symtab.insert(saver().save(irSym.getName()));
+      auto *sym = symtab.insert(saved_symbol(irSym.getName()));
       sym->resolve(LazySymbol{*this});
       symbols[i] = sym;
     }
diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h
index b0a74877d0aaeb..28c4c1a1f92962 100644
--- a/lld/ELF/InputFiles.h
+++ b/lld/ELF/InputFiles.h
@@ -9,6 +9,8 @@
 #ifndef LLD_ELF_INPUT_FILES_H
 #define LLD_ELF_INPUT_FILES_H
 
+#include <string>
+
 #include "Config.h"
 #include "Symbols.h"
 #include "lld/Common/ErrorHandler.h"
@@ -23,6 +25,8 @@
 namespace llvm {
 struct DILineInfo;
 class TarWriter;
+class StringRef;
+class Twine;
 namespace lto {
 class InputFile;
 }
@@ -335,6 +339,12 @@ class BitcodeFile : public InputFile {
   void postParse();
   std::unique_ptr<llvm::lto::InputFile> obj;
   std::vector<bool> keptComdats;
+
+private:
+  StringRef saved_symbol(const char *S);
+  StringRef saved_symbol(StringRef S);
+  StringRef saved_symbol(const Twine &S);
+  StringRef saved_symbol(const std::string &S);
 };
 
 // .so file.
diff --git a/lld/include/lld/Common/CommonLinkerContext.h b/lld/include/lld/Common/CommonLinkerContext.h
index 0627bbdc8bd877..0c5349fbf16cd1 100644
--- a/lld/include/lld/Common/CommonLinkerContext.h
+++ b/lld/include/lld/Common/CommonLinkerContext.h
@@ -38,6 +38,7 @@ class CommonLinkerContext {
 
   llvm::BumpPtrAllocator bAlloc;
   llvm::StringSaver saver{bAlloc};
+  llvm::UniqueStringSaver unique_saver{bAlloc};
   llvm::DenseMap<void *, SpecificAllocBase *> instances;
 
   ErrorHandler e;
@@ -56,6 +57,10 @@ bool hasContext();
 
 inline llvm::StringSaver &saver() { return context().saver; }
 inline llvm::BumpPtrAllocator &bAlloc() { return context().bAlloc; }
+
+inline llvm::UniqueStringSaver &unique_saver() {
+  return context().unique_saver;
+}
 } // namespace lld
 
 #endif

``````````

</details>


https://github.com/llvm/llvm-project/pull/106670


More information about the llvm-commits mailing list