[lld] 64498c5 - [LTO][ELF][lld] Use unique string saver in ELF bitcode symbol parsing (#106670)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 5 14:49:07 PDT 2024
Author: Mingming Liu
Date: 2024-09-05T14:49:03-07:00
New Revision: 64498c54831bed9cf069e0923b9b73678c6451d8
URL: https://github.com/llvm/llvm-project/commit/64498c54831bed9cf069e0923b9b73678c6451d8
DIFF: https://github.com/llvm/llvm-project/commit/64498c54831bed9cf069e0923b9b73678c6451d8.diff
LOG: [LTO][ELF][lld] Use unique string saver in ELF bitcode symbol parsing (#106670)
lld ELF
[BitcodeFile](https://github.com/llvm/llvm-project/blob/a527248a3c2d638b0c92a06992f3f1c1f80842ad/lld/ELF/InputFiles.h#L328)
uses [string
saver](https://github.com/llvm/llvm-project/blob/a527248a3c2d638b0c92a06992f3f1c1f80842ad/lld/include/lld/Common/CommonLinkerContext.h#L57)
to keep copies of bitcode symbols. Symbol duplication is very common
when compiling application binaries.
This change proposes to introduce a UniqueStringSaver in the lld context and
use it for bitcode symbol parsing. The implementation covers ELF only.
Similar opportunities should exist on other (COFF, MachO, wasm) formats.
For an internal production binary where LTO indexing originally takes ~10GiB,
this change optimizes away ~800MiB (~7.8%), measured by
https://github.com/google/pprof. Flame graph breaks down memory by usage
call stacks and agrees with this measurement.
Added:
Modified:
lld/ELF/InputFiles.cpp
lld/include/lld/Common/CommonLinkerContext.h
Removed:
################################################################################
diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index 7adc35f20984a5..1570adf1370930 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -1744,8 +1744,10 @@ createBitcodeSymbol(Symbol *&sym, const std::vector<bool> &keptComdats,
uint8_t type = objSym.isTLS() ? STT_TLS : STT_NOTYPE;
uint8_t visibility = mapVisibility(objSym.getVisibility());
+ // Symbols can be duplicated in bitcode files because of '#include' and
+ // linkonce_odr. Use unique_saver to save symbol names for de-duplication.
if (!sym)
- sym = symtab.insert(saver().save(objSym.getName()));
+ sym = symtab.insert(unique_saver().save(objSym.getName()));
int c = objSym.getComdatIndex();
if (objSym.isUndefined() || (c != -1 && !keptComdats[c])) {
@@ -1797,7 +1799,9 @@ void BitcodeFile::parseLazy() {
symbols = std::make_unique<Symbol *[]>(numSymbols);
for (auto [i, irSym] : llvm::enumerate(obj->symbols()))
if (!irSym.isUndefined()) {
- auto *sym = symtab.insert(saver().save(irSym.getName()));
+ // Symbols can be duplicated in bitcode files because of '#include' and
+ // linkonce_odr. Use unique_saver to save symbol names for de-duplication.
+ auto *sym = symtab.insert(unique_saver().save(irSym.getName()));
sym->resolve(LazySymbol{*this});
symbols[i] = sym;
}
diff --git a/lld/include/lld/Common/CommonLinkerContext.h b/lld/include/lld/Common/CommonLinkerContext.h
index 0627bbdc8bd877..9970dfcb713f8d 100644
--- a/lld/include/lld/Common/CommonLinkerContext.h
+++ b/lld/include/lld/Common/CommonLinkerContext.h
@@ -38,6 +38,7 @@ class CommonLinkerContext {
llvm::BumpPtrAllocator bAlloc;
llvm::StringSaver saver{bAlloc};
+ llvm::UniqueStringSaver unique_saver{bAlloc};
llvm::DenseMap<void *, SpecificAllocBase *> instances;
ErrorHandler e;
@@ -54,8 +55,13 @@ template <typename T = CommonLinkerContext> T &context() {
bool hasContext();
-inline llvm::StringSaver &saver() { return context().saver; }
inline llvm::BumpPtrAllocator &bAlloc() { return context().bAlloc; }
+inline llvm::StringSaver &saver() { return context().saver; }
+inline llvm::UniqueStringSaver &unique_saver() {
+ // FIXME: Look into other places where duplications are common in saved
+ // strings and unique saver make sense.
+ return context().unique_saver;
+}
} // namespace lld
#endif
More information about the llvm-commits
mailing list