[lld] [LTO][ELF][lld] Use unique string saver in ELF bitcode symbol parsing (PR #106670)
Mingming Liu via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 5 00:47:46 PDT 2024
https://github.com/minglotus-6 updated https://github.com/llvm/llvm-project/pull/106670
>From 7074440477eb9a689ff0210b2225c5a0e0b53c7d Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Thu, 29 Aug 2024 23:05:14 -0700
Subject: [PATCH 1/2] [lld][ELF]Use unique saver in bitcode symbol parsing
---
lld/ELF/InputFiles.cpp | 24 ++++++++++++++++----
lld/ELF/InputFiles.h | 10 ++++++++
lld/include/lld/Common/CommonLinkerContext.h | 5 ++++
3 files changed, 35 insertions(+), 4 deletions(-)
diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index 7adc35f20984a5..ed946a91404088 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -1695,6 +1695,22 @@ static uint8_t getOsAbi(const Triple &t) {
}
}
+StringRef BitcodeFile::saved_symbol(const char *S) {
+ return unique_saver().save(S);
+}
+
+StringRef BitcodeFile::saved_symbol(StringRef S) {
+ return unique_saver().save(S);
+}
+
+StringRef BitcodeFile::saved_symbol(const Twine &S) {
+ return unique_saver().save(S);
+}
+
+StringRef BitcodeFile::saved_symbol(const std::string &S) {
+ return unique_saver().save(S);
+}
+
BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
uint64_t offsetInArchive, bool lazy)
: InputFile(BitcodeKind, mb) {
@@ -1712,8 +1728,8 @@ BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
// symbols later in the link stage). So we append file offset to make
// filename unique.
StringRef name = archiveName.empty()
- ? saver().save(path)
- : saver().save(archiveName + "(" + path::filename(path) +
+ ? saved_symbol(path)
+ : saved_symbol(archiveName + "(" + path::filename(path) +
" at " + utostr(offsetInArchive) + ")");
MemoryBufferRef mbref(mb.getBuffer(), name);
@@ -1745,7 +1761,7 @@ createBitcodeSymbol(Symbol *&sym, const std::vector<bool> &keptComdats,
uint8_t visibility = mapVisibility(objSym.getVisibility());
if (!sym)
- sym = symtab.insert(saver().save(objSym.getName()));
+ sym = symtab.insert(unique_saver().save(objSym.getName()));
int c = objSym.getComdatIndex();
if (objSym.isUndefined() || (c != -1 && !keptComdats[c])) {
@@ -1797,7 +1813,7 @@ void BitcodeFile::parseLazy() {
symbols = std::make_unique<Symbol *[]>(numSymbols);
for (auto [i, irSym] : llvm::enumerate(obj->symbols()))
if (!irSym.isUndefined()) {
- auto *sym = symtab.insert(saver().save(irSym.getName()));
+ auto *sym = symtab.insert(saved_symbol(irSym.getName()));
sym->resolve(LazySymbol{*this});
symbols[i] = sym;
}
diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h
index b0a74877d0aaeb..28c4c1a1f92962 100644
--- a/lld/ELF/InputFiles.h
+++ b/lld/ELF/InputFiles.h
@@ -9,6 +9,8 @@
#ifndef LLD_ELF_INPUT_FILES_H
#define LLD_ELF_INPUT_FILES_H
+#include <string>
+
#include "Config.h"
#include "Symbols.h"
#include "lld/Common/ErrorHandler.h"
@@ -23,6 +25,8 @@
namespace llvm {
struct DILineInfo;
class TarWriter;
+class StringRef;
+class Twine;
namespace lto {
class InputFile;
}
@@ -335,6 +339,12 @@ class BitcodeFile : public InputFile {
void postParse();
std::unique_ptr<llvm::lto::InputFile> obj;
std::vector<bool> keptComdats;
+
+private:
+ StringRef saved_symbol(const char *S);
+ StringRef saved_symbol(StringRef S);
+ StringRef saved_symbol(const Twine &S);
+ StringRef saved_symbol(const std::string &S);
};
// .so file.
diff --git a/lld/include/lld/Common/CommonLinkerContext.h b/lld/include/lld/Common/CommonLinkerContext.h
index 0627bbdc8bd877..0c5349fbf16cd1 100644
--- a/lld/include/lld/Common/CommonLinkerContext.h
+++ b/lld/include/lld/Common/CommonLinkerContext.h
@@ -38,6 +38,7 @@ class CommonLinkerContext {
llvm::BumpPtrAllocator bAlloc;
llvm::StringSaver saver{bAlloc};
+ llvm::UniqueStringSaver unique_saver{bAlloc};
llvm::DenseMap<void *, SpecificAllocBase *> instances;
ErrorHandler e;
@@ -56,6 +57,10 @@ bool hasContext();
inline llvm::StringSaver &saver() { return context().saver; }
inline llvm::BumpPtrAllocator &bAlloc() { return context().bAlloc; }
+
+inline llvm::UniqueStringSaver &unique_saver() {
+ return context().unique_saver;
+}
} // namespace lld
#endif
>From 86f030eda31a0278983b310d75cc025aaee70554 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Thu, 5 Sep 2024 00:00:37 -0700
Subject: [PATCH 2/2] resolve review comments
---
lld/ELF/InputFiles.cpp | 26 ++++++--------------
lld/ELF/InputFiles.h | 10 --------
lld/include/lld/Common/CommonLinkerContext.h | 5 ++--
3 files changed, 10 insertions(+), 31 deletions(-)
diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index ed946a91404088..1570adf1370930 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -1695,22 +1695,6 @@ static uint8_t getOsAbi(const Triple &t) {
}
}
-StringRef BitcodeFile::saved_symbol(const char *S) {
- return unique_saver().save(S);
-}
-
-StringRef BitcodeFile::saved_symbol(StringRef S) {
- return unique_saver().save(S);
-}
-
-StringRef BitcodeFile::saved_symbol(const Twine &S) {
- return unique_saver().save(S);
-}
-
-StringRef BitcodeFile::saved_symbol(const std::string &S) {
- return unique_saver().save(S);
-}
-
BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
uint64_t offsetInArchive, bool lazy)
: InputFile(BitcodeKind, mb) {
@@ -1728,8 +1712,8 @@ BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
// symbols later in the link stage). So we append file offset to make
// filename unique.
StringRef name = archiveName.empty()
- ? saved_symbol(path)
- : saved_symbol(archiveName + "(" + path::filename(path) +
+ ? saver().save(path)
+ : saver().save(archiveName + "(" + path::filename(path) +
" at " + utostr(offsetInArchive) + ")");
MemoryBufferRef mbref(mb.getBuffer(), name);
@@ -1760,6 +1744,8 @@ createBitcodeSymbol(Symbol *&sym, const std::vector<bool> &keptComdats,
uint8_t type = objSym.isTLS() ? STT_TLS : STT_NOTYPE;
uint8_t visibility = mapVisibility(objSym.getVisibility());
+ // Symbols can be duplicated in bitcode files because of '#include' and
+ // linkonce_odr. Use unique_saver to save symbol names for de-duplication.
if (!sym)
sym = symtab.insert(unique_saver().save(objSym.getName()));
@@ -1813,7 +1799,9 @@ void BitcodeFile::parseLazy() {
symbols = std::make_unique<Symbol *[]>(numSymbols);
for (auto [i, irSym] : llvm::enumerate(obj->symbols()))
if (!irSym.isUndefined()) {
- auto *sym = symtab.insert(saved_symbol(irSym.getName()));
+ // Symbols can be duplicated in bitcode files because of '#include' and
+ // linkonce_odr. Use unique_saver to save symbol names for de-duplication.
+ auto *sym = symtab.insert(unique_saver().save(irSym.getName()));
sym->resolve(LazySymbol{*this});
symbols[i] = sym;
}
diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h
index 28c4c1a1f92962..b0a74877d0aaeb 100644
--- a/lld/ELF/InputFiles.h
+++ b/lld/ELF/InputFiles.h
@@ -9,8 +9,6 @@
#ifndef LLD_ELF_INPUT_FILES_H
#define LLD_ELF_INPUT_FILES_H
-#include <string>
-
#include "Config.h"
#include "Symbols.h"
#include "lld/Common/ErrorHandler.h"
@@ -25,8 +23,6 @@
namespace llvm {
struct DILineInfo;
class TarWriter;
-class StringRef;
-class Twine;
namespace lto {
class InputFile;
}
@@ -339,12 +335,6 @@ class BitcodeFile : public InputFile {
void postParse();
std::unique_ptr<llvm::lto::InputFile> obj;
std::vector<bool> keptComdats;
-
-private:
- StringRef saved_symbol(const char *S);
- StringRef saved_symbol(StringRef S);
- StringRef saved_symbol(const Twine &S);
- StringRef saved_symbol(const std::string &S);
};
// .so file.
diff --git a/lld/include/lld/Common/CommonLinkerContext.h b/lld/include/lld/Common/CommonLinkerContext.h
index 0c5349fbf16cd1..9970dfcb713f8d 100644
--- a/lld/include/lld/Common/CommonLinkerContext.h
+++ b/lld/include/lld/Common/CommonLinkerContext.h
@@ -55,10 +55,11 @@ template <typename T = CommonLinkerContext> T &context() {
bool hasContext();
-inline llvm::StringSaver &saver() { return context().saver; }
inline llvm::BumpPtrAllocator &bAlloc() { return context().bAlloc; }
-
+inline llvm::StringSaver &saver() { return context().saver; }
inline llvm::UniqueStringSaver &unique_saver() {
+ // FIXME: Look into other places where duplications are common in saved
+ // strings and unique saver make sense.
return context().unique_saver;
}
} // namespace lld
More information about the llvm-commits
mailing list