[lld] 9ffeaaa - [LLD] [COFF] Use StringTableBuilder to optimize the string table
Martin Storsjö via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 1 08:44:20 PST 2022
Author: Martin Storsjö
Date: 2022-03-01T18:44:03+02:00
New Revision: 9ffeaaa0ea54307db309104696a0b6cce6ddda38
URL: https://github.com/llvm/llvm-project/commit/9ffeaaa0ea54307db309104696a0b6cce6ddda38
DIFF: https://github.com/llvm/llvm-project/commit/9ffeaaa0ea54307db309104696a0b6cce6ddda38.diff
LOG: [LLD] [COFF] Use StringTableBuilder to optimize the string table
This does tail merging (and deduplication) of the strings.
On a statically linked clang.exe, this shrinks the ~17 MB string
table by around 0.5 MB. This adds ~160 ms to the linking time,
which was originally around 950 ms.
For cases where neither `-debug:symtab` nor `-debug:dwarf` is set, the
string table is only used for long section names, so this change
shouldn't make any difference at all.
Differential Revision: https://reviews.llvm.org/D120677
Added:
Modified:
lld/COFF/Writer.cpp
Removed:
################################################################################
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index eec7e9d497205..94466e7adbd76 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -24,6 +24,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
@@ -195,7 +196,9 @@ class PartialSectionKey {
// The writer writes a SymbolTable result to a file.
class Writer {
public:
- Writer(COFFLinkerContext &c) : buffer(errorHandler().outputBuffer), ctx(c) {}
+ Writer(COFFLinkerContext &c)
+ : buffer(errorHandler().outputBuffer),
+ strtab(StringTableBuilder::WinCOFF), ctx(c) {}
void run();
private:
@@ -240,7 +243,6 @@ class Writer {
PartialSection *findPartialSection(StringRef name, uint32_t outChars);
llvm::Optional<coff_symbol16> createSymbol(Defined *d);
- size_t addEntryToStringTable(StringRef str);
OutputSection *findSection(StringRef name);
void addBaserels();
@@ -250,7 +252,7 @@ class Writer {
std::unique_ptr<FileOutputBuffer> &buffer;
std::map<PartialSectionKey, PartialSection *> partialSections;
- std::vector<char> strtab;
+ StringTableBuilder strtab;
std::vector<llvm::object::coff_symbol16> outputSymtab;
IdataContents idata;
Chunk *importTableStart = nullptr;
@@ -1120,14 +1122,6 @@ void Writer::assignOutputSectionIndices() {
sc->setOutputSectionIdx(mc->getOutputSectionIdx());
}
-size_t Writer::addEntryToStringTable(StringRef str) {
- assert(str.size() > COFF::NameSize);
- size_t offsetOfEntry = strtab.size() + 4; // +4 for the size field
- strtab.insert(strtab.end(), str.begin(), str.end());
- strtab.push_back('\0');
- return offsetOfEntry;
-}
-
Optional<coff_symbol16> Writer::createSymbol(Defined *def) {
coff_symbol16 sym;
switch (def->kind()) {
@@ -1164,7 +1158,8 @@ Optional<coff_symbol16> Writer::createSymbol(Defined *def) {
StringRef name = def->getName();
if (name.size() > COFF::NameSize) {
sym.Name.Offset.Zeroes = 0;
- sym.Name.Offset.Offset = addEntryToStringTable(name);
+ sym.Name.Offset.Offset = 0; // Filled in later
+ strtab.add(name);
} else {
memset(sym.Name.ShortName, 0, COFF::NameSize);
memcpy(sym.Name.ShortName, name.data(), name.size());
@@ -1191,6 +1186,7 @@ void Writer::createSymbolAndStringTable() {
// solution where discardable sections have long names preserved and
// non-discardable sections have their names truncated, to ensure that any
// section which is mapped at runtime also has its name mapped at runtime.
+ std::vector<OutputSection *> longNameSections;
for (OutputSection *sec : ctx.outputSections) {
if (sec->name.size() <= COFF::NameSize)
continue;
@@ -1201,9 +1197,12 @@ void Writer::createSymbolAndStringTable() {
" is longer than 8 characters and will use a non-standard string "
"table");
}
- sec->setStringTableOff(addEntryToStringTable(sec->name));
+
+ strtab.add(sec->name);
+ longNameSections.push_back(sec);
}
+ std::vector<std::pair<size_t, StringRef>> longNameSymbols;
if (config->debugDwarf || config->debugSymtab) {
for (ObjFile *file : ctx.objFileInstances) {
for (Symbol *b : file->getSymbols()) {
@@ -1218,20 +1217,33 @@ void Writer::createSymbolAndStringTable() {
continue;
}
- if (Optional<coff_symbol16> sym = createSymbol(d))
+ if (Optional<coff_symbol16> sym = createSymbol(d)) {
outputSymtab.push_back(*sym);
+ if (d->getName().size() > COFF::NameSize)
+ longNameSymbols.push_back({outputSymtab.size() - 1, d->getName()});
+ }
}
}
}
- if (outputSymtab.empty() && strtab.empty())
+ strtab.finalize();
+
+ for (OutputSection *sec : longNameSections)
+ sec->setStringTableOff(strtab.getOffset(sec->name));
+
+ for (auto P : longNameSymbols) {
+ coff_symbol16 &sym = outputSymtab[P.first];
+ sym.Name.Offset.Offset = strtab.getOffset(P.second);
+ }
+
+ if (outputSymtab.empty() && strtab.getSize() <= 4)
return;
// We position the symbol table to be adjacent to the end of the last section.
uint64_t fileOff = fileSize;
pointerToSymbolTable = fileOff;
fileOff += outputSymtab.size() * sizeof(coff_symbol16);
- fileOff += 4 + strtab.size();
+ fileOff += strtab.getSize();
fileSize = alignTo(fileOff, config->fileAlign);
}
@@ -1506,7 +1518,7 @@ template <typename PEHeaderTy> void Writer::writeHeader() {
sectionTable = ArrayRef<uint8_t>(
buf - ctx.outputSections.size() * sizeof(coff_section), buf);
- if (outputSymtab.empty() && strtab.empty())
+ if (outputSymtab.empty() && strtab.getSize() <= 4)
return;
coff->PointerToSymbolTable = pointerToSymbolTable;
@@ -1519,9 +1531,7 @@ template <typename PEHeaderTy> void Writer::writeHeader() {
// Create the string table, it follows immediately after the symbol table.
// The first 4 bytes is length including itself.
buf = reinterpret_cast<uint8_t *>(&symbolTable[numberOfSymbols]);
- write32le(buf, strtab.size() + 4);
- if (!strtab.empty())
- memcpy(buf + 4, strtab.data(), strtab.size());
+ strtab.write(buf);
}
void Writer::openFile(StringRef path) {
More information about the llvm-commits
mailing list