[lld] 9cc9efc - [lld][COFF] Remove duplicate strtab entries (#141197)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 20 22:44:14 PDT 2025
Author: Haohai Wen
Date: 2025-06-21T13:44:10+08:00
New Revision: 9cc9efc483339ece1d52923569bb755db42b69f3
URL: https://github.com/llvm/llvm-project/commit/9cc9efc483339ece1d52923569bb755db42b69f3
DIFF: https://github.com/llvm/llvm-project/commit/9cc9efc483339ece1d52923569bb755db42b69f3.diff
LOG: [lld][COFF] Remove duplicate strtab entries (#141197)
String table size is too big for large binary when symbol table is
enabled. Some strings in strtab is same so it can be reused.
This patch revives 9ffeaaa authored by mstorsjo with the prioritized
string table builder to fix debug section name issue (see 4d2eda2
for more details).
---------
Co-authored-by: Wen Haohai <whh108 at live.com>
Co-authored-by: James Henderson <James.Henderson at sony.com>
Added:
lld/test/COFF/strtab.s
Modified:
lld/COFF/Writer.cpp
llvm/include/llvm/MC/StringTableBuilder.h
llvm/lib/MC/StringTableBuilder.cpp
Removed:
################################################################################
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index 5f1da5e79daca..076561807af47 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -24,6 +24,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/FileOutputBuffer.h"
#include "llvm/Support/Parallel.h"
@@ -201,7 +202,8 @@ struct ChunkRange {
class Writer {
public:
Writer(COFFLinkerContext &c)
- : buffer(c.e.outputBuffer), delayIdata(c), ctx(c) {}
+ : buffer(c.e.outputBuffer), strtab(StringTableBuilder::WinCOFF),
+ delayIdata(c), ctx(c) {}
void run();
private:
@@ -281,7 +283,7 @@ class Writer {
std::unique_ptr<FileOutputBuffer> &buffer;
std::map<PartialSectionKey, PartialSection *> partialSections;
- std::vector<char> strtab;
+ StringTableBuilder strtab;
std::vector<llvm::object::coff_symbol16> outputSymtab;
std::vector<ECCodeMapEntry> codeMap;
IdataContents idata;
@@ -1434,14 +1436,6 @@ void Writer::assignOutputSectionIndices() {
sc->setOutputSectionIdx(mc->getOutputSectionIdx());
}
-size_t Writer::addEntryToStringTable(StringRef str) {
- assert(str.size() > COFF::NameSize);
- size_t offsetOfEntry = strtab.size() + 4; // +4 for the size field
- strtab.insert(strtab.end(), str.begin(), str.end());
- strtab.push_back('\0');
- return offsetOfEntry;
-}
-
std::optional<coff_symbol16> Writer::createSymbol(Defined *def) {
coff_symbol16 sym;
switch (def->kind()) {
@@ -1482,7 +1476,8 @@ std::optional<coff_symbol16> Writer::createSymbol(Defined *def) {
StringRef name = def->getName();
if (name.size() > COFF::NameSize) {
sym.Name.Offset.Zeroes = 0;
- sym.Name.Offset.Offset = addEntryToStringTable(name);
+ sym.Name.Offset.Offset = 0; // Filled in later.
+ strtab.add(name);
} else {
memset(sym.Name.ShortName, 0, COFF::NameSize);
memcpy(sym.Name.ShortName, name.data(), name.size());
@@ -1514,6 +1509,7 @@ void Writer::createSymbolAndStringTable() {
// solution where discardable sections have long names preserved and
// non-discardable sections have their names truncated, to ensure that any
// section which is mapped at runtime also has its name mapped at runtime.
+ SmallVector<OutputSection *> longNameSections;
for (OutputSection *sec : ctx.outputSections) {
if (sec->name.size() <= COFF::NameSize)
continue;
@@ -1525,9 +1521,13 @@ void Writer::createSymbolAndStringTable() {
<< " is longer than 8 characters and will use a non-standard string "
"table";
}
- sec->setStringTableOff(addEntryToStringTable(sec->name));
+ // Put the section name in the begin of strtab so that its offset is less
+ // than Max7DecimalOffset otherwise lldb/gdb will not read it.
+ strtab.add(sec->name, /*Priority=*/UINT8_MAX);
+ longNameSections.push_back(sec);
}
+ std::vector<std::pair<size_t, StringRef>> longNameSymbols;
if (ctx.config.writeSymtab) {
for (ObjFile *file : ctx.objFileInstances) {
for (Symbol *b : file->getSymbols()) {
@@ -1542,15 +1542,22 @@ void Writer::createSymbolAndStringTable() {
continue;
}
- if (std::optional<coff_symbol16> sym = createSymbol(d))
+ if (std::optional<coff_symbol16> sym = createSymbol(d)) {
+ if (d->getName().size() > COFF::NameSize)
+ longNameSymbols.emplace_back(outputSymtab.size(), d->getName());
outputSymtab.push_back(*sym);
+ }
if (auto *dthunk = dyn_cast<DefinedImportThunk>(d)) {
if (!dthunk->wrappedSym->writtenToSymtab) {
dthunk->wrappedSym->writtenToSymtab = true;
if (std::optional<coff_symbol16> sym =
- createSymbol(dthunk->wrappedSym))
+ createSymbol(dthunk->wrappedSym)) {
+ if (d->getName().size() > COFF::NameSize)
+ longNameSymbols.emplace_back(outputSymtab.size(),
+ dthunk->wrappedSym->getName());
outputSymtab.push_back(*sym);
+ }
}
}
}
@@ -1560,11 +1567,19 @@ void Writer::createSymbolAndStringTable() {
if (outputSymtab.empty() && strtab.empty())
return;
+ strtab.finalize();
+ for (OutputSection *sec : longNameSections)
+ sec->setStringTableOff(strtab.getOffset(sec->name));
+ for (auto P : longNameSymbols) {
+ coff_symbol16 &sym = outputSymtab[P.first];
+ sym.Name.Offset.Offset = strtab.getOffset(P.second);
+ }
+
// We position the symbol table to be adjacent to the end of the last section.
uint64_t fileOff = fileSize;
pointerToSymbolTable = fileOff;
fileOff += outputSymtab.size() * sizeof(coff_symbol16);
- fileOff += 4 + strtab.size();
+ fileOff += strtab.getSize();
fileSize = alignTo(fileOff, ctx.config.fileAlign);
}
@@ -1945,9 +1960,7 @@ template <typename PEHeaderTy> void Writer::writeHeader() {
// Create the string table, it follows immediately after the symbol table.
// The first 4 bytes is length including itself.
buf = reinterpret_cast<uint8_t *>(&symbolTable[numberOfSymbols]);
- write32le(buf, strtab.size() + 4);
- if (!strtab.empty())
- memcpy(buf + 4, strtab.data(), strtab.size());
+ strtab.write(buf);
}
void Writer::openFile(StringRef path) {
diff --git a/lld/test/COFF/strtab.s b/lld/test/COFF/strtab.s
new file mode 100644
index 0000000000000..4d8fa39f56db6
--- /dev/null
+++ b/lld/test/COFF/strtab.s
@@ -0,0 +1,29 @@
+# RUN: llvm-mc -triple=x86_64-windows-msvc %s -filetype=obj -o %t.obj
+# RUN: lld-link -out:%t.exe -entry:main %t.obj -debug:dwarf
+# RUN: llvm-readobj --string-table %t.exe | FileCheck %s
+
+# CHECK: StringTable {
+# CHECK-NEXT: Length: 87
+# CHECK-NEXT: [ 4] .debug_abbrev
+# CHECK-NEXT: [ 12] .debug_line
+# CHECK-NEXT: [ 1e] long_name_symbolz
+# CHECK-NEXT: [ 30] .debug_abbrez
+# CHECK-NEXT: [ 3e] __impl_long_name_symbolA
+# CHECK-NEXT: }
+
+
+.global main
+.text
+main:
+long_name_symbolz:
+long_name_symbolA:
+__impl_long_name_symbolA:
+name_symbolA:
+.debug_abbrez:
+ ret
+
+.section .debug_abbrev,"dr"
+.byte 0
+
+.section .debug_line,"dr"
+.byte 0
diff --git a/llvm/include/llvm/MC/StringTableBuilder.h b/llvm/include/llvm/MC/StringTableBuilder.h
index 83d55ef8512fe..3f1c045fc0bdd 100644
--- a/llvm/include/llvm/MC/StringTableBuilder.h
+++ b/llvm/include/llvm/MC/StringTableBuilder.h
@@ -38,6 +38,8 @@ class StringTableBuilder {
};
private:
+ // Only non-zero priority will be recorded.
+ DenseMap<CachedHashStringRef, uint8_t> StringPriorityMap;
DenseMap<CachedHashStringRef, size_t> StringIndexMap;
size_t Size = 0;
Kind K;
@@ -51,11 +53,16 @@ class StringTableBuilder {
LLVM_ABI StringTableBuilder(Kind K, Align Alignment = Align(1));
LLVM_ABI ~StringTableBuilder();
- /// Add a string to the builder. Returns the position of S in the
- /// table. The position will be changed if finalize is used.
- /// Can only be used before the table is finalized.
- LLVM_ABI size_t add(CachedHashStringRef S);
- size_t add(StringRef S) { return add(CachedHashStringRef(S)); }
+ /// Add a string to the builder. Returns the position of S in the table. The
+ /// position will be changed if finalize is used. Can only be used before the
+ /// table is finalized. Priority is only useful with reordering. Strings with
+ /// the same priority will be put together. Strings with higher priority are
+ /// placed closer to the begin of string table. When adding same string with
+ ///
diff erent priority, the maximum priority win.
+ LLVM_ABI size_t add(CachedHashStringRef S, uint8_t Priority = 0);
+ size_t add(StringRef S, uint8_t Priority = 0) {
+ return add(CachedHashStringRef(S), Priority);
+ }
/// Analyze the strings and build the final table. No more strings can
/// be added after this point.
@@ -78,6 +85,7 @@ class StringTableBuilder {
bool contains(StringRef S) const { return contains(CachedHashStringRef(S)); }
bool contains(CachedHashStringRef S) const { return StringIndexMap.count(S); }
+ bool empty() const { return StringIndexMap.empty(); }
size_t getSize() const { return Size; }
LLVM_ABI void clear();
diff --git a/llvm/lib/MC/StringTableBuilder.cpp b/llvm/lib/MC/StringTableBuilder.cpp
index 7accdc2a9e774..f2b82998f2457 100644
--- a/llvm/lib/MC/StringTableBuilder.cpp
+++ b/llvm/lib/MC/StringTableBuilder.cpp
@@ -138,13 +138,31 @@ void StringTableBuilder::finalizeInOrder() {
void StringTableBuilder::finalizeStringTable(bool Optimize) {
Finalized = true;
- if (Optimize) {
+ if (Optimize && StringIndexMap.size()) {
std::vector<StringPair *> Strings;
Strings.reserve(StringIndexMap.size());
for (StringPair &P : StringIndexMap)
Strings.push_back(&P);
- multikeySort(Strings, 0);
+ size_t RangeBegin = 0;
+ MutableArrayRef<StringPair *> StringsRef(Strings);
+ if (StringPriorityMap.size()) {
+ llvm::sort(Strings,
+ [&](const StringPair *LHS, const StringPair *RHS) -> bool {
+ return StringPriorityMap.lookup(LHS->first) >
+ StringPriorityMap.lookup(RHS->first);
+ });
+ uint8_t RangePriority = StringPriorityMap.lookup(Strings[0]->first);
+ for (size_t I = 1, E = Strings.size(); I != E && RangePriority; ++I) {
+ uint8_t Priority = StringPriorityMap.lookup(Strings[I]->first);
+ if (Priority != RangePriority) {
+ multikeySort(StringsRef.slice(RangeBegin, I - RangeBegin), 0);
+ RangePriority = Priority;
+ RangeBegin = I;
+ }
+ }
+ }
+ multikeySort(StringsRef.slice(RangeBegin), 0);
initSize();
StringRef Previous;
@@ -199,11 +217,14 @@ size_t StringTableBuilder::getOffset(CachedHashStringRef S) const {
return I->second;
}
-size_t StringTableBuilder::add(CachedHashStringRef S) {
+size_t StringTableBuilder::add(CachedHashStringRef S, uint8_t Priority) {
if (K == WinCOFF)
assert(S.size() > COFF::NameSize && "Short string in COFF string table!");
assert(!isFinalized());
+ if (Priority)
+ StringPriorityMap[S] = std::max(Priority, StringPriorityMap[S]);
+
auto P = StringIndexMap.try_emplace(S);
if (P.second) {
size_t Start = alignTo(Size, Alignment);
More information about the llvm-commits
mailing list