[lld] 9ffeaaa - [LLD] [COFF] Use StringTableBuilder to optimize the string table

Martin Storsjö via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 1 08:44:20 PST 2022


Author: Martin Storsjö
Date: 2022-03-01T18:44:03+02:00
New Revision: 9ffeaaa0ea54307db309104696a0b6cce6ddda38

URL: https://github.com/llvm/llvm-project/commit/9ffeaaa0ea54307db309104696a0b6cce6ddda38
DIFF: https://github.com/llvm/llvm-project/commit/9ffeaaa0ea54307db309104696a0b6cce6ddda38.diff

LOG: [LLD] [COFF] Use StringTableBuilder to optimize the string table

This does tail merging (and deduplication) of the strings.

On a statically linked clang.exe, this shrinks the ~17 MB string
table by around 0.5 MB. This adds ~160 ms to the linking time
which originally was around 950 ms.

For cases where neither `-debug:symtab` nor `-debug:dwarf` is set, the
string table is only used for long section names, so this change
shouldn't make any difference at all.

Differential Revision: https://reviews.llvm.org/D120677

Added: 
    

Modified: 
    lld/COFF/Writer.cpp

Removed: 
    


################################################################################
diff  --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index eec7e9d497205..94466e7adbd76 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -24,6 +24,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringSet.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/MC/StringTableBuilder.h"
 #include "llvm/Support/BinaryStreamReader.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Endian.h"
@@ -195,7 +196,9 @@ class PartialSectionKey {
 // The writer writes a SymbolTable result to a file.
 class Writer {
 public:
-  Writer(COFFLinkerContext &c) : buffer(errorHandler().outputBuffer), ctx(c) {}
+  Writer(COFFLinkerContext &c)
+      : buffer(errorHandler().outputBuffer),
+        strtab(StringTableBuilder::WinCOFF), ctx(c) {}
   void run();
 
 private:
@@ -240,7 +243,6 @@ class Writer {
   PartialSection *findPartialSection(StringRef name, uint32_t outChars);
 
   llvm::Optional<coff_symbol16> createSymbol(Defined *d);
-  size_t addEntryToStringTable(StringRef str);
 
   OutputSection *findSection(StringRef name);
   void addBaserels();
@@ -250,7 +252,7 @@ class Writer {
 
   std::unique_ptr<FileOutputBuffer> &buffer;
   std::map<PartialSectionKey, PartialSection *> partialSections;
-  std::vector<char> strtab;
+  StringTableBuilder strtab;
   std::vector<llvm::object::coff_symbol16> outputSymtab;
   IdataContents idata;
   Chunk *importTableStart = nullptr;
@@ -1120,14 +1122,6 @@ void Writer::assignOutputSectionIndices() {
           sc->setOutputSectionIdx(mc->getOutputSectionIdx());
 }
 
-size_t Writer::addEntryToStringTable(StringRef str) {
-  assert(str.size() > COFF::NameSize);
-  size_t offsetOfEntry = strtab.size() + 4; // +4 for the size field
-  strtab.insert(strtab.end(), str.begin(), str.end());
-  strtab.push_back('\0');
-  return offsetOfEntry;
-}
-
 Optional<coff_symbol16> Writer::createSymbol(Defined *def) {
   coff_symbol16 sym;
   switch (def->kind()) {
@@ -1164,7 +1158,8 @@ Optional<coff_symbol16> Writer::createSymbol(Defined *def) {
   StringRef name = def->getName();
   if (name.size() > COFF::NameSize) {
     sym.Name.Offset.Zeroes = 0;
-    sym.Name.Offset.Offset = addEntryToStringTable(name);
+    sym.Name.Offset.Offset = 0; // Filled in later
+    strtab.add(name);
   } else {
     memset(sym.Name.ShortName, 0, COFF::NameSize);
     memcpy(sym.Name.ShortName, name.data(), name.size());
@@ -1191,6 +1186,7 @@ void Writer::createSymbolAndStringTable() {
   // solution where discardable sections have long names preserved and
   // non-discardable sections have their names truncated, to ensure that any
   // section which is mapped at runtime also has its name mapped at runtime.
+  std::vector<OutputSection *> longNameSections;
   for (OutputSection *sec : ctx.outputSections) {
     if (sec->name.size() <= COFF::NameSize)
       continue;
@@ -1201,9 +1197,12 @@ void Writer::createSymbolAndStringTable() {
            " is longer than 8 characters and will use a non-standard string "
            "table");
     }
-    sec->setStringTableOff(addEntryToStringTable(sec->name));
+
+    strtab.add(sec->name);
+    longNameSections.push_back(sec);
   }
 
+  std::vector<std::pair<size_t, StringRef>> longNameSymbols;
   if (config->debugDwarf || config->debugSymtab) {
     for (ObjFile *file : ctx.objFileInstances) {
       for (Symbol *b : file->getSymbols()) {
@@ -1218,20 +1217,33 @@ void Writer::createSymbolAndStringTable() {
             continue;
         }
 
-        if (Optional<coff_symbol16> sym = createSymbol(d))
+        if (Optional<coff_symbol16> sym = createSymbol(d)) {
           outputSymtab.push_back(*sym);
+          if (d->getName().size() > COFF::NameSize)
+            longNameSymbols.push_back({outputSymtab.size() - 1, d->getName()});
+        }
       }
     }
   }
 
-  if (outputSymtab.empty() && strtab.empty())
+  strtab.finalize();
+
+  for (OutputSection *sec : longNameSections)
+    sec->setStringTableOff(strtab.getOffset(sec->name));
+
+  for (auto P : longNameSymbols) {
+    coff_symbol16 &sym = outputSymtab[P.first];
+    sym.Name.Offset.Offset = strtab.getOffset(P.second);
+  }
+
+  if (outputSymtab.empty() && strtab.getSize() <= 4)
     return;
 
   // We position the symbol table to be adjacent to the end of the last section.
   uint64_t fileOff = fileSize;
   pointerToSymbolTable = fileOff;
   fileOff += outputSymtab.size() * sizeof(coff_symbol16);
-  fileOff += 4 + strtab.size();
+  fileOff += strtab.getSize();
   fileSize = alignTo(fileOff, config->fileAlign);
 }
 
@@ -1506,7 +1518,7 @@ template <typename PEHeaderTy> void Writer::writeHeader() {
   sectionTable = ArrayRef<uint8_t>(
       buf - ctx.outputSections.size() * sizeof(coff_section), buf);
 
-  if (outputSymtab.empty() && strtab.empty())
+  if (outputSymtab.empty() && strtab.getSize() <= 4)
     return;
 
   coff->PointerToSymbolTable = pointerToSymbolTable;
@@ -1519,9 +1531,7 @@ template <typename PEHeaderTy> void Writer::writeHeader() {
   // Create the string table, it follows immediately after the symbol table.
   // The first 4 bytes is length including itself.
   buf = reinterpret_cast<uint8_t *>(&symbolTable[numberOfSymbols]);
-  write32le(buf, strtab.size() + 4);
-  if (!strtab.empty())
-    memcpy(buf + 4, strtab.data(), strtab.size());
+  strtab.write(buf);
 }
 
 void Writer::openFile(StringRef path) {


        


More information about the llvm-commits mailing list