[lld] r257017 - ELF: Improve performance of string table construction.

Rui Ueyama via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 6 18:35:32 PST 2016


Author: ruiu
Date: Wed Jan  6 20:35:32 2016
New Revision: 257017

URL: http://llvm.org/viewvc/llvm-project?rev=257017&view=rev
Log:
ELF: Improve performance of string table construction.

String tables in unstripped executable files are fairly large.
For example, lld's executable file is about 34.4 MB in my environment,
of which 3.5 MB is the string table. The efficiency of string table
construction matters.

Previously, the string table was built in an inefficient way. We used
StringTableBuilder to build it with string tail merging enabled,
although tail merging is not effective for the symbol table (it only
makes the string table 0.3% smaller for lld). Tail merging is a
computationally intensive task and is slow.

This patch eliminates string tail merging.

I also changed the way strings are added to the string table in this
patch. Previously, strings were added using add(), and the same strings
were then passed to getOffset() to get their offsets in the string table.
With that approach, getOffset() had to look up a hash table to find the
offset of each given string. This violates the "we look up the symbol table
(or a hash table) only once for each symbol" dogma of the new LLD's
design. Hash table lookup for long C++ mangled names is slow.
I eliminated that lookup in this patch.

In total, this patch improves the link time of lld itself by about 12%
(3.50 seconds -> 3.08 seconds).

Modified:
    lld/trunk/ELF/OutputSections.cpp
    lld/trunk/ELF/OutputSections.h
    lld/trunk/ELF/Writer.cpp
    lld/trunk/test/ELF/basic-aarch64.s
    lld/trunk/test/ELF/basic-mips.s
    lld/trunk/test/ELF/basic.s
    lld/trunk/test/ELF/basic32.s
    lld/trunk/test/ELF/basic64be.s
    lld/trunk/test/ELF/discard-none.s
    lld/trunk/test/ELF/linkerscript-sections.s
    lld/trunk/test/ELF/string-table.s

Modified: lld/trunk/ELF/OutputSections.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/OutputSections.cpp?rev=257017&r1=257016&r2=257017&view=diff
==============================================================================
--- lld/trunk/ELF/OutputSections.cpp (original)
+++ lld/trunk/ELF/OutputSections.cpp Wed Jan  6 20:35:32 2016
@@ -330,7 +330,7 @@ InterpSection<ELFT>::InterpSection()
 
 template <class ELFT>
 void OutputSectionBase<ELFT>::writeHeaderTo(Elf_Shdr *SHdr) {
-  Header.sh_name = Out<ELFT>::ShStrTab->getOffset(Name);
+  Header.sh_name = Out<ELFT>::ShStrTab->addString(Name);
   *SHdr = Header;
 }
 
@@ -581,12 +581,12 @@ template <class ELFT> void DynamicSectio
 
   if (!Config->RPath.empty()) {
     ++NumEntries; // DT_RUNPATH / DT_RPATH
-    Out<ELFT>::DynStrTab->add(Config->RPath);
+    Out<ELFT>::DynStrTab->reserve(Config->RPath);
   }
 
   if (!Config->SoName.empty()) {
     ++NumEntries; // DT_SONAME
-    Out<ELFT>::DynStrTab->add(Config->SoName);
+    Out<ELFT>::DynStrTab->reserve(Config->SoName);
   }
 
   if (PreInitArraySec)
@@ -599,7 +599,7 @@ template <class ELFT> void DynamicSectio
   for (const std::unique_ptr<SharedFile<ELFT>> &F : SymTab.getSharedFiles()) {
     if (!F->isNeeded())
       continue;
-    Out<ELFT>::DynStrTab->add(F->getSoName());
+    Out<ELFT>::DynStrTab->reserve(F->getSoName());
     ++NumEntries;
   }
 
@@ -687,7 +687,7 @@ template <class ELFT> void DynamicSectio
   WritePtr(DT_SYMTAB, Out<ELFT>::DynSymTab->getVA());
   WritePtr(DT_SYMENT, sizeof(Elf_Sym));
   WritePtr(DT_STRTAB, Out<ELFT>::DynStrTab->getVA());
-  WriteVal(DT_STRSZ, Out<ELFT>::DynStrTab->data().size());
+  WriteVal(DT_STRSZ, Out<ELFT>::DynStrTab->getSize());
   if (Out<ELFT>::GnuHashTab)
     WritePtr(DT_GNU_HASH, Out<ELFT>::GnuHashTab->getVA());
   if (Out<ELFT>::HashTab)
@@ -703,10 +703,10 @@ template <class ELFT> void DynamicSectio
   //   DT_RPATH is used for indirect dependencies as well.
   if (!Config->RPath.empty())
     WriteVal(Config->EnableNewDtags ? DT_RUNPATH : DT_RPATH,
-             Out<ELFT>::DynStrTab->getOffset(Config->RPath));
+             Out<ELFT>::DynStrTab->addString(Config->RPath));
 
   if (!Config->SoName.empty())
-    WriteVal(DT_SONAME, Out<ELFT>::DynStrTab->getOffset(Config->SoName));
+    WriteVal(DT_SONAME, Out<ELFT>::DynStrTab->addString(Config->SoName));
 
   auto WriteArray = [&](int32_t T1, int32_t T2,
                         const OutputSectionBase<ELFT> *Sec) {
@@ -721,7 +721,7 @@ template <class ELFT> void DynamicSectio
 
   for (const std::unique_ptr<SharedFile<ELFT>> &F : SymTab.getSharedFiles())
     if (F->isNeeded())
-      WriteVal(DT_NEEDED, Out<ELFT>::DynStrTab->getOffset(F->getSoName()));
+      WriteVal(DT_NEEDED, Out<ELFT>::DynStrTab->addString(F->getSoName()));
 
   if (InitSym)
     WritePtr(DT_INIT, getSymVA<ELFT>(*InitSym));
@@ -1162,9 +1162,39 @@ StringTableSection<ELFT>::StringTableSec
   this->Header.sh_addralign = 1;
 }
 
+// String tables are created in two phases. First you call reserve()
+// to reserve room in the string table, and then call addString() to actually
+// add that string.
+//
+// Why two phases? We want to know the size of the string table as early as
+// possible to fix file layout. So we have separated finalize(), which
+// determines the size of the section, from writeTo(), which writes the section
+// contents to the output buffer. If we merged reserve() with addString(),
+// we would need plumbing between finalize() and writeTo() so that offsets
+// obtained in the former function could be written in the latter.
+// This design eliminates that need.
+template <class ELFT> void StringTableSection<ELFT>::reserve(StringRef S) {
+  Reserved += S.size() + 1; // +1 for NUL
+}
+
+// Adds a string to the string table. You must call reserve() with the
+// same string before calling addString().
+template <class ELFT> size_t StringTableSection<ELFT>::addString(StringRef S) {
+  size_t Pos = Used;
+  Strings.push_back(S);
+  Used += S.size() + 1;
+  Reserved -= S.size() + 1;
+  assert((int64_t)Reserved >= 0);
+  return Pos;
+}
+
 template <class ELFT> void StringTableSection<ELFT>::writeTo(uint8_t *Buf) {
-  StringRef Data = StrTabBuilder.data();
-  memcpy(Buf, Data.data(), Data.size());
+  // ELF string tables start with NUL byte, so advance the pointer by one.
+  ++Buf;
+  for (StringRef S : Strings) {
+    memcpy(Buf, S.data(), S.size());
+    Buf += S.size() + 1;
+  }
 }
 
 template <class ELFT>
@@ -1248,14 +1278,14 @@ template <class ELFT> void SymbolTableSe
 
 template <class ELFT>
 void SymbolTableSection<ELFT>::addLocalSymbol(StringRef Name) {
-  StrTabSec.add(Name);
+  StrTabSec.reserve(Name);
   ++NumVisible;
   ++NumLocals;
 }
 
 template <class ELFT>
 void SymbolTableSection<ELFT>::addSymbol(SymbolBody *Body) {
-  StrTabSec.add(Body->getName());
+  StrTabSec.reserve(Body->getName());
   Symbols.push_back(Body);
   ++NumVisible;
 }
@@ -1297,7 +1327,7 @@ void SymbolTableSection<ELFT>::writeLoca
         ESym->st_shndx = OutSec->SectionIndex;
         VA += OutSec->getVA() + Section->getOffset(Sym);
       }
-      ESym->st_name = StrTabSec.getOffset(SymName);
+      ESym->st_name = StrTabSec.addString(SymName);
       ESym->st_size = Sym.st_size;
       ESym->setBindingAndType(Sym.getBinding(), Sym.getType());
       ESym->st_value = VA;
@@ -1352,7 +1382,7 @@ void SymbolTableSection<ELFT>::writeGlob
     }
 
     StringRef Name = Body->getName();
-    ESym->st_name = StrTabSec.getOffset(Name);
+    ESym->st_name = StrTabSec.addString(Name);
 
     unsigned char Type = STT_NOTYPE;
     uintX_t Size = 0;

Modified: lld/trunk/ELF/OutputSections.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/OutputSections.h?rev=257017&r1=257016&r2=257017&view=diff
==============================================================================
--- lld/trunk/ELF/OutputSections.h (original)
+++ lld/trunk/ELF/OutputSections.h Wed Jan  6 20:35:32 2016
@@ -329,21 +329,18 @@ class StringTableSection final : public
 public:
   typedef typename llvm::object::ELFFile<ELFT>::uintX_t uintX_t;
   StringTableSection(StringRef Name, bool Dynamic);
-  void add(StringRef S) { StrTabBuilder.add(S); }
-  size_t getOffset(StringRef S) const { return StrTabBuilder.getOffset(S); }
-  StringRef data() const { return StrTabBuilder.data(); }
+  void reserve(StringRef S);
+  size_t addString(StringRef S);
   void writeTo(uint8_t *Buf) override;
-
-  void finalize() override {
-    StrTabBuilder.finalize();
-    this->Header.sh_size = StrTabBuilder.data().size();
-  }
-
+  size_t getSize() const { return Used + Reserved; }
+  void finalize() override { this->Header.sh_size = getSize(); }
   bool isDynamic() const { return Dynamic; }
 
 private:
   const bool Dynamic;
-  llvm::StringTableBuilder StrTabBuilder{llvm::StringTableBuilder::ELF};
+  std::vector<StringRef> Strings;
+  size_t Used = 1; // ELF string tables start with a NUL byte, so 1.
+  size_t Reserved = 0;
 };
 
 template <class ELFT>

Modified: lld/trunk/ELF/Writer.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/Writer.cpp?rev=257017&r1=257016&r2=257017&view=diff
==============================================================================
--- lld/trunk/ELF/Writer.cpp (original)
+++ lld/trunk/ELF/Writer.cpp Wed Jan  6 20:35:32 2016
@@ -831,7 +831,7 @@ template <class ELFT> void Writer<ELFT>:
   }
 
   for (OutputSectionBase<ELFT> *Sec : OutputSections)
-    Out<ELFT>::ShStrTab->add(Sec->getName());
+    Out<ELFT>::ShStrTab->reserve(Sec->getName());
 
   // Finalizers fix each section's size.
   // .dynamic section's finalizer may add strings to .dynstr,
@@ -1237,8 +1237,17 @@ template <class ELFT> void Writer<ELFT>:
     Sec->writeTo(Buf + Sec->getFileOff());
   }
 
+  // Write all sections but string table sections. We know the sizes of the
+  // string tables already, but they may not have actual strings yet (only
+  // room may be reserved), because writeTo() is allowed to add actual
+  // strings to the string tables.
   for (OutputSectionBase<ELFT> *Sec : OutputSections)
-    if (Sec != Out<ELFT>::Opd)
+    if (Sec != Out<ELFT>::Opd && Sec->getType() != SHT_STRTAB)
+      Sec->writeTo(Buf + Sec->getFileOff());
+
+  // Write string table sections.
+  for (OutputSectionBase<ELFT> *Sec : OutputSections)
+    if (Sec != Out<ELFT>::Opd && Sec->getType() == SHT_STRTAB)
       Sec->writeTo(Buf + Sec->getFileOff());
 }
 

Modified: lld/trunk/test/ELF/basic-aarch64.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/basic-aarch64.s?rev=257017&r1=257016&r2=257017&view=diff
==============================================================================
--- lld/trunk/test/ELF/basic-aarch64.s (original)
+++ lld/trunk/test/ELF/basic-aarch64.s Wed Jan  6 20:35:32 2016
@@ -97,7 +97,7 @@ _start:
 # CHECK-NEXT:   }
 # CHECK-NEXT:   Section {
 # CHECK-NEXT:     Index: 4
-# CHECK-NEXT:     Name: .strtab (17)
+# CHECK-NEXT:     Name: .strtab (25)
 # CHECK-NEXT:     Type: SHT_STRTAB (0x3)
 # CHECK-NEXT:     Flags [ (0x0)
 # CHECK-NEXT:     ]

Modified: lld/trunk/test/ELF/basic-mips.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/basic-mips.s?rev=257017&r1=257016&r2=257017&view=diff
==============================================================================
--- lld/trunk/test/ELF/basic-mips.s (original)
+++ lld/trunk/test/ELF/basic-mips.s Wed Jan  6 20:35:32 2016
@@ -87,7 +87,7 @@ __start:
 # CHECK-NEXT:   }
 # CHECK-NEXT:   Section {
 # CHECK-NEXT:     Index: 3
-# CHECK-NEXT:     Name: .text (1)
+# CHECK-NEXT:     Name: .text (25)
 # CHECK-NEXT:     Type: SHT_PROGBITS (0x1)
 # CHECK-NEXT:     Flags [ (0x6)
 # CHECK-NEXT:       SHF_ALLOC (0x2)
@@ -119,7 +119,7 @@ __start:
 # CHECK-NEXT:   }
 # CHECK-NEXT:   Section {
 # CHECK-NEXT:     Index: 5
-# CHECK-NEXT:     Name: .bss (7)
+# CHECK-NEXT:     Name: .bss (37)
 # CHECK-NEXT:     Type: SHT_NOBITS (0x8)
 # CHECK-NEXT:     Flags [ (0x3)
 # CHECK-NEXT:       SHF_ALLOC (0x2)
@@ -163,7 +163,7 @@ __start:
 # CHECK-NEXT:   }
 # CHECK-NEXT:   Section {
 # CHECK-NEXT:     Index: 8
-# CHECK-NEXT:     Name: .strtab (46)
+# CHECK-NEXT:     Name: .strtab (60)
 # CHECK-NEXT:     Type: SHT_STRTAB (0x3)
 # CHECK-NEXT:     Flags [ (0x0)
 # CHECK-NEXT:     ]

Modified: lld/trunk/test/ELF/basic.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/basic.s?rev=257017&r1=257016&r2=257017&view=diff
==============================================================================
--- lld/trunk/test/ELF/basic.s (original)
+++ lld/trunk/test/ELF/basic.s Wed Jan  6 20:35:32 2016
@@ -98,7 +98,7 @@ _start:
 # CHECK-NEXT:   }
 # CHECK-NEXT:   Section {
 # CHECK-NEXT:     Index: 4
-# CHECK-NEXT:     Name: .strtab (17)
+# CHECK-NEXT:     Name: .strtab (25)
 # CHECK-NEXT:     Type: SHT_STRTAB (0x3)
 # CHECK-NEXT:     Flags [ (0x0)
 # CHECK-NEXT:     ]

Modified: lld/trunk/test/ELF/basic32.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/basic32.s?rev=257017&r1=257016&r2=257017&view=diff
==============================================================================
--- lld/trunk/test/ELF/basic32.s (original)
+++ lld/trunk/test/ELF/basic32.s Wed Jan  6 20:35:32 2016
@@ -96,7 +96,7 @@ _start:
 # CHECK-NEXT:   }
 # CHECK-NEXT:   Section {
 # CHECK-NEXT:     Index: 4
-# CHECK-NEXT:     Name: .strtab (17)
+# CHECK-NEXT:     Name: .strtab (25)
 # CHECK-NEXT:     Type: SHT_STRTAB (0x3)
 # CHECK-NEXT:     Flags [ (0x0)
 # CHECK-NEXT:     ]

Modified: lld/trunk/test/ELF/basic64be.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/basic64be.s?rev=257017&r1=257016&r2=257017&view=diff
==============================================================================
--- lld/trunk/test/ELF/basic64be.s (original)
+++ lld/trunk/test/ELF/basic64be.s Wed Jan  6 20:35:32 2016
@@ -177,7 +177,7 @@ _start:
 # CHECK-NEXT:   }
 # CHECK-NEXT:   Section {
 # CHECK-NEXT:     Index: 7
-# CHECK-NEXT:     Name: .strtab (27)
+# CHECK-NEXT:     Name: .strtab (41)
 # CHECK-NEXT:     Type: SHT_STRTAB (0x3)
 # CHECK-NEXT:     Flags [ (0x0)
 # CHECK-NEXT:     ]
@@ -257,4 +257,4 @@ _start:
 # CHECK-NEXT:    ]
 # CHECK-NEXT:    Alignment: 0
 # CHECK-NEXT:  }
-# CHECK-NEXT: ]
\ No newline at end of file
+# CHECK-NEXT: ]

Modified: lld/trunk/test/ELF/discard-none.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/discard-none.s?rev=257017&r1=257016&r2=257017&view=diff
==============================================================================
--- lld/trunk/test/ELF/discard-none.s (original)
+++ lld/trunk/test/ELF/discard-none.s Wed Jan  6 20:35:32 2016
@@ -20,8 +20,8 @@
 // CHECK-NEXT:     AddressAlignment:
 // CHECK-NEXT:     EntrySize:
 // CHECK-NEXT:     SectionData (
-// CHECK-NEXT:       0000: 002E4C6D 79766172 002E4C6D 796F7468  |..Lmyvar..Lmyoth|
-// CHECK-NEXT:       0010: 65727661 7200                        |ervar.|
+// CHECK-NEXT:       0000: 002E4C6D 796F7468 65727661 72002E4C  |..Lmyothervar..L|
+// CHECK-NEXT:       0010: 6D797661 7200                        |myvar.|
 // CHECK-NEXT:     )
 // CHECK-NEXT:   }
 

Modified: lld/trunk/test/ELF/linkerscript-sections.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/linkerscript-sections.s?rev=257017&r1=257016&r2=257017&view=diff
==============================================================================
--- lld/trunk/test/ELF/linkerscript-sections.s (original)
+++ lld/trunk/test/ELF/linkerscript-sections.s Wed Jan  6 20:35:32 2016
@@ -22,7 +22,7 @@
 # SEC-DEFAULT: 4 .bss          00000002 {{[0-9a-f]*}} BSS
 # SEC-DEFAULT: 5 .shstrtab     00000002 {{[0-9a-f]*}}
 # SEC-DEFAULT: 6 .symtab       00000030 {{[0-9a-f]*}}
-# SEC-DEFAULT: 7 .shstrtab     00000032 {{[0-9a-f]*}}
+# SEC-DEFAULT: 7 .shstrtab     0000003c {{[0-9a-f]*}}
 # SEC-DEFAULT: 8 .strtab       00000008 {{[0-9a-f]*}}
 
 # Sections are put in order specified in linker script.
@@ -42,7 +42,7 @@
 # SEC-ORDER: 1 .bss          00000002 {{[0-9a-f]*}} BSS
 # SEC-ORDER: 2 other         00000003 {{[0-9a-f]*}} DATA
 # SEC-ORDER: 3 .shstrtab     00000002 {{[0-9a-f]*}}
-# SEC-ORDER: 4 .shstrtab     00000032 {{[0-9a-f]*}}
+# SEC-ORDER: 4 .shstrtab     0000003c {{[0-9a-f]*}}
 # SEC-ORDER: 5 .symtab       00000030 {{[0-9a-f]*}}
 # SEC-ORDER: 6 .strtab       00000008 {{[0-9a-f]*}}
 # SEC-ORDER: 7 .data         00000020 {{[0-9a-f]*}} DATA
@@ -63,7 +63,7 @@
 # SEC-SWAP-NAMES: 4 .bss          00000002 {{[0-9a-f]*}} BSS
 # SEC-SWAP-NAMES: 5 .shstrtab     00000002 {{[0-9a-f]*}}
 # SEC-SWAP-NAMES: 6 .symtab       00000030 {{[0-9a-f]*}}
-# SEC-SWAP-NAMES: 7 .shstrtab     00000032 {{[0-9a-f]*}}
+# SEC-SWAP-NAMES: 7 .shstrtab     0000003c {{[0-9a-f]*}}
 # SEC-SWAP-NAMES: 8 .strtab       00000008 {{[0-9a-f]*}}
 
 # .shstrtab from the input object file is discarded.
@@ -100,7 +100,7 @@
 # SEC-MULTI: 3 .bss          00000002 {{[0-9a-f]*}} BSS
 # SEC-MULTI: 4 .shstrtab     00000002 {{[0-9a-f]*}}
 # SEC-MULTI: 5 .symtab       00000030 {{[0-9a-f]*}}
-# SEC-MULTI: 6 .shstrtab     0000002c {{[0-9a-f]*}}
+# SEC-MULTI: 6 .shstrtab     00000036 {{[0-9a-f]*}}
 # SEC-MULTI: 7 .strtab       00000008 {{[0-9a-f]*}}
 
 .globl _start;

Modified: lld/trunk/test/ELF/string-table.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/string-table.s?rev=257017&r1=257016&r2=257017&view=diff
==============================================================================
--- lld/trunk/test/ELF/string-table.s (original)
+++ lld/trunk/test/ELF/string-table.s Wed Jan  6 20:35:32 2016
@@ -58,9 +58,10 @@ _start:
 // CHECK-NEXT:  AddressAlignment: 1
 // CHECK-NEXT:  EntrySize: 0
 // CHECK-NEXT:  SectionData (
-// CHECK-NEXT:    0000: 002E7465 78740066 6F6F6261 72002E73  |..text.foobar..s|
-// CHECK-NEXT:    0010: 68737472 74616200 2E737472 74616200  |hstrtab..strtab.|
-// CHECK-NEXT:    0020: 2E73796D 74616200                    |.symtab.|
+// CHECK-NEXT:    0000: 00626172 002E7465 78740066 6F6F6261  |.bar..text.fooba|
+// CHECK-NEXT:    0010: 7200666F 6F626172 00666F6F 62617200  |r.foobar.foobar.|
+// CHECK-NEXT:    0020: 2E73796D 74616200 2E736873 74727461  |.symtab..shstrta|
+// CHECK-NEXT:    0030: 62002E73 74727461 6200               |b..strtab.|
 // CHECK-NEXT:  )
 // CHECK-NEXT:}
 // CHECK:        Name: .strtab
@@ -75,6 +76,6 @@ _start:
 // CHECK-NEXT:   AddressAlignment: 1
 // CHECK-NEXT:   EntrySize: 0
 // CHECK-NEXT:   SectionData (
-// CHECK-NEXT:     0000: 005F7374 61727400 666F6F62 617200    |._start.foobar.|
+// CHECK-NEXT:     0000: 00666F6F 62617200 5F737461 727400 |.foobar._start.|
 // CHECK-NEXT:   )
 // CHECK-NEXT: }




More information about the llvm-commits mailing list