<div dir="ltr"><div>Hi,</div><div><br></div><div>MSan is unhappy about this change:</div><div><br></div><a href="http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-bootstrap-msan/builds/5844/steps/check-lld%20msan/logs/stdio">http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-bootstrap-msan/builds/5844/steps/check-lld%20msan/logs/stdio</a><br><div><br></div><div><div>==43923==WARNING: MemorySanitizer: use-of-uninitialized-value</div><div>    #0 0xa4087f in lld::elf::GdbIndexSection::GdbIndexSection(std::__1::vector<lld::elf::GdbIndexChunk, std::__1::allocator<lld::elf::GdbIndexChunk> >&&) /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/SyntheticSections.cpp:2432:39</div><div>    #1 0xa474d4 in lld::elf::GdbIndexSection* lld::make<lld::elf::GdbIndexSection, std::__1::vector<lld::elf::GdbIndexChunk, std::__1::allocator<lld::elf::GdbIndexChunk> > >(std::__1::vector<lld::elf::GdbIndexChunk, std::__1::allocator<lld::elf::GdbIndexChunk> >&&) /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/include/lld/Common/Memory.h:55:39</div><div>    #2 0xa483c3 in lld::elf::GdbIndexSection* lld::elf::createGdbIndex<llvm::object::ELFType<(llvm::support::endianness)1, true> >() /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/SyntheticSections.cpp:2362:10</div><div>    #3 0xb5513e in createSyntheticSections<llvm::object::ELFType<llvm::support::little, true> > /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/Writer.cpp:373:21</div><div>    #4 0xb5513e in (anonymous namespace)::Writer<llvm::object::ELFType<(llvm::support::endianness)1, true> >::run() /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/Writer.cpp:426</div><div>    #5 0xb52ee4 in void lld::elf::writeResult<llvm::object::ELFType<(llvm::support::endianness)1, true> >() /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/Writer.cpp:143:64</div><div>    #6 0x84e4f6 in void lld::elf::LinkerDriver::link<llvm::object::ELFType<(llvm::support::endianness)1, true> >(llvm::opt::InputArgList&) /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/Driver.cpp:1413:3</div><div>    #7 0x832b56 in lld::elf::LinkerDriver::main(llvm::ArrayRef<char const*>) /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/Driver.cpp:422:5</div><div>    #8 0x82eedd in lld::elf::link(llvm::ArrayRef<char const*>, bool, llvm::raw_ostream&) /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/Driver.cpp:98:11</div><div>    #9 0x5d350e in main /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/tools/lld/lld.cpp:132:13</div><div>    #10 0x7f157cd712e0 in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x202e0)</div><div>    #11 0x55eca9 in _start (/b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm_build_msan/bin/lld+0x55eca9)</div></div></div><div class="gmail_extra"><br><div class="gmail_quote">On Tue, Jul 10, 2018 at 6:49 AM, Rui Ueyama via llvm-commits <span dir="ltr"><<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: ruiu<br>
Date: Tue Jul 10 06:49:13 2018<br>
New Revision: 336672<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=336672&view=rev" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project?rev=336672&view=rev</a><br>
Log:<br>
Reduce memory usage when creating .gdb_index. NFC.<br>
<br>
.gdb_index sections can be very large. When you are compiling<br>
multi-gibibyte executables, they can be larger than 1 GiB. The previous<br>
implementation of .gdb_index seems to consume too much memory.<br>
<br>
This patch reduces memory consumption by eliminating temporary objects.<br>
In one experiment, memory consumption of GdbIndexSection class is<br>
reduced from 962 MiB to 228 MiB when creating a .gdb_index of 1350 GiB.<br>
<br>
Differential Revision: <a href="https://reviews.llvm.org/D49094" rel="noreferrer" target="_blank">https://reviews.llvm.org/<wbr>D49094</a><br>
<br>
Modified:<br>
    lld/trunk/ELF/<wbr>SyntheticSections.cpp<br>
    lld/trunk/ELF/<wbr>SyntheticSections.h<br>
<br>
Modified: lld/trunk/ELF/<wbr>SyntheticSections.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/SyntheticSections.cpp?rev=336672&r1=336671&r2=336672&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/lld/trunk/ELF/<wbr>SyntheticSections.cpp?rev=<wbr>336672&r1=336671&r2=336672&<wbr>view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- lld/trunk/ELF/<wbr>SyntheticSections.cpp (original)<br>
+++ lld/trunk/ELF/<wbr>SyntheticSections.cpp Tue Jul 10 06:49:13 2018<br>
@@ -52,6 +52,7 @@ using namespace llvm::support;<br>
 using namespace lld;<br>
 using namespace lld::elf;<br>
<br>
+using llvm::support::endian::<wbr>read32le;<br>
 using llvm::support::endian::<wbr>write32le;<br>
 using llvm::support::endian::<wbr>write64le;<br>
<br>
@@ -2311,19 +2312,17 @@ readAddressAreas(DWARFContext &Dwarf, In<br>
 }<br>
<br>
 static std::vector<GdbIndexChunk::<wbr>NameTypeEntry><br>
-readPubNamesAndTypes(<wbr>DWARFContext &Dwarf) {<br>
+readPubNamesAndTypes(<wbr>DWARFContext &Dwarf, uint32_t Idx) {<br>
   StringRef Sec1 = Dwarf.getDWARFObj().<wbr>getGnuPubNamesSection();<br>
   StringRef Sec2 = Dwarf.getDWARFObj().<wbr>getGnuPubTypesSection();<br>
<br>
   std::vector<GdbIndexChunk::<wbr>NameTypeEntry> Ret;<br>
   for (StringRef Sec : {Sec1, Sec2}) {<br>
     DWARFDebugPubTable Table(Sec, Config->IsLE, true);<br>
-    for (const DWARFDebugPubTable::Set &Set : Table.getData()) {<br>
-      for (const DWARFDebugPubTable::Entry &Ent : Set.Entries) {<br>
-        CachedHashStringRef S(Ent.Name, computeGdbHash(Ent.Name));<br>
-        Ret.push_back({S, Ent.Descriptor.toBits()});<br>
-      }<br>
-    }<br>
+    for (const DWARFDebugPubTable::Set &Set : Table.getData())<br>
+      for (const DWARFDebugPubTable::Entry &Ent : Set.Entries)<br>
+        Ret.push_back({{Ent.Name, computeGdbHash(Ent.Name)},<br>
+                       (Ent.Descriptor.toBits() << 24) | Idx});<br>
   }<br>
   return Ret;<br>
 }<br>
@@ -2337,43 +2336,6 @@ static std::vector<InputSection *> getDe<br>
   return Ret;<br>
 }<br>
<br>
-void GdbIndexSection::fixCuIndex() {<br>
-  uint32_t Idx = 0;<br>
-  for (GdbIndexChunk &Chunk : Chunks) {<br>
-    for (GdbIndexChunk::AddressEntry &Ent : Chunk.AddressAreas)<br>
-      Ent.CuIndex += Idx;<br>
-    Idx += Chunk.CompilationUnits.size();<br>
-  }<br>
-}<br>
-<br>
-std::vector<std::vector<<wbr>uint32_t>> GdbIndexSection::<wbr>createCuVectors() {<br>
-  std::vector<std::vector<<wbr>uint32_t>> Ret;<br>
-  uint32_t Idx = 0;<br>
-  uint32_t Off = 0;<br>
-<br>
-  for (GdbIndexChunk &Chunk : Chunks) {<br>
-    for (GdbIndexChunk::NameTypeEntry &Ent : Chunk.NamesAndTypes) {<br>
-      GdbSymbol *&Sym = Symbols[Ent.Name];<br>
-      if (!Sym) {<br>
-        Sym = make<GdbSymbol>(GdbSymbol{Ent.<wbr>Name.hash(), Off, Ret.size()});<br>
-        Off += Ent.Name.size() + 1;<br>
-        Ret.push_back({});<br>
-      }<br>
-<br>
-      // gcc 5.4.1 produces a buggy .debug_gnu_pubnames that contains<br>
-      // duplicate entries, so we want to dedup them.<br>
-      std::vector<uint32_t> &Vec = Ret[Sym->CuVectorIndex];<br>
-      uint32_t Val = (Ent.Type << 24) | Idx;<br>
-      if (Vec.empty() || Vec.back() != Val)<br>
-        Vec.push_back(Val);<br>
-    }<br>
-    Idx += Chunk.CompilationUnits.size();<br>
-  }<br>
-<br>
-  StringPoolSize = Off;<br>
-  return Ret;<br>
-}<br>
-<br>
 template <class ELFT> GdbIndexSection *elf::createGdbIndex() {<br>
   // Gather debug info to create a .gdb_index section.<br>
   std::vector<InputSection *> Sections = getDebugInfoSections();<br>
@@ -2386,7 +2348,7 @@ template <class ELFT> GdbIndexSection *e<br>
     Chunks[I].DebugInfoSec = Sections[I];<br>
     Chunks[I].CompilationUnits = readCuList(Dwarf);<br>
     Chunks[I].AddressAreas = readAddressAreas(Dwarf, Sections[I]);<br>
-    Chunks[I].NamesAndTypes = readPubNamesAndTypes(Dwarf);<br>
+    Chunks[I].NamesAndTypes = readPubNamesAndTypes(Dwarf, I);<br>
   });<br>
<br>
   // .debug_gnu_pub{names,types} are useless in executables.<br>
@@ -2414,37 +2376,48 @@ static size_t getAddressAreaSize(ArrayRe<br>
   return Ret;<br>
 }<br>
<br>
-std::vector<GdbSymbol *> GdbIndexSection::<wbr>createGdbSymtab() {<br>
-  uint32_t Size = NextPowerOf2(Symbols.size() * 4 / 3);<br>
-  if (Size < 1024)<br>
-    Size = 1024;<br>
-<br>
-  uint32_t Mask = Size - 1;<br>
-  std::vector<GdbSymbol *> Ret(Size);<br>
-<br>
-  for (auto &KV : Symbols) {<br>
-    GdbSymbol *Sym = KV.second;<br>
-    uint32_t I = Sym->NameHash & Mask;<br>
-    uint32_t Step = ((Sym->NameHash * 17) & Mask) | 1;<br>
-<br>
-    while (Ret[I])<br>
-      I = (I + Step) & Mask;<br>
-    Ret[I] = Sym;<br>
-  }<br>
-  return Ret;<br>
+// Returns the desired size of an on-disk hash table for a .gdb_index section.<br>
+// There's a tradeoff between size and collision rate. We aim 75% utilization.<br>
+static size_t getSymtabSize(size_t NumSymbols) {<br>
+  return std::max<size_t>(NextPowerOf2(<wbr>NumSymbols * 4 / 3), 1024);<br>
 }<br>
<br>
 GdbIndexSection::<wbr>GdbIndexSection(std::vector<<wbr>GdbIndexChunk> &&C)<br>
     : SyntheticSection(0, SHT_PROGBITS, 1, ".gdb_index"), Chunks(std::move(C)) {<br>
-  fixCuIndex();<br>
-  CuVectors = createCuVectors();<br>
-  GdbSymtab = createGdbSymtab();<br>
+  // A map to identify duplicate symbols.<br>
+  DenseMap<CachedHashStringRef, size_t> Map;<br>
+<br>
+  // Initialize Symbols and CuVectors while deduplicating symbols by name.<br>
+  for (GdbIndexChunk &Chunk : Chunks) {<br>
+    for (GdbIndexChunk::NameTypeEntry &Ent : Chunk.NamesAndTypes) {<br>
+      CachedHashStringRef S = Ent.Name;<br>
+      size_t &Idx = Map[S];<br>
+<br>
+      if (!Idx) {<br>
+        Idx = Symbols.size() + 1;<br>
+        Symbols.push_back({S, static_cast<uint32_t>(<wbr>StringPoolSize),<br>
+                           static_cast<uint32_t>(Symbols.<wbr>size())});<br>
+        StringPoolSize += S.size() + 1;<br>
+        CuVectors.push_back({});<br>
+      }<br>
+<br>
+      // gcc 5.4.1 produces a buggy .debug_gnu_pubnames that contains<br>
+      // duplicate entries, so we want to dedup them.<br>
+      std::vector<uint32_t> &Vec = CuVectors[Symbols[Idx - 1].CuVectorIdx];<br>
+      if (Vec.empty() || Vec.back() != Ent.Type)<br>
+        Vec.push_back(Ent.Type);<br>
+    }<br>
+<br>
+    // NamesAndTypes is useless beyond this point, so clear it to save memory.<br>
+    Chunk.NamesAndTypes = {};<br>
+  }<br>
<br>
   // Compute offsets early to know the section size.<br>
   // Each chunk size needs to be in sync with what we write in writeTo.<br>
   CuTypesOffset = CuListOffset + getCuSize(Chunks) * 16;<br>
   SymtabOffset = CuTypesOffset + getAddressAreaSize(Chunks) * 20;<br>
-  ConstantPoolOffset = SymtabOffset + GdbSymtab.size() * 8;<br>
+  SymtabSize = getSymtabSize(Symbols.size());<br>
+  ConstantPoolOffset = SymtabOffset + SymtabSize * 8;<br>
<br>
   for (ArrayRef<uint32_t> Vec : CuVectors) {<br>
     CuVectorOffsets.push_back(<wbr>CuVectorsPoolSize);<br>
@@ -2480,25 +2453,35 @@ void GdbIndexSection::writeTo(<wbr>uint8_t *B<br>
   }<br>
<br>
   // Write the address area.<br>
-  for (GdbIndexChunk &D : Chunks) {<br>
-    for (GdbIndexChunk::AddressEntry &E : D.AddressAreas) {<br>
+  uint32_t Idx = 0;<br>
+  for (GdbIndexChunk &Chunk : Chunks) {<br>
+    for (GdbIndexChunk::AddressEntry &E : Chunk.AddressAreas) {<br>
       uint64_t BaseAddr = E.Section->getVA(0);<br>
       write64le(Buf, BaseAddr + E.LowAddress);<br>
       write64le(Buf + 8, BaseAddr + E.HighAddress);<br>
-      write32le(Buf + 16, E.CuIndex);<br>
+      write32le(Buf + 16, E.CuIndex + Idx);<br>
       Buf += 20;<br>
     }<br>
+    Idx += Chunk.CompilationUnits.size();<br>
   }<br>
<br>
-  // Write the symbol table.<br>
-  for (GdbSymbol *Sym : GdbSymtab) {<br>
-    if (Sym) {<br>
-      write32le(Buf, CuVectorsPoolSize + Sym->NameOffset);<br>
-      write32le(Buf + 4, CuVectorOffsets[Sym-><wbr>CuVectorIndex]);<br>
-    }<br>
-    Buf += 8;<br>
+  // Write the on-disk open-addressing hash table containing symbols.<br>
+  for (GdbSymbol &Sym : Symbols) {<br>
+    uint32_t Mask = SymtabSize - 1;<br>
+    uint32_t H = Sym.Name.hash();<br>
+    uint32_t I = H & Mask;<br>
+    uint32_t Step = ((H * 17) & Mask) | 1;<br>
+<br>
+    while (read32le(Buf + I * 8))<br>
+      I = (I + Step) & Mask;<br>
+<br>
+    uint8_t *P = Buf + I * 8;<br>
+    write32le(P, CuVectorsPoolSize + Sym.OutputOff);<br>
+    write32le(P + 4, CuVectorOffsets[Sym.<wbr>CuVectorIdx]);<br>
   }<br>
<br>
+  Buf += SymtabSize * 8;<br>
+<br>
   // Write the CU vectors.<br>
   for (ArrayRef<uint32_t> Vec : CuVectors) {<br>
     write32le(Buf, Vec.size());<br>
@@ -2510,13 +2493,8 @@ void GdbIndexSection::writeTo(<wbr>uint8_t *B<br>
   }<br>
<br>
   // Write the string pool.<br>
-  for (auto &KV : Symbols) {<br>
-    CachedHashStringRef S = KV.first;<br>
-    GdbSymbol *Sym = KV.second;<br>
-    size_t Off = Sym->NameOffset;<br>
-    memcpy(Buf + Off, S.val().data(), S.size());<br>
-    Buf[Off + S.size()] = '\0';<br>
-  }<br>
+  for (GdbSymbol &Sym : Symbols)<br>
+    memcpy(Buf + Sym.OutputOff, Sym.Name.val().data(), Sym.Name.size());<br>
 }<br>
<br>
 bool GdbIndexSection::empty() const { return !Out::DebugInfo; }<br>
<br>
Modified: lld/trunk/ELF/<wbr>SyntheticSections.h<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/SyntheticSections.h?rev=336672&r1=336671&r2=336672&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/lld/trunk/ELF/<wbr>SyntheticSections.h?rev=<wbr>336672&r1=336671&r2=336672&<wbr>view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- lld/trunk/ELF/<wbr>SyntheticSections.h (original)<br>
+++ lld/trunk/ELF/<wbr>SyntheticSections.h Tue Jul 10 06:49:13 2018<br>
@@ -668,7 +668,7 @@ struct GdbIndexChunk {<br>
<br>
   struct NameTypeEntry {<br>
     llvm::CachedHashStringRef Name;<br>
-    uint8_t Type;<br>
+    uint32_t Type;<br>
   };<br>
<br>
   InputSection *DebugInfoSec;<br>
@@ -679,9 +679,9 @@ struct GdbIndexChunk {<br>
<br>
 // The symbol type for the .gdb_index section.<br>
 struct GdbSymbol {<br>
-  uint32_t NameHash;<br>
-  size_t NameOffset;<br>
-  size_t CuVectorIndex;<br>
+  llvm::CachedHashStringRef Name;<br>
+  uint32_t OutputOff;<br>
+  uint32_t CuVectorIdx;<br>
 };<br>
<br>
 class GdbIndexSection final : public SyntheticSection {<br>
@@ -692,19 +692,12 @@ public:<br>
   bool empty() const override;<br>
<br>
 private:<br>
-  void fixCuIndex();<br>
-  std::vector<std::vector<<wbr>uint32_t>> createCuVectors();<br>
-  std::vector<GdbSymbol *> createGdbSymtab();<br>
-<br>
   // A symbol table for this .gdb_index section.<br>
-  std::vector<GdbSymbol *> GdbSymtab;<br>
+  std::vector<GdbSymbol> Symbols;<br>
<br>
   // CU vector is a part of constant pool area of section.<br>
   std::vector<std::vector<<wbr>uint32_t>> CuVectors;<br>
<br>
-  // Symbol table contents.<br>
-  llvm::DenseMap<llvm::<wbr>CachedHashStringRef, GdbSymbol *> Symbols;<br>
-<br>
   // Each chunk contains information gathered from a debug sections of single<br>
   // object and used to build different areas of gdb index.<br>
   std::vector<GdbIndexChunk> Chunks;<br>
@@ -712,12 +705,13 @@ private:<br>
   uint64_t CuListOffset = 24;<br>
   uint64_t CuTypesOffset;<br>
   uint64_t SymtabOffset;<br>
+  uint64_t SymtabSize = 0;<br>
   uint64_t ConstantPoolOffset;<br>
   uint64_t CuVectorsPoolSize = 0;<br>
   uint64_t StringPoolSize;<br>
   uint64_t TotalSize;<br>
<br>
-  std::vector<size_t> CuVectorOffsets;<br>
+  std::vector<uint32_t> CuVectorOffsets;<br>
 };<br>
<br>
 template <class ELFT> GdbIndexSection *createGdbIndex();<br>
<br>
<br>
______________________________<wbr>_________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@lists.llvm.org">llvm-commits@lists.llvm.org</a><br>
<a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/<wbr>mailman/listinfo/llvm-commits</a><br>
</blockquote></div><br></div>