<div dir="ltr"><div>Hi,</div><div><br></div><div>MSan is unhappy about this change:</div><div><br></div><a href="http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-bootstrap-msan/builds/5844/steps/check-lld%20msan/logs/stdio">http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-bootstrap-msan/builds/5844/steps/check-lld%20msan/logs/stdio</a><br><div><br></div><div><div>==43923==WARNING: MemorySanitizer: use-of-uninitialized-value</div><div> #0 0xa4087f in lld::elf::GdbIndexSection::GdbIndexSection(std::__1::vector<lld::elf::GdbIndexChunk, std::__1::allocator<lld::elf::GdbIndexChunk> >&&) /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/SyntheticSections.cpp:2432:39</div><div> #1 0xa474d4 in lld::elf::GdbIndexSection* lld::make<lld::elf::GdbIndexSection, std::__1::vector<lld::elf::GdbIndexChunk, std::__1::allocator<lld::elf::GdbIndexChunk> > >(std::__1::vector<lld::elf::GdbIndexChunk, std::__1::allocator<lld::elf::GdbIndexChunk> >&&) /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/include/lld/Common/Memory.h:55:39</div><div> #2 0xa483c3 in lld::elf::GdbIndexSection* lld::elf::createGdbIndex<llvm::object::ELFType<(llvm::support::endianness)1, true> >() /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/SyntheticSections.cpp:2362:10</div><div> #3 0xb5513e in createSyntheticSections<llvm::object::ELFType<llvm::support::little, true> > /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/Writer.cpp:373:21</div><div> #4 0xb5513e in (anonymous namespace)::Writer<llvm::object::ELFType<(llvm::support::endianness)1, true> >::run() /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/Writer.cpp:426</div><div> #5 0xb52ee4 in void lld::elf::writeResult<llvm::object::ELFType<(llvm::support::endianness)1, true> >() /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/Writer.cpp:143:64</div><div> #6 0x84e4f6 in void lld::elf::LinkerDriver::link<llvm::object::ELFType<(llvm::support::endianness)1, true> >(llvm::opt::InputArgList&) /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/Driver.cpp:1413:3</div><div> #7 0x832b56 in lld::elf::LinkerDriver::main(llvm::ArrayRef<char const*>) /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/Driver.cpp:422:5</div><div> #8 0x82eedd in lld::elf::link(llvm::ArrayRef<char const*>, bool, llvm::raw_ostream&) /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/Driver.cpp:98:11</div><div> #9 0x5d350e in main /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/tools/lld/lld.cpp:132:13</div><div> #10 0x7f157cd712e0 in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x202e0)</div><div> #11 0x55eca9 in _start (/b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm_build_msan/bin/lld+0x55eca9)</div></div></div><div class="gmail_extra"><br><div class="gmail_quote">On Tue, Jul 10, 2018 at 6:49 AM, Rui Ueyama via llvm-commits <span dir="ltr"><<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: ruiu<br>
Date: Tue Jul 10 06:49:13 2018<br>
New Revision: 336672<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=336672&view=rev" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project?rev=336672&view=rev</a><br>
Log:<br>
Reduce memory usage when creating .gdb_index. NFC.<br>
<br>
.gdb_index sections can be very large. When you are compiling<br>
multi-gibibyte executables, they can be larger than 1 GiB. The previous<br>
implementation of .gdb_index seems to consume too much memory.<br>
<br>
This patch reduces memory consumption by eliminating temporary objects.<br>
In one experiment, memory consumption of GdbIndexSection class is<br>
reduced from 962 MiB to 228 MiB when creating a .gdb_index of 1350 GiB.<br>
<br>
Differential Revision: <a href="https://reviews.llvm.org/D49094" rel="noreferrer" target="_blank">https://reviews.llvm.org/<wbr>D49094</a><br>
<br>
Modified:<br>
lld/trunk/ELF/<wbr>SyntheticSections.cpp<br>
lld/trunk/ELF/<wbr>SyntheticSections.h<br>
<br>
Modified: lld/trunk/ELF/<wbr>SyntheticSections.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/SyntheticSections.cpp?rev=336672&r1=336671&r2=336672&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/lld/trunk/ELF/<wbr>SyntheticSections.cpp?rev=<wbr>336672&r1=336671&r2=336672&<wbr>view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- lld/trunk/ELF/<wbr>SyntheticSections.cpp (original)<br>
+++ lld/trunk/ELF/<wbr>SyntheticSections.cpp Tue Jul 10 06:49:13 2018<br>
@@ -52,6 +52,7 @@ using namespace llvm::support;<br>
using namespace lld;<br>
using namespace lld::elf;<br>
<br>
+using llvm::support::endian::<wbr>read32le;<br>
using llvm::support::endian::<wbr>write32le;<br>
using llvm::support::endian::<wbr>write64le;<br>
<br>
@@ -2311,19 +2312,17 @@ readAddressAreas(DWARFContext &Dwarf, In<br>
}<br>
<br>
static std::vector<GdbIndexChunk::<wbr>NameTypeEntry><br>
-readPubNamesAndTypes(<wbr>DWARFContext &Dwarf) {<br>
+readPubNamesAndTypes(<wbr>DWARFContext &Dwarf, uint32_t Idx) {<br>
StringRef Sec1 = Dwarf.getDWARFObj().<wbr>getGnuPubNamesSection();<br>
StringRef Sec2 = Dwarf.getDWARFObj().<wbr>getGnuPubTypesSection();<br>
<br>
std::vector<GdbIndexChunk::<wbr>NameTypeEntry> Ret;<br>
for (StringRef Sec : {Sec1, Sec2}) {<br>
DWARFDebugPubTable Table(Sec, Config->IsLE, true);<br>
- for (const DWARFDebugPubTable::Set &Set : Table.getData()) {<br>
- for (const DWARFDebugPubTable::Entry &Ent : Set.Entries) {<br>
- CachedHashStringRef S(Ent.Name, computeGdbHash(Ent.Name));<br>
- Ret.push_back({S, Ent.Descriptor.toBits()});<br>
- }<br>
- }<br>
+ for (const DWARFDebugPubTable::Set &Set : Table.getData())<br>
+ for (const DWARFDebugPubTable::Entry &Ent : Set.Entries)<br>
+ Ret.push_back({{Ent.Name, computeGdbHash(Ent.Name)},<br>
+ (Ent.Descriptor.toBits() << 24) | Idx});<br>
}<br>
return Ret;<br>
}<br>
@@ -2337,43 +2336,6 @@ static std::vector<InputSection *> getDe<br>
return Ret;<br>
}<br>
<br>
-void GdbIndexSection::fixCuIndex() {<br>
- uint32_t Idx = 0;<br>
- for (GdbIndexChunk &Chunk : Chunks) {<br>
- for (GdbIndexChunk::AddressEntry &Ent : Chunk.AddressAreas)<br>
- Ent.CuIndex += Idx;<br>
- Idx += Chunk.CompilationUnits.size();<br>
- }<br>
-}<br>
-<br>
-std::vector<std::vector<<wbr>uint32_t>> GdbIndexSection::<wbr>createCuVectors() {<br>
- std::vector<std::vector<<wbr>uint32_t>> Ret;<br>
- uint32_t Idx = 0;<br>
- uint32_t Off = 0;<br>
-<br>
- for (GdbIndexChunk &Chunk : Chunks) {<br>
- for (GdbIndexChunk::NameTypeEntry &Ent : Chunk.NamesAndTypes) {<br>
- GdbSymbol *&Sym = Symbols[Ent.Name];<br>
- if (!Sym) {<br>
- Sym = make<GdbSymbol>(GdbSymbol{Ent.<wbr>Name.hash(), Off, Ret.size()});<br>
- Off += Ent.Name.size() + 1;<br>
- Ret.push_back({});<br>
- }<br>
-<br>
- // gcc 5.4.1 produces a buggy .debug_gnu_pubnames that contains<br>
- // duplicate entries, so we want to dedup them.<br>
- std::vector<uint32_t> &Vec = Ret[Sym->CuVectorIndex];<br>
- uint32_t Val = (Ent.Type << 24) | Idx;<br>
- if (Vec.empty() || Vec.back() != Val)<br>
- Vec.push_back(Val);<br>
- }<br>
- Idx += Chunk.CompilationUnits.size();<br>
- }<br>
-<br>
- StringPoolSize = Off;<br>
- return Ret;<br>
-}<br>
-<br>
template <class ELFT> GdbIndexSection *elf::createGdbIndex() {<br>
// Gather debug info to create a .gdb_index section.<br>
std::vector<InputSection *> Sections = getDebugInfoSections();<br>
@@ -2386,7 +2348,7 @@ template <class ELFT> GdbIndexSection *e<br>
Chunks[I].DebugInfoSec = Sections[I];<br>
Chunks[I].CompilationUnits = readCuList(Dwarf);<br>
Chunks[I].AddressAreas = readAddressAreas(Dwarf, Sections[I]);<br>
- Chunks[I].NamesAndTypes = readPubNamesAndTypes(Dwarf);<br>
+ Chunks[I].NamesAndTypes = readPubNamesAndTypes(Dwarf, I);<br>
});<br>
<br>
// .debug_gnu_pub{names,types} are useless in executables.<br>
@@ -2414,37 +2376,48 @@ static size_t getAddressAreaSize(ArrayRe<br>
return Ret;<br>
}<br>
<br>
-std::vector<GdbSymbol *> GdbIndexSection::<wbr>createGdbSymtab() {<br>
- uint32_t Size = NextPowerOf2(Symbols.size() * 4 / 3);<br>
- if (Size < 1024)<br>
- Size = 1024;<br>
-<br>
- uint32_t Mask = Size - 1;<br>
- std::vector<GdbSymbol *> Ret(Size);<br>
-<br>
- for (auto &KV : Symbols) {<br>
- GdbSymbol *Sym = KV.second;<br>
- uint32_t I = Sym->NameHash & Mask;<br>
- uint32_t Step = ((Sym->NameHash * 17) & Mask) | 1;<br>
-<br>
- while (Ret[I])<br>
- I = (I + Step) & Mask;<br>
- Ret[I] = Sym;<br>
- }<br>
- return Ret;<br>
+// Returns the desired size of an on-disk hash table for a .gdb_index section.<br>
+// There's a tradeoff between size and collision rate. We aim 75% utilization.<br>
+static size_t getSymtabSize(size_t NumSymbols) {<br>
+ return std::max<size_t>(NextPowerOf2(<wbr>NumSymbols * 4 / 3), 1024);<br>
}<br>
<br>
GdbIndexSection::<wbr>GdbIndexSection(std::vector<<wbr>GdbIndexChunk> &&C)<br>
: SyntheticSection(0, SHT_PROGBITS, 1, ".gdb_index"), Chunks(std::move(C)) {<br>
- fixCuIndex();<br>
- CuVectors = createCuVectors();<br>
- GdbSymtab = createGdbSymtab();<br>
+ // A map to identify duplicate symbols.<br>
+ DenseMap<CachedHashStringRef, size_t> Map;<br>
+<br>
+ // Initialize Symbols and CuVectors while deduplicating symbols by name.<br>
+ for (GdbIndexChunk &Chunk : Chunks) {<br>
+ for (GdbIndexChunk::NameTypeEntry &Ent : Chunk.NamesAndTypes) {<br>
+ CachedHashStringRef S = Ent.Name;<br>
+ size_t &Idx = Map[S];<br>
+<br>
+ if (!Idx) {<br>
+ Idx = Symbols.size() + 1;<br>
+ Symbols.push_back({S, static_cast<uint32_t>(<wbr>StringPoolSize),<br>
+ static_cast<uint32_t>(Symbols.<wbr>size())});<br>
+ StringPoolSize += S.size() + 1;<br>
+ CuVectors.push_back({});<br>
+ }<br>
+<br>
+ // gcc 5.4.1 produces a buggy .debug_gnu_pubnames that contains<br>
+ // duplicate entries, so we want to dedup them.<br>
+ std::vector<uint32_t> &Vec = CuVectors[Symbols[Idx - 1].CuVectorIdx];<br>
+ if (Vec.empty() || Vec.back() != Ent.Type)<br>
+ Vec.push_back(Ent.Type);<br>
+ }<br>
+<br>
+ // NamesAndTypes is useless beyond this point, so clear it to save memory.<br>
+ Chunk.NamesAndTypes = {};<br>
+ }<br>
<br>
// Compute offsets early to know the section size.<br>
// Each chunk size needs to be in sync with what we write in writeTo.<br>
CuTypesOffset = CuListOffset + getCuSize(Chunks) * 16;<br>
SymtabOffset = CuTypesOffset + getAddressAreaSize(Chunks) * 20;<br>
- ConstantPoolOffset = SymtabOffset + GdbSymtab.size() * 8;<br>
+ SymtabSize = getSymtabSize(Symbols.size());<br>
+ ConstantPoolOffset = SymtabOffset + SymtabSize * 8;<br>
<br>
for (ArrayRef<uint32_t> Vec : CuVectors) {<br>
CuVectorOffsets.push_back(<wbr>CuVectorsPoolSize);<br>
@@ -2480,25 +2453,35 @@ void GdbIndexSection::writeTo(<wbr>uint8_t *B<br>
}<br>
<br>
// Write the address area.<br>
- for (GdbIndexChunk &D : Chunks) {<br>
- for (GdbIndexChunk::AddressEntry &E : D.AddressAreas) {<br>
+ uint32_t Idx = 0;<br>
+ for (GdbIndexChunk &Chunk : Chunks) {<br>
+ for (GdbIndexChunk::AddressEntry &E : Chunk.AddressAreas) {<br>
uint64_t BaseAddr = E.Section->getVA(0);<br>
write64le(Buf, BaseAddr + E.LowAddress);<br>
write64le(Buf + 8, BaseAddr + E.HighAddress);<br>
- write32le(Buf + 16, E.CuIndex);<br>
+ write32le(Buf + 16, E.CuIndex + Idx);<br>
Buf += 20;<br>
}<br>
+ Idx += Chunk.CompilationUnits.size();<br>
}<br>
<br>
- // Write the symbol table.<br>
- for (GdbSymbol *Sym : GdbSymtab) {<br>
- if (Sym) {<br>
- write32le(Buf, CuVectorsPoolSize + Sym->NameOffset);<br>
- write32le(Buf + 4, CuVectorOffsets[Sym-><wbr>CuVectorIndex]);<br>
- }<br>
- Buf += 8;<br>
+ // Write the on-disk open-addressing hash table containing symbols.<br>
+ for (GdbSymbol &Sym : Symbols) {<br>
+ uint32_t Mask = SymtabSize - 1;<br>
+ uint32_t H = Sym.Name.hash();<br>
+ uint32_t I = H & Mask;<br>
+ uint32_t Step = ((H * 17) & Mask) | 1;<br>
+<br>
+ while (read32le(Buf + I * 8))<br>
+ I = (I + Step) & Mask;<br>
+<br>
+ uint8_t *P = Buf + I * 8;<br>
+ write32le(P, CuVectorsPoolSize + Sym.OutputOff);<br>
+ write32le(P + 4, CuVectorOffsets[Sym.<wbr>CuVectorIdx]);<br>
}<br>
<br>
+ Buf += SymtabSize * 8;<br>
+<br>
// Write the CU vectors.<br>
for (ArrayRef<uint32_t> Vec : CuVectors) {<br>
write32le(Buf, Vec.size());<br>
@@ -2510,13 +2493,8 @@ void GdbIndexSection::writeTo(<wbr>uint8_t *B<br>
}<br>
<br>
// Write the string pool.<br>
- for (auto &KV : Symbols) {<br>
- CachedHashStringRef S = KV.first;<br>
- GdbSymbol *Sym = KV.second;<br>
- size_t Off = Sym->NameOffset;<br>
- memcpy(Buf + Off, S.val().data(), S.size());<br>
- Buf[Off + S.size()] = '\0';<br>
- }<br>
+ for (GdbSymbol &Sym : Symbols)<br>
+ memcpy(Buf + Sym.OutputOff, Sym.Name.val().data(), Sym.Name.size());<br>
}<br>
<br>
bool GdbIndexSection::empty() const { return !Out::DebugInfo; }<br>
<br>
Modified: lld/trunk/ELF/<wbr>SyntheticSections.h<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/SyntheticSections.h?rev=336672&r1=336671&r2=336672&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/lld/trunk/ELF/<wbr>SyntheticSections.h?rev=<wbr>336672&r1=336671&r2=336672&<wbr>view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- lld/trunk/ELF/<wbr>SyntheticSections.h (original)<br>
+++ lld/trunk/ELF/<wbr>SyntheticSections.h Tue Jul 10 06:49:13 2018<br>
@@ -668,7 +668,7 @@ struct GdbIndexChunk {<br>
<br>
struct NameTypeEntry {<br>
llvm::CachedHashStringRef Name;<br>
- uint8_t Type;<br>
+ uint32_t Type;<br>
};<br>
<br>
InputSection *DebugInfoSec;<br>
@@ -679,9 +679,9 @@ struct GdbIndexChunk {<br>
<br>
// The symbol type for the .gdb_index section.<br>
struct GdbSymbol {<br>
- uint32_t NameHash;<br>
- size_t NameOffset;<br>
- size_t CuVectorIndex;<br>
+ llvm::CachedHashStringRef Name;<br>
+ uint32_t OutputOff;<br>
+ uint32_t CuVectorIdx;<br>
};<br>
<br>
class GdbIndexSection final : public SyntheticSection {<br>
@@ -692,19 +692,12 @@ public:<br>
bool empty() const override;<br>
<br>
private:<br>
- void fixCuIndex();<br>
- std::vector<std::vector<<wbr>uint32_t>> createCuVectors();<br>
- std::vector<GdbSymbol *> createGdbSymtab();<br>
-<br>
// A symbol table for this .gdb_index section.<br>
- std::vector<GdbSymbol *> GdbSymtab;<br>
+ std::vector<GdbSymbol> Symbols;<br>
<br>
// CU vector is a part of constant pool area of section.<br>
std::vector<std::vector<<wbr>uint32_t>> CuVectors;<br>
<br>
- // Symbol table contents.<br>
- llvm::DenseMap<llvm::<wbr>CachedHashStringRef, GdbSymbol *> Symbols;<br>
-<br>
// Each chunk contains information gathered from a debug sections of single<br>
// object and used to build different areas of gdb index.<br>
std::vector<GdbIndexChunk> Chunks;<br>
@@ -712,12 +705,13 @@ private:<br>
uint64_t CuListOffset = 24;<br>
uint64_t CuTypesOffset;<br>
uint64_t SymtabOffset;<br>
+ uint64_t SymtabSize = 0;<br>
uint64_t ConstantPoolOffset;<br>
uint64_t CuVectorsPoolSize = 0;<br>
uint64_t StringPoolSize;<br>
uint64_t TotalSize;<br>
<br>
- std::vector<size_t> CuVectorOffsets;<br>
+ std::vector<uint32_t> CuVectorOffsets;<br>
};<br>
<br>
template <class ELFT> GdbIndexSection *createGdbIndex();<br>
<br>
<br>
______________________________<wbr>_________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@lists.llvm.org">llvm-commits@lists.llvm.org</a><br>
<a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/<wbr>mailman/listinfo/llvm-commits</a><br>
</blockquote></div><br></div>