[lld] r336672 - Reduce memory usage when creating .gdb_index. NFC.
Rui Ueyama via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 10 11:32:47 PDT 2018
Sorry for the breakage. I believe that's fixed in r336700.
On Tue, Jul 10, 2018 at 11:30 AM Evgenii Stepanov <eugeni.stepanov at gmail.com>
wrote:
> Hi,
>
> MSan is unhappy about this change:
>
>
> http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-bootstrap-msan/builds/5844/steps/check-lld%20msan/logs/stdio
>
> ==43923==WARNING: MemorySanitizer: use-of-uninitialized-value
> #0 0xa4087f in
> lld::elf::GdbIndexSection::GdbIndexSection(std::__1::vector<lld::elf::GdbIndexChunk,
> std::__1::allocator<lld::elf::GdbIndexChunk> >&&)
> /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/SyntheticSections.cpp:2432:39
> #1 0xa474d4 in lld::elf::GdbIndexSection*
> lld::make<lld::elf::GdbIndexSection,
> std::__1::vector<lld::elf::GdbIndexChunk,
> std::__1::allocator<lld::elf::GdbIndexChunk> >
> >(std::__1::vector<lld::elf::GdbIndexChunk,
> std::__1::allocator<lld::elf::GdbIndexChunk> >&&)
> /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/include/lld/Common/Memory.h:55:39
> #2 0xa483c3 in lld::elf::GdbIndexSection*
> lld::elf::createGdbIndex<llvm::object::ELFType<(llvm::support::endianness)1,
> true> >()
> /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/SyntheticSections.cpp:2362:10
> #3 0xb5513e in
> createSyntheticSections<llvm::object::ELFType<llvm::support::little, true>
> >
> /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/Writer.cpp:373:21
> #4 0xb5513e in (anonymous
> namespace)::Writer<llvm::object::ELFType<(llvm::support::endianness)1,
> true> >::run()
> /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/Writer.cpp:426
> #5 0xb52ee4 in void
> lld::elf::writeResult<llvm::object::ELFType<(llvm::support::endianness)1,
> true> >()
> /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/Writer.cpp:143:64
> #6 0x84e4f6 in void
> lld::elf::LinkerDriver::link<llvm::object::ELFType<(llvm::support::endianness)1,
> true> >(llvm::opt::InputArgList&)
> /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/Driver.cpp:1413:3
> #7 0x832b56 in lld::elf::LinkerDriver::main(llvm::ArrayRef<char
> const*>)
> /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/Driver.cpp:422:5
> #8 0x82eedd in lld::elf::link(llvm::ArrayRef<char const*>, bool,
> llvm::raw_ostream&)
> /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/Driver.cpp:98:11
> #9 0x5d350e in main
> /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/tools/lld/lld.cpp:132:13
> #10 0x7f157cd712e0 in __libc_start_main
> (/lib/x86_64-linux-gnu/libc.so.6+0x202e0)
> #11 0x55eca9 in _start
> (/b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm_build_msan/bin/lld+0x55eca9)
>
> On Tue, Jul 10, 2018 at 6:49 AM, Rui Ueyama via llvm-commits <
> llvm-commits at lists.llvm.org> wrote:
>
>> Author: ruiu
>> Date: Tue Jul 10 06:49:13 2018
>> New Revision: 336672
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=336672&view=rev
>> Log:
>> Reduce memory usage when creating .gdb_index. NFC.
>>
>> .gdb_index sections can be very large. When you are compiling
>> multi-gibibyte executables, they can be larger than 1 GiB. The previous
>> implementation of .gdb_index seems to consume too much memory.
>>
>> This patch reduces memory consumption by eliminating temporary objects.
>> In one experiment, memory consumption of GdbIndexSection class is
>> reduced from 962 MiB to 228 MiB when creating a .gdb_index of 1350 GiB.
>>
>> Differential Revision: https://reviews.llvm.org/D49094
>>
>> Modified:
>> lld/trunk/ELF/SyntheticSections.cpp
>> lld/trunk/ELF/SyntheticSections.h
>>
>> Modified: lld/trunk/ELF/SyntheticSections.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/SyntheticSections.cpp?rev=336672&r1=336671&r2=336672&view=diff
>>
>> ==============================================================================
>> --- lld/trunk/ELF/SyntheticSections.cpp (original)
>> +++ lld/trunk/ELF/SyntheticSections.cpp Tue Jul 10 06:49:13 2018
>> @@ -52,6 +52,7 @@ using namespace llvm::support;
>> using namespace lld;
>> using namespace lld::elf;
>>
>> +using llvm::support::endian::read32le;
>> using llvm::support::endian::write32le;
>> using llvm::support::endian::write64le;
>>
>> @@ -2311,19 +2312,17 @@ readAddressAreas(DWARFContext &Dwarf, In
>> }
>>
>> static std::vector<GdbIndexChunk::NameTypeEntry>
>> -readPubNamesAndTypes(DWARFContext &Dwarf) {
>> +readPubNamesAndTypes(DWARFContext &Dwarf, uint32_t Idx) {
>> StringRef Sec1 = Dwarf.getDWARFObj().getGnuPubNamesSection();
>> StringRef Sec2 = Dwarf.getDWARFObj().getGnuPubTypesSection();
>>
>> std::vector<GdbIndexChunk::NameTypeEntry> Ret;
>> for (StringRef Sec : {Sec1, Sec2}) {
>> DWARFDebugPubTable Table(Sec, Config->IsLE, true);
>> - for (const DWARFDebugPubTable::Set &Set : Table.getData()) {
>> - for (const DWARFDebugPubTable::Entry &Ent : Set.Entries) {
>> - CachedHashStringRef S(Ent.Name, computeGdbHash(Ent.Name));
>> - Ret.push_back({S, Ent.Descriptor.toBits()});
>> - }
>> - }
>> + for (const DWARFDebugPubTable::Set &Set : Table.getData())
>> + for (const DWARFDebugPubTable::Entry &Ent : Set.Entries)
>> + Ret.push_back({{Ent.Name, computeGdbHash(Ent.Name)},
>> + (Ent.Descriptor.toBits() << 24) | Idx});
>> }
>> return Ret;
>> }
>> @@ -2337,43 +2336,6 @@ static std::vector<InputSection *> getDe
>> return Ret;
>> }
>>
>> -void GdbIndexSection::fixCuIndex() {
>> - uint32_t Idx = 0;
>> - for (GdbIndexChunk &Chunk : Chunks) {
>> - for (GdbIndexChunk::AddressEntry &Ent : Chunk.AddressAreas)
>> - Ent.CuIndex += Idx;
>> - Idx += Chunk.CompilationUnits.size();
>> - }
>> -}
>> -
>> -std::vector<std::vector<uint32_t>> GdbIndexSection::createCuVectors() {
>> - std::vector<std::vector<uint32_t>> Ret;
>> - uint32_t Idx = 0;
>> - uint32_t Off = 0;
>> -
>> - for (GdbIndexChunk &Chunk : Chunks) {
>> - for (GdbIndexChunk::NameTypeEntry &Ent : Chunk.NamesAndTypes) {
>> - GdbSymbol *&Sym = Symbols[Ent.Name];
>> - if (!Sym) {
>> - Sym = make<GdbSymbol>(GdbSymbol{Ent.Name.hash(), Off,
>> Ret.size()});
>> - Off += Ent.Name.size() + 1;
>> - Ret.push_back({});
>> - }
>> -
>> - // gcc 5.4.1 produces a buggy .debug_gnu_pubnames that contains
>> - // duplicate entries, so we want to dedup them.
>> - std::vector<uint32_t> &Vec = Ret[Sym->CuVectorIndex];
>> - uint32_t Val = (Ent.Type << 24) | Idx;
>> - if (Vec.empty() || Vec.back() != Val)
>> - Vec.push_back(Val);
>> - }
>> - Idx += Chunk.CompilationUnits.size();
>> - }
>> -
>> - StringPoolSize = Off;
>> - return Ret;
>> -}
>> -
>> template <class ELFT> GdbIndexSection *elf::createGdbIndex() {
>> // Gather debug info to create a .gdb_index section.
>> std::vector<InputSection *> Sections = getDebugInfoSections();
>> @@ -2386,7 +2348,7 @@ template <class ELFT> GdbIndexSection *e
>> Chunks[I].DebugInfoSec = Sections[I];
>> Chunks[I].CompilationUnits = readCuList(Dwarf);
>> Chunks[I].AddressAreas = readAddressAreas(Dwarf, Sections[I]);
>> - Chunks[I].NamesAndTypes = readPubNamesAndTypes(Dwarf);
>> + Chunks[I].NamesAndTypes = readPubNamesAndTypes(Dwarf, I);
>> });
>>
>> // .debug_gnu_pub{names,types} are useless in executables.
>> @@ -2414,37 +2376,48 @@ static size_t getAddressAreaSize(ArrayRe
>> return Ret;
>> }
>>
>> -std::vector<GdbSymbol *> GdbIndexSection::createGdbSymtab() {
>> - uint32_t Size = NextPowerOf2(Symbols.size() * 4 / 3);
>> - if (Size < 1024)
>> - Size = 1024;
>> -
>> - uint32_t Mask = Size - 1;
>> - std::vector<GdbSymbol *> Ret(Size);
>> -
>> - for (auto &KV : Symbols) {
>> - GdbSymbol *Sym = KV.second;
>> - uint32_t I = Sym->NameHash & Mask;
>> - uint32_t Step = ((Sym->NameHash * 17) & Mask) | 1;
>> -
>> - while (Ret[I])
>> - I = (I + Step) & Mask;
>> - Ret[I] = Sym;
>> - }
>> - return Ret;
>> +// Returns the desired size of an on-disk hash table for a .gdb_index
>> section.
>> +// There's a tradeoff between size and collision rate. We aim 75%
>> utilization.
>> +static size_t getSymtabSize(size_t NumSymbols) {
>> + return std::max<size_t>(NextPowerOf2(NumSymbols * 4 / 3), 1024);
>> }
>>
>> GdbIndexSection::GdbIndexSection(std::vector<GdbIndexChunk> &&C)
>> : SyntheticSection(0, SHT_PROGBITS, 1, ".gdb_index"),
>> Chunks(std::move(C)) {
>> - fixCuIndex();
>> - CuVectors = createCuVectors();
>> - GdbSymtab = createGdbSymtab();
>> + // A map to identify duplicate symbols.
>> + DenseMap<CachedHashStringRef, size_t> Map;
>> +
>> + // Initialize Symbols and CuVectors while deduplicating symbols by
>> name.
>> + for (GdbIndexChunk &Chunk : Chunks) {
>> + for (GdbIndexChunk::NameTypeEntry &Ent : Chunk.NamesAndTypes) {
>> + CachedHashStringRef S = Ent.Name;
>> + size_t &Idx = Map[S];
>> +
>> + if (!Idx) {
>> + Idx = Symbols.size() + 1;
>> + Symbols.push_back({S, static_cast<uint32_t>(StringPoolSize),
>> + static_cast<uint32_t>(Symbols.size())});
>> + StringPoolSize += S.size() + 1;
>> + CuVectors.push_back({});
>> + }
>> +
>> + // gcc 5.4.1 produces a buggy .debug_gnu_pubnames that contains
>> + // duplicate entries, so we want to dedup them.
>> + std::vector<uint32_t> &Vec = CuVectors[Symbols[Idx -
>> 1].CuVectorIdx];
>> + if (Vec.empty() || Vec.back() != Ent.Type)
>> + Vec.push_back(Ent.Type);
>> + }
>> +
>> + // NamesAndTypes is useless beyond this point, so clear it to save
>> memory.
>> + Chunk.NamesAndTypes = {};
>> + }
>>
>> // Compute offsets early to know the section size.
>> // Each chunk size needs to be in sync with what we write in writeTo.
>> CuTypesOffset = CuListOffset + getCuSize(Chunks) * 16;
>> SymtabOffset = CuTypesOffset + getAddressAreaSize(Chunks) * 20;
>> - ConstantPoolOffset = SymtabOffset + GdbSymtab.size() * 8;
>> + SymtabSize = getSymtabSize(Symbols.size());
>> + ConstantPoolOffset = SymtabOffset + SymtabSize * 8;
>>
>> for (ArrayRef<uint32_t> Vec : CuVectors) {
>> CuVectorOffsets.push_back(CuVectorsPoolSize);
>> @@ -2480,25 +2453,35 @@ void GdbIndexSection::writeTo(uint8_t *B
>> }
>>
>> // Write the address area.
>> - for (GdbIndexChunk &D : Chunks) {
>> - for (GdbIndexChunk::AddressEntry &E : D.AddressAreas) {
>> + uint32_t Idx = 0;
>> + for (GdbIndexChunk &Chunk : Chunks) {
>> + for (GdbIndexChunk::AddressEntry &E : Chunk.AddressAreas) {
>> uint64_t BaseAddr = E.Section->getVA(0);
>> write64le(Buf, BaseAddr + E.LowAddress);
>> write64le(Buf + 8, BaseAddr + E.HighAddress);
>> - write32le(Buf + 16, E.CuIndex);
>> + write32le(Buf + 16, E.CuIndex + Idx);
>> Buf += 20;
>> }
>> + Idx += Chunk.CompilationUnits.size();
>> }
>>
>> - // Write the symbol table.
>> - for (GdbSymbol *Sym : GdbSymtab) {
>> - if (Sym) {
>> - write32le(Buf, CuVectorsPoolSize + Sym->NameOffset);
>> - write32le(Buf + 4, CuVectorOffsets[Sym->CuVectorIndex]);
>> - }
>> - Buf += 8;
>> + // Write the on-disk open-addressing hash table containing symbols.
>> + for (GdbSymbol &Sym : Symbols) {
>> + uint32_t Mask = SymtabSize - 1;
>> + uint32_t H = Sym.Name.hash();
>> + uint32_t I = H & Mask;
>> + uint32_t Step = ((H * 17) & Mask) | 1;
>> +
>> + while (read32le(Buf + I * 8))
>> + I = (I + Step) & Mask;
>> +
>> + uint8_t *P = Buf + I * 8;
>> + write32le(P, CuVectorsPoolSize + Sym.OutputOff);
>> + write32le(P + 4, CuVectorOffsets[Sym.CuVectorIdx]);
>> }
>>
>> + Buf += SymtabSize * 8;
>> +
>> // Write the CU vectors.
>> for (ArrayRef<uint32_t> Vec : CuVectors) {
>> write32le(Buf, Vec.size());
>> @@ -2510,13 +2493,8 @@ void GdbIndexSection::writeTo(uint8_t *B
>> }
>>
>> // Write the string pool.
>> - for (auto &KV : Symbols) {
>> - CachedHashStringRef S = KV.first;
>> - GdbSymbol *Sym = KV.second;
>> - size_t Off = Sym->NameOffset;
>> - memcpy(Buf + Off, S.val().data(), S.size());
>> - Buf[Off + S.size()] = '\0';
>> - }
>> + for (GdbSymbol &Sym : Symbols)
>> + memcpy(Buf + Sym.OutputOff, Sym.Name.val().data(), Sym.Name.size());
>> }
>>
>> bool GdbIndexSection::empty() const { return !Out::DebugInfo; }
>>
>> Modified: lld/trunk/ELF/SyntheticSections.h
>> URL:
>> http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/SyntheticSections.h?rev=336672&r1=336671&r2=336672&view=diff
>>
>> ==============================================================================
>> --- lld/trunk/ELF/SyntheticSections.h (original)
>> +++ lld/trunk/ELF/SyntheticSections.h Tue Jul 10 06:49:13 2018
>> @@ -668,7 +668,7 @@ struct GdbIndexChunk {
>>
>> struct NameTypeEntry {
>> llvm::CachedHashStringRef Name;
>> - uint8_t Type;
>> + uint32_t Type;
>> };
>>
>> InputSection *DebugInfoSec;
>> @@ -679,9 +679,9 @@ struct GdbIndexChunk {
>>
>> // The symbol type for the .gdb_index section.
>> struct GdbSymbol {
>> - uint32_t NameHash;
>> - size_t NameOffset;
>> - size_t CuVectorIndex;
>> + llvm::CachedHashStringRef Name;
>> + uint32_t OutputOff;
>> + uint32_t CuVectorIdx;
>> };
>>
>> class GdbIndexSection final : public SyntheticSection {
>> @@ -692,19 +692,12 @@ public:
>> bool empty() const override;
>>
>> private:
>> - void fixCuIndex();
>> - std::vector<std::vector<uint32_t>> createCuVectors();
>> - std::vector<GdbSymbol *> createGdbSymtab();
>> -
>> // A symbol table for this .gdb_index section.
>> - std::vector<GdbSymbol *> GdbSymtab;
>> + std::vector<GdbSymbol> Symbols;
>>
>> // CU vector is a part of constant pool area of section.
>> std::vector<std::vector<uint32_t>> CuVectors;
>>
>> - // Symbol table contents.
>> - llvm::DenseMap<llvm::CachedHashStringRef, GdbSymbol *> Symbols;
>> -
>> // Each chunk contains information gathered from a debug sections of
>> single
>> // object and used to build different areas of gdb index.
>> std::vector<GdbIndexChunk> Chunks;
>> @@ -712,12 +705,13 @@ private:
>> uint64_t CuListOffset = 24;
>> uint64_t CuTypesOffset;
>> uint64_t SymtabOffset;
>> + uint64_t SymtabSize = 0;
>> uint64_t ConstantPoolOffset;
>> uint64_t CuVectorsPoolSize = 0;
>> uint64_t StringPoolSize;
>> uint64_t TotalSize;
>>
>> - std::vector<size_t> CuVectorOffsets;
>> + std::vector<uint32_t> CuVectorOffsets;
>> };
>>
>> template <class ELFT> GdbIndexSection *createGdbIndex();
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at lists.llvm.org
>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180710/dd2665dd/attachment.html>
More information about the llvm-commits
mailing list