[lld] r336672 - Reduce memory usage when creating .gdb_index. NFC.
Evgenii Stepanov via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 10 11:29:37 PDT 2018
Hi,
MSan is unhappy about this change:
http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-bootstrap-msan/builds/5844/steps/check-lld%20msan/logs/stdio
==43923==WARNING: MemorySanitizer: use-of-uninitialized-value
#0 0xa4087f in
lld::elf::GdbIndexSection::GdbIndexSection(std::__1::vector<lld::elf::GdbIndexChunk,
std::__1::allocator<lld::elf::GdbIndexChunk> >&&)
/b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/SyntheticSections.cpp:2432:39
#1 0xa474d4 in lld::elf::GdbIndexSection*
lld::make<lld::elf::GdbIndexSection,
std::__1::vector<lld::elf::GdbIndexChunk,
std::__1::allocator<lld::elf::GdbIndexChunk> >
>(std::__1::vector<lld::elf::GdbIndexChunk,
std::__1::allocator<lld::elf::GdbIndexChunk> >&&)
/b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/include/lld/Common/Memory.h:55:39
#2 0xa483c3 in lld::elf::GdbIndexSection*
lld::elf::createGdbIndex<llvm::object::ELFType<(llvm::support::endianness)1,
true> >()
/b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/SyntheticSections.cpp:2362:10
#3 0xb5513e in
createSyntheticSections<llvm::object::ELFType<llvm::support::little, true>
>
/b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/Writer.cpp:373:21
#4 0xb5513e in (anonymous
namespace)::Writer<llvm::object::ELFType<(llvm::support::endianness)1,
true> >::run()
/b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/Writer.cpp:426
#5 0xb52ee4 in void
lld::elf::writeResult<llvm::object::ELFType<(llvm::support::endianness)1,
true> >()
/b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/Writer.cpp:143:64
#6 0x84e4f6 in void
lld::elf::LinkerDriver::link<llvm::object::ELFType<(llvm::support::endianness)1,
true> >(llvm::opt::InputArgList&)
/b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/Driver.cpp:1413:3
#7 0x832b56 in lld::elf::LinkerDriver::main(llvm::ArrayRef<char
const*>)
/b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/Driver.cpp:422:5
#8 0x82eedd in lld::elf::link(llvm::ArrayRef<char const*>, bool,
llvm::raw_ostream&)
/b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/Driver.cpp:98:11
#9 0x5d350e in main
/b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/tools/lld/lld.cpp:132:13
#10 0x7f157cd712e0 in __libc_start_main
(/lib/x86_64-linux-gnu/libc.so.6+0x202e0)
#11 0x55eca9 in _start
(/b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm_build_msan/bin/lld+0x55eca9)
On Tue, Jul 10, 2018 at 6:49 AM, Rui Ueyama via llvm-commits <
llvm-commits at lists.llvm.org> wrote:
> Author: ruiu
> Date: Tue Jul 10 06:49:13 2018
> New Revision: 336672
>
> URL: http://llvm.org/viewvc/llvm-project?rev=336672&view=rev
> Log:
> Reduce memory usage when creating .gdb_index. NFC.
>
> .gdb_index sections can be very large. When you are compiling
> multi-gibibyte executables, they can be larger than 1 GiB. The previous
> implementation of .gdb_index seems to consume too much memory.
>
> This patch reduces memory consumption by eliminating temporary objects.
> In one experiment, memory consumption of GdbIndexSection class is
> reduced from 962 MiB to 228 MiB when creating a .gdb_index of 1350 GiB.
>
> Differential Revision: https://reviews.llvm.org/D49094
>
> Modified:
> lld/trunk/ELF/SyntheticSections.cpp
> lld/trunk/ELF/SyntheticSections.h
>
> Modified: lld/trunk/ELF/SyntheticSections.cpp
> URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/
> SyntheticSections.cpp?rev=336672&r1=336671&r2=336672&view=diff
> ============================================================
> ==================
> --- lld/trunk/ELF/SyntheticSections.cpp (original)
> +++ lld/trunk/ELF/SyntheticSections.cpp Tue Jul 10 06:49:13 2018
> @@ -52,6 +52,7 @@ using namespace llvm::support;
> using namespace lld;
> using namespace lld::elf;
>
> +using llvm::support::endian::read32le;
> using llvm::support::endian::write32le;
> using llvm::support::endian::write64le;
>
> @@ -2311,19 +2312,17 @@ readAddressAreas(DWARFContext &Dwarf, In
> }
>
> static std::vector<GdbIndexChunk::NameTypeEntry>
> -readPubNamesAndTypes(DWARFContext &Dwarf) {
> +readPubNamesAndTypes(DWARFContext &Dwarf, uint32_t Idx) {
> StringRef Sec1 = Dwarf.getDWARFObj().getGnuPubNamesSection();
> StringRef Sec2 = Dwarf.getDWARFObj().getGnuPubTypesSection();
>
> std::vector<GdbIndexChunk::NameTypeEntry> Ret;
> for (StringRef Sec : {Sec1, Sec2}) {
> DWARFDebugPubTable Table(Sec, Config->IsLE, true);
> - for (const DWARFDebugPubTable::Set &Set : Table.getData()) {
> - for (const DWARFDebugPubTable::Entry &Ent : Set.Entries) {
> - CachedHashStringRef S(Ent.Name, computeGdbHash(Ent.Name));
> - Ret.push_back({S, Ent.Descriptor.toBits()});
> - }
> - }
> + for (const DWARFDebugPubTable::Set &Set : Table.getData())
> + for (const DWARFDebugPubTable::Entry &Ent : Set.Entries)
> + Ret.push_back({{Ent.Name, computeGdbHash(Ent.Name)},
> + (Ent.Descriptor.toBits() << 24) | Idx});
> }
> return Ret;
> }
> @@ -2337,43 +2336,6 @@ static std::vector<InputSection *> getDe
> return Ret;
> }
>
> -void GdbIndexSection::fixCuIndex() {
> - uint32_t Idx = 0;
> - for (GdbIndexChunk &Chunk : Chunks) {
> - for (GdbIndexChunk::AddressEntry &Ent : Chunk.AddressAreas)
> - Ent.CuIndex += Idx;
> - Idx += Chunk.CompilationUnits.size();
> - }
> -}
> -
> -std::vector<std::vector<uint32_t>> GdbIndexSection::createCuVectors() {
> - std::vector<std::vector<uint32_t>> Ret;
> - uint32_t Idx = 0;
> - uint32_t Off = 0;
> -
> - for (GdbIndexChunk &Chunk : Chunks) {
> - for (GdbIndexChunk::NameTypeEntry &Ent : Chunk.NamesAndTypes) {
> - GdbSymbol *&Sym = Symbols[Ent.Name];
> - if (!Sym) {
> - Sym = make<GdbSymbol>(GdbSymbol{Ent.Name.hash(), Off,
> Ret.size()});
> - Off += Ent.Name.size() + 1;
> - Ret.push_back({});
> - }
> -
> - // gcc 5.4.1 produces a buggy .debug_gnu_pubnames that contains
> - // duplicate entries, so we want to dedup them.
> - std::vector<uint32_t> &Vec = Ret[Sym->CuVectorIndex];
> - uint32_t Val = (Ent.Type << 24) | Idx;
> - if (Vec.empty() || Vec.back() != Val)
> - Vec.push_back(Val);
> - }
> - Idx += Chunk.CompilationUnits.size();
> - }
> -
> - StringPoolSize = Off;
> - return Ret;
> -}
> -
> template <class ELFT> GdbIndexSection *elf::createGdbIndex() {
> // Gather debug info to create a .gdb_index section.
> std::vector<InputSection *> Sections = getDebugInfoSections();
> @@ -2386,7 +2348,7 @@ template <class ELFT> GdbIndexSection *e
> Chunks[I].DebugInfoSec = Sections[I];
> Chunks[I].CompilationUnits = readCuList(Dwarf);
> Chunks[I].AddressAreas = readAddressAreas(Dwarf, Sections[I]);
> - Chunks[I].NamesAndTypes = readPubNamesAndTypes(Dwarf);
> + Chunks[I].NamesAndTypes = readPubNamesAndTypes(Dwarf, I);
> });
>
> // .debug_gnu_pub{names,types} are useless in executables.
> @@ -2414,37 +2376,48 @@ static size_t getAddressAreaSize(ArrayRe
> return Ret;
> }
>
> -std::vector<GdbSymbol *> GdbIndexSection::createGdbSymtab() {
> - uint32_t Size = NextPowerOf2(Symbols.size() * 4 / 3);
> - if (Size < 1024)
> - Size = 1024;
> -
> - uint32_t Mask = Size - 1;
> - std::vector<GdbSymbol *> Ret(Size);
> -
> - for (auto &KV : Symbols) {
> - GdbSymbol *Sym = KV.second;
> - uint32_t I = Sym->NameHash & Mask;
> - uint32_t Step = ((Sym->NameHash * 17) & Mask) | 1;
> -
> - while (Ret[I])
> - I = (I + Step) & Mask;
> - Ret[I] = Sym;
> - }
> - return Ret;
> +// Returns the desired size of an on-disk hash table for a .gdb_index
> section.
> +// There's a tradeoff between size and collision rate. We aim 75%
> utilization.
> +static size_t getSymtabSize(size_t NumSymbols) {
> + return std::max<size_t>(NextPowerOf2(NumSymbols * 4 / 3), 1024);
> }
>
> GdbIndexSection::GdbIndexSection(std::vector<GdbIndexChunk> &&C)
> : SyntheticSection(0, SHT_PROGBITS, 1, ".gdb_index"),
> Chunks(std::move(C)) {
> - fixCuIndex();
> - CuVectors = createCuVectors();
> - GdbSymtab = createGdbSymtab();
> + // A map to identify duplicate symbols.
> + DenseMap<CachedHashStringRef, size_t> Map;
> +
> + // Initialize Symbols and CuVectors while deduplicating symbols by name.
> + for (GdbIndexChunk &Chunk : Chunks) {
> + for (GdbIndexChunk::NameTypeEntry &Ent : Chunk.NamesAndTypes) {
> + CachedHashStringRef S = Ent.Name;
> + size_t &Idx = Map[S];
> +
> + if (!Idx) {
> + Idx = Symbols.size() + 1;
> + Symbols.push_back({S, static_cast<uint32_t>(StringPoolSize),
> + static_cast<uint32_t>(Symbols.size())});
> + StringPoolSize += S.size() + 1;
> + CuVectors.push_back({});
> + }
> +
> + // gcc 5.4.1 produces a buggy .debug_gnu_pubnames that contains
> + // duplicate entries, so we want to dedup them.
> + std::vector<uint32_t> &Vec = CuVectors[Symbols[Idx -
> 1].CuVectorIdx];
> + if (Vec.empty() || Vec.back() != Ent.Type)
> + Vec.push_back(Ent.Type);
> + }
> +
> + // NamesAndTypes is useless beyond this point, so clear it to save
> memory.
> + Chunk.NamesAndTypes = {};
> + }
>
> // Compute offsets early to know the section size.
> // Each chunk size needs to be in sync with what we write in writeTo.
> CuTypesOffset = CuListOffset + getCuSize(Chunks) * 16;
> SymtabOffset = CuTypesOffset + getAddressAreaSize(Chunks) * 20;
> - ConstantPoolOffset = SymtabOffset + GdbSymtab.size() * 8;
> + SymtabSize = getSymtabSize(Symbols.size());
> + ConstantPoolOffset = SymtabOffset + SymtabSize * 8;
>
> for (ArrayRef<uint32_t> Vec : CuVectors) {
> CuVectorOffsets.push_back(CuVectorsPoolSize);
> @@ -2480,25 +2453,35 @@ void GdbIndexSection::writeTo(uint8_t *B
> }
>
> // Write the address area.
> - for (GdbIndexChunk &D : Chunks) {
> - for (GdbIndexChunk::AddressEntry &E : D.AddressAreas) {
> + uint32_t Idx = 0;
> + for (GdbIndexChunk &Chunk : Chunks) {
> + for (GdbIndexChunk::AddressEntry &E : Chunk.AddressAreas) {
> uint64_t BaseAddr = E.Section->getVA(0);
> write64le(Buf, BaseAddr + E.LowAddress);
> write64le(Buf + 8, BaseAddr + E.HighAddress);
> - write32le(Buf + 16, E.CuIndex);
> + write32le(Buf + 16, E.CuIndex + Idx);
> Buf += 20;
> }
> + Idx += Chunk.CompilationUnits.size();
> }
>
> - // Write the symbol table.
> - for (GdbSymbol *Sym : GdbSymtab) {
> - if (Sym) {
> - write32le(Buf, CuVectorsPoolSize + Sym->NameOffset);
> - write32le(Buf + 4, CuVectorOffsets[Sym->CuVectorIndex]);
> - }
> - Buf += 8;
> + // Write the on-disk open-addressing hash table containing symbols.
> + for (GdbSymbol &Sym : Symbols) {
> + uint32_t Mask = SymtabSize - 1;
> + uint32_t H = Sym.Name.hash();
> + uint32_t I = H & Mask;
> + uint32_t Step = ((H * 17) & Mask) | 1;
> +
> + while (read32le(Buf + I * 8))
> + I = (I + Step) & Mask;
> +
> + uint8_t *P = Buf + I * 8;
> + write32le(P, CuVectorsPoolSize + Sym.OutputOff);
> + write32le(P + 4, CuVectorOffsets[Sym.CuVectorIdx]);
> }
>
> + Buf += SymtabSize * 8;
> +
> // Write the CU vectors.
> for (ArrayRef<uint32_t> Vec : CuVectors) {
> write32le(Buf, Vec.size());
> @@ -2510,13 +2493,8 @@ void GdbIndexSection::writeTo(uint8_t *B
> }
>
> // Write the string pool.
> - for (auto &KV : Symbols) {
> - CachedHashStringRef S = KV.first;
> - GdbSymbol *Sym = KV.second;
> - size_t Off = Sym->NameOffset;
> - memcpy(Buf + Off, S.val().data(), S.size());
> - Buf[Off + S.size()] = '\0';
> - }
> + for (GdbSymbol &Sym : Symbols)
> + memcpy(Buf + Sym.OutputOff, Sym.Name.val().data(), Sym.Name.size());
> }
>
> bool GdbIndexSection::empty() const { return !Out::DebugInfo; }
>
> Modified: lld/trunk/ELF/SyntheticSections.h
> URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/
> SyntheticSections.h?rev=336672&r1=336671&r2=336672&view=diff
> ============================================================
> ==================
> --- lld/trunk/ELF/SyntheticSections.h (original)
> +++ lld/trunk/ELF/SyntheticSections.h Tue Jul 10 06:49:13 2018
> @@ -668,7 +668,7 @@ struct GdbIndexChunk {
>
> struct NameTypeEntry {
> llvm::CachedHashStringRef Name;
> - uint8_t Type;
> + uint32_t Type;
> };
>
> InputSection *DebugInfoSec;
> @@ -679,9 +679,9 @@ struct GdbIndexChunk {
>
> // The symbol type for the .gdb_index section.
> struct GdbSymbol {
> - uint32_t NameHash;
> - size_t NameOffset;
> - size_t CuVectorIndex;
> + llvm::CachedHashStringRef Name;
> + uint32_t OutputOff;
> + uint32_t CuVectorIdx;
> };
>
> class GdbIndexSection final : public SyntheticSection {
> @@ -692,19 +692,12 @@ public:
> bool empty() const override;
>
> private:
> - void fixCuIndex();
> - std::vector<std::vector<uint32_t>> createCuVectors();
> - std::vector<GdbSymbol *> createGdbSymtab();
> -
> // A symbol table for this .gdb_index section.
> - std::vector<GdbSymbol *> GdbSymtab;
> + std::vector<GdbSymbol> Symbols;
>
> // CU vector is a part of constant pool area of section.
> std::vector<std::vector<uint32_t>> CuVectors;
>
> - // Symbol table contents.
> - llvm::DenseMap<llvm::CachedHashStringRef, GdbSymbol *> Symbols;
> -
> // Each chunk contains information gathered from a debug sections of
> single
> // object and used to build different areas of gdb index.
> std::vector<GdbIndexChunk> Chunks;
> @@ -712,12 +705,13 @@ private:
> uint64_t CuListOffset = 24;
> uint64_t CuTypesOffset;
> uint64_t SymtabOffset;
> + uint64_t SymtabSize = 0;
> uint64_t ConstantPoolOffset;
> uint64_t CuVectorsPoolSize = 0;
> uint64_t StringPoolSize;
> uint64_t TotalSize;
>
> - std::vector<size_t> CuVectorOffsets;
> + std::vector<uint32_t> CuVectorOffsets;
> };
>
> template <class ELFT> GdbIndexSection *createGdbIndex();
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180710/9b685acc/attachment.html>
More information about the llvm-commits
mailing list