[lld] r336672 - Reduce memory usage when creating .gdb_index. NFC.

Rui Ueyama via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 10 11:32:47 PDT 2018


Sorry for the breakage. I believe that's fixed in r336700.

On Tue, Jul 10, 2018 at 11:30 AM Evgenii Stepanov <eugeni.stepanov at gmail.com>
wrote:

> Hi,
>
> MSan is unhappy about this change:
>
>
> http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-bootstrap-msan/builds/5844/steps/check-lld%20msan/logs/stdio
>
> ==43923==WARNING: MemorySanitizer: use-of-uninitialized-value
>     #0 0xa4087f in
> lld::elf::GdbIndexSection::GdbIndexSection(std::__1::vector<lld::elf::GdbIndexChunk,
> std::__1::allocator<lld::elf::GdbIndexChunk> >&&)
> /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/SyntheticSections.cpp:2432:39
>     #1 0xa474d4 in lld::elf::GdbIndexSection*
> lld::make<lld::elf::GdbIndexSection,
> std::__1::vector<lld::elf::GdbIndexChunk,
> std::__1::allocator<lld::elf::GdbIndexChunk> >
> >(std::__1::vector<lld::elf::GdbIndexChunk,
> std::__1::allocator<lld::elf::GdbIndexChunk> >&&)
> /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/include/lld/Common/Memory.h:55:39
>     #2 0xa483c3 in lld::elf::GdbIndexSection*
> lld::elf::createGdbIndex<llvm::object::ELFType<(llvm::support::endianness)1,
> true> >()
> /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/SyntheticSections.cpp:2362:10
>     #3 0xb5513e in
> createSyntheticSections<llvm::object::ELFType<llvm::support::little, true>
> >
> /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/Writer.cpp:373:21
>     #4 0xb5513e in (anonymous
> namespace)::Writer<llvm::object::ELFType<(llvm::support::endianness)1,
> true> >::run()
> /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/Writer.cpp:426
>     #5 0xb52ee4 in void
> lld::elf::writeResult<llvm::object::ELFType<(llvm::support::endianness)1,
> true> >()
> /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/Writer.cpp:143:64
>     #6 0x84e4f6 in void
> lld::elf::LinkerDriver::link<llvm::object::ELFType<(llvm::support::endianness)1,
> true> >(llvm::opt::InputArgList&)
> /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/Driver.cpp:1413:3
>     #7 0x832b56 in lld::elf::LinkerDriver::main(llvm::ArrayRef<char
> const*>)
> /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/Driver.cpp:422:5
>     #8 0x82eedd in lld::elf::link(llvm::ArrayRef<char const*>, bool,
> llvm::raw_ostream&)
> /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/Driver.cpp:98:11
>     #9 0x5d350e in main
> /b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/tools/lld/lld.cpp:132:13
>     #10 0x7f157cd712e0 in __libc_start_main
> (/lib/x86_64-linux-gnu/libc.so.6+0x202e0)
>     #11 0x55eca9 in _start
> (/b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm_build_msan/bin/lld+0x55eca9)
>
> On Tue, Jul 10, 2018 at 6:49 AM, Rui Ueyama via llvm-commits <
> llvm-commits at lists.llvm.org> wrote:
>
>> Author: ruiu
>> Date: Tue Jul 10 06:49:13 2018
>> New Revision: 336672
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=336672&view=rev
>> Log:
>> Reduce memory usage when creating .gdb_index. NFC.
>>
>> .gdb_index sections can be very large. When you are compiling
>> multi-gibibyte executables, they can be larger than 1 GiB. The previous
>> implementation of .gdb_index seems to consume too much memory.
>>
>> This patch reduces memory consumption by eliminating temporary objects.
>> In one experiment, memory consumption of GdbIndexSection class is
>> reduced from 962 MiB to 228 MiB when creating a .gdb_index of 1350 GiB.
>>
>> Differential Revision: https://reviews.llvm.org/D49094
>>
>> Modified:
>>     lld/trunk/ELF/SyntheticSections.cpp
>>     lld/trunk/ELF/SyntheticSections.h
>>
>> Modified: lld/trunk/ELF/SyntheticSections.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/SyntheticSections.cpp?rev=336672&r1=336671&r2=336672&view=diff
>>
>> ==============================================================================
>> --- lld/trunk/ELF/SyntheticSections.cpp (original)
>> +++ lld/trunk/ELF/SyntheticSections.cpp Tue Jul 10 06:49:13 2018
>> @@ -52,6 +52,7 @@ using namespace llvm::support;
>>  using namespace lld;
>>  using namespace lld::elf;
>>
>> +using llvm::support::endian::read32le;
>>  using llvm::support::endian::write32le;
>>  using llvm::support::endian::write64le;
>>
>> @@ -2311,19 +2312,17 @@ readAddressAreas(DWARFContext &Dwarf, In
>>  }
>>
>>  static std::vector<GdbIndexChunk::NameTypeEntry>
>> -readPubNamesAndTypes(DWARFContext &Dwarf) {
>> +readPubNamesAndTypes(DWARFContext &Dwarf, uint32_t Idx) {
>>    StringRef Sec1 = Dwarf.getDWARFObj().getGnuPubNamesSection();
>>    StringRef Sec2 = Dwarf.getDWARFObj().getGnuPubTypesSection();
>>
>>    std::vector<GdbIndexChunk::NameTypeEntry> Ret;
>>    for (StringRef Sec : {Sec1, Sec2}) {
>>      DWARFDebugPubTable Table(Sec, Config->IsLE, true);
>> -    for (const DWARFDebugPubTable::Set &Set : Table.getData()) {
>> -      for (const DWARFDebugPubTable::Entry &Ent : Set.Entries) {
>> -        CachedHashStringRef S(Ent.Name, computeGdbHash(Ent.Name));
>> -        Ret.push_back({S, Ent.Descriptor.toBits()});
>> -      }
>> -    }
>> +    for (const DWARFDebugPubTable::Set &Set : Table.getData())
>> +      for (const DWARFDebugPubTable::Entry &Ent : Set.Entries)
>> +        Ret.push_back({{Ent.Name, computeGdbHash(Ent.Name)},
>> +                       (Ent.Descriptor.toBits() << 24) | Idx});
>>    }
>>    return Ret;
>>  }
>> @@ -2337,43 +2336,6 @@ static std::vector<InputSection *> getDe
>>    return Ret;
>>  }
>>
>> -void GdbIndexSection::fixCuIndex() {
>> -  uint32_t Idx = 0;
>> -  for (GdbIndexChunk &Chunk : Chunks) {
>> -    for (GdbIndexChunk::AddressEntry &Ent : Chunk.AddressAreas)
>> -      Ent.CuIndex += Idx;
>> -    Idx += Chunk.CompilationUnits.size();
>> -  }
>> -}
>> -
>> -std::vector<std::vector<uint32_t>> GdbIndexSection::createCuVectors() {
>> -  std::vector<std::vector<uint32_t>> Ret;
>> -  uint32_t Idx = 0;
>> -  uint32_t Off = 0;
>> -
>> -  for (GdbIndexChunk &Chunk : Chunks) {
>> -    for (GdbIndexChunk::NameTypeEntry &Ent : Chunk.NamesAndTypes) {
>> -      GdbSymbol *&Sym = Symbols[Ent.Name];
>> -      if (!Sym) {
>> -        Sym = make<GdbSymbol>(GdbSymbol{Ent.Name.hash(), Off,
>> Ret.size()});
>> -        Off += Ent.Name.size() + 1;
>> -        Ret.push_back({});
>> -      }
>> -
>> -      // gcc 5.4.1 produces a buggy .debug_gnu_pubnames that contains
>> -      // duplicate entries, so we want to dedup them.
>> -      std::vector<uint32_t> &Vec = Ret[Sym->CuVectorIndex];
>> -      uint32_t Val = (Ent.Type << 24) | Idx;
>> -      if (Vec.empty() || Vec.back() != Val)
>> -        Vec.push_back(Val);
>> -    }
>> -    Idx += Chunk.CompilationUnits.size();
>> -  }
>> -
>> -  StringPoolSize = Off;
>> -  return Ret;
>> -}
>> -
>>  template <class ELFT> GdbIndexSection *elf::createGdbIndex() {
>>    // Gather debug info to create a .gdb_index section.
>>    std::vector<InputSection *> Sections = getDebugInfoSections();
>> @@ -2386,7 +2348,7 @@ template <class ELFT> GdbIndexSection *e
>>      Chunks[I].DebugInfoSec = Sections[I];
>>      Chunks[I].CompilationUnits = readCuList(Dwarf);
>>      Chunks[I].AddressAreas = readAddressAreas(Dwarf, Sections[I]);
>> -    Chunks[I].NamesAndTypes = readPubNamesAndTypes(Dwarf);
>> +    Chunks[I].NamesAndTypes = readPubNamesAndTypes(Dwarf, I);
>>    });
>>
>>    // .debug_gnu_pub{names,types} are useless in executables.
>> @@ -2414,37 +2376,48 @@ static size_t getAddressAreaSize(ArrayRe
>>    return Ret;
>>  }
>>
>> -std::vector<GdbSymbol *> GdbIndexSection::createGdbSymtab() {
>> -  uint32_t Size = NextPowerOf2(Symbols.size() * 4 / 3);
>> -  if (Size < 1024)
>> -    Size = 1024;
>> -
>> -  uint32_t Mask = Size - 1;
>> -  std::vector<GdbSymbol *> Ret(Size);
>> -
>> -  for (auto &KV : Symbols) {
>> -    GdbSymbol *Sym = KV.second;
>> -    uint32_t I = Sym->NameHash & Mask;
>> -    uint32_t Step = ((Sym->NameHash * 17) & Mask) | 1;
>> -
>> -    while (Ret[I])
>> -      I = (I + Step) & Mask;
>> -    Ret[I] = Sym;
>> -  }
>> -  return Ret;
>> +// Returns the desired size of an on-disk hash table for a .gdb_index
>> section.
>> +// There's a tradeoff between size and collision rate. We aim 75%
>> utilization.
>> +static size_t getSymtabSize(size_t NumSymbols) {
>> +  return std::max<size_t>(NextPowerOf2(NumSymbols * 4 / 3), 1024);
>>  }
>>
>>  GdbIndexSection::GdbIndexSection(std::vector<GdbIndexChunk> &&C)
>>      : SyntheticSection(0, SHT_PROGBITS, 1, ".gdb_index"),
>> Chunks(std::move(C)) {
>> -  fixCuIndex();
>> -  CuVectors = createCuVectors();
>> -  GdbSymtab = createGdbSymtab();
>> +  // A map to identify duplicate symbols.
>> +  DenseMap<CachedHashStringRef, size_t> Map;
>> +
>> +  // Initialize Symbols and CuVectors while deduplicating symbols by
>> name.
>> +  for (GdbIndexChunk &Chunk : Chunks) {
>> +    for (GdbIndexChunk::NameTypeEntry &Ent : Chunk.NamesAndTypes) {
>> +      CachedHashStringRef S = Ent.Name;
>> +      size_t &Idx = Map[S];
>> +
>> +      if (!Idx) {
>> +        Idx = Symbols.size() + 1;
>> +        Symbols.push_back({S, static_cast<uint32_t>(StringPoolSize),
>> +                           static_cast<uint32_t>(Symbols.size())});
>> +        StringPoolSize += S.size() + 1;
>> +        CuVectors.push_back({});
>> +      }
>> +
>> +      // gcc 5.4.1 produces a buggy .debug_gnu_pubnames that contains
>> +      // duplicate entries, so we want to dedup them.
>> +      std::vector<uint32_t> &Vec = CuVectors[Symbols[Idx -
>> 1].CuVectorIdx];
>> +      if (Vec.empty() || Vec.back() != Ent.Type)
>> +        Vec.push_back(Ent.Type);
>> +    }
>> +
>> +    // NamesAndTypes is useless beyond this point, so clear it to save
>> memory.
>> +    Chunk.NamesAndTypes = {};
>> +  }
>>
>>    // Compute offsets early to know the section size.
>>    // Each chunk size needs to be in sync with what we write in writeTo.
>>    CuTypesOffset = CuListOffset + getCuSize(Chunks) * 16;
>>    SymtabOffset = CuTypesOffset + getAddressAreaSize(Chunks) * 20;
>> -  ConstantPoolOffset = SymtabOffset + GdbSymtab.size() * 8;
>> +  SymtabSize = getSymtabSize(Symbols.size());
>> +  ConstantPoolOffset = SymtabOffset + SymtabSize * 8;
>>
>>    for (ArrayRef<uint32_t> Vec : CuVectors) {
>>      CuVectorOffsets.push_back(CuVectorsPoolSize);
>> @@ -2480,25 +2453,35 @@ void GdbIndexSection::writeTo(uint8_t *B
>>    }
>>
>>    // Write the address area.
>> -  for (GdbIndexChunk &D : Chunks) {
>> -    for (GdbIndexChunk::AddressEntry &E : D.AddressAreas) {
>> +  uint32_t Idx = 0;
>> +  for (GdbIndexChunk &Chunk : Chunks) {
>> +    for (GdbIndexChunk::AddressEntry &E : Chunk.AddressAreas) {
>>        uint64_t BaseAddr = E.Section->getVA(0);
>>        write64le(Buf, BaseAddr + E.LowAddress);
>>        write64le(Buf + 8, BaseAddr + E.HighAddress);
>> -      write32le(Buf + 16, E.CuIndex);
>> +      write32le(Buf + 16, E.CuIndex + Idx);
>>        Buf += 20;
>>      }
>> +    Idx += Chunk.CompilationUnits.size();
>>    }
>>
>> -  // Write the symbol table.
>> -  for (GdbSymbol *Sym : GdbSymtab) {
>> -    if (Sym) {
>> -      write32le(Buf, CuVectorsPoolSize + Sym->NameOffset);
>> -      write32le(Buf + 4, CuVectorOffsets[Sym->CuVectorIndex]);
>> -    }
>> -    Buf += 8;
>> +  // Write the on-disk open-addressing hash table containing symbols.
>> +  for (GdbSymbol &Sym : Symbols) {
>> +    uint32_t Mask = SymtabSize - 1;
>> +    uint32_t H = Sym.Name.hash();
>> +    uint32_t I = H & Mask;
>> +    uint32_t Step = ((H * 17) & Mask) | 1;
>> +
>> +    while (read32le(Buf + I * 8))
>> +      I = (I + Step) & Mask;
>> +
>> +    uint8_t *P = Buf + I * 8;
>> +    write32le(P, CuVectorsPoolSize + Sym.OutputOff);
>> +    write32le(P + 4, CuVectorOffsets[Sym.CuVectorIdx]);
>>    }
>>
>> +  Buf += SymtabSize * 8;
>> +
>>    // Write the CU vectors.
>>    for (ArrayRef<uint32_t> Vec : CuVectors) {
>>      write32le(Buf, Vec.size());
>> @@ -2510,13 +2493,8 @@ void GdbIndexSection::writeTo(uint8_t *B
>>    }
>>
>>    // Write the string pool.
>> -  for (auto &KV : Symbols) {
>> -    CachedHashStringRef S = KV.first;
>> -    GdbSymbol *Sym = KV.second;
>> -    size_t Off = Sym->NameOffset;
>> -    memcpy(Buf + Off, S.val().data(), S.size());
>> -    Buf[Off + S.size()] = '\0';
>> -  }
>> +  for (GdbSymbol &Sym : Symbols)
>> +    memcpy(Buf + Sym.OutputOff, Sym.Name.val().data(), Sym.Name.size());
>>  }
>>
>>  bool GdbIndexSection::empty() const { return !Out::DebugInfo; }
>>
>> Modified: lld/trunk/ELF/SyntheticSections.h
>> URL:
>> http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/SyntheticSections.h?rev=336672&r1=336671&r2=336672&view=diff
>>
>> ==============================================================================
>> --- lld/trunk/ELF/SyntheticSections.h (original)
>> +++ lld/trunk/ELF/SyntheticSections.h Tue Jul 10 06:49:13 2018
>> @@ -668,7 +668,7 @@ struct GdbIndexChunk {
>>
>>    struct NameTypeEntry {
>>      llvm::CachedHashStringRef Name;
>> -    uint8_t Type;
>> +    uint32_t Type;
>>    };
>>
>>    InputSection *DebugInfoSec;
>> @@ -679,9 +679,9 @@ struct GdbIndexChunk {
>>
>>  // The symbol type for the .gdb_index section.
>>  struct GdbSymbol {
>> -  uint32_t NameHash;
>> -  size_t NameOffset;
>> -  size_t CuVectorIndex;
>> +  llvm::CachedHashStringRef Name;
>> +  uint32_t OutputOff;
>> +  uint32_t CuVectorIdx;
>>  };
>>
>>  class GdbIndexSection final : public SyntheticSection {
>> @@ -692,19 +692,12 @@ public:
>>    bool empty() const override;
>>
>>  private:
>> -  void fixCuIndex();
>> -  std::vector<std::vector<uint32_t>> createCuVectors();
>> -  std::vector<GdbSymbol *> createGdbSymtab();
>> -
>>    // A symbol table for this .gdb_index section.
>> -  std::vector<GdbSymbol *> GdbSymtab;
>> +  std::vector<GdbSymbol> Symbols;
>>
>>    // CU vector is a part of constant pool area of section.
>>    std::vector<std::vector<uint32_t>> CuVectors;
>>
>> -  // Symbol table contents.
>> -  llvm::DenseMap<llvm::CachedHashStringRef, GdbSymbol *> Symbols;
>> -
>>    // Each chunk contains information gathered from a debug sections of
>> single
>>    // object and used to build different areas of gdb index.
>>    std::vector<GdbIndexChunk> Chunks;
>> @@ -712,12 +705,13 @@ private:
>>    uint64_t CuListOffset = 24;
>>    uint64_t CuTypesOffset;
>>    uint64_t SymtabOffset;
>> +  uint64_t SymtabSize = 0;
>>    uint64_t ConstantPoolOffset;
>>    uint64_t CuVectorsPoolSize = 0;
>>    uint64_t StringPoolSize;
>>    uint64_t TotalSize;
>>
>> -  std::vector<size_t> CuVectorOffsets;
>> +  std::vector<uint32_t> CuVectorOffsets;
>>  };
>>
>>  template <class ELFT> GdbIndexSection *createGdbIndex();
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at lists.llvm.org
>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180710/dd2665dd/attachment.html>


More information about the llvm-commits mailing list