[lld] r336672 - Reduce memory usage when creating .gdb_index. NFC.

Evgenii Stepanov via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 10 11:29:37 PDT 2018


Hi,

MSan is unhappy about this change:

http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-bootstrap-msan/builds/5844/steps/check-lld%20msan/logs/stdio

==43923==WARNING: MemorySanitizer: use-of-uninitialized-value
    #0 0xa4087f in
lld::elf::GdbIndexSection::GdbIndexSection(std::__1::vector<lld::elf::GdbIndexChunk,
std::__1::allocator<lld::elf::GdbIndexChunk> >&&)
/b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/SyntheticSections.cpp:2432:39
    #1 0xa474d4 in lld::elf::GdbIndexSection*
lld::make<lld::elf::GdbIndexSection,
std::__1::vector<lld::elf::GdbIndexChunk,
std::__1::allocator<lld::elf::GdbIndexChunk> >
>(std::__1::vector<lld::elf::GdbIndexChunk,
std::__1::allocator<lld::elf::GdbIndexChunk> >&&)
/b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/include/lld/Common/Memory.h:55:39
    #2 0xa483c3 in lld::elf::GdbIndexSection*
lld::elf::createGdbIndex<llvm::object::ELFType<(llvm::support::endianness)1,
true> >()
/b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/SyntheticSections.cpp:2362:10
    #3 0xb5513e in
createSyntheticSections<llvm::object::ELFType<llvm::support::little, true>
>
/b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/Writer.cpp:373:21
    #4 0xb5513e in (anonymous
namespace)::Writer<llvm::object::ELFType<(llvm::support::endianness)1,
true> >::run()
/b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/Writer.cpp:426
    #5 0xb52ee4 in void
lld::elf::writeResult<llvm::object::ELFType<(llvm::support::endianness)1,
true> >()
/b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/Writer.cpp:143:64
    #6 0x84e4f6 in void
lld::elf::LinkerDriver::link<llvm::object::ELFType<(llvm::support::endianness)1,
true> >(llvm::opt::InputArgList&)
/b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/Driver.cpp:1413:3
    #7 0x832b56 in lld::elf::LinkerDriver::main(llvm::ArrayRef<char
const*>)
/b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/Driver.cpp:422:5
    #8 0x82eedd in lld::elf::link(llvm::ArrayRef<char const*>, bool,
llvm::raw_ostream&)
/b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/ELF/Driver.cpp:98:11
    #9 0x5d350e in main
/b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm/tools/lld/tools/lld/lld.cpp:132:13
    #10 0x7f157cd712e0 in __libc_start_main
(/lib/x86_64-linux-gnu/libc.so.6+0x202e0)
    #11 0x55eca9 in _start
(/b/sanitizer-x86_64-linux-bootstrap-msan/build/llvm_build_msan/bin/lld+0x55eca9)

On Tue, Jul 10, 2018 at 6:49 AM, Rui Ueyama via llvm-commits <
llvm-commits at lists.llvm.org> wrote:

> Author: ruiu
> Date: Tue Jul 10 06:49:13 2018
> New Revision: 336672
>
> URL: http://llvm.org/viewvc/llvm-project?rev=336672&view=rev
> Log:
> Reduce memory usage when creating .gdb_index. NFC.
>
> .gdb_index sections can be very large. When you are compiling
> multi-gibibyte executables, they can be larger than 1 GiB. The previous
> implementation of .gdb_index seems to consume too much memory.
>
> This patch reduces memory consumption by eliminating temporary objects.
> In one experiment, memory consumption of GdbIndexSection class is
> reduced from 962 MiB to 228 MiB when creating a .gdb_index of 1350 GiB.
>
> Differential Revision: https://reviews.llvm.org/D49094
>
> Modified:
>     lld/trunk/ELF/SyntheticSections.cpp
>     lld/trunk/ELF/SyntheticSections.h
>
> Modified: lld/trunk/ELF/SyntheticSections.cpp
> URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/
> SyntheticSections.cpp?rev=336672&r1=336671&r2=336672&view=diff
> ============================================================
> ==================
> --- lld/trunk/ELF/SyntheticSections.cpp (original)
> +++ lld/trunk/ELF/SyntheticSections.cpp Tue Jul 10 06:49:13 2018
> @@ -52,6 +52,7 @@ using namespace llvm::support;
>  using namespace lld;
>  using namespace lld::elf;
>
> +using llvm::support::endian::read32le;
>  using llvm::support::endian::write32le;
>  using llvm::support::endian::write64le;
>
> @@ -2311,19 +2312,17 @@ readAddressAreas(DWARFContext &Dwarf, In
>  }
>
>  static std::vector<GdbIndexChunk::NameTypeEntry>
> -readPubNamesAndTypes(DWARFContext &Dwarf) {
> +readPubNamesAndTypes(DWARFContext &Dwarf, uint32_t Idx) {
>    StringRef Sec1 = Dwarf.getDWARFObj().getGnuPubNamesSection();
>    StringRef Sec2 = Dwarf.getDWARFObj().getGnuPubTypesSection();
>
>    std::vector<GdbIndexChunk::NameTypeEntry> Ret;
>    for (StringRef Sec : {Sec1, Sec2}) {
>      DWARFDebugPubTable Table(Sec, Config->IsLE, true);
> -    for (const DWARFDebugPubTable::Set &Set : Table.getData()) {
> -      for (const DWARFDebugPubTable::Entry &Ent : Set.Entries) {
> -        CachedHashStringRef S(Ent.Name, computeGdbHash(Ent.Name));
> -        Ret.push_back({S, Ent.Descriptor.toBits()});
> -      }
> -    }
> +    for (const DWARFDebugPubTable::Set &Set : Table.getData())
> +      for (const DWARFDebugPubTable::Entry &Ent : Set.Entries)
> +        Ret.push_back({{Ent.Name, computeGdbHash(Ent.Name)},
> +                       (Ent.Descriptor.toBits() << 24) | Idx});
>    }
>    return Ret;
>  }
> @@ -2337,43 +2336,6 @@ static std::vector<InputSection *> getDe
>    return Ret;
>  }
>
> -void GdbIndexSection::fixCuIndex() {
> -  uint32_t Idx = 0;
> -  for (GdbIndexChunk &Chunk : Chunks) {
> -    for (GdbIndexChunk::AddressEntry &Ent : Chunk.AddressAreas)
> -      Ent.CuIndex += Idx;
> -    Idx += Chunk.CompilationUnits.size();
> -  }
> -}
> -
> -std::vector<std::vector<uint32_t>> GdbIndexSection::createCuVectors() {
> -  std::vector<std::vector<uint32_t>> Ret;
> -  uint32_t Idx = 0;
> -  uint32_t Off = 0;
> -
> -  for (GdbIndexChunk &Chunk : Chunks) {
> -    for (GdbIndexChunk::NameTypeEntry &Ent : Chunk.NamesAndTypes) {
> -      GdbSymbol *&Sym = Symbols[Ent.Name];
> -      if (!Sym) {
> -        Sym = make<GdbSymbol>(GdbSymbol{Ent.Name.hash(), Off,
> Ret.size()});
> -        Off += Ent.Name.size() + 1;
> -        Ret.push_back({});
> -      }
> -
> -      // gcc 5.4.1 produces a buggy .debug_gnu_pubnames that contains
> -      // duplicate entries, so we want to dedup them.
> -      std::vector<uint32_t> &Vec = Ret[Sym->CuVectorIndex];
> -      uint32_t Val = (Ent.Type << 24) | Idx;
> -      if (Vec.empty() || Vec.back() != Val)
> -        Vec.push_back(Val);
> -    }
> -    Idx += Chunk.CompilationUnits.size();
> -  }
> -
> -  StringPoolSize = Off;
> -  return Ret;
> -}
> -
>  template <class ELFT> GdbIndexSection *elf::createGdbIndex() {
>    // Gather debug info to create a .gdb_index section.
>    std::vector<InputSection *> Sections = getDebugInfoSections();
> @@ -2386,7 +2348,7 @@ template <class ELFT> GdbIndexSection *e
>      Chunks[I].DebugInfoSec = Sections[I];
>      Chunks[I].CompilationUnits = readCuList(Dwarf);
>      Chunks[I].AddressAreas = readAddressAreas(Dwarf, Sections[I]);
> -    Chunks[I].NamesAndTypes = readPubNamesAndTypes(Dwarf);
> +    Chunks[I].NamesAndTypes = readPubNamesAndTypes(Dwarf, I);
>    });
>
>    // .debug_gnu_pub{names,types} are useless in executables.
> @@ -2414,37 +2376,48 @@ static size_t getAddressAreaSize(ArrayRe
>    return Ret;
>  }
>
> -std::vector<GdbSymbol *> GdbIndexSection::createGdbSymtab() {
> -  uint32_t Size = NextPowerOf2(Symbols.size() * 4 / 3);
> -  if (Size < 1024)
> -    Size = 1024;
> -
> -  uint32_t Mask = Size - 1;
> -  std::vector<GdbSymbol *> Ret(Size);
> -
> -  for (auto &KV : Symbols) {
> -    GdbSymbol *Sym = KV.second;
> -    uint32_t I = Sym->NameHash & Mask;
> -    uint32_t Step = ((Sym->NameHash * 17) & Mask) | 1;
> -
> -    while (Ret[I])
> -      I = (I + Step) & Mask;
> -    Ret[I] = Sym;
> -  }
> -  return Ret;
> +// Returns the desired size of an on-disk hash table for a .gdb_index
> section.
> +// There's a tradeoff between size and collision rate. We aim 75%
> utilization.
> +static size_t getSymtabSize(size_t NumSymbols) {
> +  return std::max<size_t>(NextPowerOf2(NumSymbols * 4 / 3), 1024);
>  }
>
>  GdbIndexSection::GdbIndexSection(std::vector<GdbIndexChunk> &&C)
>      : SyntheticSection(0, SHT_PROGBITS, 1, ".gdb_index"),
> Chunks(std::move(C)) {
> -  fixCuIndex();
> -  CuVectors = createCuVectors();
> -  GdbSymtab = createGdbSymtab();
> +  // A map to identify duplicate symbols.
> +  DenseMap<CachedHashStringRef, size_t> Map;
> +
> +  // Initialize Symbols and CuVectors while deduplicating symbols by name.
> +  for (GdbIndexChunk &Chunk : Chunks) {
> +    for (GdbIndexChunk::NameTypeEntry &Ent : Chunk.NamesAndTypes) {
> +      CachedHashStringRef S = Ent.Name;
> +      size_t &Idx = Map[S];
> +
> +      if (!Idx) {
> +        Idx = Symbols.size() + 1;
> +        Symbols.push_back({S, static_cast<uint32_t>(StringPoolSize),
> +                           static_cast<uint32_t>(Symbols.size())});
> +        StringPoolSize += S.size() + 1;
> +        CuVectors.push_back({});
> +      }
> +
> +      // gcc 5.4.1 produces a buggy .debug_gnu_pubnames that contains
> +      // duplicate entries, so we want to dedup them.
> +      std::vector<uint32_t> &Vec = CuVectors[Symbols[Idx -
> 1].CuVectorIdx];
> +      if (Vec.empty() || Vec.back() != Ent.Type)
> +        Vec.push_back(Ent.Type);
> +    }
> +
> +    // NamesAndTypes is useless beyond this point, so clear it to save
> memory.
> +    Chunk.NamesAndTypes = {};
> +  }
>
>    // Compute offsets early to know the section size.
>    // Each chunk size needs to be in sync with what we write in writeTo.
>    CuTypesOffset = CuListOffset + getCuSize(Chunks) * 16;
>    SymtabOffset = CuTypesOffset + getAddressAreaSize(Chunks) * 20;
> -  ConstantPoolOffset = SymtabOffset + GdbSymtab.size() * 8;
> +  SymtabSize = getSymtabSize(Symbols.size());
> +  ConstantPoolOffset = SymtabOffset + SymtabSize * 8;
>
>    for (ArrayRef<uint32_t> Vec : CuVectors) {
>      CuVectorOffsets.push_back(CuVectorsPoolSize);
> @@ -2480,25 +2453,35 @@ void GdbIndexSection::writeTo(uint8_t *B
>    }
>
>    // Write the address area.
> -  for (GdbIndexChunk &D : Chunks) {
> -    for (GdbIndexChunk::AddressEntry &E : D.AddressAreas) {
> +  uint32_t Idx = 0;
> +  for (GdbIndexChunk &Chunk : Chunks) {
> +    for (GdbIndexChunk::AddressEntry &E : Chunk.AddressAreas) {
>        uint64_t BaseAddr = E.Section->getVA(0);
>        write64le(Buf, BaseAddr + E.LowAddress);
>        write64le(Buf + 8, BaseAddr + E.HighAddress);
> -      write32le(Buf + 16, E.CuIndex);
> +      write32le(Buf + 16, E.CuIndex + Idx);
>        Buf += 20;
>      }
> +    Idx += Chunk.CompilationUnits.size();
>    }
>
> -  // Write the symbol table.
> -  for (GdbSymbol *Sym : GdbSymtab) {
> -    if (Sym) {
> -      write32le(Buf, CuVectorsPoolSize + Sym->NameOffset);
> -      write32le(Buf + 4, CuVectorOffsets[Sym->CuVectorIndex]);
> -    }
> -    Buf += 8;
> +  // Write the on-disk open-addressing hash table containing symbols.
> +  for (GdbSymbol &Sym : Symbols) {
> +    uint32_t Mask = SymtabSize - 1;
> +    uint32_t H = Sym.Name.hash();
> +    uint32_t I = H & Mask;
> +    uint32_t Step = ((H * 17) & Mask) | 1;
> +
> +    while (read32le(Buf + I * 8))
> +      I = (I + Step) & Mask;
> +
> +    uint8_t *P = Buf + I * 8;
> +    write32le(P, CuVectorsPoolSize + Sym.OutputOff);
> +    write32le(P + 4, CuVectorOffsets[Sym.CuVectorIdx]);
>    }
>
> +  Buf += SymtabSize * 8;
> +
>    // Write the CU vectors.
>    for (ArrayRef<uint32_t> Vec : CuVectors) {
>      write32le(Buf, Vec.size());
> @@ -2510,13 +2493,8 @@ void GdbIndexSection::writeTo(uint8_t *B
>    }
>
>    // Write the string pool.
> -  for (auto &KV : Symbols) {
> -    CachedHashStringRef S = KV.first;
> -    GdbSymbol *Sym = KV.second;
> -    size_t Off = Sym->NameOffset;
> -    memcpy(Buf + Off, S.val().data(), S.size());
> -    Buf[Off + S.size()] = '\0';
> -  }
> +  for (GdbSymbol &Sym : Symbols)
> +    memcpy(Buf + Sym.OutputOff, Sym.Name.val().data(), Sym.Name.size());
>  }
>
>  bool GdbIndexSection::empty() const { return !Out::DebugInfo; }
>
> Modified: lld/trunk/ELF/SyntheticSections.h
> URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/
> SyntheticSections.h?rev=336672&r1=336671&r2=336672&view=diff
> ============================================================
> ==================
> --- lld/trunk/ELF/SyntheticSections.h (original)
> +++ lld/trunk/ELF/SyntheticSections.h Tue Jul 10 06:49:13 2018
> @@ -668,7 +668,7 @@ struct GdbIndexChunk {
>
>    struct NameTypeEntry {
>      llvm::CachedHashStringRef Name;
> -    uint8_t Type;
> +    uint32_t Type;
>    };
>
>    InputSection *DebugInfoSec;
> @@ -679,9 +679,9 @@ struct GdbIndexChunk {
>
>  // The symbol type for the .gdb_index section.
>  struct GdbSymbol {
> -  uint32_t NameHash;
> -  size_t NameOffset;
> -  size_t CuVectorIndex;
> +  llvm::CachedHashStringRef Name;
> +  uint32_t OutputOff;
> +  uint32_t CuVectorIdx;
>  };
>
>  class GdbIndexSection final : public SyntheticSection {
> @@ -692,19 +692,12 @@ public:
>    bool empty() const override;
>
>  private:
> -  void fixCuIndex();
> -  std::vector<std::vector<uint32_t>> createCuVectors();
> -  std::vector<GdbSymbol *> createGdbSymtab();
> -
>    // A symbol table for this .gdb_index section.
> -  std::vector<GdbSymbol *> GdbSymtab;
> +  std::vector<GdbSymbol> Symbols;
>
>    // CU vector is a part of constant pool area of section.
>    std::vector<std::vector<uint32_t>> CuVectors;
>
> -  // Symbol table contents.
> -  llvm::DenseMap<llvm::CachedHashStringRef, GdbSymbol *> Symbols;
> -
>    // Each chunk contains information gathered from a debug sections of
> single
>    // object and used to build different areas of gdb index.
>    std::vector<GdbIndexChunk> Chunks;
> @@ -712,12 +705,13 @@ private:
>    uint64_t CuListOffset = 24;
>    uint64_t CuTypesOffset;
>    uint64_t SymtabOffset;
> +  uint64_t SymtabSize = 0;
>    uint64_t ConstantPoolOffset;
>    uint64_t CuVectorsPoolSize = 0;
>    uint64_t StringPoolSize;
>    uint64_t TotalSize;
>
> -  std::vector<size_t> CuVectorOffsets;
> +  std::vector<uint32_t> CuVectorOffsets;
>  };
>
>  template <class ELFT> GdbIndexSection *createGdbIndex();
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180710/9b685acc/attachment.html>


More information about the llvm-commits mailing list