[lld] r314084 - Refactor GdbIndexSection. NFC.

Rui Ueyama via llvm-commits llvm-commits at lists.llvm.org
Sun Sep 24 14:45:35 PDT 2017


Author: ruiu
Date: Sun Sep 24 14:45:35 2017
New Revision: 314084

URL: http://llvm.org/viewvc/llvm-project?rev=314084&view=rev
Log:
Refactor GdbIndexSection. NFC.

This patch rewrites a part of GdbIndexSection to address the following
issues in the previous implementation:

 - Previously, some struct declarations were in GdbIndex.h while they
   were not used in GdbIndex.cpp. Such structs are moved to
   SyntheticSection.h.

 - The actual implementation were split into GdbIndexSection and GdbHash
   section, but that separation didn't make much sense. They are now
   unified as GdbIndexSection.

In addition to the above changes, this patch splits functions, rename
variables and remove redundant functions/variables to generally improve
code quality.

Modified:
    lld/trunk/ELF/GdbIndex.cpp
    lld/trunk/ELF/GdbIndex.h
    lld/trunk/ELF/SyntheticSections.cpp
    lld/trunk/ELF/SyntheticSections.h

Modified: lld/trunk/ELF/GdbIndex.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/GdbIndex.cpp?rev=314084&r1=314083&r2=314084&view=diff
==============================================================================
--- lld/trunk/ELF/GdbIndex.cpp (original)
+++ lld/trunk/ELF/GdbIndex.cpp Sun Sep 24 14:45:35 2017
@@ -24,30 +24,6 @@ using namespace llvm::object;
 using namespace lld;
 using namespace lld::elf;
 
-std::pair<bool, GdbSymbol *> GdbHashTab::add(uint32_t Hash, size_t Offset) {
-  GdbSymbol *&Sym = Map[Offset];
-  if (Sym)
-    return {false, Sym};
-  Sym = make<GdbSymbol>(Hash, Offset);
-  return {true, Sym};
-}
-
-void GdbHashTab::finalizeContents() {
-  uint32_t Size = std::max<uint32_t>(1024, NextPowerOf2(Map.size() * 4 / 3));
-  uint32_t Mask = Size - 1;
-  Table.resize(Size);
-
-  for (auto &P : Map) {
-    GdbSymbol *Sym = P.second;
-    uint32_t I = Sym->NameHash & Mask;
-    uint32_t Step = ((Sym->NameHash * 17) & Mask) | 1;
-
-    while (Table[I])
-      I = (I + Step) & Mask;
-    Table[I] = Sym;
-  }
-}
-
 template <class ELFT>
 LLDDwarfObj<ELFT>::LLDDwarfObj(ObjFile<ELFT> *Obj) : Obj(Obj) {
   for (InputSectionBase *Sec : Obj->getSections()) {

Modified: lld/trunk/ELF/GdbIndex.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/GdbIndex.h?rev=314084&r1=314083&r2=314084&view=diff
==============================================================================
--- lld/trunk/ELF/GdbIndex.h (original)
+++ lld/trunk/ELF/GdbIndex.h Sun Sep 24 14:45:35 2017
@@ -64,63 +64,6 @@ public:
                                             uint64_t Pos) const override;
 };
 
-// Struct represents single entry of address area of gdb index.
-struct AddressEntry {
-  InputSection *Section;
-  uint64_t LowAddress;
-  uint64_t HighAddress;
-  uint32_t CuIndex;
-};
-
-// Struct represents single entry of compilation units list area of gdb index.
-// It consist of CU offset in .debug_info section and it's size.
-struct CompilationUnitEntry {
-  uint64_t CuOffset;
-  uint64_t CuLength;
-};
-
-// Represents data about symbol and type names which are used
-// to build symbol table and constant pool area of gdb index.
-struct NameTypeEntry {
-  StringRef Name;
-  uint8_t Type;
-};
-
-// We fill one GdbIndexDataChunk for each object where scan of
-// debug information performed. That information futher used
-// for filling gdb index section areas.
-struct GdbIndexChunk {
-  InputSection *DebugInfoSec;
-  std::vector<AddressEntry> AddressArea;
-  std::vector<CompilationUnitEntry> CompilationUnits;
-  std::vector<NameTypeEntry> NamesAndTypes;
-};
-
-// Element of GdbHashTab hash table.
-struct GdbSymbol {
-  GdbSymbol(uint32_t Hash, size_t Offset)
-      : NameHash(Hash), NameOffset(Offset) {}
-  uint32_t NameHash;
-  size_t NameOffset;
-  size_t CuVectorIndex;
-};
-
-// This class manages the hashed symbol table for the .gdb_index section.
-// The hash value for a table entry is computed by applying an iterative hash
-// function to the symbol's name.
-class GdbHashTab final {
-public:
-  std::pair<bool, GdbSymbol *> add(uint32_t Hash, size_t Offset);
-
-  void finalizeContents();
-  size_t getCapacity() { return Table.size(); }
-  GdbSymbol *getSymbol(size_t I) { return Table[I]; }
-
-private:
-  llvm::DenseMap<size_t, GdbSymbol *> Map;
-  std::vector<GdbSymbol *> Table;
-};
-
 } // namespace elf
 } // namespace lld
 

Modified: lld/trunk/ELF/SyntheticSections.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/SyntheticSections.cpp?rev=314084&r1=314083&r2=314084&view=diff
==============================================================================
--- lld/trunk/ELF/SyntheticSections.cpp (original)
+++ lld/trunk/ELF/SyntheticSections.cpp Sun Sep 24 14:45:35 2017
@@ -1707,29 +1707,29 @@ unsigned PltSection::getPltRelocOff() co
   return (HeaderSize == 0) ? InX::Plt->getSize() : 0;
 }
 
-// The hash function used for .gdb_index version 5 or above.
-static uint32_t gdbHash(StringRef Str) {
-  uint32_t R = 0;
-  for (uint8_t C : Str)
-    R = R * 67 + tolower(C) - 113;
-  return R;
+// The string hash function for .gdb_index.
+static uint32_t computeGdbHash(StringRef S) {
+  uint32_t H = 0;
+  for (uint8_t C : S)
+    H = H * 67 + tolower(C) - 113;
+  return H;
 }
 
-static std::vector<CompilationUnitEntry> readCuList(DWARFContext &Dwarf) {
-  std::vector<CompilationUnitEntry> Ret;
-  for (std::unique_ptr<DWARFCompileUnit> &CU : Dwarf.compile_units())
-    Ret.push_back({CU->getOffset(), CU->getLength() + 4});
+static std::vector<GdbIndexChunk::CuEntry> readCuList(DWARFContext &Dwarf) {
+  std::vector<GdbIndexChunk::CuEntry> Ret;
+  for (std::unique_ptr<DWARFCompileUnit> &Cu : Dwarf.compile_units())
+    Ret.push_back({Cu->getOffset(), Cu->getLength() + 4});
   return Ret;
 }
 
-static std::vector<AddressEntry> readAddressArea(DWARFContext &Dwarf,
-                                                 InputSection *Sec) {
-  std::vector<AddressEntry> Ret;
+static std::vector<GdbIndexChunk::AddressEntry>
+readAddressAreas(DWARFContext &Dwarf, InputSection *Sec) {
+  std::vector<GdbIndexChunk::AddressEntry> Ret;
 
-  uint32_t CurrentCu = 0;
-  for (std::unique_ptr<DWARFCompileUnit> &CU : Dwarf.compile_units()) {
+  uint32_t CuIdx = 0;
+  for (std::unique_ptr<DWARFCompileUnit> &Cu : Dwarf.compile_units()) {
     DWARFAddressRangesVector Ranges;
-    CU->collectAddressRanges(Ranges);
+    Cu->collectAddressRanges(Ranges);
 
     ArrayRef<InputSectionBase *> Sections = Sec->File->getSections();
     for (DWARFAddressRange &R : Ranges) {
@@ -1741,22 +1741,22 @@ static std::vector<AddressEntry> readAdd
         continue;
       auto *IS = cast<InputSection>(S);
       uint64_t Offset = IS->getOffsetInFile();
-      Ret.push_back({IS, R.LowPC - Offset, R.HighPC - Offset, CurrentCu});
+      Ret.push_back({IS, R.LowPC - Offset, R.HighPC - Offset, CuIdx});
     }
-    ++CurrentCu;
+    ++CuIdx;
   }
   return Ret;
 }
 
-static std::vector<NameTypeEntry> readPubNamesAndTypes(DWARFContext &Dwarf,
-                                                       bool IsLE) {
-  StringRef Data[] = {Dwarf.getDWARFObj().getGnuPubNamesSection(),
-                      Dwarf.getDWARFObj().getGnuPubTypesSection()};
+static std::vector<GdbIndexChunk::NameTypeEntry>
+readPubNamesAndTypes(DWARFContext &Dwarf) {
+  StringRef Sec1 = Dwarf.getDWARFObj().getGnuPubNamesSection();
+  StringRef Sec2 = Dwarf.getDWARFObj().getGnuPubTypesSection();
 
-  std::vector<NameTypeEntry> Ret;
-  for (StringRef D : Data) {
-    DWARFDebugPubTable PubTable(D, IsLE, true);
-    for (const DWARFDebugPubTable::Set &Set : PubTable.getData())
+  std::vector<GdbIndexChunk::NameTypeEntry> Ret;
+  for (StringRef Sec : {Sec1, Sec2}) {
+    DWARFDebugPubTable Table(Sec, Config->IsLE, true);
+    for (const DWARFDebugPubTable::Set &Set : Table.getData())
       for (const DWARFDebugPubTable::Entry &Ent : Set.Entries)
         Ret.push_back({Ent.Name, Ent.Descriptor.toBits()});
   }
@@ -1772,52 +1772,50 @@ static std::vector<InputSection *> getDe
   return Ret;
 }
 
-void GdbIndexSection::buildIndex() {
-  if (Chunks.empty())
-    return;
-
-  uint32_t CuId = 0;
-  for (GdbIndexChunk &D : Chunks) {
-    for (AddressEntry &E : D.AddressArea)
-      E.CuIndex += CuId;
+void GdbIndexSection::fixCuIndex() {
+  uint32_t Idx = 0;
+  for (GdbIndexChunk &Chunk : Chunks) {
+    for (GdbIndexChunk::AddressEntry &Ent : Chunk.AddressAreas)
+      Ent.CuIndex += Idx;
+    Idx += Chunk.CompilationUnits.size();
+  }
+}
 
-    // Populate constant pool area.
-    for (NameTypeEntry &NameType : D.NamesAndTypes) {
-      uint32_t Hash = gdbHash(NameType.Name);
-      size_t Offset = StringPool.add(NameType.Name);
-
-      bool IsNew;
-      GdbSymbol *Sym;
-      std::tie(IsNew, Sym) = HashTab.add(Hash, Offset);
-      if (IsNew) {
-        Sym->CuVectorIndex = CuVectors.size();
-        CuVectors.resize(CuVectors.size() + 1);
+std::vector<std::set<uint32_t>> GdbIndexSection::createCuVectors() {
+  std::vector<std::set<uint32_t>> Ret;
+  uint32_t Idx = 0;
+
+  for (GdbIndexChunk &Chunk : Chunks) {
+    for (GdbIndexChunk::NameTypeEntry &Ent : Chunk.NamesAndTypes) {
+      uint32_t Hash = computeGdbHash(Ent.Name);
+      size_t Offset = StringPool.add(Ent.Name);
+
+      GdbSymbol *&Sym = SymbolMap[Offset];
+      if (!Sym) {
+        Sym = make<GdbSymbol>(GdbSymbol{Hash, Offset, Ret.size()});
+        Ret.resize(Ret.size() + 1);
       }
-
-      CuVectors[Sym->CuVectorIndex].insert(CuId | (NameType.Type << 24));
+      Ret[Sym->CuVectorIndex].insert((Ent.Type << 24) | Idx);
     }
-
-    CuId += D.CompilationUnits.size();
+    Idx += Chunk.CompilationUnits.size();
   }
-}
-
-static GdbIndexChunk readDwarf(DWARFContext &Dwarf, InputSection *Sec) {
-  GdbIndexChunk Ret;
-  Ret.DebugInfoSec = Sec;
-  Ret.CompilationUnits = readCuList(Dwarf);
-  Ret.AddressArea = readAddressArea(Dwarf, Sec);
-  Ret.NamesAndTypes = readPubNamesAndTypes(Dwarf, Config->IsLE);
   return Ret;
 }
 
 template <class ELFT> GdbIndexSection *elf::createGdbIndex() {
   std::vector<InputSection *> Sections = getDebugInfoSections();
   std::vector<GdbIndexChunk> Chunks(Sections.size());
+
   parallelForEachN(0, Chunks.size(), [&](size_t I) {
-    ObjFile<ELFT> *F = Sections[I]->getFile<ELFT>();
-    DWARFContext Dwarf(make_unique<LLDDwarfObj<ELFT>>(F));
-    Chunks[I] = readDwarf(Dwarf, Sections[I]);
+    ObjFile<ELFT> *File = Sections[I]->getFile<ELFT>();
+    DWARFContext Dwarf(make_unique<LLDDwarfObj<ELFT>>(File));
+
+    Chunks[I].DebugInfoSec = Sections[I];
+    Chunks[I].CompilationUnits = readCuList(Dwarf);
+    Chunks[I].AddressAreas = readAddressAreas(Dwarf, Sections[I]);
+    Chunks[I].NamesAndTypes = readPubNamesAndTypes(Dwarf);
   });
+
   return make<GdbIndexSection>(std::move(Chunks));
 }
 
@@ -1831,27 +1829,47 @@ static size_t getCuSize(ArrayRef<GdbInde
 static size_t getAddressAreaSize(ArrayRef<GdbIndexChunk> Arr) {
   size_t Ret = 0;
   for (const GdbIndexChunk &D : Arr)
-    Ret += D.AddressArea.size();
+    Ret += D.AddressAreas.size();
+  return Ret;
+}
+
+std::vector<GdbSymbol *> GdbIndexSection::createGdbSymtab() {
+  uint32_t Size =
+      std::max<uint32_t>(1024, NextPowerOf2(SymbolMap.size() * 4 / 3));
+  uint32_t Mask = Size - 1;
+  std::vector<GdbSymbol *> Ret(Size);
+
+  for (auto &KV : SymbolMap) {
+    GdbSymbol *Sym = KV.second;
+    uint32_t I = Sym->NameHash & Mask;
+    uint32_t Step = ((Sym->NameHash * 17) & Mask) | 1;
+
+    while (Ret[I])
+      I = (I + Step) & Mask;
+    Ret[I] = Sym;
+  }
   return Ret;
 }
 
 GdbIndexSection::GdbIndexSection(std::vector<GdbIndexChunk> &&C)
     : SyntheticSection(0, SHT_PROGBITS, 1, ".gdb_index"),
       StringPool(llvm::StringTableBuilder::ELF), Chunks(std::move(C)) {
-  buildIndex();
-  HashTab.finalizeContents();
-
-  // GdbIndex header consist from version fields
-  // and 5 more fields with different kinds of offsets.
-  CuTypesOffset = CuListOffset + getCuSize(Chunks) * CompilationUnitSize;
-  SymTabOffset = CuTypesOffset + getAddressAreaSize(Chunks) * AddressEntrySize;
-  ConstantPoolOffset = SymTabOffset + HashTab.getCapacity() * SymTabEntrySize;
+  fixCuIndex();
+  CuVectors = createCuVectors();
+  GdbSymtab = createGdbSymtab();
+
+  // Compute offsets early to know the section size.
+  // Each chunk size needs to be in sync with what we write in writeTo.
+  CuTypesOffset = CuListOffset + getCuSize(Chunks) * 16;
+  SymtabOffset = CuTypesOffset + getAddressAreaSize(Chunks) * 20;
+  ConstantPoolOffset = SymtabOffset + GdbSymtab.size() * 8;
 
+  size_t Off = 0;
   for (std::set<uint32_t> &CuVec : CuVectors) {
-    CuVectorsOffset.push_back(CuVectorsSize);
-    CuVectorsSize += OffsetTypeSize * (CuVec.size() + 1);
+    CuVectorOffsets.push_back(Off);
+    Off += (CuVec.size() + 1) * 4;
   }
-  StringPoolOffset = ConstantPoolOffset + CuVectorsSize;
+  StringPoolOffset = ConstantPoolOffset + Off;
   StringPool.finalizeInOrder();
 }
 
@@ -1860,17 +1878,18 @@ size_t GdbIndexSection::getSize() const
 }
 
 void GdbIndexSection::writeTo(uint8_t *Buf) {
-  write32le(Buf, 7);                       // Write version
-  write32le(Buf + 4, CuListOffset);        // CU list offset
-  write32le(Buf + 8, CuTypesOffset);       // Types CU list offset
-  write32le(Buf + 12, CuTypesOffset);      // Address area offset
-  write32le(Buf + 16, SymTabOffset);       // Symbol table offset
-  write32le(Buf + 20, ConstantPoolOffset); // Constant pool offset
+  // Write the section header.
+  write32le(Buf, 7);
+  write32le(Buf + 4, CuListOffset);
+  write32le(Buf + 8, CuTypesOffset);
+  write32le(Buf + 12, CuTypesOffset);
+  write32le(Buf + 16, SymtabOffset);
+  write32le(Buf + 20, ConstantPoolOffset);
   Buf += 24;
 
   // Write the CU list.
   for (GdbIndexChunk &D : Chunks) {
-    for (CompilationUnitEntry &Cu : D.CompilationUnits) {
+    for (GdbIndexChunk::CuEntry &Cu : D.CompilationUnits) {
       write64le(Buf, D.DebugInfoSec->OutSecOff + Cu.CuOffset);
       write64le(Buf + 8, Cu.CuLength);
       Buf += 16;
@@ -1879,7 +1898,7 @@ void GdbIndexSection::writeTo(uint8_t *B
 
   // Write the address area.
   for (GdbIndexChunk &D : Chunks) {
-    for (AddressEntry &E : D.AddressArea) {
+    for (GdbIndexChunk::AddressEntry &E : D.AddressAreas) {
       uint64_t BaseAddr =
           E.Section->getParent()->Addr + E.Section->getOffset(0);
       write64le(Buf, BaseAddr + E.LowAddress);
@@ -1890,19 +1909,15 @@ void GdbIndexSection::writeTo(uint8_t *B
   }
 
   // Write the symbol table.
-  for (size_t I = 0; I < HashTab.getCapacity(); ++I) {
-    GdbSymbol *Sym = HashTab.getSymbol(I);
+  for (GdbSymbol *Sym : GdbSymtab) {
     if (Sym) {
-      size_t NameOffset =
-          Sym->NameOffset + StringPoolOffset - ConstantPoolOffset;
-      size_t CuVectorOffset = CuVectorsOffset[Sym->CuVectorIndex];
-      write32le(Buf, NameOffset);
-      write32le(Buf + 4, CuVectorOffset);
+      write32le(Buf, Sym->NameOffset + StringPoolOffset - ConstantPoolOffset);
+      write32le(Buf + 4, CuVectorOffsets[Sym->CuVectorIndex]);
     }
     Buf += 8;
   }
 
-  // Write the CU vectors into the constant pool.
+  // Write the CU vectors.
   for (std::set<uint32_t> &CuVec : CuVectors) {
     write32le(Buf, CuVec.size());
     Buf += 4;
@@ -1912,6 +1927,7 @@ void GdbIndexSection::writeTo(uint8_t *B
     }
   }
 
+  // Write the string pool.
   StringPool.write(Buf);
 }
 

Modified: lld/trunk/ELF/SyntheticSections.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/SyntheticSections.h?rev=314084&r1=314083&r2=314084&view=diff
==============================================================================
--- lld/trunk/ELF/SyntheticSections.h (original)
+++ lld/trunk/ELF/SyntheticSections.h Sun Sep 24 14:45:35 2017
@@ -494,13 +494,40 @@ private:
   size_t HeaderSize;
 };
 
-class GdbIndexSection final : public SyntheticSection {
-  const unsigned OffsetTypeSize = 4;
-  const unsigned CuListOffset = 6 * OffsetTypeSize;
-  const unsigned CompilationUnitSize = 16;
-  const unsigned AddressEntrySize = 16 + OffsetTypeSize;
-  const unsigned SymTabEntrySize = 2 * OffsetTypeSize;
+// GdbIndexChunk is created for each .debug_info section and contains
+// information to create a part of .gdb_index for a given input section.
+struct GdbIndexChunk {
+  struct AddressEntry {
+    InputSection *Section;
+    uint64_t LowAddress;
+    uint64_t HighAddress;
+    uint32_t CuIndex;
+  };
+
+  struct CuEntry {
+    uint64_t CuOffset;
+    uint64_t CuLength;
+  };
+
+  struct NameTypeEntry {
+    StringRef Name;
+    uint8_t Type;
+  };
+
+  InputSection *DebugInfoSec;
+  std::vector<AddressEntry> AddressAreas;
+  std::vector<CuEntry> CompilationUnits;
+  std::vector<NameTypeEntry> NamesAndTypes;
+};
+
+// The symbol type for the .gdb_index section.
+struct GdbSymbol {
+  uint32_t NameHash;
+  size_t NameOffset;
+  size_t CuVectorIndex;
+};
 
+class GdbIndexSection final : public SyntheticSection {
 public:
   GdbIndexSection(std::vector<GdbIndexChunk> &&Chunks);
   void writeTo(uint8_t *Buf) override;
@@ -508,9 +535,16 @@ public:
   bool empty() const override;
 
 private:
-  // Symbol table is a hash table for types and names.
-  // It is the area of gdb index.
-  GdbHashTab HashTab;
+  void fixCuIndex();
+  std::vector<std::set<uint32_t>> createCuVectors();
+  std::vector<GdbSymbol *> createGdbSymtab();
+
+  // A symbol table for this .gdb_index section.
+  std::vector<GdbSymbol *> GdbSymtab;
+
+  // Symbol table entries are uniquified by their offsets, so
+  // we need a map from offsets to symbols.
+  llvm::DenseMap<size_t, GdbSymbol *> SymbolMap;
 
   // CU vector is a part of constant pool area of section.
   std::vector<std::set<uint32_t>> CuVectors;
@@ -522,15 +556,13 @@ private:
   // object and used to build different areas of gdb index.
   std::vector<GdbIndexChunk> Chunks;
 
-  void buildIndex();
-
+  static constexpr uint32_t CuListOffset = 24;
   uint32_t CuTypesOffset;
-  uint32_t SymTabOffset;
+  uint32_t SymtabOffset;
   uint32_t ConstantPoolOffset;
   uint32_t StringPoolOffset;
 
-  size_t CuVectorsSize = 0;
-  std::vector<size_t> CuVectorsOffset;
+  std::vector<size_t> CuVectorOffsets;
 };
 
 template <class ELFT> GdbIndexSection *createGdbIndex();




More information about the llvm-commits mailing list