[llvm] [MC][ELF] Eliminate some hash maps from ELFObjectWriter (PR #97421)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 2 07:21:44 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-mc

Author: Alexis Engelke (aengelke)

<details>
<summary>Changes</summary>

Remove some maps. Unfortunately, it didn't result in a measurable performance improvements, but at least some parts of this are a nice to have regardless.

- Replace SectionIndexMap with layout order: The section layout order is only used in MachO, so we can repurpose the field as section table index.
- Store section offsets in MCSectionELF: No need for a map, and especially not a std::map. Direct access to the underlying (and easily modifyable) data structure is always faster.
- Store signature group index in MCSymbolELF: This removes the need for a reverse lookup map.
- Improve storage of groups: There's no point in having a DenseMap, the number of sections and groups are reasonably small to use vectors.

---

I'm unsure about the third part, it increases the size of MCSymbolELF and therefore max-rss. On the other hand, MCSymbol currently has 28 padding bits and an extra four padding bytes (if I counted correctly, the bit fields sum up to 36), but this seems unintended. @<!-- -->MaskRay what do you think? Use padding bytes in MCSymbol or drop the third commit and fix MCSymbol bit fields later?

---
Full diff: https://github.com/llvm/llvm-project/pull/97421.diff


3 Files Affected:

- (modified) llvm/include/llvm/MC/MCSectionELF.h (+12) 
- (modified) llvm/include/llvm/MC/MCSymbolELF.h (+6) 
- (modified) llvm/lib/MC/ELFObjectWriter.cpp (+48-78) 


``````````diff
diff --git a/llvm/include/llvm/MC/MCSectionELF.h b/llvm/include/llvm/MC/MCSectionELF.h
index 3d45d3da10ca1..d43ffbd885c96 100644
--- a/llvm/include/llvm/MC/MCSectionELF.h
+++ b/llvm/include/llvm/MC/MCSectionELF.h
@@ -46,6 +46,10 @@ class MCSectionELF final : public MCSection {
   /// section header index of the section where LinkedToSym is defined.
   const MCSymbol *LinkedToSym;
 
+  /// Start/end offset in file, used by ELFWriter.
+  uint64_t StartOffset;
+  uint64_t EndOffset;
+
 private:
   friend class MCContext;
 
@@ -92,6 +96,14 @@ class MCSectionELF final : public MCSection {
   }
   const MCSymbol *getLinkedToSymbol() const { return LinkedToSym; }
 
+  void setOffsets(uint64_t Start, uint64_t End) {
+    StartOffset = Start;
+    EndOffset = End;
+  }
+  std::pair<uint64_t, uint64_t> getOffsets() const {
+    return std::make_pair(StartOffset, EndOffset);
+  }
+
   static bool classof(const MCSection *S) {
     return S->getVariant() == SV_ELF;
   }
diff --git a/llvm/include/llvm/MC/MCSymbolELF.h b/llvm/include/llvm/MC/MCSymbolELF.h
index 13c2c6b13f8e1..aa1531a897121 100644
--- a/llvm/include/llvm/MC/MCSymbolELF.h
+++ b/llvm/include/llvm/MC/MCSymbolELF.h
@@ -17,6 +17,9 @@ class MCSymbolELF : public MCSymbol {
   /// symbol has no size this field will be NULL.
   const MCExpr *SymbolSize = nullptr;
 
+  /// Group section index for signature symbols. Set by ELFWriter.
+  mutable unsigned GroupIndex = 0;
+
 public:
   MCSymbolELF(const MCSymbolTableEntry *Name, bool isTemporary)
       : MCSymbol(SymbolKindELF, Name, isTemporary) {}
@@ -44,6 +47,9 @@ class MCSymbolELF : public MCSymbol {
   void setIsSignature() const;
   bool isSignature() const;
 
+  void setGroupIndex(unsigned Index) const { GroupIndex = Index; }
+  unsigned getGroupIndex() const { return GroupIndex; }
+
   void setMemtag(bool Tagged);
   bool isMemtag() const;
 
diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp
index bcc6dfeeeccd6..de55bdc00301b 100644
--- a/llvm/lib/MC/ELFObjectWriter.cpp
+++ b/llvm/lib/MC/ELFObjectWriter.cpp
@@ -66,8 +66,6 @@ using namespace llvm;
 
 namespace {
 
-using SectionIndexMapTy = DenseMap<const MCSectionELF *, uint32_t>;
-
 class ELFObjectWriter;
 struct ELFWriter;
 
@@ -136,8 +134,8 @@ struct ELFWriter {
   unsigned SymbolTableIndex = ~0u;
 
   // Sections in the order they are to be output in the section table.
-  std::vector<const MCSectionELF *> SectionTable;
-  unsigned addToSectionTable(const MCSectionELF *Sec);
+  std::vector<MCSectionELF *> SectionTable;
+  unsigned addToSectionTable(MCSectionELF *Sec);
 
   // TargetObjectWriter wrappers.
   bool is64Bit() const;
@@ -171,31 +169,17 @@ struct ELFWriter {
   void writeSymbol(const MCAssembler &Asm, SymbolTableWriter &Writer,
                    uint32_t StringIndex, ELFSymbolData &MSD);
 
-  // Start and end offset of each section
-  using SectionOffsetsTy =
-      std::map<const MCSectionELF *, std::pair<uint64_t, uint64_t>>;
-
-  // Map from a signature symbol to the group section index
-  using RevGroupMapTy = DenseMap<const MCSymbol *, unsigned>;
-
   /// Compute the symbol table data
   ///
   /// \param Asm - The assembler.
-  /// \param SectionIndexMap - Maps a section to its index.
-  /// \param RevGroupMap - Maps a signature symbol to the group section.
-  void computeSymbolTable(MCAssembler &Asm,
-                          const SectionIndexMapTy &SectionIndexMap,
-                          const RevGroupMapTy &RevGroupMap,
-                          SectionOffsetsTy &SectionOffsets);
+  void computeSymbolTable(MCAssembler &Asm);
 
   void writeAddrsigSection();
 
   MCSectionELF *createRelocationSection(MCContext &Ctx,
                                         const MCSectionELF &Sec);
 
-  void writeSectionHeader(const MCAssembler &Asm,
-                          const SectionIndexMapTy &SectionIndexMap,
-                          const SectionOffsetsTy &SectionOffsets);
+  void writeSectionHeader(const MCAssembler &Asm);
 
   void writeSectionData(const MCAssembler &Asm, MCSection &Sec);
 
@@ -207,8 +191,7 @@ struct ELFWriter {
   void writeRelocations(const MCAssembler &Asm, const MCSectionELF &Sec);
 
   uint64_t writeObject(MCAssembler &Asm);
-  void writeSection(const SectionIndexMapTy &SectionIndexMap,
-                    uint32_t GroupSymbolIndex, uint64_t Offset, uint64_t Size,
+  void writeSection(uint32_t GroupSymbolIndex, uint64_t Offset, uint64_t Size,
                     const MCSectionELF &Section);
 };
 
@@ -330,7 +313,7 @@ uint64_t ELFWriter::align(Align Alignment) {
   return NewOffset;
 }
 
-unsigned ELFWriter::addToSectionTable(const MCSectionELF *Sec) {
+unsigned ELFWriter::addToSectionTable(MCSectionELF *Sec) {
   SectionTable.push_back(Sec);
   StrTabBuilder.add(Sec->getName());
   return SectionTable.size();
@@ -611,10 +594,7 @@ bool ELFWriter::isInSymtab(const MCAssembler &Asm, const MCSymbolELF &Symbol,
   return true;
 }
 
-void ELFWriter::computeSymbolTable(MCAssembler &Asm,
-                                   const SectionIndexMapTy &SectionIndexMap,
-                                   const RevGroupMapTy &RevGroupMap,
-                                   SectionOffsetsTy &SectionOffsets) {
+void ELFWriter::computeSymbolTable(MCAssembler &Asm) {
   MCContext &Ctx = Asm.getContext();
   SymbolTableWriter Writer(*this, is64Bit());
 
@@ -672,7 +652,7 @@ void ELFWriter::computeSymbolTable(MCAssembler &Asm,
       }
     } else if (Symbol.isUndefined()) {
       if (isSignature && !Used) {
-        MSD.SectionIndex = RevGroupMap.lookup(&Symbol);
+        MSD.SectionIndex = Symbol.getGroupIndex();
         if (MSD.SectionIndex >= ELF::SHN_LORESERVE)
           HasLargeSectionIndex = true;
       } else {
@@ -697,7 +677,7 @@ void ELFWriter::computeSymbolTable(MCAssembler &Asm,
 
       if (Mode == NonDwoOnly && isDwoSection(Section))
         continue;
-      MSD.SectionIndex = SectionIndexMap.lookup(&Section);
+      MSD.SectionIndex = Section.getLayoutOrder();
       assert(MSD.SectionIndex && "Invalid section index!");
       if (MSD.SectionIndex >= ELF::SHN_LORESERVE)
         HasLargeSectionIndex = true;
@@ -775,7 +755,7 @@ void ELFWriter::computeSymbolTable(MCAssembler &Asm,
   }
 
   uint64_t SecEnd = W.OS.tell();
-  SectionOffsets[SymtabSection] = std::make_pair(SecStart, SecEnd);
+  SymtabSection->setOffsets(SecStart, SecEnd);
 
   ArrayRef<uint32_t> ShndxIndexes = Writer.getShndxIndexes();
   if (ShndxIndexes.empty()) {
@@ -785,12 +765,11 @@ void ELFWriter::computeSymbolTable(MCAssembler &Asm,
   assert(SymtabShndxSectionIndex != 0);
 
   SecStart = W.OS.tell();
-  const MCSectionELF *SymtabShndxSection =
-      SectionTable[SymtabShndxSectionIndex - 1];
+  MCSectionELF *SymtabShndxSection = SectionTable[SymtabShndxSectionIndex - 1];
   for (uint32_t Index : ShndxIndexes)
     write(Index);
   SecEnd = W.OS.tell();
-  SectionOffsets[SymtabShndxSection] = std::make_pair(SecStart, SecEnd);
+  SymtabShndxSection->setOffsets(SecStart, SecEnd);
 }
 
 void ELFWriter::writeAddrsigSection() {
@@ -1030,8 +1009,7 @@ void ELFWriter::writeRelocations(const MCAssembler &Asm,
   }
 }
 
-void ELFWriter::writeSection(const SectionIndexMapTy &SectionIndexMap,
-                             uint32_t GroupSymbolIndex, uint64_t Offset,
+void ELFWriter::writeSection(uint32_t GroupSymbolIndex, uint64_t Offset,
                              uint64_t Size, const MCSectionELF &Section) {
   uint64_t sh_link = 0;
   uint64_t sh_info = 0;
@@ -1050,7 +1028,7 @@ void ELFWriter::writeSection(const SectionIndexMapTy &SectionIndexMap,
     sh_link = SymbolTableIndex;
     assert(sh_link && ".symtab not found");
     const MCSection *InfoSection = Section.getLinkedToSection();
-    sh_info = SectionIndexMap.lookup(cast<MCSectionELF>(InfoSection));
+    sh_info = InfoSection->getLayoutOrder();
     break;
   }
 
@@ -1075,10 +1053,8 @@ void ELFWriter::writeSection(const SectionIndexMapTy &SectionIndexMap,
     // If the value in the associated metadata is not a definition, Sym will be
     // undefined. Represent this with sh_link=0.
     const MCSymbol *Sym = Section.getLinkedToSymbol();
-    if (Sym && Sym->isInSection()) {
-      const MCSectionELF *Sec = cast<MCSectionELF>(&Sym->getSection());
-      sh_link = SectionIndexMap.lookup(Sec);
-    }
+    if (Sym && Sym->isInSection())
+      sh_link = Sym->getSection().getLayoutOrder();
   }
 
   WriteSecHdrEntry(StrTabBuilder.getOffset(Section.getName()),
@@ -1087,9 +1063,7 @@ void ELFWriter::writeSection(const SectionIndexMapTy &SectionIndexMap,
                    Section.getEntrySize());
 }
 
-void ELFWriter::writeSectionHeader(const MCAssembler &Asm,
-                                   const SectionIndexMapTy &SectionIndexMap,
-                                   const SectionOffsetsTy &SectionOffsets) {
+void ELFWriter::writeSectionHeader(const MCAssembler &Asm) {
   const unsigned NumSections = SectionTable.size();
 
   // Null section first.
@@ -1105,16 +1079,14 @@ void ELFWriter::writeSectionHeader(const MCAssembler &Asm,
     else
       GroupSymbolIndex = Section->getGroup()->getIndex();
 
-    const std::pair<uint64_t, uint64_t> &Offsets =
-        SectionOffsets.find(Section)->second;
+    std::pair<uint64_t, uint64_t> Offsets = Section->getOffsets();
     uint64_t Size;
     if (Type == ELF::SHT_NOBITS)
       Size = Asm.getSectionAddressSize(*Section);
     else
       Size = Offsets.second - Offsets.first;
 
-    writeSection(SectionIndexMap, GroupSymbolIndex, Offsets.first, Size,
-                 *Section);
+    writeSection(GroupSymbolIndex, Offsets.first, Size, *Section);
   }
 }
 
@@ -1126,18 +1098,14 @@ uint64_t ELFWriter::writeObject(MCAssembler &Asm) {
       Ctx.getELFSection(".strtab", ELF::SHT_STRTAB, 0);
   StringTableIndex = addToSectionTable(StrtabSection);
 
-  RevGroupMapTy RevGroupMap;
-  SectionIndexMapTy SectionIndexMap;
-
-  DenseMap<const MCSymbol *, SmallVector<const MCSectionELF *, 0>> GroupMembers;
-
   // Write out the ELF header ...
   writeHeader(Asm);
 
   // ... then the sections ...
-  SectionOffsetsTy SectionOffsets;
-  std::vector<MCSectionELF *> Groups;
-  std::vector<MCSectionELF *> Relocations;
+  SmallVector<std::pair<MCSectionELF *, SmallVector<unsigned>>, 0> Groups;
+  // Map from group section index to group
+  SmallVector<unsigned, 0> GroupMap;
+  SmallVector<MCSectionELF *> Relocations;
   for (MCSection &Sec : Asm) {
     MCSectionELF &Section = static_cast<MCSectionELF &>(Sec);
     if (Mode == NonDwoOnly && isDwoSection(Section))
@@ -1152,49 +1120,51 @@ uint64_t ELFWriter::writeObject(MCAssembler &Asm) {
     writeSectionData(Asm, Section);
 
     uint64_t SecEnd = W.OS.tell();
-    SectionOffsets[&Section] = std::make_pair(SecStart, SecEnd);
+    Section.setOffsets(SecStart, SecEnd);
 
     MCSectionELF *RelSection = createRelocationSection(Ctx, Section);
 
+    unsigned GroupIdx = 0;
     if (SignatureSymbol) {
-      unsigned &GroupIdx = RevGroupMap[SignatureSymbol];
+      GroupIdx = SignatureSymbol->getGroupIndex();
       if (!GroupIdx) {
         MCSectionELF *Group =
             Ctx.createELFGroupSection(SignatureSymbol, Section.isComdat());
         GroupIdx = addToSectionTable(Group);
+        SignatureSymbol->setGroupIndex(GroupIdx);
         Group->setAlignment(Align(4));
-        Groups.push_back(Group);
+
+        GroupMap.resize(GroupIdx + 1);
+        GroupMap[GroupIdx] = Groups.size();
+        Groups.emplace_back(Group, SmallVector<unsigned>{});
       }
-      SmallVector<const MCSectionELF *, 0> &Members =
-          GroupMembers[SignatureSymbol];
-      Members.push_back(&Section);
-      if (RelSection)
-        Members.push_back(RelSection);
     }
 
-    SectionIndexMap[&Section] = addToSectionTable(&Section);
+    Section.setLayoutOrder(addToSectionTable(&Section));
     if (RelSection) {
-      SectionIndexMap[RelSection] = addToSectionTable(RelSection);
+      RelSection->setLayoutOrder(addToSectionTable(RelSection));
       Relocations.push_back(RelSection);
     }
 
+    if (GroupIdx) {
+      auto &Members = Groups[GroupMap[GroupIdx]];
+      Members.second.push_back(Section.getLayoutOrder());
+      if (RelSection)
+        Members.second.push_back(RelSection->getLayoutOrder());
+    }
+
     OWriter.TargetObjectWriter->addTargetSectionFlags(Ctx, Section);
   }
 
-  for (MCSectionELF *Group : Groups) {
+  for (auto &[Group, Members] : Groups) {
     // Remember the offset into the file for this section.
     const uint64_t SecStart = align(Group->getAlign());
 
-    const MCSymbol *SignatureSymbol = Group->getGroup();
-    assert(SignatureSymbol);
     write(uint32_t(Group->isComdat() ? unsigned(ELF::GRP_COMDAT) : 0));
-    for (const MCSectionELF *Member : GroupMembers[SignatureSymbol]) {
-      uint32_t SecIndex = SectionIndexMap.lookup(Member);
-      write(SecIndex);
-    }
+    W.write<unsigned>(Members);
 
     uint64_t SecEnd = W.OS.tell();
-    SectionOffsets[Group] = std::make_pair(SecStart, SecEnd);
+    Group->setOffsets(SecStart, SecEnd);
   }
 
   if (Mode == DwoOnly) {
@@ -1210,7 +1180,7 @@ uint64_t ELFWriter::writeObject(MCAssembler &Asm) {
     }
 
     // Compute symbol table information.
-    computeSymbolTable(Asm, SectionIndexMap, RevGroupMap, SectionOffsets);
+    computeSymbolTable(Asm);
 
     for (MCSectionELF *RelSection : Relocations) {
       // Remember the offset into the file for this section.
@@ -1220,27 +1190,27 @@ uint64_t ELFWriter::writeObject(MCAssembler &Asm) {
                        cast<MCSectionELF>(*RelSection->getLinkedToSection()));
 
       uint64_t SecEnd = W.OS.tell();
-      SectionOffsets[RelSection] = std::make_pair(SecStart, SecEnd);
+      RelSection->setOffsets(SecStart, SecEnd);
     }
 
     if (OWriter.EmitAddrsigSection) {
       uint64_t SecStart = W.OS.tell();
       writeAddrsigSection();
       uint64_t SecEnd = W.OS.tell();
-      SectionOffsets[AddrsigSection] = std::make_pair(SecStart, SecEnd);
+      AddrsigSection->setOffsets(SecStart, SecEnd);
     }
   }
 
   {
     uint64_t SecStart = W.OS.tell();
     StrTabBuilder.write(W.OS);
-    SectionOffsets[StrtabSection] = std::make_pair(SecStart, W.OS.tell());
+    StrtabSection->setOffsets(SecStart, W.OS.tell());
   }
 
   const uint64_t SectionHeaderOffset = align(is64Bit() ? Align(8) : Align(4));
 
   // ... then the section header table ...
-  writeSectionHeader(Asm, SectionIndexMap, SectionOffsets);
+  writeSectionHeader(Asm);
 
   uint16_t NumSections = support::endian::byte_swap<uint16_t>(
       (SectionTable.size() + 1 >= ELF::SHN_LORESERVE) ? (uint16_t)ELF::SHN_UNDEF

``````````

</details>


https://github.com/llvm/llvm-project/pull/97421


More information about the llvm-commits mailing list