"raw" patch adding comdat support to lld elf2

Rafael EspĂ­ndola via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 8 15:06:37 PDT 2015


The attached patch adds comdat handling to the new elf lld.

The implementation is direct translation to c++ of the rules in

http://www.sco.com/developers/gabi/latest/ch4.sheader.html#section_groups

The patch is "raw" is that it still has duplicated code and is missing comments.

I have attached the patch, run script and benchmark results linking
clang, but the summary is

gold:
* clang is 77 898 792 bytes.
* 1.125165215 seconds time elapsed

trunk:
* clang is 92 442 088 bytes.
* 0.495383463 seconds time elapsed

patch:
* clang is 87 491 896 bytes.
* 0.475477433 seconds time elapsed

Cheers,
Rafael
-------------- next part --------------
diff --git a/ELF/InputFiles.cpp b/ELF/InputFiles.cpp
index fb70fcc..be77ba4 100644
--- a/ELF/InputFiles.cpp
+++ b/ELF/InputFiles.cpp
@@ -11,6 +11,7 @@
 #include "InputSection.h"
 #include "Error.h"
 #include "Symbols.h"
+#include "SymbolTable.h"
 #include "llvm/ADT/STLExtras.h"
 
 using namespace llvm;
@@ -95,19 +96,55 @@ typename ObjectFile<ELFT>::Elf_Sym_Range ObjectFile<ELFT>::getLocalSymbols() {
   return this->getSymbolsHelper(true);
 }
 
-template <class ELFT> void elf2::ObjectFile<ELFT>::parse() {
+template <class ELFT> void elf2::ObjectFile<ELFT>::parse(SymbolTable &Symtab) {
   // Read section and symbol tables.
-  initializeSections();
+  initializeSections(Symtab);
   initializeSymbols();
 }
 
-template <class ELFT> void elf2::ObjectFile<ELFT>::initializeSections() {
+template <class ELFT>
+void elf2::ObjectFile<ELFT>::initializeSections(SymbolTable &Symtab) {
   uint64_t Size = this->ELFObj.getNumSections();
   Sections.resize(Size);
-  unsigned I = 0;
+  unsigned I = -1;
   const ELFFile<ELFT> &Obj = this->ELFObj;
   for (const Elf_Shdr &Sec : Obj.sections()) {
+    ++I;
+    if (Sections[I] == &InputSection<ELFT>::Discarded)
+      continue;
+
     switch (Sec.sh_type) {
+    case SHT_GROUP: {
+      Sections[I] = &InputSection<ELFT>::Discarded;
+      uint32_t SymtabdSectionIndex = Sec.sh_link;
+      ErrorOr<const Elf_Shdr *> SecOrErr = Obj.getSection(SymtabdSectionIndex);
+      error(SecOrErr);
+      const Elf_Shdr *SymtabSec = *SecOrErr;
+      uint32_t SymIndex = Sec.sh_info;
+      const Elf_Sym *Sym = Obj.getSymbol(SymtabSec, SymIndex);
+      ErrorOr<StringRef> StringTableOrErr =
+          Obj.getStringTableForSymtab(*SymtabSec);
+      error(StringTableOrErr);
+      ErrorOr<StringRef> SignatureOrErr = Sym->getName(*StringTableOrErr);
+      error(SignatureOrErr);
+      if (Symtab.insertComdat(*SignatureOrErr))
+        continue;
+      typedef support::detail::packed_endian_specific_integral<
+          uint32_t, ELFT::TargetEndianness, 2> EntryType;
+      ErrorOr<ArrayRef<EntryType>> EntriesOrErr =
+          Obj.template getSectionContentsAsArray<EntryType>(&Sec);
+      error(EntriesOrErr.getError());
+      ArrayRef<EntryType> Entries = *EntriesOrErr;
+      if (Entries.empty() || Entries[0] != GRP_COMDAT)
+        error("Unsupported SHT_GROUP format");
+      for (EntryType E : Entries.slice(1)) {
+        uint32_t SecIndex = E;
+        if (SecIndex >= Size)
+          error("Invalid section index in group");
+        Sections[SecIndex] = &InputSection<ELFT>::Discarded;
+      }
+      break;
+    }
     case SHT_SYMTAB:
       this->Symtab = &Sec;
       break;
@@ -135,7 +172,6 @@ template <class ELFT> void elf2::ObjectFile<ELFT>::initializeSections() {
       Sections[I] = new (Alloc) InputSection<ELFT>(this, &Sec);
       break;
     }
-    ++I;
   }
 }
 
@@ -177,8 +213,12 @@ SymbolBody *elf2::ObjectFile<ELFT>::createSymbolBody(StringRef StringTable,
     error("unexpected binding");
   case STB_GLOBAL:
   case STB_WEAK:
-  case STB_GNU_UNIQUE:
-    return new (Alloc) DefinedRegular<ELFT>(Name, *Sym, *Sections[SecIndex]);
+  case STB_GNU_UNIQUE: {
+    InputSection<ELFT> *Sec = Sections[SecIndex];
+    if (Sec == &InputSection<ELFT>::Discarded)
+      return new (Alloc) Undefined<ELFT>(Name, *Sym);
+    return new (Alloc) DefinedRegular<ELFT>(Name, *Sym, *Sec);
+  }
   }
 }
 
diff --git a/ELF/InputFiles.h b/ELF/InputFiles.h
index dfb0f21..e5ee088 100644
--- a/ELF/InputFiles.h
+++ b/ELF/InputFiles.h
@@ -29,6 +29,7 @@ using llvm::object::Archive;
 class InputFile;
 class Lazy;
 class SymbolBody;
+class SymbolTable;
 
 // The root class of input files.
 class InputFile {
@@ -37,9 +38,6 @@ public:
   Kind kind() const { return FileKind; }
   virtual ~InputFile() {}
 
-  // Reads a file (constructors don't do that).
-  virtual void parse() = 0;
-
   StringRef getName() const { return MB.getBufferIdentifier(); }
 
 protected:
@@ -75,6 +73,7 @@ public:
   static bool classof(const InputFile *F) { return F->kind() == ObjectKind; }
 
   ArrayRef<SymbolBody *> getSymbols() { return SymbolBodies; }
+  virtual void parse(SymbolTable &Symtab) = 0;
 
 protected:
   // List of all symbols referenced or defined by this file.
@@ -135,7 +134,7 @@ public:
   }
 
   explicit ObjectFile(MemoryBufferRef M);
-  void parse() override;
+  void parse(SymbolTable &Symtab) override;
 
   ArrayRef<InputSection<ELFT> *> getSections() const { return Sections; }
 
@@ -152,7 +151,7 @@ public:
   ArrayRef<Elf_Word> getSymbolTableShndx() const { return SymtabSHNDX; };
 
 private:
-  void initializeSections();
+  void initializeSections(SymbolTable &Symtab);
   void initializeSymbols();
 
   SymbolBody *createSymbolBody(StringRef StringTable, const Elf_Sym *Sym);
@@ -167,7 +166,7 @@ class ArchiveFile : public InputFile {
 public:
   explicit ArchiveFile(MemoryBufferRef M) : InputFile(ArchiveKind, M) {}
   static bool classof(const InputFile *F) { return F->kind() == ArchiveKind; }
-  void parse() override;
+  void parse();
 
   // Returns a memory buffer for a given symbol. An empty memory buffer
   // is returned if we have already returned the same memory buffer.
@@ -194,6 +193,7 @@ public:
   static bool classof(const InputFile *F) { return F->kind() == SharedKind; }
   StringRef getSoName() const { return SoName; }
   virtual void parseSoName() = 0;
+  virtual void parse() = 0;
 };
 
 template <class ELFT>
diff --git a/ELF/InputSection.h b/ELF/InputSection.h
index 53e0f9f..038c659 100644
--- a/ELF/InputSection.h
+++ b/ELF/InputSection.h
@@ -56,6 +56,8 @@ public:
   // Relocation sections that refer to this one.
   SmallVector<const Elf_Shdr *, 1> RelocSections;
 
+  static InputSection<ELFT> Discarded;
+
 private:
   template <bool isRela>
   void relocate(uint8_t *Buf,
@@ -75,6 +77,9 @@ private:
   const Elf_Shdr *Header;
 };
 
+template <class ELFT>
+InputSection<ELFT> InputSection<ELFT>::Discarded(nullptr, nullptr);
+
 } // namespace elf2
 } // namespace lld
 
diff --git a/ELF/OutputSections.cpp b/ELF/OutputSections.cpp
index 8133b43..50d4594 100644
--- a/ELF/OutputSections.cpp
+++ b/ELF/OutputSections.cpp
@@ -325,9 +325,9 @@ template <class ELFT> void DynamicSection<ELFT>::finalize() {
     Out<ELFT>::DynStrTab->add(File->getSoName());
   NumEntries += SharedFiles.size();
 
-  if (Symbol *S = SymTab.getSymbols().lookup(Config->Init))
+  if (Symbol *S = SymTab.lookup(Config->Init))
     InitSym = dyn_cast<ELFSymbolBody<ELFT>>(S->Body);
-  if (Symbol *S = SymTab.getSymbols().lookup(Config->Fini))
+  if (Symbol *S = SymTab.lookup(Config->Fini))
     FiniSym = dyn_cast<ELFSymbolBody<ELFT>>(S->Body);
   if (InitSym)
     ++NumEntries; // DT_INIT
@@ -471,6 +471,9 @@ lld::elf2::getLocalSymVA(const typename ELFFile<ELFT>::Elf_Sym *Sym,
         Sym, File.getSymbolTable(), File.getSymbolTableShndx());
   ArrayRef<InputSection<ELFT> *> Sections = File.getSections();
   InputSection<ELFT> *Section = Sections[SecIndex];
+  // .eh_frame, explain
+  if (Section == &InputSection<ELFT>::Discarded)
+    return 0;
   OutputSection<ELFT> *OutSec = Section->getOutputSection();
   return OutSec->getVA() + Section->getOutputSectionOff() + Sym->st_value;
 }
@@ -593,19 +596,27 @@ void SymbolTableSection<ELFT>::writeLocalSymbols(uint8_t *&Buf) {
       ErrorOr<StringRef> SymName = Sym.getName(File.getStringTable());
       if (SymName && !shouldKeepInSymtab<ELFT>(*SymName, Sym))
         continue;
+      uint32_t SecIndex = Sym.st_shndx;
+      if (SecIndex != SHN_ABS) {
+        if (SecIndex == SHN_XINDEX)
+          SecIndex = File.getObj().getExtendedSymbolTableIndex(
+              &Sym, File.getSymbolTable(), File.getSymbolTableShndx());
+        ArrayRef<InputSection<ELFT> *> Sections = File.getSections();
+        const InputSection<ELFT> *Section = Sections[SecIndex];
+        if (Section == &InputSection<ELFT>::Discarded)
+          continue;
+      }
+
       auto *ESym = reinterpret_cast<Elf_Sym *>(Buf);
       Buf += sizeof(*ESym);
       ESym->st_name = (SymName) ? StrTabSec.getFileOff(*SymName) : 0;
       ESym->st_size = Sym.st_size;
       ESym->setBindingAndType(Sym.getBinding(), Sym.getType());
-      uint32_t SecIndex = Sym.st_shndx;
+
       uintX_t VA = Sym.st_value;
       if (SecIndex == SHN_ABS) {
         ESym->st_shndx = SHN_ABS;
       } else {
-        if (SecIndex == SHN_XINDEX)
-          SecIndex = File.getObj().getExtendedSymbolTableIndex(
-              &Sym, File.getSymbolTable(), File.getSymbolTableShndx());
         ArrayRef<InputSection<ELFT> *> Sections = File.getSections();
         const InputSection<ELFT> *Section = Sections[SecIndex];
         const OutputSection<ELFT> *OutSec = Section->getOutputSection();
@@ -625,6 +636,8 @@ void SymbolTableSection<ELFT>::writeGlobalSymbols(uint8_t *&Buf) {
   for (const std::pair<StringRef, Symbol *> &P : Table.getSymbols()) {
     StringRef Name = P.first;
     Symbol *Sym = P.second;
+    if (!Sym)
+      continue;
     SymbolBody *Body = Sym->Body;
     if (!includeInSymtab<ELFT>(*Body))
       continue;
diff --git a/ELF/SymbolTable.cpp b/ELF/SymbolTable.cpp
index 025b4e1..170e45b 100644
--- a/ELF/SymbolTable.cpp
+++ b/ELF/SymbolTable.cpp
@@ -27,6 +27,14 @@ bool SymbolTable::shouldUseRela() const {
   return K == ELF64LEKind || K == ELF64BEKind;
 }
 
+bool SymbolTable::insertComdat(StringRef Signature) {
+  llvm::PointerIntPair<Symbol *, 1> &V = Symtab[Signature];
+  if (V.getInt())
+    return false;
+  V.setInt(1);
+  return true;
+}
+
 void SymbolTable::addFile(std::unique_ptr<InputFile> File) {
   if (auto *AF = dyn_cast<ArchiveFile>(File.get())) {
     File.release();
@@ -45,8 +53,11 @@ void SymbolTable::addFile(std::unique_ptr<InputFile> File) {
     S->parseSoName();
     if (!IncludedSoNames.insert(S->getSoName()).second)
       return;
+    S->parse();
+  } else {
+    cast<ObjectFileBase>(File.get())->parse(*this);
   }
-  File->parse();
+
   addELFFile(cast<ELFFileBase>(File.release()));
 }
 
@@ -241,10 +252,12 @@ template <class ELFT> void SymbolTable::resolve(SymbolBody *New) {
 Symbol *SymbolTable::insert(SymbolBody *New) {
   // Find an existing Symbol or create and insert a new one.
   StringRef Name = New->getName();
-  Symbol *&Sym = Symtab[Name];
+  PointerIntPair<Symbol *, 1> &P = Symtab[Name];
+  Symbol *Sym = P.getPointer();
   if (!Sym) {
     Sym = new (Alloc) Symbol(New);
     New->setBackref(Sym);
+    P.setPointer(Sym);
     return Sym;
   }
   New->setBackref(Sym);
diff --git a/ELF/SymbolTable.h b/ELF/SymbolTable.h
index 93b9b20..f3eb2dd 100644
--- a/ELF/SymbolTable.h
+++ b/ELF/SymbolTable.h
@@ -11,6 +11,7 @@
 #define LLD_ELF_SYMBOL_TABLE_H
 
 #include "InputFiles.h"
+#include "llvm/ADT/iterator.h"
 #include "llvm/ADT/MapVector.h"
 
 namespace lld {
@@ -29,6 +30,19 @@ struct Symbol;
 // to replace the lazy symbol. The logic is implemented in resolve().
 class SymbolTable {
 public:
+  typedef llvm::MapVector<StringRef, llvm::PointerIntPair<Symbol *, 1>> MapType;
+  typedef MapType::const_iterator raw_iter;
+  struct iter : public llvm::iterator_adaptor_base<
+                    iter, raw_iter,
+                    typename std::iterator_traits<raw_iter>::iterator_category,
+                    const std::pair<StringRef, Symbol *>> {
+    iter(raw_iter u) : iter::iterator_adaptor_base(u) {}
+    const std::pair<StringRef, Symbol *> operator*() const {
+      const std::pair<StringRef, llvm::PointerIntPair<Symbol *, 1>> &V = *I;
+      std::pair<StringRef, Symbol *> V2(V.first, V.second.getPointer());
+      return V2;
+    }
+  };
   SymbolTable();
 
   void addFile(std::unique_ptr<InputFile> File);
@@ -43,10 +57,18 @@ public:
 
   bool shouldUseRela() const;
 
-  const llvm::MapVector<StringRef, Symbol *> &getSymbols() const {
-    return Symtab;
+  llvm::iterator_range<iter> getSymbols() const {
+    iter Begin = Symtab.begin();
+    iter End = Symtab.end();
+    return make_range(Begin, End);
   }
 
+  Symbol *lookup(StringRef Key) const {
+    return Symtab.lookup(Key).getPointer();
+  }
+
+  bool insertComdat(StringRef Signature);
+
   const std::vector<std::unique_ptr<ObjectFileBase>> &getObjectFiles() const {
     return ObjectFiles;
   }
@@ -91,7 +113,7 @@ private:
   // a bit inefficient.
   // FIXME: Experiment with passing in a custom hashing or sorting the symbols
   // once symbol resolution is finished.
-  llvm::MapVector<StringRef, Symbol *> Symtab;
+  MapType Symtab;
   llvm::BumpPtrAllocator Alloc;
 
   // The writer needs to infer the machine type from the object files.
diff --git a/ELF/Writer.cpp b/ELF/Writer.cpp
index 9079f41..4e16a76 100644
--- a/ELF/Writer.cpp
+++ b/ELF/Writer.cpp
@@ -325,16 +325,30 @@ template <class ELFT> void Writer<ELFT>::createSections() {
         ErrorOr<StringRef> SymNameOrErr = Sym.getName(File.getStringTable());
         error(SymNameOrErr);
         StringRef SymName = *SymNameOrErr;
-        if (shouldKeepInSymtab<ELFT>(SymName, Sym))
-          Out<ELFT>::SymTab->addSymbol(SymName, true);
+        if (!shouldKeepInSymtab<ELFT>(SymName, Sym))
+          continue;
+
+        // FIXME: duplicated
+        uint32_t SecIndex = Sym.st_shndx;
+        if (SecIndex != SHN_ABS) {
+          if (SecIndex == SHN_XINDEX)
+            SecIndex = File.getObj().getExtendedSymbolTableIndex(
+                &Sym, File.getSymbolTable(), File.getSymbolTableShndx());
+          ArrayRef<InputSection<ELFT> *> Sections = File.getSections();
+          const InputSection<ELFT> *Section = Sections[SecIndex];
+          if (Section == &InputSection<ELFT>::Discarded)
+            continue;
+        }
+
+        Out<ELFT>::SymTab->addSymbol(SymName, true);
       }
     }
     for (InputSection<ELFT> *C : File.getSections()) {
-      if (!C)
+      if (!C || C == &InputSection<ELFT>::Discarded)
         continue;
       const Elf_Shdr *H = C->getSectionHdr();
-      SectionKey<ELFT::Is64Bits> Key{C->getSectionName(), H->sh_type,
-                                     H->sh_flags};
+      uintX_t OutFlags = H->sh_flags & ~SHF_GROUP;
+      SectionKey<ELFT::Is64Bits> Key{C->getSectionName(), H->sh_type, OutFlags};
       OutputSection<ELFT> *&Sec = Map[Key];
       if (!Sec) {
         Sec = new (CAlloc.Allocate())
@@ -372,7 +386,10 @@ template <class ELFT> void Writer<ELFT>::createSections() {
   std::vector<DefinedCommon<ELFT> *> CommonSymbols;
   for (auto &P : Symtab.getSymbols()) {
     StringRef Name = P.first;
-    SymbolBody *Body = P.second->Body;
+    Symbol *Sym = P.second;
+    if (!Sym)
+      continue;
+    SymbolBody *Body = Sym->Body;
     if (auto *U = dyn_cast<Undefined<ELFT>>(Body)) {
       if (!U->isWeak() && !U->canKeepUndefined())
         reportUndefined<ELFT>(Symtab, *Body);
diff --git a/test/elf2/Inputs/comdat.s b/test/elf2/Inputs/comdat.s
new file mode 100644
index 0000000..467bfa4
--- /dev/null
+++ b/test/elf2/Inputs/comdat.s
@@ -0,0 +1,3 @@
+        .section .text3,"axG", at progbits,zed,comdat,unique,0
+        .global abc
+abc:
diff --git a/test/elf2/comdat.s b/test/elf2/comdat.s
new file mode 100644
index 0000000..6143529
--- /dev/null
+++ b/test/elf2/comdat.s
@@ -0,0 +1,72 @@
+// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
+// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/comdat.s -o %t2.o
+// RUN: ld.lld2 -shared %t.o %t.o %t2.o -o %t
+// RUN: llvm-objdump -d %t | FileCheck %s
+// RUN: llvm-readobj -s -t %t | FileCheck --check-prefix=READ %s
+// REQUIRES: x86
+
+        .section	.text2,"axG", at progbits,foo,comdat,unique,0
+foo:
+        nop
+
+// CHECK: Disassembly of section .text2:
+// CHECK-NEXT: foo:
+// CHECK-NEXT:   2000: {{.*}}  nop
+// CHECK-NOT: nop
+
+        .section bar, "ax"
+        call foo
+
+// CHECK: Disassembly of section bar:
+// CHECK-NEXT: bar:
+// 0x2000 - 0x2001 - 5 = -6
+// 0      - 0x2006 - 5 = -8203
+// CHECK-NEXT:   2001:	{{.*}}  callq  -6
+// CHECK-NEXT:   2006:	{{.*}}  callq  -8203
+
+        .section .text3,"axG", at progbits,zed,comdat,unique,0
+
+
+// READ:      Name: .text2
+// READ-NEXT: Type: SHT_PROGBITS
+// READ-NEXT: Flags [
+// READ-NEXT:   SHF_ALLOC
+// READ-NEXT:   SHF_EXECINSTR
+// READ-NEXT: ]
+
+// READ:      Name: .text3
+// READ-NEXT: Type: SHT_PROGBITS
+// READ-NEXT: Flags [
+// READ-NEXT:   SHF_ALLOC
+// READ-NEXT:   SHF_EXECINSTR
+// READ-NEXT: ]
+
+// READ:      Symbols [
+// READ-NEXT:   Symbol {
+// READ-NEXT:     Name:  (0)
+// READ-NEXT:     Value: 0x0
+// READ-NEXT:     Size: 0
+// READ-NEXT:     Binding: Local
+// READ-NEXT:     Type: None
+// READ-NEXT:     Other: 0
+// READ-NEXT:     Section: Undefined
+// READ-NEXT:   }
+// READ-NEXT:   Symbol {
+// READ-NEXT:     Name: foo
+// READ-NEXT:     Value
+// READ-NEXT:     Size: 0
+// READ-NEXT:     Binding: Local
+// READ-NEXT:     Type: None
+// READ-NEXT:     Other: 0
+// READ-NEXT:     Section: .text
+// READ-NEXT:   }
+// READ-NEXT:   Symbol {
+// READ-NEXT:     Name: abc
+// READ-NEXT:     Value: 0x0
+// READ-NEXT:     Size: 0
+// READ-NEXT:     Binding: Global
+// READ-NEXT:     Type: None
+// READ-NEXT:     Other: 0
+// READ-NEXT:     Section: Undefined
+// READ-NEXT:   }
+// READ-NEXT: ]
-------------- next part --------------
A non-text attachment was scrubbed...
Name: run.sh
Type: application/x-sh
Size: 7863 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20151008/01998da7/attachment-0001.sh>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: gold-perf
Type: application/octet-stream
Size: 1051 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20151008/01998da7/attachment-0003.obj>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: lld-master-perf
Type: application/octet-stream
Size: 1020 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20151008/01998da7/attachment-0004.obj>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: lld-patch-perf
Type: application/octet-stream
Size: 1021 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20151008/01998da7/attachment-0005.obj>


More information about the llvm-commits mailing list