"raw" patch adding comdat support to lld elf2
Rafael EspĂndola via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 8 15:06:37 PDT 2015
The attached patch adds comdat handling to the new elf lld.
The implementation is direct translation to c++ of the rules in
http://www.sco.com/developers/gabi/latest/ch4.sheader.html#section_groups
The patch is "raw" is that it still has duplicated code and is missing comments.
I have attached the patch, run script and benchmark results linking
clang, but the summary is
gold:
* clang is 77 898 792 bytes.
* 1.125165215 seconds time elapsed
trunk:
* clang is 92 442 088 bytes.
* 0.495383463 seconds time elapsed
patch:
* clang is 87 491 896 bytes.
* 0.475477433 seconds time elapsed
Cheers,
Rafael
-------------- next part --------------
diff --git a/ELF/InputFiles.cpp b/ELF/InputFiles.cpp
index fb70fcc..be77ba4 100644
--- a/ELF/InputFiles.cpp
+++ b/ELF/InputFiles.cpp
@@ -11,6 +11,7 @@
#include "InputSection.h"
#include "Error.h"
#include "Symbols.h"
+#include "SymbolTable.h"
#include "llvm/ADT/STLExtras.h"
using namespace llvm;
@@ -95,19 +96,55 @@ typename ObjectFile<ELFT>::Elf_Sym_Range ObjectFile<ELFT>::getLocalSymbols() {
return this->getSymbolsHelper(true);
}
-template <class ELFT> void elf2::ObjectFile<ELFT>::parse() {
+template <class ELFT> void elf2::ObjectFile<ELFT>::parse(SymbolTable &Symtab) {
// Read section and symbol tables.
- initializeSections();
+ initializeSections(Symtab);
initializeSymbols();
}
-template <class ELFT> void elf2::ObjectFile<ELFT>::initializeSections() {
+template <class ELFT>
+void elf2::ObjectFile<ELFT>::initializeSections(SymbolTable &Symtab) {
uint64_t Size = this->ELFObj.getNumSections();
Sections.resize(Size);
- unsigned I = 0;
+ unsigned I = -1;
const ELFFile<ELFT> &Obj = this->ELFObj;
for (const Elf_Shdr &Sec : Obj.sections()) {
+ ++I;
+ if (Sections[I] == &InputSection<ELFT>::Discarded)
+ continue;
+
switch (Sec.sh_type) {
+ case SHT_GROUP: {
+ Sections[I] = &InputSection<ELFT>::Discarded;
+ uint32_t SymtabdSectionIndex = Sec.sh_link;
+ ErrorOr<const Elf_Shdr *> SecOrErr = Obj.getSection(SymtabdSectionIndex);
+ error(SecOrErr);
+ const Elf_Shdr *SymtabSec = *SecOrErr;
+ uint32_t SymIndex = Sec.sh_info;
+ const Elf_Sym *Sym = Obj.getSymbol(SymtabSec, SymIndex);
+ ErrorOr<StringRef> StringTableOrErr =
+ Obj.getStringTableForSymtab(*SymtabSec);
+ error(StringTableOrErr);
+ ErrorOr<StringRef> SignatureOrErr = Sym->getName(*StringTableOrErr);
+ error(SignatureOrErr);
+ if (Symtab.insertComdat(*SignatureOrErr))
+ continue;
+ typedef support::detail::packed_endian_specific_integral<
+ uint32_t, ELFT::TargetEndianness, 2> EntryType;
+ ErrorOr<ArrayRef<EntryType>> EntriesOrErr =
+ Obj.template getSectionContentsAsArray<EntryType>(&Sec);
+ error(EntriesOrErr.getError());
+ ArrayRef<EntryType> Entries = *EntriesOrErr;
+ if (Entries.empty() || Entries[0] != GRP_COMDAT)
+ error("Unsupported SHT_GROUP format");
+ for (EntryType E : Entries.slice(1)) {
+ uint32_t SecIndex = E;
+ if (SecIndex >= Size)
+ error("Invalid section index in group");
+ Sections[SecIndex] = &InputSection<ELFT>::Discarded;
+ }
+ break;
+ }
case SHT_SYMTAB:
this->Symtab = &Sec;
break;
@@ -135,7 +172,6 @@ template <class ELFT> void elf2::ObjectFile<ELFT>::initializeSections() {
Sections[I] = new (Alloc) InputSection<ELFT>(this, &Sec);
break;
}
- ++I;
}
}
@@ -177,8 +213,12 @@ SymbolBody *elf2::ObjectFile<ELFT>::createSymbolBody(StringRef StringTable,
error("unexpected binding");
case STB_GLOBAL:
case STB_WEAK:
- case STB_GNU_UNIQUE:
- return new (Alloc) DefinedRegular<ELFT>(Name, *Sym, *Sections[SecIndex]);
+ case STB_GNU_UNIQUE: {
+ InputSection<ELFT> *Sec = Sections[SecIndex];
+ if (Sec == &InputSection<ELFT>::Discarded)
+ return new (Alloc) Undefined<ELFT>(Name, *Sym);
+ return new (Alloc) DefinedRegular<ELFT>(Name, *Sym, *Sec);
+ }
}
}
diff --git a/ELF/InputFiles.h b/ELF/InputFiles.h
index dfb0f21..e5ee088 100644
--- a/ELF/InputFiles.h
+++ b/ELF/InputFiles.h
@@ -29,6 +29,7 @@ using llvm::object::Archive;
class InputFile;
class Lazy;
class SymbolBody;
+class SymbolTable;
// The root class of input files.
class InputFile {
@@ -37,9 +38,6 @@ public:
Kind kind() const { return FileKind; }
virtual ~InputFile() {}
- // Reads a file (constructors don't do that).
- virtual void parse() = 0;
-
StringRef getName() const { return MB.getBufferIdentifier(); }
protected:
@@ -75,6 +73,7 @@ public:
static bool classof(const InputFile *F) { return F->kind() == ObjectKind; }
ArrayRef<SymbolBody *> getSymbols() { return SymbolBodies; }
+ virtual void parse(SymbolTable &Symtab) = 0;
protected:
// List of all symbols referenced or defined by this file.
@@ -135,7 +134,7 @@ public:
}
explicit ObjectFile(MemoryBufferRef M);
- void parse() override;
+ void parse(SymbolTable &Symtab) override;
ArrayRef<InputSection<ELFT> *> getSections() const { return Sections; }
@@ -152,7 +151,7 @@ public:
ArrayRef<Elf_Word> getSymbolTableShndx() const { return SymtabSHNDX; };
private:
- void initializeSections();
+ void initializeSections(SymbolTable &Symtab);
void initializeSymbols();
SymbolBody *createSymbolBody(StringRef StringTable, const Elf_Sym *Sym);
@@ -167,7 +166,7 @@ class ArchiveFile : public InputFile {
public:
explicit ArchiveFile(MemoryBufferRef M) : InputFile(ArchiveKind, M) {}
static bool classof(const InputFile *F) { return F->kind() == ArchiveKind; }
- void parse() override;
+ void parse();
// Returns a memory buffer for a given symbol. An empty memory buffer
// is returned if we have already returned the same memory buffer.
@@ -194,6 +193,7 @@ public:
static bool classof(const InputFile *F) { return F->kind() == SharedKind; }
StringRef getSoName() const { return SoName; }
virtual void parseSoName() = 0;
+ virtual void parse() = 0;
};
template <class ELFT>
diff --git a/ELF/InputSection.h b/ELF/InputSection.h
index 53e0f9f..038c659 100644
--- a/ELF/InputSection.h
+++ b/ELF/InputSection.h
@@ -56,6 +56,8 @@ public:
// Relocation sections that refer to this one.
SmallVector<const Elf_Shdr *, 1> RelocSections;
+ static InputSection<ELFT> Discarded;
+
private:
template <bool isRela>
void relocate(uint8_t *Buf,
@@ -75,6 +77,9 @@ private:
const Elf_Shdr *Header;
};
+template <class ELFT>
+InputSection<ELFT> InputSection<ELFT>::Discarded(nullptr, nullptr);
+
} // namespace elf2
} // namespace lld
diff --git a/ELF/OutputSections.cpp b/ELF/OutputSections.cpp
index 8133b43..50d4594 100644
--- a/ELF/OutputSections.cpp
+++ b/ELF/OutputSections.cpp
@@ -325,9 +325,9 @@ template <class ELFT> void DynamicSection<ELFT>::finalize() {
Out<ELFT>::DynStrTab->add(File->getSoName());
NumEntries += SharedFiles.size();
- if (Symbol *S = SymTab.getSymbols().lookup(Config->Init))
+ if (Symbol *S = SymTab.lookup(Config->Init))
InitSym = dyn_cast<ELFSymbolBody<ELFT>>(S->Body);
- if (Symbol *S = SymTab.getSymbols().lookup(Config->Fini))
+ if (Symbol *S = SymTab.lookup(Config->Fini))
FiniSym = dyn_cast<ELFSymbolBody<ELFT>>(S->Body);
if (InitSym)
++NumEntries; // DT_INIT
@@ -471,6 +471,9 @@ lld::elf2::getLocalSymVA(const typename ELFFile<ELFT>::Elf_Sym *Sym,
Sym, File.getSymbolTable(), File.getSymbolTableShndx());
ArrayRef<InputSection<ELFT> *> Sections = File.getSections();
InputSection<ELFT> *Section = Sections[SecIndex];
+ // .eh_frame, explain
+ if (Section == &InputSection<ELFT>::Discarded)
+ return 0;
OutputSection<ELFT> *OutSec = Section->getOutputSection();
return OutSec->getVA() + Section->getOutputSectionOff() + Sym->st_value;
}
@@ -593,19 +596,27 @@ void SymbolTableSection<ELFT>::writeLocalSymbols(uint8_t *&Buf) {
ErrorOr<StringRef> SymName = Sym.getName(File.getStringTable());
if (SymName && !shouldKeepInSymtab<ELFT>(*SymName, Sym))
continue;
+ uint32_t SecIndex = Sym.st_shndx;
+ if (SecIndex != SHN_ABS) {
+ if (SecIndex == SHN_XINDEX)
+ SecIndex = File.getObj().getExtendedSymbolTableIndex(
+ &Sym, File.getSymbolTable(), File.getSymbolTableShndx());
+ ArrayRef<InputSection<ELFT> *> Sections = File.getSections();
+ const InputSection<ELFT> *Section = Sections[SecIndex];
+ if (Section == &InputSection<ELFT>::Discarded)
+ continue;
+ }
+
auto *ESym = reinterpret_cast<Elf_Sym *>(Buf);
Buf += sizeof(*ESym);
ESym->st_name = (SymName) ? StrTabSec.getFileOff(*SymName) : 0;
ESym->st_size = Sym.st_size;
ESym->setBindingAndType(Sym.getBinding(), Sym.getType());
- uint32_t SecIndex = Sym.st_shndx;
+
uintX_t VA = Sym.st_value;
if (SecIndex == SHN_ABS) {
ESym->st_shndx = SHN_ABS;
} else {
- if (SecIndex == SHN_XINDEX)
- SecIndex = File.getObj().getExtendedSymbolTableIndex(
- &Sym, File.getSymbolTable(), File.getSymbolTableShndx());
ArrayRef<InputSection<ELFT> *> Sections = File.getSections();
const InputSection<ELFT> *Section = Sections[SecIndex];
const OutputSection<ELFT> *OutSec = Section->getOutputSection();
@@ -625,6 +636,8 @@ void SymbolTableSection<ELFT>::writeGlobalSymbols(uint8_t *&Buf) {
for (const std::pair<StringRef, Symbol *> &P : Table.getSymbols()) {
StringRef Name = P.first;
Symbol *Sym = P.second;
+ if (!Sym)
+ continue;
SymbolBody *Body = Sym->Body;
if (!includeInSymtab<ELFT>(*Body))
continue;
diff --git a/ELF/SymbolTable.cpp b/ELF/SymbolTable.cpp
index 025b4e1..170e45b 100644
--- a/ELF/SymbolTable.cpp
+++ b/ELF/SymbolTable.cpp
@@ -27,6 +27,14 @@ bool SymbolTable::shouldUseRela() const {
return K == ELF64LEKind || K == ELF64BEKind;
}
+bool SymbolTable::insertComdat(StringRef Signature) {
+ llvm::PointerIntPair<Symbol *, 1> &V = Symtab[Signature];
+ if (V.getInt())
+ return false;
+ V.setInt(1);
+ return true;
+}
+
void SymbolTable::addFile(std::unique_ptr<InputFile> File) {
if (auto *AF = dyn_cast<ArchiveFile>(File.get())) {
File.release();
@@ -45,8 +53,11 @@ void SymbolTable::addFile(std::unique_ptr<InputFile> File) {
S->parseSoName();
if (!IncludedSoNames.insert(S->getSoName()).second)
return;
+ S->parse();
+ } else {
+ cast<ObjectFileBase>(File.get())->parse(*this);
}
- File->parse();
+
addELFFile(cast<ELFFileBase>(File.release()));
}
@@ -241,10 +252,12 @@ template <class ELFT> void SymbolTable::resolve(SymbolBody *New) {
Symbol *SymbolTable::insert(SymbolBody *New) {
// Find an existing Symbol or create and insert a new one.
StringRef Name = New->getName();
- Symbol *&Sym = Symtab[Name];
+ PointerIntPair<Symbol *, 1> &P = Symtab[Name];
+ Symbol *Sym = P.getPointer();
if (!Sym) {
Sym = new (Alloc) Symbol(New);
New->setBackref(Sym);
+ P.setPointer(Sym);
return Sym;
}
New->setBackref(Sym);
diff --git a/ELF/SymbolTable.h b/ELF/SymbolTable.h
index 93b9b20..f3eb2dd 100644
--- a/ELF/SymbolTable.h
+++ b/ELF/SymbolTable.h
@@ -11,6 +11,7 @@
#define LLD_ELF_SYMBOL_TABLE_H
#include "InputFiles.h"
+#include "llvm/ADT/iterator.h"
#include "llvm/ADT/MapVector.h"
namespace lld {
@@ -29,6 +30,19 @@ struct Symbol;
// to replace the lazy symbol. The logic is implemented in resolve().
class SymbolTable {
public:
+ typedef llvm::MapVector<StringRef, llvm::PointerIntPair<Symbol *, 1>> MapType;
+ typedef MapType::const_iterator raw_iter;
+ struct iter : public llvm::iterator_adaptor_base<
+ iter, raw_iter,
+ typename std::iterator_traits<raw_iter>::iterator_category,
+ const std::pair<StringRef, Symbol *>> {
+ iter(raw_iter u) : iter::iterator_adaptor_base(u) {}
+ const std::pair<StringRef, Symbol *> operator*() const {
+ const std::pair<StringRef, llvm::PointerIntPair<Symbol *, 1>> &V = *I;
+ std::pair<StringRef, Symbol *> V2(V.first, V.second.getPointer());
+ return V2;
+ }
+ };
SymbolTable();
void addFile(std::unique_ptr<InputFile> File);
@@ -43,10 +57,18 @@ public:
bool shouldUseRela() const;
- const llvm::MapVector<StringRef, Symbol *> &getSymbols() const {
- return Symtab;
+ llvm::iterator_range<iter> getSymbols() const {
+ iter Begin = Symtab.begin();
+ iter End = Symtab.end();
+ return make_range(Begin, End);
}
+ Symbol *lookup(StringRef Key) const {
+ return Symtab.lookup(Key).getPointer();
+ }
+
+ bool insertComdat(StringRef Signature);
+
const std::vector<std::unique_ptr<ObjectFileBase>> &getObjectFiles() const {
return ObjectFiles;
}
@@ -91,7 +113,7 @@ private:
// a bit inefficient.
// FIXME: Experiment with passing in a custom hashing or sorting the symbols
// once symbol resolution is finished.
- llvm::MapVector<StringRef, Symbol *> Symtab;
+ MapType Symtab;
llvm::BumpPtrAllocator Alloc;
// The writer needs to infer the machine type from the object files.
diff --git a/ELF/Writer.cpp b/ELF/Writer.cpp
index 9079f41..4e16a76 100644
--- a/ELF/Writer.cpp
+++ b/ELF/Writer.cpp
@@ -325,16 +325,30 @@ template <class ELFT> void Writer<ELFT>::createSections() {
ErrorOr<StringRef> SymNameOrErr = Sym.getName(File.getStringTable());
error(SymNameOrErr);
StringRef SymName = *SymNameOrErr;
- if (shouldKeepInSymtab<ELFT>(SymName, Sym))
- Out<ELFT>::SymTab->addSymbol(SymName, true);
+ if (!shouldKeepInSymtab<ELFT>(SymName, Sym))
+ continue;
+
+ // FIXME: duplicated
+ uint32_t SecIndex = Sym.st_shndx;
+ if (SecIndex != SHN_ABS) {
+ if (SecIndex == SHN_XINDEX)
+ SecIndex = File.getObj().getExtendedSymbolTableIndex(
+ &Sym, File.getSymbolTable(), File.getSymbolTableShndx());
+ ArrayRef<InputSection<ELFT> *> Sections = File.getSections();
+ const InputSection<ELFT> *Section = Sections[SecIndex];
+ if (Section == &InputSection<ELFT>::Discarded)
+ continue;
+ }
+
+ Out<ELFT>::SymTab->addSymbol(SymName, true);
}
}
for (InputSection<ELFT> *C : File.getSections()) {
- if (!C)
+ if (!C || C == &InputSection<ELFT>::Discarded)
continue;
const Elf_Shdr *H = C->getSectionHdr();
- SectionKey<ELFT::Is64Bits> Key{C->getSectionName(), H->sh_type,
- H->sh_flags};
+ uintX_t OutFlags = H->sh_flags & ~SHF_GROUP;
+ SectionKey<ELFT::Is64Bits> Key{C->getSectionName(), H->sh_type, OutFlags};
OutputSection<ELFT> *&Sec = Map[Key];
if (!Sec) {
Sec = new (CAlloc.Allocate())
@@ -372,7 +386,10 @@ template <class ELFT> void Writer<ELFT>::createSections() {
std::vector<DefinedCommon<ELFT> *> CommonSymbols;
for (auto &P : Symtab.getSymbols()) {
StringRef Name = P.first;
- SymbolBody *Body = P.second->Body;
+ Symbol *Sym = P.second;
+ if (!Sym)
+ continue;
+ SymbolBody *Body = Sym->Body;
if (auto *U = dyn_cast<Undefined<ELFT>>(Body)) {
if (!U->isWeak() && !U->canKeepUndefined())
reportUndefined<ELFT>(Symtab, *Body);
diff --git a/test/elf2/Inputs/comdat.s b/test/elf2/Inputs/comdat.s
new file mode 100644
index 0000000..467bfa4
--- /dev/null
+++ b/test/elf2/Inputs/comdat.s
@@ -0,0 +1,3 @@
+ .section .text3,"axG", at progbits,zed,comdat,unique,0
+ .global abc
+abc:
diff --git a/test/elf2/comdat.s b/test/elf2/comdat.s
new file mode 100644
index 0000000..6143529
--- /dev/null
+++ b/test/elf2/comdat.s
@@ -0,0 +1,72 @@
+// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
+// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/comdat.s -o %t2.o
+// RUN: ld.lld2 -shared %t.o %t.o %t2.o -o %t
+// RUN: llvm-objdump -d %t | FileCheck %s
+// RUN: llvm-readobj -s -t %t | FileCheck --check-prefix=READ %s
+// REQUIRES: x86
+
+ .section .text2,"axG", at progbits,foo,comdat,unique,0
+foo:
+ nop
+
+// CHECK: Disassembly of section .text2:
+// CHECK-NEXT: foo:
+// CHECK-NEXT: 2000: {{.*}} nop
+// CHECK-NOT: nop
+
+ .section bar, "ax"
+ call foo
+
+// CHECK: Disassembly of section bar:
+// CHECK-NEXT: bar:
+// 0x2000 - 0x2001 - 5 = -6
+// 0 - 0x2006 - 5 = -8203
+// CHECK-NEXT: 2001: {{.*}} callq -6
+// CHECK-NEXT: 2006: {{.*}} callq -8203
+
+ .section .text3,"axG", at progbits,zed,comdat,unique,0
+
+
+// READ: Name: .text2
+// READ-NEXT: Type: SHT_PROGBITS
+// READ-NEXT: Flags [
+// READ-NEXT: SHF_ALLOC
+// READ-NEXT: SHF_EXECINSTR
+// READ-NEXT: ]
+
+// READ: Name: .text3
+// READ-NEXT: Type: SHT_PROGBITS
+// READ-NEXT: Flags [
+// READ-NEXT: SHF_ALLOC
+// READ-NEXT: SHF_EXECINSTR
+// READ-NEXT: ]
+
+// READ: Symbols [
+// READ-NEXT: Symbol {
+// READ-NEXT: Name: (0)
+// READ-NEXT: Value: 0x0
+// READ-NEXT: Size: 0
+// READ-NEXT: Binding: Local
+// READ-NEXT: Type: None
+// READ-NEXT: Other: 0
+// READ-NEXT: Section: Undefined
+// READ-NEXT: }
+// READ-NEXT: Symbol {
+// READ-NEXT: Name: foo
+// READ-NEXT: Value
+// READ-NEXT: Size: 0
+// READ-NEXT: Binding: Local
+// READ-NEXT: Type: None
+// READ-NEXT: Other: 0
+// READ-NEXT: Section: .text
+// READ-NEXT: }
+// READ-NEXT: Symbol {
+// READ-NEXT: Name: abc
+// READ-NEXT: Value: 0x0
+// READ-NEXT: Size: 0
+// READ-NEXT: Binding: Global
+// READ-NEXT: Type: None
+// READ-NEXT: Other: 0
+// READ-NEXT: Section: Undefined
+// READ-NEXT: }
+// READ-NEXT: ]
-------------- next part --------------
A non-text attachment was scrubbed...
Name: run.sh
Type: application/x-sh
Size: 7863 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20151008/01998da7/attachment-0001.sh>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: gold-perf
Type: application/octet-stream
Size: 1051 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20151008/01998da7/attachment-0003.obj>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: lld-master-perf
Type: application/octet-stream
Size: 1020 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20151008/01998da7/attachment-0004.obj>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: lld-patch-perf
Type: application/octet-stream
Size: 1021 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20151008/01998da7/attachment-0005.obj>
More information about the llvm-commits
mailing list