[PATCH] D12545: [elf2] Add basic archive file support.
Rafael Espíndola via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 3 12:32:22 PDT 2015
Patch rebased on current trunk.
On 3 September 2015 at 13:40, Rui Ueyama <ruiu at google.com> wrote:
> On Thu, Sep 3, 2015 at 6:17 AM, Rafael Espíndola
> <rafael.espindola at gmail.com> wrote:
>>
>> Another thing: I see that you moved createFile out of Driver.cpp.
>>
>> That is not what COFF does and I think I agree with COFF on this one.
>> The files we will support at the top level are not the ones we support
>> inside a .a. It is better to error early if we are given a .so or a
>> linker script inside an archive.
>
>
> Yes, that's a deliberate choice that I made. A few more examples: In COFF,
> import files are given only as members of archive files. Both in COFF and
> ELF, archive files cannot be members of archive files.
>
>>
>>
>> Cheers,
>> Rafael
>>
>>
>>
>> On 2 September 2015 at 16:48, Rafael Espíndola
>> <rafael.espindola at gmail.com> wrote:
>> > The warnings with the file kind were really an issue with the existing
>> > code. I have fixed that and rebased your patch. I also include the
>> > change to use std::vector<Lazy> LazySymbols;.
>> >
>> > The remaining warnings look like new issues:
>> >
>> > /home/espindola/llvm/llvm/tools/lld/ELF/Writer.cpp:317:13: warning:
>> > enumeration value 'LazyKind' not handled in switch [-Wswitch]
>> > switch (Body->kind()) {
>> > ^
>> > /home/espindola/llvm/llvm/tools/lld/ELF/Writer.cpp:317:13: warning:
>> > enumeration value 'LazyKind' not handled in switch [-Wswitch]
>> > /home/espindola/llvm/llvm/tools/lld/ELF/Writer.cpp:317:13: warning:
>> > enumeration value 'LazyKind' not handled in switch [-Wswitch]
>> > /home/espindola/llvm/llvm/tools/lld/ELF/Writer.cpp:317:13: warning:
>> > enumeration value 'LazyKind' not handled in switch [-Wswitch]
>> > /home/espindola/llvm/llvm/tools/lld/ELF/Writer.cpp:317:13: warning:
>> > enumeration value 'LazyKind' not handled in switch [-Wswitch]
>> >
>> > Please fix them and upload a new patch.
>> >
>> >
>> > On 2 September 2015 at 11:50, Rafael Espíndola
>> > <rafael.espindola at gmail.com> wrote:
>> >> Why use a llvm::MallocAllocator? It seems better to not have an
>> >> allocator and use a smart pointer or use a BumpPtrAllocator.
>> >>
>> >> Given that the allocator is used for exactly one allocation per file,
>> >> wouldn't it be the same to replace
>> >>
>> >> std::vector<Lazy *> LazySymbols;
>> >> llvm::MallocAllocator Alloc;
>> >>
>> >> With
>> >>
>> >> std::vector<Lazy> LazySymbols
>> >> ?
>> >>
>> >> On 2 September 2015 at 11:33, Rafael Ávila de Espíndola
>> >> <llvm-commits at lists.llvm.org> wrote:
>> >>> rafael added inline comments.
>> >>>
>> >>> ================
>> >>> Comment at: ELF/InputFiles.cpp:15
>> >>> @@ -14,2 +14,3 @@
>> >>> #include "llvm/ADT/STLExtras.h"
>> >>> +#include "llvm/Support/FileSystem.h"
>> >>>
>> >>> ----------------
>> >>> Not used.
>> >>>
>> >>>
>> >>> http://reviews.llvm.org/D12545
>> >>>
>> >>>
>> >>>
>> >>> _______________________________________________
>> >>> llvm-commits mailing list
>> >>> llvm-commits at lists.llvm.org
>> >>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
>
-------------- next part --------------
diff --git a/ELF/Driver.cpp b/ELF/Driver.cpp
index e7abc41..5ae7335 100644
--- a/ELF/Driver.cpp
+++ b/ELF/Driver.cpp
@@ -44,25 +44,6 @@ MemoryBufferRef LinkerDriver::openFile(StringRef Path) {
return MBRef;
}
-static std::unique_ptr<InputFile> createFile(MemoryBufferRef MB) {
- std::pair<unsigned char, unsigned char> Type =
- object::getElfArchType(MB.getBuffer());
- if (Type.second != ELF::ELFDATA2LSB && Type.second != ELF::ELFDATA2MSB)
- error("Invalid data encoding");
-
- if (Type.first == ELF::ELFCLASS32) {
- if (Type.second == ELF::ELFDATA2LSB)
- return make_unique<ObjectFile<object::ELF32LE>>(MB);
- return make_unique<ObjectFile<object::ELF32BE>>(MB);
- }
- if (Type.first == ELF::ELFCLASS64) {
- if (Type.second == ELF::ELFDATA2LSB)
- return make_unique<ObjectFile<object::ELF64LE>>(MB);
- return make_unique<ObjectFile<object::ELF64BE>>(MB);
- }
- error("Invalid file class");
-}
-
void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
// Parse command line options.
opt::InputArgList Args = Parser.parse(ArgsArr);
diff --git a/ELF/InputFiles.cpp b/ELF/InputFiles.cpp
index 7cc47de..4f08cc4 100644
--- a/ELF/InputFiles.cpp
+++ b/ELF/InputFiles.cpp
@@ -13,7 +13,11 @@
#include "Symbols.h"
#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
using namespace llvm::ELF;
+using namespace llvm::object;
+using llvm::sys::fs::identify_magic;
+using llvm::sys::fs::file_magic;
using namespace lld;
using namespace lld::elf2;
@@ -124,6 +128,63 @@ SymbolBody *elf2::ObjectFile<ELFT>::createSymbolBody(StringRef StringTable,
}
}
+void ArchiveFile::parse() {
+ auto ArchiveOrErr = Archive::create(MB);
+ error(ArchiveOrErr, "Failed to parse archive");
+ File = std::move(*ArchiveOrErr);
+
+ // Allocate a buffer for Lazy objects.
+ size_t NumSyms = File->getNumberOfSymbols();
+ LazySymbols.reserve(NumSyms);
+
+ // Read the symbol table to construct Lazy objects.
+ for (const Archive::Symbol &Sym : File->symbols())
+ LazySymbols.emplace_back(this, Sym);
+}
+
+// Returns a buffer pointing to a member file containing a given symbol.
+MemoryBufferRef ArchiveFile::getMember(const Archive::Symbol *Sym) {
+ auto ItOrErr = Sym->getMember();
+ error(ItOrErr,
+ Twine("Could not get the member for symbol ") + Sym->getName());
+ Archive::child_iterator It = *ItOrErr;
+
+ // Return an empty buffer if we have already returned the same buffer.
+ bool &SeenMember = Seen[It->getChildOffset()];
+ if (SeenMember) {
+ return MemoryBufferRef();
+ }
+ SeenMember = true;
+ ErrorOr<MemoryBufferRef> Ret = It->getMemoryBufferRef();
+ error(Ret, Twine("Could not get the buffer for the member defining symbol ") +
+ Sym->getName());
+ return *Ret;
+}
+
+std::unique_ptr<InputFile> lld::elf2::createFile(MemoryBufferRef MB) {
+ file_magic Magic = identify_magic(MB.getBuffer());
+
+ if (Magic == file_magic::archive)
+ return llvm::make_unique<ArchiveFile>(MB);
+
+ std::pair<unsigned char, unsigned char> Type =
+ object::getElfArchType(MB.getBuffer());
+ if (Type.second != ELF::ELFDATA2LSB && Type.second != ELF::ELFDATA2MSB)
+ error("Invalid data encoding");
+
+ if (Type.first == ELF::ELFCLASS32) {
+ if (Type.second == ELF::ELFDATA2LSB)
+ return make_unique<ObjectFile<object::ELF32LE>>(MB);
+ return make_unique<ObjectFile<object::ELF32BE>>(MB);
+ }
+ if (Type.first == ELF::ELFCLASS64) {
+ if (Type.second == ELF::ELFDATA2LSB)
+ return make_unique<ObjectFile<object::ELF64LE>>(MB);
+ return make_unique<ObjectFile<object::ELF64BE>>(MB);
+ }
+ error("Invalid file class");
+}
+
namespace lld {
namespace elf2 {
template class elf2::ObjectFile<llvm::object::ELF32LE>;
diff --git a/ELF/InputFiles.h b/ELF/InputFiles.h
index 8a0a0be..5386b5e 100644
--- a/ELF/InputFiles.h
+++ b/ELF/InputFiles.h
@@ -14,16 +14,22 @@
#include "Symbols.h"
#include "lld/Core/LLVM.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Object/Archive.h"
#include "llvm/Object/ELF.h"
namespace lld {
namespace elf2 {
+
+using llvm::object::Archive;
+
+class Lazy;
class SymbolBody;
// The root class of input files.
class InputFile {
public:
- enum Kind { ObjectKind };
+ enum Kind { ObjectKind, ArchiveKind };
Kind kind() const { return FileKind; }
virtual ~InputFile() {}
@@ -119,6 +125,27 @@ private:
ArrayRef<Elf_Word> SymtabSHNDX;
};
+class ArchiveFile : public InputFile {
+public:
+ explicit ArchiveFile(MemoryBufferRef M) : InputFile(ArchiveKind, M) {}
+ static bool classof(const InputFile *F) { return F->kind() == ArchiveKind; }
+ void parse() override;
+
+ // Returns a memory buffer for a given symbol. An empty memory buffer
+ // is returned if we have already returned the same memory buffer.
+ // (So that we don't instantiate same members more than once.)
+ MemoryBufferRef getMember(const Archive::Symbol *Sym);
+
+ llvm::MutableArrayRef<Lazy> getLazySymbols() { return LazySymbols; }
+
+private:
+ std::unique_ptr<Archive> File;
+ std::vector<Lazy> LazySymbols;
+ llvm::DenseMap<uint64_t, bool> Seen;
+};
+
+std::unique_ptr<InputFile> createFile(MemoryBufferRef MB);
+
} // namespace elf2
} // namespace lld
diff --git a/ELF/SymbolTable.cpp b/ELF/SymbolTable.cpp
index 5e9a1c9..037af7d 100644
--- a/ELF/SymbolTable.cpp
+++ b/ELF/SymbolTable.cpp
@@ -22,9 +22,14 @@ SymbolTable::SymbolTable() {
void SymbolTable::addFile(std::unique_ptr<InputFile> File) {
File->parse();
- InputFile *FileP = File.release();
- auto *P = cast<ObjectFileBase>(FileP);
- addObject(P);
+ InputFile *FileP = File.get();
+ Files.push_back(std::move(File));
+ if (auto *AF = dyn_cast<ArchiveFile>(FileP)) {
+ for (Lazy &Sym : AF->getLazySymbols())
+ addLazy(&Sym);
+ return;
+ }
+ addObject(cast<ObjectFileBase>(FileP));
}
template <class ELFT> void SymbolTable::init() {
@@ -83,22 +88,65 @@ void SymbolTable::reportRemainingUndefines() {
// This function resolves conflicts if there's an existing symbol with
// the same name. Decisions are made based on symbol type.
template <class ELFT> void SymbolTable::resolve(SymbolBody *New) {
+ Symbol *Sym = insert(New);
+ if (Sym->Body == New)
+ return;
+
+ SymbolBody *Existing = Sym->Body;
+
+ if (Lazy *L = dyn_cast<Lazy>(Existing)) {
+ if (New->isUndefined()) {
+ addMemberFile(L);
+ return;
+ }
+
+ // Found a definition for something also in an archive. Ignore the archive
+ // definition.
+ Sym->Body = New;
+ return;
+ }
+
+ // compare() returns -1, 0, or 1 if the lhs symbol is less preferable,
+ // equivalent (conflicting), or more preferable, respectively.
+ int comp = Existing->compare<ELFT>(New);
+ if (comp < 0)
+ Sym->Body = New;
+ if (comp == 0)
+ error(Twine("duplicate symbol: ") + Sym->Body->getName());
+}
+
+Symbol *SymbolTable::insert(SymbolBody *New) {
// Find an existing Symbol or create and insert a new one.
StringRef Name = New->getName();
Symbol *&Sym = Symtab[Name];
if (!Sym) {
Sym = new (Alloc) Symbol(New);
New->setBackref(Sym);
- return;
+ return Sym;
}
New->setBackref(Sym);
+ return Sym;
+}
- // compare() returns -1, 0, or 1 if the lhs symbol is less preferable,
- // equivalent (conflicting), or more preferable, respectively.
+void SymbolTable::addLazy(Lazy *New) {
+ Symbol *Sym = insert(New);
+ if (Sym->Body == New)
+ return;
SymbolBody *Existing = Sym->Body;
- int comp = Existing->compare<ELFT>(New);
- if (comp < 0)
- Sym->Body = New;
- if (comp == 0)
- error(Twine("duplicate symbol: ") + Name);
+ if (Existing->isDefined() || isa<Lazy>(Existing))
+ return;
+ Sym->Body = New;
+ if (Existing->isUndefined())
+ addMemberFile(New);
+}
+
+void SymbolTable::addMemberFile(Lazy *Body) {
+ std::unique_ptr<InputFile> File = Body->getMember();
+
+ // getMember returns an empty buffer if the member was already
+ // read from the library.
+ if (!File)
+ return;
+
+ addFile(std::move(File));
}
diff --git a/ELF/SymbolTable.h b/ELF/SymbolTable.h
index 84fbe00..a7b116d 100644
--- a/ELF/SymbolTable.h
+++ b/ELF/SymbolTable.h
@@ -36,7 +36,7 @@ public:
ObjectFileBase *getFirstObject() const {
if (!ObjectFiles.empty())
- return ObjectFiles[0].get();
+ return ObjectFiles[0];
return nullptr;
}
@@ -47,21 +47,26 @@ public:
return Symtab;
}
- const std::vector<std::unique_ptr<ObjectFileBase>> &getObjectFiles() const {
+ const std::vector<ObjectFileBase *> &getObjectFiles() const {
return ObjectFiles;
}
private:
+ Symbol *insert(SymbolBody *New);
void addObject(ObjectFileBase *File);
+ void addLazy(Lazy *New);
+ void addMemberFile(Lazy *Body);
template <class ELFT> void init();
template <class ELFT> void resolve(SymbolBody *Body);
+ std::vector<std::unique_ptr<InputFile>> Files;
+
llvm::DenseMap<StringRef, Symbol *> Symtab;
llvm::BumpPtrAllocator Alloc;
// The writer needs to infer the machine type from the object files.
- std::vector<std::unique_ptr<ObjectFileBase>> ObjectFiles;
+ std::vector<ObjectFileBase *> ObjectFiles;
};
} // namespace elf2
diff --git a/ELF/Symbols.cpp b/ELF/Symbols.cpp
index 37e0033..1da4ab5 100644
--- a/ELF/Symbols.cpp
+++ b/ELF/Symbols.cpp
@@ -29,6 +29,7 @@ static uint8_t getMinVisibility(uint8_t VA, uint8_t VB) {
// Returns 1, 0 or -1 if this symbol should take precedence
// over the Other, tie or lose, respectively.
template <class ELFT> int SymbolBody::compare(SymbolBody *Other) {
+ assert(!isLazy() && !Other->isLazy());
std::pair<bool, bool> L(isDefined(), !isWeak());
std::pair<bool, bool> R(Other->isDefined(), !Other->isWeak());
@@ -67,6 +68,17 @@ template <class ELFT> int SymbolBody::compare(SymbolBody *Other) {
return 1;
}
+std::unique_ptr<InputFile> Lazy::getMember() {
+ MemoryBufferRef MBRef = File->getMember(&Sym);
+
+ // getMember returns an empty buffer if the member was already
+ // read from the library.
+ if (MBRef.getBuffer().empty())
+ return std::unique_ptr<InputFile>(nullptr);
+
+ return createFile(MBRef);
+}
+
template int SymbolBody::compare<ELF32LE>(SymbolBody *Other);
template int SymbolBody::compare<ELF32BE>(SymbolBody *Other);
template int SymbolBody::compare<ELF64LE>(SymbolBody *Other);
diff --git a/ELF/Symbols.h b/ELF/Symbols.h
index 5051890..fd27d96 100644
--- a/ELF/Symbols.h
+++ b/ELF/Symbols.h
@@ -13,13 +13,13 @@
#include "Chunks.h"
#include "lld/Core/LLVM.h"
+#include "llvm/Object/Archive.h"
#include "llvm/Object/ELF.h"
namespace lld {
namespace elf2 {
-using llvm::object::ELFFile;
-
+class ArchiveFile;
class Chunk;
class InputFile;
class SymbolBody;
@@ -42,7 +42,8 @@ public:
DefinedAbsoluteKind = 1,
DefinedCommonKind = 2,
DefinedLast = 2,
- UndefinedKind = 3
+ UndefinedKind = 3,
+ LazyKind = 4,
};
Kind kind() const { return static_cast<Kind>(SymbolKind); }
@@ -52,6 +53,7 @@ public:
bool isDefined() const { return !isUndefined(); }
bool isStrongUndefined() const { return !IsWeak && isUndefined(); }
bool isCommon() const { return SymbolKind == DefinedCommonKind; }
+ bool isLazy() const { return SymbolKind == LazyKind; }
// Returns the symbol name.
StringRef getName() const { return Name; }
@@ -200,6 +202,28 @@ public:
template <class ELFT>
typename Undefined<ELFT>::Elf_Sym Undefined<ELFT>::Synthetic;
+// This class represents a symbol defined in an archive file. It is
+// created from an archive file header, and it knows how to load an
+// object file from an archive to replace itself with a defined
+// symbol. If the resolver finds both Undefined and Lazy for
+// the same name, it will ask the Lazy to load a file.
+class Lazy : public SymbolBody {
+public:
+ Lazy(ArchiveFile *F, const llvm::object::Archive::Symbol S)
+ : SymbolBody(LazyKind, S.getName(), false, llvm::ELF::STV_DEFAULT),
+ File(F), Sym(S) {}
+
+ static bool classof(const SymbolBody *S) { return S->kind() == LazyKind; }
+
+ // Returns an object file for this symbol, or a nullptr if the file
+ // was already returned.
+ std::unique_ptr<InputFile> getMember();
+
+private:
+ ArchiveFile *File;
+ const llvm::object::Archive::Symbol Sym;
+};
+
} // namespace elf2
} // namespace lld
diff --git a/ELF/Writer.cpp b/ELF/Writer.cpp
index f498742..8577c32 100644
--- a/ELF/Writer.cpp
+++ b/ELF/Writer.cpp
@@ -432,7 +432,7 @@ template <class ELFT> void Writer<ELFT>::createSections() {
};
const SymbolTable &Symtab = SymTable.getSymTable();
- for (const std::unique_ptr<ObjectFileBase> &FileB : Symtab.getObjectFiles()) {
+ for (ObjectFileBase *FileB : Symtab.getObjectFiles()) {
auto &File = cast<ObjectFile<ELFT>>(*FileB);
for (SectionChunk<ELFT> *C : File.getChunks()) {
if (!C)
diff --git a/test/elf2/archive.s b/test/elf2/archive.s
new file mode 100644
index 0000000..7a7c1c0
--- /dev/null
+++ b/test/elf2/archive.s
@@ -0,0 +1,21 @@
+// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t
+// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %S/basic.s -o %t2
+// RUN: llvm-ar rcs %tar %t2
+// RUN: lld -flavor gnu2 %t %tar -o %tout
+// RUN: llvm-objdump -t -d %tout | FileCheck %s
+// REQUIRES: x86
+
+
+.section .text,"ax"
+call _start
+
+// CHECK: Disassembly of section .text:
+// CHECK: .text:
+// CHECK: e8 03 00 00 00 callq 3
+// CHECK: _start:
+// CHECK: 48 c7 c0 3c 00 00 00 movq $60, %rax
+// CHECK: 48 c7 c7 2a 00 00 00 movq $42, %rdi
+// CHECK: 0f 05 syscall
+// CHECK: SYMBOL TABLE:
+// CHECK: 0000000000000000 *UND* 00000000
+// CHECK: 0000000000001008 .text 00000000 _start
More information about the llvm-commits mailing list