[llvm-commits] ELFReader.cpp update - Add support for References.
Michael Spencer
bigcheesegs at gmail.com
Fri Aug 31 22:04:24 PDT 2012
On Fri, Aug 31, 2012 at 12:40 PM, Sid Manning <sidneym at codeaurora.org> wrote:
>
> This patch adds support for collection of References in object files.
>
> The ELFReference class is a derived class of Reference and adds
> a new member, TargetNameOffset. TargetNameOffset is used to
> reference the index ELF_R_SYM would point to. These offsets are
> recorded in a vector, "SymbolNames", added to the ELFFile class.
> A findAtom(StringRef) method was added to the ELFFile class that uses
> the above to locate matching atoms to fill in the Reference's target Atom.
>
> The ELFDefinedAtom class has been extended to take a ReferenceStartIndex
> and a ReferenceEndIndex. Each Atom that contains references outside
> its own scope will have a "Reference" stored in the Reference class.
>
> The FileELF method now records the contents of the .rela/rel sections
> and stores those into a map, (sectionName, reloclist[])
> std::map<llvm::StringRef, std::vector<Elf_Rel *>> RelocationReferences;
>
>
> --
> Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by
> The Linux Foundation
> Index: lib/ReaderWriter/ELF/ReaderELF.cpp
> ===================================================================
> --- lib/ReaderWriter/ELF/ReaderELF.cpp (revision 162996)
> +++ lib/ReaderWriter/ELF/ReaderELF.cpp (working copy)
> @@ -14,8 +14,10 @@
>
> #include "lld/ReaderWriter/ReaderELF.h"
> #include "lld/Core/File.h"
> +#include "lld/Core/Reference.h"
>
> #include "llvm/ADT/ArrayRef.h"
> +#include "llvm/ADT/SmallString.h"
> #include "llvm/ADT/StringRef.h"
> #include "llvm/Object/ELF.h"
> #include "llvm/Object/ObjectFile.h"
> @@ -39,7 +41,68 @@
>
> namespace { // anonymous
>
> +
> +//
> +// Relocation References: Defined Atom may contain
> +// references that will need to be patched before
> +// the executable is written.
> +//
> +class ELFReference : public Reference {
> +public:
> + ELFReference(Reference::Kind K,
> + uint64_t O,
> + const Atom *T,
> + uint64_t N,
> + Reference::Addend A)
> + : Target(T)
> + , TargetNameOffset(N)
> + , OffsetInAtom(O)
> + , Addend(A)
> + , Kind(K) { }
Why doesn't this just take a tagged union of Elf_Rel_Impl* (for Rel and Rela)
and read the info from that whenever an accessor is called?
> +
> + virtual uint64_t offsetInAtom() const {
> + return OffsetInAtom;
> + }
> +
> + virtual Kind kind() const {
> + return Kind;
> + }
> +
> + virtual void setKind(Kind k) {
> + Kind = k;
> + }
> +
> + virtual const Atom* target() const {
> + return Target;
> + }
> +
> + virtual uint64_t targetNameOffset() const {
> + return TargetNameOffset;
> + }
> +
> + virtual Addend addend() const {
> + return Addend;
> + }
> +
> + virtual void setAddend(Addend a) {
> + Addend = a;
> + }
> +
> + virtual void setTarget(const Atom* newAtom) {
> + Target = newAtom;
> + }
> +private:
> + const Atom* Target;
> + uint64_t TargetNameOffset;
TargetNameOffset is a weird way to handle mapping from ELF_R_SYM to Atom*. I
think the best way to do this would be to add a public
Elf_Sym *getSymbol(uint32_t Index)
function to ELFObjectFile. Then add a DenseMap<Elf_Sym*, Atom*> to go from
ELF_R_SYM to Atom*.
> + uint64_t OffsetInAtom;
> + Addend Addend;
> + Kind Kind;
> +};
> +
> +
> +
> // This atom class corresponds to absolute symbol
> +template<llvm::support::endianness target_endianness, bool is64Bits>
> class ELFAbsoluteAtom: public AbsoluteAtom {
>
> public:
> @@ -126,15 +189,21 @@
> llvm::StringRef SN,
> const Elf_Sym *E,
> const Elf_Shdr *S,
> - llvm::ArrayRef<uint8_t> D)
> + llvm::ArrayRef<uint8_t> D,
> + unsigned int RS,
> + unsigned int RE)
> +
> : OwningFile(F)
> , SymbolName(N)
> , SectionName(SN)
> , Symbol(E)
> , Section(S)
> - , ContentData(D) {
> + , ContentData(D)
> + , ReferenceStartIndex(RS)
> + , ReferenceEndIndex(RE) {
> static uint64_t ordernumber = 0;
> - _ordinal = ++ordernumber;
> + Ordinal = ++ordernumber;
> +
> }
>
> virtual const class File &file() const {
> @@ -146,7 +215,7 @@
> }
>
> virtual uint64_t ordinal() const {
> - return _ordinal;
> + return Ordinal;
> }
>
> virtual uint64_t size() const {
> @@ -284,31 +353,45 @@
> return ContentData;
> }
>
> - virtual reference_iterator begin() const {
> - return reference_iterator(*this, nullptr);
> + DefinedAtom::reference_iterator begin() const {
> + uintptr_t index = ReferenceStartIndex;
> + const void* it = reinterpret_cast<const void*>(index);
> + return reference_iterator(*this, it);
> }
>
> - virtual reference_iterator end() const {
> - return reference_iterator(*this, nullptr);
> + DefinedAtom::reference_iterator end() const {
> + uintptr_t index = ReferenceEndIndex;
> + const void* it = reinterpret_cast<const void*>(index);
> + return reference_iterator(*this, it);
> }
>
> -private:
> - virtual const Reference *derefIterator(const void *iter) const {
> + const Reference* derefIterator(const void* it) const {
> + uintptr_t index = reinterpret_cast<uintptr_t>(it);
> + assert(index >= ReferenceStartIndex);
> + assert(index < ReferenceEndIndex);
> return nullptr;
> }
> - virtual void incrementIterator(const void *&iter) const {
> +
> + void incrementIterator(const void*& it) const {
> + uintptr_t index = reinterpret_cast<uintptr_t>(it);
> + ++index;
> + it = reinterpret_cast<const void*>(index);
> }
>
> +private:
> +
> const File &OwningFile;
> llvm::StringRef SymbolName;
> llvm::StringRef SectionName;
> const Elf_Sym *Symbol;
> const Elf_Shdr *Section;
>
> - // ContentData will hold the bits that make up the atom.
> + // ContentData will hold the bits that make up the atom.
> llvm::ArrayRef<uint8_t> ContentData;
>
> - uint64_t _ordinal;
> + uint64_t Ordinal;
> + unsigned int ReferenceStartIndex;
> + unsigned int ReferenceEndIndex;
> };
>
>
> @@ -318,9 +401,23 @@
> template<llvm::support::endianness target_endianness, bool is64Bits>
> class FileELF: public File {
>
> - typedef llvm::object::Elf_Sym_Impl<target_endianness, is64Bits> Elf_Sym;
> - typedef llvm::object::Elf_Shdr_Impl<target_endianness, is64Bits> Elf_Shdr;
> + typedef llvm::object::Elf_Sym_Impl
> + <target_endianness, is64Bits> Elf_Sym;
> + typedef llvm::object::Elf_Shdr_Impl
> + <target_endianness, is64Bits> Elf_Shdr;
> + typedef llvm::object::Elf_Rel_Impl
> + <target_endianness, is64Bits, false> Elf_Rel;
> + typedef llvm::object::Elf_Rel_Impl
> + <target_endianness, is64Bits, true> Elf_Rela;
>
> + struct NameAtomPair {
> + NameAtomPair(StringRef N,
> + Atom *A)
> + : name(N), atom(A) {}
> + StringRef name;
> + Atom *atom;
> + };
> +
Use std::pair.
> public:
> FileELF(std::unique_ptr<llvm::MemoryBuffer> MB, llvm::error_code &EC) :
> File(MB->getBufferIdentifier()) {
> @@ -343,39 +440,105 @@
>
> std::map< const Elf_Shdr *, std::vector<const Elf_Sym *>> SectionSymbols;
>
> +// Handle: SHT_REL and SHT_RELA sections:
> +// Iterate over the sections; when REL/RELA section types are
> +// found, add the contents to the RelocationReferences map.
> +
> + llvm::object::section_iterator sit(Obj->begin_sections());
> + llvm::object::section_iterator sie(Obj->end_sections());
> + for (; sit != sie; sit.increment(EC)) {
> + if (EC)
> + return;
> +
> + const Elf_Shdr *section = Obj->getElfSection(sit);
> +
> + if (section->sh_type == llvm::ELF::SHT_RELA)
> + {
No newline before {
> + StringRef contents;
> + if ((EC = Obj->getSectionContents(section, contents)))
> + return;
> +
> + llvm::StringRef sectionName;
> + if ((EC = Obj->getSectionName(section, sectionName)))
> + return;
> +
> + Elf_Rela *relocs = const_cast<Elf_Rela *>
> + (reinterpret_cast<const Elf_Rela *>(contents.data()));
> +
> + // Get rid of the leading .rela so Atoms can use their own section
> + // name to find the relocs.
> + sectionName = sectionName.drop_front(5);
> + for (unsigned int i=0; i<contents.size()/sizeof(Elf_Rela); i++) {
This is incorrect. The distance between relocations is defined by
Elf_Shdr::sh_entsize. This type of iteration should probably be added to
ELFObjectFile.
> + RelocationAddendReferences[sectionName].push_back(relocs+i);
A reference to RelocationAddendReferences[sectionName] should be
hoisted outside of the loop.
> + }
> +
> + }
> +
> + if (section->sh_type == llvm::ELF::SHT_REL)
> + {
> + StringRef contents;
> + if ((EC = Obj->getSectionContents(section, contents)))
> + return;
> +
> + llvm::StringRef sectionName;
> + if ((EC = Obj->getSectionName(section, sectionName)))
> + return;
> +
> + Elf_Rel *relocs = const_cast<Elf_Rel *>
> + (reinterpret_cast<const Elf_Rel *>(contents.data()));
> +
> + // Get rid of the leading .rel so Atoms can use their own section
> + // name to find the relocs.
> + sectionName = sectionName.drop_front(4);
> + for (unsigned int i=0; i<contents.size()/sizeof(Elf_Rel); i++) {
> + RelocationReferences[sectionName].push_back(relocs+i);
> + }
> +
> + }
> + }
> +
> +// Iterate over all the symbols, collecting atoms and symbol
> +// names for later use.
> +
> + SymbolNames.push_back("\0"); // ELF: Entry 0 default NULL
> llvm::object::symbol_iterator it(Obj->begin_symbols());
> llvm::object::symbol_iterator ie(Obj->end_symbols());
>
> for (; it != ie; it.increment(EC)) {
> if (EC)
> return;
> - llvm::object::SectionRef SR;
> - llvm::object::section_iterator section(SR);
>
> - if ((EC = it->getSection(section)))
> + if ((EC = it->getSection(sit)))
> return;
>
> - const Elf_Shdr *Section = Obj->getElfSection(section);
> + const Elf_Shdr *Section = Obj->getElfSection(sit);
> const Elf_Sym *Symbol = Obj->getElfSymbol(it);
>
> llvm::StringRef SymbolName;
> if ((EC = Obj->getSymbolName(Section, Symbol, SymbolName)))
> return;
>
> + // Push the symbol names to a vector for easy dereferencing via
> + SymbolNames.push_back(SymbolName);
> +
> if (Symbol->st_shndx == llvm::ELF::SHN_ABS) {
> // Create an absolute atom.
> - AbsoluteAtoms._atoms.push_back(
> - new (AtomStorage.Allocate<ELFAbsoluteAtom> ())
> - ELFAbsoluteAtom(*this, SymbolName,
> - Symbol->st_value));
> + ELFAbsoluteAtom<target_endianness, is64Bits> *NewAtom = new
> + ELFAbsoluteAtom<target_endianness, is64Bits> (*this,
> + SymbolName,
> + Symbol->st_value);
Why is this not using placement new in AtomStorage? Also, you can use auto here.
Same for the rest of these.
> + AbsoluteAtoms._atoms.push_back(NewAtom);
> + NameToAtomMapping.push_back(NameAtomPair(SymbolName, NewAtom));
>
> } else if (Symbol->st_shndx == llvm::ELF::SHN_UNDEF) {
> // Create an undefined atom.
> - UndefinedAtoms._atoms.push_back(
> - new (AtomStorage.Allocate<ELFUndefinedAtom<
> - target_endianness, is64Bits>>())
> - ELFUndefinedAtom<target_endianness, is64Bits> (
> - *this, SymbolName, Symbol));
> + ELFUndefinedAtom<target_endianness, is64Bits> *NewAtom = new
> + ELFUndefinedAtom<target_endianness, is64Bits> (*this,
> + SymbolName, Symbol);
> +
> + UndefinedAtoms._atoms.push_back(NewAtom);
> + NameToAtomMapping.push_back(NameAtomPair(SymbolName, NewAtom));
> +
> } else {
> // This is actually a defined symbol. Add it to its section's list of
> // symbols.
> @@ -432,28 +595,84 @@
>
> // Get the symbol's content:
> llvm::ArrayRef<uint8_t> SymbolData;
> + uint64_t contentSize;
> if (si + 1 == se) {
> // if this is the last symbol, take up the remaining data.
> - SymbolData = llvm::ArrayRef<uint8_t>((uint8_t *)symbolContents.data()
> - + (*si)->st_value,
> - (IsCommon) ? 0 :
> - ((i.first)->sh_size - (*si)->st_value));
> + contentSize = (IsCommon) ? 0 : ((i.first)->sh_size - (*si)->st_value);
> }
> else {
> - SymbolData = llvm::ArrayRef<uint8_t>((uint8_t *)symbolContents.data()
> - + (*si)->st_value,
> - (IsCommon) ? 0 :
> - (*(si + 1))->st_value - (*si)->st_value);
> + contentSize = (IsCommon) ? 0 : (*(si + 1))->st_value - (*si)->st_value;
> }
>
> - DefinedAtoms._atoms.push_back(
> - new (AtomStorage.Allocate<ELFDefinedAtom<
> - target_endianness, is64Bits> > ())
> - ELFDefinedAtom<target_endianness, is64Bits> (*this,
> + SymbolData = llvm::ArrayRef<uint8_t>((uint8_t *)symbolContents.data()
> + + (*si)->st_value, contentSize);
> +
> +
> + unsigned int referenceStart = References.size();
> +
> + // Make Elf_Rela references
> + typename std::vector<Elf_Rela *>::iterator rai =
> + RelocationAddendReferences[SectionName].begin();
> + typename std::vector<Elf_Rela *>::iterator rae =
> + RelocationAddendReferences[SectionName].end();
> +
> + // Only relocations that are inside the domain of the atom are
> + // added.
> + for (; rai != rae; rai++) {
This can use a C++11 range-based for loop.
> + if (((*rai)->r_offset >= (*si)->st_value) &&
> + ((*rai)->r_offset < (*si)->st_value+contentSize)) {
> +
> + ELFReference *eref = new ELFReference (
> + (*rai)->getType(), (*rai)->r_offset-(*si)->st_value,
> + nullptr, (*rai)->getSymbol(), (*rai)->r_addend);
This should use a BumpPtrAllocator too. Currently it's just leaked.
> +
> + References.push_back(eref);
> + }
> + }
> +
> + // Make Elf_Rel references
> + typename std::vector<Elf_Rel *>::iterator ri =
> + RelocationReferences[SectionName].begin();
> + typename std::vector<Elf_Rel *>::iterator re =
> + RelocationReferences[SectionName].end();
> +
> + // Only relocations that are inside the domain of the atom are
> + // added.
> + for (; ri != re; ri++) {
> + if (((*ri)->r_offset >= (*si)->st_value) &&
> + ((*ri)->r_offset < (*si)->st_value+contentSize)) {
> +
> + ELFReference *eref = new ELFReference (
> + (*ri)->getType(), (*ri)->r_offset-(*si)->st_value,
> + nullptr, (*ri)->getSymbol(), 0);
> +
> + References.push_back(eref);
> + }
> + }
> +
> + ELFDefinedAtom<target_endianness, is64Bits> *NewAtom = new
> + ELFDefinedAtom<target_endianness, is64Bits> (*this,
> SymbolName, SectionName,
> - *si, i.first, SymbolData));
> + *si, i.first, SymbolData,
> + referenceStart, References.size());
> +
> + DefinedAtoms._atoms.push_back(NewAtom);
> + NameToAtomMapping.push_back(NameAtomPair(SymbolName, NewAtom));
> +
> }
> }
> +
> +// All the Atoms and References are created. Now update each Reference's
> +// target with the Atom pointer it refers to.
> + typename std::vector<ELFReference *>::iterator ri = References.begin();
> + typename std::vector<ELFReference *>::iterator eri = References.end();
> +
> + for (; ri != eri; ri++) {
> + StringRef TargetSymbolName = SymbolNames[(*ri)->targetNameOffset()];
> + Atom *target = findAtom (TargetSymbolName);
> + (*ri)->setTarget(target);
> + }
> +
> }
>
> virtual void addAtom(const Atom&) {
> @@ -476,6 +695,15 @@
> return AbsoluteAtoms;
> }
>
> + Atom *findAtom(StringRef name) {
> + for (auto &ci : NameToAtomMapping) {
> + if (ci.name == name)
> + return ci.atom;
> + }
> + return nullptr;
> + }
> +
> +
> private:
> std::unique_ptr<llvm::object::ELFObjectFile<target_endianness, is64Bits> >
> Obj;
> @@ -483,8 +711,20 @@
> atom_collection_vector<UndefinedAtom> UndefinedAtoms;
> atom_collection_vector<SharedLibraryAtom> SharedLibraryAtoms;
> atom_collection_vector<AbsoluteAtom> AbsoluteAtoms;
> +
> +// This contains a list of relocation references. In ELF, a
> +// section named ".text" that has relocations will also have
> +// a section named ".rel.text" or ".rela.text" which will hold the
> +// entries. -- .rel or .rela is prepended to create the SHT_REL(A) section.
> + std::map<llvm::StringRef, std::vector<Elf_Rela *>> RelocationAddendReferences;
> + std::map<llvm::StringRef, std::vector<Elf_Rel *>> RelocationReferences;
> +
> +// Store symbols in a form that allows access from ELF_Rel->r_info
> + std::vector<StringRef> SymbolNames;
> +
> + std::vector<ELFReference *> References;
> + std::vector<NameAtomPair> NameToAtomMapping;
> llvm::BumpPtrAllocator AtomStorage;
> -
> };
>
> // ReaderELF is reader object that will instantiate correct FileELF
>
- Michael Spencer
More information about the llvm-commits
mailing list