[llvm-commits] ELFReader.cpp update - Add support for References.

Michael Spencer bigcheesegs at gmail.com
Fri Aug 31 22:04:24 PDT 2012


On Fri, Aug 31, 2012 at 12:40 PM, Sid Manning <sidneym at codeaurora.org> wrote:
>
> This patch adds support for collection of References in object files.
>
> The ELFReference class is a derived class of Reference and adds
> a new member, TargetNameOffset.  TargetNameOffset is used to
> reference the index ELF_R_SYM would point to.  These offsets are
> recorded in a vector, "SymbolNames", added to the ELFFile class.
> A findAtom(StringRef) method was added to the ELFFile class that uses
> the above to locate matching atoms to fill in the Reference's target Atom.
>
> The ELFDefinedAtom class has been extended to take a ReferenceStartIndex
> and a ReferenceEndIndex.  Each Atom that contains references outside
> its own scope will have a "Reference" stored in the Reference class.
>
> The FileELF method now records the contents of the .rela/rel sections
> and stores those into a map, (sectionName, reloclist[])
>   std::map<llvm::StringRef, std::vector<Elf_Rel *>> RelocationReferences;
>
>
> --
> Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by
> The Linux Foundation

> Index: lib/ReaderWriter/ELF/ReaderELF.cpp
> ===================================================================
> --- lib/ReaderWriter/ELF/ReaderELF.cpp	(revision 162996)
> +++ lib/ReaderWriter/ELF/ReaderELF.cpp	(working copy)
> @@ -14,8 +14,10 @@
>
>  #include "lld/ReaderWriter/ReaderELF.h"
>  #include "lld/Core/File.h"
> +#include "lld/Core/Reference.h"
>
>  #include "llvm/ADT/ArrayRef.h"
> +#include "llvm/ADT/SmallString.h"
>  #include "llvm/ADT/StringRef.h"
>  #include "llvm/Object/ELF.h"
>  #include "llvm/Object/ObjectFile.h"
> @@ -39,7 +41,68 @@
>
>  namespace { // anonymous
>
> +
> +//
> +// Relocation References: Defined Atom may contain
> +// references that will need to be patched before
> +// the executable is written.
> +//
> +class ELFReference : public Reference {
> +public:
> +  ELFReference(Reference::Kind K,
> +               uint64_t O,
> +               const Atom *T,
> +               uint64_t N,
> +               Reference::Addend A)
> +    : Target(T)
> +    , TargetNameOffset(N)
> +    , OffsetInAtom(O)
> +    , Addend(A)
> +    , Kind(K) { }

Why doesn't this just take a tagged union of Elf_Rel_Impl* (for Rel and Rela)
and grab the info from that on a call?

> +
> +  virtual uint64_t offsetInAtom() const {
> +    return OffsetInAtom;
> +  }
> +
> +  virtual Kind kind() const {
> +    return Kind;
> +  }
> +
> +  virtual void setKind(Kind k) {
> +    Kind = k;
> +  }
> +
> +  virtual const Atom* target() const {
> +    return Target;
> +  }
> +
> +  virtual uint64_t targetNameOffset() const {
> +    return TargetNameOffset;
> +  }
> +
> +  virtual Addend addend() const {
> +    return Addend;
> +  }
> +
> +  virtual void setAddend(Addend a) {
> +    Addend = a;
> +  }
> +
> +  virtual void setTarget(const Atom* newAtom) {
> +    Target = newAtom;
> +  }
> +private:
> +  const Atom*  Target;
> +  uint64_t     TargetNameOffset;

TargetNameOffset is a weird way to handle mapping from ELF_R_SYM to Atom*. I
think the best way to do this would be to add a public
Elf_Sym *getSymbol(uint32_t Index)
function to ELFObjectFile. Then add a DenseMap<Elf_Sym*, Atom*> to go from
ELF_R_SYM to Atom*.

> +  uint64_t     OffsetInAtom;
> +  Addend       Addend;
> +  Kind         Kind;
> +};
> +
> +
> +
>  // This atom class corresponds to absolute symbol
> +template<llvm::support::endianness target_endianness, bool is64Bits>
>  class ELFAbsoluteAtom: public AbsoluteAtom {
>
>  public:
> @@ -126,15 +189,21 @@
>                   llvm::StringRef SN,
>                   const Elf_Sym *E,
>                   const Elf_Shdr *S,
> -                 llvm::ArrayRef<uint8_t> D)
> +                 llvm::ArrayRef<uint8_t> D,
> +                 unsigned int RS,
> +                 unsigned int RE)
> +
>      : OwningFile(F)
>      , SymbolName(N)
>      , SectionName(SN)
>      , Symbol(E)
>      , Section(S)
> -    , ContentData(D) {
> +    , ContentData(D)
> +    , ReferenceStartIndex(RS)
> +    , ReferenceEndIndex(RE) {
>      static uint64_t ordernumber = 0;
> -    _ordinal = ++ordernumber;
> +    Ordinal = ++ordernumber;
> +
>    }
>
>    virtual const class File &file() const {
> @@ -146,7 +215,7 @@
>    }
>
>    virtual uint64_t ordinal() const {
> -    return _ordinal;
> +    return Ordinal;
>    }
>
>    virtual uint64_t size() const {
> @@ -284,31 +353,45 @@
>      return ContentData;
>    }
>
> -  virtual reference_iterator begin() const {
> -    return reference_iterator(*this, nullptr);
> +  DefinedAtom::reference_iterator begin() const {
> +    uintptr_t index = ReferenceStartIndex;
> +    const void* it = reinterpret_cast<const void*>(index);
> +    return reference_iterator(*this, it);
>    }
>
> -  virtual reference_iterator end() const {
> -    return reference_iterator(*this, nullptr);
> +  DefinedAtom::reference_iterator end() const {
> +    uintptr_t index = ReferenceEndIndex;
> +    const void* it = reinterpret_cast<const void*>(index);
> +    return reference_iterator(*this, it);
>    }
>
> -private:
> -  virtual const Reference *derefIterator(const void *iter) const {
> +  const Reference* derefIterator(const void* it) const {
> +    uintptr_t index = reinterpret_cast<uintptr_t>(it);
> +    assert(index >= ReferenceStartIndex);
> +    assert(index < ReferenceEndIndex);
>      return nullptr;
>    }
> -  virtual void incrementIterator(const void *&iter) const {
> +
> +  void incrementIterator(const void*& it) const {
> +    uintptr_t index = reinterpret_cast<uintptr_t>(it);
> +    ++index;
> +    it = reinterpret_cast<const void*>(index);
>    }
>
> +private:
> +
>    const File &OwningFile;
>    llvm::StringRef SymbolName;
>    llvm::StringRef SectionName;
>    const Elf_Sym *Symbol;
>    const Elf_Shdr *Section;
>
> -  // ContentData will hold the bits that make up the atom.
> +  //  ContentData will hold the bits that make up the atom.
>    llvm::ArrayRef<uint8_t> ContentData;
>
> -  uint64_t _ordinal;
> +  uint64_t Ordinal;
> +  unsigned int ReferenceStartIndex;
> +  unsigned int ReferenceEndIndex;
>  };
>
>
> @@ -318,9 +401,23 @@
>  template<llvm::support::endianness target_endianness, bool is64Bits>
>  class FileELF: public File {
>
> -  typedef llvm::object::Elf_Sym_Impl<target_endianness, is64Bits> Elf_Sym;
> -  typedef llvm::object::Elf_Shdr_Impl<target_endianness, is64Bits> Elf_Shdr;
> +  typedef llvm::object::Elf_Sym_Impl
> +                        <target_endianness, is64Bits> Elf_Sym;
> +  typedef llvm::object::Elf_Shdr_Impl
> +                        <target_endianness, is64Bits> Elf_Shdr;
> +  typedef llvm::object::Elf_Rel_Impl
> +                        <target_endianness, is64Bits, false> Elf_Rel;
> +  typedef llvm::object::Elf_Rel_Impl
> +                        <target_endianness, is64Bits, true> Elf_Rela;
>
> +  struct NameAtomPair {
> +                 NameAtomPair(StringRef N,
> +                              Atom *A)
> +                   : name(N), atom(A) {}
> +    StringRef name;
> +    Atom *atom;
> +  };
> +

Use std::pair.

>  public:
>    FileELF(std::unique_ptr<llvm::MemoryBuffer> MB, llvm::error_code &EC) :
>            File(MB->getBufferIdentifier()) {
> @@ -343,39 +440,105 @@
>
>      std::map< const Elf_Shdr *, std::vector<const Elf_Sym *>> SectionSymbols;
>
> +//  Handle: SHT_REL and SHT_RELA sections:
> +//  Increment over the sections, when REL/RELA section types are
> +//  found add the contents to the RelocationReferences map.
> +
> +    llvm::object::section_iterator sit(Obj->begin_sections());
> +    llvm::object::section_iterator sie(Obj->end_sections());
> +    for (; sit != sie; sit.increment(EC)) {
> +      if (EC)
> +        return;
> +
> +      const Elf_Shdr *section = Obj->getElfSection(sit);
> +
> +      if (section->sh_type == llvm::ELF::SHT_RELA)
> +      {

No newline before {

> +        StringRef contents;
> +        if ((EC = Obj->getSectionContents(section, contents)))
> +          return;
> +
> +        llvm::StringRef sectionName;
> +        if ((EC = Obj->getSectionName(section, sectionName)))
> +          return;
> +
> +        Elf_Rela *relocs = const_cast<Elf_Rela *>
> +                    (reinterpret_cast<const Elf_Rela *>(contents.data()));
> +
> +        // Get rid of the leading .rela so Atoms can use their own section
> +        // name to find the relocs.
> +        sectionName = sectionName.drop_front(5);
> +        for (unsigned int i=0; i<contents.size()/sizeof(Elf_Rela); i++) {

This is incorrect. The distance between relocations is defined by
Elf_Shdr::sh_entsize. This type of iteration should probably be added to
ELFObjectFile.

> +          RelocationAddendReferences[sectionName].push_back(relocs+i);

A reference to RelocationAddendReferences[sectionName] should be
hoisted outside of the loop.

> +        }
> +
> +      }
> +
> +      if (section->sh_type == llvm::ELF::SHT_REL)
> +      {
> +        StringRef contents;
> +        if ((EC = Obj->getSectionContents(section, contents)))
> +          return;
> +
> +        llvm::StringRef sectionName;
> +        if ((EC = Obj->getSectionName(section, sectionName)))
> +          return;
> +
> +        Elf_Rel *relocs = const_cast<Elf_Rel *>
> +                    (reinterpret_cast<const Elf_Rel *>(contents.data()));
> +
> +        // Get rid of the leading .rel so Atoms can use their own section
> +        // name to find the relocs.
> +        sectionName = sectionName.drop_front(4);
> +        for (unsigned int i=0; i<contents.size()/sizeof(Elf_Rel); i++) {
> +          RelocationReferences[sectionName].push_back(relocs+i);
> +        }
> +
> +      }
> +    }
> +
> +//  Increment over all the symbols collecting atoms and symbol
> +//  names for later use.
> +
> +    SymbolNames.push_back("\0"); // ELF: Entry 0 default NULL
>      llvm::object::symbol_iterator it(Obj->begin_symbols());
>      llvm::object::symbol_iterator ie(Obj->end_symbols());
>
>      for (; it != ie; it.increment(EC)) {
>        if (EC)
>          return;
> -      llvm::object::SectionRef SR;
> -      llvm::object::section_iterator section(SR);
>
> -      if ((EC = it->getSection(section)))
> +      if ((EC = it->getSection(sit)))
>          return;
>
> -      const Elf_Shdr *Section = Obj->getElfSection(section);
> +      const Elf_Shdr *Section = Obj->getElfSection(sit);
>        const Elf_Sym  *Symbol  = Obj->getElfSymbol(it);
>
>        llvm::StringRef SymbolName;
>        if ((EC = Obj->getSymbolName(Section, Symbol, SymbolName)))
>          return;
>
> +      // Push the symbol names to a vector for easy lookup by ELF_R_SYM index.
> +      SymbolNames.push_back(SymbolName);
> +
>        if (Symbol->st_shndx == llvm::ELF::SHN_ABS) {
>          // Create an absolute atom.
> -        AbsoluteAtoms._atoms.push_back(
> -                             new (AtomStorage.Allocate<ELFAbsoluteAtom> ())
> -                             ELFAbsoluteAtom(*this, SymbolName,
> -                                             Symbol->st_value));
> +        ELFAbsoluteAtom<target_endianness, is64Bits> *NewAtom = new
> +        ELFAbsoluteAtom<target_endianness, is64Bits> (*this,
> +                                                      SymbolName,
> +                                                      Symbol->st_value);

Why is this not using placement new in AtomStorage? Also, you can use auto here.
Same for the rest of these.

> +        AbsoluteAtoms._atoms.push_back(NewAtom);
> +        NameToAtomMapping.push_back(NameAtomPair(SymbolName, NewAtom));
>
>        } else if (Symbol->st_shndx == llvm::ELF::SHN_UNDEF) {
>          // Create an undefined atom.
> -        UndefinedAtoms._atoms.push_back(
> -            new (AtomStorage.Allocate<ELFUndefinedAtom<
> -                 target_endianness, is64Bits>>())
> -                 ELFUndefinedAtom<target_endianness, is64Bits> (
> -                                 *this, SymbolName, Symbol));
> +        ELFUndefinedAtom<target_endianness, is64Bits> *NewAtom = new
> +        ELFUndefinedAtom<target_endianness, is64Bits> (*this,
> +                                                       SymbolName, Symbol);
> +
> +        UndefinedAtoms._atoms.push_back(NewAtom);
> +        NameToAtomMapping.push_back(NameAtomPair(SymbolName, NewAtom));
> +
>        } else {
>          // This is actually a defined symbol. Add it to its section's list of
>          // symbols.
> @@ -432,28 +595,84 @@
>
>          // Get the symbol's content:
>          llvm::ArrayRef<uint8_t> SymbolData;
> +        uint64_t contentSize;
>          if (si + 1 == se) {
>            // if this is the last symbol, take up the remaining data.
> -          SymbolData = llvm::ArrayRef<uint8_t>((uint8_t *)symbolContents.data()
> -                                    + (*si)->st_value,
> -                                    (IsCommon) ? 0 :
> -                                    ((i.first)->sh_size - (*si)->st_value));
> +          contentSize = (IsCommon) ? 0 : ((i.first)->sh_size - (*si)->st_value);
>          }
>          else {
> -          SymbolData = llvm::ArrayRef<uint8_t>((uint8_t *)symbolContents.data()
> -                                    + (*si)->st_value,
> -                                    (IsCommon) ? 0 :
> -                                    (*(si + 1))->st_value - (*si)->st_value);
> +          contentSize = (IsCommon) ? 0 : (*(si + 1))->st_value - (*si)->st_value;
>          }
>
> -        DefinedAtoms._atoms.push_back(
> -          new (AtomStorage.Allocate<ELFDefinedAtom<
> -               target_endianness, is64Bits> > ())
> -               ELFDefinedAtom<target_endianness, is64Bits> (*this,
> +        SymbolData = llvm::ArrayRef<uint8_t>((uint8_t *)symbolContents.data()
> +                                    + (*si)->st_value, contentSize);
> +
> +
> +        unsigned int referenceStart = References.size();
> +
> +        // Make Elf_Rela references
> +        typename std::vector<Elf_Rela *>::iterator rai  =
> +                      RelocationAddendReferences[SectionName].begin();
> +        typename std::vector<Elf_Rela *>::iterator rae =
> +                      RelocationAddendReferences[SectionName].end();
> +
> +        // Only relocations that are inside the domain of the atom are
> +        // added.
> +        for (; rai != rae; rai++) {

This can use a c++11 for loop.

> +          if (((*rai)->r_offset >= (*si)->st_value) &&
> +              ((*rai)->r_offset < (*si)->st_value+contentSize)) {
> +
> +            ELFReference *eref = new ELFReference (
> +                (*rai)->getType(), (*rai)->r_offset-(*si)->st_value,
> +                nullptr, (*rai)->getSymbol(), (*rai)->r_addend);

This should use a BumpPtrAllocator too. Currently it's just leaked.

> +
> +            References.push_back(eref);
> +          }
> +        }
> +
> +        // Make Elf_Rel references
> +        typename std::vector<Elf_Rel *>::iterator ri  =
> +                      RelocationReferences[SectionName].begin();
> +        typename std::vector<Elf_Rel *>::iterator re =
> +                      RelocationReferences[SectionName].end();
> +
> +        // Only relocations that are inside the domain of the atom are
> +        // added.
> +        for (; ri != re; ri++) {
> +          if (((*ri)->r_offset >= (*si)->st_value) &&
> +              ((*ri)->r_offset < (*si)->st_value+contentSize)) {
> +
> +            ELFReference *eref = new ELFReference (
> +                (*ri)->getType(), (*ri)->r_offset-(*si)->st_value,
> +                nullptr, (*ri)->getSymbol(), 0);
> +
> +            References.push_back(eref);
> +          }
> +        }
> +
> +        ELFDefinedAtom<target_endianness, is64Bits> *NewAtom = new
> +        ELFDefinedAtom<target_endianness, is64Bits> (*this,
>                               SymbolName, SectionName,
> -                             *si, i.first, SymbolData));
> +                             *si, i.first, SymbolData,
> +                             referenceStart, References.size());
> +
> +        DefinedAtoms._atoms.push_back(NewAtom);
> +        NameToAtomMapping.push_back(NameAtomPair(SymbolName, NewAtom));
> +
>        }
>      }
> +
> +// All the Atoms and References are created.  Now update each Reference's
> +// target with the Atom pointer it refers to.
> +    typename std::vector<ELFReference *>::iterator ri  = References.begin();
> +    typename std::vector<ELFReference *>::iterator eri = References.end();
> +
> +    for (; ri != eri; ri++) {
> +      StringRef TargetSymbolName = SymbolNames[(*ri)->targetNameOffset()];
> +      Atom *target = findAtom (TargetSymbolName);
> +      (*ri)->setTarget(target);
> +    }
> +
>    }
>
>    virtual void addAtom(const Atom&) {
> @@ -476,6 +695,15 @@
>      return AbsoluteAtoms;
>    }
>
> +  Atom *findAtom(StringRef name) {
> +    for (auto &ci : NameToAtomMapping) {
> +      if (ci.name == name)
> +        return ci.atom;
> +    }
> +    return nullptr;
> +  }
> +
> +
>  private:
>    std::unique_ptr<llvm::object::ELFObjectFile<target_endianness, is64Bits> >
>        Obj;
> @@ -483,8 +711,20 @@
>    atom_collection_vector<UndefinedAtom>     UndefinedAtoms;
>    atom_collection_vector<SharedLibraryAtom> SharedLibraryAtoms;
>    atom_collection_vector<AbsoluteAtom>      AbsoluteAtoms;
> +
> +// This contains a list of relocations references.  In ELF if a
> +// section named, ".text" that has relocations will also have
> +// a section named ".rel.text" or ".rela.text" which will hold the
> +// entries. -- .rel or .rela is prepended to create the SHT_REL(A) section.
> +  std::map<llvm::StringRef, std::vector<Elf_Rela *>> RelocationAddendReferences;
> +  std::map<llvm::StringRef, std::vector<Elf_Rel *>> RelocationReferences;
> +
> +// Store symbols in a form that allows access from ELF_Rel->r_info
> +  std::vector<StringRef> SymbolNames;
> +
> +  std::vector<ELFReference *> References;
> +  std::vector<NameAtomPair> NameToAtomMapping;
>    llvm::BumpPtrAllocator AtomStorage;
> -
>  };
>
>  //  ReaderELF is reader object that will instantiate correct FileELF
>

- Michael Spencer



More information about the llvm-commits mailing list