> <span style="font-family:arial,sans-serif;font-size:13px;background-color:rgb(255,255,255)">Not that your patch should address this, but we need proper diagnostics.</span><div><font face="arial, sans-serif"><br></font></div>
<div><font face="arial, sans-serif">I think a model based on Clang DiagnosticsEngine and DiagnosticClient + TableGen is probably a good way to go (maybe starting from scratch we could make the TableGen part of it a bit more structured with less repetition).</font></div>
<div><font face="arial, sans-serif"><br></font></div><div><font face="arial, sans-serif">--Sean Silva<br></font><br><div class="gmail_quote">On Wed, Jun 13, 2012 at 10:45 PM, Michael Spencer <span dir="ltr"><<a href="mailto:bigcheesegs@gmail.com" target="_blank">bigcheesegs@gmail.com</a>></span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class="im">On Wed, Jun 13, 2012 at 5:07 PM, Hemant Kulkarni <<a href="mailto:khemant@codeaurora.org">khemant@codeaurora.org</a>> wrote:<br>

</div><div><div class="h5">> Hello Everyone ,<br>
><br>
><br>
><br>
> I am working to get ELF port in LLD linker. I am attaching a patch that<br>
> fills in basic sub-classes in the ReaderELF.  I request feedback for this<br>
> code.  I also want suggestions for ways to test this Reader as I notice<br>
> there is no way right now to test COFF reader as well.<br>
><br>
><br>
><br>
> The ReaderELF will read an object file and instantiate an ELFObjectFile<br>
> object and fill in the atom subclasses that correspond for ELF.<br>
><br>
><br>
><br>
><br>
><br>
> --<br>
><br>
> Hemant Kulkarni<br>
><br>
> <a href="mailto:khemant@codeaurora.org">khemant@codeaurora.org</a><br>
><br>
> Qualcomm Innovation Center, Inc is a member of Code Aurora Forum<br>
<br>
</div></div>The major issue I see is that the generic object API is too high<br>
level. You should be using the ELF specific API that gives you access<br>
to the raw symbol and section structures as the COFF reader does. You<br>
should also take a look at the llvm style guide.<br>
<br>
As for testing we need a yaml2obj for ELF. Although I'm fine with<br>
testing with binary files temporarily.<br>
<br>
Additional comments inline.<br>
<br>
diff --git a/lib/ReaderWriter/ELF/ReaderELF.cpp<br>
b/lib/ReaderWriter/ELF/ReaderELF.cpp<br>
index ba23990..f4bd52d 100644<br>
--- a/lib/ReaderWriter/ELF/ReaderELF.cpp<br>
+++ b/lib/ReaderWriter/ELF/ReaderELF.cpp<br>
@@ -1,41 +1,456 @@<br>
-//===- lib/ReaderWriter/ELF/ReaderELF.cpp --------------------------------===//<br>
+//===- lib/ReaderWriter/ELF/ReaderELF.cpp<br>
---------------------------------===//<br>
 //<br>
 //                             The LLVM Linker<br>
 //<br>
 // This file is distributed under the University of Illinois Open Source<br>
 // License. See LICENSE.TXT for details.<br>
 //<br>
-//===----------------------------------------------------------------------===//<br>
+//===---------------------------------------------------------------------===//<br>
+//<br>
+// This file contains the ELF Reader and all helper sub classes<br>
+// to consume an ELF file and produces atoms out of it.<br>
+//<br>
+//===---------------------------------------------------------------------===//<br>
<br>
 #include "lld/ReaderWriter/ReaderELF.h"<br>
 #include "lld/Core/File.h"<br>
<br>
+#include "llvm/ADT/ArrayRef.h"<br>
+#include "llvm/ADT/StringRef.h"<br>
+#include "llvm/Support/Allocator.h"<br>
 #include "llvm/Support/Casting.h"<br>
 #include "llvm/Support/ErrorHandling.h"<br>
 #include "llvm/Support/Memory.h"<br>
 #include "llvm/Support/MemoryBuffer.h"<br>
 #include "llvm/Support/raw_ostream.h"<br>
 #include "llvm/Support/system_error.h"<br>
+#include "llvm/Support/Endian.h"<br>
+#include "llvm/Support/MathExtras.h"<br>
+#include "llvm/Object/ObjectFile.h"<br>
+#include "llvm/Object/ELF.h"<br>
+#include "llvm/Support/ELF.h"<br>
<br>
 #include <map><br>
 #include <vector><br>
<br>
+using namespace lld;<br>
<br>
-namespace lld {<br>
+//     This atom class corresponds to absolute symbol<br>
<br>
-ReaderOptionsELF::ReaderOptionsELF() {<br>
-}<br>
+class ELFAbsoluteAtom: public AbsoluteAtom {<br>
+public:<br>
+       ELFAbsoluteAtom(const File &F, llvm::StringRef N, uint64_t V) :<br>
+               OwningFile(F), Name(N), Value(V) {<br>
+       }<br>
<br>
-ReaderOptionsELF::~ReaderOptionsELF() {<br>
-}<br>
+       virtual const class File& file() const {<br>
+               return OwningFile;<br>
+       }<br>
<br>
+       virtual llvm::StringRef name() const {<br>
+               return Name;<br>
+       }<br>
<br>
+       virtual uint64_t value() const {<br>
+               return Value;<br>
+       }<br>
<br>
-Reader* createReaderELF(const ReaderOptionsELF &options) {<br>
-  assert(0 && "ELF Reader not yet implemented");<br>
-  return nullptr;<br>
-}<br>
+private:<br>
+       const File &OwningFile;<br>
+       llvm::StringRef Name;<br>
+       uint64_t Value;<br>
+};<br>
+<br>
+//     This atom corresponds to undefined symbols.<br>
+class ELFUndefinedAtom: public UndefinedAtom {<br>
+public:<br>
+       ELFUndefinedAtom(const File &F, llvm::StringRef N,<br>
+                       llvm::object::symbol_iterator Symb) :<br>
+               OwningFile(F), Name(N), Symbol(Symb) {<br>
+       }<br>
+<br>
+       virtual const class File& file() const {<br>
+               return OwningFile;<br>
+       }<br>
+<br>
+       virtual llvm::StringRef name() const {<br>
+               return Name;<br>
+       }<br>
+<br>
+       //       FIXME What distinguishes a symbol in ELF that can help<br>
+       //       decide if the symbol is undefined only during build and not<br>
+       //       runtime? This will make us choose canBeNullAtBuildtime and<br>
+       //       canBeNullAtRuntime<br>
+       //<br>
+       virtual CanBeNull canBeNull() const {<br>
+               uint32_t Result;<br>
+<br>
+               Symbol->getFlags(Result);<br>
+               if (Result == llvm::object::SymbolRef::SF_Weak)<br>
+                       return CanBeNull::canBeNullAtBuildtime;<br>
+               else<br>
+                       return CanBeNull::canBeNullNever;<br>
+       }<br>
+<br>
+private:<br>
+       const File &OwningFile;<br>
+       llvm::StringRef Name;<br>
+       llvm::object::symbol_iterator Symbol;<br>
+};<br>
+<br>
+template<llvm::support::endianness target_endianness, bool is64Bits><br>
+class ELFDefinedAtom: public DefinedAtom {<br>
+public:<br>
+       ELFDefinedAtom(const File &F, llvm::StringRef N,<br>
+                       llvm::object::symbol_iterator Symb,<br>
+                       llvm::object::section_iterator Sec, llvm::ArrayRef<uint8_t> D) :<br>
<br>
This should be using the Object/ELF.h interface instead of the generic<br>
interface. This would solve a lot of the ambiguity issues below.<br>
<br>
+               OwningFile(F), Name(N), Symbol(Symb), Section(Sec), Data(D) {<br>
+               static uint64_t ordernumber = 0;<br>
+               _ordinal = ++ordernumber;<br>
+       }<br>
+<br>
+       virtual const class File& file() const {<br>
+               return OwningFile;<br>
+       }<br>
+<br>
+       virtual llvm::StringRef name() const {<br>
+               return Name;<br>
+       }<br>
+<br>
+       virtual uint64_t ordinal() const {<br>
+               return _ordinal;<br>
+       }<br>
+<br>
+       virtual uint64_t size() const {<br>
+               uint64_t size;<br>
+               Symbol->getSize(size);<br>
<br>
This might not always be true. We need to make sure that all the data<br>
in a section is covered by an atom.<br>
<br>
+               return size;<br>
+       }<br>
+<br>
+       //       FIXME  We also need to find visibility<br>
+       //       This will help us determine scopeLinkageUnit<br>
+       //       we also need to incorporate other visibilities<br>
+<br>
+<br>
+       virtual Scope scope() const {<br>
+               uint32_t Result;<br>
+               Symbol->getFlags(Result);<br>
+               if (Result == llvm::object::SymbolRef::SF_Global)<br>
+                       return scopeGlobal;<br>
+               else<br>
+                       return scopeTranslationUnit;<br>
+       }<br>
+<br>
+       //       FIXME   need to revisit this in future<br>
+<br>
+       virtual Interposable interposable() const {<br>
+               return interposeNo;<br>
+       }<br>
+<br>
+       //      FIXME What ways can we determine this in ELF?<br>
+<br>
+       virtual Merge merge() const {<br>
+               return mergeNo;<br>
+       }<br>
+<br>
+       virtual ContentType contentType() const {<br>
+               ContentType type;<br>
+               bool Result = false;<br>
+               llvm::error_code ec;<br>
+               ec = Section->isText(Result);<br>
+               if (!ec && Result)<br>
+                       return typeCode;<br>
+<br>
+               ec = Section->isData(Result);<br>
+               if (!ec && Result)<br>
+                       return typeData;<br>
+<br>
+//              The ZeroFill type is a bit ambiguous in case of ELF<br>
+//              Should this mean it occupies no space in the disk image?<br>
+//              A .bss is such section<br>
<br>
This would be easier to figure out using the Object/ELF.h interface.<br>
<br>
+<br>
+<br>
+               ec = Section->isBSS(Result);<br>
+               if (!ec && Result)<br>
+                       return typeZeroFill;<br>
+<br>
+               return typeUnknown;<br>
+       }<br>
+<br>
+       virtual Alignment alignment() const {<br>
+               uint64_t Result;<br>
+               llvm::error_code ec;<br>
+               ec = Section->getAlignment(Result);<br>
+               if (ec)<br>
+                       return Alignment(-1);<br>
+               return Alignment(llvm::Log2_64(Result));<br>
+<br>
+       }<br>
+<br>
+       virtual SectionChoice sectionChoice() const {<br>
+               return sectionBasedOnContent;<br>
+       }<br>
+<br>
+       virtual llvm::StringRef customSectionName() const {<br>
+               return "";<br>
+       }<br>
+<br>
+       virtual DeadStripKind deadStrip() const {<br>
+               return deadStripNormal;<br>
+       }<br>
+<br>
+       //        This does not cover the possibility of the initial<br>
+       //        write permissions for PLT stubs<br>
+       //        The API available right now is an ugly way of doing it.<br>
+       //        We  need a way to explicitly<br>
+       //        query the SHF_* attributes of a section.<br>
<br>
Same issue as above.<br>
<br>
+<br>
+       virtual ContentPermissions permissions() const {<br>
+               char Result;<br>
+               Symbol->getNMTypeChar(Result);<br>
+               if (Result == 'r')<br>
+                       return permR__;<br>
+               else if (Result == 'd')<br>
+                       return permRW_;<br>
+               else if (Result == 't')<br>
+                       return permR_X;<br>
+               else<br>
+                       return perm___;<br>
+<br>
+       }<br>
+<br>
+       //       Many non ARM architectures use ELF file format<br>
+       //       This not really a place to put a architecture<br>
+       //       specific method in an atom. A better approach is<br>
+       //       needed.<br>
+       //<br>
<br>
Agreed.<br>
<br>
+       virtual bool isThumb() const {<br>
+               return false;<br>
+       }<br>
+<br>
+       //      FIXME Not Sure if ELF supports alias atoms. Find out more.<br>
+       virtual bool isAlias() const {<br>
+               return false;<br>
+       }<br>
+<br>
+       virtual llvm::ArrayRef<uint8_t> rawContent() const {<br>
+               return Data;<br>
+       }<br>
<br>
+       virtual reference_iterator begin() const {<br>
+               return reference_iterator(*this, nullptr);<br>
+       }<br>
<br>
-} // namespace<br>
+       virtual reference_iterator end() const {<br>
+               return reference_iterator(*this, nullptr);<br>
+       }<br>
<br>
+private:<br>
+       virtual const Reference* derefIterator(const void* iter) const {<br>
+               return nullptr;<br>
+       }<br>
+       virtual void incrementIterator(const void*& iter) const {<br>
+       }<br>
+<br>
+       const File &OwningFile;<br>
+       llvm::StringRef Name;<br>
+       const llvm::object::symbol_iterator Symbol;<br>
+       const llvm::object::section_iterator Section;<br>
+       llvm::ArrayRef<uint8_t> Data;<br>
+       uint64_t _ordinal;<br>
+};<br>
+<br>
+//      FileELF will read a binary, find out based on the symbol table contents<br>
+//      what kind of symbol it is and create corresponding atoms for it<br>
+<br>
+template<llvm::support::endianness target_endianness, bool is64Bits><br>
+class FileELF: public File {<br>
+public:<br>
+       FileELF(std::unique_ptr<llvm::MemoryBuffer> MB, llvm::error_code &EC) :<br>
+               File(MB->getBufferIdentifier()) {<br>
+               llvm::OwningPtr<llvm::object::Binary> Bin;<br>
+               EC = llvm::object::createBinary(MB.release(), Bin);<br>
+               if (EC)<br>
+                       return;<br>
+               //               Point Obj to correct class and bitwidth ELF object<br>
+<br>
+<br>
+               Obj.reset(llvm::dyn_cast<llvm::object::ELFObjectFile<target_endianness,<br>
+                               is64Bits> >(Bin.get()));<br>
+<br>
+               if (!Obj) {<br>
+                       EC = make_error_code(llvm::object::object_error::invalid_file_type);<br>
+                       return;<br>
+               }<br>
+               Bin.take();<br>
+<br>
+               //               Now we need to iterate over symtab  and<br>
+               //               get all the symbols<br>
+               //               convert these symbols to atoms<br>
+               //               FIXME what about dynamic symbols?<br>
<br>
I believe the Object/ELF.h interface has a way to do this. It was added for<br>
RuntimeDyld.<br>
<br>
+<br>
+<br>
+               llvm::object::symbol_iterator si(Obj->begin_symbols());<br>
+               llvm::object::symbol_iterator se(Obj->end_symbols());<br>
+<br>
+               for (; si != se; si.increment(EC)) {<br>
+                       if (EC)<br>
+                               llvm::report_fatal_error("Could not read all symbols");<br>
<br>
Not that your patch should address this, but we need proper diagnostics.<br>
<br>
+                       llvm::StringRef Name;<br>
+                       llvm::object::SymbolRef::Type type;<br>
+                       uint32_t flags = 0;<br>
+<br>
+                       EC = si->getName(Name);<br>
+                       if (EC)<br>
+                               llvm::report_fatal_error("Could not get symbol name");<br>
+<br>
+                       EC = si->getType(type);<br>
+                       if (EC)<br>
+                               llvm::report_fatal_error("Could not get symbol type");<br>
+<br>
+                       EC = si->getFlags(flags);<br>
+                       if (EC)<br>
+                               llvm::report_fatal_error("Could not get symbol flags");<br>
+<br>
+                       if (flags & llvm::object::SymbolRef::SF_Absolute<br>
+                                       == llvm::object::SymbolRef::SF_Absolute) {<br>
+                               uint64_t AbsValue;<br>
+<br>
+                               EC = si->getAddress(AbsValue);<br>
+                               if (EC)<br>
+                                       llvm::report_fatal_error(<br>
+                                                       "Could not get absolute symbol st_value");<br>
+<br>
+                               AbsoluteAtoms._atoms.push_back(new (AtomStorage.Allocate<<br>
+                                               ELFAbsoluteAtom>())ELFAbsoluteAtom(*this, Name,<br>
+                                               AbsValue));<br>
+                       } else if (flags & llvm::object::SymbolRef::SF_Undefined<br>
+                                       == llvm::object::SymbolRef::SF_Undefined) {<br>
+                               UndefinedAtoms._atoms.push_back(new (AtomStorage.Allocate<<br>
+                               ELFUndefinedAtom>()) ELFUndefinedAtom(*this, Name, si));<br>
+<br>
+                       } else {<br>
+                               bool Result;<br>
+                               //FIXME need to figure out a way to handle common type<br>
+                               //symbols. For now we abort<br>
+<br>
+                               EC = si->isCommon(Result);<br>
+                               assert(!Result && "STT_COMMON type is not yet implemented");<br>
+                               //TODO We need to think about how to put other<br>
+                               //symbol types into the defined atom model.<br>
<br>
This is a major point. We need a better way to handle different types of atoms<br>
then just the predefined 4.<br>
<br>
+<br>
+<br>
+                               if (type == llvm::object::SymbolRef::ST_Data || type<br>
+                                               == llvm::object::SymbolRef::ST_Function) {<br>
+<br>
+                                       llvm::object::SectionRef SR;<br>
+                                       llvm::object::section_iterator section(SR);<br>
+<br>
+                                       EC = si->getSection(section);<br>
+                                       if (EC)<br>
+                                               return;<br>
+                                       //FIXME We need an API to get the actual st_value<br>
+                                       //for now shorting this array reference to<br>
+                                       //suppress errors during build time.<br>
+                                       //Need to fix this<br>
<br>
We have this in Object/ELF.h.<br>
<br>
+<br>
+                                       llvm::ArrayRef<uint8_t> val;<br>
+                                       DefinedAtoms._atoms.push_back(<br>
+                                               new (AtomStorage.Allocate<ELFDefinedAtom<<br>
+                                                       target_endianness, is64Bits> >()) ELFDefinedAtom<<br>
+                                                       target_endianness, is64Bits>(*this, Name,<br>
+                                                       si, section, val));<br>
+<br>
+                               }<br>
+                       }<br>
+               }<br>
+       }<br>
+<br>
+       virtual void addAtom(const Atom&) {<br>
+               llvm_unreachable("cannot add atoms to native .o files");<br>
+       }<br>
+<br>
+       virtual const atom_collection<DefinedAtom> &defined() const {<br>
+               return DefinedAtoms;<br>
+       }<br>
+<br>
+       virtual const atom_collection<UndefinedAtom> &undefined() const {<br>
+               return UndefinedAtoms;<br>
+       }<br>
+<br>
+       virtual const atom_collection<SharedLibraryAtom> &sharedLibrary() const {<br>
+               return SharedLibraryAtoms;<br>
+       }<br>
+<br>
+       virtual const atom_collection<AbsoluteAtom> &absolute() const {<br>
+               return AbsoluteAtoms;<br>
+       }<br>
+<br>
+private:<br>
+       std::unique_ptr<llvm::object::ELFObjectFile<target_endianness, is64Bits> ><br>
+                       Obj;<br>
+       atom_collection_vector<DefinedAtom> DefinedAtoms;<br>
+       atom_collection_vector<UndefinedAtom> UndefinedAtoms;<br>
+       atom_collection_vector<SharedLibraryAtom> SharedLibraryAtoms;<br>
+       atom_collection_vector<AbsoluteAtom> AbsoluteAtoms;<br>
+       llvm::BumpPtrAllocator AtomStorage;<br>
+<br>
+};<br>
+<br>
+//     ReaderELF is reader object that will instantiate correct FileELF<br>
+//     by examining the memory buffer for ELF class and bitwidth<br>
+<br>
+class ReaderELF: public Reader {<br>
+public:<br>
+       ReaderELF(const ReaderOptionsELF &options) :<br>
+               _options(options) {<br>
+       }<br>
+       error_code parseFile(std::unique_ptr<MemoryBuffer> mb, std::vector<<br>
+                       std::unique_ptr<File> > &result) {<br>
+<br>
+               std::pair<unsigned char, unsigned char> Ident =<br>
+                               llvm::object::getElfArchType(&*mb);<br>
+               llvm::error_code ec;<br>
+               //              Instantiate the correct FileELF template instance<br>
+               //              based on the Ident pair. Once the File is created<br>
+               //               we push the file to the vector of files already<br>
+               //               created during parser's life.<br>
+<br>
+               std::unique_ptr<File> f;<br>
+               if (Ident.first == llvm::ELF::ELFCLASS32 && Ident.second<br>
+                               == llvm::ELF::ELFDATA2LSB) {<br>
+                       f.reset(new FileELF<llvm::support::little, false> (std::move(mb),<br>
+                                       ec));<br>
+               } else if (Ident.first == llvm::ELF::ELFCLASS32 && Ident.second<br>
+                               == llvm::ELF::ELFDATA2MSB) {<br>
+                       f.reset(new FileELF<llvm::support::big, false> (std::move(mb), ec));<br>
+               } else if (Ident.first == llvm::ELF::ELFCLASS64 && Ident.second<br>
+                               == llvm::ELF::ELFDATA2MSB) {<br>
+                       f.reset(new FileELF<llvm::support::big, true> (std::move(mb), ec));<br>
+               } else if (Ident.first == llvm::ELF::ELFCLASS64 && Ident.second<br>
+                               == llvm::ELF::ELFDATA2LSB) {<br>
+                       f.reset(new FileELF<llvm::support::little, true>(std::move(mb),<br>
+                                       ec));<br>
+               }<br>
+               if (ec)<br>
+                       return ec;<br>
+               result.push_back(std::move(f));<br>
+               return error_code::success();<br>
+       }<br>
+private:<br>
+       const ReaderOptionsELF &_options;<br>
+};<br>
+<br>
+ReaderOptionsELF::ReaderOptionsELF() {<br>
+}<br>
+<br>
+ReaderOptionsELF::~ReaderOptionsELF() {<br>
+}<br>
+<br>
+namespace lld {<br>
+//This reader is not complete so I wont "enable" the knob<br>
+       Reader* createReaderELF(const ReaderOptionsELF &options) {<br>
+               assert(0 && "ELF Reader not yet implemented");<br>
<br>
It's fine to enable it. And we use llvm_unreachable instead of assert 0.<br>
<br>
+               return new ReaderELF(options);<br>
+       }<br>
+} // namespace lld<br>
<br>
Thanks for working on this. I think with using the ELF api and style<br>
fixes this would be good.<br>
<span class="HOEnZb"><font color="#888888"><br>
- Michael Spencer<br>
</font></span><div class="HOEnZb"><div class="h5"><br>
_______________________________________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@cs.uiuc.edu">llvm-commits@cs.uiuc.edu</a><br>
<a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits" target="_blank">http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits</a><br>
</div></div></blockquote></div><br></div>