[llvm-commits] LLD PATCH: add ELF reader basic skeleton code

Michael Spencer bigcheesegs at gmail.com
Wed Jun 13 22:45:45 PDT 2012


On Wed, Jun 13, 2012 at 5:07 PM, Hemant Kulkarni <khemant at codeaurora.org> wrote:
> Hello Everyone ,
>
>
>
> I am working to get ELF port in LLD linker. I am attaching a patch that
> fills in basic sub-classes in the ReaderELF.  I request feedback for this
> code.  I also want suggestions for ways to test this Reader as I notice
> there is no way right now to test COFF reader as well.
>
>
>
> The ReaderELF will read an object file and instantiate an ELFObjectFile
> object and fill in the atom subclasses that correspond for ELF.
>
>
>
>
>
> --
>
> Hemant Kulkarni
>
> khemant at codeaurora.org
>
> Qualcomm Innovation Center, Inc is a member of Code Aurora Forum

The major issue I see is that the generic object API is too high
level. You should be using the ELF specific API that gives you access
to the raw symbol and section structures as the COFF reader does. You
should also take a look at the llvm style guide.

As for testing we need a yaml2obj for ELF. Although I'm fine with
testing with binary files temporarily.

Additional comments inline.

diff --git a/lib/ReaderWriter/ELF/ReaderELF.cpp
b/lib/ReaderWriter/ELF/ReaderELF.cpp
index ba23990..f4bd52d 100644
--- a/lib/ReaderWriter/ELF/ReaderELF.cpp
+++ b/lib/ReaderWriter/ELF/ReaderELF.cpp
@@ -1,41 +1,456 @@
-//===- lib/ReaderWriter/ELF/ReaderELF.cpp --------------------------------===//
+//===- lib/ReaderWriter/ELF/ReaderELF.cpp
---------------------------------===//
 //
 //                             The LLVM Linker
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
-//===----------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
+//
+// This file contains the ELF Reader and all helper sub classes
+// to consume an ELF file and produces atoms out of it.
+//
+//===---------------------------------------------------------------------===//

 #include "lld/ReaderWriter/ReaderELF.h"
 #include "lld/Core/File.h"

+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Memory.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/system_error.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/ELF.h"
+#include "llvm/Support/ELF.h"

 #include <map>
 #include <vector>

+using namespace lld;

-namespace lld {
+// 	This atom class corresponds to absolute symbol

-ReaderOptionsELF::ReaderOptionsELF() {
-}
+class ELFAbsoluteAtom: public AbsoluteAtom {
+public:
+	ELFAbsoluteAtom(const File &F, llvm::StringRef N, uint64_t V) :
+		OwningFile(F), Name(N), Value(V) {
+	}

-ReaderOptionsELF::~ReaderOptionsELF() {
-}
+	virtual const class File& file() const {
+		return OwningFile;
+	}

+	virtual llvm::StringRef name() const {
+		return Name;
+	}

+	virtual uint64_t value() const {
+		return Value;
+	}

-Reader* createReaderELF(const ReaderOptionsELF &options) {
-  assert(0 && "ELF Reader not yet implemented");
-  return nullptr;
-}
+private:
+	const File &OwningFile;
+	llvm::StringRef Name;
+	uint64_t Value;
+};
+
+//	This atom corresponds to undefined symbols.
+class ELFUndefinedAtom: public UndefinedAtom {
+public:
+	ELFUndefinedAtom(const File &F, llvm::StringRef N,
+			llvm::object::symbol_iterator Symb) :
+		OwningFile(F), Name(N), Symbol(Symb) {
+	}
+
+	virtual const class File& file() const {
+		return OwningFile;
+	}
+
+	virtual llvm::StringRef name() const {
+		return Name;
+	}
+
+	//	 FIXME What distinguishes a symbol in ELF that can help
+	//	 decide if the symbol is undefined only during build and not
+	//	 runtime? This will make us choose canBeNullAtBuildtime and
+	//	 canBeNullAtRuntime
+	//
+	virtual CanBeNull canBeNull() const {
+		uint32_t Result;
+
+		Symbol->getFlags(Result);
+		if (Result == llvm::object::SymbolRef::SF_Weak)
+			return CanBeNull::canBeNullAtBuildtime;
+		else
+			return CanBeNull::canBeNullNever;
+	}
+
+private:
+	const File &OwningFile;
+	llvm::StringRef Name;
+	llvm::object::symbol_iterator Symbol;
+};
+
+template<llvm::support::endianness target_endianness, bool is64Bits>
+class ELFDefinedAtom: public DefinedAtom {
+public:
+	ELFDefinedAtom(const File &F, llvm::StringRef N,
+			llvm::object::symbol_iterator Symb,
+			llvm::object::section_iterator Sec, llvm::ArrayRef<uint8_t> D) :

This should be using the Object/ELF.h interface instead of the generic
interface. This would solve a lot of the ambiguity issues below.

+		OwningFile(F), Name(N), Symbol(Symb), Section(Sec), Data(D) {
+		static uint64_t ordernumber = 0;
+		_ordinal = ++ordernumber;
+	}
+
+	virtual const class File& file() const {
+		return OwningFile;
+	}
+
+	virtual llvm::StringRef name() const {
+		return Name;
+	}
+
+	virtual uint64_t ordinal() const {
+		return _ordinal;
+	}
+
+	virtual uint64_t size() const {
+		uint64_t size;
+		Symbol->getSize(size);

This might not always be true. We need to make sure that all the data
in a section is covered by an atom.

+		return size;
+	}
+
+	//	 FIXME  We also need to find visibility
+	//	 This will help us determine scopeLinkageUnit
+	//	 we also need to incorporate other visibilities
+
+
+	virtual Scope scope() const {
+		uint32_t Result;
+		Symbol->getFlags(Result);
+		if (Result == llvm::object::SymbolRef::SF_Global)
+			return scopeGlobal;
+		else
+			return scopeTranslationUnit;
+	}
+
+	//	 FIXME   need to revisit this in future
+
+	virtual Interposable interposable() const {
+		return interposeNo;
+	}
+
+	//	FIXME What ways can we determine this in ELF?
+
+	virtual Merge merge() const {
+		return mergeNo;
+	}
+
+	virtual ContentType contentType() const {
+		ContentType type;
+		bool Result = false;
+		llvm::error_code ec;
+		ec = Section->isText(Result);
+		if (!ec && Result)
+			return typeCode;
+
+		ec = Section->isData(Result);
+		if (!ec && Result)
+			return typeData;
+
+//		 The ZeroFill type is a bit ambiguous in case of ELF
+//		 Should this mean it occupies no space in the disk image?
+//		 A .bss is such section

This would be easier to figure out using the Object/ELF.h interface.

+
+
+		ec = Section->isBSS(Result);
+		if (!ec && Result)
+			return typeZeroFill;
+
+		return typeUnknown;
+	}
+
+	virtual Alignment alignment() const {
+		uint64_t Result;
+		llvm::error_code ec;
+		ec = Section->getAlignment(Result);
+		if (ec)
+			return Alignment(-1);
+		return Alignment(llvm::Log2_64(Result));
+
+	}
+
+	virtual SectionChoice sectionChoice() const {
+		return sectionBasedOnContent;
+	}
+
+	virtual llvm::StringRef customSectionName() const {
+		return "";
+	}
+
+	virtual DeadStripKind deadStrip() const {
+		return deadStripNormal;
+	}
+
+	//	  This does not cover the possibility of the initial
+	//	  write permissions for PLT stubs
+	//	  The API available right now is an ugly way of doing it.
+	//	  We  need a way to explicitly
+	//	  query the SHF_* attributes of a section.

Same issue as above.

+
+	virtual ContentPermissions permissions() const {
+		char Result;
+		Symbol->getNMTypeChar(Result);
+		if (Result == 'r')
+			return permR__;
+		else if (Result == 'd')
+			return permRW_;
+		else if (Result == 't')
+			return permR_X;
+		else
+			return perm___;
+
+	}
+
+	//	 Many non ARM architectures use ELF file format
+	//	 This not really a place to put a architecture
+	//	 specific method in an atom. A better approach is
+	//	 needed.
+	//

Agreed.

+	virtual bool isThumb() const {
+		return false;
+	}
+
+	//	FIXME Not Sure if ELF supports alias atoms. Find out more.
+	virtual bool isAlias() const {
+		return false;
+	}
+
+	virtual llvm::ArrayRef<uint8_t> rawContent() const {
+		return Data;
+	}

+	virtual reference_iterator begin() const {
+		return reference_iterator(*this, nullptr);
+	}

-} // namespace
+	virtual reference_iterator end() const {
+		return reference_iterator(*this, nullptr);
+	}

+private:
+	virtual const Reference* derefIterator(const void* iter) const {
+		return nullptr;
+	}
+	virtual void incrementIterator(const void*& iter) const {
+	}
+
+	const File &OwningFile;
+	llvm::StringRef Name;
+	const llvm::object::symbol_iterator Symbol;
+	const llvm::object::section_iterator Section;
+	llvm::ArrayRef<uint8_t> Data;
+	uint64_t _ordinal;
+};
+
+//	 FileELF will read a binary, find out based on the symbol table contents
+//	 what kind of symbol it is and create corresponding atoms for it
+
+template<llvm::support::endianness target_endianness, bool is64Bits>
+class FileELF: public File {
+public:
+	FileELF(std::unique_ptr<llvm::MemoryBuffer> MB, llvm::error_code &EC) :
+		File(MB->getBufferIdentifier()) {
+		llvm::OwningPtr<llvm::object::Binary> Bin;
+		EC = llvm::object::createBinary(MB.release(), Bin);
+		if (EC)
+			return;
+		//		 Point Obj to correct class and bitwidth ELF object
+
+
+		Obj.reset(llvm::dyn_cast<llvm::object::ELFObjectFile<target_endianness,
+				is64Bits> >(Bin.get()));
+
+		if (!Obj) {
+			EC = make_error_code(llvm::object::object_error::invalid_file_type);
+			return;
+		}
+		Bin.take();
+
+		//		 Now we need to iterate over symtab  and
+		//		 get all the symbols
+		//		 convert these symbols to atoms
+		//		 FIXME what about dynamic symbols?

I believe the Object/ELF.h interface has a way to do this. It was added for
RuntimeDyld.

+
+
+		llvm::object::symbol_iterator si(Obj->begin_symbols());
+		llvm::object::symbol_iterator se(Obj->end_symbols());
+
+		for (; si != se; si.increment(EC)) {
+			if (EC)
+				llvm::report_fatal_error("Could not read all symbols");

Not that your patch should address this, but we need proper diagnostics.

+			llvm::StringRef Name;
+			llvm::object::SymbolRef::Type type;
+			uint32_t flags = 0;
+
+			EC = si->getName(Name);
+			if (EC)
+				llvm::report_fatal_error("Could not get symbol name");
+
+			EC = si->getType(type);
+			if (EC)
+				llvm::report_fatal_error("Could not get symbol type");
+
+			EC = si->getFlags(flags);
+			if (EC)
+				llvm::report_fatal_error("Could not get symbol flags");
+
+			if (flags & llvm::object::SymbolRef::SF_Absolute
+					== llvm::object::SymbolRef::SF_Absolute) {
+				uint64_t AbsValue;
+
+				EC = si->getAddress(AbsValue);
+				if (EC)
+					llvm::report_fatal_error(
+							"Could not get absolute symbol st_value");
+
+				AbsoluteAtoms._atoms.push_back(new (AtomStorage.Allocate<
+						ELFAbsoluteAtom>())ELFAbsoluteAtom(*this, Name,
+						AbsValue));
+			} else if (flags & llvm::object::SymbolRef::SF_Undefined
+					== llvm::object::SymbolRef::SF_Undefined) {
+				UndefinedAtoms._atoms.push_back(new (AtomStorage.Allocate<
+				ELFUndefinedAtom>()) ELFUndefinedAtom(*this, Name, si));
+
+			} else {
+				bool Result;
+				//FIXME need to figure out a way to handle common type
+				//symbols. For now we abort
+
+				EC = si->isCommon(Result);
+				assert(!Result && "STT_COMMON type is not yet implemented");
+				//TODO We need to think about how to put other
+				//symbol types into the defined atom model.

This is a major point. We need a better way to handle different types of atoms
then just the predefined 4.

+
+
+				if (type == llvm::object::SymbolRef::ST_Data || type
+						== llvm::object::SymbolRef::ST_Function) {
+
+					llvm::object::SectionRef SR;
+					llvm::object::section_iterator section(SR);
+
+					EC = si->getSection(section);
+					if (EC)
+						return;
+					//FIXME We need an API to get the actual st_value
+					//for now shorting this array reference to
+					//suppress errors during build time.
+					//Need to fix this

We have this in Object/ELF.h.

+
+					llvm::ArrayRef<uint8_t> val;
+					DefinedAtoms._atoms.push_back(
+						new (AtomStorage.Allocate<ELFDefinedAtom<
+							target_endianness, is64Bits> >()) ELFDefinedAtom<
+							target_endianness, is64Bits>(*this, Name,
+							si, section, val));
+
+				}
+			}
+		}
+	}
+
+	virtual void addAtom(const Atom&) {
+		llvm_unreachable("cannot add atoms to native .o files");
+	}
+
+	virtual const atom_collection<DefinedAtom> &defined() const {
+		return DefinedAtoms;
+	}
+
+	virtual const atom_collection<UndefinedAtom> &undefined() const {
+		return UndefinedAtoms;
+	}
+
+	virtual const atom_collection<SharedLibraryAtom> &sharedLibrary() const {
+		return SharedLibraryAtoms;
+	}
+
+	virtual const atom_collection<AbsoluteAtom> &absolute() const {
+		return AbsoluteAtoms;
+	}
+
+private:
+	std::unique_ptr<llvm::object::ELFObjectFile<target_endianness, is64Bits> >
+			Obj;
+	atom_collection_vector<DefinedAtom> DefinedAtoms;
+	atom_collection_vector<UndefinedAtom> UndefinedAtoms;
+	atom_collection_vector<SharedLibraryAtom> SharedLibraryAtoms;
+	atom_collection_vector<AbsoluteAtom> AbsoluteAtoms;
+	llvm::BumpPtrAllocator AtomStorage;
+
+};
+
+//	ReaderELF is reader object that will instantiate correct FileELF
+//	by examining the memory buffer for ELF class and bitwidth
+
+class ReaderELF: public Reader {
+public:
+	ReaderELF(const ReaderOptionsELF &options) :
+		_options(options) {
+	}
+	error_code parseFile(std::unique_ptr<MemoryBuffer> mb, std::vector<
+			std::unique_ptr<File> > &result) {
+
+		std::pair<unsigned char, unsigned char> Ident =
+				llvm::object::getElfArchType(&*mb);
+		llvm::error_code ec;
+		//		Instantiate the correct FileELF template instance
+		//		based on the Ident pair. Once the File is created
+		//		 we push the file to the vector of files already
+		//		 created during parser's life.
+
+		std::unique_ptr<File> f;
+		if (Ident.first == llvm::ELF::ELFCLASS32 && Ident.second
+				== llvm::ELF::ELFDATA2LSB) {
+			f.reset(new FileELF<llvm::support::little, false> (std::move(mb),
+					ec));
+		} else if (Ident.first == llvm::ELF::ELFCLASS32 && Ident.second
+				== llvm::ELF::ELFDATA2MSB) {
+			f.reset(new FileELF<llvm::support::big, false> (std::move(mb), ec));
+		} else if (Ident.first == llvm::ELF::ELFCLASS64 && Ident.second
+				== llvm::ELF::ELFDATA2MSB) {
+			f.reset(new FileELF<llvm::support::big, true> (std::move(mb), ec));
+		} else if (Ident.first == llvm::ELF::ELFCLASS64 && Ident.second
+				== llvm::ELF::ELFDATA2LSB) {
+			f.reset(new FileELF<llvm::support::little, true>(std::move(mb),
+					ec));
+		}
+		if (ec)
+			return ec;
+		result.push_back(std::move(f));
+		return error_code::success();
+	}
+private:
+	const ReaderOptionsELF &_options;
+};
+
+ReaderOptionsELF::ReaderOptionsELF() {
+}
+
+ReaderOptionsELF::~ReaderOptionsELF() {
+}
+
+namespace lld {
+//This reader is not complete so I wont "enable" the knob
+	Reader* createReaderELF(const ReaderOptionsELF &options) {
+		assert(0 && "ELF Reader not yet implemented");

It's fine to enable it. And we use llvm_unreachable instead of assert 0.

+		return new ReaderELF(options);
+	}
+} // namespace lld

Thanks for working on this. I think with using the ELF api and style
fixes this would be good.

- Michael Spencer




More information about the llvm-commits mailing list