[llvm-commits] [lld] Support reading archive libraries

Nick Kledzik kledzik at apple.com
Thu Nov 1 15:47:34 PDT 2012


On Nov 1, 2012, at 12:33 PM, Shankar Easwaran wrote:
> Please review the patch to add reading archive libraries in lld.
> 
> added support in lld core to read archive libraries
> changed llvm lib to support reading GNU style archives
> added a new flag in llvm-nm to display archive index
> added testsuite for llvm-nm

>  
> +  ReaderOptionsArchive readerOptionsArchive(reader, argc, argv);
> +  ReaderArchive *readerArchive = new ReaderArchive(readerOptionsArchive);
> +
> +
>    for (auto path : cmdLineInputFilePaths) {
>      std::vector<std::unique_ptr<File>> files;
> -    if ( error(reader->readFile(path, files)) )
> -      return 1;
> +    File::Kind kind;
> +    error_code EC;
ec

> +    
> +    if (readerSelected == readerYAML) {
> +        kind = File::kindYAML;
> +    }
> +    else 
> +    if ((EC = (File::FileType(path, kind)))) {
> +    	return 1;
> +    }
> +    switch(kind)
> +    {
> +      case File::kindObject:
> +      case File::kindYAML:
> +        if ( error(reader->readFile(path, files)))
> +        	return 1;
> +        break;
> +      
> +      case File::kindArchiveLibrary:
> +        if (error(readerArchive->parseFile(path, files)))
> +        	return 1;
> +        break;
> +      
> +      case File::kindSharedLibrary:
> +        if (error(readerArchive->parseFile(path, files)))
> +        	return 1;
> +        break;
Why are shared libraries going to the archive reader?


> +      
> +      default:
> +        return 1;
> +    }
>      inputFiles.appendFiles(files);
>    }
We need a clean way to invoke the correct Reader for each format.  This work above would have to be replicated by other code wanting to do linking functionality.   I know Michael and I have discussed different ways to organize this.  I think it would be cleaner to leave lld-core's main() as-is and instead in each Reader (e.g. ReaderELF, after it has created the MemoryBuffer, have it call llvm::sys::IdentifyFileType() and if the result is an archive, have it (ReaderELF) hand off the file to ReaderArchive.   


> +///
> +/// The ReaderOptionsArchive encapsulates the options used by the ReaderArchive.
> +/// The option objects are the only way to control the behaviour of Readers.
> +///
> +class ReaderOptionsArchive
> +{
> +public:
> +  /* Create a reader options archive object with the parameters
> +   * reader - the reader object to read files
> +   * argc,argv - for iterating through the command line options
> +   */
> +  ReaderOptionsArchive(Reader *reader, int argc, char *argv[]):
> +                                 _is_force_load(false),
> +                                 _reader(reader)
> +  {
> +  }
> +  
> +  bool is_force_load() const
> +  {
> +    return _is_force_load;
> +  }
> +  
> +  Reader *reader() const
> +  {
> +    return _reader;
> +  }
> +  
> +private:
> +  bool _is_force_load;
> +  Reader *_reader;
> +};
lld uses use camelCase (e.g. isForceLoad() and _isForceLoad).

> 
>    /// Kinds of files that are supported.
>    enum Kind {
> +    kindYAML,              /// < yaml file (.yaml)
>      kindObject,            ///< object file (.o)
>      kindSharedLibrary,     ///< shared library (.so)
>      kindArchiveLibrary,    ///< archive (.a)
> @@ -52,6 +61,63 @@
>      return kindObject;
>    }
> 
There should not be kindYAML here.   These kinds represent the role not the format.



> +  static llvm::error_code FileType(StringRef fileName, Kind &kind) {
> +    OwningPtr<MemoryBuffer> File;
> +    
> +    if (MemoryBuffer::getFile(fileName, File))
> +      return llvm::object::object_error::invalid_file_type;
> +    
> +    MemoryBuffer *Object = File.take();
> +    
> +    if (!Object || Object->getBufferSize() < 64)
> +      return llvm::object::object_error::invalid_file_type;
> +    
> +    llvm::sys::LLVMFileType type = llvm::sys::IdentifyFileType(Object->getBufferStart(),
> +                                    static_cast<unsigned>(Object->getBufferSize()));
> +    switch (type) {
> +      case llvm::sys::ELF_Relocatable_FileType:
> +        kind = kindObject;
> +        return llvm::object::object_error::success;
> +      
> +      case llvm::sys::ELF_SharedObject_FileType:
> +        kind = kindSharedLibrary;
> +        return llvm::object::object_error::success;
> +      
> +      case llvm::sys::Mach_O_Object_FileType:
> +        kind = kindObject;
> +        return llvm::object::object_error::success;
> +      
> +      case llvm::sys::Mach_O_FixedVirtualMemorySharedLib_FileType:
> +      case llvm::sys::Mach_O_DynamicallyLinkedSharedLib_FileType:
> +      case llvm::sys::Mach_O_DynamicLinker_FileType:
> +      case llvm::sys::Mach_O_Bundle_FileType:
> +      case llvm::sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType:
> +      case llvm::sys::Mach_O_DSYMCompanion_FileType:
> +        kind = kindSharedLibrary;
> +        return llvm::object::object_error::success;
> +      
> +      case llvm::sys::COFF_FileType:
> +        kind = kindObject;
> +        return llvm::object::object_error::success;
> +      
> +      case llvm::sys::Archive_FileType:
> +        kind = kindArchiveLibrary;
> +        return llvm::object::object_error::success;
> +      
> +      case llvm::sys::Mach_O_Core_FileType:
> +      case llvm::sys::Mach_O_Executable_FileType:
> +      case llvm::sys::Mach_O_PreloadExecutable_FileType:
> +        return llvm::object::object_error::invalid_file_type;
> +      
> +      case llvm::sys::ELF_Executable_FileType:
> +      case llvm::sys::ELF_Core_FileType:
> +        return llvm::object::object_error::invalid_file_type;
> +      
> +      default:
> +        return llvm::object::object_error::invalid_file_type;
> +    }
> +  }
As I said above, I don't think we need this function here.  It is also inefficient in that it is opening and reading the file.  Putting this functionality later in the specific Readers (e.g. ReaderELF) will be more efficient because the file is already read.  I think this implementation all leaks the MemoryBuffer object.


> +  /// Check if any member of the archive contains an Atom with the
> +  /// specified name and return the File object for that member, or nullptr.
> +  virtual const File *find(StringRef name, bool dataSymbolOnly) const 
> +  {
> +    error_code ec;  
> +    StringRef symname;
> +    std::vector<std::unique_ptr<File>> result;
Move the variables down to their first use.


> +    llvm::object::Archive *_archive;
If this is local variable it should not have an underscore prefix.



> +    
> +    _archive = new llvm::object::Archive(_mb, ec);
Why is the llvm Archive object instantiated on each call to find().  It should be instantiated once.

> +    
> +    if (ec)
> +      return NULL;
> +    
> +    for (auto bs = _archive->begin_symbols(), 
> +              es = _archive->end_symbols(); bs != es; ++bs)
> +    {
> +      if ((ec = bs->getName(symname)))
> +          return NULL;
> +      
> +      if (symname == name) {
> +        llvm::object::Archive::child_iterator ci;
> +        if ((ec = bs->getMember(ci)))
> +          return NULL;
> +        
Can we add a find() method to  llvm::object::Archive?  and let it walk the symbols or use whatever hashing is already provided?

> +        if ((ec = getSymbolForType(ci->getBuffer(), symname, dataSymbolOnly)))
> +          return NULL;
The isDataSym is rarely true.  The fast case should be when it is false to skip this test.


> +        
> +        if ((ec = _options.reader()->parseFile(std::unique_ptr<MemoryBuffer>
> +                                               (ci->getBuffer()), result)))
> +          return NULL;
> +        
> +        for (std::unique_ptr<File> &f : result) {
> +          return f.release();
> +        }
This needs more comments explaining all the ownership transferring that is going on.


> +      }
> +    }
> +    return NULL;
> +  }
> +
> +  virtual void addAtom(const Atom&) {
> +    llvm_unreachable("cannot add atoms to native .o files");
> +  }
> +
> +  virtual const atom_collection<DefinedAtom> &defined() const {
> +    return _definedAtoms;
> +  }
> +
> +  virtual const atom_collection<UndefinedAtom> &undefined() const {
> +    return _undefinedAtoms;
> +  }
> +
> +  virtual const atom_collection<SharedLibraryAtom> &sharedLibrary() const {
> +    return _sharedLibraryAtoms;
> +  }
> +
> +  virtual const atom_collection<AbsoluteAtom> &absolute() const {
> +    return _absoluteAtoms;
> +  }
> +
> +protected:
> +  error_code getSymbolForType(MemoryBuffer *mb, StringRef &symbol, 
> +                              bool &isDataSym) const
Why are the last two parameters by reference?  Are they in-out?  

I think what you are trying to do in this function is check if a particular symbol name in an object file is a data symbol (as opposed to function).  If so, the function could be name isDataSymbol().  You might want to look into using a StringRef instead of a MemoryBuffer, since this is just pointers into the mapped archive file.


> +  {
> +    llvm::object::ObjectFile *obj = 
> +                  llvm::object::ObjectFile::createObjectFile(mb);
> +    error_code ec;
> +    llvm::object::SymbolRef::Type symtype;
> +    uint32_t symflags;
> +    llvm::object::symbol_iterator ibegin = obj->begin_symbols();
> +    llvm::object::symbol_iterator iend = obj->end_symbols();
> +    StringRef symbolname;
> +
> +    for (llvm::object::symbol_iterator i = ibegin; i != iend; i.increment(ec)) {
> +      if (ec) return ec;
> +      
> +      // Get symbol name
> +      if ((ec = (i->getName(symbolname)))) return ec;
> +      
> +      if (symbolname != symbol) 
> +          continue;
> +      
> +      // Get symbol flags
> +      if ((ec = (i->getFlags(symflags)))) return ec;
> +      
> +      if (symflags <= llvm::object::SymbolRef::SF_Undefined)
> +          continue;
> +      
> +      // Get Symbol Type
> +      if ((ec = (i->getType(symtype)))) return ec;
> +      
> +      // If the symbol is a data symbol and we need a data symbol
> +      // return success if there is a match
> +      if (isDataSym) {
> +          if (symtype == llvm::object::SymbolRef::ST_Data) {
> +            return error_code::success();
> +          }
> +          continue;
> +      }
> +      return error_code::success();
> +    }
> +    return llvm::object::object_error::parse_failed;
> +  }
> +
> +private:
> +  llvm::MemoryBuffer *_mb;
> +  ReaderOptionsArchive _options;
> +  atom_collection_vector<DefinedAtom>       _definedAtoms;
> +  atom_collection_vector<UndefinedAtom>     _undefinedAtoms;
> +  atom_collection_vector<SharedLibraryAtom> _sharedLibraryAtoms;
> +  atom_collection_vector<AbsoluteAtom>      _absoluteAtoms;
> +
> +public:
> +  /// only subclasses of ArchiveLibraryFile can be instantiated 
> +  FileArchive(llvm::MemoryBuffer *mb, 
> +    ReaderOptionsArchive &options, StringRef path) : ArchiveLibraryFile(path),
> +                                                     _mb(mb),
> +                                                     _options(options)
> +  { 
> +  }
> +};
> +
> +} // namespace lld
> +
> +#endif // LLD_CORE_FILE_ARCHIVE_FILE_H_
> Index: lib/ReaderWriter/ReaderArchive.cpp
> ===================================================================
> --- lib/ReaderWriter/ReaderArchive.cpp	(revision 0)
> +++ lib/ReaderWriter/ReaderArchive.cpp	(revision 0)
> @@ -0,0 +1,44 @@
> +//===- lib/ReaderWriter/ReaderArchive.cpp - Archive Library Reader--------===//
> +//
> +//                             The LLVM Linker
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===---------------------------------------------------------------------===//
> +#include "lld/ReaderWriter/ReaderArchive.h"
> +#include "lld/Core/FileArchive.h"
> +
> +namespace lld 
> +{
> +  /* Returns a vector of Files that are contained in the archive file */
> +  error_code ReaderArchive::parseFile(StringRef path,
> +  		std::vector<std::unique_ptr<File>> &result)
> +  {
> +    error_code ec;
> +    OwningPtr<llvm::MemoryBuffer> opmb;
> +    if ((ec = llvm::MemoryBuffer::getFile(path, opmb)))
> +    	return ec;
If we change ReaderArchive::parseFile() to be called by specific readers after they've mapped the file, you'll want to change the interface to this method to take a MemoryBuffer instead of the file's path. 


> +    
> +    if (_options.is_force_load())
> +    {
> +      _archive = new llvm::object::Archive(opmb.take(), ec);
How is _archive ever deleted?  If you make the ivar a std::unique_ptr() it will be destroyed automatically in the ReaderArchive() destructor.


> +      if (ec)
> +        return ec;
> +      
> +      for (auto mf = _archive->begin_children(), 
> +                me = _archive->end_children(); mf != me; ++mf)
> +      {
> +      	if ((ec = _options.reader()->parseFile(std::unique_ptr<MemoryBuffer>
> +                                               (mf->getBuffer()), result)))
> +          return ec;
> +      }
> +    }
> +    else {
> +      std::unique_ptr<File> f;
> +      f.reset(new FileArchive(opmb.take(), _options, path));
> +      result.push_back(std::move(f));
> +    }
> +    return llvm::error_code::success();
> +  }
> +}
> Index: lib/ReaderWriter/CMakeLists.txt
> ===================================================================
> --- lib/ReaderWriter/CMakeLists.txt	(revision 166441)
> +++ lib/ReaderWriter/CMakeLists.txt	(working copy)
> @@ -6,4 +6,5 @@
>  add_lld_library(lldReaderWriter
>    Reader.cpp
>    Writer.cpp
> +  ReaderArchive.cpp
>    )


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20121101/ea871020/attachment.html>


More information about the llvm-commits mailing list