[lld] r185283 - [PECOFF][Reader] Create a jump table for functions exported by DLL.

Rui Ueyama ruiu at google.com
Sun Jun 30 19:01:09 PDT 2013


On Sun, Jun 30, 2013 at 10:54 AM, Reid Kleckner <rnk at google.com> wrote:

> On Sun, Jun 30, 2013 at 6:33 AM, Rui Ueyama <ruiu at google.com> wrote:
>
>> Author: ruiu
>> Date: Sun Jun 30 08:33:36 2013
>> New Revision: 185283
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=185283&view=rev
>> Log:
>> [PECOFF][Reader] Create a jump table for functions exported by DLL.
>>
>> Modified:
>>     lld/trunk/lib/ReaderWriter/PECOFF/Atoms.h
>>     lld/trunk/lib/ReaderWriter/PECOFF/GroupedSectionsPass.h
>>     lld/trunk/lib/ReaderWriter/PECOFF/ReaderImportHeader.cpp
>>     lld/trunk/test/pecoff/importlib.test
>>
>> Modified: lld/trunk/lib/ReaderWriter/PECOFF/Atoms.h
>> URL:
>> http://llvm.org/viewvc/llvm-project/lld/trunk/lib/ReaderWriter/PECOFF/Atoms.h?rev=185283&r1=185282&r2=185283&view=diff
>>
>> ==============================================================================
>> --- lld/trunk/lib/ReaderWriter/PECOFF/Atoms.h (original)
>> +++ lld/trunk/lib/ReaderWriter/PECOFF/Atoms.h Sun Jun 30 08:33:36 2013
>> @@ -235,28 +235,22 @@ private:
>>
>>  class COFFSharedLibraryAtom : public SharedLibraryAtom {
>>  public:
>> -  enum class Kind {
>> -    DATA, FUNC
>> -  };
>> +  COFFSharedLibraryAtom(const File &file, StringRef symbolName,
>> +                        StringRef originalName, StringRef loadName)
>> +      : _file(file), _symbolName(symbolName), _loadName(loadName),
>> +        _originalName(originalName) {}
>>
>>    virtual const File &file() const { return _file; }
>>    virtual StringRef name() const { return _symbolName; }
>>    virtual StringRef loadName() const { return _loadName; }
>>    virtual bool canBeNullAtRuntime() const { return false; }
>> -
>> -  Kind getKind() const { return _kind; }
>> -
>> -protected:
>> -  COFFSharedLibraryAtom(const File &file, StringRef symbolName,
>> -                        StringRef loadName, Kind kind)
>> -      : _file(file), _symbolName(symbolName), _loadName(loadName),
>> _kind(kind) {
>> -  }
>> +  virtual StringRef originalName() const { return _originalName; }
>>
>>  private:
>>    const File &_file;
>>    StringRef _symbolName;
>>    StringRef _loadName;
>> -  Kind _kind;
>> +  StringRef _originalName;
>>  };
>>
>>
>>  //===----------------------------------------------------------------------===//
>>
>> Modified: lld/trunk/lib/ReaderWriter/PECOFF/GroupedSectionsPass.h
>> URL:
>> http://llvm.org/viewvc/llvm-project/lld/trunk/lib/ReaderWriter/PECOFF/GroupedSectionsPass.h?rev=185283&r1=185282&r2=185283&view=diff
>>
>> ==============================================================================
>> --- lld/trunk/lib/ReaderWriter/PECOFF/GroupedSectionsPass.h (original)
>> +++ lld/trunk/lib/ReaderWriter/PECOFF/GroupedSectionsPass.h Sun Jun 30
>> 08:33:36 2013
>> @@ -45,6 +45,7 @@
>>  #include <algorithm>
>>  #include <map>
>>
>> +using lld::coff::COFFBaseDefinedAtom;
>>  using lld::coff::COFFDefinedAtom;
>>
>>  namespace lld {
>> @@ -76,8 +77,8 @@ private:
>>    SectionToAtomsT filterHeadAtoms(MutableFile &mutableFile) const {
>>      SectionToAtomsT result;
>>      for (const DefinedAtom *atom : mutableFile.defined()) {
>> -      auto *coffAtom = (COFFDefinedAtom *)atom;
>> -      if (coffAtom->ordinal() == 0)
>> +      auto *coffAtom = dyn_cast<COFFDefinedAtom>((COFFBaseDefinedAtom
>> *)atom);
>> +      if (coffAtom && coffAtom->ordinal() == 0)
>>          result[coffAtom->getSectionName()].push_back(coffAtom);
>>      }
>>      return std::move(result);
>>
>> Modified: lld/trunk/lib/ReaderWriter/PECOFF/ReaderImportHeader.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/lld/trunk/lib/ReaderWriter/PECOFF/ReaderImportHeader.cpp?rev=185283&r1=185282&r2=185283&view=diff
>>
>> ==============================================================================
>> --- lld/trunk/lib/ReaderWriter/PECOFF/ReaderImportHeader.cpp (original)
>> +++ lld/trunk/lib/ReaderWriter/PECOFF/ReaderImportHeader.cpp Sun Jun 30
>> 08:33:36 2013
>> @@ -7,8 +7,11 @@
>>  //
>>
>>  //===----------------------------------------------------------------------===//
>>  ///
>> -/// \file \brief This file provides a way to read an import library
>> -/// member in a .lib file.
>> +/// \file \brief This file provides a way to read an import library
>> member in a
>> +/// .lib file.
>> +///
>> +/// Archive Files in Windows
>> +/// ========================
>>  ///
>>  /// In Windows, archive files with .lib file extension serve two
>> different
>>  /// purposes.
>> @@ -17,19 +20,90 @@
>>  ///    normal .obj files and is used for static linking. This is the same
>>  ///    usage as .a file in Unix.
>>  ///
>> -///  - For dynamic linking: An archive file in this case contains pseudo
>> .obj
>> -///    files to describe exported symbols of a DLL. Each .obj file in an
>> archive
>> -///    has a name of an exported symbol and a DLL filename from which
>> the symbol
>> -///    can be imported. When you link a DLL on Windows, you pass the
>> name of the
>> -///    .lib file for the DLL instead of the DLL filename itself. That is
>> the
>> -///    Windows way of linking a shared library.
>> -///
>> -/// This file contains a function to parse the pseudo object file.
>> +///  - For dynamic linking: An archive file in this use case contains
>> pseudo
>> +///    .obj files to describe exported symbols of a DLL. Each pseudo
>> .obj file
>> +///    in an archive has a name of an exported symbol and a DLL filename
>> from
>> +///    which the symbol can be imported. When you link a DLL on Windows,
>> you
>> +///    pass the name of the .lib file for the DLL instead of the DLL
>> filename
>> +///    itself. That is the Windows way of linking against a shared
>> library.
>> +///
>> +/// This file contains a function to handle the pseudo object file.
>> +///
>> +/// Windows Loader and Import Address Table
>> +/// =======================================
>> +///
>> +/// Windows supports a GOT-like mechanism for DLLs. The executable using
>> DLLs
>> +/// contains a list of DLL names and list of symbols that need to be
>> resolved by
>> +/// the loader. Windows loader maps the executable and all the DLLs to
>> memory,
>> +/// resolves the symbols referencing items in DLLs, and updates the
>> import
>> +/// address table in memory. The import address table is an array of
>> pointers to
>> +/// all of the data or functions in DLL referenced by the executable.
>> You cannot
>> +/// access items in DLLs directly. They have to be accessed through an
>> extra
>> +/// level of indirection.
>> +///
>> +/// So, if you want to access an item in DLL, you have to go through a
>> +/// pointer. How do you actually do that? For each symbol in DLL, there
>> is
>> +/// another set of symbols with "_imp__" prefix. For example, if you
>> have a
>> +/// global variable "foo" in a DLL, a pointer to the variable is
>> exported from
>> +/// the DLL as "_imp__foo". You cannot directly use "foo" but need to go
>> through
>> +/// "_imp__foo", because symbol "foo" is not exported.
>>
>
> This terminology seems confusing to me.  "foo" is presumably annotated
> with dllexport, so I consider it to be exported.  Also, the IAT is part of
> the importing image, and the IAT is basically an array of __imp_ symbols,
> right?
>

Variable "foo" is exported from the DLL, but only as "_imp__foo". The
unmangled name is not exported from the DLL for data. As to the IAT, yes,
the IAT is an array of _imp__ symbols.


>
>> +/// Is this OK? That's not that complicated. Because items in a DLL are
>> not
>> +/// directly accessible, you need to access through a pointer, and the
>> pointer
>> +/// is available as a symbol with "_imp__" prefix.
>> +///
>> +/// Trick 1: Although you can write code with "_imp__" prefix, today's
>> compiler
>> +/// and linker let you write code as if there's no extra level of
>> +/// indirection. That's why you haven't seen lots of _imp__ in your
>> code. A
>> +/// variable or a function declared with "dllimport" attributes is
>> treated as an
>> +/// item in a DLL, and the compiler automatically mangles its name and
>> inserts
>> +/// the extra level of indirection when accessing the item. Here are some
>> +/// examples:
>> +///
>> +///   __declspec(dllimport) int var_in_dll;
>> +///   var_in_dll = 3; // is equivalent to *_imp__var_in_dll = 3;
>> +///
>> +///   __declspec(dllimport) int fn_in_dll(void);
>> +///   fn_in_dll();     // is equivalent to (*_imp__fn_in_dll)();
>> +///
>> +/// It's just the compiler rewrites code for you so that you don't need
>> to
>> +/// handle the indirection youself.
>> +///
>> +/// Trick 2: __declspec(dllimport) is mandatory for data but optional for
>> +/// function. For a function, the linker creates a jump table with the
>> original
>> +/// symbol name, so that the function is accessible without "_imp__"
>> prefix. The
>> +/// same function in a DLL can be called through two different symbols
>> if it's
>> +/// not dllimport'ed.
>>
>
> Cool.  I actually spent some time trying to make the Clang/LLVM shared
> library build work on Windows by generating map files from dumpbin
> /symbols, but there was too much data that had to be annotated with
> dllimport.  The resulting build was also too slow for development, which
> was my true goal.
>

Yeah. I guess it's rare for a DLL to export data so it should cover most
cases.

BTW, I noticed while reading the PE/COFF spec that, if we carefully craft
the IAT, we might be able to let the loader fix up the .text section instead
of the .idata section, so that dllimport for data can be omitted. It's only
theoretically possible and has some disadvantages (such as breaking
read-only page sharing), so we probably don't want to do that, though.


>
>> +///   (*_imp__fn)()
>> +///   fn()
>> +///
>> +/// The above functions do the same thing. fn's content is a JMP
>> instruction to
>> +/// branch to the address pointed by _imp__fn. The latter may be a
>> little bit
>> +/// slower than the former because it will execute the extra JMP
>> instruction, but
>> +/// that's not an important point here.
>> +///
>> +/// If a function is dllimport'ed, which is usually done in a header
>> file,
>> +/// mangled name will be used at compile time so the jump table will not
>> be
>> +/// used.
>> +///
>> +/// Because there's no way to hide the indirection for data access at
>> link time,
>> +/// data has to be accessed through dllimport'ed symbols or explicit
>> "_imp__"
>> +/// prefix.
>> +///
>> +/// Creating Atoms for the Import Address Table
>> +/// ===========================================
>> +///
>> +/// This file is to read a pseudo object file and create at most two
>> atoms. One
>> +/// is a shared library atom for "_imp__" symbol. The another is a
>> defined atom
>> +/// for the JMP instruction if the symbol is for a function.
>>  ///
>>
>>  //===----------------------------------------------------------------------===//
>>
>>  #define DEBUG_TYPE "ReaderImportHeader"
>>
>> +#include "Atoms.h"
>> +
>>  #include "lld/Core/File.h"
>>  #include "lld/Core/Error.h"
>>  #include "lld/Core/SharedLibraryAtom.h"
>> @@ -58,20 +132,25 @@ namespace coff {
>>
>>  namespace {
>>
>> -class COFFDynamicAtom : public SharedLibraryAtom {
>> +/// The defined atom for jump table.
>> +class FuncAtom : public COFFBaseDefinedAtom {
>>  public:
>> -  COFFDynamicAtom(File &file, StringRef symbolName, StringRef dllName)
>> -      : _owningFile(file), _symbolName(symbolName), _dllName(dllName) {}
>> +  FuncAtom(const File &file, StringRef symbolName)
>> +      : COFFBaseDefinedAtom(file, symbolName, &rawContent) {}
>>
>> -  virtual const File &file() const { return _owningFile; }
>> -  virtual StringRef name() const { return _symbolName; }
>> -  virtual StringRef loadName() const { return _dllName; }
>> -  virtual bool canBeNullAtRuntime() const { return true; }
>> +  virtual uint64_t ordinal() const { return 0; }
>> +  virtual Scope scope() const { return scopeGlobal; }
>> +  virtual ContentType contentType() const { return typeCode; }
>> +  virtual Alignment alignment() const { return Alignment(1); }
>> +  virtual ContentPermissions permissions() const { return permR_X; }
>>
>>  private:
>> -  const File &_owningFile;
>> -  StringRef _symbolName;
>> -  StringRef _dllName;
>> +  static std::vector<uint8_t> rawContent;
>> +};
>> +
>> +std::vector<uint8_t> FuncAtom::rawContent = {
>> +  0xff, 0x25, 0x00, 0x00, 0x00, 0x00,  // jmp *0x0
>> +  0x90, 0x90,                          // nop; nop
>>  };
>>
>>  class FileImportLibrary : public File {
>> @@ -96,14 +175,17 @@ public:
>>      StringRef symbolName(buf + 20);
>>      StringRef dllName(buf + 20 + symbolName.size() + 1);
>>
>> -    auto *atom = new (allocator.Allocate<COFFDynamicAtom>())
>> -        COFFDynamicAtom(*this, symbolName, dllName);
>> -    _sharedLibraryAtoms._atoms.push_back(atom);
>> +    const COFFSharedLibraryAtom *dataAtom =
>> addSharedLibraryAtom(symbolName,
>> +
>> dllName);
>> +    int type = *reinterpret_cast<const support::ulittle16_t *>(buf + 18)
>> >> 16;
>> +    if (type == llvm::COFF::IMPORT_CODE)
>> +      addDefinedAtom(symbolName, dllName, dataAtom);
>> +
>>      ec = error_code::success();
>>    }
>>
>>    virtual const atom_collection<DefinedAtom> &defined() const {
>> -    return _noDefinedAtoms;
>> +    return _definedAtoms;
>>    }
>>
>>    virtual const atom_collection<UndefinedAtom> &undefined() const {
>> @@ -121,6 +203,28 @@ public:
>>    virtual const TargetInfo &getTargetInfo() const { return _targetInfo; }
>>
>>  private:
>> +  const COFFSharedLibraryAtom *addSharedLibraryAtom(StringRef symbolName,
>> +                                                    StringRef dllName) {
>> +    auto *name = new (allocator.Allocate<std::string>())
>> std::string("__imp_");
>> +    name->append(symbolName);
>> +    auto *atom = new (allocator.Allocate<COFFSharedLibraryAtom>())
>> +        COFFSharedLibraryAtom(*this, *name, symbolName, dllName);
>> +    _sharedLibraryAtoms._atoms.push_back(atom);
>> +    return atom;
>> +  }
>> +
>> +  void addDefinedAtom(StringRef symbolName, StringRef dllName,
>> +                      const COFFSharedLibraryAtom *dataAtom) {
>> +    auto *atom = new (allocator.Allocate<FuncAtom>())
>> +        FuncAtom(*this, symbolName);
>> +
>> +    // The first two byte of the atom is JMP instruction.
>> +    atom->addReference(std::unique_ptr<COFFReference>(
>> +        new COFFReference(dataAtom, 2,
>> llvm::COFF::IMAGE_REL_I386_DIR32)));
>> +    _definedAtoms._atoms.push_back(atom);
>> +  }
>> +
>> +  atom_collection_vector<DefinedAtom> _definedAtoms;
>>    atom_collection_vector<SharedLibraryAtom> _sharedLibraryAtoms;
>>    const TargetInfo &_targetInfo;
>>    mutable llvm::BumpPtrAllocator allocator;
>>
>> Modified: lld/trunk/test/pecoff/importlib.test
>> URL:
>> http://llvm.org/viewvc/llvm-project/lld/trunk/test/pecoff/importlib.test?rev=185283&r1=185282&r2=185283&view=diff
>>
>> ==============================================================================
>> --- lld/trunk/test/pecoff/importlib.test (original)
>> +++ lld/trunk/test/pecoff/importlib.test Sun Jun 30 08:33:36 2013
>> @@ -7,6 +7,12 @@
>>
>>  CHECK: Disassembly of section .text:
>>  CHECK: .text:
>> -CHECK:     1000:       a1 00 00 40 00
>> -CHECK:     1005:       03 05 00 00 40 00
>> -CHECK:     100b:       c3
>> +CHECK:     1000:       a1 0c 10 40 00            movl    4198412, %eax
>> +CHECK:     1005:       03 05 14 10 40 00         addl    4198420, %eax
>> +CHECK:     100b:       c3                        ret
>> +CHECK:     100c:       ff 25 00 00 40 00         jmpl    *4194304
>> +CHECK:     1012:       90                        nop
>> +CHECK:     1013:       90                        nop
>> +CHECK:     1014:       ff 25 00 00 40 00         jmpl    *4194304
>> +CHECK:     101a:       90                        nop
>> +CHECK:     101b:       90                        nop
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20130630/9e1b3904/attachment.html>


More information about the llvm-commits mailing list