[lld] a9ff1ce - [LLD] [COFF] Support linking directly against DLLs in MinGW mode

Martin Storsjö via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 1 23:49:38 PDT 2021


Author: Martin Storsjö
Date: 2021-07-02T09:49:13+03:00
New Revision: a9ff1ce1b9a52add7557cf0579d424c9d0678860

URL: https://github.com/llvm/llvm-project/commit/a9ff1ce1b9a52add7557cf0579d424c9d0678860
DIFF: https://github.com/llvm/llvm-project/commit/a9ff1ce1b9a52add7557cf0579d424c9d0678860.diff

LOG: [LLD] [COFF] Support linking directly against DLLs in MinGW mode

GNU ld.bfd supports linking directly against DLLs without using an
import library, and some projects have picked up on this habit.
(There's no one single unsurmountable issue with using import
libraries, but this is a regularly surfacing missing feature.)

As long as one is linking by name (instead of by ordinal), the DLL
export table contains most of the information needed. (One can
inspect what section a symbol points at, to see if it's a function
or data symbol. The practical implementation of this loops over all
sections for each symbol, but as long as they're not very many, that
should hopefully be tolerable performance wise.)

One exception where the information in the DLL isn't entirely enough
is on i386 with stdcall functions; depending on how they're done,
the exported function name can be a plain undecorated name, while
the import library would contain the full decorated symbol name. This
issue is addressed separately in a different patch.

This is implemented mimicking the structure of a regular import library,
with one InputFile corresponding to the static archive that just adds
lazy symbols, which then are fetched when they are needed. When such
a symbol is fetched, we synthesize a coff_import_header structure
in memory and create a regular ImportFile out of it.

The implementation could be even smaller by just creating ImportFiles
for every symbol available immediately, but that would have the
drawback of actually ending up importing all symbols unless running
with GC enabled (and mingw mode defaults to having it disabled for
historical reasons).

Differential Revision: https://reviews.llvm.org/D104530

Added: 
    lld/test/COFF/link-dll-i386.s
    lld/test/COFF/link-dll.s

Modified: 
    lld/COFF/Driver.cpp
    lld/COFF/InputFiles.cpp
    lld/COFF/InputFiles.h
    lld/COFF/SymbolTable.cpp
    lld/COFF/SymbolTable.h
    lld/COFF/Symbols.cpp
    lld/COFF/Symbols.h
    lld/COFF/Writer.cpp

Removed: 
    


################################################################################
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index 0bf82182ec0f4..937f605590bc8 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -234,6 +234,10 @@ void LinkerDriver::addBuffer(std::unique_ptr<MemoryBuffer> mb,
     error(filename + ": is not a native COFF file. Recompile without /GL");
     break;
   case file_magic::pecoff_executable:
+    if (config->mingw) {
+      symtab->addFile(make<DLLFile>(mbref));
+      break;
+    }
     if (filename.endswith_insensitive(".dll")) {
       error(filename + ": bad file type. Did you specify a DLL instead of an "
                        "import library?");

diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp
index 9d36b5e25cfaa..7488ed947698b 100644
--- a/lld/COFF/InputFiles.cpp
+++ b/lld/COFF/InputFiles.cpp
@@ -1138,3 +1138,93 @@ std::string lld::coff::replaceThinLTOSuffix(StringRef path) {
     return (path + repl).str();
   return std::string(path);
 }
+
+static bool isRVACode(COFFObjectFile *coffObj, uint64_t rva, InputFile *file) {
+  for (size_t i = 1, e = coffObj->getNumberOfSections(); i <= e; i++) {
+    const coff_section *sec = CHECK(coffObj->getSection(i), file);
+    if (rva >= sec->VirtualAddress &&
+        rva <= sec->VirtualAddress + sec->VirtualSize) {
+      return (sec->Characteristics & COFF::IMAGE_SCN_CNT_CODE) != 0;
+    }
+  }
+  return false;
+}
+
+void DLLFile::parse() {
+  // Parse a memory buffer as a PE-COFF executable.
+  std::unique_ptr<Binary> bin = CHECK(createBinary(mb), this);
+
+  if (auto *obj = dyn_cast<COFFObjectFile>(bin.get())) {
+    bin.release();
+    coffObj.reset(obj);
+  } else {
+    error(toString(this) + " is not a COFF file");
+    return;
+  }
+
+  if (!coffObj->getPE32Header() && !coffObj->getPE32PlusHeader()) {
+    error(toString(this) + " is not a PE-COFF executable");
+    return;
+  }
+
+  for (const auto &exp : coffObj->export_directories()) {
+    StringRef dllName, symbolName;
+    uint32_t exportRVA;
+    checkError(exp.getDllName(dllName));
+    checkError(exp.getSymbolName(symbolName));
+    checkError(exp.getExportRVA(exportRVA));
+
+    if (symbolName.empty())
+      continue;
+
+    bool code = isRVACode(coffObj.get(), exportRVA, this);
+
+    Symbol *s = make<Symbol>();
+    s->dllName = dllName;
+    s->symbolName = symbolName;
+    s->importType = code ? ImportType::IMPORT_CODE : ImportType::IMPORT_DATA;
+    s->nameType = ImportNameType::IMPORT_NAME;
+
+    if (coffObj->getMachine() == I386) {
+      s->symbolName = symbolName = saver.save("_" + symbolName);
+      s->nameType = ImportNameType::IMPORT_NAME_NOPREFIX;
+    }
+
+    StringRef impName = saver.save("__imp_" + symbolName);
+    symtab->addLazyDLLSymbol(this, s, impName);
+    if (code)
+      symtab->addLazyDLLSymbol(this, s, symbolName);
+  }
+}
+
+MachineTypes DLLFile::getMachineType() {
+  if (coffObj)
+    return static_cast<MachineTypes>(coffObj->getMachine());
+  return IMAGE_FILE_MACHINE_UNKNOWN;
+}
+
+void DLLFile::makeImport(DLLFile::Symbol *s) {
+  if (!seen.insert(s->symbolName).second)
+    return;
+
+  size_t impSize = s->dllName.size() + s->symbolName.size() + 2; // +2 for NULs
+  size_t size = sizeof(coff_import_header) + impSize;
+  char *buf = bAlloc.Allocate<char>(size);
+  memset(buf, 0, size);
+  char *p = buf;
+  auto *imp = reinterpret_cast<coff_import_header *>(p);
+  p += sizeof(*imp);
+  imp->Sig2 = 0xFFFF;
+  imp->Machine = coffObj->getMachine();
+  imp->SizeOfData = impSize;
+  imp->OrdinalHint = 0; // Only linking by name
+  imp->TypeInfo = (s->nameType << 2) | s->importType;
+
+  // Write symbol name and DLL name.
+  memcpy(p, s->symbolName.data(), s->symbolName.size());
+  p += s->symbolName.size() + 1;
+  memcpy(p, s->dllName.data(), s->dllName.size());
+  MemoryBufferRef mbref = MemoryBufferRef(StringRef(buf, size), s->dllName);
+  ImportFile *impFile = make<ImportFile>(mbref);
+  symtab->addFile(impFile);
+}

diff --git a/lld/COFF/InputFiles.h b/lld/COFF/InputFiles.h
index 644369f9ee984..7fc2429372924 100644
--- a/lld/COFF/InputFiles.h
+++ b/lld/COFF/InputFiles.h
@@ -14,6 +14,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringSet.h"
 #include "llvm/BinaryFormat/Magic.h"
 #include "llvm/Object/Archive.h"
 #include "llvm/Object/COFF.h"
@@ -67,7 +68,8 @@ class InputFile {
     LazyObjectKind,
     PDBKind,
     ImportKind,
-    BitcodeKind
+    BitcodeKind,
+    DLLKind
   };
   Kind kind() const { return fileKind; }
   virtual ~InputFile() {}
@@ -393,6 +395,28 @@ class BitcodeFile : public InputFile {
   std::vector<Symbol *> symbols;
 };
 
+// .dll file.
+class DLLFile : public InputFile {
+public:
+  explicit DLLFile(MemoryBufferRef m) : InputFile(DLLKind, m) {}
+  static bool classof(const InputFile *f) { return f->kind() == DLLKind; }
+  void parse() override;
+  MachineTypes getMachineType() override;
+
+  struct Symbol {
+    StringRef dllName;
+    StringRef symbolName;
+    llvm::COFF::ImportNameType nameType;
+    llvm::COFF::ImportType importType;
+  };
+
+  void makeImport(Symbol *s);
+
+private:
+  std::unique_ptr<COFFObjectFile> coffObj;
+  llvm::StringSet<> seen;
+};
+
 inline bool isBitcode(MemoryBufferRef mb) {
   return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode;
 }

diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp
index 024a408ca4545..3e0741839757c 100644
--- a/lld/COFF/SymbolTable.cpp
+++ b/lld/COFF/SymbolTable.cpp
@@ -75,6 +75,11 @@ static void forceLazy(Symbol *s) {
   case Symbol::Kind::LazyObjectKind:
     cast<LazyObject>(s)->file->fetch();
     break;
+  case Symbol::Kind::LazyDLLSymbolKind: {
+    auto *l = cast<LazyDLLSymbol>(s);
+    l->file->makeImport(l->sym);
+    break;
+  }
   default:
     llvm_unreachable(
         "symbol passed to forceLazy is not a LazyArchive or LazyObject");
@@ -540,6 +545,22 @@ void SymbolTable::addLazyObject(LazyObjFile *f, StringRef n) {
   f->fetch();
 }
 
+void SymbolTable::addLazyDLLSymbol(DLLFile *f, DLLFile::Symbol *sym,
+                                   StringRef n) {
+  Symbol *s;
+  bool wasInserted;
+  std::tie(s, wasInserted) = insert(n);
+  if (wasInserted) {
+    replaceSymbol<LazyDLLSymbol>(s, f, sym, n);
+    return;
+  }
+  auto *u = dyn_cast<Undefined>(s);
+  if (!u || u->weakAlias || s->pendingArchiveLoad)
+    return;
+  s->pendingArchiveLoad = true;
+  f->makeImport(sym);
+}
+
 static std::string getSourceLocationBitcode(BitcodeFile *file) {
   std::string res("\n>>> defined at ");
   StringRef source = file->obj->getSourceFileName();

diff --git a/lld/COFF/SymbolTable.h b/lld/COFF/SymbolTable.h
index 870a7151fa8ea..2d3ec65eda236 100644
--- a/lld/COFF/SymbolTable.h
+++ b/lld/COFF/SymbolTable.h
@@ -87,6 +87,7 @@ class SymbolTable {
   Symbol *addUndefined(StringRef name, InputFile *f, bool isWeakAlias);
   void addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym);
   void addLazyObject(LazyObjFile *f, StringRef n);
+  void addLazyDLLSymbol(DLLFile *f, DLLFile::Symbol *sym, StringRef n);
   Symbol *addAbsolute(StringRef n, COFFSymbolRef s);
   Symbol *addRegular(InputFile *f, StringRef n,
                      const llvm::object::coff_symbol_generic *s = nullptr,

diff --git a/lld/COFF/Symbols.cpp b/lld/COFF/Symbols.cpp
index 60ff72aeb5225..8a6a9b27d45fa 100644
--- a/lld/COFF/Symbols.cpp
+++ b/lld/COFF/Symbols.cpp
@@ -70,6 +70,8 @@ InputFile *Symbol::getFile() {
     return sym->file;
   if (auto *sym = dyn_cast<LazyObject>(this))
     return sym->file;
+  if (auto *sym = dyn_cast<LazyDLLSymbol>(this))
+    return sym->file;
   return nullptr;
 }
 

diff --git a/lld/COFF/Symbols.h b/lld/COFF/Symbols.h
index 13e7488d6b879..65412362ef15c 100644
--- a/lld/COFF/Symbols.h
+++ b/lld/COFF/Symbols.h
@@ -61,6 +61,7 @@ class Symbol {
     UndefinedKind,
     LazyArchiveKind,
     LazyObjectKind,
+    LazyDLLSymbolKind,
 
     LastDefinedCOFFKind = DefinedCommonKind,
     LastDefinedKind = DefinedSyntheticKind,
@@ -92,7 +93,8 @@ class Symbol {
   bool isLive() const;
 
   bool isLazy() const {
-    return symbolKind == LazyArchiveKind || symbolKind == LazyObjectKind;
+    return symbolKind == LazyArchiveKind || symbolKind == LazyObjectKind ||
+           symbolKind == LazyDLLSymbolKind;
   }
 
 private:
@@ -309,6 +311,18 @@ class LazyObject : public Symbol {
   LazyObjFile *file;
 };
 
+class LazyDLLSymbol : public Symbol {
+public:
+  LazyDLLSymbol(DLLFile *f, DLLFile::Symbol *s, StringRef n)
+      : Symbol(LazyDLLSymbolKind, n), file(f), sym(s) {}
+  static bool classof(const Symbol *s) {
+    return s->kind() == LazyDLLSymbolKind;
+  }
+
+  DLLFile *file;
+  DLLFile::Symbol *sym;
+};
+
 // Undefined symbols.
 class Undefined : public Symbol {
 public:
@@ -423,6 +437,7 @@ inline uint64_t Defined::getRVA() {
     return cast<DefinedRegular>(this)->getRVA();
   case LazyArchiveKind:
   case LazyObjectKind:
+  case LazyDLLSymbolKind:
   case UndefinedKind:
     llvm_unreachable("Cannot get the address for an undefined symbol.");
   }
@@ -447,6 +462,7 @@ inline Chunk *Defined::getChunk() {
     return cast<DefinedCommon>(this)->getChunk();
   case LazyArchiveKind:
   case LazyObjectKind:
+  case LazyDLLSymbolKind:
   case UndefinedKind:
     llvm_unreachable("Cannot get the chunk of an undefined symbol.");
   }
@@ -467,6 +483,7 @@ union SymbolUnion {
   alignas(DefinedImportThunk) char h[sizeof(DefinedImportThunk)];
   alignas(DefinedLocalImport) char i[sizeof(DefinedLocalImport)];
   alignas(LazyObject) char j[sizeof(LazyObject)];
+  alignas(LazyDLLSymbol) char k[sizeof(LazyDLLSymbol)];
 };
 
 template <typename T, typename... ArgT>

diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index 7a85ecbd456a8..37cbe2bb96a80 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -1583,6 +1583,7 @@ static void maybeAddAddressTakenFunction(SymbolRVASet &addressTakenSyms,
     break;
   case Symbol::LazyArchiveKind:
   case Symbol::LazyObjectKind:
+  case Symbol::LazyDLLSymbolKind:
   case Symbol::UndefinedKind:
     // Undefined symbols resolve to zero, so they don't have an RVA. Lazy
     // symbols shouldn't have relocations.

diff --git a/lld/test/COFF/link-dll-i386.s b/lld/test/COFF/link-dll-i386.s
new file mode 100644
index 0000000000000..7121678770109
--- /dev/null
+++ b/lld/test/COFF/link-dll-i386.s
@@ -0,0 +1,64 @@
+# REQUIRES: x86
+
+## Test creating a DLL and linking against the DLL without using an import
+## library.
+
+## Test on i386 with cdecl decorated symbols.
+
+## Linking the executable with -opt:noref, to make sure that we don't
+## pull in more import entries than what's needed, even if not running GC.
+
+# RUN: split-file %s %t.dir
+
+# RUN: llvm-mc -filetype=obj -triple=i386-windows-gnu %t.dir/lib.s -o %t.lib.o
+# RUN: lld-link -safeseh:no -noentry -dll -def:%t.dir/lib.def %t.lib.o -out:%t.lib.dll -implib:%t.implib.lib
+# RUN: llvm-mc -filetype=obj -triple=i386-windows-gnu %t.dir/main.s -o %t.main.o
+# RUN: lld-link -lldmingw %t.main.o -out:%t.main.exe %t.lib.dll -opt:noref -verbose 2>&1 | FileCheck --check-prefix=LOG %s
+# RUN: llvm-readobj --coff-imports %t.main.exe | FileCheck %s
+
+#--- lib.s
+.text
+.global _func1
+_func1:
+  ret
+.global _func2
+_func2:
+  ret
+.global _func3
+_func3:
+  ret
+.data
+.global _variable
+_variable:
+  .int 42
+
+#--- lib.def
+EXPORTS
+func1
+func2
+func3
+variable
+
+#--- main.s
+.text
+.global _mainCRTStartup
+_mainCRTStartup:
+  call _func2
+  movl .refptr._variable, %eax
+  movl (%eax), %eax
+  ret
+
+.section .rdata$.refptr._variable,"dr",discard,.refptr._variable
+.globl .refptr._variable
+.refptr._variable:
+  .long _variable
+
+# CHECK:      Import {
+# CHECK-NEXT:   Name: link-dll-i386.s.tmp.lib.dll
+# CHECK-NEXT:   ImportLookupTableRVA:
+# CHECK-NEXT:   ImportAddressTableRVA
+# CHECK-NEXT:   Symbol: func2
+# CHECK-NEXT:   Symbol: variable
+# CHECK-NEXT: }
+
+# LOG: Automatically importing _variable from link-dll-i386.s.tmp.lib.dll

diff --git a/lld/test/COFF/link-dll.s b/lld/test/COFF/link-dll.s
new file mode 100644
index 0000000000000..997c5b02c7aba
--- /dev/null
+++ b/lld/test/COFF/link-dll.s
@@ -0,0 +1,66 @@
+# REQUIRES: x86
+
+## Test creating a DLL and linking against the DLL without using an import
+## library.
+
+## Explicitly creating an import library but naming it differently than the
+## DLL, to avoid any risk of implicitly referencing it instead of the DLL
+## itself.
+
+## Linking the executable with -opt:noref, to make sure that we don't
+## pull in more import entries than what's needed, even if not running GC.
+
+# RUN: split-file %s %t.dir
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-windows-gnu %t.dir/lib.s -o %t.lib.o
+# RUN: lld-link -noentry -dll -def:%t.dir/lib.def %t.lib.o -out:%t.lib.dll -implib:%t.implib.lib
+# RUN: llvm-mc -filetype=obj -triple=x86_64-windows-gnu %t.dir/main.s -o %t.main.o
+# RUN: lld-link -lldmingw %t.main.o -out:%t.main.exe %t.lib.dll -opt:noref -verbose 2>&1 | FileCheck --check-prefix=LOG %s
+# RUN: llvm-readobj --coff-imports %t.main.exe | FileCheck %s
+
+#--- lib.s
+.text
+.global func1
+func1:
+  ret
+.global func2
+func2:
+  ret
+.global func3
+func3:
+  ret
+.data
+.global variable
+variable:
+  .int 42
+
+#--- lib.def
+EXPORTS
+func1
+func2
+func3
+variable
+
+#--- main.s
+.text
+.global mainCRTStartup
+mainCRTStartup:
+  call func2
+  movq .refptr.variable(%rip), %rax
+  movl (%rax), %eax
+  ret
+
+.section .rdata$.refptr.variable,"dr",discard,.refptr.variable
+.globl .refptr.variable
+.refptr.variable:
+  .quad variable
+
+# CHECK:      Import {
+# CHECK-NEXT:   Name: link-dll.s.tmp.lib.dll
+# CHECK-NEXT:   ImportLookupTableRVA:
+# CHECK-NEXT:   ImportAddressTableRVA
+# CHECK-NEXT:   Symbol: func2
+# CHECK-NEXT:   Symbol: variable
+# CHECK-NEXT: }
+
+# LOG: Automatically importing variable from link-dll.s.tmp.lib.dll


        


More information about the llvm-commits mailing list