[lld] 3587de2 - [lld-macho] Support __dso_handle for C++

Jez Ng via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 30 14:30:32 PDT 2020


Author: Jez Ng
Date: 2020-07-30T14:28:41-07:00
New Revision: 3587de22819869a2925994d8bd75fa1386464660

URL: https://github.com/llvm/llvm-project/commit/3587de22819869a2925994d8bd75fa1386464660
DIFF: https://github.com/llvm/llvm-project/commit/3587de22819869a2925994d8bd75fa1386464660.diff

LOG: [lld-macho] Support __dso_handle for C++

The Itanium C++ ABI requires dylibs to pass a pointer (__dso_handle) to
__cxa_atexit, which does e.g. cleanup of static global variables. The ABI
document says that the pointer can point to any address in one of the dylib's
segments, but in practice ld64 seems to set it to point to the header, so
that's what's implemented here.

Reviewed By: #lld-macho, smeenai

Differential Revision: https://reviews.llvm.org/D83603

Added: 
    lld/test/MachO/dso-handle.s
    lld/test/MachO/invalid/dso-handle-duplicate.s

Modified: 
    lld/MachO/Driver.cpp
    lld/MachO/SymbolTable.cpp
    lld/MachO/SymbolTable.h
    lld/MachO/Symbols.cpp
    lld/MachO/Symbols.h
    lld/MachO/SyntheticSections.h
    lld/MachO/Writer.cpp

Removed: 
    


################################################################################
diff  --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index d76e0115d10f..a6d3eb69b8a0 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -13,6 +13,7 @@
 #include "OutputSegment.h"
 #include "SymbolTable.h"
 #include "Symbols.h"
+#include "SyntheticSections.h"
 #include "Target.h"
 #include "Writer.h"
 
@@ -479,6 +480,7 @@ bool macho::link(llvm::ArrayRef<const char *> argsArr, bool canExitEarly,
   }
 
   createSyntheticSections();
+  symtab->addDSOHandle(in.header);
 
   // Initialize InputSections.
   for (InputFile *file : inputFiles) {

diff  --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp
index 061642d73f44..1a8a1d5ac065 100644
--- a/lld/MachO/SymbolTable.cpp
+++ b/lld/MachO/SymbolTable.cpp
@@ -94,4 +94,17 @@ Symbol *SymbolTable::addLazy(StringRef name, ArchiveFile *file,
   return s;
 }
 
+// Installs the synthetic DSOHandle symbol (named DSOHandle::name) in the
+// symbol table, constructed from the given header section, and returns the
+// table entry that now holds it.
+Symbol *SymbolTable::addDSOHandle(const MachHeaderSection *header) {
+  Symbol *s;
+  bool wasInserted;
+  std::tie(s, wasInserted) = insert(DSOHandle::name);
+  if (!wasInserted) {
+    // An entry with this name already existed. Only a Defined symbol is
+    // reported as an error; any other kind is overwritten below silently.
+    if (auto *defined = dyn_cast<Defined>(s))
+      error("found defined symbol from " + defined->isec->file->getName() +
+            " with illegal name " + DSOHandle::name);
+  }
+  // Overwrite the entry in place with a DSOHandle. NOTE(review): this is also
+  // reached after the error above, assuming error() returns -- confirm.
+  replaceSymbol<DSOHandle>(s, header);
+  return s;
+}
+
 SymbolTable *macho::symtab;

diff  --git a/lld/MachO/SymbolTable.h b/lld/MachO/SymbolTable.h
index 088b0e97c840..822eb5b35dac 100644
--- a/lld/MachO/SymbolTable.h
+++ b/lld/MachO/SymbolTable.h
@@ -20,6 +20,7 @@ namespace macho {
 class ArchiveFile;
 class DylibFile;
 class InputSection;
+class MachHeaderSection;
 class Symbol;
 
 /*
@@ -40,6 +41,8 @@ class SymbolTable {
   Symbol *addLazy(StringRef name, ArchiveFile *file,
                   const llvm::object::Archive::Symbol &sym);
 
+  Symbol *addDSOHandle(const MachHeaderSection *);
+
   ArrayRef<Symbol *> getSymbols() const { return symVector; }
   Symbol *find(StringRef name);
 

diff  --git a/lld/MachO/Symbols.cpp b/lld/MachO/Symbols.cpp
index fbafa8a92a4f..af5d9d25029f 100644
--- a/lld/MachO/Symbols.cpp
+++ b/lld/MachO/Symbols.cpp
@@ -8,6 +8,7 @@
 
 #include "Symbols.h"
 #include "InputFiles.h"
+#include "SyntheticSections.h"
 
 using namespace llvm;
 using namespace lld;
@@ -21,3 +22,9 @@ std::string lld::toString(const Symbol &sym) {
     return *s;
   return std::string(sym.getName());
 }
+
+uint64_t DSOHandle::getVA() const { return header->addr; }
+
+uint64_t DSOHandle::getFileOffset() const { return header->fileOff; }
+
+constexpr StringRef DSOHandle::name;

diff  --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
index 2dcccd03a8d0..1e0767a6a12b 100644
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -19,6 +19,7 @@ namespace lld {
 namespace macho {
 
 class InputSection;
+class MachHeaderSection;
 class DylibFile;
 class ArchiveFile;
 
@@ -37,6 +38,7 @@ class Symbol {
     UndefinedKind,
     DylibKind,
     LazyKind,
+    DSOHandleKind,
   };
 
   virtual ~Symbol() {}
@@ -45,9 +47,11 @@ class Symbol {
 
   StringRef getName() const { return {name.data, name.size}; }
 
-  uint64_t getVA() const;
+  virtual uint64_t getVA() const { return 0; }
 
-  uint64_t getFileOffset() const;
+  virtual uint64_t getFileOffset() const {
+    llvm_unreachable("attempt to get an offset from a non-defined symbol");
+  }
 
   virtual bool isWeakDef() const { llvm_unreachable("cannot be weak"); }
 
@@ -70,6 +74,12 @@ class Defined : public Symbol {
 
   static bool classof(const Symbol *s) { return s->kind() == DefinedKind; }
 
+  uint64_t getVA() const override { return isec->getVA() + value; }
+
+  uint64_t getFileOffset() const override {
+    return isec->getFileOffset() + value;
+  }
+
   InputSection *isec;
   uint32_t value;
 
@@ -115,17 +125,32 @@ class LazySymbol : public Symbol {
   const llvm::object::Archive::Symbol sym;
 };
 
-inline uint64_t Symbol::getVA() const {
-  if (auto *d = dyn_cast<Defined>(this))
-    return d->isec->getVA() + d->value;
-  return 0;
-}
+// The Itanium C++ ABI requires dylibs to pass a pointer to __cxa_atexit which
+// does e.g. cleanup of static global variables. The ABI document says that the
+// pointer can point to any address in one of the dylib's segments, but in
+// practice ld64 seems to set it to point to the header, so that's what's
+// implemented here.
+//
+// The ARM C++ ABI uses __dso_handle similarly, but I (int3) have not yet
+// tested this on an ARM platform.
+//
+// DSOHandle effectively functions like a Defined symbol, but it doesn't belong
+// to an InputSection.
+class DSOHandle : public Symbol {
+public:
+  DSOHandle(const MachHeaderSection *header)
+      : Symbol(DSOHandleKind, name), header(header) {}
 
-inline uint64_t Symbol::getFileOffset() const {
-  if (auto *d = dyn_cast<Defined>(this))
-    return d->isec->getFileOffset() + d->value;
-  llvm_unreachable("attempt to get an offset from an undefined symbol");
-}
+  const MachHeaderSection *header;
+
+  uint64_t getVA() const override;
+
+  uint64_t getFileOffset() const override;
+
+  static constexpr StringRef name = "___dso_handle";
+
+  // LLVM-style RTTI hook: a symbol is a DSOHandle only if its kind is
+  // DSOHandleKind, the kind this class's constructor passes to Symbol.
+  static bool classof(const Symbol *s) { return s->kind() == DSOHandleKind; }
+};
 
 union SymbolUnion {
   alignas(Defined) char a[sizeof(Defined)];

diff  --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index a8fbf6c8a265..f3052000be5f 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -273,6 +273,7 @@ class SymtabSection : public SyntheticSection {
 };
 
 struct InStruct {
+  MachHeaderSection *header = nullptr;
   BindingSection *binding = nullptr;
   GotSection *got = nullptr;
   LazyPointerSection *lazyPointers = nullptr;

diff  --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index c9070e90f97e..593e24f6b859 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -53,7 +53,7 @@ class Writer {
   std::unique_ptr<FileOutputBuffer> &buffer;
   uint64_t addr = 0;
   uint64_t fileOff = 0;
-  MachHeaderSection *headerSection = nullptr;
+  MachHeaderSection *header = nullptr;
   LazyBindingSection *lazyBindingSection = nullptr;
   ExportSection *exportSection = nullptr;
   StringTableSection *stringTableSection = nullptr;
@@ -264,20 +264,18 @@ void Writer::scanRelocations() {
 }
 
 void Writer::createLoadCommands() {
-  headerSection->addLoadCommand(
+  in.header->addLoadCommand(
       make<LCDyldInfo>(in.binding, lazyBindingSection, exportSection));
-  headerSection->addLoadCommand(
-      make<LCSymtab>(symtabSection, stringTableSection));
-  headerSection->addLoadCommand(make<LCDysymtab>());
+  in.header->addLoadCommand(make<LCSymtab>(symtabSection, stringTableSection));
+  in.header->addLoadCommand(make<LCDysymtab>());
 
   switch (config->outputType) {
   case MH_EXECUTE:
-    headerSection->addLoadCommand(make<LCMain>());
-    headerSection->addLoadCommand(make<LCLoadDylinker>());
+    in.header->addLoadCommand(make<LCMain>());
+    in.header->addLoadCommand(make<LCLoadDylinker>());
     break;
   case MH_DYLIB:
-    headerSection->addLoadCommand(
-        make<LCDylib>(LC_ID_DYLIB, config->installName));
+    in.header->addLoadCommand(make<LCDylib>(LC_ID_DYLIB, config->installName));
     break;
   default:
     llvm_unreachable("unhandled output file type");
@@ -285,19 +283,19 @@ void Writer::createLoadCommands() {
 
   uint8_t segIndex = 0;
   for (OutputSegment *seg : outputSegments) {
-    headerSection->addLoadCommand(make<LCSegment>(seg->name, seg));
+    in.header->addLoadCommand(make<LCSegment>(seg->name, seg));
     seg->index = segIndex++;
   }
 
   uint64_t dylibOrdinal = 1;
   for (InputFile *file : inputFiles) {
     if (auto *dylibFile = dyn_cast<DylibFile>(file)) {
-      headerSection->addLoadCommand(
+      in.header->addLoadCommand(
           make<LCDylib>(LC_LOAD_DYLIB, dylibFile->dylibName));
       dylibFile->ordinal = dylibOrdinal++;
 
       if (dylibFile->reexport)
-        headerSection->addLoadCommand(
+        in.header->addLoadCommand(
             make<LCDylib>(LC_REEXPORT_DYLIB, dylibFile->dylibName));
     }
   }
@@ -406,7 +404,6 @@ static void sortSegmentsAndSections() {
 
 void Writer::createOutputSections() {
   // First, create hidden sections
-  headerSection = make<MachHeaderSection>();
   lazyBindingSection = make<LazyBindingSection>();
   stringTableSection = make<StringTableSection>();
   symtabSection = make<SymtabSection>(*stringTableSection);
@@ -539,6 +536,7 @@ void Writer::run() {
 void macho::writeResult() { Writer().run(); }
 
 void macho::createSyntheticSections() {
+  in.header = make<MachHeaderSection>();
   in.binding = make<BindingSection>();
   in.got = make<GotSection>();
   in.lazyPointers = make<LazyPointerSection>();

diff  --git a/lld/test/MachO/dso-handle.s b/lld/test/MachO/dso-handle.s
new file mode 100644
index 000000000000..f57ec7260fe0
--- /dev/null
+++ b/lld/test/MachO/dso-handle.s
@@ -0,0 +1,16 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
+
+# RUN: lld -flavor darwinnew %t.o -o %t
+# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s
+# CHECK: leaq {{.*}} # 100000000
+
+# RUN: lld -flavor darwinnew -dylib %t.o -o %t.dylib
+# RUN: llvm-objdump -d --no-show-raw-insn %t.dylib | FileCheck %s --check-prefix=DYLIB-CHECK
+# DYLIB-CHECK: leaq {{.*}} # 0
+
+.globl _main
+.text
+_main:
+  leaq ___dso_handle(%rip), %rdx
+  ret

diff  --git a/lld/test/MachO/invalid/dso-handle-duplicate.s b/lld/test/MachO/invalid/dso-handle-duplicate.s
new file mode 100644
index 000000000000..5991c6faff88
--- /dev/null
+++ b/lld/test/MachO/invalid/dso-handle-duplicate.s
@@ -0,0 +1,20 @@
+# REQUIRES: x86
+
+## If for some bizarre reason the input file defines its own ___dso_handle, we
+## should raise an error. At least, we've implemented this behavior if the
+## conflicting symbol is a global. A local symbol of the same name will still
+## take priority in our implementation, unlike in ld64. But that's a pretty
+## far-out edge case that should be safe to ignore.
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
+# RUN: not lld -flavor darwinnew -dylib %t.o -o %t.dylib 2>&1 | FileCheck %s -DFILE=%t.o
+# CHECK: error: found defined symbol from [[FILE]] with illegal name ___dso_handle
+
+.globl _main, ___dso_handle
+.text
+_main:
+  leaq ___dso_handle(%rip), %rdx
+  ret
+
+___dso_handle:
+  .space 1


        


More information about the llvm-commits mailing list